# Upgraded (and heavily compressed) version of https://yaya-cout.github.io/Upsilon-Workshop/view/c5eb5ad8-f65f-45da-8c39-0289ebdc90a8
# I can't remember precisely what I changed, though.
# Snake played by a tabular Q-learning agent, drawn with kandinsky.
#
# The short import aliases (F, D, R, A, C, M, K, I) are kept as module-level
# names so the module namespace stays what it was.
from kandinsky import fill_rect as F, draw_string as D

try:
    import numpy as np
except ImportError:
    # Fall back to ulab's numpy-compatible module when numpy is absent.
    from ulab import numpy as np

from random import randint as R, random as A, choice as C
from time import monotonic as M
from ion import keydown as K
from micropython import kbd_intr as I

# Board geometry used by the bounds test and by every drawing call.
_CELL = 11                # pixel pitch of one grid cell (10 px drawn + 1 px gap)
_ROWS, _COLS = 20, 29     # shape of the terrain array
_TOP_ROW = 2              # rows above this one are treated as out of bounds

# Body cells store the direction the snake left them in, so the tail can
# follow the same path: 6 -> +x, 4 -> -x, 2 -> +y, 8 -> -y.
_DIR_CODE = {(1, 0): 6, (-1, 0): 4, (0, 1): 2, (0, -1): 8}
_TAIL_STEP = {6: (1, 0), 4: (-1, 0), 2: (0, 1), 8: (0, -1)}

# ion key codes. Key 4 switches to manual steering while held.
# NOTE(review): 0/1/2/3 steer left/up/down/right in the code below; 36/31/43/38
# are presumably the keypad 4/8/2/6 keys -- confirm against the ion key table.
_KEY_MANUAL = 4
_STEER_KEYS = (
    (0, 36, (-1, 0)),
    (1, 31, (0, -1)),
    (2, 43, (0, 1)),
    (3, 38, (1, 0)),
)


class QLearning:
    """Epsilon-greedy tabular Q-learning.

    The Q-table `lq` is a list of dicts ("buckets"); each bucket maps a state
    code to a `{action: q_value}` dict and holds at most `div` states.
    """

    def __init__(self, acts, div=8):
        """Create an empty table for the actions `acts`.

        Prompts for the initial exploration rate; any failure to read a
        number falls back to 1.0.
        """
        self.acts = acts
        self.step = .3      # learning rate
        self.gamma = .9     # discount factor
        try:
            self.eps = float(input("Epsilon: "))
        except (Exception, KeyboardInterrupt):
            self.eps = 1.0
        self.epsDecayRate = .98
        self.minEps = .001
        self.lq = [dict()]
        self.div = div

    def State(self, s):
        """Encode the situation of snake `s` as an 11-bit integer.

        Bits, most significant first: heading (+x, -x, +y, -y), food position
        relative to the head (x<, x>, y<, y>), then whether the cell reached
        by turning -1, going straight, or turning +1 is blocked.
        """
        turn_a = s.obdir(-1)
        ahead = s.obdir(0)
        turn_b = s.obdir(1)
        flags = (
            s.dx == 1, s.dx == -1, s.dy == 1, s.dy == -1,
            s.x < s.xf, s.x > s.xf, s.y < s.yf, s.y > s.yf,
            s.collide(turn_a[0], turn_a[1]),
            s.collide(ahead[0], ahead[1]),
            s.collide(turn_b[0], turn_b[1]),
        )
        code = 0
        for flag in flags:
            # Force a plain int so the key never becomes a numpy scalar.
            code = code * 2 + (1 if flag else 0)
        return code

    def act(self, state):
        """Return a random action with probability `eps`, else a best one.

        Ties between equally valued actions are broken at random.
        """
        if A() <= self.eps:
            return C(self.acts)
        q_row = self.lq[self.dico(state)][state]
        best = max(q_row.values())
        return C([a for a, q in q_row.items() if q == best])

    def upq(self, state, reward, act, newstate, done):
        """Apply one Q-learning update for taking `act` in `state`.

        `done` truthy means `newstate` is terminal, so its value is ignored.
        The result is truncated to 5 decimals to keep the saved table short.
        """
        # dico() (not find()) so a state first seen through an exploratory
        # action gets registered instead of raising KeyError.
        row = self.lq[self.dico(state)][state]
        next_best = max(self.lq[self.dico(newstate)][newstate].values())
        target = reward + self.gamma * next_best * (1 - done)
        updated = row[act] + self.step * (target - row[act])
        # Scale the whole new value before truncating; scaling only the
        # increment would throw the previous estimate away.
        row[act] = int(updated * 100000) / 100000

    def find(self, state):
        """Return the index of the bucket holding `state`, or -1."""
        for idx, bucket in enumerate(self.lq):
            if state in bucket:
                return idx
        return -1

    def dico(self, state):
        """Return the bucket index of `state`, registering it if unknown.

        A new state gets 0.0 for every action and goes in the last bucket;
        a fresh bucket is opened once the last one holds `div` states.
        """
        idx = self.find(state)
        if idx != -1:
            return idx
        if len(self.lq[-1]) >= self.div:
            self.lq.append(dict())
        self.lq[-1][state] = {a: .0 for a in self.acts}
        return len(self.lq) - 1

    def decEps(self):
        """Decay the exploration rate, never going below `minEps`."""
        self.eps = max(self.minEps, self.eps * self.epsDecayRate)


def comp(n, x):
    """Return `str(n)` left-padded with zeros to at least `x` characters."""
    text = str(n)
    return "0" * (x - len(text)) + text


class Snake:
    """Game state, rendering and the per-generation play loop."""

    def __init__(self, col="w", hunger="grey", div=8, sup=0):
        """Build a snake drawn in `col` chasing food drawn in `hunger`."""
        self.brain = QLearning([-1, 0, 1], div)
        self.sup = sup
        self.hunger = hunger
        self.col = col
        self.best = 0
        self.terrain = np.zeros((_ROWS, _COLS))
        self.reset()
        self.scores = []    # scores of the most recent generations (max 10)

    def reset(self):
        """Put the snake back at its start position and place new food."""
        self.cd = 1
        self.terrain *= 0
        self.time = 0
        self.score = 0
        # Two body cells heading +x; the head cell itself stays 0.
        self.terrain[8][6] = 6
        self.terrain[8][7] = 6
        self.terrain[8][8] = 0
        self.x, self.y = 8, 8       # head
        self.xq, self.yq = 6, 8     # tail
        self.dx, self.dy = 1, 0
        # The ranges here differ from the ones used in move(); kept as found.
        self._spawn_food(27, 18)

    def _spawn_food(self, max_x, max_y):
        """Pick a random free cell (not body, not head) for the food."""
        while True:
            fx, fy = R(0, max_x), R(_TOP_ROW, max_y)
            if self.terrain[fy][fx] == 0 and (fx, fy) != (self.x, self.y):
                self.xf, self.yf = fx, fy
                return

    def _draw_status(self):
        """Draw generation, score, best score and recent mean score."""
        recent = self.scores
        # An empty history has no mean; show 0 rather than rounding NaN.
        mean = round(sum(recent) / len(recent)) if recent else 0
        text = "#{} - {} - B:{} - M:{} ".format(
            self.gen, self.score, self.best, mean)
        D(text, 5, 3, "w", fond)

    def collide(self, dx=0, dy=0, head=0):
        """Return whether the cell at head + (dx, dy) is a wall or body.

        `head` is accepted for compatibility and unused.
        """
        col = self.x + dx
        row = self.y + dy
        if row < _TOP_ROW or row > _ROWS - 1 or col < 0 or col > _COLS - 1:
            return True
        return bool(self.terrain[row][col] != 0)

    def disp(self):
        """Draw the head and the food."""
        F(self.x * _CELL + 1, self.y * _CELL + 1, 10, 10, self.col)
        F(self.xf * _CELL + 1, self.yf * _CELL + 1, 10, 10, self.hunger)

    def move(self):
        """Advance the head one cell and return the step reward.

        Returns 100 when food is eaten; otherwise 1, or -1 once energy has
        dropped to 50 or less. Without food the tail advances too.
        """
        self.terrain[self.y][self.x] = _DIR_CODE.get((self.dx, self.dy), 0)
        self.x += self.dx
        self.y += self.dy

        if self.x != self.xf or self.y != self.yf:
            # int() keeps the comparison in plain Python: numpy scalars do
            # not support the boolean subtraction the tail step relies on.
            code = int(self.terrain[self.yq][self.xq])
            self.terrain[self.yq][self.xq] = 0
            F(1 + _CELL * self.xq, 1 + _CELL * self.yq, 10, 10, fond)
            step = _TAIL_STEP.get(code, (0, 0))
            self.xq += step[0]
            self.yq += step[1]
            return 1 - 2 * (self.energy <= 50)

        self._spawn_food(26, 16)
        F(self.xf * _CELL + 1, self.yf * _CELL + 1, 10, 10, self.hunger)
        self.energy = 200
        self.score += 1
        self.best = max(self.best, self.score)
        self._draw_status()
        return 100

    def obdir(self, dir):
        """Turn a relative action into an absolute (dx, dy).

        0 keeps the current heading; -1 and 1 are the two quarter turns.
        """
        if dir == 0:
            return self.dx, self.dy
        if self.dx == 0:
            return -dir * self.dy, 0
        return 0, dir * self.dx

    def _manual_turn(self):
        """Return the action matching a pressed steering key, or None.

        A key only counts when it points across the current heading.
        """
        for arrow, pad, (tx, ty) in _STEER_KEYS:
            if not (K(arrow) or K(pad)):
                continue
            if tx and self.dx == 0:
                return -tx * self.dy
            if ty and self.dy == 0:
                return ty * self.dx
        return None

    def play(self):
        """Run one generation until the snake hits something or starves."""
        self._draw_status()
        self.energy = 200
        tick = M()
        while not self.collide() and self.energy:
            self.disp()
            action = 0
            # Frame delay; holding the manual key slows the game down and
            # lets the player steer.
            while M() - tick < .02 + K(_KEY_MANUAL) * .2:
                if K(_KEY_MANUAL):
                    turn = self._manual_turn()
                    if turn is not None:
                        action = turn
            tick = M()
            state = self.brain.State(self)
            if not K(_KEY_MANUAL):
                action = self.brain.act(state)
            self.dx, self.dy = self.obdir(action)
            # NOTE(review): energy is still >= 1 here (it is decremented
            # below and the loop stops at 0), so the last term never fires.
            reward = (self.move() - 100 * self.collide()
                      - 100 * (self.energy == 0))
            new_state = self.brain.State(self)
            self.brain.upq(state, reward, action, new_state, self.collide())
            self.brain.decEps()
            self.energy -= 1
            self.time += 1
        if len(self.scores) >= 10:
            del self.scores[0]
        self.scores.append(self.score)


def train(n=100, div=8, gen=0):
    """Play `n` generations on the global snake `s`, numbered from gen+1.

    `div` is accepted for compatibility and unused.
    """
    F(0, 0, 320, 222, "w")
    F(1, 1, 318, 220, fond)
    F(0, 21, 320, 1, "w")       # line under the status bar
    for number in range(gen + 1, gen + n + 1):
        # NOTE(review): y=319 lies below the 222 px area painted above, so
        # this call likely draws nothing visible; kept as found.
        F(0, 319, 320, 1, "w")
        F(1, 22, 318, 199, fond)    # clear the play field
        s.gen = number
        s.play()
        s.reset()


def _pack(bucket):
    """Serialize one Q-table bucket compactly for brain.py."""
    text = str(bucket).replace(" ", "")
    # Drop the leading zero of fractions only right after a ':' so values
    # such as 30.0 are left intact.
    return text.replace(":0.", ":.").replace(":-0.", ":-.")


def game(n=50):
    """Load a saved table if present, train `n` generations, offer to save."""
    try:
        from brain import brain as saved
    except ImportError:
        print("No brain")
    else:
        if saved:
            s.brain.lq = saved

    try:
        train(n, div=8)
    except KeyboardInterrupt:
        pass    # interrupting training still leads to the save prompt

    try:
        if input("save? (y/n): ") != "y":
            return
        with open('brain.py', "w") as f:
            f.write("\nbrain=[")
            for bucket in s.brain.lq:
                f.write(_pack(bucket) + ",\n")
            f.write("]\n")
    except KeyboardInterrupt:
        pass
    except Exception as err:
        print("save failed:", err)


# NOTE(review): presumably makes key code 6 raise KeyboardInterrupt -- confirm
# on the target firmware.
I(6)
del I
fond = "k"      # background colour
s = Snake()
game()