ai_snake_ql2.py

Created by mathieu-croslacoste

Created on September 15, 2025

4.63 KB

Upgrade (and a lot of compression) of https://yaya-cout.github.io/Upsilon-Workshop/view/c5eb5ad8-f65f-45da-8c39-0289ebdc90a8

Can’t remember what I did precisely though.


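# Snake on the NumWorks calculator, driven by a tabular Q-learning agent; hold OK to take over with the arrow keys.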
from kandinsky import fill_rect as F,draw_string as D
try:import numpy as np
except:from ulab import numpy as np
from random import randint as R,random as A,choice as C
from time import monotonic as M
from ion import keydown as K
from micropython import kbd_intr as I
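# Tabular Q-learning agent. The Q-table is a list of small dicts (at most div states each) so it can be dumped to and reloaded from brain.py.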
class QLearning:
 def __init__(it,acts,div=8):
  it.acts=acts;it.step=.3;it.gamma=.9
  try:it.eps=float(input("Epsilon: "))
  except:it.eps=1.0
  it.epsDecayRate=.98;it.minEps=.001;it.lq=[dict()];it.div=div
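 # Pack 11 booleans (current heading, food direction relative to the head, obstacle ahead and on each side) into one integer state id.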
 def State(it,s):
  a=s.obdir(-1);b=s.obdir(0);c=s.obdir(1);state=[s.dx==1,s.dx==-1,s.dy==1,s.dy==-1,s.x<s.xf,s.x>s.xf,s.y<s.yf,s.y>s.yf,s.collide(a[0],a[1]),s.collide(b[0],b[1]),s.collide(c[0],c[1])];dec=0
  for i in range(len(state)):dec+=state[len(state)-1-i]*2**i
  return dec
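 # Epsilon-greedy policy: random action with probability eps, otherwise a random pick among the highest-valued actions.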
 def act(it,state):
  if A()<=it.eps:return C(it.acts)
  i=it.dico(state);qVals=it.lq[i][state];maxQ=max(it.lq[i][state].values());actsWithMaxQ=[a for a,q in qVals.items()if q==maxQ];return C(actsWithMaxQ)
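 # Q-learning update Q(s,a) += step*(r + gamma*max Q(s',.)*(1-done) - Q(s,a)), truncated to 5 decimals to keep brain.py small.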
 def upq(it,state,reward,act,newstate,done):maxQnext=max(it.lq[it.dico(newstate)][newstate].values());i=it.find(state);it.lq[i][state][act]=int(it.lq[i][state][act]+it.step*(reward+it.gamma*maxQnext*(1-done)-it.lq[i][state][act])*100000)/100000
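 # Index of the dict chunk holding this state, or -1 if the state is unknown.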
 def find(it,state):
  n=0
  while 1:
   if n==len(it.lq):return-1
   try:it.lq[n][state]
   except:n+=1;continue
   break
  return n
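 # Same as find(), but unseen states get a fresh zero-initialised entry (a new chunk is opened once the last one holds div states).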
 def dico(it,state):
  i=it.find(state)
  if i+1:return i
  if len(it.lq[-1])==it.div:it.lq.append(dict())
  it.lq[-1][state]={a:.0 for a in it.acts};return len(it.lq)-1
 def decEps(it):it.eps=max(it.minEps,it.eps*it.epsDecayRate)
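# Left-pad n with zeros to x digits (helper, not used in this script).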
def comp(n,x):return"0"*(x-len(str(n)))+str(n)
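# Snake on a 29x20 grid of 11px cells (rows 0-1 are reserved for the score line); each occupied cell stores the direction code of its segment (6 right, 4 left, 2 down, 8 up).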
class Snake:
 def __init__(it,col="w",hunger="grey",div=8,sup=0):it.brain=QLearning([-1,0,1],div);it.sup=sup;it.hunger=hunger;it.col=col;it.best=0;it.terrain=np.zeros((20,29));it.reset();it.scores=[]
 def reset(it):it.cd=1;it.terrain*=0;it.time=0;it.score=0;it.terrain[8][6]=6;it.terrain[8][7]=6;it.terrain[8][8]=0;it.x,it.y=8,8;it.xq,it.yq=6,8;it.dx,it.dy=1,0;it.xf,it.yf=R(0,27),R(2,18)
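 # A move is blocked when it leaves the playfield or runs into the body.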
 def collide(it,dx=0,dy=0,head=0):
  if it.y+dy<2 or it.y+dy>19 or it.x+dx<0 or it.x+dx>28:return 1
  return it.terrain[it.y+dy][it.x+dx]!=0
 def disp(it):F(it.x*11+1,it.y*11+1,10,10,it.col);F(it.xf*11+1,it.yf*11+1,10,10,it.hunger)
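 # Advance the head; without food the tail follows and the reward is +1 (-1 once energy <= 50), while eating respawns the food, refills energy and returns +100.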
 def move(it):
  it.terrain[it.y][it.x]=6*(it.dx==1)+4*(it.dx==-1)+2*(it.dy==1)+8*(it.dy==-1);it.x+=it.dx;it.y+=it.dy
  if it.x!=it.xf or it.y!=it.yf:val=it.terrain[it.yq][it.xq];it.terrain[it.yq][it.xq]=0;F(1+11*it.xq,1+11*it.yq,10,10,fond);it.xq+=(val==6)-(val==4);it.yq+=(val==2)-(val==8);return 1-2*(it.energy<=50)#-(not self.calc(0,0))*50
  it.xf,it.yf=R(0,26),R(2,16)
  while it.terrain[it.yf][it.xf]!=0:it.xf,it.yf=R(0,26),R(2,16)
  F(it.xf*11+1,it.yf*11+1,10,10,it.hunger);it.energy=200;it.score+=1;it.best=max(it.best,it.score);D("#"+str(it.gen)+" - "+str(it.score)+" - B:"+str(it.best)+" - M:"+str(round(np.mean(np.array(it.scores))))+"  ",5,3,"w",fond);return 100
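 # (dx,dy) after a relative turn: dir=0 keeps the current heading, dir=1/-1 turns to one side or the other.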
 def obdir(it,dir):
  if dir==0:dx,dy=it.dx,it.dy
  else:
   if it.dx==0:dx=-dir*it.dy;dy=0
   elif it.dy==0:dy=dir*it.dx;dx=0
  return dx,dy
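 # One game: a step runs every 0.02 s (0.22 s while OK is held for manual play with the arrows); the Q-table is updated after every move.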
 def play(it):
  D("#"+str(it.gen)+" - "+str(it.score)+" - B:"+str(it.best)+" - M:"+str(round(np.mean(np.array(it.scores))))+"  ",5,3,"w",fond);it.energy=200;old=M()
  while(not it.collide())and it.energy:
   it.disp();dir=0
   while M()-old<.02+(K(4)*.2):
    if K(4):
     if(K(0)or K(36))and it.dx==0:
      if it.dy==1:dir=1
      else:dir=-1
     elif(K(1)or K(31))and it.dy==0:
      if it.dx==1:dir=-1
      else:dir=1
     elif(K(2)or K(43))and it.dy==0:
      if it.dx==1:dir=1
      else:dir=-1
     elif(K(3)or K(38))and it.dx==0:
      if it.dy==1:dir=-1
      else:dir=1
   old=M();state=it.brain.State(it)
   if 1-K(4):dir=it.brain.act(state)
   it.dx,it.dy=it.obdir(dir);reward=it.move()-100*it.collide()-100*(it.energy==0);newstate=it.brain.State(it);it.brain.upq(state,reward,dir,newstate,it.collide());it.brain.decEps();state=newstate;it.energy-=1;it.time+=1#;l=str((len(it.brain.lq)-1)*it.brain.div+len(it.brain.lq[-1]));D(l,316-len(l)*10,3,"w",fond)
  if len(it.scores)>=10:it.scores.remove(it.scores[0])
  it.scores.append(it.score);return
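# Draw the board and play n games in a row, numbered gen+1 to gen+n; the playfield is wiped between games.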
def train(n=100,div=8,gen=0):
 F(0,0,320,222,"w");F(1,1,318,220,fond);F(0,3+18,320,1,"w")
 for gen in range(1+gen,gen+n+1):F(0,319,320,1,"w");F(1,1+3+18,318,220-3-18,fond);s.gen=gen;s.play();s.reset()
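# Load a previously saved Q-table from brain.py if there is one, train, then optionally write the (whitespace-stripped) table back to brain.py.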
def game(n=50):
 try:from brain import*;s.brain.lq=brain
 except ImportError:print("No brain");gen=0
 try:train(n,div=8)
 except KeyboardInterrupt:0
 try:
  if input("save? (y/n): ")!="y":return
  with open('brain.py',"w")as f:
   f.write("\nbrain=[")
   for i in s.brain.lq:f.write(str(i).replace(" ","").replace("0.",".")+","+"\n")
   f.write("]\n")
 except:0
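# Remap the KeyboardInterrupt character, set the background colour ("k" = black) and start training.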
I(6)
del I
fond="k"
s=Snake()
game()
