# test2.py
# Created by naul on April 01, 2023



import random
import time
import turtle

# Define the game environment
class TicTacToeEnv:
    # The eight winning lines: three rows, three columns and two diagonals.
    winning_combos = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
                      (0, 3, 6), (1, 4, 7), (2, 5, 8),
                      (0, 4, 8), (2, 4, 6)]

    def __init__(self):
        # 0 = empty cell, 1 = player 1, -1 = player -1; player 1 moves first.
        self.board = [0] * 9
        self.player = 1
        
    def reset(self):
        self.board = [0] * 9
        self.player = 1
        
    def get_state(self):
        return tuple(self.board)
        
    def take_action(self, action):
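        # Mark the chosen cell for the current player, then hand the turn to the opponent.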
        self.board[action] = self.player
        self.player = -self.player
        
    def is_game_over(self):
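        # The game ends when a winning line is completed or the board is full.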
        for combo in self.winning_combos:
            if self.board[combo[0]] == self.board[combo[1]] == self.board[combo[2]] and self.board[combo[0]] != 0:
                return True
        if all(pos != 0 for pos in self.board):
            return True
        return False
        
    def get_reward(self):
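        # +1 if player 1 completed a line, -1 if player -1 did, 0 for a full
        # board with no winner, and None while the game is still in progress.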
        for combo in self.winning_combos:
            if self.board[combo[0]] == self.board[combo[1]] == self.board[combo[2]] and self.board[combo[0]] != 0:
                if self.board[combo[0]] == 1:
                    return 1
                else:
                    return -1
        if all(pos != 0 for pos in self.board):
            return 0
        return None

# Define the Q-learning agent
class QLearningAgent:
    def __init__(self, alpha=0.5, gamma=0.9, epsilon=0.1):
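        # alpha: learning rate, gamma: discount factor, epsilon: exploration probability.
        # self.Q maps each board state (a 9-tuple) to a list of 9 action values.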
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.Q = {}
        
    def get_action(self, state):
        # Only empty cells are legal moves.
        valid_actions = [i for i, v in enumerate(state) if v == 0]
        if random.uniform(0, 1) < self.epsilon:
            # Explore: pick a random legal move.
            return random.choice(valid_actions)
        if state not in self.Q:
            self.Q[state] = [0] * 9
        # Exploit: pick the legal move with the highest Q-value.
        q_values = self.Q[state]
        return max(valid_actions, key=lambda a: q_values[a])
        
    def update(self, state, action, next_state, reward):
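        # Standard Q-learning update: Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)).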
        if state not in self.Q:
            self.Q[state] = [0] * 9
        if next_state not in self.Q:
            self.Q[next_state] = [0] * 9
        self.Q[state][action] += self.alpha * (reward + self.gamma * max(self.Q[next_state]) - self.Q[state][action])

# Train the agent
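# The agent plays both sides of each self-play episode; only the final move gets a
# Q-update, using the terminal reward seen from player 1's point of view.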
env = TicTacToeEnv()
agent = QLearningAgent()

for episode in range(10000):
    state = env.get_state()
    while not env.is_game_over():
        action = agent.get_action(state)
        env.take_action(action)
        next_state = env.get_state()
        reward = env.get_reward()
        if reward is not None:
            agent.update(state, action, next_state, reward)
            break
        state = next_state
    env.reset()
    
# Play a game against the trained agent
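# The human is player 1 (circles) and moves first; the agent is player -1 (crosses).
# The board is redrawn with turtle after every move.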
env = TicTacToeEnv()
state = env.get_state()
while not env.is_game_over():
    if env.player == 1:
        print("Your turn")
        valid_move = False
        while not valid_move:
            row = int(input("Enter row (0-2): "))
            col = int(input("Enter col (0-2): "))
            action = row * 3 + col
            # Accept the move only if it is on the board and targets an empty cell.
            if 0 <= row <= 2 and 0 <= col <= 2 and env.board[action] == 0:
                valid_move = True
            else:
                print("Invalid move")
    else:
        action = agent.get_action(state)
        print("Agent plays row %d, col %d" % (action // 3, action % 3))
    env.take_action(action)
    state = env.get_state()
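    # Draw the current board with turtle: column j is centred at x = j * 50,
    # row i at y = 150 - i * 50, and each cell is 50 units wide.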
    for i in range(3):
        for j in range(3):
            if env.board[i * 3 + j] == 1:
                # Player 1: draw a circle centred on the cell.
                turtle.penup()
                turtle.goto(j * 50, 130 - i * 50)
                turtle.pendown()
                turtle.circle(20)
            elif env.board[i * 3 + j] == -1:
                # Player -1: draw a cross from corner to corner of the cell.
                turtle.penup()
                turtle.goto(j * 50 - 25, 175 - i * 50)
                turtle.pendown()
                turtle.goto(j * 50 + 25, 125 - i * 50)
                turtle.penup()
                turtle.goto(j * 50 - 25, 125 - i * 50)
                turtle.pendown()
                turtle.goto(j * 50 + 25, 175 - i * 50)
    time.sleep(1)

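# Report the final result from player 1's (the human's) perspective.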
if env.get_reward() == 1:
    print("You win!")
elif env.get_reward() == -1:
    print("Agent wins!")
else:
    print("Draw")