test2.py

Created by naul

Created on April 01, 2023

4.15 KB


import math
import cmath
import matplotlib.pyplot as plt
import turtle
import random
import kandinsky
import time

# Define the game environment
class TicTacToeEnv:
    """Tic-tac-toe board for tabular RL.

    Cells hold 0 (empty), 1 (player 1) or -1 (player 2).
    `player` is whose turn it is and alternates on every move.
    """

    # All 8 three-in-a-row index triples.
    winning_combos = [
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    ]

    def __init__(self):
        # NOTE: the original also shadowed `winning_combos` with an
        # identical per-instance list; the class attribute is sufficient.
        self.board = [0] * 9
        self.player = 1

    def reset(self):
        """Clear the board and give the first move back to player 1."""
        self.board = [0] * 9
        self.player = 1

    def get_state(self):
        """Return the board as a hashable tuple (usable as a Q-table key)."""
        return tuple(self.board)

    def take_action(self, action):
        """Place the current player's mark at cell `action` (0-8) and switch turns.

        No validation is performed; callers must pass an empty cell.
        """
        self.board[action] = self.player
        self.player = -self.player

    def _winner(self):
        """Return 1 or -1 if that player has three in a row, else 0."""
        for a, b, c in self.winning_combos:
            if self.board[a] != 0 and self.board[a] == self.board[b] == self.board[c]:
                return self.board[a]
        return 0

    def is_game_over(self):
        """True when someone has won or the board is full (draw)."""
        return self._winner() != 0 or all(cell != 0 for cell in self.board)

    def get_reward(self):
        """Reward from player 1's perspective: 1 win, -1 loss, 0 draw.

        Returns None while the game is still in progress.
        """
        winner = self._winner()
        if winner != 0:
            return winner
        if all(cell != 0 for cell in self.board):
            return 0
        return None

# Define the Q-learning agent
class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent for tic-tac-toe.

    Q maps board states (9-tuples) to a list of 9 action values.
    """

    def __init__(self, alpha=0.5, gamma=0.9, epsilon=0.1):
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration probability
        self.Q = {}             # state tuple -> list of 9 action values

    def get_action(self, state):
        """Pick an action for `state`, epsilon-greedy over EMPTY cells only.

        Bug fix: the original could return an occupied cell both in the
        exploration branch (random.randint over all 9 cells) and in the
        greedy branch (argmax over all 9 actions), which let the caller
        silently overwrite marks. A legal move is a cell whose state
        value is 0.
        """
        legal = [i in range(9) for i in range(0)] or [i for i in range(9) if state[i] == 0]
        if not legal:
            # Full board: the game is over; keep the old fallback so the
            # method never raises.
            return random.randint(0, 8)
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(legal)
        if state not in self.Q:
            self.Q[state] = [0] * 9
        values = self.Q[state]
        # Greedy among legal moves; ties break toward the lowest index,
        # matching the original list.index(max(...)) behavior.
        return max(legal, key=lambda a: values[a])

    def update(self, state, action, next_state, reward):
        """One-step Q-learning update:

        Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        """
        if state not in self.Q:
            self.Q[state] = [0] * 9
        if next_state not in self.Q:
            self.Q[next_state] = [0] * 9
        self.Q[state][action] += self.alpha * (
            reward + self.gamma * max(self.Q[next_state]) - self.Q[state][action]
        )

# Train the agent by self-play Q-learning.
env = TicTacToeEnv()
agent = QLearningAgent()

for episode in range(10000):
    env.reset()
    state = env.get_state()
    while not env.is_game_over():
        action = agent.get_action(state)
        # Guard: never overwrite an occupied cell, even if the agent
        # suggests one — fall back to a random empty cell.
        if env.board[action] != 0:
            action = random.choice([i for i in range(9) if env.board[i] == 0])
        env.take_action(action)
        next_state = env.get_state()
        reward = env.get_reward()
        if reward is not None:
            # Terminal transition: learn from the final reward.
            agent.update(state, action, next_state, reward)
            break
        # Bug fix: the original only updated on terminal transitions, so
        # values never propagated back through intermediate states. Use a
        # zero reward here so one-step bootstrapping works.
        agent.update(state, action, next_state, 0)
        state = next_state
    
# Play a game against the trained agent. Player 1 (human) draws circles,
# the agent (player -1) draws crosses via turtle.
env = TicTacToeEnv()
state = env.get_state()
while not env.is_game_over():
    if env.player == 1:
        print("Your turn")
        valid_move = False
        while not valid_move:
            row = int(input("Enter row (0-2): "))
            col = int(input("Enter col (0-2): "))
            # Bug fix: reject out-of-range coordinates too — the original
            # indexed the board with unchecked input, so e.g. row=3 could
            # raise IndexError or (negative values) wrap around.
            if 0 <= row <= 2 and 0 <= col <= 2 and env.board[row * 3 + col] == 0:
                action = row * 3 + col
                valid_move = True
            else:
                print("Invalid move")
    else:
        action = agent.get_action(state)
        # Bug fix: this branch printed "Invalid move" on every agent turn.
        print("Agent's turn")
        # Guard: if the agent suggests an occupied cell (possible for
        # states unseen in training), pick any empty cell instead.
        if env.board[action] != 0:
            action = random.choice([i for i in range(9) if env.board[i] == 0])
    env.take_action(action)
    state = env.get_state()
    # Redraw the board: circle for player 1, cross for player -1.
    for i in range(3):
        for j in range(3):
            if env.board[i * 3 + j] == 1:
                turtle.penup()
                turtle.goto(j * 50 - 50, 150 - i * 50)
                turtle.pendown()
                turtle.circle(20)
            elif env.board[i * 3 + j] == -1:
                turtle.penup()
                turtle.goto(j * 50 - 25, 175 - i * 50)
                turtle.pendown()
                turtle.goto(j * 50 + 25, 125 - i * 50)
                turtle.penup()
                turtle.goto(j * 50 - 25, 125 - i * 50)
                turtle.pendown()
                turtle.goto(j * 50 + 25, 175 - i * 50)
    time.sleep(1)

if env.get_reward() == 1:
    print("You win!")
elif env.get_reward() == -1:
    print("Agent wins!")
else:
    print("Draw")

During your visit to our site, NumWorks needs to install "cookies" or use other technologies to collect data about you in order to operate the site and measure its audience.

With the exception of Cookies essential to the operation of the site, NumWorks leaves you the choice: you can accept Cookies for audience measurement by clicking on the "Accept and continue" button, or refuse these Cookies by clicking on the "Continue without accepting" button or by continuing your browsing. You can update your choice at any time by clicking on the link "Manage my cookies" at the bottom of the page. For more information, please consult our cookies policy.