"""Tic-tac-toe: tabular Q-learning self-play training, then one interactive
game against the trained agent rendered with turtle graphics."""

import random
import time


class TicTacToeEnv:
    """Minimal tic-tac-toe environment.

    The board is a flat list of 9 cells (row-major); 0 = empty,
    1 = the first player (human), -1 = the second player (agent).
    """

    # Every three-in-a-row index triple on the 3x3 board.
    winning_combos = [
        [0, 1, 2], [3, 4, 5], [6, 7, 8],   # rows
        [0, 3, 6], [1, 4, 7], [2, 5, 8],   # columns
        [0, 4, 8], [2, 4, 6],              # diagonals
    ]

    def __init__(self):
        self.board = [0] * 9
        self.player = 1  # 1 moves first; flips sign after every move

    def reset(self):
        """Clear the board and make player 1 the mover again."""
        self.board = [0] * 9
        self.player = 1

    def get_state(self):
        """Return the board as a hashable tuple (usable as a Q-table key)."""
        return tuple(self.board)

    def take_action(self, action):
        """Place the current player's mark at cell *action* (0-8) and switch turns.

        Note: does not validate the cell; callers must pick an empty one.
        """
        self.board[action] = self.player
        self.player = -self.player

    def is_game_over(self):
        """True when either player has three in a row or the board is full."""
        for a, b, c in self.winning_combos:
            if self.board[a] == self.board[b] == self.board[c] != 0:
                return True
        return all(cell != 0 for cell in self.board)

    def get_reward(self):
        """Terminal reward from player 1's perspective.

        Returns 1 if player 1 won, -1 if player -1 won, 0 on a draw,
        and None while the game is still in progress.
        """
        for a, b, c in self.winning_combos:
            if self.board[a] == self.board[b] == self.board[c] != 0:
                return 1 if self.board[a] == 1 else -1
        if all(cell != 0 for cell in self.board):
            return 0
        return None


class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent keyed on board-state tuples."""

    def __init__(self, alpha=0.5, gamma=0.9, epsilon=0.1):
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration probability
        self.Q = {}             # state tuple -> list of 9 action values

    def get_action(self, state):
        """Pick a move for *state*: random with probability epsilon, else greedy.

        Fix vs. the original: both the random and the greedy choice are
        restricted to EMPTY cells, so the agent can no longer overwrite
        an occupied square.
        """
        valid = [i for i, cell in enumerate(state) if cell == 0]
        if not valid:  # full board: no legal move exists
            return 0
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(valid)
        if state not in self.Q:
            self.Q[state] = [0] * 9
        q = self.Q[state]
        # max() keeps the lowest index on ties, matching the original
        # list.index(max(...)) tie-break.
        return max(valid, key=lambda a: q[a])

    def update(self, state, action, next_state, reward):
        """One-step Q-learning backup for (state, action) -> next_state."""
        if state not in self.Q:
            self.Q[state] = [0] * 9
        if next_state not in self.Q:
            self.Q[next_state] = [0] * 9
        target = reward + self.gamma * max(self.Q[next_state])
        self.Q[state][action] += self.alpha * (target - self.Q[state][action])


def train(episodes=10000):
    """Self-play training loop; returns the trained agent.

    Keeps the original scheme: the Q-table is updated only on the
    terminal transition of each episode.
    """
    env = TicTacToeEnv()
    agent = QLearningAgent()
    for _ in range(episodes):
        state = env.get_state()
        while not env.is_game_over():
            action = agent.get_action(state)
            env.take_action(action)
            next_state = env.get_state()
            reward = env.get_reward()
            if reward is not None:  # terminal transition: learn, end episode
                agent.update(state, action, next_state, reward)
                break
            state = next_state
        env.reset()
    return agent


def _draw_board(board):
    """Render *board* with turtle: a circle for player 1, a cross for -1."""
    import turtle  # local import so headless use of this module still works
    for i in range(3):
        for j in range(3):
            cell = board[i * 3 + j]
            if cell == 1:
                turtle.penup()
                turtle.goto(j * 50 - 50, 150 - i * 50)
                turtle.pendown()
                turtle.circle(20)
            elif cell == -1:
                turtle.penup()
                turtle.goto(j * 50 - 25, 175 - i * 50)
                turtle.pendown()
                turtle.goto(j * 50 + 25, 125 - i * 50)
                turtle.penup()
                turtle.goto(j * 50 - 25, 125 - i * 50)
                turtle.pendown()
                turtle.goto(j * 50 + 25, 175 - i * 50)


def _human_move(env):
    """Prompt until the human enters an in-range, empty cell; return its index.

    Fix vs. the original: also rejects out-of-range coordinates (which
    previously raised IndexError) and non-numeric input.
    """
    while True:
        try:
            row = int(input("Enter row (0-2): "))
            col = int(input("Enter col (0-2): "))
        except ValueError:
            print("Invalid move")
            continue
        if 0 <= row <= 2 and 0 <= col <= 2 and env.board[row * 3 + col] == 0:
            return row * 3 + col
        print("Invalid move")


def main():
    """Train the agent, then play one interactive game against it."""
    agent = train()
    env = TicTacToeEnv()
    state = env.get_state()
    while not env.is_game_over():
        if env.player == 1:
            print("Your turn")
            action = _human_move(env)
        else:
            # Fix vs. the original: removed the stray "Invalid move" print
            # that ran after every agent move.
            action = agent.get_action(state)
        env.take_action(action)
        state = env.get_state()
        _draw_board(env.board)
        time.sleep(1)

    reward = env.get_reward()
    if reward == 1:
        print("You win!")
    elif reward == -1:
        print("Agent wins!")
    else:
        print("Draw")


if __name__ == "__main__":
    main()