Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import time
- import pickle
- import pandas as pd
- from tqdm import tqdm
# Mark stored in a board cell that no player has claimed yet.
EMPTY = '_'
# Number of rows (and of columns) of the square board.
GRID_SIZE = 3
# Player marks in turn order: the mark at index 0 moves first.
PLAYER = ['X', 'O']
def show_board(board):
    """Print the board to stdout, framed by one blank line above and below.

    Each row is rendered on its own line with a ``|`` before every cell and a
    closing ``|`` at the end, e.g. ``|X|_|O|``.

    Args:
        board: Grid of cell marks (nested lists or a NumPy array). Only the
            first ``GRID_SIZE`` rows and columns are printed.
    """
    grid = np.array(board)
    print()
    for row_idx in range(GRID_SIZE):
        cells = (str(grid[row_idx, col_idx]) for col_idx in range(GRID_SIZE))
        print('|' + '|'.join(cells) + '|')
    print()
def get_legal_moves(board):
    """Return the ``(row, col)`` coordinates of every unclaimed cell.

    Args:
        board: Grid of cell marks (nested lists or a NumPy array).

    Returns:
        A list of ``(row, col)`` tuples in row-major order, one for each cell
        equal to ``EMPTY``; empty when the board is full.
    """
    return [
        (row_idx, col_idx)
        for row_idx, row in enumerate(board)
        for col_idx, cell in enumerate(row)
        if cell == EMPTY
    ]
def get_human_move(player):
    """Prompt on stdin until the player enters a valid numpad square.

    Args:
        player: Mark of the player being prompted; only used in the prompt.

    Returns:
        The ``(row, col)`` board coordinates for the chosen square.
    """
    # Loop rather than recurse: a long run of bad entries must not grow the
    # call stack.
    while True:
        choice = input(f"Player {player}, enter a square: ")
        parsed = map_index(choice)
        # map_index yields False for an unknown key and (True, (row, col))
        # for a known one, so truthiness alone tells the two apart.
        if parsed:
            return parsed[1]
        print("Invalid input! Please enter a number between 1 and 9.")
# Numeric-keypad layout: 7-8-9 is the top row, 1-2-3 the bottom row.
_NUM_PAD_MAP = {
    '7': (0, 0), '8': (0, 1), '9': (0, 2),
    '4': (1, 0), '5': (1, 1), '6': (1, 2),
    '1': (2, 0), '2': (2, 1), '3': (2, 2),
}


def map_index(int_choice):
    """Translate a numpad key (1-9) into board coordinates.

    Args:
        int_choice: The key as a string (``'5'``) or an int (``5``).
            Surrounding whitespace in a string is ignored, so raw ``input()``
            text such as ``'5 '`` is accepted.

    Returns:
        ``(True, (row, col))`` for a key between 1 and 9, otherwise the bare
        value ``False``. Callers rely on the truthiness of the result and on
        ``result[1]``, so the asymmetric return shape is kept.
    """
    key = str(int_choice).strip()
    if key in _NUM_PAD_MAP:
        return True, _NUM_PAD_MAP[key]
    return False
def make_move(move, board, player):
    """Place ``player``'s mark on the board, re-prompting if the square is taken.

    Args:
        move: ``(row, col)`` of the requested square.
        board: Grid of cell marks, modified in place. Indexed as
            ``board[row][col]`` so both NumPy arrays and nested lists work.
        player: Mark to write into the chosen square.

    Returns:
        The same ``board`` object, after the mark has been placed.
    """
    row, col = move
    # Iterate instead of recursing so repeated bad choices cannot exhaust the
    # call stack. Note the replacement move is always requested from a human
    # on stdin, whoever produced the original move.
    while board[row][col] != EMPTY:
        print("That square is already taken!")
        row, col = get_human_move(player)  # Ask for input again
    board[row][col] = player
    return board
def check_win(board):
    """Report whether the game is over and, if so, how it ended.

    Lines are examined in a fixed order: rows, then columns, then the main
    diagonal, then the anti-diagonal; the first completed line decides the
    winner. Only three cells per line are inspected (indices 0-2).

    Args:
        board: Grid of cell marks (nested lists or a NumPy array).

    Returns:
        ``(True, mark)`` when a line holds three equal non-empty marks,
        ``(True, 'Draw')`` when no line is complete and no cell in the 3x3
        area is empty, and ``(False, None)`` otherwise.
    """
    # Rows: every row of the board is checked on its first three cells.
    for cells in board:
        if cells[0] == cells[1] == cells[2] and cells[0] != EMPTY:
            return True, cells[0]

    # Columns first, then the two diagonals, as coordinate triples.
    lines = [((0, col), (1, col), (2, col)) for col in range(3)]
    lines.append(((0, 0), (1, 1), (2, 2)))
    lines.append(((0, 2), (1, 1), (2, 0)))
    for (r0, c0), (r1, c1), (r2, c2) in lines:
        if board[r0][c0] == board[r1][c1] == board[r2][c2] and board[r0][c0] != EMPTY:
            return True, board[r0][c0]

    # No winner: a board without any empty cell is a draw.
    has_empty_cell = any(board[r][c] == EMPTY for r in range(3) for c in range(3))
    if not has_empty_cell:
        return True, 'Draw'
    return False, None
def state_to_index(state):
    """Map a flattened board to a unique Q-table row index.

    The board is read as a signed base-3 number: cell ``i`` contributes
    ``3 ** i`` times ``+1`` for 'X', ``-1`` for 'O' and ``0`` for an empty
    cell. That signed value is then reduced modulo ``3 ** n`` (``n`` = number
    of cells), which yields a distinct index in ``[0, 3 ** n)`` per board.

    Taking ``abs()`` of the signed value instead would give a board and its
    X/O-swapped mirror the same index, making two different positions share
    one Q-table row. With the modulo, non-negative signed values keep that
    value as their index and negative ones move to the upper half of the
    range.

    Args:
        state: Iterable of cell marks ('X', 'O' or ``EMPTY``) in row-major
            order.

    Returns:
        A non-negative int smaller than ``3 ** n``.

    Raises:
        KeyError: If a cell holds anything other than the three known marks.
    """
    mapping = {'X': 1, 'O': -1, EMPTY: 0}
    signed_value = 0
    weight = 1  # 3 ** position; equals 3 ** n once the loop has finished
    for mark in state:
        signed_value += weight * mapping[mark]
        weight *= 3
    # Python's % with a positive modulus is always non-negative.
    return signed_value % weight
def get_action(legal_moves, Q_table, state_index, epsilon):
    """Choose a move with an epsilon-greedy policy.

    Args:
        legal_moves: Sequence of ``(row, col)`` candidates.
        Q_table: Table indexed as ``Q_table[state_index][action_index]``,
            where ``action_index = row * GRID_SIZE + col``.
        state_index: Row of ``Q_table`` for the current board.
        epsilon: Probability of picking a random legal move instead of the
            greedy one; ``0`` always takes the greedy branch.

    Returns:
        The selected ``(row, col)``. In the greedy branch, ties go to the
        earliest move in ``legal_moves``; ``None`` is returned if no move has
        a value above negative infinity (e.g. ``legal_moves`` is empty).
    """
    # Explore: one uniform draw decides, a second one picks the move.
    if np.random.rand() < epsilon:
        return legal_moves[np.random.choice(len(legal_moves))]

    # Exploit: keep the first move whose value strictly beats the best so far.
    chosen_move = None
    chosen_value = float('-inf')
    for candidate in legal_moves:
        value = Q_table[state_index][candidate[0] * GRID_SIZE + candidate[1]]
        if value > chosen_value:
            chosen_move, chosen_value = candidate, value
    return chosen_move
def opponent(moves):
    """Return one element of ``moves`` chosen at random.

    This is the policy of the random opponent: a single
    ``np.random.randint`` draw selects the position in the sequence.

    Args:
        moves: Non-empty sequence of candidate moves.
    """
    return moves[np.random.randint(len(moves))]
def train_agent(episodes):
    """Train a tabular Q-learning agent playing 'X' against a random 'O'.

    Each episode is one game from an empty board. Players alternate in the
    order given by ``PLAYER``; 'X' moves epsilon-greedily via ``get_action``
    and the other player picks uniformly via ``opponent``. The learning rate,
    discount factor and exploration rate are read from the module-level
    names ``alpha``, ``gamma`` and ``epsilon``.

    Only the agent's own (state, action) pairs are updated, and each update
    is applied once its consequence is known:

    * if the game goes on, when 'X' is next to move (after the opponent's
      reply), with reward 0 plus the discounted best value among the moves
      that are legal in that new state;
    * if the game ends, with the final reward alone (+1 'X' wins, -1 'O'
      wins, 0 draw) and no bootstrap term.

    Args:
        episodes: Number of games to play.

    Returns:
        The learned table, shape ``(3 ** (GRID_SIZE ** 2), GRID_SIZE ** 2)``,
        indexed ``[state_to_index(state)][row * GRID_SIZE + col]``.
    """
    n_cells = GRID_SIZE * GRID_SIZE
    q_table = np.zeros((3 ** n_cells, n_cells))

    def nudge_towards(entry, target):
        # Standard temporal-difference step for one table entry.
        state_idx, action_idx = entry
        q_table[state_idx][action_idx] += alpha * (target - q_table[state_idx][action_idx])

    start_time = time.time()
    print("Training started...")
    for episode in tqdm(range(episodes), desc="Training Progress"):
        board = [[EMPTY] * GRID_SIZE for _ in range(GRID_SIZE)]
        game_over = False
        result = None
        # (state_index, action_index) of the agent's latest move whose
        # outcome has not been scored yet.
        pending = None
        turn = 0
        while not game_over:
            legal_moves = get_legal_moves(board)
            mover = PLAYER[turn % 2]
            if mover == 'X':
                state = tuple(cell for board_row in board for cell in board_row)
                state_index = state_to_index(state)
                if pending is not None:
                    # The game survived the agent's previous move and the
                    # reply to it. Bootstrap from legal moves only: entries
                    # of illegal actions are never trained and stay at 0,
                    # which would hide negative values.
                    best_next = max(
                        q_table[state_index][r * GRID_SIZE + c]
                        for r, c in legal_moves
                    )
                    nudge_towards(pending, gamma * best_next)
                row, col = get_action(legal_moves, q_table, state_index, epsilon)
                pending = (state_index, row * GRID_SIZE + col)
            else:
                row, col = opponent(legal_moves)
            board[row][col] = mover
            game_over, result = check_win(board)
            turn += 1

        # Terminal update: the agent's last move is scored with the game's
        # outcome, whichever player made the final move.
        if pending is not None:
            if result == 'X':
                reward = 1
            elif result == 'O':
                reward = -1
            else:
                reward = 0
            nudge_towards(pending, reward)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Training time: {elapsed_time} seconds")
    return q_table
def save_q_table(q_table, filename):
    """Pickle ``q_table`` into ``filename``, replacing any existing file.

    Args:
        q_table: Object to serialize (any picklable value).
        filename: Path of the file to write, opened in binary mode.
    """
    with open(filename, mode='wb') as out_file:
        pickle.dump(q_table, out_file)
def load_q_table(filename):
    """Return the object stored in the pickle file ``filename``.

    Unpickling can execute arbitrary code, so only load files that this
    program (or another trusted source) wrote.

    Args:
        filename: Path of the pickle file, opened in binary mode.
    """
    with open(filename, mode='rb') as in_file:
        return pickle.load(in_file)
def save_q_table_to_excel(q_table, filename):
    """Write the Q-table to an Excel file and echo it to stdout.

    The table is wrapped in a DataFrame, written without the index column,
    and then printed.

    Args:
        q_table: Two-dimensional table of Q-values.
        filename: Destination path handed to ``DataFrame.to_excel``.
    """
    frame = pd.DataFrame(q_table)
    frame.to_excel(filename, index=False)
    print(frame)
def play_game(q_table, wins, losses, draws):
    """Play one game of the greedy agent ('X') against the random opponent.

    The board is printed before every move and once more at the end,
    followed by a line announcing the outcome.

    Args:
        q_table: Learned Q-table consulted by the agent (epsilon is 0, so the
            agent never explores).
        wins: Games won by 'X' so far.
        losses: Games lost by 'X' so far.
        draws: Games drawn so far.

    Returns:
        ``(wins, losses, draws)`` with exactly one counter increased by one.
    """
    board = np.zeros((GRID_SIZE, GRID_SIZE), dtype=str)
    board.fill(EMPTY)

    turn = 0
    finished = False
    while not finished:
        show_board(board)
        mark = PLAYER[turn % 2]
        if mark == 'X':
            state_index = state_to_index(tuple(board.flatten()))
            move = get_action(get_legal_moves(board), q_table, state_index, 0)
            print(f"AI played at {move}")
        else:
            # Random opponent; use get_human_move(mark) here to play by hand.
            move = opponent(get_legal_moves(board))
        board = make_move(move, board, mark)
        finished, result = check_win(board)
        turn += 1

    show_board(board)
    if result == 'Draw':
        print("It's a draw!")
        return (wins, losses, draws + 1)

    print(f"Player {result} wins!")
    if result == 'X':
        return (wins + 1, losses, draws)
    return (wins, losses + 1, draws)
# Q-learning hyperparameters; train_agent reads alpha, gamma and epsilon as
# module-level globals.
alpha = 0.3            # Learning rate: step size of every Q-value update
gamma = 0.9            # Discount factor applied to the next state's best value
epsilon = 1            # Exploration rate: at 1 every training move is random
episodes = 20_000_000  # Number of training games
# Training and persistence are disabled; uncomment to rebuild the table.
# q_table = train_agent(episodes)
# save_q_table(q_table, 'q_table.pkl')
# save_q_table_to_excel(q_table, f'q_table_{episodes}_episodes.xlsx')

# Use the table saved by an earlier training run.
q_table = load_q_table('q_table.pkl')

# Let the trained agent play 1000 games and keep a running tally.
wins = losses = draws = 0
for i in range(1000):
    wins, losses, draws = play_game(q_table, wins, losses, draws)

# Summarize the tally as a two-column table and print it without the index.
df = pd.DataFrame({
    'Result': ['Wins', 'Losses', 'Draws'],
    'Count': [wins, losses, draws],
})
print(df.to_string(index=False))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement