import numpy as np
import time
import pickle
import pandas as pd
from tqdm import tqdm

EMPTY = '_'
GRID_SIZE = 3
PLAYER = ['X', 'O']


def show_board(board):
    board = np.array(board)
    print()
    for i in range(GRID_SIZE):
        for j in range(GRID_SIZE):
            print('|', end='')
            print(board[i, j], end='')
        print('|')
    print()


def get_legal_moves(board):
    legal_moves = []
    for i in range(len(board)):
        for j in range(len(board[i])):
            if board[i][j] == EMPTY:
                legal_moves.append((i, j))
    return legal_moves


def get_human_move(player):
    choice = input(f"Player {player}, enter a square: ")
    valid, move = map_index(choice)
    if not valid:
        print("Invalid input! Please enter a number between 1 and 9.")
        return get_human_move(player)
    return move


def map_index(int_choice):
    # Map number-pad keys to board coordinates (7-8-9 is the top row).
    num_pad_map = {'7': (0, 0), '8': (0, 1), '9': (0, 2),
                   '4': (1, 0), '5': (1, 1), '6': (1, 2),
                   '1': (2, 0), '2': (2, 1), '3': (2, 2)}
    if int_choice in num_pad_map:
        return True, num_pad_map[int_choice]
    return False, None


def make_move(move, board, player):
    row, col = move
    if board[row, col] == EMPTY:
        board[row, col] = player
    else:
        print("That square is already taken!")
        move = get_human_move(player)  # Ask for input again
        return make_move(move, board, player)
    return board


def check_win(board):
    # Check rows
    for row in board:
        if row[0] == row[1] == row[2] and row[0] != EMPTY:
            return True, row[0]
    # Check columns
    for col in range(GRID_SIZE):
        if board[0][col] == board[1][col] == board[2][col] and board[0][col] != EMPTY:
            return True, board[0][col]
    # Check diagonals
    if board[0][0] == board[1][1] == board[2][2] and board[0][0] != EMPTY:
        return True, board[0][0]
    if board[0][2] == board[1][1] == board[2][0] and board[0][2] != EMPTY:
        return True, board[0][2]
    # Check draw: every square is occupied and nobody has won
    if all(board[i][j] != EMPTY for i in range(GRID_SIZE) for j in range(GRID_SIZE)):
        return True, 'Draw'
    return False, None


def state_to_index(state):
    # Encode the flattened board as a base-3 number so that every distinct
    # state maps to a unique row of the Q-table. (Mapping 'O' to -1 and
    # taking abs() would collapse different states onto the same index.)
    mapping = {'X': 1, 'O': 2, EMPTY: 0}
    index = 0
    for i, value in enumerate(state):
        index += (3 ** i) * mapping[value]
    return index


def get_action(legal_moves, Q_table, state_index, epsilon):
    # Epsilon-greedy: explore with probability epsilon, otherwise play
    # the legal move with the highest Q-value.
    if np.random.rand() < epsilon:
        action = np.random.choice(len(legal_moves))
        return legal_moves[action]
    best_action = None
    best_q_value = float('-inf')
    for action in legal_moves:
        action_index = action[0] * GRID_SIZE + action[1]
        q_value = Q_table[state_index][action_index]
        if q_value > best_q_value:
            best_q_value = q_value
            best_action = action
    return best_action


def opponent(moves):
    # Random opponent: pick a legal move uniformly at random.
    move = np.random.randint(len(moves))
    return moves[move]


def train_agent(episodes):
    q_table = np.zeros((3 ** (GRID_SIZE * GRID_SIZE), GRID_SIZE * GRID_SIZE))
    start_time = time.time()
    print("Training started...")
    for episode in tqdm(range(episodes), desc="Training Progress"):
        winner = False
        board = [[EMPTY] * GRID_SIZE for _ in range(GRID_SIZE)]
        turn = 0
        while not winner:
            legal_moves = get_legal_moves(board)
            state = tuple(np.array(board).flatten())
            state_index = state_to_index(state)
            if PLAYER[turn % 2] == 'X':
                row, col = get_action(legal_moves, q_table, state_index, epsilon)
            else:
                row, col = opponent(legal_moves)
            board[row][col] = PLAYER[turn % 2]
            winner, result = check_win(board)
            if winner and result == 'X':
                reward = 1
            elif winner and result == 'O':
                reward = -1
            else:  # draw, or game still in progress
                reward = 0
            next_state = tuple(np.array(board).flatten())
            next_state_index = state_to_index(next_state)
            action_index = row * GRID_SIZE + col
            # Q-learning update: nudge the estimate toward the observed
            # reward plus the discounted best value of the next state.
            # Note that the update runs after every move, including the
            # random opponent's.
            q_table[state_index][action_index] += alpha * (
                reward + gamma * np.max(q_table[next_state_index])
                - q_table[state_index][action_index])
            turn += 1
    elapsed_time = time.time() - start_time
    print(f"Training time: {elapsed_time:.2f} seconds")
    return q_table


def save_q_table(q_table, filename):
    with open(filename, 'wb') as f:
        pickle.dump(q_table, f)


def load_q_table(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)


def save_q_table_to_excel(q_table, filename):
    df = pd.DataFrame(q_table)
    df.to_excel(filename, index=False)
    print(df)


def play_game(q_table, wins, losses, draws):
    board = np.full((GRID_SIZE, GRID_SIZE), EMPTY)
    game_over = False
    round = 0
    while not game_over:
        show_board(board)
        if PLAYER[round % 2] == 'X':
            # The trained agent plays greedily (epsilon = 0).
            state = tuple(np.array(board).flatten())
            state_index = state_to_index(state)
            move = get_action(get_legal_moves(board), q_table, state_index, 0)
            print(f"AI played at {move}")
        else:
            # move = get_human_move(PLAYER[round % 2])  # Uncomment to play yourself
            move = opponent(get_legal_moves(board))
        board = make_move(move, board, PLAYER[round % 2])
        game_over, result = check_win(board)
        round += 1
    show_board(board)
    if result == 'Draw':
        print("It's a draw!")
        draws += 1
    else:
        print(f"Player {result} wins!")
        if result == 'X':
            wins += 1
        else:
            losses += 1
    return wins, losses, draws


# Initialize Q-learning parameters
alpha = 0.3           # Learning rate
gamma = 0.9           # Discount factor
epsilon = 1           # Exploration rate (1 = fully random moves during training)
episodes = 20000000   # Number of episodes for training

# Train the agent
# q_table = train_agent(episodes)

# Save the Q-table
# save_q_table(q_table, 'q_table.pkl')
# save_q_table_to_excel(q_table, f'q_table_{episodes}_episodes.xlsx')

# Load a previously trained Q-table
q_table = load_q_table('q_table.pkl')

# Play 1000 games against the random opponent and tally the results
wins, losses, draws = 0, 0, 0
for _ in range(1000):
    wins, losses, draws = play_game(q_table, wins, losses, draws)

# Summarize the results in a DataFrame
df = pd.DataFrame({
    'Result': ['Wins', 'Losses', 'Draws'],
    'Count': [wins, losses, draws]
})
print(df.to_string(index=False))
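
# --- Optional refinement (not in the original script) ---
# epsilon is held at 1 for the whole training run, so the behavior policy is
# fully random. Q-learning is off-policy, so it can still learn from random
# play, but a common variant decays epsilon so later episodes also refine the
# trajectories the greedy policy actually visits. A minimal sketch, assuming
# train_agent were adapted to update the global epsilon once per episode
# (epsilon_min and epsilon_decay are hypothetical names):
#
#     epsilon_min, epsilon_decay = 0.05, 0.9999
#     for episode in tqdm(range(episodes), desc="Training Progress"):
#         ...  # play one episode exactly as in train_agent above
#         epsilon = max(epsilon_min, epsilon * epsilon_decay)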