Untitled a guest Dec 13th, 2018

# Enumerate every board configuration and seed the AI's value table.
player = ['X', 'O', ' ']
states_dict = {}
# Each of the 9 cells takes one of the 3 symbols (3**9 boards, reachable or
# not); every 9-tuple is cut into three rows of three cells.
all_possible_states = [
    [list(cells[:3]), list(cells[3:6]), list(cells[6:])]
    for cells in itertools.product(player, repeat=9)
]
n_states = len(all_possible_states)
n_actions = 9   # 9 spaces
state_values_for_AI = np.zeros(n_states)
print("n_states = %i \nn_actions = %i" % (n_states, n_actions))

# Index each board by its position and mark boards that are already decided:
# +1 where 'O' (the AI) has won, -1 where 'X' has won, 0 everywhere else.
for i, board in enumerate(all_possible_states):
    states_dict[i] = board
    winner, _ = check_current_state(board)
    if winner == 'X':   # AI lost
        state_values_for_AI[i] = -1
    elif winner == 'O':   # AI won
        state_values_for_AI[i] = 1
17.
def update_state_value(curr_state_idx, next_state_idx, learning_rate):
    """Nudge the value of one state toward the value of its successor.

    The entry of ``state_values_for_AI`` at ``curr_state_idx`` is moved by
    ``learning_rate`` times its difference from the entry at
    ``next_state_idx``. The table is updated in place; nothing is returned.

    Args:
        curr_state_idx: Index of the state whose value is updated.
        next_state_idx: Index of the state whose value is the target.
        learning_rate: Fraction of the difference applied in this step.
    """
    current_value = state_values_for_AI[curr_state_idx]
    target_value = state_values_for_AI[next_state_idx]
    state_values_for_AI[curr_state_idx] = (
        current_value + learning_rate * (target_value - current_value)
    )
21.
def getBestMove(state, player, epsilon):
    """Pick the agent's next move with an epsilon-greedy rule.

    Each empty cell is tried on a copy of ``state``; the resulting board is
    looked up in ``states_dict`` and scored with ``state_values_for_AI``.
    With probability ``epsilon`` a random empty cell is returned, otherwise
    the cell whose resulting board has the highest stored value.

    Args:
        state: 3x3 board as a sequence of three rows; an empty cell holds
            ``' '``.
        player: Mark passed to ``play_move`` when simulating each move.
        epsilon: Exploration probability, compared against a uniform draw
            from [0, 1).

    Returns:
        The chosen cell number, 1-9 in row-major order.

    Raises:
        ValueError: If the board has no empty cell, or a simulated board is
            not found in ``states_dict``.
    """
    # Cells are numbered 1..9, row by row. Compare with ``==``: ``is`` tests
    # object identity and only worked through string interning.
    empty_cells = [
        row * 3 + (col + 1)
        for row in range(3)
        for col in range(3)
        if state[row][col] == ' '
    ]
    if not empty_cells:
        raise ValueError('getBestMove called on a board with no empty cells')

    # Snapshot keys/boards once; they do not depend on the candidate move.
    state_keys = list(states_dict.keys())
    state_boards = list(states_dict.values())

    curr_state_values = []
    for empty_cell in empty_cells:
        new_state = copy_game_state(state)
        play_move(new_state, player, empty_cell)
        next_state_idx = state_keys[state_boards.index(new_state)]
        curr_state_values.append(state_values_for_AI[next_state_idx])

    print('Possible moves = ' + str(empty_cells))
    print('Move values = ' + str(curr_state_values))

    if np.random.uniform(0, 1) <= epsilon:       # Exploration
        best_move = random.choice(empty_cells)
        print('Agent decides to explore! Takes action = ' + str(best_move))
        # NOTE: the former ``epsilon *= 0.99`` here only rebound the local
        # name and never reached the caller; any decay of epsilon has to be
        # applied by the caller between calls.
    else:     # Exploitation
        best_move = empty_cells[np.argmax(curr_state_values)]
        print('Agent decides to exploit! Takes action = ' + str(best_move))
    return best_move
