• API
• FAQ
• Tools
• Archive
SHARE
TWEET # Untitled a guest Dec 13th, 2018 63 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Initialize state values
#
# Builds the lookup tables used by the learning agent:
#   all_possible_states  - every 3x3 arrangement of the three symbols
#   states_dict          - state index -> board (same list objects as above)
#   state_values_for_AI  - per-state value, seeded from the game outcome
player = ['X', 'O', ' ']
states_dict = {}

# itertools.product yields 9-tuples; cut each one into three rows of three.
# The enumeration is purely combinatorial, so it also contains boards that
# could never occur in a real game.
all_possible_states = [
    [list(cells[0:3]), list(cells[3:6]), list(cells[6:9])]
    for cells in itertools.product(player, repeat=9)
]
n_states = len(all_possible_states)
n_actions = 9   # 9 spaces
state_values_for_AI = np.full((n_states), 0.0)
print("n_states = %i \nn_actions = %i"%(n_states, n_actions))

for i, board in enumerate(all_possible_states):
    states_dict[i] = board
    # check_current_state returns a pair; only the winner symbol is used here
    winner, _ = check_current_state(board)
    if winner == 'O':   # AI won
        state_values_for_AI[i] = 1
    elif winner == 'X':   # AI lost
        state_values_for_AI[i] = -1
    # every other state keeps its initial value of 0.0
17.
def update_state_value(curr_state_idx, next_state_idx, learning_rate):
    """
    Move the stored value of the current state towards the next state's value.

    The entry of ``state_values_for_AI`` at ``curr_state_idx`` is shifted by
    ``learning_rate`` times the difference between the value at
    ``next_state_idx`` and its own value. The table is modified in place and
    nothing is returned.
    """
    value_gap = state_values_for_AI[next_state_idx] - state_values_for_AI[curr_state_idx]
    state_values_for_AI[curr_state_idx] += learning_rate*value_gap
21.
def getBestMove(state, player, epsilon):
    """
    Reinforcement Learning Algorithm

    Pick a move for ``player`` on ``state`` using an epsilon-greedy rule over
    the values stored in ``state_values_for_AI``.

    Args:
        state: 3x3 board indexed as ``state[row][col]``; a cell equal to
            ``' '`` is treated as empty.
        player: symbol handed to ``play_move`` when simulating each move.
        epsilon: threshold compared against a uniform draw from [0, 1); when
            the draw is <= epsilon a random empty cell is chosen, otherwise
            the cell whose resulting state has the highest stored value.

    Returns:
        The chosen cell number, ``row*3 + col + 1`` (1-9, row-major).

    Raises:
        ValueError: if the board has no empty cell, or if a simulated board
            is not present in ``states_dict``.
    """
    # Cell numbers of every empty square, in row-major order.
    empty_cells = [
        row*3 + (col+1)
        for row in range(3)
        for col in range(3)
        if state[row][col] == ' '   # compare by value, not identity
    ]
    if not empty_cells:
        raise ValueError('getBestMove called on a board with no empty cells')

    # Snapshot the state table once instead of rebuilding both lists for
    # every candidate move (assumes the helpers below do not modify
    # states_dict - TODO confirm).
    state_keys = list(states_dict.keys())
    state_boards = list(states_dict.values())

    curr_state_values = []
    for empty_cell in empty_cells:
        # Simulate the move on a copy and look up the value of the result.
        new_state = copy_game_state(state)
        play_move(new_state, player, empty_cell)
        next_state_idx = state_keys[state_boards.index(new_state)]
        curr_state_values.append(state_values_for_AI[next_state_idx])

    print('Possible moves = ' + str(empty_cells))
    print('Move values = ' + str(curr_state_values))
    best_move_idx = np.argmax(curr_state_values)

    # NOTE: epsilon is a plain local here; any decay schedule has to be
    # applied by the caller, since rebinding it inside this function would
    # not be visible outside.
    if np.random.uniform(0,1) <= epsilon:       # Exploration
        best_move = random.choice(empty_cells)
        print('Agent decides to explore! Takes action = ' + str(best_move))
    else:     # Exploitation
        best_move = empty_cells[best_move_idx]
        print('Agent decides to exploit! Takes action = ' + str(best_move))
    return best_move
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.
Not a member of Pastebin yet?