Guest User

Untitled

a guest
Dec 13th, 2018
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.00 KB | None | 0 0
  # Initialize state values
  player = ['X', 'O', ' ']
  states_dict = {}
  # Enumerate every way of filling 9 cells with 'X', 'O' or ' ' and shape each
  # 9-tuple into a board made of three row lists of three cells each.
  all_possible_states = [
      [list(cells[0:3]), list(cells[3:6]), list(cells[6:9])]
      for cells in itertools.product(player, repeat=9)
  ]
  n_states = len(all_possible_states)
  n_actions = 9  # 9 spaces
  # One value per enumerated state, all starting at 0.0.
  state_values_for_AI = np.full((n_states), 0.0)
  print("n_states = %i \nn_actions = %i" % (n_states, n_actions))

  # Register each board under its enumeration index and seed the value table
  # with +1 / -1 for boards that check_current_state reports as won by 'O' / 'X'.
  # Every other board keeps its initial value of 0.0.
  for i, board in enumerate(all_possible_states):
      states_dict[i] = board
      winner, _ = check_current_state(board)
      if winner == 'O':  # AI won
          state_values_for_AI[i] = 1
      elif winner == 'X':  # AI lost
          state_values_for_AI[i] = -1
  17.  
  def update_state_value(curr_state_idx, next_state_idx, learning_rate):
      """Move the current state's value toward the next state's value.

      Applies ``V[curr] <- V[curr] + learning_rate * (V[next] - V[curr])`` to
      the module-level ``state_values_for_AI`` table, modifying it in place.

      Args:
          curr_state_idx: Index into ``state_values_for_AI`` of the entry to
              update.
          next_state_idx: Index into ``state_values_for_AI`` of the entry whose
              value is used as the target.
          learning_rate: Fraction of the gap between the two values to close;
              0 leaves the entry unchanged, 1 copies the target value.

      Returns:
          None. The table entry at ``curr_state_idx`` is overwritten.
      """
      curr_value = state_values_for_AI[curr_state_idx]
      next_value = state_values_for_AI[next_state_idx]
      new_value = curr_value + learning_rate * (next_value - curr_value)
      state_values_for_AI[curr_state_idx] = new_value
  21.  
  def getBestMove(state, player, epsilon):
      """Reinforcement Learning Algorithm.

      Choose a cell for ``player`` on ``state`` with an epsilon-greedy rule:
      with probability ``epsilon`` a random empty cell is returned, otherwise
      the empty cell whose resulting board has the highest entry in
      ``state_values_for_AI``.

      Args:
          state: Board indexed as ``state[row][col]`` for rows and columns
              0..2; a cell equal to ``' '`` counts as empty.
          player: Forwarded unchanged to ``play_move`` when simulating each
              candidate move.
          epsilon: Exploration probability, compared against a uniform draw
              from [0, 1). It is only read here: a float argument cannot be
              updated for the caller from inside this function, so any decay
              schedule has to be applied by the caller.

      Returns:
          The chosen cell number, 1..9 in row-major order
          (``row * 3 + col + 1``).

      Raises:
          ValueError: If ``state`` has no empty cell, or if a simulated board
              is not present in ``states_dict``.
      """
      # Collect the empty cells, numbered 1..9 in row-major order.
      empty_cells = []
      for row in range(3):
          for col in range(3):
              # Compare by value: identity checks against a str literal are
              # implementation-dependent and warn on Python 3.8+.
              if state[row][col] == ' ':
                  empty_cells.append(row * 3 + (col + 1))

      if not empty_cells:
          raise ValueError('getBestMove called on a board with no empty cells')

      # Snapshot the index <-> board mapping once instead of rebuilding both
      # lists for every candidate move.
      # NOTE: the lookup below is still a linear scan over all known boards.
      known_indices = list(states_dict.keys())
      known_boards = list(states_dict.values())

      # Value of the board reached by playing each empty cell.
      curr_state_values = []
      for empty_cell in empty_cells:
          new_state = copy_game_state(state)
          play_move(new_state, player, empty_cell)
          next_state_idx = known_indices[known_boards.index(new_state)]
          curr_state_values.append(state_values_for_AI[next_state_idx])

      print('Possible moves = ' + str(empty_cells))
      print('Move values = ' + str(curr_state_values))

      if np.random.uniform(0, 1) <= epsilon:  # Exploration
          best_move = random.choice(empty_cells)
          print('Agent decides to explore! Takes action = ' + str(best_move))
      else:  # Exploitation
          # np.argmax returns the first maximum, so ties go to the
          # lowest-numbered cell.
          best_move = empty_cells[np.argmax(curr_state_values)]
          print('Agent decides to exploit! Takes action = ' + str(best_move))
      return best_move
Add Comment
Please, Sign In to add comment