daily pastebin goal
70%
SHARE
TWEET

Untitled

a guest Dec 13th, 2018 62 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Build the table of every conceivable 3x3 board and seed the value table
  # the agent reads when ranking moves.
  player = ['X', 'O', ' ']
  n_actions = 9   # 9 spaces

  # Every assignment of a mark (or blank) to the nine cells, reshaped from a
  # flat 9-tuple into three rows of three.
  all_possible_states = [
      [list(cells[start:start + 3]) for start in (0, 3, 6)]
      for cells in itertools.product(player, repeat=9)
  ]
  n_states = len(all_possible_states)
  state_values_for_AI = np.full(n_states, 0.0)
  print("n_states = %i \nn_actions = %i" % (n_states, n_actions))

  # Map each state index to its board; indices follow enumeration order.
  states_dict = dict(enumerate(all_possible_states))

  # Boards that already have a winner get a fixed value; every other board
  # keeps the initial 0.0.
  for state_idx, board in states_dict.items():
      winner, _ = check_current_state(board)
      if winner == 'X':     # AI lost
          state_values_for_AI[state_idx] = -1
      elif winner == 'O':   # AI won
          state_values_for_AI[state_idx] = 1
  17.  
  def update_state_value(curr_state_idx, next_state_idx, learning_rate):
      """Move the current state's stored value toward the next state's value.

      Reads and writes the module-level ``state_values_for_AI`` table in
      place: the entry at ``curr_state_idx`` is shifted by ``learning_rate``
      times the gap between the value at ``next_state_idx`` and its own
      value.  Nothing is returned.
      """
      value_gap = state_values_for_AI[next_state_idx] - state_values_for_AI[curr_state_idx]
      state_values_for_AI[curr_state_idx] += learning_rate * value_gap
  21.  
  def getBestMove(state, player, epsilon):
      """Pick the next move for ``player`` with an epsilon-greedy rule.

      Every empty cell of ``state`` is tried on a copy of the board, the
      resulting board is looked up in ``states_dict``, and its value is read
      from ``state_values_for_AI``.  With probability ``epsilon`` a random
      empty cell is returned (exploration); otherwise the cell whose
      resulting board has the highest stored value is returned
      (exploitation).

      Args:
          state: 3x3 board indexed as ``state[row][col]``; ``' '`` marks an
              empty cell.
          player: mark handed to ``play_move`` for the trial moves
              (presumably ``'X'`` or ``'O'`` -- confirm against the caller).
          epsilon: exploration threshold compared against a uniform draw
              from [0, 1).

      Returns:
          The chosen cell number, 1-9 in row-major order.

      Raises:
          ValueError: if the board has no empty cell, or a resulting board
              is not present in ``states_dict``.

      Note:
          The original multiplied the local ``epsilon`` by 0.99 after an
          exploratory move.  That only rebound the local name and never
          reached the caller, so it has been dropped; decay must be applied
          by the caller if it is wanted.
      """
      # Compare marks by value: ``is ' '`` only worked through string
      # interning and triggers a SyntaxWarning on Python 3.8+.
      empty_cells = [
          row * 3 + col + 1
          for row in range(3)
          for col in range(3)
          if state[row][col] == ' '
      ]
      if not empty_cells:
          # Fail with a clear message instead of np.argmax's empty-sequence error.
          raise ValueError('getBestMove called on a board with no empty cells')

      curr_state_values = []
      for empty_cell in empty_cells:
          # Try the move on a copy (assumes copy_game_state returns an
          # independent board -- confirm) so ``state`` itself is untouched here.
          new_state = copy_game_state(state)
          play_move(new_state, player, empty_cell)
          # Reverse lookup without materialising the key and value lists.
          next_state_idx = None
          for idx, known_state in states_dict.items():
              if known_state == new_state:
                  next_state_idx = idx
                  break
          else:
              raise ValueError('resulting board not found in states_dict')
          curr_state_values.append(state_values_for_AI[next_state_idx])

      print('Possible moves = ' + str(empty_cells))
      print('Move values = ' + str(curr_state_values))
      best_move_idx = np.argmax(curr_state_values)

      if np.random.uniform(0, 1) <= epsilon:       # Exploration
          best_move = random.choice(empty_cells)
          print('Agent decides to explore! Takes action = ' + str(best_move))
      else:     # Exploitation
          best_move = empty_cells[best_move_idx]
          print('Agent decides to exploit! Takes action = ' + str(best_move))
      return best_move
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top