brian_dot_casa

Untitled

Aug 26th, 2023
import numpy as np

# Tabular Q-learning demo on a small grid world.
# "S" = start, "G" = goal, "#" = wall, other letters = open cells.
env = np.array([["S", "A", "#"],
                ["B", "C", "G"]])
num_rows, num_cols = env.shape

# Define actions (up, down, left, right)
actions = ["UP", "DOWN", "LEFT", "RIGHT"]
num_actions = len(actions)

# Define Q-learning parameters
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.1  # exploration rate for the epsilon-greedy policy
num_episodes = 1000

# Convert environment to a flat array; flat index i is state i
env_flat = env.flatten()
num_states = env_flat.shape[0]

def step(state, action):
    """Apply an action to a flat-state index and return the next state.
    Moves that would leave the grid or enter a wall ("#") leave the
    agent where it is."""
    row, col = divmod(state, num_cols)
    if action == 0:    # UP
        row = max(row - 1, 0)
    elif action == 1:  # DOWN
        row = min(row + 1, num_rows - 1)
    elif action == 2:  # LEFT
        col = max(col - 1, 0)
    else:              # RIGHT
        col = min(col + 1, num_cols - 1)
    next_state = row * num_cols + col
    return state if env_flat[next_state] == "#" else next_state

# Q-learning table (Q-values for each state-action pair)
q_table = np.zeros((num_states, num_actions))

# Q-learning algorithm
for episode in range(num_episodes):
    state = 0  # Starting state (index of "S")
    done = False

    while not done:
        # Choose an action using an epsilon-greedy policy
        if np.random.rand() < epsilon:
            action = np.random.choice(num_actions)  # explore
        else:
            action = np.argmax(q_table[state, :])   # exploit

        # Perform the action and observe the next state and reward
        next_state = step(state, action)
        reward = 1 if env_flat[next_state] == "G" else 0

        # Update the Q-value using the Q-learning update rule
        td_target = reward + discount_factor * np.max(q_table[next_state, :])
        q_table[state, action] += learning_rate * (td_target - q_table[state, action])

        state = next_state
        done = (env_flat[state] == "G")

print("Training complete.\n")

# Print the final Q-table values for each state-action pair
for state in range(num_states):
    state_name = env_flat[state]
    for action in range(num_actions):
        action_name = actions[action]
        q_value = q_table[state, action]
        print(f"State: {state_name}, Action: {action_name}, Q-value: {q_value:.3f}")
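# A minimal illustrative readout of the result: act greedily with respect
# to the learned Q-table (one standard way to use it). Skipping the wall
# "#" and the goal "G" is an assumption made here for this grid, since
# only the open cells need an action.
print("\nGreedy policy:")
for state in range(num_states):
    if env_flat[state] in ("#", "G"):
        continue  # no action is needed at walls or the goal
    best_action = actions[int(np.argmax(q_table[state, :]))]
    print(f"State: {env_flat[state]}, Best action: {best_action}")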