brian_dot_casa

Untitled

Aug 26th, 2023
import numpy as np

# Define the environment (grid world)
# S: Start, G: Goal, #: Obstacle
# The agent's goal is to reach G while avoiding obstacles
# -------------
# | S |   | # |
# |   |   | G |
# -------------

# Define the grid world as a numpy array
env = np.array([["S", " ", "#"],
                [" ", " ", "G"]])

# Define actions (up, down, left, right)
actions = ["UP", "DOWN", "LEFT", "RIGHT"]
num_actions = len(actions)

# Define Q-learning parameters
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.1  # exploration rate for the epsilon-greedy policy
num_episodes = 1000

# Convert environment to a flat array; each cell index is one state
env_flat = env.flatten()
n_rows, n_cols = env.shape

# Transition function: apply an action to a flat state index.
# Moves that would leave the grid or enter an obstacle keep the agent in place.
def step(state, action):
    row, col = divmod(state, n_cols)
    if action == 0:    # UP
        row = max(row - 1, 0)
    elif action == 1:  # DOWN
        row = min(row + 1, n_rows - 1)
    elif action == 2:  # LEFT
        col = max(col - 1, 0)
    else:              # RIGHT
        col = min(col + 1, n_cols - 1)
    next_state = row * n_cols + col
    if env_flat[next_state] == "#":  # obstacles block movement
        next_state = state
    return next_state

# Q-learning table (Q-values for each state-action pair)
q_table = np.zeros((env_flat.shape[0], num_actions))

# Q-learning algorithm
for episode in range(num_episodes):
    state = 0  # starting state (flat index of "S")
    done = False

    while not done:
        # Choose an action using an epsilon-greedy policy:
        # explore with probability epsilon, otherwise take the best known action
        if np.random.rand() < epsilon:
            action = np.random.choice(num_actions)
        else:
            action = np.argmax(q_table[state, :])

        # Perform the action and observe the next state and reward
        next_state = step(state, action)
        reward = 1 if env_flat[next_state] == "G" else 0

        # Update the Q-value using the Q-learning update rule:
        # Q(s, a) += lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
        q_table[state, action] += learning_rate * (
            reward + discount_factor * np.max(q_table[next_state, :]) - q_table[state, action]
        )

        state = next_state
        done = env_flat[state] == "G"

print("Q-table:")
print(q_table)
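
# Once training finishes, the learned Q-table can be read back as a greedy
# policy by taking the argmax action in each state. The lines below are a
# minimal sketch of that readout, reusing the variables defined above; the
# arrow characters and the choice to print obstacle and goal cells verbatim
# are illustrative additions, not part of the original script.
arrows = {0: "^", 1: "v", 2: "<", 3: ">"}
print("Greedy policy:")
for row in range(n_rows):
    line = ""
    for col in range(n_cols):
        s = row * n_cols + col
        if env_flat[s] in ("#", "G"):
            line += f" {env_flat[s]} "  # show obstacle/goal markers as-is
        else:
            line += f" {arrows[int(np.argmax(q_table[s, :]))]} "
    print(line)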