# Source: Pastebin paste "Untitled", posted by a guest on Jun 22nd, 2018
# (plain text, 1.56 KB).
import random

import gym
import numpy as np

  5. # Setup Env
  6. env = gym.make('Taxi-v2')
  7.  
  8. # Params
  9. maxEpisodes = 1000
  10. maxSteps = 5000
  11. epsilon = 1.0
  12. degradeRate = 0.01
  13. gamma = 0.9
  14. alpha = 1.0
  15.  
  16. # Setup the qTable
  17. qTable = np.zeros((env.observation_space.n, env.action_space.n))
  18.  
  19. # Update qTable
  20. def updateQTable(state, action, reward, statePrime, Terminal):
  21.  
  22. maxTerm = max(qTable[statePrime])
  23. if Terminal:
  24. print("Terminal")
  25. maxTerm = 0
  26.  
  27. qTable[state][action] = ((1 - alpha) * qTable[state][action]) + alpha * (reward + gamma * maxTerm)
  28.  
  29. # Generate Action
  30. def generateAction(currentState):
  31. # Epsilon Greedy
  32. # Random action with probability epsilon
  33. # Best known action with probability 1-epsilon
  34. if random.random() < epsilon:
  35. return env.action_space.sample()
  36. else:
  37. return np.argmax(qTable[currentState])
  38.  
  39. # Run Learner
  40. for episode in range(maxEpisodes): # Loop for each episode
  41. state = env.reset() # Initialize S
  42.  
  43. epsilon = epsilon - degradeRate # Degrade Epsilon
  44.  
  45. terminal = False
  46.  
  47. for step in range(maxSteps): # Loop for each step of episode
  48. action = generateAction(state) # Choose A from S using policy derived from Q
  49.  
  50. statePrime, reward, done, info = env.step(action) # Take action A, observe R, S'
  51.  
  52. if reward == 20:
  53. terminal = True
  54.  
  55. updateQTable(state, action, reward, statePrime, terminal) # Update Q table
  56.  
  57. if terminal:
  58. break
  59.  
  60. state = statePrime
  61.  
  62. print(qTable)
  63. print(qTable[462][4])
# End of paste (Pastebin page footer: advertisement and comment prompts).