Guest User

Untitled

a guest
Jul 22nd, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.31 KB | None | 0 0
  1. import gym
  2. import numpy as np
  3. import time, pickle, os
  4.  
  5. env = gym.make('FrozenLake-v0')
  6.  
  7. epsilon = 0.9
  8. # min_epsilon = 0.1
  9. # max_epsilon = 1.0
  10. # decay_rate = 0.01
  11.  
  12. total_episodes = 10000
  13. max_steps = 100
  14.  
  15. lr_rate = 0.81
  16. gamma = 0.96
  17.  
  18. Q = np.zeros((env.observation_space.n, env.action_space.n))
  19.  
  20. def choose_action(state):
  21. action=0
  22. if np.random.uniform(0, 1) < epsilon:
  23. action = env.action_space.sample()
  24. else:
  25. action = np.argmax(Q[state, :])
  26. return action
  27.  
  28. def learn(state, state2, reward, action, action2):
  29. predict = Q[state, action]
  30. target = reward + gamma * Q[state2, action2]
  31. Q[state, action] = Q[state, action] + lr_rate * (target - predict)
  32.  
  33. # Start
  34. rewards=0
  35.  
  36. for episode in range(total_episodes):
  37. t = 0
  38. state = env.reset()
  39. action = choose_action(state)
  40.  
  41. while t < max_steps:
  42. env.render()
  43.  
  44. state2, reward, done, info = env.step(action)
  45.  
  46. action2 = choose_action(state2)
  47.  
  48. learn(state, state2, reward, action, action2)
  49.  
  50. state = state2
  51. action = action2
  52.  
  53. t += 1
  54. rewards+=1
  55.  
  56. if done:
  57. break
  58. # epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
  59. # os.system('clear')
  60. time.sleep(0.1)
  61.  
  62.  
  63. print ("Score over time: ", rewards/total_episodes)
  64. print(Q)
  65.  
  66. with open("frozenLake_qTable_sarsa.pkl", 'wb') as f:
  67. pickle.dump(Q, f)
Add Comment
Please, Sign In to add comment