Advertisement
Guest User

Untitled

a guest
Jan 19th, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.33 KB | None | 0 0
  1. import gym
  2. from gym import wrappers
  3. import numpy as np
  4.  
  5. env = gym.make("FrozenLake-v0")
  6. env = wrappers.Monitor(env, "/tmp/gym-results", force=True)
  7.  
  8. # initialize Q-Table
  9. Q = np.zeros([env.observation_space.n,env.action_space.n])
  10.  
  11. # set learning parameter
  12. lr = .75
  13. y = 0.99
  14. num_episodes = 3000
  15.  
  16. # create lists to contain total rewards and steps per episode
  17.  
  18. rList = []
  19. sList = []
  20.  
  21. for i in range(num_episodes):
  22. # Reset environment and get first new observation
  23. s = env.reset()
  24. rAll = 0
  25. d = False
  26. j = 0
  27. sList=[]
  28. # The Q-Table learning algorithm
  29. while not d and j<250:
  30. j+=1
  31. # Choose an action by greedily (with noise) picking from Q table
  32. a = np.argmax(Q[s,:] + np.random.randn(1,env.action_space.n)*(5./(i+1)))
  33.  
  34. # Get new state and reward from environment
  35. s1,r,d,_ = env.step(a)
  36.  
  37. # Get negative reward every step
  38. if r==0 :
  39. r=-0.001
  40.  
  41. # Q-Learning
  42. Q[s,a]= Q[s,a]+lr*(r+y* np.max(Q[s1,:])-Q[s,a])
  43. s=s1
  44. rAll=rAll+r
  45. sList.append(s)
  46.  
  47. rList.append(rAll)
  48. if r==1 :
  49. print(sList)
  50. print("Episode {} finished after {} timesteps with r={}. Running score: {}".format(i, j,rAll ,np.mean(rList)))
  51.  
  52.  
  53. env.close()
  54.  
  55. print ("Final Q-Table Values")
  56. print (" left down right up")
  57. print (Q)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement