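# NOTE: this paste starts mid-script. The graph definition that produces
# inputs1, Qout, predict, W, nextQ, and updateModel is missing, so here is a
# minimal sketch of that setup, assuming the FrozenLake-v0 Gym environment and
# a single-layer Q-network (16 one-hot state inputs -> 4 action values), which
# is what the np.identity(16) feeds and the tensor names in the training loop
# imply. Treat this block as an assumption, not the original author's code.
import gym
import numpy as np
import tensorflow as tf

env = gym.make('FrozenLake-v0')

tf.reset_default_graph()
# Feed-forward part of the network: a one-hot state goes in, Q-values for the
# 4 actions come out of a single linear layer.
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)
# Loss: sum of squared differences between the target and predicted Q-values,
# minimized with plain gradient descent.
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)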
init = tf.global_variables_initializer()  # initialize_all_variables() is deprecated

# Set learning parameters
y = .99            # discount factor (gamma)
e = 0.1            # epsilon for the epsilon-greedy policy
num_episodes = 2000
# Create lists to contain total rewards and steps per episode
jList = []
rList = []
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_episodes):
        # Reset environment and get first new observation
        s = env.reset()
        rAll = 0
        d = False
        j = 0
        # The Q-Network
        while j < 99:
            j += 1
            # Choose an action greedily (with probability e of a random action)
            # from the Q-network; np.identity(16)[s:s+1] one-hot encodes the
            # discrete state as a 1x16 row vector.
            a, allQ = sess.run([predict, Qout], feed_dict={inputs1: np.identity(16)[s:s+1]})
            if np.random.rand(1) < e:
                a[0] = env.action_space.sample()
            # Get new state and reward from environment
            s1, r, d, _ = env.step(a[0])
            # Obtain the Q' values by feeding the new state through our network
            Q1 = sess.run(Qout, feed_dict={inputs1: np.identity(16)[s1:s1+1]})
            # Obtain maxQ' and set our target value for the chosen action.
            maxQ1 = np.max(Q1)
            targetQ = allQ
            targetQ[0, a[0]] = r + y * maxQ1
            # Train our network using target and predicted Q values
            _, W1 = sess.run([updateModel, W], feed_dict={inputs1: np.identity(16)[s:s+1], nextQ: targetQ})
            rAll += r
            s = s1
            if d:
                # Reduce the chance of a random action as we train the model.
                e = 1. / ((i / 50) + 10)
                break
        jList.append(j)
        rList.append(rAll)
print("Percent of successful episodes: " + str(100.0 * sum(rList) / num_episodes) + "%")
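# Notes (added): because states are fed as one-hot vectors, W acts as a 16x4
# Q-table and this "network" is tabular Q-learning expressed as one linear
# layer. Epsilon is only annealed when an episode terminates, decaying from
# 0.1 toward 1/((i/50) + 10), so exploration shrinks as training progresses.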