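# Q-network training loop for FrozenLake (TF1-style graph code). The loop below references
# an environment (env) and graph ops (inputs1, Qout, predict, W, updateModel, nextQ) that are
# not defined in this paste. The setup sketched here is an assumption that is merely consistent
# with those names: a 16-state one-hot input, 4 actions, a single linear layer, squared-error
# loss, and a 0.1 learning rate. Adjust to match the network actually used.
import gym
import numpy as np
import tensorflow as tf  # TensorFlow 1.x API; on 2.x use tensorflow.compat.v1 with disable_v2_behavior()

env = gym.make('FrozenLake-v0')  # assumed: 16 discrete states, matching np.identity(16) below

tf.reset_default_graph()
# A single linear layer mapping a one-hot state vector to Q-values for the 4 actions
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)
# Loss: sum of squared differences between the target and predicted Q-values
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)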
init = tf.global_variables_initializer()  # initialize_all_variables() in older TF releases

# Set learning parameters
y = .99            # discount factor
e = 0.1            # epsilon for the epsilon-greedy policy
num_episodes = 2000
# Create lists to contain total rewards and steps per episode
jList = []
rList = []
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_episodes):
        # Reset environment and get first new observation
        s = env.reset()
        rAll = 0
        d = False
        j = 0
        # The Q-network: run each episode for at most 99 steps
        while j < 99:
            j += 1
            # Choose an action greedily (with chance e of a random action) from the Q-network
            a, allQ = sess.run([predict, Qout], feed_dict={inputs1: np.identity(16)[s:s+1]})
            if np.random.rand(1) < e:
                a[0] = env.action_space.sample()
            # Get new state and reward from the environment
            s1, r, d, _ = env.step(a[0])
            # Obtain the Q' values by feeding the new state through our network
            Q1 = sess.run(Qout, feed_dict={inputs1: np.identity(16)[s1:s1+1]})
            # Obtain maxQ' and set our target value for the chosen action
            maxQ1 = np.max(Q1)
            targetQ = allQ
            targetQ[0, a[0]] = r + y * maxQ1
            # Train our network using the target and predicted Q values
            _, W1 = sess.run([updateModel, W], feed_dict={inputs1: np.identity(16)[s:s+1], nextQ: targetQ})
            rAll += r
            s = s1
            if d:
                # Reduce the chance of a random action as we train the model
                e = 1. / ((i / 50) + 10)
                break
        jList.append(j)
        rList.append(rAll)
print("Percent of successful episodes: " + str(100.0 * sum(rList) / num_episodes) + "%")