Advertisement
Guest User

Untitled

a guest
Apr 26th, 2017
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.65 KB | None | 0 0
  1. def evaluate(env, nn, sess, weights_values, render=False):
  2.     state = env.reset()
  3.     reward_sum = 0
  4.     done = False
  5.  
  6.     # if this was in the loop, took a lot of time. Why?
  7.     y = nn.forward(nn.x, nn.weights)
  8.     prediction = tf.nn.sigmoid(y)
  9.     action_node = tf.greater(prediction, tf.random_uniform(tf.shape(prediction)))
  10.     d = dict(zip(nn.weights, weights_values)) # weights
  11.  
  12.     while not done:
  13.         if render:
  14.             env.render()
  15.         d[nn.x] = [state] # input
  16.         action = sess.run(action_node, d)[0][0]
  17.         state, reward, done, _ = env.step(action)
  18.         reward_sum = reward_sum + reward
  19.  
  20.     return reward_sum
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement