Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _get_action_node(nn):
    """Return the stochastic-action op for ``nn``, building it at most once.

    Creating TensorFlow ops adds nodes to the graph, and nodes are never
    removed again.  Building the op on every episode (or, worse, on every
    step) therefore makes the graph grow without bound and each
    ``sess.run`` progressively slower.  The op is memoised on ``nn`` and
    reused as long as ``nn.x`` and ``nn.weights`` are the very same objects.
    """
    inputs = (nn.x,) + tuple(nn.weights)
    cached = getattr(nn, "_evaluate_action_cache", None)
    if cached is not None:
        cached_inputs, cached_node = cached
        # Compare by identity: the op is only valid for the exact tensors it
        # was built from.
        if len(cached_inputs) == len(inputs) and all(
            old is new for old, new in zip(cached_inputs, inputs)
        ):
            return cached_node

    y = nn.forward(nn.x, nn.weights)
    prediction = tf.nn.sigmoid(y)
    # True with probability ``prediction``: compare against uniform noise of
    # the same shape.
    action_node = tf.greater(
        prediction, tf.random_uniform(tf.shape(prediction))
    )
    try:
        nn._evaluate_action_cache = (inputs, action_node)
    except AttributeError:
        # ``nn`` does not accept new attributes (e.g. ``__slots__``);
        # deliberately fall back to rebuilding the op on each call.
        pass
    return action_node


def evaluate(env, nn, sess, weights_values, render=False):
    """Run one episode of ``env`` with ``nn`` as policy; return total reward.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` returning a
            4-tuple ``(state, reward, done, info)``, and ``render()``.
        nn: Network object exposing the input tensor ``nn.x``, the weight
            tensors ``nn.weights`` and ``nn.forward(x, weights)``.
        sess: TensorFlow session used to run the action op.
        weights_values: Values fed for ``nn.weights``, paired positionally
            with them.
        render: If true, call ``env.render()`` before every step.

    Returns:
        The sum of the rewards returned by ``env.step`` until ``done``.
    """
    state = env.reset()
    action_node = _get_action_node(nn)

    # The weight feeds stay fixed for the whole episode; only the state entry
    # is replaced on each step.
    feed = dict(zip(nn.weights, weights_values))

    reward_sum = 0
    done = False
    while not done:
        if render:
            env.render()
        feed[nn.x] = [state]  # batch containing the single current state
        # NOTE(review): [0][0] assumes the network output is 2-D with the
        # action in the first column of the first row -- confirm.
        action = sess.run(action_node, feed)[0][0]
        state, reward, done, _ = env.step(action)
        reward_sum += reward
    return reward_sum
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement