Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class NN:
    """Bias-free feed-forward network whose weights are fed in at run time.

    The weight matrices are TensorFlow placeholders rather than variables, so
    every evaluation can supply a different set of weight values.
    """

    def __init__(self, layers):
        """Create the input and weight placeholders.

        Args:
            layers: layer widths, input first and output last.
        """
        self.layers = layers
        self.x = tf.placeholder(tf.float32, shape=[None, layers[0]])
        self.weights = [tf.placeholder(tf.float32, shape=(n, m))
                        for (n, m) in zip(layers[:-1], layers[1:])]
        # one std per weight matrix
        self.init_std = [1.0 for _ in range(len(layers) - 1)]

    def init_weights(self, std):
        """Sample one zero-mean Gaussian matrix per pair of adjacent layers.

        Args:
            std: one entry per weight matrix; each entry is a scalar or an
                array that broadcasts to that matrix's shape.

        Returns:
            A list of arrays, the i-th of shape (layers[i], layers[i+1]).
        """
        shapes = zip(self.layers[:-1], self.layers[1:])
        # Return a list: the matrices generally differ in shape, and stacking
        # them into one ndarray builds a ragged array, which current NumPy
        # rejects.
        return [np.random.normal(0, std[i], shape)
                for i, shape in enumerate(shapes)]

    def forward(self, x, W):
        """Build the forward pass of the network.

        Args:
            x: input tensor.
            W: sequence of weight tensors, one per layer transition.

        Returns:
            The output of the last matmul, with no activation applied.
        """
        last = len(self.layers) - 2
        for i in range(last):  # last pass without relu
            x = tf.nn.relu(tf.matmul(x, W[i]))
        return tf.matmul(x, W[last])
def evaluate(env, nn, sess, weights_values, render=False):
    """Run one episode on `env` with the given weights and return its reward.

    Args:
        env: environment exposing `reset()`, `step(action)` and `render()`.
        nn: network exposing the `x` and `weights` placeholders and
            `forward()`.
        sess: session used to run the action op.
        weights_values: one value per placeholder in `nn.weights`.
        render: when true, render the environment before every step.

    Returns:
        The sum of the rewards collected until the episode reports done.
    """
    # Build the action op once per network and reuse it. Rebuilding it on
    # every call (or, worse, every step) keeps adding nodes to the
    # TensorFlow graph, which is why it was slow inside the loop.
    action_node = getattr(nn, "_action_node", None)
    if action_node is None:
        logits = nn.forward(nn.x, nn.weights)
        prediction = tf.nn.sigmoid(logits)
        action_node = tf.greater(prediction, tf.constant(0.5))
        nn._action_node = action_node

    feed = dict(zip(nn.weights, weights_values))  # weights
    state = env.reset()
    reward_sum = 0
    done = False
    while not done:
        if render:
            env.render()
        feed[nn.x] = [state]  # input
        # Take the first output for the single fed state and pass it on as
        # a plain 0/1 int rather than a NumPy boolean.
        action = int(sess.run(action_node, feed)[0][0])
        state, reward, done, _ = env.step(action)
        reward_sum = reward_sum + reward
    return reward_sum
def CEM(env, nn, sess, iterations=10, k=100):
    """Search for good weights of `nn` on `env` with the cross-entropy method.

    Each iteration samples `k` candidate weight sets from a per-weight
    Gaussian, scores every candidate with `evaluate`, keeps the best fifth
    (the elite) and refits the Gaussian's mean and standard deviation to the
    elite. After the last iteration the fitted mean is run once with
    rendering enabled.

    Args:
        env: environment passed through to `evaluate`.
        nn: network providing `layers`, `init_std` and `init_weights(std)`.
        sess: session passed through to `evaluate`.
        iterations: number of sample/select/refit rounds.
        k: number of candidates sampled per round.
    """
    layer_count = len(nn.layers) - 1
    # The search distribution starts centred on zero with the network's
    # initial spread.
    mean = [np.zeros((nn.layers[j], nn.layers[j + 1]))
            for j in range(layer_count)]
    stddev = nn.init_std
    # Keep at least one elite so a small k does not refit to an empty set.
    elite_count = max(1, k // 5)

    for iteration in range(iterations):
        print("iteration %d" % iteration)
        weights_rewards = []
        for _ in range(k):
            # init_weights draws zero-mean noise; shifting it by the current
            # mean samples around the best region found so far instead of
            # always around zero.
            noise = nn.init_weights(stddev)
            weights_values = [mean[j] + noise[j] for j in range(layer_count)]
            reward = evaluate(env, nn, sess, weights_values)
            weights_rewards.append((weights_values, reward))
        weights_rewards.sort(key=lambda tup: tup[1], reverse=True)
        elite = weights_rewards[:elite_count]
        best_rewards = [r for (_, r) in elite]
        best_weights = [w for (w, _) in elite]
        print(best_rewards)

        # For each weight matrix refit a separate mean and std
        mean = []
        stddev = []
        for j in range(layer_count):
            weights_j = [weights[j] for weights in best_weights]
            mean.append(np.mean(weights_j, axis=0))
            stddev.append(np.std(weights_j, axis=0))

    # Final evaluation: show the centre of the fitted distribution.
    evaluate(env, nn, sess, mean, render=True)
# Train a small network on CartPole with CEM, then release the session and
# the environment whether or not training succeeds.
env = gym.make('CartPole-v1')
try:
    state = env.reset()
    # presumably the product of the observation dimensions, i.e. the
    # flattened input width; verify against helpers.intprod
    input_shape = helpers.intprod(state.shape)
    nn = NN([input_shape, 10, 1])
    with tf.Session() as sess:
        CEM(env, nn, sess)
finally:
    env.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement