Advertisement
Guest User

Untitled

a guest
Apr 26th, 2017
180
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.61 KB | None | 0 0
  1. class NN:
  2.     def __init__(self, layers):
  3.         self.layers = layers
  4.         self.x = tf.placeholder(tf.float32, shape=[None, layers[0]])
  5.         self.weights = [tf.placeholder(tf.float32, shape=(n, m))
  6.                 for (n,m) in zip(layers[:-1],layers[1:])]
  7.         # one std per weight matrix
  8.         self.init_std = [1.0 for _ in range(len(layers)-1)]
  9.  
  10.     def init_weights(self, std):
  11.         weights = []
  12.         for i in range(len(self.layers)-1):
  13.             shape = (self.layers[i], self.layers[i+1])
  14.             weights.append(np.random.normal(0, std[i], shape))
  15.         return np.array(weights)
  16.  
  17.     def forward(self, x, W):
  18.         count = len(self.layers)-1
  19.         for i in range(count-1): # last pass without relu
  20.             x = tf.nn.relu(tf.matmul(x, W[i]))
  21.         x = tf.matmul(x, W[count-1])
  22.         return x
  23.  
  24. def evaluate(env, nn, sess, weights_values, render=False):
  25.     state = env.reset()
  26.     reward_sum = 0
  27.     done = False
  28.  
  29.     # if this was in the loop, took a lot of time. Why?
  30.     y = nn.forward(nn.x, nn.weights)
  31.     prediction = tf.nn.sigmoid(y)
  32.     action_node = tf.greater(prediction, tf.constant(0.5))
  33.     d = dict(zip(nn.weights, weights_values)) # weights
  34.  
  35.     while not done:
  36.         if render:
  37.             env.render()
  38.         d[nn.x] = [state] # input
  39.         action = sess.run(action_node, d)[0][0]
  40.         state, reward, done, _ = env.step(action)
  41.         reward_sum = reward_sum + reward
  42.  
  43.     return reward_sum
  44.            
  45.  
  46. def CEM(env, nn, sess, iterations=10, k=100):
  47.     stddev = nn.init_std
  48.     for i in range(iterations):
  49.         print "iteration", i
  50.         weights_rewards = []
  51.         for i in range(k):
  52.             weights_values = nn.init_weights(stddev)
  53.             reward = evaluate(env, nn, sess, weights_values)
  54.             weights_rewards.append((weights_values, reward))
  55.  
  56.         weights_rewards.sort(key=lambda tup : tup[1], reverse=True)
  57.         best_rewards = [r for (w,r) in weights_rewards[:int(k/5)]]
  58.         best_weights = [w for (w,r) in weights_rewards[:int(k/5)]]
  59.  
  60.         print best_rewards
  61.         # For each weight matrix compute separate std
  62.         stddev = []
  63.         for i in range(len(nn.layers)-1):
  64.             weights_i = [weights[i] for weights in best_weights]
  65.             stddev.append(np.std(weights_i, axis = 0))
  66.  
  67.    
  68.     # Final evaluation
  69.     weights_values = nn.init_weights(stddev)
  70.     evaluate(env, nn, sess, weights_values, render=True)
  71.  
  72. env = gym.make('CartPole-v1')
  73. state = env.reset()
  74. input_shape = helpers.intprod(state.shape)
  75. nn = NN([input_shape, 10, 1])
  76. sess = tf.Session()
  77. CEM(env, nn, sess)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement