Advertisement
Guest User

Untitled

a guest
Aug 3rd, 2017
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.12 KB | None | 0 0
  1. import gym
  2. import random
  3. import numpy as np
  4. import tflearn
  5. from tflearn.layers.core import input_data, dropout, fully_connected
  6. from tflearn.layers.estimator import regression
  7. from statistics import median, mean
  8. from collections import Counter
  9. import tensorflow as tf
  10. import sys
  11.  
  12. LR = 1e-3
  13. env = gym.make("CartPole-v0")
  14. env.reset()
  15. goal_steps = 500
  16. score_requirement = 70
  17. initial_games = 300
  18.  
  19. n_nodes_hl1 = 500
  20. n_nodes_hl2 = 500
  21. n_nodes_hl3 = 500
  22. n_classes = 2
  23. batch_size = 200
  24.  
  25.  
  26.  
  27. def create_randoms():
  28. for episode in range(5):
  29. env.reset()
  30. for t in range(goal_steps):
  31. #env.render()
  32. action = env.action_space.sample()
  33. observation, reward, done, info = env.step(action)
  34. if done:
  35. break
  36.  
  37.  
  38.  
  39. def create_population():
  40. training_data = []
  41. scores = []
  42. accepted_scores= []
  43. for iteration in range(initial_games):
  44.  
  45. score = 0
  46. game_memory = []
  47. prev_observation = []
  48. for _ in range(goal_steps):
  49. action = random.randrange(0,2)
  50. observation, reward, done, info = env.step(action)
  51.  
  52. if len(prev_observation) > 0:
  53. game_memory.append([prev_observation,action])
  54.  
  55. prev_observation = observation
  56. score += reward
  57. if done:
  58. break
  59. if score > score_requirement:
  60. accepted_scores.append(score)
  61. for data in game_memory:
  62. if data[1] == 1:
  63. output =[0,1]
  64. elif data[1] == 0:
  65. output = [1,0]
  66.  
  67. training_data.append([data[0],output])
  68. env.reset()
  69. scores.append(score)
  70.  
  71. training_data_save = np.array(training_data)
  72. np.save('saved.npy',training_data_save)
  73.  
  74. print('Average accepted score:',mean(accepted_scores))
  75. print('Median accepted score:',median(accepted_scores))
  76. print(Counter(accepted_scores))
  77.  
  78. return training_data
  79.  
  80. def neural_network_modelv2(data):
  81. # (input_data * weights) +biases
  82. hidden_1_layer = {'weights': tf.Variable(tf.random_normal([4, n_nodes_hl1])),
  83. 'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
  84.  
  85. hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
  86. 'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
  87.  
  88. hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
  89. 'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
  90.  
  91. output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
  92. 'biases': tf.Variable(tf.random_normal([n_classes])), }
  93. data = tf.cast(data, tf.float32)
  94. l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
  95. l1 = tf.nn.relu(l1)
  96.  
  97. l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
  98. l2 = tf.nn.relu(l2)
  99.  
  100. l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
  101. l3 = tf.nn.relu(l3)
  102.  
  103. output_layer = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
  104.  
  105. return output_layer
  106.  
  107. def train_modelv2(training_data, model=False):
  108. x = tf.placeholder('float')
  109. y = tf.placeholder('float')
  110.  
  111. trainingX = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]))
  112. trainingY = [i[1] for i in training_data]
  113.  
  114.  
  115. nn = neural_network_modelv2(trainingX)
  116. mn = tf.nn.softmax_cross_entropy_with_logits(logits=nn, labels=y)
  117. cost = tf.reduce_mean(mn)
  118. optimizer = tf.train.AdamOptimizer().minimize(cost)
  119.  
  120. hm_epochs = 10
  121.  
  122. with tf.Session() as sess:
  123.  
  124. sess.run(tf.initialize_all_variables())
  125.  
  126. # Train
  127. for epoch in range(hm_epochs):
  128. epoch_loss = 0
  129. ca, c = sess.run([optimizer, cost], feed_dict={x: trainingX, y: trainingY})
  130. epoch_loss += c
  131. print('Training done')
  132.  
  133. scores = []
  134. choices = []
  135. for each_game in range(10):
  136. score = 0
  137. game_memory = []
  138. prev_obs = []
  139. env.reset()
  140. for _ in range(goal_steps):
  141. # env.render()
  142. if len(prev_obs) == 0:
  143. action = random.randrange(0, 2)
  144. else:
  145. action = (sess.run([nn], feed_dict={x: prev_obs.reshape(-1, len(prev_obs))})[0])
  146. print(action)
  147.  
  148.  
  149. choices.append(action)
  150.  
  151. new_observation, reward, done, info = env.step(action)
  152.  
  153. prev_obs = new_observation
  154. game_memory.append([new_observation, action])
  155. score += reward
  156.  
  157. if done: break
  158.  
  159. scores.append(score)
  160.  
  161. print('Average Score:', sum(scores) / len(scores))
  162. print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
  163. print(score_requirement)
  164.  
  165. training_data = create_population()
  166. model = train_modelv2(training_data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement