# imports:
import gym
import random
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter

# learning rate, this can be tuned empirically:
LR = 1e-3
# create the env from gym:
env = gym.make("CartPole-v0")
# start from a clean slate:
env.reset()
# steps are actually frames:
goal_steps = 200
# learn from episodes that score more than this:
score_requirement = 50
# number of games:
initial_games = 1000


def some_random_games_first():
    # Each of these is its own game.
    for episode in range(5):
        env.reset()
        # this is each frame, up to 200... but we won't make it that far.
        for t in range(goal_steps):
            # This will display the environment.
            # Only display if you really want to see it;
            # rendering takes much longer. Comment this line out to skip it:
            env.render()

            # Sample a random action. In this environment, the action can be 0 or 1, which is left or right.
            action = env.action_space.sample()

            # this executes the environment with an action,
            # and returns the observation of the environment,
            # the reward, whether the episode is over, and other info.
            observation, reward, done, info = env.step(action)
            if done:
                break

# some_random_games_first()
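# A quick, optional sanity check on the spaces used above (commented out, not part of
# the original script): for CartPole-v0 the action space is Discrete(2) (0 = push left,
# 1 = push right) and the observation is a 4-dimensional Box (cart position, cart
# velocity, pole angle, pole tip velocity).
# print(env.action_space)       # Discrete(2)
# print(env.observation_space)  # Box(4,)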

# build the initial training population from random games:
def initial_population():
    # [observation, action] pairs from games that scored above score_requirement:
    training_data = []
    # all scores:
    scores = []
    # just the scores that met our threshold:
    accepted_scores = []
    # iterate through however many games we want:
    for _ in range(initial_games):
        score = 0
        # moves made during this game:
        game_memory = []
        # previous observation that we saw:
        prev_observation = []
        # for each frame in 200:
        for _ in range(goal_steps):
            # choose a random action (0 or 1);
            # somewhat like action = env.action_space.sample() that we had before, but less general:
            action = random.randrange(0, 2)
            # as before:
            observation, reward, done, info = env.step(action)

            # notice that the observation is returned FROM the action,
            # so we store the previous observation here, pairing
            # the prev observation with the action we take.
            if len(prev_observation) > 0:
                # actually save into the game memory:
                game_memory.append([prev_observation, action])
            # since the previous observation was saved with its action, update to the current observation:
            prev_observation = observation
            score += reward
            if done:
                break

        # keep the data from games that met the target score, i.e. score_requirement:
        if score >= score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                # while this game is binary (move left or right) for its actions,
                # it is better to write a more generalized version to support richer games:
                # convert the left or right move to one-hot (this is the output layer for our neural network):
                if data[1] == 1:
                    # so this would be the output layer:
                    output = [0, 1]
                elif data[1] == 0:
                    output = [1, 0]
                # save our training data: the observation and the one-hot output generated for it:
                training_data.append([data[0], output])

        # reset env to play again:
        env.reset()
        # save overall scores:
        scores.append(score)

    # save the overall training data for logging purposes.
    # this is absolutely optional:
    training_data_save = np.array(training_data)
    np.save('training_data_save.npy', training_data_save)

    # the average of the scores that were over the bar we set (sort of a benchmark):
    print('Average accepted score:', mean(accepted_scores))
    # same, but median:
    print('Median score for accepted scores:', median(accepted_scores))
    # count the number of accepted scores:
    print(Counter(accepted_scores))

    return training_data

# initial_population()
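# Optional (not in the original script): the .npy file saved above can be reloaded later
# instead of replaying the random games. Depending on your numpy version you may need
# allow_pickle=True, because the array holds Python lists rather than plain numbers:
# training_data = np.load('training_data_save.npy', allow_pickle=True).tolist()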

def neural_network_model(input_size):
    # input layer:
    network = input_data(shape=[None, input_size, 1], name='input')
    # fully connected layer, using relu and 128 nodes:
    network = fully_connected(network, 128, activation='relu')
    # dropout to improve generalization (optional):
    # note that in tflearn the second argument is the keep probability (fraction of units
    # kept), not the drop rate; the values below should be tested empirically:
    network = dropout(network, 0.5)
    # layers, layers...
    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 512, activation='relu')
    network = dropout(network, 0.5)

    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.5)

    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.5)
    # fully connected layer to wrap up the output:
    network = fully_connected(network, 2, activation='softmax')
    # the regression layer defines the optimizer and loss used for training:
    network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network, tensorboard_dir='log')

    return model

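# A small, optional sketch (not in the original script) of how the network is meant to be
# fed: CartPole observations have 4 values, so the model is built with input_size=4 and
# expects batches shaped (batch, 4, 1), which is exactly the reshape used in train_model
# and in the play loop below.
# m = neural_network_model(4)
# dummy_obs = np.zeros(4).reshape(-1, 4, 1)
# print(m.predict(dummy_obs))  # a (1, 2) softmax over the two actions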
# a previously built model can also be passed in instead of False (otherwise, one will be created):
def train_model(training_data, model=False):
    # parse observations from the training data and reshape them so they fit the network's input:
    X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
    # actions:
    y = [i[1] for i in training_data]

    if not model:
        # if the model does not already exist, create it:
        model = neural_network_model(input_size=len(X[0]))
    # fit the model:
    model.fit({'input': X}, {'targets': y}, n_epoch=7, snapshot_step=500, show_metric=True, run_id='openai_learning')
    return model

# that's somewhat unnecessary renaming:
training_data = initial_population()
# model is actually a trained model here:
model = train_model(training_data)

# I might have done the shorter version instead:
# model = train_model(initial_population())
# there is no right or wrong, it's a matter of style I think.

# save the model:
model.save('notGreat.model')

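# Optional (not in the original script): to reuse the saved weights later without
# retraining, tflearn needs the same graph to be rebuilt before loading the checkpoint:
# model = neural_network_model(input_size=4)
# model.load('notGreat.model')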
#################################### actually play the game: ####################################

scores = []
choices = []
# play 10 games:
for each_game in range(10):
    # same setup:
    score = 0
    game_memory = []
    prev_obs = []
    env.reset()
    for _ in range(goal_steps):
        # comment out if speed is important:
        env.render()
        # choose randomly for the first frame:
        if len(prev_obs) == 0:
            action = random.randrange(0, 2)
        else:
            # take the argmax of the one-hot prediction based on the previous observation:
            action = np.argmax(model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0])

        # I would refactor here once again to something like:
        # prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0]
        # action = np.argmax(prediction)

        choices.append(action)

        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation

        game_memory.append([new_observation, action])
        score += reward
        if done:
            break

    scores.append(score)

# different metrics:
print('Average Score:', sum(scores) / len(scores))
print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
# the score threshold that was used to filter the training data, for comparison:
print(score_requirement)
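# Optional follow-ups (not in the original script): close the render window when done,
# and compare the trained agent's average score against the random-play threshold above.
# env.close()
# print('Improvement over threshold: {:.1f}x'.format((sum(scores) / len(scores)) / score_requirement))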