Advertisement
Guest User

Untitled

a guest
Jul 25th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.21 KB | None | 0 0
  1. """
  2. __name__ = predict.py
  3. __author__ = Yash Patel
  4. __description__ = Full prediction code of OpenAI Cartpole environment using Keras
  5. """
  6.  
  7. import gym
  8. import numpy as np
  9. from keras.models import Sequential
  10. from keras.layers import Dense, Dropout
  11.  
  12. def gather_data(env):
  13. num_trials = 10000
  14. min_score = 50
  15. sim_steps = 500
  16. trainingX, trainingY = [], []
  17.  
  18. scores = []
  19. for _ in range(num_trials):
  20. observation = env.reset()
  21. score = 0
  22. training_sampleX, training_sampleY = [], []
  23. for step in range(sim_steps):
  24. # action corresponds to the previous observation so record before step
  25. action = np.random.randint(0, 2)
  26. one_hot_action = np.zeros(2)
  27. one_hot_action[action] = 1
  28. training_sampleX.append(observation)
  29. training_sampleY.append(one_hot_action)
  30.  
  31. observation, reward, done, _ = env.step(action)
  32. score += reward
  33. if done:
  34. break
  35. if score > min_score:
  36. scores.append(score)
  37. trainingX += training_sampleX
  38. trainingY += training_sampleY
  39.  
  40. trainingX, trainingY = np.array(trainingX), np.array(trainingY)
  41. print("Average: {}".format(np.mean(scores)))
  42. print("Median: {}".format(np.median(scores)))
  43. return trainingX, trainingY
  44.  
  45. def create_model():
  46. model = Sequential()
  47. model.add(Dense(128, input_shape=(4,), activation="relu"))
  48. model.add(Dropout(0.6))
  49.  
  50. model.add(Dense(256, activation="relu"))
  51. model.add(Dropout(0.6))
  52.  
  53. model.add(Dense(512, activation="relu"))
  54. model.add(Dropout(0.6))
  55.  
  56. model.add(Dense(256, activation="relu"))
  57. model.add(Dropout(0.6))
  58.  
  59. model.add(Dense(128, activation="relu"))
  60. model.add(Dropout(0.6))
  61. model.add(Dense(2, activation="softmax"))
  62.  
  63. model.compile(
  64. loss="categorical_crossentropy",
  65. optimizer="adam",
  66. metrics=["accuracy"])
  67. return model
  68.  
  69. def predict():
  70. env = gym.make("CartPole-v0")
  71. trainingX, trainingY = gather_data(env)
  72. model = create_model()
  73. model.fit(trainingX, trainingY, epochs=5)
  74.  
  75. scores = []
  76. num_trials = 50
  77. sim_steps = 500
  78. for _ in range(num_trials):
  79. observation = env.reset()
  80. score = 0
  81. for step in range(sim_steps):
  82. action = np.argmax(model.predict(observation.reshape(1,4)))
  83. observation, reward, done, _ = env.step(action)
  84. score += reward
  85. if done:
  86. break
  87. scores.append(score)
  88.  
  89. print(np.mean(scores))
  90.  
# Run the full train-and-evaluate routine only when this file is executed as
# a script, not when it is imported as a module.
if __name__ == "__main__":
    predict()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement