import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

EPISODES = 2000


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # replay buffer
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural net for the Deep Q-learning model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        # Store a transition in the replay buffer
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Epsilon-greedy action selection: explore with probability epsilon,
        # otherwise pick the action with the highest predicted Q-value
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        # Train on a random minibatch sampled from the replay buffer
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Q-learning target: r + gamma * max_a' Q(s', a')
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        print('load weights')
        self.model.load_weights(name)

    def save(self, name):
        print('save weights')
        self.model.save_weights(name)


if __name__ == "__main__":

    env = gym.make('CartPole-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    print('state_size', state_size)
    print('action_size', action_size)

    agent = DQNAgent(state_size, action_size)
    # agent.load("./cartpole-dqn.h5")
    done = False
    batch_size = 32

    for e in range(EPISODES):
        state = env.reset()
        state = np.reshape(state, [1, state_size])

        for time in range(500):
            env.render()
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)

            reward = reward if not done else -10  # penalize ending the episode
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break

        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

        # if e % 50 == 0:
        #     agent.save("./cartpole-dqn.h5")
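
# --- Optional: evaluating a trained agent ---
# The commented-out snippet below is a minimal sketch, not part of the training
# script above. It assumes weights were previously saved to ./cartpole-dqn.h5
# (e.g. by uncommenting agent.save() in the loop) and reuses the env/agent
# already defined here, acting greedily by forcing epsilon down to its minimum.
#
# agent.load("./cartpole-dqn.h5")
# agent.epsilon = agent.epsilon_min
# state = np.reshape(env.reset(), [1, state_size])
# for t in range(500):
#     env.render()
#     action = agent.act(state)
#     next_state, reward, done, _ = env.step(action)
#     state = np.reshape(next_state, [1, state_size])
#     if done:
#         print("evaluation score:", t)
#         break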