# Variant 1: accumulate the whole minibatch and fit the model once on all samples.
def replay(self):
    if len(self.recent_memory) < minibatch_size:
        return

    minibatch = random.sample(self.recent_memory, minibatch_size)

    state_batch, q_values_batch = [], []

    for state, action, reward, next_state, done in minibatch:
        # Get predictions for all actions for the current state.
        q_values = self.model.predict(state)

        # If we're not done, add on the future predicted reward at the discounted rate.
        if done:
            q_values[0][action] = reward
        else:
            future_reward = np.amax(self.model.predict(next_state)[0])
            q_values[0][action] = reward + self.gamma * future_reward

        state_batch.append(state[0])
        q_values_batch.append(q_values[0])

    # Re-fit the model to move it closer to this newly calculated reward.
    self.model.fit(np.array(state_batch), np.array(q_values_batch))


# Variant 2: fit the model on each sampled transition individually inside the loop.
def replay(self):
    if len(self.recent_memory) < minibatch_size:
        return

    minibatch = random.sample(self.recent_memory, minibatch_size)

    for state, action, reward, next_state, done in minibatch:
        # Get predictions for all actions for the current state.
        q_values = self.model.predict(state)

        # If we're not done, add on the future predicted reward at the discounted rate.
        if done:
            q_values[0][action] = reward
        else:
            future_reward = np.amax(self.model.predict(next_state)[0])
            q_values[0][action] = reward + self.gamma * future_reward

        self.model.fit(state, q_values, epochs=1, verbose=0)
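
# Both replay() variants reference names that are not defined in the paste itself:
# recent_memory, model, gamma, and minibatch_size. The sketch below is a minimal,
# hypothetical surrounding setup, assuming a Keras model and a deque-based replay
# buffer; the class name DQNAgent, the remember() helper, and the state_size /
# action_size parameters are illustrative and not part of the original snippet.
import random
from collections import deque

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

minibatch_size = 32  # assumed module-level constant, as used by replay()


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.gamma = 0.95                        # discount factor for future rewards
        self.recent_memory = deque(maxlen=2000)  # replay buffer of (s, a, r, s', done)

        # Small fully connected network mapping a state to one Q-value per action.
        self.model = Sequential([
            Dense(24, input_dim=state_size, activation="relu"),
            Dense(24, activation="relu"),
            Dense(action_size, activation="linear"),
        ])
        self.model.compile(loss="mse", optimizer="adam")

    def remember(self, state, action, reward, next_state, done):
        # States are stored with shape (1, state_size) so model.predict(state)
        # in replay() works without reshaping.
        self.recent_memory.append((state, action, reward, next_state, done))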