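Experience replay for a DQN-style agent: sample a random minibatch of stored transitions, compute updated Q-value targets with the discounted Bellman estimate, and refit the model. The first version collects every target and fits the model once on the whole batch: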
# Assumes module-level: import random, import numpy as np, and minibatch_size.
def replay(self):
    if len(self.recent_memory) < minibatch_size:
        return
    minibatch = random.sample(self.recent_memory, minibatch_size)
    state_batch, q_values_batch = [], []
    for state, action, reward, next_state, done in minibatch:
        # Get predictions for all actions for the current state.
        q_values = self.model.predict(state)
        # If we're not done, add on the future predicted reward at the discounted rate.
        if done:
            q_values[0][action] = reward
        else:
            future_reward = np.amax(self.model.predict(next_state)[0])
            q_values[0][action] = reward + self.gamma * future_reward
        state_batch.append(state[0])
        q_values_batch.append(q_values[0])
    # Re-fit the model to move it closer to these newly calculated targets.
    self.model.fit(np.array(state_batch), np.array(q_values_batch))
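A second version performs the same Bellman update but fits the model once per sampled transition instead of building a batch. This is simpler, but slower, since every sample triggers its own training call: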
def replay(self):
    if len(self.recent_memory) < minibatch_size:
        return
    minibatch = random.sample(self.recent_memory, minibatch_size)
    for state, action, reward, next_state, done in minibatch:
        # Get predictions for all actions for the current state.
        q_values = self.model.predict(state)
        # If we're not done, add on the future predicted reward at the discounted rate.
        if done:
            q_values[0][action] = reward
        else:
            future_reward = np.amax(self.model.predict(next_state)[0])
            q_values[0][action] = reward + self.gamma * future_reward
        # Re-fit the model toward the updated target for this single state.
        self.model.fit(state, q_values, epochs=1, verbose=0)
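For context, both versions assume an agent that already holds a replay buffer (recent_memory), a discount factor (gamma), and a compiled model (model). Below is a minimal sketch of that surrounding state, assuming a small Keras MLP and a hypothetical remember() helper; only recent_memory, model, and gamma are named in the paste above, everything else here is illustrative:

import random
from collections import deque

import numpy as np
from tensorflow import keras

minibatch_size = 32  # assumed module-level constant read by replay()

class Agent:
    def __init__(self, state_size, action_size, gamma=0.95):
        self.gamma = gamma  # discount rate on future rewards
        self.recent_memory = deque(maxlen=2000)  # replay buffer sampled by replay()
        # Small dense network mapping a state to one Q-value per action.
        # States are assumed to arrive with shape (1, state_size), matching
        # the state[0] indexing and model.predict(state) calls above.
        self.model = keras.Sequential([
            keras.layers.Dense(24, activation="relu", input_shape=(state_size,)),
            keras.layers.Dense(24, activation="relu"),
            keras.layers.Dense(action_size, activation="linear"),
        ])
        self.model.compile(optimizer="adam", loss="mse")

    def remember(self, state, action, reward, next_state, done):
        # Store one transition so replay() can sample it later (hypothetical name).
        self.recent_memory.append((state, action, reward, next_state, done))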