# Excerpt of DDPG agent object.
import torch
import torch.nn.functional as F


class Agent():
    '''Interact with and learn from the environment.'''

    def learn(self, experiences, gamma):
        """Update policy and value parameters using a given batch of experience tuples.
        Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
        where:
            actor_target(state) -> action
            critic_target(state, action) -> Q-value

        Params
        ======
            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences

        # ---------------------------- update critic ---------------------------- #
        # Get predicted next-state actions and Q-values from the target models
        actions_next = self.actor_target(next_states)
        Q_targets_next = self.critic_target(next_states, actions_next)
        # Compute Q targets for current states (y_i)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))
        # Compute critic loss
        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)  # clip gradient norm to a max of 1
        self.critic_optimizer.step()

        # ---------------------------- update actor ---------------------------- #
        # Compute actor loss
        actions_pred = self.actor_local(states)
        actor_loss = -self.critic_local(states, actions_pred).mean()
        # Minimize the loss
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()
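
    # Not part of the original excerpt: a minimal sketch of the target-network
    # soft update that standard DDPG applies after the critic and actor steps.
    # The tau default and the local/target attribute names are assumptions
    # inferred from the excerpt above, not confirmed by the original paste.
    def soft_update(self, local_model, target_model, tau=1e-3):
        """Soft-update target parameters: θ_target = τ*θ_local + (1 - τ)*θ_target."""
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)

    # Hypothetical call sites, e.g. at the end of learn():
    #     self.soft_update(self.critic_local, self.critic_target)
    #     self.soft_update(self.actor_local, self.actor_target)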