def calc_sil_policy_val_loss(self, batch):
    '''
    Calculate the SIL policy losses for actor and critic
    sil_policy_loss = -log_prob * max(R - v_pred, 0)
    sil_val_loss = norm(max(R - v_pred, 0))^2 / 2
    This is called on a randomly-sampled batch from experience replay
    '''
    returns = math_util.calc_returns(batch, self.gamma)
    v_preds = self.calc_v(batch['states'])
    # clipped advantage max(R - v_pred, 0): only transitions whose return
    # exceeds the current value estimate contribute to the SIL losses
    clipped_advs = torch.clamp(returns - v_preds, min=0.0)
    log_probs = self.calc_log_probs(batch)

    # policy loss weights the negative log-probs by the clipped advantage
    sil_policy_loss = torch.mean(- log_probs * clipped_advs)
    # value loss is half the squared L2 norm of the clipped advantages
    sil_val_loss = torch.norm(clipped_advs) ** 2 / 2

    if torch.cuda.is_available() and self.net.gpu:
        sil_policy_loss = sil_policy_loss.cuda()
        sil_val_loss = sil_val_loss.cuda()
    return sil_policy_loss, sil_val_loss
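
For context, below is a minimal standalone sketch of the same loss computation on plain tensors, outside of any agent class. The function name sil_losses and the toy tensor values are illustrative assumptions, not part of the original code; it only shows how the clipped-advantage formulas in the docstring behave.

import torch

def sil_losses(returns, v_preds, log_probs):
    # clipped advantage max(R - v_pred, 0); negative advantages are zeroed out
    clipped_advs = torch.clamp(returns - v_preds, min=0.0)
    # sil_policy_loss = mean(-log_prob * max(R - v_pred, 0))
    sil_policy_loss = torch.mean(- log_probs * clipped_advs)
    # sil_val_loss = ||max(R - v_pred, 0)||^2 / 2
    sil_val_loss = torch.norm(clipped_advs) ** 2 / 2
    return sil_policy_loss, sil_val_loss

# toy values for illustration only: the second transition has a return below
# its value estimate, so it contributes nothing to either loss
returns = torch.tensor([1.0, 0.5, 2.0])
v_preds = torch.tensor([0.5, 1.0, 1.5])
log_probs = torch.tensor([-0.1, -0.3, -0.2])
policy_loss, val_loss = sil_losses(returns, v_preds, log_probs)
print(policy_loss.item(), val_loss.item())  # 0.05, 0.25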