Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def update(self, states, actions, rewards, values):
    """Compute the policy loss for a batch of transitions.

    The loss is the mean over the batch of
    ``-log(prob of the taken action) * advantage``, where the advantage
    is ``rewards - values``.

    Args:
        states: Batch of states, passed to ``self.actor``.
        actions: Batch of taken actions, passed to ``self.to_one_hot``.
        rewards: Batch of rewards (or returns) for the transitions.
        values: Batch of baseline value estimates. They are computed by
            the caller, outside of this update step.

    Returns:
        The policy loss produced by ``reduce_mean``.
    """
    # The baseline is supplied by the caller, so only the subtraction
    # happens here.
    advantages = rewards - values

    action_probs = self.actor(states)
    # NOTE(review): this presumably expects `to_one_hot` to return a boolean
    # mask so that indexing picks one probability per row; confirm against
    # the helper, since an integer one-hot array would index differently.
    selected_action_probs = action_probs[self.to_one_hot(actions)]

    # NOTE(review): `log` and `reduce_mean` are assumed to be imported at
    # file level from the tensor library in use; confirm.
    neg_logs = -log(selected_action_probs)
    policy_loss = reduce_mean(neg_logs * advantages)
    return policy_loss
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement