Guest User

Untitled

a guest
Nov 12th, 2018
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.98 KB | None | 0 0
  1. import numpy as np
  2.  
  3. def calculate_discounted_return_backwards(value, gamma, normalize=True):
  4. ret = np.zeros(value.shape[0]) # initialize the array we hold our values in
  5. i = value.shape[0]-1 # starting at the end, heading backwards
  6. while i >= 0: # until we reach the start of the array
  7. ret[i] += value[i] # add the
  8. try: # we surround in a try catch in case we are at the start
  9. ret[i] += (ret[i+1] * gamma) # we add the previous return with a decay
  10. except: # if there was no ret[i+1]
  11. pass # should only occur for first element
  12. i -= 1 # move to the next element
  13.  
  14. cut_off = int(1./(1. - gamma))*2
  15. if normalize:
  16. return ret[:len(ret)-cut_off]*(1.-gamma)
  17. return ret[:len(ret)-cut_off]
Add Comment
Please, Sign In to add comment