Advertisement
Guest User

Untitled

a guest
Jan 15th, 2019
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.07 KB | None | 0 0
  1. # %%writefile /tmp/rl1_3.py
  2. class UCB(object):
  3.  
  4. def __init__(self, number_of_arms):
  5. self._number_of_arms = number_of_arms
  6. self.name = 'ucb'
  7. self.reset()
  8.  
  9. def UCB_parameter(self,c,numerator,denominator):
  10. c*(np.sqrt(np.log(numerator)/denominator))
  11.  
  12. def step(self, previous_action, reward):
  13.  
  14. c=2
  15.  
  16. if previous_action is None:
  17. action = argmax(self._estimates)
  18. return action
  19.  
  20. if previous_action is not None:
  21. self._counts[previous_action] += 1
  22. r = reward
  23. q_a = self._estimates[previous_action]
  24. n_a = self._counts[previous_action]
  25. self._estimates[previous_action] += (r - q_a)/n_a
  26.  
  27. # action = argmax(self._estimates + UCB_parameter(self,c,np.sum(counts),counts[previous_action]))
  28. action = argmax(self._estimates + (c*(np.sqrt(np.log(np.sum(self._counts))/self._counts[previous_action]))))
  29.  
  30. return action
  31.  
  32.  
  33. def reset(self):
  34. self._estimates = np.zeros((self._number_of_arms,))
  35. self._counts = np.zeros((self._number_of_arms,))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement