Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # %%writefile /tmp/rl1_3.py
class UCB(object):
    """Upper-confidence-bound agent for a multi-armed bandit.

    Keeps a running sample-average reward estimate and a pull count for
    every arm, and selects the arm maximising
    ``estimate + c * sqrt(ln(total_pulls) / pulls_of_arm)``.
    """

    def __init__(self, number_of_arms, c=2):
        """Create the agent.

        Args:
            number_of_arms: Number of arms the agent chooses between.
            c: Exploration constant scaling the confidence bonus.
        """
        self._number_of_arms = number_of_arms
        self._c = c
        self.name = 'ucb'
        self.reset()

    def UCB_parameter(self, c, numerator, denominator):
        """Return the exploration bonus ``c * sqrt(ln(numerator) / denominator)``.

        Args:
            c: Exploration constant.
            numerator: Total number of pulls so far (the ``t`` in ``ln t``).
            denominator: Pull count of one arm, or an array of per-arm counts.

        Returns:
            A NumPy array (0-d for a scalar ``denominator``) of bonuses.
            Entries whose count is not positive get ``inf`` so that arms
            which have never been pulled are always preferred.
        """
        pulls = np.asarray(denominator, dtype=float)
        # Clamp so that ln(.) is never negative or -inf before any pull.
        total = np.maximum(numerator, 1)
        # Zero counts divide by zero here; those entries are overwritten below.
        with np.errstate(divide="ignore", invalid="ignore"):
            bonus = c * np.sqrt(np.log(total) / pulls)
        return np.where(pulls > 0, bonus, np.inf)

    def step(self, previous_action, reward):
        """Record the outcome of the previous action and choose the next arm.

        Args:
            previous_action: Index of the arm pulled on the previous step, or
                ``None`` on the first step (nothing is updated then).
            reward: Reward received for ``previous_action``.

        Returns:
            Whatever ``argmax`` returns for the score vector.
        """
        # NOTE(review): `argmax` is not defined in this class; it is expected
        # to be a module-level helper -- confirm its tie-breaking behaviour.
        if previous_action is None:
            return argmax(self._estimates)

        # Incremental sample-average update: Q <- Q + (r - Q) / N.
        self._counts[previous_action] += 1
        pulls = self._counts[previous_action]
        estimate = self._estimates[previous_action]
        self._estimates[previous_action] += (reward - estimate) / pulls

        # The bonus is computed per arm; a single scalar bonus would shift
        # every score equally and reduce the policy to plain greedy.
        bonus = self.UCB_parameter(self._c, np.sum(self._counts), self._counts)
        return argmax(self._estimates + bonus)

    def reset(self):
        """Zero all reward estimates and pull counts."""
        self._estimates = np.zeros((self._number_of_arms,))
        self._counts = np.zeros((self._number_of_arms,))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement