daily pastebin goal
66%
SHARE
TWEET

Untitled

a guest Jan 15th, 2019 65 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # %%writefile /tmp/rl1_3.py
  2. class UCB(object):
  3.  
  4.   def __init__(self, number_of_arms):
  5.     self._number_of_arms = number_of_arms
  6.     self.name = 'ucb'
  7.     self.reset()
  8.    
  9.   def UCB_parameter(self,c,numerator,denominator):
  10.     c*(np.sqrt(np.log(numerator)/denominator))
  11.  
  12.   def step(self, previous_action, reward):
  13.    
  14.     c=2
  15.    
  16.     if previous_action is None:
  17.       action = argmax(self._estimates)
  18.       return action
  19.      
  20.     if previous_action is not None:
  21.       self._counts[previous_action] += 1
  22.       r = reward
  23.       q_a = self._estimates[previous_action]
  24.       n_a = self._counts[previous_action]
  25.       self._estimates[previous_action] += (r - q_a)/n_a
  26.    
  27. #   action = argmax(self._estimates + UCB_parameter(self,c,np.sum(counts),counts[previous_action]))
  28.     action = argmax(self._estimates + (c*(np.sqrt(np.log(np.sum(self._counts))/self._counts[previous_action]))))      
  29.                    
  30.     return action
  31.      
  32.  
  33.   def reset(self):
  34.     self._estimates = np.zeros((self._number_of_arms,))
  35.     self._counts = np.zeros((self._number_of_arms,))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top