SHARE
TWEET

Untitled

a guest May 19th, 2019 59 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  class BanditSimulation(object):
      """Play UCB and Thompson Sampling side by side on random bandit instances.

      Each run draws a fresh vector of ``k_arms`` arm parameters uniformly from
      [0, 1), wraps each one in a ``BernoulliArm``, and lets a new ``UCB`` and
      a new ``ThompsonSampling`` policy play ``T`` steps against those arms.

      Attributes:
          T: Number of time steps per run.
          number_of_runs: Number of independent runs.
          k_arms: Number of arms in every bandit instance.
          cumulative_regret: Dict keyed by policy name ("Thompson Sampling",
              "UCB"); each value is a ``(T, number_of_runs)`` array whose
              entry ``[t, r]`` is the regret accumulated over steps ``0..t``
              of run ``r``. Zero until ``run`` is called.
      """

      def __init__(self, k_arms, T, number_of_runs):
          """Store the experiment size and allocate zeroed regret tables.

          Args:
              k_arms: Number of arms in every bandit instance.
              T: Number of time steps per run.
              number_of_runs: Number of independent runs.
          """
          self.T = T
          self.number_of_runs = number_of_runs
          self.k_arms = k_arms
          self.cumulative_regret = {
              "Thompson Sampling": np.zeros((T, number_of_runs)),
              "UCB": np.zeros((T, number_of_runs)),
          }

      def run(self):
          """Execute every run and fill ``cumulative_regret`` in place.

          Per-step regret is ``max(mu_vector) - reward``, i.e. it is measured
          against the reward actually drawn, not against the mean of the
          chosen arm, so individual steps can contribute negative regret.
          NOTE(review): if expected (pseudo-)regret is what should be plotted,
          use ``max(mu_vector) - mu_vector[chosen_arm]`` instead -- confirm
          with the consumer of ``cumulative_regret``.
          """
          ucb_table = self.cumulative_regret["UCB"]
          thompson_table = self.cumulative_regret["Thompson Sampling"]

          for run_number in range(self.number_of_runs):
              # Fresh problem instance and fresh policies for every run.
              mu_vector = np.random.uniform(size=self.k_arms)
              arms = [BernoulliArm(mu) for mu in mu_vector]
              ucb = UCB(self.k_arms)
              thompson_sampling = ThompsonSampling(self.k_arms)

              # The best arm parameter is fixed for the whole run, so compute
              # it once instead of twice per time step.
              best_mu = max(mu_vector)

              # Running totals replace the "t == 0" special case; the values
              # stored are the same as previous-entry-plus-current-regret.
              ucb_total = 0.0
              thompson_total = 0.0

              for t in range(self.T):
                  # UCB acts first, then Thompson Sampling; the order of the
                  # draws is kept so seeded experiments stay reproducible.
                  ucb_chosen_arm = ucb.select_arm()
                  ucb_reward = arms[ucb_chosen_arm].draw()
                  ucb.update(ucb_chosen_arm, ucb_reward)
                  ucb_total += best_mu - ucb_reward

                  thompson_chosen_arm = thompson_sampling.select_arm()
                  thompson_reward = arms[thompson_chosen_arm].draw()
                  thompson_sampling.update(thompson_chosen_arm, thompson_reward)
                  thompson_total += best_mu - thompson_reward

                  ucb_table[t, run_number] = ucb_total
                  thompson_table[t, run_number] = thompson_total
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top