Advertisement
Guest User

Untitled

a guest
May 19th, 2019
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.81 KB | None | 0 0
  class BanditSimulation(object):
      """Head-to-head cumulative-regret simulation of UCB and Thompson Sampling.

      Each run builds a fresh ``k_arms``-armed bandit: one parameter ``mu`` per
      arm is drawn uniformly from [0, 1) and handed to ``BernoulliArm(mu)``
      (presumably the arm's success probability -- confirm against that class).
      Both policies then play the same bandit for ``T`` rounds, each drawing its
      own rewards.

      ``BernoulliArm``, ``UCB`` and ``ThompsonSampling`` are not defined in this
      block and must be available in the enclosing module.  The only interface
      relied on here is ``arm.draw()``, ``policy.select_arm()`` and
      ``policy.update(arm_index, reward)``.

      Attributes:
          T: Number of rounds played per run.
          number_of_runs: Number of independent bandit instances simulated.
          k_arms: Number of arms of each bandit.
          cumulative_regret: Dict with the keys ``"Thompson Sampling"`` and
              ``"UCB"``.  Each value is a ``(T, number_of_runs)`` float array
              whose entry ``[t, r]`` is the regret accumulated by that policy
              over rounds ``0..t`` of run ``r``.
      """

      def __init__(self, k_arms, T, number_of_runs):
          """Store the simulation sizes and allocate zeroed result arrays.

          Args:
              k_arms: Number of arms of each simulated bandit.
              T: Number of rounds each policy plays per run.
              number_of_runs: Number of independent runs.
          """
          self.T = T
          self.number_of_runs = number_of_runs
          self.k_arms = k_arms
          self.cumulative_regret = {
              "Thompson Sampling": np.zeros((T, number_of_runs)),
              "UCB": np.zeros((T, number_of_runs)),
          }

      def run(self):
          """Simulate every run and fill ``self.cumulative_regret`` in place.

          Per-round regret is ``max(mu) - reward``, i.e. it is measured against
          the reward actually drawn rather than the mean of the chosen arm, so
          an individual step is negative whenever the drawn reward exceeds the
          best ``mu``.

          Calling ``run`` again overwrites the previously stored curves.
          Returns ``None``.
          """
          ucb_table = self.cumulative_regret["UCB"]
          thompson_table = self.cumulative_regret["Thompson Sampling"]

          for run_number in range(self.number_of_runs):
              mu_vector = np.random.uniform(size=self.k_arms)
              arms = [BernoulliArm(mu) for mu in mu_vector]

              ucb = UCB(self.k_arms)
              thompson_sampling = ThompsonSampling(self.k_arms)

              # The best mean is fixed for the whole run, so compute it once
              # instead of twice per round.  The emptiness guard keeps a
              # zero-armed configuration from failing here, where the original
              # code would not have evaluated the maximum yet.
              best_mean = mu_vector.max() if mu_vector.size else None

              # Column views: writes go straight into the result arrays.
              ucb_curve = ucb_table[:, run_number]
              thompson_curve = thompson_table[:, run_number]

              ucb_total = 0.0
              thompson_total = 0.0

              for t in range(self.T):
                  # UCB acts first and Thompson Sampling second; the order is
                  # kept so the sequence of random draws is unchanged.
                  ucb_chosen_arm = ucb.select_arm()
                  ucb_reward = arms[ucb_chosen_arm].draw()
                  ucb.update(ucb_chosen_arm, ucb_reward)
                  ucb_total += best_mean - ucb_reward

                  thompson_sampling_chosen_arm = thompson_sampling.select_arm()
                  thompson_sampling_reward = arms[thompson_sampling_chosen_arm].draw()
                  thompson_sampling.update(
                      thompson_sampling_chosen_arm, thompson_sampling_reward
                  )
                  thompson_total += best_mean - thompson_sampling_reward

                  ucb_curve[t] = ucb_total
                  thompson_curve[t] = thompson_total
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement