class BanditSimulation(object):
    """Run UCB and Thompson Sampling side by side and record cumulative regret.

    Each run draws a fresh vector of ``k_arms`` arm means uniformly from
    [0, 1), builds one ``BernoulliArm`` per mean, and lets a new ``UCB`` and a
    new ``ThompsonSampling`` policy each pull one arm per time step for ``T``
    steps.

    Attributes:
        T: Number of time steps per run.
        number_of_runs: Number of independent runs.
        k_arms: Number of arms in every generated problem.
        cumulative_regret: Dict keyed by "Thompson Sampling" and "UCB"; each
            value is a ``(T, number_of_runs)`` array whose entry ``[t, r]`` is
            the regret accumulated over steps ``0..t`` of run ``r``. All
            entries are zero until ``run()`` is called.
    """

    def __init__(self, k_arms, T, number_of_runs):
        """Store the simulation sizes and allocate zeroed regret tables.

        Args:
            k_arms: Number of arms per bandit problem.
            T: Number of time steps per run.
            number_of_runs: Number of independent runs to simulate.
        """
        self.T = T
        self.number_of_runs = number_of_runs
        self.k_arms = k_arms
        self.cumulative_regret = {
            "Thompson Sampling": np.zeros((T, number_of_runs)),
            "UCB": np.zeros((T, number_of_runs)),
        }

    def run(self):
        """Simulate every run and fill ``self.cumulative_regret`` in place.

        Per-step regret is recorded as the best arm mean minus the reward that
        was actually drawn.
        NOTE(review): this is the realized regret, not the expected regret
        ``max(mu) - mu[chosen_arm]`` — confirm that this is intended.

        Returns:
            None. Results are written to ``self.cumulative_regret``.
        """
        ucb_curve = self.cumulative_regret["UCB"]
        thompson_curve = self.cumulative_regret["Thompson Sampling"]

        for run_number in range(self.number_of_runs):
            # Fresh problem instance for this run.
            mu_vector = np.random.uniform(size=self.k_arms)
            arms = [BernoulliArm(mu) for mu in mu_vector]

            ucb = UCB(self.k_arms)
            thompson_sampling = ThompsonSampling(self.k_arms)

            # The best mean is fixed for the whole run, so compute it once
            # instead of twice per time step.
            best_mean = max(mu_vector)

            # Running totals remove the need to special-case t == 0.
            ucb_total = 0.0
            thompson_total = 0.0

            for t in range(self.T):
                # UCB acts first, then Thompson Sampling; each policy gets its
                # own draw from the arm it selected.
                ucb_chosen_arm = ucb.select_arm()
                ucb_reward = arms[ucb_chosen_arm].draw()
                ucb.update(ucb_chosen_arm, ucb_reward)
                ucb_total = ucb_total + (best_mean - ucb_reward)

                thompson_chosen_arm = thompson_sampling.select_arm()
                thompson_reward = arms[thompson_chosen_arm].draw()
                thompson_sampling.update(thompson_chosen_arm, thompson_reward)
                thompson_total = thompson_total + (best_mean - thompson_reward)

                ucb_curve[t, run_number] = ucb_total
                thompson_curve[t, run_number] = thompson_total
