Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
class BanditSimulation(object):
    """Monte-Carlo comparison of UCB and Thompson Sampling on Bernoulli bandits.

    Each run draws fresh arm means uniformly from [0, 1), then plays both
    algorithms side by side for T steps on the same set of arms, recording
    the cumulative regret of each algorithm per timestep and per run.

    Relies on ``BernoulliArm``, ``UCB`` and ``ThompsonSampling`` being
    defined elsewhere in this module (not visible in this chunk).

    Parameters
    ----------
    k_arms : int
        Number of bandit arms per simulated problem.
    T : int
        Horizon (number of timesteps) per run.
    number_of_runs : int
        Number of independently sampled bandit problems to simulate.
    """

    def __init__(self, k_arms, T, number_of_runs):
        self.T = T
        self.number_of_runs = number_of_runs
        self.k_arms = k_arms
        # cumulative_regret[algo][t, r] = total regret of `algo` up to and
        # including step t of run r.
        self.cumulative_regret = {
            "Thompson Sampling": np.zeros((T, number_of_runs)),
            "UCB": np.zeros((T, number_of_runs)),
        }

    def run(self):
        """Simulate all runs, filling ``self.cumulative_regret`` in place."""
        for run_number in range(self.number_of_runs):
            mu_vector = np.random.uniform(size=self.k_arms)
            arms = [BernoulliArm(mu) for mu in mu_vector]
            ucb = UCB(self.k_arms)
            thompson_sampling = ThompsonSampling(self.k_arms)
            # Loop-invariant per run: best achievable mean reward.
            # (Original recomputed max(mu_vector) twice per timestep.)
            best_mu = mu_vector.max()
            for t in range(self.T):
                ucb_chosen_arm = ucb.select_arm()
                ucb_reward = arms[ucb_chosen_arm].draw()
                ucb.update(ucb_chosen_arm, ucb_reward)
                # NOTE(review): this is realized regret (best mean minus the
                # sampled reward), not expected regret
                # (best_mu - mu_vector[chosen_arm]); preserved as-is.
                ucb_regret = best_mu - ucb_reward

                thompson_sampling_chosen_arm = thompson_sampling.select_arm()
                thompson_sampling_reward = arms[thompson_sampling_chosen_arm].draw()
                thompson_sampling.update(thompson_sampling_chosen_arm, thompson_sampling_reward)
                thompson_sampling_regret = best_mu - thompson_sampling_reward

                # Accumulate: cum[t] = regret[t] + cum[t-1] (cum[-1] treated as 0).
                prev_ucb = self.cumulative_regret["UCB"][t - 1, run_number] if t > 0 else 0.0
                prev_ts = self.cumulative_regret["Thompson Sampling"][t - 1, run_number] if t > 0 else 0.0
                self.cumulative_regret["UCB"][t, run_number] = ucb_regret + prev_ucb
                self.cumulative_regret["Thompson Sampling"][t, run_number] = thompson_sampling_regret + prev_ts
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement