Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Upper Confidence Bound (UCB) ad selection over a table of per-round rewards.
#
# `data` must expose `.shape` and a 2-D indexable `.values` (presumably a
# pandas DataFrame -- confirm where it is loaded): one row per round, one
# column per ad type, each cell being the reward that ad yields in that round.
rounds_count = data.shape[0]
ad_types_count = data.shape[1]
ads_selected = []  # index of the ad chosen in each round, in order
numbers_of_selections = [0] * ad_types_count  # how often each ad was chosen
sums_of_rewards = [0] * ad_types_count  # accumulated reward per ad
total_reward = 0

# Read the reward matrix once rather than re-evaluating `data.values` per round.
reward_matrix = data.values

for round_index in range(rounds_count):
    selected_ad_index = 0
    max_upper_bound = 0
    for ad_index in range(ad_types_count):
        if numbers_of_selections[ad_index] > 0:
            # The ad has been tried before: mean reward plus a confidence
            # bonus that shrinks the more often the ad has been selected.
            average_reward = sums_of_rewards[ad_index] / numbers_of_selections[ad_index]
            delta_i = math.sqrt(
                1.5 * math.log(round_index + 1) / numbers_of_selections[ad_index]
            )
            upper_bound = average_reward + delta_i
        else:
            # Never tried yet: an infinite bound guarantees that every ad is
            # selected once during the first `ad_types_count` rounds.
            upper_bound = math.inf
        # Strict comparison: on ties (including several infinite bounds) the
        # lowest ad index wins.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            selected_ad_index = ad_index

    # Record the choice and collect the reward it earned this round.
    ads_selected.append(selected_ad_index)
    numbers_of_selections[selected_ad_index] += 1
    reward = reward_matrix[round_index, selected_ad_index]
    sums_of_rewards[selected_ad_index] += reward
    total_reward += reward

# Visualising the results
plt.hist(ads_selected)
plt.title("Histogram of ads selections")
plt.xlabel("Ads")
plt.ylabel("Number of times each ad was selected")
plt.show()
Add Comment
Please sign in to add a comment.