Venciity

Upper Confidence Bound Implementation

Apr 26th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.40 KB | None | 0 0
  1. rounds_count = data.shape[0]
  2. ad_types_count = data.shape[1]
  3. ads_selected = []
  4. numbers_of_selections = [0] * ad_types_count
  5. sums_of_rewards = [0] * ad_types_count
  6. total_reward = 0
  7.  
  8. for round_index in range(0, rounds_count):
  9.     selected_ad_index = 0
  10.     max_upper_bound = 0
  11.     for ad_index in range(0, ad_types_count):
  12.         # If the ad was selected at least one time
  13.         if (numbers_of_selections[ad_index]> 0):
  14.             average_reward = sums_of_rewards[ad_index] / numbers_of_selections[ad_index]
  15.             delta_i = math.sqrt(3/2 * math.log(round_index + 1) / numbers_of_selections[ad_index])
  16.             upper_bound = average_reward + delta_i
  17.         # Otherwise: for the first 10 rounds we select each ad type one time
  18.         else:
  19.             upper_bound = 1e400
  20.         if (upper_bound > max_upper_bound):
  21.             max_upper_bound = upper_bound
  22.             selected_ad_index = ad_index
  23.     ads_selected.append(selected_ad_index)
  24.     numbers_of_selections[selected_ad_index] = numbers_of_selections[selected_ad_index] + 1
  25.     reward = data.values[round_index, selected_ad_index]
  26.     sums_of_rewards[selected_ad_index] = sums_of_rewards[selected_ad_index] + reward
  27.     total_reward = total_reward + reward
  28.  
  29. # Visualising the results
  30. plt.hist(ads_selected)
  31. plt.title("Histogram of ads selections")
  32. plt.xlabel("Ads")
  33. plt.ylabel("Number of times each ad was selected")
  34. plt.show()
Add Comment
Please, Sign In to add comment