Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 9 20:30:29 2019
@author: DASA0

Upper Confidence Bound (UCB) for ad selection (multi-armed bandit).
Each row of the dataset is one round; each column is an ad; a cell value
of 1 means the user would have clicked that ad in that round.
"""

# Importing the libraries
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')

# Implementing the Upper Confidence Bound algorithm
# (NOTE: the original comment said "Random Selection", but this is UCB.)
N = 10000                    # number of rounds
d = 10                       # number of ads (arms)
ads_selected = []            # ad chosen at each round
no_of_selections = [0] * d   # how many times each ad has been selected so far
sum_rewards = [0] * d        # cumulative reward collected per ad
total_reward = 0

for n in range(0, N):
    ad = 0                   # defensively initialized; inf bound below always assigns it
    max_upper_bound = 0
    for i in range(0, d):
        if no_of_selections[i] > 0:
            avg_reward = sum_rewards[i] / no_of_selections[i]
            # UCB1 exploration term: shrinks as an ad is selected more,
            # grows (slowly, via log) as rounds pass without selecting it.
            delta_i = math.sqrt(3 / 2 * math.log(n + 1) / no_of_selections[i])
            upper_bound = avg_reward + delta_i
        else:
            # Never-selected ads get an infinite bound so each is tried
            # at least once before exploitation starts.
            # (Original used 1e400, which only works by overflowing to inf.)
            upper_bound = float('inf')
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i
    ads_selected.append(ad)
    no_of_selections[ad] = no_of_selections[ad] + 1
    reward = dataset.values[n, ad]
    sum_rewards[ad] = sum_rewards[ad] + reward
    total_reward = total_reward + reward

# Visualising the results
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement