Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Fri Apr 28 15:50:45 2017
- @author: CrystalOng
- """
- import numpy as np
- import math as m
- import matplotlib.pyplot as plt
def UCB(mu1, mu2, T):
    """Simulate a two-armed Bernoulli bandit played with a UCB index policy.

    Every round a Bernoulli reward is drawn for both arms, the arm whose
    upper confidence bound is larger is played (ties are broken by a fair
    coin flip), and only the played arm's reward is collected.

    Args:
        mu1: Success probability of arm 0.
        mu2: Success probability of arm 1.
        T: Number of rounds to play; also the horizon used inside the
            confidence radius.

    Returns:
        The total reward collected over the T rounds (0 when T <= 0).
    """
    if T <= 0:
        # No rounds are played; also keeps log(T) below well-defined.
        return 0

    # Loop-invariant numerator of the squared confidence radius.
    # NOTE(review): the logarithm is base 10, as in the original code;
    # textbook UCB1 uses the natural log -- confirm which one is intended.
    radius_numerator = 2 * m.log(T, 10)

    upper = [1, 1]       # both indices start at 1, so the first pick is a tie
    pulls = [0, 0]       # number of times each arm has been played
    reward_sum = [0, 0]  # running reward total per arm (avoids re-summing)
    total = 0

    for _ in range(T):
        # Both arms are sampled each round, although only the played arm's
        # reward is used; this keeps the random-draw order stable.
        rewards = (np.random.binomial(1, mu1), np.random.binomial(1, mu2))

        if upper[0] > upper[1]:
            arm = 0
        elif upper[0] < upper[1]:
            arm = 1
        else:
            arm = np.random.binomial(1, 0.5)

        gained = rewards[arm]
        total += gained
        pulls[arm] += 1
        reward_sum[arm] += gained

        # Empirical mean of the played arm plus its confidence radius.
        mean = reward_sum[arm] / pulls[arm]
        upper[arm] = mean + m.sqrt(radius_numerator / pulls[arm])

    return total
def thomson(mu1, mu2, T):
    """Simulate a two-armed Bernoulli bandit played with Thompson sampling.

    Each arm keeps a Beta(a, b) distribution that starts at Beta(1, 1).
    Every round one value is sampled from each arm's distribution, the arm
    with the larger sample is played (ties are broken by a fair coin flip),
    and the played arm's parameters are updated with its 0/1 reward.

    Args:
        mu1: Success probability of arm 0.
        mu2: Success probability of arm 1.
        T: Number of rounds to play.

    Returns:
        The total reward collected over the T rounds.
    """
    alpha = [1, 1]  # first Beta parameter per arm: 1 + rewards of 1
    beta = [1, 1]   # second Beta parameter per arm: 1 + rewards of 0
    collected = 0

    for _ in range(T):
        sample_first = np.random.beta(alpha[0], beta[0])
        sample_second = np.random.beta(alpha[1], beta[1])
        # Rewards for both arms are drawn, but only the played one counts.
        outcomes = (np.random.binomial(1, mu1), np.random.binomial(1, mu2))

        if sample_first < sample_second:
            played = 1
        elif sample_first > sample_second:
            played = 0
        else:
            played = np.random.binomial(1, 0.5)

        reward = outcomes[played]
        collected = collected + reward
        alpha[played] += reward
        beta[played] += 1 - reward

    return collected
# Experiment: average regret of UCB (red) and Thompson sampling (blue) as a
# function of the gap `delta` between the two arms' success probabilities.

# Gaps to evaluate.
# NOTE(review): np.arange with a non-integer step may include or drop the
# end point because of floating-point rounding; np.linspace is the
# documented alternative if an exact number of points is required.
x = np.arange(0.01, 0.31, 0.01)
mu1 = 0.5
T = 1000
N_RUNS = 500000          # independent simulations averaged per delta
PROGRESS_EVERY = 250000  # print a progress marker every this many runs
avreg = []
avreg2 = []

# Test all deltas.
for delta in x:
    reg = []
    reg2 = []
    # Expected reward of always playing the second arm, which has the
    # higher mean (mu1 + delta) for every delta in x.
    best = T * (mu1 + delta)
    # Simulate N_RUNS times and average the regret.
    for i in range(N_RUNS):
        # Report progress only on multiples of PROGRESS_EVERY; the bare
        # truthiness test `i % PROGRESS_EVERY` fired on every other value.
        if i % PROGRESS_EVERY == 0:
            print(i)
        v = UCB(mu1, mu1 + delta, T)
        pi = thomson(mu1, mu1 + delta, T)
        reg.append(best - v)
        reg2.append(best - pi)
    avreg.append(np.mean(reg))
    avreg2.append(np.mean(reg2))

plt.plot(x, avreg, 'red', x, avreg2, 'blue')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement