Advertisement
Guest User

Untitled

a guest
Jul 27th, 2017
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.42 KB | None | 0 0
  1. import random
  2. import math
  3.  
  4.  
  5. class Bandit:
  6. def __init__(self, n=10, exp_avg=0.0, exp_var=1.0, var=1.0):
  7. self.n = n
  8. self.exp_avg = exp_avg
  9. self.exp_var = exp_var
  10. self.var = var
  11. self.p = [random.normalvariate(exp_avg, exp_var) for i in range(n)]
  12.  
  13. def pull(self, arm):
  14. return random.normalvariate(self.p[arm], self.var)
  15.  
  16.  
  17. class GreedyMethod:
  18. def __init__(self, n=10, e=0.0):
  19. self.n = n
  20. self.e = e
  21. self.times = [0] * n
  22. self.values = [0] * n
  23.  
  24. def select(self):
  25. if random.random() < self.e:
  26. return random.randint(0, self.n - 1)
  27. else:
  28. return self.values.index(max(self.values))
  29.  
  30. def reflect(self, arm, value):
  31. self.times[arm] += 1
  32. self.values[arm] += (value - self.values[arm]) / self.times[arm]
  33.  
  34.  
  35. class SoftmaxMethod:
  36. def __init__(self, n=10, t=0.2):
  37. self.n = n
  38. self.t = t
  39. self.times = [0] * n
  40. self.values = [0] * n
  41.  
  42. def select(self):
  43. e = list(map(lambda v: math.exp(v / self.t), self.values))
  44. e_sum = sum(e)
  45. rand = random.random()
  46.  
  47. for i in range(self.n):
  48. if rand < e[i] / e_sum:
  49. return i
  50. else:
  51. rand -= e[i] / e_sum
  52.  
  53. def reflect(self, arm, value):
  54. self.times[arm] += 1
  55. self.values[arm] += (value - self.values[arm]) / self.times[arm]
  56.  
  57. # Number of loop
  58. count = 1000
  59.  
  60. # Init bandit
  61. bandit = Bandit(10)
  62.  
  63. # Greedy Method
  64. greedy = GreedyMethod(10)
  65.  
  66. for i in range(count):
  67. selected_arm = greedy.select()
  68. bandit_value = bandit.pull(selected_arm)
  69. greedy.reflect(selected_arm, bandit_value)
  70.  
  71. # Epsilon greedy method
  72. epsilon_greedy = GreedyMethod(10, 0.1)
  73.  
  74. for i in range(count):
  75. selected_arm = epsilon_greedy.select()
  76. bandit_value = bandit.pull(selected_arm)
  77. epsilon_greedy.reflect(selected_arm, bandit_value)
  78.  
  79. # SoftmaxMethod
  80. softmax = SoftmaxMethod(10, 0.4)
  81.  
  82. for i in range(count):
  83. selected_arm = softmax.select()
  84. bandit_value = bandit.pull(selected_arm)
  85. softmax.reflect(selected_arm, bandit_value)
  86.  
  87. print("greedy max: ", greedy.values.index(max(greedy.values)), max(greedy.values))
  88. print("epsilon greedy max: ", epsilon_greedy.values.index(max(epsilon_greedy.values)), max(epsilon_greedy.values))
  89. print("softmax max: ", softmax.values.index(max(softmax.values)), max(softmax.values))
  90. print("solution: ", bandit.p.index(max(bandit.p)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement