Guest User

Untitled

a guest
May 22nd, 2018
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.90 KB | None | 0 0
import os
import random
  2.  
  3. class EpsilonGreedy():
  4. def __init__(self, epsilon, counts, values):
  5. self.epsilon = epsilon # probability of explore
  6. self.counts = counts # number of pulls for each arms
  7. self.values = values # average amount of reward we've gotten from each arms
  8. return
  9.  
  10. def initialize(self, n_arms):
  11. self.counts = [0 for col in range(n_arms)]
  12. self.values = [0.0 for col in range(n_arms)]
  13. return
  14.  
  15. def ind_max(self, x):
  16. m = max(x)
  17. return x.index(m)
  18.  
  19. def select_arm(self):
  20. if random.random() > self.epsilon:
  21. return self.ind_max(self.values)
  22. else:
  23. return random.randrange(len(self.values))
  24.  
  25. def update(self, chosen_arm, reward):
  26. self.counts[chosen_arm] = self.counts[chosen_arm] + 1
  27. n = self.counts[chosen_arm]
  28.  
  29. value = self.values[chosen_arm]
  30. new_value = ((n-1) / float(n)) * value + (1 / float(n)) * reward # weighted average of the previously estimated value and the reward we just received
  31. self.values[chosen_arm] = new_value
  32. return
  33.  
  34.  
  35. class BernoulliArm():
  36. def __init__(self, p):
  37. self.p = p
  38.  
  39. def draw(self):
  40. if random.random() > self.p:
  41. return 0.0
  42. else:
  43. return 1.0
  44.  
  45.  
  46. def test_algorithm(algo, arms, num_sims, horizon):
  47. chosen_arms = [0.0 for i in range(num_sims * horizon)]
  48. rewards = [0.0 for i in range(num_sims * horizon)]
  49. cumulative_rewards = [0.0 for i in range(num_sims * horizon)]
  50. sim_nums = [0.0 for i in range(num_sims * horizon)]
  51. times = [0.0 for i in range(num_sims * horizon)]
  52.  
  53. for sim in range(num_sims):
  54. sim = sim + 1
  55. algo.initialize(len(arms))
  56.  
  57. for t in range(horizon):
  58. t = t + 1
  59. index = (sim -1) * horizon + t - 1 #???
  60. sim_nums[index] = sim
  61. times[index] = t
  62.  
  63. chosen_arm = algo.select_arm()
  64. chosen_arms[index] = chosen_arm
  65. reward = arms[chosen_arms[index]].draw()
  66. rewards[index] = reward
  67.  
  68. if t == 1:
  69. cumulative_rewards[index] = reward
  70. else:
  71. cumulative_rewards[index] = cumulative_rewards[index - 1] + reward
  72.  
  73. algo.update(chosen_arm, reward)
  74.  
  75. return [sim_nums, times, chosen_arms, rewards, cumulative_rewards]
  76.  
  77.  
  78. random.seed(1)
  79.  
  80. means = [0.1, 0.1, 0.1, 0.1, 0.9]
  81. n_arms = len(means)
  82. random.shuffle(means)
  83. arms = list(map(lambda mu: BernoulliArm(mu), means))
  84. # print("Best arm is "+ str(ind_max(means)))
  85.  
  86. f = open("~/algorithms/epsilon_greedy/standard_results.tsv", "w")
  87.  
  88. for epsilon in [0.1, 0.2, 0.3, 0.4, 0.5]:
  89. algo = EpsilonGreedy(epsilon, [], [])
  90. algo.initialize(n_arms)
  91. results = test_algorithm(algo, arms, 5000, 250)
  92. for i in range(len(results[0])):
  93. f.write(str(epsilon) + "\t")
  94. f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n")
  95.  
  96. f.close()
Add Comment
Please, Sign In to add comment