Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import random
- class EpsilonGreedy():
- def __init__(self, epsilon, counts, values):
- self.epsilon = epsilon # probability of explore
- self.counts = counts # number of pulls for each arms
- self.values = values # average amount of reward we've gotten from each arms
- return
- def initialize(self, n_arms):
- self.counts = [0 for col in range(n_arms)]
- self.values = [0.0 for col in range(n_arms)]
- return
- def ind_max(self, x):
- m = max(x)
- return x.index(m)
- def select_arm(self):
- if random.random() > self.epsilon:
- return self.ind_max(self.values)
- else:
- return random.randrange(len(self.values))
- def update(self, chosen_arm, reward):
- self.counts[chosen_arm] = self.counts[chosen_arm] + 1
- n = self.counts[chosen_arm]
- value = self.values[chosen_arm]
- new_value = ((n-1) / float(n)) * value + (1 / float(n)) * reward # weighted average of the previously estimated value and the reward we just received
- self.values[chosen_arm] = new_value
- return
- class BernoulliArm():
- def __init__(self, p):
- self.p = p
- def draw(self):
- if random.random() > self.p:
- return 0.0
- else:
- return 1.0
- def test_algorithm(algo, arms, num_sims, horizon):
- chosen_arms = [0.0 for i in range(num_sims * horizon)]
- rewards = [0.0 for i in range(num_sims * horizon)]
- cumulative_rewards = [0.0 for i in range(num_sims * horizon)]
- sim_nums = [0.0 for i in range(num_sims * horizon)]
- times = [0.0 for i in range(num_sims * horizon)]
- for sim in range(num_sims):
- sim = sim + 1
- algo.initialize(len(arms))
- for t in range(horizon):
- t = t + 1
- index = (sim -1) * horizon + t - 1 #???
- sim_nums[index] = sim
- times[index] = t
- chosen_arm = algo.select_arm()
- chosen_arms[index] = chosen_arm
- reward = arms[chosen_arms[index]].draw()
- rewards[index] = reward
- if t == 1:
- cumulative_rewards[index] = reward
- else:
- cumulative_rewards[index] = cumulative_rewards[index - 1] + reward
- algo.update(chosen_arm, reward)
- return [sim_nums, times, chosen_arms, rewards, cumulative_rewards]
- random.seed(1)
- means = [0.1, 0.1, 0.1, 0.1, 0.9]
- n_arms = len(means)
- random.shuffle(means)
- arms = list(map(lambda mu: BernoulliArm(mu), means))
- # print("Best arm is "+ str(ind_max(means)))
- f = open("~/algorithms/epsilon_greedy/standard_results.tsv", "w")
- for epsilon in [0.1, 0.2, 0.3, 0.4, 0.5]:
- algo = EpsilonGreedy(epsilon, [], [])
- algo.initialize(n_arms)
- results = test_algorithm(algo, arms, 5000, 250)
- for i in range(len(results[0])):
- f.write(str(epsilon) + "\t")
- f.write("\t".join([str(results[j][i]) for j in range(len(results))]) + "\n")
- f.close()
Add Comment
Please, Sign In to add comment