Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import matplotlib.pyplot as plt
class RestlessBandit:
    """Upper-confidence-bound arm selector with a running mean per arm.

    Each arm's score is ``mean_reward + alpha * sqrt(log(t) / plays)``, where
    ``t`` is the number of calls made to :meth:`play` so far. The class keeps
    plain sample means over every observation; it applies no discounting or
    windowing of old rewards.

    Attributes are public and mutable:
        n_arms:  number of arms.
        alpha:   multiplier on the exploration bonus.
        t:       number of times ``play`` has been called.
        rewards: per-arm running mean of observed rewards.
        plays:   per-arm count of ``update`` calls.
    """

    def __init__(self, n_arms: int, alpha: float = 1.0) -> None:
        """Create a selector for ``n_arms`` arms with exploration weight ``alpha``."""
        self.n_arms = n_arms
        self.alpha = alpha
        self.t = 0
        self.rewards = [0.0] * n_arms
        self.plays = [0] * n_arms

    def play(self) -> int:
        """Advance the round counter and return the index of the arm to pull.

        Arms that have never been updated are returned first, lowest index
        first. Once every arm has at least one observation, the arm with the
        highest score is returned (ties resolve to the lowest index).
        """
        self.t += 1

        # The bonus term divides by the play count, which is undefined for an
        # arm with zero plays. Dividing anyway produced NaN/inf and only
        # worked because argmax happens to return the first such entry; pick
        # that same arm explicitly instead, without floating-point warnings.
        for arm, count in enumerate(self.plays):
            if count == 0:
                return arm

        log_t = np.log(self.t)  # identical for every arm, so compute it once
        scores = [
            mean + self.alpha * np.sqrt(log_t / count)
            for mean, count in zip(self.rewards, self.plays)
        ]
        return int(np.argmax(scores))

    def update(self, arm: int, reward: float) -> None:
        """Record ``reward`` for ``arm`` and refresh that arm's running mean."""
        self.plays[arm] += 1
        # Incremental form of the sample mean: new = old + (x - old) / n.
        self.rewards[arm] += (reward - self.rewards[arm]) / self.plays[arm]
# --- Simulation setup -------------------------------------------------------
n_arms = 5
bandit = RestlessBandit(n_arms)
n_rounds = 5000

# Starting mean reward of each arm; these drift a little after every round.
true_rewards = [0.1, 0.2, 0.3, 0.4, 0.5]

# Observed reward of every round, in play order.
results = []

# --- Main loop --------------------------------------------------------------
for _ in range(n_rounds):
    # Ask the policy which arm to pull.
    arm = bandit.play()

    # Draw a noisy reward (std 1.0) around the chosen arm's current mean,
    # then feed it back to the policy and log it.
    reward = np.random.normal(true_rewards[arm], 1.0)
    bandit.update(arm, reward)
    results.append(reward)

    # Every arm's mean takes an independent Gaussian step (std 0.01).
    true_rewards = [mean + np.random.normal(0, 0.01) for mean in true_rewards]

# --- Plot the reward trace --------------------------------------------------
plt.plot(results)
plt.xlabel("Round")
plt.ylabel("Reward")
plt.title("Restless Bandit Problem")
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement