Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Player:
    """Regret-matching player (excerpt; other members are elided below).

    The methods shown here read ``self.regret_sum`` and read/write
    ``self.strategy``, ``self.strategy_sum`` and ``self.avg_strategy``.
    """

    # ....

    def update_strategy(self) -> None:
        """Set the current strategy proportional to the positive regrets.

        Negative entries of ``self.regret_sum`` are treated as zero and the
        remainder is normalised to sum to one, e.g. a strategy that prefers
        PAPER can be ``[0.2, 0.6, 0.2]``. When no regret is positive the
        strategy falls back to a uniform distribution. The resulting
        strategy is then added to ``self.strategy_sum`` in place.
        """
        # np.maximum allocates a new array, so regret_sum itself is untouched.
        positive_regret = np.maximum(np.asarray(self.regret_sum), 0)
        total = positive_regret.sum()
        if total > 0:
            # Normalise out of place: an in-place ``/=`` raises a casting
            # error when the regrets are stored with an integer dtype.
            self.strategy = positive_regret / total
        else:
            # Uniform distribution to reduce exploitability; sized from the
            # regret array so it always matches the number of actions.
            self.strategy = np.full(
                positive_regret.shape, 1 / positive_regret.size
            )
        # Accumulate on every call so the average strategy can be derived.
        self.strategy_sum += self.strategy

    def learn_avg_strategy(self) -> None:
        """Store the normalised accumulated strategy in ``self.avg_strategy``.

        The averaged strategy converges to a Nash equilibrium. If nothing
        has been accumulated yet (the sum is not positive), a uniform
        distribution is stored instead.
        """
        accumulated = np.asarray(self.strategy_sum)
        total = accumulated.sum()
        if total > 0:
            self.avg_strategy = accumulated / total
        else:
            self.avg_strategy = np.full(
                accumulated.shape, 1 / accumulated.size
            )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement