Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class GLResourceDistribution(gym.Env):
    """Three-player resource-distribution game exposed as a Gym environment.

    Player 1 is the learning agent: each discrete action indexes a fixed
    table of stake triples (``self.ACTIONS``).  Players 2 and 3 submit
    random stake triples on every step.  The observation is the six stake
    values of players 2 and 3 from the most recent step.
    """

    def __init__(self):
        self.stakes = {}
        self.ACTIONS = {}

        # Enumerate every triple of multiples of 10 (0..110) that sums to
        # 110 and whose first component is the largest.  The first component
        # is then bumped by 5, so every action sums to 115 -- the same total
        # the random opponents are scaled to in ``step``.
        moves = list(range(0, 120, 10))
        count = 0
        for i in moves:
            for j in moves:
                for k in moves:
                    if i + j + k == 110 and i >= j and i >= k:
                        self.ACTIONS[count] = [i + 5, j, k]
                        count += 1

        self.action_space = spaces.Discrete(count)
        self.observation_space = spaces.Box(
            low=np.array([0.0] * 6),
            high=np.array([1000.0] * 6),
        )

        self.rew = 0
        self.rews = {}
        self.seed()
        self.steps = []
        self.is_done = {}
        self.in_second_lock = {}
        self.last_stakes = {}
        self.beta = 0.0005  # weight of the squared-deviation penalty
        self.alpha = 1  # share of the total penalty that is redistributed
        self.x = {}
        self.epoch = 0
        # No renderer is ever created here; define the attribute so that
        # ``close`` can be called safely.
        self.viewer = None

    def seed(self, seed=None):
        """Create ``self.np_random`` from ``seed`` and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_action_space(self):
        """Return the discrete action space."""
        return self.action_space

    def get_observation_space(self):
        """Return the six-dimensional box observation space."""
        return self.observation_space

    def get_rew(self, id=None):
        """Return the reward of player ``id``, or ``self.rew`` when omitted.

        The class used to define ``get_rew`` twice, so the zero-argument
        form was silently replaced by the one-argument form; both call
        styles are supported here.
        """
        if id is None:
            return self.rew
        return self.rews[id]

    def set_stake(self, id, action):
        """Store ``action`` as the current stake of player ``id``."""
        self.stakes[id] = action

    def get_stakes_len(self):
        """Return how many players currently have a stake recorded."""
        return len(self.stakes)

    def get_stakes(self):
        """Return the mapping of player id to current stake."""
        return self.stakes

    def clear_stakes(self):
        """Forget all current stakes."""
        self.stakes = {}

    def get_last_stakes(self):
        """Return the stakes recorded by the most recent ``step``."""
        return self.last_stakes

    def step(self, u):
        """Play one round with action index ``u`` for player 1.

        Returns the usual Gym 4-tuple ``(observation, reward, done, info)``.
        The reward is player 1's payoff ``x[1]`` divided by
        ``exp(penalty[1])``; ``done`` is always ``False`` and ``info`` is
        empty.  Per-player rewards are also stored in ``self.rews``.
        """
        players = (1, 2, 3)

        self.stakes[1] = self.ACTIONS[u]
        print(str(self.epoch) + " " + str(self.stakes[1]))
        self.epoch += 1

        # Opponents: a random direction scaled so the stake sums to 115.
        # NOTE(review): this draws from the global ``np.random`` stream, so
        # ``seed()`` / ``self.np_random`` has no effect on the opponents.
        # Left as-is because switching would break runs that rely on
        # ``np.random.seed``; confirm which behaviour is wanted.
        for opponent in (2, 3):
            raw = np.random.rand(3)
            self.stakes[opponent] = raw / sum(raw) * 115

        # Snapshot instead of aliasing, so later ``set_stake`` calls do not
        # retroactively change what the last round looked like.
        self.last_stakes = dict(self.stakes)

        rows = [self.stakes[player] for player in players]

        # Payoff for slot c is the mean of what all three players staked
        # on that slot.
        self.x = {
            player: (rows[0][player - 1] + rows[1][player - 1] + rows[2][player - 1]) / 3
            for player in players
        }

        # Penalty: beta times the squared distance between a player's stake
        # vector and the payoff vector.
        penalty = {
            player: self.beta * sum(
                (self.x[slot + 1] - rows[player - 1][slot]) ** 2
                for slot in range(3)
            )
            for player in players
        }
        penalty_sum = penalty[1] + penalty[2] + penalty[3]

        # Constant added under the square root, per player.
        offsets = {1: 1, 2: 9, 3: 25}
        for player in players:
            refund = penalty[player] - self.alpha * penalty_sum / 3
            self.rews[player] = np.sqrt(self.x[player] + offsets[player]) - refund

        return self._get_obs(), self.x[1] / np.exp(penalty[1]), False, {}

    def reset(self):
        """Zero the stored per-player rewards and return an all-zero observation.

        Stakes and the epoch counter are left untouched.
        """
        for player in (1, 2, 3):
            self.rews[player] = 0
        return np.array([0, 0, 0, 0, 0, 0])

    def _get_obs(self):
        """Return the stakes of players 2 and 3 as one flat array of six values."""
        return np.array(
            [self.stakes[player][slot] for player in (2, 3) for slot in range(3)]
        )

    def get_obs(self):
        """Public wrapper around ``_get_obs``."""
        return self._get_obs()

    def render(self, mode='human'):
        """Rendering is not implemented; this is a no-op."""
        pass

    def close(self):
        """Close the viewer if one was ever attached."""
        viewer = getattr(self, "viewer", None)
        if viewer:
            viewer.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement