Advertisement
Guest User

Untitled

a guest
May 21st, 2018
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.76 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. """
  4. Train an agent on Sonic using PPO2 from OpenAI Baselines.
  5. """
  6.  
  7. import tensorflow as tf
  8.  
  9. from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
  10. import baselines.ppo2.ppo2 as ppo2
  11. import baselines.ppo2.policies as policies
  12. import gym_remote.exceptions as gre
  13. from baselines import bench, logger
  14.  
  15. import gym
  16. import numpy as np
  17.  
  18. from baselines.common.atari_wrappers import WarpFrame, FrameStack
  19. import gym_remote.client as grc
  20.  
  21. from retro_contest.local import make
  22.  
  23. def make_env(scale_rew=True, stack=False):
  24.     env = make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
  25.     env = SonicDiscretizer(env)
  26.     env = AllowBacktracking(env)
  27.     if scale_rew:
  28.         env = RewardScaler(env)
  29.     env = WarpFrame(env)
  30.     if stack:
  31.         env = FrameStack(env, 4)
  32.     env = bench.Monitor(env, logger.get_dir())
  33.     return env
  34.  
  35. class SonicDiscretizer(gym.ActionWrapper):
  36.     """
  37.    Wrap a gym-retro environment and make it use discrete
  38.    actions for the Sonic game.
  39.    """
  40.     def __init__(self, env):
  41.         super(SonicDiscretizer, self).__init__(env)
  42.         buttons = ["B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y", "X", "Z"]
  43.         actions = [['LEFT'], ['RIGHT'], ['LEFT', 'DOWN'], ['RIGHT', 'DOWN'], ['DOWN'],
  44.                    ['DOWN', 'B'], ['B']]
  45.         self._actions = []
  46.         for action in actions:
  47.             arr = np.array([False] * 12)
  48.             for button in action:
  49.                 arr[buttons.index(button)] = True
  50.             self._actions.append(arr)
  51.         self.action_space = gym.spaces.Discrete(len(self._actions))
  52.  
  53.     def action(self, a): # pylint: disable=W0221
  54.         return self._actions[a].copy()
  55.  
  56. class RewardScaler(gym.RewardWrapper):
  57.     """
  58.    Bring rewards to a reasonable scale for PPO.
  59.  
  60.    This is incredibly important and effects performance
  61.    drastically.
  62.    """
  63.     def reward(self, reward):
  64.         return reward * 0.01
  65.  
  66. class AllowBacktracking(gym.Wrapper):
  67.     """
  68.    Use deltas in max(X) as the reward, rather than deltas
  69.    in X. This way, agents are not discouraged too heavily
  70.    from exploring backwards if there is no way to advance
  71.    head-on in the level.
  72.    """
  73.     def __init__(self, env):
  74.         super(AllowBacktracking, self).__init__(env)
  75.         self._cur_x = 0
  76.         self._max_x = 0
  77.  
  78.     def reset(self, **kwargs): # pylint: disable=E0202
  79.         self._cur_x = 0
  80.         self._max_x = 0
  81.         return self.env.reset(**kwargs)
  82.  
  83.     def step(self, action): # pylint: disable=E0202
  84.         obs, rew, done, info = self.env.step(action)
  85.         self._cur_x += rew
  86.         rew = max(0, self._cur_x - self._max_x)
  87.         self._max_x = max(self._max_x, self._cur_x)
  88.         return obs, rew, done, info
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement