Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- """
- Train an agent on Sonic using PPO2 from OpenAI Baselines.
- """
- import tensorflow as tf
- from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
- import baselines.ppo2.ppo2 as ppo2
- import baselines.ppo2.policies as policies
- import gym_remote.exceptions as gre
- from baselines import bench, logger
- import gym
- import numpy as np
- from baselines.common.atari_wrappers import WarpFrame, FrameStack
- import gym_remote.client as grc
- from retro_contest.local import make
def make_env(scale_rew=True, stack=False,
             game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1'):
    """
    Create a Sonic environment wrapped for PPO training.

    The wrappers are applied in this order: SonicDiscretizer,
    AllowBacktracking, RewardScaler (optional), WarpFrame,
    FrameStack (optional) and finally bench.Monitor.

    Args:
        scale_rew: if True, wrap the environment in RewardScaler.
        stack: if True, wrap the environment in FrameStack with 4 frames.
        game: game name forwarded to retro_contest.local.make.
        state: level/state name forwarded to retro_contest.local.make.

    Returns:
        The fully wrapped environment, with bench.Monitor outermost.
    """
    env = make(game=game, state=state)
    env = SonicDiscretizer(env)
    # AllowBacktracking sits below RewardScaler, so it tracks progress
    # using the unscaled rewards coming from the inner environment.
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    # Monitor is applied last, so it observes the rewards produced after
    # every wrapper above; it is given the baselines logger directory.
    env = bench.Monitor(env, logger.get_dir())
    return env
class SonicDiscretizer(gym.ActionWrapper):
    """
    Expose a small discrete action set for Sonic on top of a
    gym-retro environment.

    Each discrete action index maps to a 12-element boolean array with
    one entry per button, marking the buttons held for that action.
    """

    def __init__(self, env):
        super(SonicDiscretizer, self).__init__(env)
        # Position of each button inside the boolean action array.
        # NOTE(review): presumably mirrors the button order of the wrapped
        # retro environment -- confirm against the emulator's layout.
        button_order = ["B", "A", "MODE", "START", "UP", "DOWN",
                        "LEFT", "RIGHT", "C", "Y", "X", "Z"]
        # Button combinations offered to the agent, one per discrete action.
        combos = [
            ['LEFT'],
            ['RIGHT'],
            ['LEFT', 'DOWN'],
            ['RIGHT', 'DOWN'],
            ['DOWN'],
            ['DOWN', 'B'],
            ['B'],
        ]

        def build_mask(combo):
            # Start with nothing pressed, then flag each button in the combo.
            pressed = np.zeros(len(button_order), dtype=bool)
            for name in combo:
                pressed[button_order.index(name)] = True
            return pressed

        self._actions = [build_mask(combo) for combo in combos]
        self.action_space = gym.spaces.Discrete(len(self._actions))

    def action(self, a):  # pylint: disable=W0221
        """Translate discrete action index `a` into its button array."""
        chosen = self._actions[a]
        # Return a copy so callers cannot mutate the stored table.
        return chosen.copy()
class RewardScaler(gym.RewardWrapper):
    """
    Shrink rewards to a magnitude that suits PPO.

    Every reward is multiplied by 0.01. This scaling matters a great
    deal: it affects training performance drastically.
    """

    def reward(self, reward):
        """Return `reward` multiplied by 0.01."""
        scaled = reward * 0.01
        return scaled
class AllowBacktracking(gym.Wrapper):
    """
    Reward increases of the running maximum of the accumulated reward
    instead of the raw per-step reward.

    The wrapper sums the inner environment's rewards into a running
    total and pays out only when that total exceeds its previous
    maximum. Moving backwards therefore yields zero reward rather than
    a penalty, so the agent is not discouraged too heavily from
    backtracking when it cannot advance head-on in the level.
    """

    def __init__(self, env):
        super(AllowBacktracking, self).__init__(env)
        self._cur_x = 0  # running sum of the inner rewards this episode
        self._max_x = 0  # largest value the running sum has reached

    def reset(self, **kwargs):  # pylint: disable=E0202
        """Clear the progress counters and reset the wrapped environment."""
        self._cur_x = self._max_x = 0
        return self.env.reset(**kwargs)

    def step(self, action):  # pylint: disable=E0202
        """Step the wrapped environment and reward only new progress."""
        obs, raw_reward, done, info = self.env.step(action)
        self._cur_x += raw_reward
        gained = self._cur_x - self._max_x
        if gained > 0:
            # A new maximum was reached: pay out the amount beyond the old one.
            self._max_x = self._cur_x
        else:
            # No new progress (standing still or backtracking): no reward.
            gained = 0
        return obs, gained, done, info
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement