import gym
import numpy as np

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines.common import set_global_seeds
from stable_baselines.acktr import ACKTR

def make_env(env_id, rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param env_id: (str) the environment ID
    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    """
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init

env_id = "CartPole-v1"
num_cpu = 4  # Number of processes to use
# Create the vectorized environment
env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

model = ACKTR(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=25000)

# Run the trained agent
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
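
# Note: SubprocVecEnv runs each environment in its own process, so on platforms
# that spawn processes (e.g. Windows, macOS) the script body generally needs an
# `if __name__ == '__main__':` guard. For a cheap environment like CartPole, a
# single-process DummyVecEnv can be a simpler drop-in alternative; a minimal
# sketch, reusing the make_env helper defined above:

from stable_baselines.common.vec_env import DummyVecEnv

# Steps the environments sequentially in the current process, avoiding the
# inter-process communication overhead of SubprocVecEnv.
env = DummyVecEnv([make_env(env_id, i) for i in range(num_cpu)])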