Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import gym
- import numpy as np
- from stable_baselines.common.policies import MlpPolicy
- from stable_baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
- from stable_baselines.common import set_global_seeds
- from stable_baselines.acktr import ACKTR
def make_env(env_id, rank, seed=0):
    """
    Build an environment factory for one worker of a multiprocessed env.

    ``set_global_seeds(seed)`` is called immediately, when the factory is
    created; the environment itself is only built when the returned
    callable is invoked.

    :param env_id: (str) the environment ID handed to ``gym.make``
    :param rank: (int) index of the subprocess, added to ``seed`` so each
        worker's environment gets a different seed
    :param seed: (int) the initial seed for RNG
    :return: (callable) zero-argument function that creates the environment,
        seeds it with ``seed + rank`` and returns it
    """
    set_global_seeds(seed)

    def _init():
        # Offset the base seed by the worker index before seeding.
        worker_seed = seed + rank
        environment = gym.make(env_id)
        environment.seed(worker_seed)
        return environment

    return _init
env_id = "CartPole-v1"
num_cpu = 4  # Number of processes to use

# SubprocVecEnv launches worker processes. With the "spawn" or "forkserver"
# start methods each worker re-imports this module, so anything that starts
# processes or trains must only run when the file is executed as a script.
if __name__ == "__main__":
    # Create the vectorized environment: one factory per worker, rank = i.
    env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
    try:
        model = ACKTR(MlpPolicy, env, verbose=1)
        model.learn(total_timesteps=25000)

        # Run the trained model for 1000 vectorized steps, rendering each one.
        obs = env.reset()
        for _ in range(1000):
            action, _states = model.predict(obs)
            obs, rewards, dones, info = env.step(action)
            env.render()
    finally:
        # Shut down the worker processes even if training or rendering fails.
        env.close()
Add Comment
Please, Sign In to add comment