import ray  # v2.3.1
from ray import tune
from typing import Tuple, Any
import gymnasium as gym  # 0.26.3
from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
import numpy as np  # 1.23.5
from ray.rllib.algorithms.mbmpo import MBMPOConfig

ray.init()
# My wrapper class for the environment. MB-MPO expects the environment to
# expose a reward(obs, action, obs_next) method so that rewards can be
# computed for rollouts generated by the learned dynamics model.
class HalfCheetahEnv4(HalfCheetahEnv):
    def __init__(self, env_config) -> None:
        print("ENV CONFIG:", env_config)
        super().__init__()

    def step(self, action):
        print(action)
        return super().step(action)

    def reward(self, obs, action, obs_next):
        print("reward called!")
        if obs.ndim == 2 and action.ndim == 2:
            # Batched call: one row per sample.
            assert obs.shape == obs_next.shape
            forward_vel = obs_next[:, 8]
            ctrl_cost = 0.1 * np.sum(np.square(action), axis=1)
            reward = forward_vel - ctrl_cost
            return np.minimum(np.maximum(-1000.0, reward), 1000.0)
        else:
            # Single-sample call.
            forward_vel = obs_next[8]
            ctrl_cost = 0.1 * np.square(action).sum()
            reward = forward_vel - ctrl_cost
            return np.minimum(np.maximum(-1000.0, reward), 1000.0)
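
# Optional sanity check of the wrapper before handing it to Tune. This is only
# a sketch under two assumptions that are not part of the original script:
# MuJoCo is installed locally, and an empty dict is an acceptable env_config.
# It exercises both branches of reward() (single-sample and batched).
check_env = HalfCheetahEnv4({})
obs, info = check_env.reset(seed=0)
act = check_env.action_space.sample()
obs_next, rew, terminated, truncated, info = check_env.step(act)
print("single-sample reward:", check_env.reward(obs, act, obs_next))
print("batched reward:", check_env.reward(obs[None], act[None], obs_next[None]))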
- tune.run("MBMPO",
- config={"env": HalfCheetahEnv4,
- "framework": "torch",
- #"inner_adaptation_steps": 1,
- #"maml_optimizer_steps": 8,
- #"gamma": 0.99,
- #"lambda": 1.0,
- #"lr": 0.001,
- #"clip_param": 0.5,
- #"kl_target": 0.003,
- #"kl_coeff": 0.0000000001,
- #"num_workers": 20,
- #"inner_lr": 0.001,
- #"clip_actions": False,
- #"num_maml_steps": 15,
- #"model":{"fcnet_hiddens": [32, 32], "free_log_std": True},
- },
- stop={"training_iteration": 500}
- )
# Problem: this run does not produce any result files that TensorBoard can read.
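# A possible thing to check (an assumption, not verified against this exact
# setup): by default Tune writes its TensorBoard event files under
# ~/ray_results/<experiment_name>, so TensorBoard has to be pointed there:
#   tensorboard --logdir ~/ray_results
# Passing an explicit output directory to tune.run (the local_dir argument in
# Ray 2.3) makes the location unambiguous, e.g.:
#   tune.run("MBMPO", config={...}, stop={...}, local_dir="./ray_results")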