Guest User

Untitled

a guest
Apr 22nd, 2023
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.91 KB | None | 0 0
  1. import ray #v2.3.1
  2. from ray import tune
  3. from typing import Tuple, Any
  4. import gymnasium as gym #0.26.3
  5. from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
  6. import numpy as np #1.23.5
  7. from ray.rllib.algorithms.mbmpo import MBMPOConfig
  8.  
# Start a local Ray runtime; must run before any RLlib/Tune work below.
ray.init()
  10.  
  11. # my wrapper class for the environment
  12. class HalfCheetahEnv4(HalfCheetahEnv):
  13.     def __init__(self, env_config) -> None:
  14.         print("ENV CONFIG:",env_config)
  15.         super().__init__()
  16.  
  17.     def step(self, action):
  18.         print(action)
  19.         return super().step(action)
  20.    
  21.     def reward(self, obs, action, obs_next):
  22.         print('reward called!')
  23.         if obs.ndim == 2 and action.ndim == 2:
  24.             assert obs.shape == obs_next.shape
  25.             forward_vel = obs_next[:, 8]
  26.             ctrl_cost = 0.1 * np.sum(np.square(action), axis=1)
  27.             reward = forward_vel - ctrl_cost
  28.             return np.minimum(np.maximum(-1000.0, reward), 1000.0)
  29.         else:
  30.             forward_vel = obs_next[8]
  31.             ctrl_cost = 0.1 * np.square(action).sum()
  32.             reward = forward_vel - ctrl_cost
  33.             return np.minimum(np.maximum(-1000.0, reward), 1000.0)
  34.  
  35. tune.run("MBMPO",
  36.         config={"env": HalfCheetahEnv4,
  37.                 "framework": "torch",
  38.                 #"inner_adaptation_steps": 1,
  39.                 #"maml_optimizer_steps": 8,
  40.                 #"gamma": 0.99,
  41.                 #"lambda": 1.0,
  42.                 #"lr": 0.001,
  43.                 #"clip_param": 0.5,
  44.                 #"kl_target": 0.003,
  45.                 #"kl_coeff": 0.0000000001,
  46.                 #"num_workers": 20,
  47.                 #"inner_lr": 0.001,
  48.                 #"clip_actions": False,
  49.                 #"num_maml_steps": 15,
  50.                 #"model":{"fcnet_hiddens": [32, 32], "free_log_std": True},
  51.         },
  52.         stop={"training_iteration": 500}
  53.     )
  54.  
# NOTE: this run does not produce any result files that TensorBoard can read.
Advertisement
Add Comment
Please, Sign In to add comment