import ray  # v2.3.1
from ray import tune
from typing import Tuple, Any
import gymnasium as gym  # 0.26.3
from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
import numpy as np  # 1.23.5
from ray.rllib.algorithms.mbmpo import MBMPOConfig

ray.init()
# My wrapper class for the environment. MB-MPO expects the environment to
# expose a reward(obs, action, obs_next) method so that rewards can be
# computed for rollouts generated by the learned dynamics model.
class HalfCheetahEnv4(HalfCheetahEnv):
    def __init__(self, env_config) -> None:
        print("ENV CONFIG:", env_config)
        super().__init__()

    def step(self, action):
        print(action)
        return super().step(action)

    def reward(self, obs, action, obs_next):
        print("reward called!")
        if obs.ndim == 2 and action.ndim == 2:
            # Batched call: one row per sample.
            assert obs.shape == obs_next.shape
            forward_vel = obs_next[:, 8]
            ctrl_cost = 0.1 * np.sum(np.square(action), axis=1)
            reward = forward_vel - ctrl_cost
            return np.minimum(np.maximum(-1000.0, reward), 1000.0)
        else:
            # Single-sample call.
            forward_vel = obs_next[8]
            ctrl_cost = 0.1 * np.square(action).sum()
            reward = forward_vel - ctrl_cost
            return np.minimum(np.maximum(-1000.0, reward), 1000.0)
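
# Optional sanity check of the wrapper before handing it to Tune. This is only
# a sketch under two assumptions that are not part of the original script:
# MuJoCo is installed locally, and an empty dict is an acceptable env_config.
# It exercises both branches of reward() (single-sample and batched).
check_env = HalfCheetahEnv4({})
obs, info = check_env.reset(seed=0)
act = check_env.action_space.sample()
obs_next, rew, terminated, truncated, info = check_env.step(act)
print("single-sample reward:", check_env.reward(obs, act, obs_next))
print("batched reward:", check_env.reward(obs[None], act[None], obs_next[None]))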
- tune.run("MBMPO",
- config={"env": HalfCheetahEnv4,
- "framework": "torch",
- #"inner_adaptation_steps": 1,
- #"maml_optimizer_steps": 8,
- #"gamma": 0.99,
- #"lambda": 1.0,
- #"lr": 0.001,
- #"clip_param": 0.5,
- #"kl_target": 0.003,
- #"kl_coeff": 0.0000000001,
- #"num_workers": 20,
- #"inner_lr": 0.001,
- #"clip_actions": False,
- #"num_maml_steps": 15,
- #"model":{"fcnet_hiddens": [32, 32], "free_log_std": True},
- },
- stop={"training_iteration": 500}
- )
# Problem: this run does not produce any result files that TensorBoard can read.
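# A possible thing to check (an assumption, not verified against this exact
# setup): by default Tune writes its TensorBoard event files under
# ~/ray_results/<experiment_name>, so TensorBoard has to be pointed there:
#   tensorboard --logdir ~/ray_results
# Passing an explicit output directory to tune.run (the local_dir argument in
# Ray 2.3) makes the location unambiguous, e.g.:
#   tune.run("MBMPO", config={...}, stop={...}, local_dir="./ray_results")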