# Not a member of Pastebin yet?
# Sign up; it unlocks many cool features!
# Hydra / OmegaConf configuration for a genrl_swarm run (the `_target_`
# entries below point at genrl_swarm.examples.rgym components).
#
# NOTE(review): the pasted source had lost all indentation and carried a
# spurious leading "- " on every line. The nesting below was rebuilt from
# the interpolation paths (e.g. ${training.max_round}), the "- -" list-item
# markers, and the `_target_` boundaries (one `_target_` per mapping).
# Confirm against the upstream config before relying on it.

# Root directory for logs; ROOT env var, falling back to the current dir.
log_dir: ${oc.env:ROOT,.}/logs

# Shared training knobs, referenced below as ${training.*}.
training:
  max_round: 1000000
  max_stage: 1
  hf_push_frequency: 1
  num_generations: 1
  num_transplant_trees: 1
  seed: 42
  fp16: false  # set this line to true if your hardware supports fp16

# Chain / coordinator settings, referenced below as ${blockchain.*}.
blockchain:
  alchemy_url: "https://gensyn-testnet.g.alchemy.com/public"
  contract_address: ${oc.env:SWARM_CONTRACT,null}  # This is set by modal-login in run_rl_swarm.sh
  org_id: ${oc.env:ORG_ID,null}  # This is set by modal-login in run_rl_swarm.sh
  mainnet_chain_id: 685685  # currently unused, will be used with WalletSwarmCoordinator
  modal_proxy_url: "http://localhost:3000/api/"

# Peer multiaddrs, referenced below as ${communications.initial_peers}.
communications:
  initial_peers:
    - '/ip4/38.101.215.15/tcp/30011/p2p/QmQ2gEXoPJg6iMBSUFWGzAabS2VhnzuS782Y637hGjfsRJ'
    - '/ip4/38.101.215.15/tcp/30012/p2p/QmWhiaLrx3HRZfgXc2i7KW5nMUNK7P9tRc71yFJdGEZKkC'
    - '/ip4/38.101.215.15/tcp/30013/p2p/QmQa1SCfYTxx7RvU7qJJRo79Zm1RAwPpkeLueDVJuBBmFp'

eval:
  judge_base_url: https://swarm-judge-102957787771.us-east1.run.app

# Make Hydra's run directory the same as log_dir.
hydra:
  run:
    dir: ${log_dir}

# NOTE(review): game_state, reward_manager, trainer, data_manager,
# communication and coordinator are placed under game_manager on the
# assumption that they are arguments of SwarmGameManager — confirm.
game_manager:
  _target_: genrl_swarm.game.game_manager.SwarmGameManager
  max_stage: ${training.max_stage}
  max_round: ${training.max_round}
  log_dir: ${log_dir}
  hf_token: ${oc.env:HUGGINGFACE_ACCESS_TOKEN,null}
  hf_push_frequency: ${training.hf_push_frequency}
  run_mode: "train_and_evaluate"
  bootnodes: ${communications.initial_peers}
  game_state:
    _target_: genrl_swarm.state.game_state.GameState
    round: 0
    stage: 0
  reward_manager:
    _target_: genrl_swarm.rewards.DefaultRewardManager
    reward_fn_store:
      _target_: genrl_swarm.rewards.reward_store.RewardFnStore
      max_rounds: ${training.max_round}
      reward_fn_stores:
        - _target_: genrl_swarm.rewards.reward_store.RoundRewardFnStore
          num_stages: ${training.max_stage}
          reward_fns:
            - _target_: genrl_swarm.examples.rgym.rewards.RGRewards
  trainer:
    _target_: genrl_swarm.examples.rgym.trainer.GRPOTrainerModule
    models:
      # MODEL_NAME env var wins; otherwise the `gpu_model_choice` resolver
      # picks from the two pools at the bottom of this file. That resolver
      # is not defined here — presumably registered by the launching code.
      - _target_: transformers.AutoModelForCausalLM.from_pretrained
        pretrained_model_name_or_path: ${oc.env:MODEL_NAME, ${gpu_model_choice:${default_large_model_pool},${default_small_model_pool}}}
    config:
      _target_: trl.trainer.GRPOConfig
      logging_dir: ${log_dir}
      fp16: ${training.fp16}
    # NOTE(review): the keys below are assumed to be trainer-level arguments
    # rather than GRPOConfig fields — confirm.
    log_with: wandb
    log_dir: ${log_dir}
    epsilon: 0.2
    epsilon_high: 0.28
    num_generations: ${training.num_generations}
    judge_base_url: ${eval.judge_base_url}
  data_manager:
    _target_: genrl_swarm.examples.rgym.data.ReasoningGymDataManager
    yaml_config_path: "genrl-swarm/src/genrl_swarm/examples/rgym/datasets.yaml"
    num_train_samples: 1
    num_evaluation_samples: 0
    system_prompt_id: 'default'
    seed: ${training.seed}
    num_transplant_trees: ${training.num_transplant_trees}
  communication:
    _target_: genrl_swarm.communication.hivemind.hivemind_backend.HivemindBackend
    initial_peers: ${communications.initial_peers}
    identity_path: ${oc.env:IDENTITY_PATH,null}
  coordinator:
    _target_: genrl_swarm.blockchain.coordinator.ModalSwarmCoordinator
    web3_url: ${blockchain.alchemy_url}
    contract_address: ${blockchain.contract_address}
    org_id: ${blockchain.org_id}
    modal_proxy_url: ${blockchain.modal_proxy_url}

# Candidate model pools passed to the gpu_model_choice resolver above.
default_large_model_pool:
  - nvidia/AceInstruct-1.5B
  - dnotitia/Smoothie-Qwen3-1.7B
  - Gensyn/Qwen2.5-1.5B-Instruct

default_small_model_pool:
  - Gensyn/Qwen2.5-0.5B-Instruct
  - Qwen/Qwen3-0.6B
# Advertisement
# Add Comment
# Please, Sign In to add comment