dimamachine

Untitled

Jul 8th, 2025
280
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.49 KB | None | 0 0
  # Root directory for run logs. Resolves to "$ROOT/logs"; when the ROOT
  # environment variable is not set, the oc.env default "." is used, giving
  # "./logs". Other keys in this config reference it as ${log_dir}.
  log_dir: ${oc.env:ROOT,.}/logs

  # Shared training settings. Other sections of this config read these values
  # through ${training.<key>} interpolations rather than repeating them.
  training:
    max_round: 1000000
    max_stage: 1
    hf_push_frequency: 1
    num_generations: 1
    num_transplant_trees: 1
    seed: 42
    # Set this to true if your hardware supports fp16.
    fp16: false

  # On-chain coordination settings; consumed via ${blockchain.<key>}
  # interpolations by the coordinator entry of this config.
  blockchain:
    alchemy_url: "https://gensyn-testnet.g.alchemy.com/public"
    # Read from the SWARM_CONTRACT environment variable, which is set by
    # modal-login in run_rl_swarm.sh; falls back to null when unset.
    contract_address: ${oc.env:SWARM_CONTRACT,null}
    # Read from the ORG_ID environment variable, which is set by modal-login
    # in run_rl_swarm.sh; falls back to null when unset.
    org_id: ${oc.env:ORG_ID,null}
    # Currently unused; will be used with WalletSwarmCoordinator.
    mainnet_chain_id: 685685
    modal_proxy_url: "http://localhost:3000/api/"

  # Initial peer multiaddresses. Referenced elsewhere in this config as
  # ${communications.initial_peers} (game manager bootnodes and the
  # communication backend's initial_peers).
  communications:
    initial_peers:
      - '/ip4/38.101.215.15/tcp/30011/p2p/QmQ2gEXoPJg6iMBSUFWGzAabS2VhnzuS782Y637hGjfsRJ'
      - '/ip4/38.101.215.15/tcp/30012/p2p/QmWhiaLrx3HRZfgXc2i7KW5nMUNK7P9tRc71yFJdGEZKkC'
      - '/ip4/38.101.215.15/tcp/30013/p2p/QmQa1SCfYTxx7RvU7qJJRo79Zm1RAwPpkeLueDVJuBBmFp'

  # Evaluation settings. judge_base_url is passed on to the trainer through
  # the ${eval.judge_base_url} interpolation.
  eval:
    judge_base_url: "https://swarm-judge-102957787771.us-east1.run.app"

  # Point Hydra's per-run directory (hydra.run.dir) at the same location as
  # log_dir.
  hydra:
    run:
      dir: ${log_dir}

  # Object graph for the swarm game manager. Each `_target_` follows Hydra's
  # instantiate convention: it names the class/callable to build, and the
  # sibling keys are its keyword arguments.
  #
  # NOTE(review): the pasted source had lost all indentation. The nesting
  # below was reconstructed from the position of each `_target_` key; confirm
  # against the upstream config before relying on it.
  game_manager:
    _target_: genrl_swarm.game.game_manager.SwarmGameManager
    max_stage: ${training.max_stage}
    max_round: ${training.max_round}
    log_dir: ${log_dir}
    # Hugging Face token from the environment; null when the variable is unset.
    hf_token: ${oc.env:HUGGINGFACE_ACCESS_TOKEN,null}
    hf_push_frequency: ${training.hf_push_frequency}
    run_mode: "train_and_evaluate"
    bootnodes: ${communications.initial_peers}
    game_state:
      _target_: genrl_swarm.state.game_state.GameState
      round: 0
      stage: 0
    reward_manager:
      _target_: genrl_swarm.rewards.DefaultRewardManager
      reward_fn_store:
        _target_: genrl_swarm.rewards.reward_store.RewardFnStore
        max_rounds: ${training.max_round}
        reward_fn_stores:
          - _target_: genrl_swarm.rewards.reward_store.RoundRewardFnStore
            num_stages: ${training.max_stage}
            reward_fns:
              - _target_: genrl_swarm.examples.rgym.rewards.RGRewards
    trainer:
      _target_: genrl_swarm.examples.rgym.trainer.GRPOTrainerModule
      models:
        - _target_: transformers.AutoModelForCausalLM.from_pretrained
          # Uses the MODEL_NAME environment variable when set; otherwise the
          # value comes from the custom `gpu_model_choice` resolver, which is
          # given the large and small model pools defined in this config.
          # The resolver is registered outside this file; presumably it picks
          # a pool based on the available GPU -- TODO confirm.
          pretrained_model_name_or_path: ${oc.env:MODEL_NAME, ${gpu_model_choice:${default_large_model_pool},${default_small_model_pool}}}
      config:
        _target_: trl.trainer.GRPOConfig
        logging_dir: ${log_dir}
        fp16: ${training.fp16}
      # NOTE(review): the keys from log_with through judge_base_url are
      # placed as trainer-level arguments (siblings of `config`), not as
      # GRPOConfig fields -- assumption, verify against the trainer signature.
      log_with: wandb
      log_dir: ${log_dir}
      epsilon: 0.2
      epsilon_high: 0.28
      num_generations: ${training.num_generations}
      judge_base_url: ${eval.judge_base_url}
    data_manager:
      _target_: genrl_swarm.examples.rgym.data.ReasoningGymDataManager
      yaml_config_path: "genrl-swarm/src/genrl_swarm/examples/rgym/datasets.yaml"
      num_train_samples: 1
      num_evaluation_samples: 0
      system_prompt_id: 'default'
      seed: ${training.seed}
      num_transplant_trees: ${training.num_transplant_trees}
    communication:
      _target_: genrl_swarm.communication.hivemind.hivemind_backend.HivemindBackend
      initial_peers: ${communications.initial_peers}
      # Identity file path from the environment; null when unset.
      identity_path: ${oc.env:IDENTITY_PATH,null}
    coordinator:
      _target_: genrl_swarm.blockchain.coordinator.ModalSwarmCoordinator
      web3_url: ${blockchain.alchemy_url}
      contract_address: ${blockchain.contract_address}
      org_id: ${blockchain.org_id}
      modal_proxy_url: ${blockchain.modal_proxy_url}

  # Candidate model identifiers passed as the first argument to the
  # `gpu_model_choice` resolver via ${default_large_model_pool}.
  default_large_model_pool:
    - nvidia/AceInstruct-1.5B
    - dnotitia/Smoothie-Qwen3-1.7B
    - Gensyn/Qwen2.5-1.5B-Instruct

  # Candidate model identifiers passed as the second argument to the
  # `gpu_model_choice` resolver via ${default_small_model_pool}.
  default_small_model_pool:
    - Gensyn/Qwen2.5-0.5B-Instruct
    - Qwen/Qwen3-0.6B
Advertisement
Add Comment
Please, Sign In to add comment