Guest User

OPT-1.3b PPO

a guest
Nov 15th, 2023
179
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2. # Copyright (c) Microsoft Corporation.
  3. # SPDX-License-Identifier: Apache-2.0
  4.  
  5. # DeepSpeed Team
  6. ACTOR_MODEL_PATH=$1
  7. CRITIC_MODEL_PATH=$2
  8. ACTOR_ZERO_STAGE=$3
  9. CRITIC_ZERO_STAGE=$4
  10. OUTPUT=$5
  11. if [ "$ACTOR_ZERO_STAGE" == "" ]; then
  12.     ACTOR_ZERO_STAGE=2
  13. fi
  14. if [ "$CRITIC_ZERO_STAGE" == "" ]; then
  15.     CRITIC_ZERO_STAGE=2
  16. fi
  17.  
  18. # if actor and critic model names are not provided, then use the publicly available AdamG012/chat-opt-1.3b-sft-deepspeed and AdamG012/chat-opt-350m-reward-deepspeed
  19.  
  20. mkdir -p $OUTPUT
  21.  
  22. Num_Padding_at_Beginning=1 # this is model related
  23.  
  24. Actor_Lr=1e-5
  25. Critic_Lr=5e-6
  26.  
  27. deepspeed --master_port 12346 main.py \
  28.    --data_path stanfordnlp/SHP \
  29.    --data_split 0,0,10 \
  30.    --actor_model_name_or_path $ACTOR_MODEL_PATH \
  31.    --critic_model_name_or_path $CRITIC_MODEL_PATH \
  32.    --num_padding_at_beginning 1 \
  33.    --per_device_generation_batch_size 8 \
  34.    --per_device_training_batch_size 8 \
  35.    --generation_batches 1 \
  36.    --ppo_epochs 1 \
  37.    --max_answer_seq_len 256 \
  38.    --max_prompt_seq_len 256 \
  39.    --actor_learning_rate ${Actor_Lr} \
  40.    --critic_learning_rate ${Critic_Lr} \
  41.    --num_train_epochs 1 \
  42.    --lr_scheduler_type cosine \
  43.    --gradient_accumulation_steps 1 \
  44.    --disable_actor_dropout \
  45.    --num_warmup_steps 100 \
  46.    --deepspeed --seed 1234 \
  47.    --enable_hybrid_engine \
  48.    --actor_zero_stage $ACTOR_ZERO_STAGE \
  49.    --critic_zero_stage $CRITIC_ZERO_STAGE \
  50.    --enable_ema \
  51.    --output_dir $OUTPUT \
  52.    --enable_tensorboard \
  53.    --tensorboard_path $OUTPUT \
  54.     | tee $OUTPUT/training.log
Advertisement
Add Comment
Please, Sign In to add comment