Guest User

OPT SFT 350m

a guest
Nov 15th, 2023
323
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/bash
  2. # Copyright (c) Microsoft Corporation.
  3. # SPDX-License-Identifier: Apache-2.0
  4.  
  5. # DeepSpeed Team
  6. OUTPUT=$1
  7. SFTMODEL=$2
  8. ZERO_STAGE=$3
  9. if [ "$ZERO_STAGE" == "" ]; then
  10. ZERO_STAGE=2
  11. fi
  12. mkdir -p $OUTPUT
  13.  
  14. deepspeed main.py \
  15. --data_path stanfordnlp/SHP \
  16. --data_split 2,4,4 \
  17. --model_name_or_path $SFTMODEL \
  18. --num_padding_at_beginning 1 \
  19. --per_device_train_batch_size 4 \
  20. --per_device_eval_batch_size 4 \
  21. --max_seq_len 512 \
  22. --learning_rate 5e-5 \
  23. --weight_decay 0.1 \
  24. --num_train_epochs 1 \
  25. --disable_dropout \
  26. --gradient_accumulation_steps 1 \
  27. --lr_scheduler_type cosine \
  28. --num_warmup_steps 0 \
  29. --seed 1234 \
  30. --zero_stage $ZERO_STAGE \
  31. --deepspeed \
  32. --output_dir $OUTPUT \
  33. | tee $OUTPUT/training.log
Advertisement
Add Comment
Please, Sign In to add comment