SHOW:
|
|
- or go back to the newest paste.
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Runs `deepspeed main.py` with the facebook/opt-2.7b model and the
# stanfordnlp/SHP dataset, mirroring stdout into $OUTPUT/training.log.
#
# Usage: <this script> [OUTPUT_DIR] [ZERO_STAGE]
#   OUTPUT_DIR  Directory for the model output, TensorBoard files and
#               training.log. Defaults to the path below when omitted or empty.
#   ZERO_STAGE  Value forwarded to --zero_stage. Defaults to 0 when omitted or
#               empty, so the flag never swallows the following option.

# Abort on errors and unset variables; make the pipeline report a deepspeed
# failure instead of tee's (always successful) exit status.
set -euo pipefail

OUTPUT=${1:-/nobackup/jirayu/models/opt-2.7b-sft}
ZERO_STAGE=${2:-0}

mkdir -p -- "$OUTPUT"

# Only stdout is piped through tee; stderr still goes straight to the terminal.
deepspeed main.py \
   --data_path stanfordnlp/SHP \
   --data_split 10,0,0 \
   --model_name_or_path facebook/opt-2.7b \
   --per_device_train_batch_size 8 \
   --per_device_eval_batch_size 8 \
   --max_seq_len 512 \
   --learning_rate 1e-5 \
   --weight_decay 0. \
   --num_train_epochs 2 \
   --gradient_accumulation_steps 1 \
   --lr_scheduler_type cosine \
   --num_warmup_steps 0 \
   --seed 1234 \
   --zero_stage "$ZERO_STAGE" \
   --deepspeed \
   --enable_tensorboard \
   --tensorboard_path "$OUTPUT" \
   --output_dir "$OUTPUT" \
   | tee "$OUTPUT/training.log"