#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Runs main.py under the deepspeed launcher with facebook/opt-125m as the
# model and stanfordnlp/SHP as the dataset, mirroring stdout to
# $OUTPUT/training.log.
#
# Usage: bash <this-script> [OUTPUT] [ZERO_STAGE]
#   OUTPUT      Directory used for --output_dir, --tensorboard_path and the
#               training log. Defaults to the path below when omitted/empty.
#   ZERO_STAGE  Value forwarded to --zero_stage. Defaults to 0 when
#               omitted/empty (stage 0 means ZeRO is disabled in DeepSpeed).

# -e: stop on the first failing command (e.g. mkdir).
# -u: treat unset variables as errors (positional args are defaulted below).
# pipefail: without it the pipeline's status would be tee's, hiding a failed
#           training run from the caller.
set -euo pipefail

# ${N:-default} covers both "argument not given" and "argument is empty".
OUTPUT=${1:-/nobackup/jirayu/models/opt-125m-sft}
ZERO_STAGE=${2:-0}

mkdir -p -- "$OUTPUT"

# NOTE: only stdout is piped into tee; anything the launcher writes to stderr
# still reaches the terminal but is not saved in training.log.
deepspeed main.py \
   --data_path stanfordnlp/SHP \
   --data_split 10,0,0 \
   --model_name_or_path facebook/opt-125m \
   --per_device_train_batch_size 8 \
   --per_device_eval_batch_size 8 \
   --max_seq_len 512 \
   --learning_rate 1e-5 \
   --weight_decay 0. \
   --num_train_epochs 2 \
   --gradient_accumulation_steps 1 \
   --lr_scheduler_type cosine \
   --num_warmup_steps 0 \
   --seed 1234 \
   --zero_stage "$ZERO_STAGE" \
   --deepspeed \
   --enable_tensorboard \
   --tensorboard_path "$OUTPUT" \
   --output_dir "$OUTPUT" \
   | tee "$OUTPUT/training.log"