View difference between Paste ID: xwaL9WM3 and VU7zYBbZ
SHOW: | | - or go back to the newest paste.
1
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Runs main.py under the `deepspeed` launcher with the facebook/opt-2.7b model
# and the stanfordnlp/SHP dataset, mirroring the launcher output to
# "$OUTPUT/training.log".
#
# Usage: <this script> [OUTPUT] ZERO_STAGE
#   OUTPUT      Directory used for --output_dir, --tensorboard_path and the
#               training log. Falls back to the default below when omitted
#               or passed as an empty string.
#   ZERO_STAGE  Value forwarded to --zero_stage. Required: it has no default,
#               and an empty value would leave --zero_stage without an
#               argument.

# Abort on unchecked failures and unset variables; with pipefail the script's
# exit status reflects a failing `deepspeed` run instead of always being
# `tee`'s status.
set -euo pipefail

# ${N:-...} treats "unset" and "empty" alike, matching the original
# `[ "$OUTPUT" == "" ]` check.
OUTPUT=${1:-/nobackup/jirayu/models/opt-2.7b-sft}
ZERO_STAGE=${2:-}

if [[ -z "$ZERO_STAGE" ]]; then
  printf 'Usage: %s [OUTPUT] ZERO_STAGE\n' "${0##*/}" >&2
  printf 'error: ZERO_STAGE (second argument) is required\n' >&2
  exit 2
fi

mkdir -p -- "$OUTPUT"

# 2>&1 sends stderr through tee as well, so messages the launcher writes to
# stderr are kept in training.log and not only shown on the terminal.
deepspeed main.py \
  --data_path stanfordnlp/SHP \
  --data_split 10,0,0 \
  --model_name_or_path facebook/opt-2.7b \
  --per_device_train_batch_size 8 \
  --per_device_eval_batch_size 8 \
  --max_seq_len 512 \
  --learning_rate 1e-5 \
  --weight_decay 0. \
  --num_train_epochs 2 \
  --gradient_accumulation_steps 1 \
  --lr_scheduler_type cosine \
  --num_warmup_steps 0 \
  --seed 1234 \
  --zero_stage "$ZERO_STAGE" \
  --deepspeed \
  --enable_tensorboard \
  --tensorboard_path "$OUTPUT" \
  --output_dir "$OUTPUT" \
  2>&1 | tee -- "$OUTPUT/training.log"
33