View difference between Paste ID: hitmPibN and mX8bjvBW
SHOW: | | - or go back to the newest paste.
1
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Runs main.py under the deepspeed launcher and copies the launcher's stdout
# into "$OUTPUT/training.log" while still printing it to the terminal.
#
# Positional arguments:
#   $1  OUTPUT      Output directory. When empty or omitted, falls back to
#                   /nobackup/jirayu/models/opt-350m-rm.
#   $2  SFTMODEL    Value forwarded to --model_name_or_path (required).
#   $3  ZERO_STAGE  Value forwarded to --zero_stage. Defaults to 0 when empty
#                   or omitted.
#
# Exit status: 2 when SFTMODEL is missing; otherwise the status of the
# deepspeed | tee pipeline (non-zero if either stage fails).

# -e/-u stop on failed commands and unset variables; pipefail makes a failing
# deepspeed run visible in the exit status instead of being masked by tee.
set -euo pipefail

# ${N:-} keeps omitted positional arguments from tripping `set -u`.
OUTPUT=${1:-}
SFTMODEL=${2:-}
ZERO_STAGE=${3:-}

if [[ -z "$OUTPUT" ]]; then
    OUTPUT=/nobackup/jirayu/models/opt-350m-rm
fi

# Without a model path the flag below would swallow the next option as its
# value, so fail early with a usage hint instead.
if [[ -z "$SFTMODEL" ]]; then
    printf 'Usage: %s <output_dir|""> <sft_model> [zero_stage]\n' "${0##*/}" >&2
    exit 2
fi

# Same reasoning as above: an empty value would make --zero_stage consume
# --deepspeed as its argument.
if [[ -z "$ZERO_STAGE" ]]; then
    ZERO_STAGE=0
fi

mkdir -p -- "$OUTPUT"

deepspeed main.py \
   --data_path stanfordnlp/SHP \
   --data_split 2,4,4 \
   --model_name_or_path "$SFTMODEL" \
   --num_padding_at_beginning 1 \
   --per_device_train_batch_size 4 \
   --per_device_eval_batch_size 4 \
   --max_seq_len 512 \
   --learning_rate 5e-5 \
   --weight_decay 0.1 \
   --num_train_epochs 1 \
   --disable_dropout \
   --gradient_accumulation_steps 1 \
   --lr_scheduler_type cosine \
   --num_warmup_steps 0 \
   --seed 1234 \
   --zero_stage "$ZERO_STAGE" \
   --deepspeed \
   --output_dir "$OUTPUT" \
   | tee -- "$OUTPUT/training.log"