#!/bin/bash
# Run this script from the project directory of
#   https://github.com/shizhediao/llm-ft
# COMMIT: d5fecf30ba8011067b10cf51fede53a5ab6574e4

# DeepSpeed launcher arguments (overridable via the first positional argument)
deepspeed_args="--master_port=11000"
if [ $# -ge 1 ]; then
    deepspeed_args="$1"
fi
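# Example invocation (hypothetical script name and flag values), overriding the
# default launcher arguments:
#   ./run_reward_modeling.sh "--master_port=12000 --num_gpus=4"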

exp_id=llama-7b-rm-not-formatted
project_dir=$(cd "$(dirname "$0")"/.. && pwd)
output_dir=${project_dir}/output_models/${exp_id}
log_dir=${project_dir}/log/${exp_id}
dataset_path=${project_dir}/data/hh_rlhf/rm-not-formatted/hh_rlhf_rm_data.json
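
# Fetch the Anthropic HH-RLHF dataset on first run, using the repo's
# data/download.sh helper (invoked below with the hh_rlhf target).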
if [ ! -d data/hh_rlhf ]; then
    cd data && ./download.sh hh_rlhf && cd -
fi
mkdir -p "${output_dir}" "${log_dir}"
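
# Launch reward-model training with DeepSpeed (ZeRO-3 config, fp16).
# ${deepspeed_args} is intentionally left unquoted so that multiple launcher
# flags word-split. stderr is redirected to train.err before the pipe so that
# deepspeed's errors (not tee's) are captured; stdout is mirrored to train.log.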
deepspeed ${deepspeed_args} \
    examples/reward_modeling.py \
    --model_name_or_path output_models/llama-7b-sft-not-formatted \
    --dataset_path "${dataset_path}" \
    --output_dir "${output_dir}" --overwrite_output_dir \
    --num_train_epochs 1 \
    --learning_rate 5e-6 \
    --block_size 512 \
    --per_device_train_batch_size 2 \
    --per_device_eval_batch_size 1 \
    --deepspeed configs/ds_config_zero3.json \
    --fp16 \
    --run_name llama-7b-rm \
    --validation_split_percentage 10 \
    --logging_steps 10 \
    --do_train \
    --ddp_timeout 72000 \
    --save_steps 999999 \
    --evaluation_strategy steps \
    --eval_steps 1000 \
    --weight_decay 0.001 \
    --dataloader_num_workers 1 \
    2> "${log_dir}/train.err" \
    | tee "${log_dir}/train.log"
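
# Note: --save_steps 999999 effectively disables periodic checkpointing; whether
# a final checkpoint is written depends on how examples/reward_modeling.py saves
# at the end of training. Inspect ${log_dir}/train.log and ${log_dir}/train.err
# after the run.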