diff --git a/scripts/run_dpo_align_qlora.sh b/scripts/run_dpo_align_qlora.sh
new file mode 100644
index 000000000..3f498f8e0
--- /dev/null
+++ b/scripts/run_dpo_align_qlora.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Please run this script from the project root directory.
+
+# Parse arguments
+model_name_or_path=meta-llama/Meta-Llama-3-70B-Instruct
+dataset_path=data/dpo-mix-7k
+output_dir=output_models/finetuned_llama3_70b_dpo_example
+# Specify GPUs (or a single GPU) via --deepspeed_args, e.g.
+# `--include localhost:0,1` or `--include localhost:0`
+
+while [[ $# -ge 1 ]]; do
+  key="$1"
+  case ${key} in
+    -m|--model_name_or_path)
+      model_name_or_path="$2"
+      shift
+      ;;
+    -d|--dataset_path)
+      dataset_path="$2"
+      shift
+      ;;
+    -o|--output_lora_path)
+      output_dir="$2"
+      shift
+      ;;
+    --deepspeed_args)
+      deepspeed_args="$2"
+      shift
+      ;;
+    *)
+      echo "error: unknown option \"${key}\"" 1>&2
+      exit 1
+  esac
+  shift
+done
+exp_id=dpo
+project_dir=$(cd "$(dirname $0)"/..; pwd)
+log_dir=${project_dir}/log/${exp_id}
+mkdir -p ${output_dir} ${log_dir}
+
+deepspeed ${deepspeed_args} \
+  examples/dpo_train.py \
+    --model_name_or_path ${model_name_or_path} \
+    --dataset_path ${dataset_path} \
+    --output_dir ${output_dir} \
+    --run_name dpo \
+    --max_steps 200 \
+    --learning_rate 1e-6 \
+    --use_qlora 1 \
+    --lora_r 8 \
+    --sanity_check True \
+    --save_aggregated_lora 0 \
+    --logging_steps 20 \
+    | tee ${log_dir}/train.log \
+    2> ${log_dir}/train.err
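A possible invocation from the repository root (a sketch only: the flag names and default values are taken from the script above, and the `--deepspeed_args` GPU selection is optional and machine-specific):

    # example run; adjust the --include GPU list to your hardware
    bash scripts/run_dpo_align_qlora.sh \
      --model_name_or_path meta-llama/Meta-Llama-3-70B-Instruct \
      --dataset_path data/dpo-mix-7k \
      --output_lora_path output_models/finetuned_llama3_70b_dpo_example \
      --deepspeed_args "--include localhost:0,1"

Logs are written to log/dpo/train.log and log/dpo/train.err under the project directory, as set by the `log_dir` variable in the script.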