Commit
Merge pull request #762 from gzliyu/main
[New feature] Integrate DPO
research4pan authored Apr 12, 2024
2 parents 11ad2d1 + 427c95a commit 8a70f48
Showing 5 changed files with 477 additions and 58 deletions.
54 changes: 54 additions & 0 deletions examples/dpo_train.py
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 7/4/2024 20:31
# @Author : Yu Li
# @Site :
# @File : dpo_train.py
# 0. imports
import os
import sys

# Drop this script's own directory from sys.path so the installed lmflow
# package is imported rather than any same-named files next to the script.
sys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0])))

from transformers import HfArgumentParser

from lmflow.args import (
    ModelArguments,
    DatasetArguments,
    AutoArguments,
)
from lmflow.models.auto_model import AutoModel
from lmflow.pipeline.auto_pipeline import AutoPipeline

if __name__ == "__main__":
    # Parses arguments
    pipeline_name = "dpo_aligner"
    PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name)
    parser = HfArgumentParser((
        ModelArguments,
        DatasetArguments,
        PipelineArguments,
    ))

    model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses()

    # Initializes the pipeline and model for DPO alignment
    aligner = AutoPipeline.get_pipeline(
        pipeline_name=pipeline_name,
        model_args=model_args,
        data_args=data_args,
        pipeline_args=pipeline_args,
    )
    model = AutoModel.get_model(model_args)

    # Aligns the model with DPO; the preference dataset is constructed
    # internally from the parsed arguments, and DPO needs no separate
    # reward model
    aligned_model = aligner.align(
        model=model,
        dataset=None,
        reward_model=None,
    )
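
For context, the dpo_aligner pipeline presumably optimizes the standard Direct Preference Optimization objective of Rafailov et al. (2023). Below is a minimal sketch of that loss, not LMFlow's implementation; the per-sequence log-probability inputs and the beta value are assumptions for illustration:

import torch.nn.functional as F

def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             ref_chosen_logps, ref_rejected_logps, beta=0.1):
    # Implicit rewards: beta-scaled log-ratios of the policy to the
    # frozen reference model, for chosen (y_w) and rejected (y_l) responses
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # -log sigmoid of the reward margin; minimized when the chosen
    # response outranks the rejected one
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()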
55 changes: 55 additions & 0 deletions scripts/run_dpo_align.sh
@@ -0,0 +1,55 @@
#!/bin/bash
# Please run this script under ${project_id} in the project directory.

# Parses arguments
model_name_or_path=meta-llama/Llama-2-7b-hf
dataset_path=data/stack-exchange-paired-dummy
output_dir=output_models/dpo
deepspeed_args="--master_port=11000"
# specify gpus/single gpu here by
# `--include localhost:0,1` or `--include localhost:0`

while [[ $# -ge 1 ]]; do
  key="$1"
  case ${key} in
    -m|--model_name_or_path)
      model_name_or_path="$2"
      shift
      ;;
    -d|--dataset_path)
      dataset_path="$2"
      shift
      ;;
    -o|--output_lora_path)
      output_dir="$2"
      shift
      ;;
    --deepspeed_args)
      deepspeed_args="$2"
      shift
      ;;
    *)
      echo "error: unknown option \"${key}\"" 1>&2
      exit 1
  esac
  shift
done
exp_id=dpo
project_dir=$(cd "$(dirname $0)"/..; pwd)
log_dir=${project_dir}/log/${exp_id}
mkdir -p ${output_dir} ${log_dir}

deepspeed ${deepspeed_args} \
  examples/dpo_train.py \
    --model_name_or_path ${model_name_or_path} \
    --dataset_path ${dataset_path} \
    --output_dir ${output_dir} \
    --max_steps 200 \
    --learning_rate 1e-4 \
    --use_lora 1 \
    --lora_r 8 \
    --sanity_check True \
    --save_aggregated_lora 0 \
    --logging_steps 20 \
    | tee ${log_dir}/train.log \
    2> ${log_dir}/train.err
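
As a usage sketch, the script can then be launched from the project root, overriding any of the options handled by the argument loop above (the paths and GPU selection here are illustrative):

bash scripts/run_dpo_align.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/stack-exchange-paired-dummy \
  --output_lora_path output_models/dpo \
  --deepspeed_args "--master_port=11000 --include localhost:0"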
