
Add explanation for LISA with model-parallelism
research4pan committed Apr 2, 2024
1 parent 9492b1b commit 14d25f3
Showing 2 changed files with 5 additions and 3 deletions.
README.md (2 additions, 0 deletions)

````diff
@@ -124,6 +124,8 @@ cd data && ./download.sh alpaca && cd -
   --lisa_interval_steps 20
 ```
 
+We are still working on integrating official model-parallelism support for LISA. Please stay tuned :smile:
+
 ### Finetuning (LoRA)
 LoRA is a parameter-efficient finetuning algorithm and is more efficient than full finetuning.
 ```sh
````
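For context, the hunk above shows only the tail of the README's LISA quick-start command. A full invocation plausibly has the shape sketched below; apart from `--lisa_interval_steps`, which appears in the diff, the flag names, model, and paths are illustrative assumptions, not the README's exact text:

```sh
# Hypothetical LISA launch mirroring the quick-start's shape.
# --lisa_activated_layers: how many layers LISA keeps unfrozen at a time (assumed flag).
# --lisa_interval_steps: resample the active layers every 20 steps (from the diff).
./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train \
  --output_model_path output_models/finetuned_llama \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```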
scripts/run_finetune_with_lisa.sh (6 changes: 3 additions, 3 deletions)

````diff
@@ -22,7 +22,7 @@ per_device_train_batch_size=1
 num_gpu=$(python -c "import torch; print(torch.cuda.device_count())")
 ds_config_file=configs/ds_config_zero0_no_offload.json
 if [ ${num_gpu} -ge 2 ]; then
-  ds_config_file=configs/ds_config_zero2_no_offload.json
+  ds_config_file=configs/ds_config_zero3.json
 fi
 
 while [[ $# -ge 1 ]]; do
````
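The swap from ZeRO-2 to ZeRO-3 is what brings model parallelism: stage 2 shards only gradients and optimizer states, while stage 3 also shards the model parameters themselves across GPUs. The repo's `configs/ds_config_zero3.json` is not shown in this commit, but a minimal stage-3 DeepSpeed config generally looks like the sketch below; the field values are illustrative, not the repo's actual file:

```sh
# Illustrative only: writes an example ZeRO stage-3 config,
# not the contents of configs/ds_config_zero3.json.
cat > /tmp/ds_config_zero3_example.json <<'EOF'
{
  "bf16": { "enabled": true },
  "zero_optimization": { "stage": 3 },
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto"
}
EOF
```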
````diff
@@ -85,7 +85,7 @@ project_dir=$(cd "$(dirname $0)"/..; pwd)
 log_dir=${project_dir}/log/${exp_id}
 mkdir -p ${output_dir} ${log_dir}
 
-python examples/finetune.py \
+deepspeed "--master_port=11000" examples/finetune.py \
   --model_name_or_path ${model_name_or_path} \
   --dataset_path ${dataset_path} \
   --output_dir ${output_dir} --overwrite_output_dir \
@@ -97,7 +97,7 @@ python examples/finetune.py \
   --bf16 \
   --torch_dtype bfloat16 \
   --run_name finetune \
-  --optim paged_adamw_32bit \
+  --deepspeed ${ds_config_file} \
   --validation_split_percentage 0 \
   --logging_steps 20 \
   --do_train \
````
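With the plain `python` entry point replaced by the `deepspeed` launcher, the script now spawns one training process per visible GPU, which is what lets ZeRO-3 shard the model across devices. As a rough sketch of what the launch reduces to (the model name and paths are placeholders; `--master_port` and `CUDA_VISIBLE_DEVICES` behave as in any DeepSpeed run):

```sh
# Restrict the run to two GPUs; deepspeed starts one rank per device.
# Port 11000 matches the value hard-coded in the script above.
CUDA_VISIBLE_DEVICES=0,1 deepspeed --master_port=11000 examples/finetune.py \
  --deepspeed configs/ds_config_zero3.json \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train \
  --output_dir output_models/lisa_example \
  --do_train
```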
