forked from microsoft/SuperScaler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaceso_gpt_execute_docker.sh
30 lines (22 loc) · 1.28 KB
/
aceso_gpt_execute_docker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
#
# Run the large-scale Aceso GPT experiments inside the `aceso-ae` docker
# container (single node first, then 2 and 4 nodes via parallel-ssh) and
# summarize the results with scripts/show_best_perf.py.
#
# Usage:
#   bash aceso_gpt_execute_docker.sh large
#
# Must be invoked from the directory that contains `runtime/`: ROOT_PATH is
# taken from $(pwd) and all work happens in ${ROOT_PATH}/runtime.
#
# Arguments:
#   $1 - experiment setting. Only "large" is supported here; "small" and
#        "scale" are rejected with a pointer to scripts/aceso_gpt_execute.sh.
#
# Exit status:
#   0 - every stage succeeded
#   1 - unsupported setting, setup failure, or at least one stage failed
#   2 - missing or unknown setting (usage error)

ROOT_PATH=$(pwd)
exp_setting=${1:-}
model_name=gpt

# Print a message on stderr and terminate with the given status (default 1).
die() {
  printf '%s\n' "$1" >&2
  exit "${2:-1}"
}

# Run a command; on failure report it and remember that something went wrong,
# but keep going so later stages still get a chance to run.
run_stage() {
  "$@" || {
    printf '[ERROR] stage failed (exit %d): %s\n' "$?" "$*" >&2
    status=1
  }
}

# Validate the requested setting before touching anything.
case "$exp_setting" in
  large)
    ;;
  small | scale)
    die "[ERROR] this script is specially designed for large-scale running with docker, please run 'bash scripts/aceso_gpt_execute.sh ${exp_setting}' instead"
    ;;
  *)
    die "Usage: ${0##*/} large" 2
    ;;
esac

# Relative paths below (pssh host files, scripts/) are resolved from runtime/.
cd "${ROOT_PATH}/runtime" || die "[ERROR] cannot cd to ${ROOT_PATH}/runtime"

## Paths
RESULT_PATH=${ROOT_PATH}/logs-large/aceso/
# tee -a needs the directory to exist even if an earlier stage wrote nothing.
mkdir -p -- "$RESULT_PATH" || die "[ERROR] cannot create ${RESULT_PATH}"

status=0

## 1node (4GPUs and 8GPUs)
# NOTE: ROOT_PATH is expanded locally and embedded unquoted in the command
# string executed inside the container, so it must not contain whitespace or
# quote characters.
run_stage docker exec -i aceso-ae bash -c \
  "cd $ROOT_PATH/runtime && bash scripts/run_${model_name}_1node.sh"

## 2nodes, then 4nodes
for nodes in 2 4; do
  hosts_file="pssh-${nodes}workers.host"
  # Restart the container on every worker before launching the multi-node run.
  run_stage parallel-ssh -i -t 0 -h "$hosts_file" "docker restart aceso-ae"
  run_stage parallel-ssh -i -t 0 -h "$hosts_file" \
    "docker exec -i aceso-ae bash -c 'cd $ROOT_PATH/runtime && bash scripts/run_${model_name}_${nodes}nodes_docker.sh'"
done

# Summarize results; the summary is appended to the full log as well as shown.
python3 scripts/show_best_perf.py "$model_name" "$RESULT_PATH" 2>&1 \
  | tee -a "${RESULT_PATH}full_log.log"
# A plain pipeline reports tee's status; look at python3's status explicitly.
[[ ${PIPESTATUS[0]} -eq 0 ]] || status=1

exit "$status"