forked from boat1603/SuperAI_LLM_FineTune
-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit_multinode.sh
27 lines (21 loc) · 980 Bytes
/
submit_multinode.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash
# Slurm batch script for multi-node LLM fine-tuning.
#
# Resolves the allocated node list, exports the rendezvous settings
# (HOSTNAMES, MASTER_ADDR, MASTER_PORT, COUNT_NODE) for the sub script, and
# then starts smultinode.sh once per node through srun.
#
# Usage: sbatch submit_multinode.sh   (replace <project_name> below first)
#
# NOTE: every #SBATCH directive must stay above the first executable command.
#SBATCH -p gpu # Specify partition [Compute/Memory/GPU]
#SBATCH -N 2 -c 64 # Specify number of nodes and processors per task
#SBATCH --ntasks-per-node=1 # Specify number of tasks per node
#SBATCH --gpus-per-node=4 # Specify number of GPUs per node
#SBATCH -t 120:00:00 # Specify maximum time limit (hour: minute: second)
#SBATCH -A <project_name> # Specify project name
#SBATCH -J llm_finetuning # Specify job name

# Abort on the first failing command, unset variable, or broken pipeline so a
# misconfigured job stops here instead of launching on every node.
set -euo pipefail

export NCCL_DEBUG=INFO
# NCCL network interface selection; "hsn" is site-specific.
export NCCL_SOCKET_IFNAME=hsn
export WANDB_MODE="offline"

# Wall-clock markers used for the timing summary printed after srun returns.
START=$(date)
starttime=$(date +%s)

# SLURM_JOB_NODELIST is only set inside a Slurm allocation.
: "${SLURM_JOB_NODELIST:?must be set by Slurm; submit this script with sbatch}"

# sent to sub script
# Expand the node list once. Assigning before exporting keeps scontrol's exit
# status visible instead of being masked by `export`.
if ! HOSTNAMES=$(scontrol show hostnames "$SLURM_JOB_NODELIST"); then
  echo "error: scontrol failed to expand SLURM_JOB_NODELIST" >&2
  exit 1
fi
if [[ -z "$HOSTNAMES" ]]; then
  echo "error: scontrol returned an empty host list" >&2
  exit 1
fi
mapfile -t node_names <<<"$HOSTNAMES"

export HOSTNAMES                          # newline-separated host names
export MASTER_ADDR="${node_names[0]}"     # first host in the list
export MASTER_PORT=12802
export COUNT_NODE="${#node_names[@]}"     # number of hosts in the list

echo "go $COUNT_NODE"
echo "${node_names[*]}"                   # all hosts on one line, space-joined

# Capture srun's status instead of letting `set -e` exit immediately, so the
# timing summary is still printed and the job exits with srun's own status.
rc=0
srun sh smultinode.sh || rc=$?

echo "started: $START"
echo "elapsed: $(( $(date +%s) - starttime )) s"
exit "$rc"