From ed5c490eb82052c340c91292760c9b3355d6b174 Mon Sep 17 00:00:00 2001
From: Gabriele Castellano <gabry.c92x@gmail.com>
Date: Thu, 16 Jan 2025 03:33:53 +0100
Subject: [PATCH] describe behavior of -psn parameter for single plane topology
 generation (#66)

---
 .../topo/gen_HPN_7.0_topo_mulgpus_one_link.py |  2 +-
 docs/Tutorial.md                              | 21 ++++++++++++-------
 2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.py b/astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.py
index e750e3b..34aa041 100755
--- a/astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.py
+++ b/astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.py
@@ -374,7 +374,7 @@ def main():
     parser.add_argument('-g','--gpu',type=int,default=32,help='gpus num,default 32')
     parser.add_argument('-gt','--gpu_type',type=str,default='H800',help='gpu_type,default H800')
     parser.add_argument('-gps','--gpu_per_server',type=int,default=8,help='gpu_per_server,default 8')
-    parser.add_argument('-psn','--psw_switch_num',type=int,default=120,help='psw_switch_num,default 120')
+    parser.add_argument('-psn','--psw_switch_num',type=int,default=120,help='psw_switch_num (halved in single plane case),default 120')
     parser.add_argument('-asn','--asw_switch_num',type=int,default=8,help='asw_switch_num,default 8')
     parser.add_argument('-nsps','--nv_switch_per_server',type=int,default=1,help='nv_switch_per_server,default 1')
     parser.add_argument('-npa','--nics_per_aswitch',type=int,default=128,help='nnics per asw,default 128')
diff --git a/docs/Tutorial.md b/docs/Tutorial.md
index ae9b084..9e8996a 100755
--- a/docs/Tutorial.md
+++ b/docs/Tutorial.md
@@ -156,7 +156,7 @@ Using the same workload as SimAI-Analytical, generated by [SimAI-WorkloadGenerat
 
 Before running SimAI-Simulator, you need to generate a `topo` file that can be recognized by `ns-3-alibabacloud`.
 
-As shown in the figure below, the first row represents various parameters: `node_num` is the total number of nodes, `gpus_per_server` refers to the number of GPUs per server (currently, we bind each NIC to a GPU as a single node), `nvswitch_num` indicates the number of NVSwitch nodes (specifically used to implement the NVLS algorithm), `switch_num` is the number of switches, `link_num` is the total number of connections, and `gpu_type_str` describes the type of GPU.
+The figure below shows an example of a topology file; the first row represents various parameters: `node_num` is the total number of nodes, `gpus_per_server` refers to the number of GPUs per server (currently, we bind each NIC to a GPU as a single node), `nvswitch_num` indicates the number of NVSwitch nodes (specifically used to implement the NVLS algorithm), `switch_num` is the number of switches, `link_num` is the total number of connections, and `gpu_type_str` describes the type of GPU.
 
 | Abbreviation       | Description                                     |
 |--------------------|-------------------------------------------------|
@@ -173,13 +173,18 @@ python3 ./astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.p
 ```
 <img src="./images/simai_topo.png" alt="simai_topo" width="30%">
 
-The `link_num` is `20` because each ASW (Aggregation Switch) is connected to a single PSW (Pod Switch, node 17). Since the topology uses `-psn 1` (single-plane topology), only one plane of PSWs is utilized, limiting the connections to `4` between ASWs and PSWs.
-
-To increase the `link_num` to `24`, you need to enable a dual-plane topology by setting `-ps`n to `2` in the command. This will activate both planes of PSWs, doubling the connections between ASWs and PSWs. For example:
-
-```bash
-python3 ./astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.py -g 8 -gt A100 -bw 400Gbps -nvbw 2400Gbps -psn 2
-```
+> 💡 **Important Note**: The generated file has a mismatch between the `link_num` value in the first line (i.e., `20`) and the number
+> of links actually listed below it, which is `24`. Hence, the last `4` links will be ignored by the simulator.
+> In particular, only half of the links between the ASWs and the PSW are actually used. This happens because the topology
+> generator script assumes that the `-psn` parameter refers to the total number of PSWs in a dual-plane
+> topology; however, by default the script generates a single-plane topology, which halves the links to the PSWs.
+>
+> Therefore, when generating single-plane topologies, set the `-psn` parameter to twice the desired number of PSWs (e.g., to create one PSW, set it to `2`). This enables
+> all the generated links (in the example, it increases `link_num` from `20` to `24`).
+>
+>```bash
+>python3 ./astra-sim-alibabacloud/inputs/topo/gen_HPN_7.0_topo_mulgpus_one_link.py -g 8 -gt A100 -bw 400Gbps -nvbw 2400Gbps -psn 2
+>```
 
 You can choose to customize any `topo` following the format shown above. Of course, we also provide a script to directly generate a `topo` for the HPN architecture.