
Commit

Tweak example 3 to run on Mps devices. Bugfix in README for device args for running fortran on all devices.
jatkinson1000 committed Feb 13, 2025
1 parent 838f3ae commit 4149316
Showing 4 changed files with 24 additions and 21 deletions.
27 changes: 13 additions & 14 deletions examples/3_MultiGPU/README.md
@@ -20,8 +20,8 @@ the TorchScript model in inference mode.
To run this example requires:

- CMake
-- Two (or more) CUDA or XPU GPU devices.
-- FTorch (installed with CUDA or XPU enabled as described in main package)
+- Two (or more) CUDA or XPU GPU devices (or a single Mps device).
+- FTorch (installed with a GPU_DEVICE enabled as described in main package)
- Python 3

## Running
@@ -36,36 +36,33 @@ pip install -r requirements.txt

You can check that everything is working by running `simplenet.py`:
```
-python3 simplenet.py --device_type cuda
+python3 simplenet.py --device_type <my_device_type>
```
-for a CUDA device or
-```
-python3 simplenet.py --device_type xpu
-```
-for an XPU device.
+where `<my_device_type>` is `cuda`/`xpu`/`mps` as appropriate for your device.

As before, this defines the network and runs it with an input tensor
[0.0, 1.0, 2.0, 3.0, 4.0]. The difference is that the code will make use of the
default GPU device (index 0) to produce the result:
```
SimpleNet forward pass on CUDA device 0
tensor([[0, 2, 4, 6, 8]])
```
-and similarly for XPU.
+for CUDA, and similarly for other device types.

To save the `SimpleNet` model to TorchScript run the modified version of the
`pt2ts.py` tool:
```
-python3 pt2ts.py --device_type <cuda/xpu>
+python3 pt2ts.py --device_type <my_device_type>
```
-which will generate `saved_multigpu_model_<cuda/xpu>.pt` - the TorchScript
+which will generate `saved_multigpu_model_<my_device_type>.pt` - the TorchScript
instance of the network. The only difference with the earlier example is that
the model is built to be run on GPU devices rather than on CPU.

You can check that everything is working by running the
`multigpu_infer_python.py` script. It's set up such that it loops over two GPU
devices. Run with:
```
-python3 multigpu_infer_python.py --device_type <cuda/xpu>
+python3 multigpu_infer_python.py --device_type <my_device_type>
```
This reads the model in from the TorchScript file and runs it with a different input
tensor on each GPU device: [0.0, 1.0, 2.0, 3.0, 4.0], plus the device index in each
@@ -74,6 +71,7 @@ entry. The result should be:
Output on device 0: tensor([[0., 2., 4., 6., 8.]])
Output on device 1: tensor([[ 2., 4., 6., 8., 10.]])
```
+Note that Mps will only use device 0.

At this point we no longer require Python, so can deactivate the virtual environment:
```
@@ -95,9 +93,9 @@ cmake --build .
and should match the compiler that was used to locally build FTorch.)

To run the compiled code calling the saved `SimpleNet` TorchScript from
-Fortran, run the executable with an argument of the saved model file:
+Fortran, run the executable with arguments of the device type and the saved model file:
```
-./multigpu_infer_fortran ../saved_multigpu_model_<cuda/xpu>.pt
+./multigpu_infer_fortran <cuda/xpu/mps> ../saved_multigpu_model_<cuda/xpu>.pt
```

This runs the model with the same inputs as described above and should produce (some
@@ -108,6 +106,7 @@ input on device 1: [ 1.0, 2.0, 3.0, 4.0, 5.0]
output on device 0: [ 0.0, 2.0, 4.0, 6.0, 8.0]
output on device 1: [ 2.0, 4.0, 6.0, 8.0, 10.0]
```
+Again, note that Mps will only use device 0.

Alternatively, we can use `make`, instead of CMake, copying the Makefile over from the
first example:
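For readers following the README steps above, here is a minimal sketch of the per-device inference loop the example describes, assuming the TorchScript file saved by `pt2ts.py`. The variable names and the doubling behaviour are inferred from the expected outputs shown above; this is an illustration, not the contents of `multigpu_infer_python.py`:

```
import torch

# Illustrative sketch only, not the actual multigpu_infer_python.py.
device_type = "cuda"  # or "xpu"/"mps"; Mps exposes only device 0
num_devices = 1 if device_type == "mps" else 2

# Load the TorchScript model saved by pt2ts.py (path assumed).
model = torch.jit.load(f"saved_multigpu_model_{device_type}.pt")

for device_index in range(num_devices):
    device = torch.device(f"{device_type}:{device_index}")
    # Offset the input by the device index, as in the outputs shown above.
    x = torch.tensor([[0.0, 1.0, 2.0, 3.0, 4.0]]) + device_index
    output = model.to(device)(x.to(device))
    print(f"Output on device {device_index}: {output.cpu()}")
```
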
8 changes: 6 additions & 2 deletions examples/3_MultiGPU/multigpu_infer_fortran.f90
@@ -4,7 +4,8 @@ program inference
use, intrinsic :: iso_fortran_env, only : sp => real32

! Import our library for interfacing with PyTorch
-use ftorch, only : torch_model, torch_tensor, torch_kCPU, torch_kCUDA, torch_kXPU, &
+use ftorch, only : torch_model, torch_tensor, &
+torch_kCPU, torch_kCUDA, torch_kXPU, torch_kMPS, &
torch_tensor_from_array, torch_model_load, torch_model_forward, &
torch_delete

@@ -27,7 +28,7 @@ program inference
type(torch_tensor), dimension(1) :: out_tensors

! Variables for multi-GPU setup
-integer, parameter :: num_devices = 2
+integer :: num_devices = 2
integer :: device_type, device_index, i

! Get device type as first command line argument and TorchScript model file as second command
@@ -41,6 +42,9 @@ program inference
device_type = torch_kCUDA
else if (trim(args(1)) == "xpu") then
device_type = torch_kXPU
+else if (trim(args(1)) == "mps") then
+device_type = torch_kMPS
+num_devices = 1
else
write (*,*) "Error :: invalid device type", trim(args(1))
stop 999
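The Fortran change above maps the command-line device string onto FTorch's device-type constants and caps `num_devices` at 1 for Mps (hence `num_devices` becoming a plain `integer` rather than a `parameter`). A rough Python analogue of that mapping, given only to make the logic explicit; the helper name is hypothetical:

```
def resolve_device(name: str) -> tuple[str, int]:
    """Hypothetical helper: map a CLI device string to a device string
    plus the number of device indices the example may loop over."""
    if name == "cpu":
        return "cpu", 1
    if name in ("cuda", "xpu"):
        return name, 2  # the example assumes two GPU devices
    if name == "mps":
        return "mps", 1  # Mps only ever exposes device 0
    raise ValueError(f"Error :: invalid device type {name}")
```

As in the Fortran version, any unrecognised string is rejected outright.
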
8 changes: 4 additions & 4 deletions examples/3_MultiGPU/multigpu_infer_python.py
@@ -36,8 +36,7 @@ def deploy(saved_model: str, device: str, batch_size: int = 1) -> torch.Tensor:
# XPU devices need to be initialised before use
torch.xpu.init()
elif device.startswith("mps"):
-mps_error = "FTorch has not been tested with multiple MPS devices"
-raise ValueError(mps_error)
+pass
else:
device_error = f"Device '{device}' not recognised."
raise ValueError(device_error)
@@ -67,15 +66,16 @@ def deploy(saved_model: str, device: str, batch_size: int = 1) -> torch.Tensor:
"--device_type",
help="Device type to run the inference on",
type=str,
-choices=["cpu", "cuda", "xpu"],
+choices=["cpu", "cuda", "xpu", "mps"],
default="cuda",
)
parsed_args = parser.parse_args()
device_type = parsed_args.device_type

saved_model_file = f"saved_multigpu_model_{device_type}.pt"

-num_devices = 2
+# Use 2 devices unless Mps for which there is only one
+num_devices = 1 if device_type == "mps" else 2

for device_index in range(num_devices):
device_to_run = f"{device_type}:{device_index}"
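With the `deploy()` change above, Mps devices now fall through the initialisation branch untouched (only XPU needs an explicit `torch.xpu.init()`). A quick availability check, sketched here as something one might run before trying the example on a Mac:

```
import torch

# Sketch: confirm the Mps backend is usable before running the example.
if torch.backends.mps.is_available():
    print("Mps backend available; the example will run on device mps:0")
else:
    print("Mps not available; use cpu, cuda or xpu instead")
```
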
2 changes: 1 addition & 1 deletion run_test_suite.sh
@@ -26,7 +26,7 @@ show_help() {
}

# Parse command line arguments
-BUILD_DIR="build"
+BUILD_DIR="build_mac_conda"
RUN_INTEGRATION=true
RUN_UNIT=true
VERBOSE=false
