Skip to content

[Offload] Rework compiling device code for unit test suites #144776

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

jhuber6
Copy link
Contributor

@jhuber6 jhuber6 commented Jun 18, 2025

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use -march=native functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.

@jhuber6
Copy link
Contributor Author

jhuber6 commented Jun 18, 2025

@callumfare @RossBrunton

@llvmbot
Copy link
Member

llvmbot commented Jun 18, 2025

@llvm/pr-subscribers-offload

Author: Joseph Huber (jhuber6)

Changes

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use -march=native functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.


Full diff: https://github.com/llvm/llvm-project/pull/144776.diff

2 Files Affected:

  • (modified) offload/unittests/CMakeLists.txt (+73-1)
  • (modified) offload/unittests/OffloadAPI/device_code/CMakeLists.txt (+1-67)
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d8046..cbeae5ce72989 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,73 @@
 add_custom_target(OffloadUnitTests)
 set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
 
+function(add_offload_test_device_code test_filename test_name)
+  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+  # Try to build with support for NVPTX devices.
+  if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    find_package(CUDAToolkit QUIET)
+    if(CUDAToolkit_FOUND)
+      get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+    endif()
+    check_cxx_compiler_flag(
+      "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+    if(PLATFORM_HAS_NVPTX)
+      set(nvptx_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+      set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+    endif()
+
+    if(nvptx_arch AND CUDAToolkit_FOUND)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Try to build with support for AMDGPU devices.
+  if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+    if(PLATFORM_HAS_AMDGPU)
+      set(amdgpu_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+    endif()
+
+    if(amdgpu_arch)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Create a single dependency target for the device code.
+  add_custom_target(${test_name})
+  if(TARGET ${test_name}.amdgpu)
+    add_dependencies(${test_name} ${test_name}.amdgpu)
+  endif()
+  if(TARGET ${test_name}.nvptx64)
+    add_dependencies(${test_name} ${test_name}.nvptx64)
+  endif()
+endfunction()
+
 function(add_offload_unittest test_dirname)
   set(target_name "${test_dirname}.unittests")
 
@@ -9,10 +76,15 @@ function(add_offload_unittest test_dirname)
   add_unittest(OffloadUnitTests "${target_name}"
     ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
     ${files})
-  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
   target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
   target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
   target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
 endfunction()
 
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+  "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+  "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
 add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6e..c475bc6f314dd 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,6 @@
-macro(add_offload_test_device_code test_filename test_name)
-    set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
-    # Build for NVPTX
-    if(OFFLOAD_TEST_TARGET_NVIDIA)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
-            ${ARGN}
-            -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
-            --cuda-path=${CUDA_ROOT}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # Build for AMDGPU
-    if(OFFLOAD_TEST_TARGET_AMDGPU)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
-            ${ARGN}
-            -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
-    "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
-    "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
-  get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
-    set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
-    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
-    set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
-    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-    list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
-    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
 add_offload_test_device_code(noargs.c noargs -O3)
 
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo bar noargs)
 set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)

@jhuber6 jhuber6 force-pushed the ReworkDeviceCode branch from 4bc1aa3 to 447f1dc Compare June 19, 2025 14:34
Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use `-march=native` functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.
@jhuber6 jhuber6 force-pushed the ReworkDeviceCode branch from 447f1dc to fffb54b Compare June 19, 2025 14:40
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
endfunction()

set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
"Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this also take a list of arches here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No

Comment on lines 41 to 45
if(PLATFORM_HAS_AMDGPU)
set(amdgpu_arch "native")
elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
endif()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that on a platform that has an AMDGPU, the arch cannot be forcefully overwritten?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I should reverse these.

target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
endfunction()

set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
Copy link
Contributor

@shiltian shiltian Jun 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd call it NVPTX_ARCH since we use AMDGPU instead of AMD.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was existing so I didn't modify it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this is really bad…

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can modify it in a follow up, I wanted this to mostly be NFC

# for free to join this conversation on GitHub. Already have an account? # to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants