-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[Offload] Rework compiling device code for unit test suites #144776
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? # to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-offload Author: Joseph Huber (jhuber6) ChangesSummary: This should still create the same output as far as I'm aware. Full diff: https://github.com/llvm/llvm-project/pull/144776.diff 2 Files Affected:
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d8046..cbeae5ce72989 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,73 @@
add_custom_target(OffloadUnitTests)
set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
+function(add_offload_test_device_code test_filename test_name)
+ set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+ # Try to build with support for NVPTX devices.
+ if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+ find_package(CUDAToolkit QUIET)
+ if(CUDAToolkit_FOUND)
+ get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+ endif()
+ check_cxx_compiler_flag(
+ "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+ if(PLATFORM_HAS_NVPTX)
+ set(nvptx_arch "native")
+ elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+ set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+ endif()
+
+ if(nvptx_arch AND CUDAToolkit_FOUND)
+ set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND ${CMAKE_C_COMPILER}
+ --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+ -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+ -c ${SRC_PATH} -o ${output_file}
+ DEPENDS ${SRC_PATH}
+ )
+ add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+ endif()
+ endif()
+
+ # Try to build with support for AMDGPU devices.
+ if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+ check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+ if(PLATFORM_HAS_AMDGPU)
+ set(amdgpu_arch "native")
+ elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+ set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+ endif()
+
+ if(amdgpu_arch)
+ set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND ${CMAKE_C_COMPILER}
+ --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+ -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+ -c ${SRC_PATH} -o ${output_file}
+ DEPENDS ${SRC_PATH}
+ )
+ add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+ endif()
+ endif()
+
+ # Create a single dependency target for the device code.
+ add_custom_target(${test_name})
+ if(TARGET ${test_name}.amdgpu)
+ add_dependencies(${test_name} ${test_name}.amdgpu)
+ endif()
+ if(TARGET ${test_name}.nvptx64)
+ add_dependencies(${test_name} ${test_name}.nvptx64)
+ endif()
+endfunction()
+
function(add_offload_unittest test_dirname)
set(target_name "${test_dirname}.unittests")
@@ -9,10 +76,15 @@ function(add_offload_unittest test_dirname)
add_unittest(OffloadUnitTests "${target_name}"
${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
${files})
- add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+ add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
endfunction()
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+ "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+ "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6e..c475bc6f314dd 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,6 @@
-macro(add_offload_test_device_code test_filename test_name)
- set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
- # Build for NVPTX
- if(OFFLOAD_TEST_TARGET_NVIDIA)
- set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
- add_custom_command(OUTPUT ${BIN_PATH}
- COMMAND
- ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
- ${ARGN}
- -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
- --cuda-path=${CUDA_ROOT}
- ${SRC_PATH} -o ${BIN_PATH}
- DEPENDS ${SRC_PATH}
- )
- list(APPEND BIN_PATHS ${BIN_PATH})
- endif()
-
- # Build for AMDGPU
- if(OFFLOAD_TEST_TARGET_AMDGPU)
- set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
- add_custom_command(OUTPUT ${BIN_PATH}
- COMMAND
- ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
- ${ARGN}
- -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
- ${SRC_PATH} -o ${BIN_PATH}
- DEPENDS ${SRC_PATH}
- )
- list(APPEND BIN_PATHS ${BIN_PATH})
- endif()
-
- # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
- "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
- "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
- get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
- set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
- set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
- set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
- set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
- set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
- list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
- set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
add_offload_test_device_code(foo.c foo)
add_offload_test_device_code(bar.c bar)
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
add_offload_test_device_code(noargs.c noargs -O3)
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo bar noargs)
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
|
4bc1aa3
to
447f1dc
Compare
Summary: I'll probably want to use this as a more generic utility in the future. This patch reworks it to make it a top level function. I also tried to decouple this from the OpenMP utilities to make that easier in the future. Instead, I just use `-march=native` functionality which is the same thing. Needed a small hack to skip the linker stage for checking if that works. This should still create the same output as far as I'm aware.
447f1dc
to
fffb54b
Compare
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}") | ||
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON}) | ||
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE}) | ||
endfunction() | ||
|
||
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING | ||
"Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61") | ||
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this also take a list of arches here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No
offload/unittests/CMakeLists.txt
Outdated
if(PLATFORM_HAS_AMDGPU) | ||
set(amdgpu_arch "native") | ||
elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH) | ||
set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}") | ||
endif() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this mean that on a platform that has an AMDGPU, the arch cannot be forcefully overwritten?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, I should reverse these.
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}") | ||
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON}) | ||
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE}) | ||
endfunction() | ||
|
||
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd call it NVPTX_ARCH
since we use AMDGPU
instead of AMD
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was existing so I didn't modify it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, this is really bad…
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can modify it in a follow up, I wanted this to mostly be NFC
Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use
-march=native
functionality which is thesame thing. Needed a small hack to skip the linker stage for checking if
that works.
This should still create the same output as far as I'm aware.