Skip to content

Commit fffb54b

Browse files
committed
[Offload] Rework compiling device code for unit test suites
Summary: I'll probably want to use this as a more generic utility in the future, so this patch reworks it into a top-level function. I also tried to decouple it from the OpenMP utilities to make that easier later. Instead of relying on them, it just uses the compiler's `-march=native` functionality, which serves the same purpose. Checking whether that works needed a small hack to skip the linker stage. As far as I'm aware, this should still create the same output.
1 parent 19360e6 commit fffb54b

File tree

2 files changed

+74
-68
lines changed

2 files changed

+74
-68
lines changed

offload/unittests/CMakeLists.txt

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,72 @@
11
add_custom_target(OffloadUnitTests)
22
set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
33

4+
# Compile one device-code source file for each GPU target this build supports.
#
# Arguments:
#   test_filename - source file, relative to the calling directory.
#   test_name     - base name used for the output binaries and the targets.
#   ARGN          - any extra arguments are forwarded to every compile command.
#
# Targets created:
#   ${test_name}.nvptx64 - builds ${test_name}.nvptx64.bin. Only created when
#                          the "cuda" plugin is enabled, the CUDA toolkit is
#                          found, and an architecture is known.
#   ${test_name}.amdgpu  - builds ${test_name}.amdgpu.bin. Only created when
#                          the "amdgpu" plugin is enabled and an architecture
#                          is known.
#   ${test_name}.bin     - always created; depends on whichever of the two
#                          targets above exist.
#
# The architecture is OFFLOAD_TESTS_FORCE_NVIDIA_ARCH /
# OFFLOAD_TESTS_FORCE_AMDGPU_ARCH when set, otherwise "native" if the compiler
# accepts it for that target.
function(add_offload_test_device_code test_filename test_name)
  include(CheckCXXCompilerFlag)

  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
  # Have the compiler-flag checks below build a static library so that they do
  # not run the linker stage. This is function-scoped and does not leak.
  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

  # Try to build with support for NVPTX devices.
  if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    find_package(CUDAToolkit QUIET)
    if(CUDAToolkit_FOUND)
      # The toolkit root is the parent directory of its bin directory.
      get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
    endif()
    check_cxx_compiler_flag(
      "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)

    # An explicitly forced architecture wins over native detection.
    if(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
      set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
    elseif(PLATFORM_HAS_NVPTX)
      set(nvptx_arch "native")
    endif()

    if(nvptx_arch AND CUDAToolkit_FOUND)
      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
      add_custom_command(
        OUTPUT ${output_file}
        COMMAND ${CMAKE_C_COMPILER}
        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
        -nogpulib --cuda-path=${cuda_path} -flto ${ARGN}
        -c ${SRC_PATH} -o ${output_file}
        DEPENDS ${SRC_PATH}
        VERBATIM
      )
      add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
    endif()
  endif()

  # Try to build with support for AMDGPU devices.
  if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)

    # An explicitly forced architecture wins over native detection.
    if(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
    elseif(PLATFORM_HAS_AMDGPU)
      set(amdgpu_arch "native")
    endif()

    if(amdgpu_arch)
      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
      add_custom_command(
        OUTPUT ${output_file}
        COMMAND ${CMAKE_C_COMPILER}
        --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
        -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
        DEPENDS ${SRC_PATH}
        VERBATIM
      )
      add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
    endif()
  endif()

  # Create a single dependency target for the device code.
  add_custom_target(${test_name}.bin)
  if(TARGET ${test_name}.amdgpu)
    add_dependencies(${test_name}.bin ${test_name}.amdgpu)
  endif()
  if(TARGET ${test_name}.nvptx64)
    add_dependencies(${test_name}.bin ${test_name}.nvptx64)
  endif()
endfunction()
69+
470
function(add_offload_unittest test_dirname)
571
set(target_name "${test_dirname}.unittests")
672

@@ -9,10 +75,15 @@ function(add_offload_unittest test_dirname)
975
add_unittest(OffloadUnitTests "${target_name}"
1076
${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
1177
${files})
12-
add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
78+
add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
1379
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
1480
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
1581
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
1682
endfunction()
1783

84+
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
85+
"Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
86+
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
87+
"Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
88+
1889
add_subdirectory(OffloadAPI)
Lines changed: 2 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,7 @@
1-
macro(add_offload_test_device_code test_filename test_name)
2-
set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
3-
4-
# Build for NVPTX
5-
if(OFFLOAD_TEST_TARGET_NVIDIA)
6-
set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
7-
add_custom_command(OUTPUT ${BIN_PATH}
8-
COMMAND
9-
${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
10-
${ARGN}
11-
-march=${LIBOMPTARGET_DEP_CUDA_ARCH}
12-
--cuda-path=${CUDA_ROOT}
13-
${SRC_PATH} -o ${BIN_PATH}
14-
DEPENDS ${SRC_PATH}
15-
)
16-
list(APPEND BIN_PATHS ${BIN_PATH})
17-
endif()
18-
19-
# Build for AMDGPU
20-
if(OFFLOAD_TEST_TARGET_AMDGPU)
21-
set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
22-
add_custom_command(OUTPUT ${BIN_PATH}
23-
COMMAND
24-
${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
25-
${ARGN}
26-
-mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
27-
${SRC_PATH} -o ${BIN_PATH}
28-
DEPENDS ${SRC_PATH}
29-
)
30-
list(APPEND BIN_PATHS ${BIN_PATH})
31-
endif()
32-
33-
# TODO: Build for host CPU
34-
endmacro()
35-
36-
37-
# Decide what device targets to build for. LibomptargetGetDependencies is
38-
# included at the top-level so the GPUs present on the system are already
39-
# detected.
40-
set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
41-
"Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
42-
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
43-
"Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
44-
45-
find_package(CUDAToolkit QUIET)
46-
if(CUDAToolkit_FOUND)
47-
get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
48-
endif()
49-
if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
50-
set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
51-
set(OFFLOAD_TEST_TARGET_NVIDIA ON)
52-
elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
53-
set(OFFLOAD_TEST_TARGET_NVIDIA ON)
54-
endif()
55-
56-
if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
57-
set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
58-
set(OFFLOAD_TEST_TARGET_AMDGPU ON)
59-
elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
60-
list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
61-
set(OFFLOAD_TEST_TARGET_AMDGPU ON)
62-
endif()
63-
641
add_offload_test_device_code(foo.c foo)
652
add_offload_test_device_code(bar.c bar)
66-
# By default, amdhsa will add a number of "hidden" arguments to the kernel definition
67-
# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
3+
# Compile with optimizations to eliminate AMDGPU implicit arguments.
684
add_offload_test_device_code(noargs.c noargs -O3)
695

70-
add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
71-
6+
add_custom_target(offload_device_binaries DEPENDS foo.bin bar.bin noargs.bin)
727
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)

0 commit comments

Comments
 (0)