Skip to content

Commit

Permalink
[jt774] (before merging upstream/master) improve logic of "if CUDA el…
Browse files Browse the repository at this point in the history
…se HIP else neither" in CODEGEN cudacpp.mk
  • Loading branch information
valassi committed Jan 24, 2024
1 parent 1a6496a commit d2e2f47
Showing 1 changed file with 108 additions and 120 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ UNAME_P := $(shell uname -p)
include ../../Source/make_opts
#-------------------------------------------------------------------------------

#=== Configure common compiler flags for C++ and CUDA
#=== Configure common compiler flags for C++ and CUDA/HIP

INCFLAGS = -I.
OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here
Expand Down Expand Up @@ -104,69 +104,73 @@ endif

#-------------------------------------------------------------------------------

CUDA_COMPILER_PATH := $(shell compiler="`which nvcc 2>/dev/null`" && while [ -L "$$compiler" ]; do compiler=`readlink "$$compiler"`; done && echo "$$compiler")
HIP_COMPILER_PATH := $(shell compiler="`which hipcc 2>/dev/null`" && while [ -L "$$compiler" ]; do compiler=`readlink "$$compiler"`; done && echo "$$compiler")

ifeq ($(findstring nvcc,$(CUDA_COMPILER_PATH)),nvcc)
#=== Configure the CUDA compiler

# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505)
# This is because it is impossible to pass this to "GPUFLAGS += -ccbin <host-compiler>" below
ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache <host-compiler>" from outside
$(warning CUDA builds are not supported for multi-word CXX "$(CXX)")
override CUDA_HOME=disabled
endif

# If CUDA_HOME is not set, try to set it from the location of NVCC
ifndef CUDA_HOME
CUDA_HOME = $(patsubst %%bin/nvcc,%%,$(shell which nvcc 2>/dev/null))
$(warning CUDA_HOME was not set: using "$(CUDA_HOME)")
endif

# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists
ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)
GPUCC = $(CUDA_HOME)/bin/nvcc
USE_NVTX ?=-DUSE_NVTX
# See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster).
# Embed device code for 70, and PTX for 70+.
# Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533).
# Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity).
MADGRAPH_CUDA_ARCHITECTURE ?= 70
###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533
###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533
comma:=,
CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch))
CUINC = -I$(CUDA_HOME)/include/
CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here!
CUOPTFLAGS = -lineinfo
GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math
###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow
###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1)
GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h
# Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12)

CUBUILDRULEFLAGS = -Xcompiler -fPIC -c
CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu

CUDATESTFLAGS = -lcuda

else ifneq ($(origin REQUIRE_CUDA),undefined)
# If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443)
$(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH))
else
# No cuda. Switch cuda compilation off and go to common random numbers in C++
$(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda)
override GPUCC=
override USE_NVTX=
override CUINC=
override CURANDLIBFLAGS=
endif
#=== Configure the GPU compiler (CUDA or HIP)

# FIXME! (AV 24.01.2024)
# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME.
# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc.
# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths.
# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775).

# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA and HIP builds (issue #505)
# This is because it is impossible to pass this to "GPUFLAGS += -ccbin <host-compiler>" below
ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache <host-compiler>" from outside
$(warning CUDA and HIP builds are not supported for multi-word CXX "$(CXX)")
override CUDA_HOME=disabled
override HIP_HOME=disabled
endif

# If CUDA_HOME is not set, try to set it from the path to nvcc
ifndef CUDA_HOME
CUDA_HOME = $(patsubst %%bin/nvcc,%%,$(shell which nvcc 2>/dev/null))
$(warning CUDA_HOME was not set: using "$(CUDA_HOME)")
endif

# If HIP_HOME is not set, try to set it from the path to hipcc
ifndef HIP_HOME
HIP_HOME = $(patsubst %%bin/hipcc,%%,$(HIP_COMPILER_PATH))
$(warning HIP_HOME was not set: using "$(HIP_HOME)")
endif

# FIXME! (AV 24.01.2024)
# In the current implementation (without separate builds for C++ and CUDA/HIP),
# builds are performed for HIP only if CUDA is not found in the path.
# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME.
# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775).

#--- Option 1: CUDA exists -> use CUDA

# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists
ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)

GPUCC = $(CUDA_HOME)/bin/nvcc
USE_NVTX ?=-DUSE_NVTX
# See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster).
# Embed device code for 70, and PTX for 70+.
# Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533).
# Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity).
MADGRAPH_CUDA_ARCHITECTURE ?= 70
###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533
###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533
comma:=,
CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch))
CUINC = -I$(CUDA_HOME)/include/
CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here!
CUOPTFLAGS = -lineinfo
GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math
###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow
###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1)
GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h
# Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12)
CUBUILDRULEFLAGS = -Xcompiler -fPIC -c
CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu
CUDATESTFLAGS = -lcuda

# Set the host C++ compiler for GPUCC via "-ccbin <host-compiler>"
# (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported)
Expand All @@ -177,71 +181,55 @@ ifeq ($(findstring nvcc,$(CUDA_COMPILER_PATH)),nvcc)
GPUFLAGS += -allow-unsupported-compiler
endif

else ifeq ($(findstring hipcc,$(HIP_COMPILER_PATH)),hipcc)
#=== Configure the HIP compiler
else ifneq ($(origin REQUIRE_CUDA),undefined)

# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA/HIP builds (issue #505)
# This is because it is impossible to pass this to "GPUFLAGS += -ccbin <host-compiler>" below
ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache <host-compiler>" from outside
$(warning HIP builds are not supported for multi-word CXX "$(CXX)")
override HIP_HOME=disabled
endif
# If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443)
$(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH))

# If HIP_HOME is not set, try to set it from the location of GPUCC
ifndef HIP_HOME
HIP_HOME = $(patsubst %%bin/hipcc,%%,$(HIP_COMPILER_PATH))
$(warning HIP_HOME was not set: using "$(HIP_HOME)")
endif
#--- Option 2: CUDA does not exist, HIP exists -> use HIP

# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists
ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),)
GPUCC = $(HIP_HOME)/bin/hipcc

# Should maybe find something equivelant to this in HIP
#USE_NVTX ?=-DUSE_NVTX

HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a
HIPINC = -I$(HIP_HOME)/include/

# -DHIP_FAST_MATH equivelant to -use_fast_math in HIP
# (But only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html)
GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC
###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow
GPUFLAGS += -std=c++17
# Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12)
###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12)

CUBUILDRULEFLAGS = -fPIC -c
CCBUILDRULEFLAGS = -fPIC -c

else ifneq ($(origin REQUIRE_HIP),undefined)
# If REQUIRE_HIP is set but no cuda is found, stop here (e.g. for CI tests on GPU #443)
$(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH))
else
# No hip. Switch hip compilation off and go to common random numbers in C++
$(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip)
override GPUCC=
override USE_NVTX=
override CUINC=
override CURANDLIBFLAGS=
endif
# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists
else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),)

# Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504)
ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined)
GPUFLAGS += -allow-unsupported-compiler
endif
GPUCC = $(HIP_HOME)/bin/hipcc
#USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP?
HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a
HIPINC = -I$(HIP_HOME)/include/
# Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP
# (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html)
GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC
###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow
GPUFLAGS += -std=c++17
###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?)
CUBUILDRULEFLAGS = -fPIC -c
CCBUILDRULEFLAGS = -fPIC -c

else ifneq ($(origin REQUIRE_HIP),undefined)

# If REQUIRE_HIP is set but no HIP is found, stop here (e.g. for CI tests on GPU #443)
$(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH))

#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP

else

# No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++
$(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda)
$(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip)
override GPUCC=
override USE_NVTX=
override CUINC=
override CURANDLIBFLAGS=

endif

# Export GPUCC (so that it can also be used in cudacpp_src.mk?)
export GPUCC
export GPUFLAGS

#-------------------------------------------------------------------------------

#=== Configure ccache for C++ and CUDA builds
#=== Configure ccache for C++ and CUDA/HIP builds

# Enable ccache if USECCACHE=1
ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1)
Expand All @@ -258,7 +246,7 @@ endif

#-------------------------------------------------------------------------------

#=== Configure PowerPC-specific compiler flags for C++ and CUDA
#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP

# PowerPC-specific CXX compiler flags (being reviewed)
ifeq ($(UNAME_P),ppc64le)
Expand All @@ -274,7 +262,7 @@ else
######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto)
endif

# PowerPC-specific CUDA compiler flags (to be reviewed!)
# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!)
ifeq ($(UNAME_P),ppc64le)
GPUFLAGS+= -Xcompiler -mno-float128
endif
Expand Down Expand Up @@ -360,7 +348,7 @@ export OMPFLAGS

#-------------------------------------------------------------------------------

#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN
#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN

# Set the build flags appropriate to OMPFLAGS
$(info OMPFLAGS=$(OMPFLAGS))
Expand Down

0 comments on commit d2e2f47

Please # to comment.