-
Notifications
You must be signed in to change notification settings - Fork 1
Building on Panther (ppc64le) with IBM XL compilers
IBM XL C/C++ (V13.1.5) and Fortran (V15.1.5).
Change `Makefile.in` to:
# Which compiler to use
CC = xlc_r
# What optimization level to use
OPTFLAGS = -O
# What options to be used by the compiler
COPTIONS = -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals
# What options to be used by the loader
LDOPTIONS = $(COPTIONS)
# What archiving to use
AR = ar rv
# What to use for indexing the archive
RANLIB = ranlib
CC="nvcc -ccbin `which xlC_r`" CXX="nvcc -ccbin `which xlC_r`" ./configure --prefix=$CDS_HOME/ppc64le --enable-dependency-tracking --disable-cairo --disable-cpuid --disable-libxml2 --enable-static --disable-shared
`Makefile` changes:
--- magma-2.2.0/Makefile 2016-11-21 01:21:13.000000000 +0000
+++ Makefile 2017-02-09 11:51:44.083563083 +0000
@@ -60,8 +60,8 @@
CXXFLAGS += -DHAVE_CUBLAS
# where testers look for MAGMA libraries
-RPATH = -Wl,-rpath,../lib
-RPATH2 = -Wl,-rpath,../../lib
+#RPATH = -Wl,-rpath,../lib
+#RPATH2 = -Wl,-rpath,../../lib
codegen = python tools/codegen.py
@@ -73,7 +73,7 @@
GPU_TARGET += sm20
endif
ifneq ($(findstring Kepler, $(GPU_TARGET)),)
- GPU_TARGET += sm30 sm35
+ GPU_TARGET += sm35
endif
ifneq ($(findstring Maxwell, $(GPU_TARGET)),)
GPU_TARGET += sm50 sm52
@@ -156,7 +156,7 @@
PTROBJ = control/sizeptr.$(o_ext)
PTREXEC = control/sizeptr
PTRSIZE = $(shell if [ -x $(PTREXEC) ]; then $(PTREXEC); else echo 8; fi)
-PTROPT = -Dmagma_devptr_t="integer(kind=$(PTRSIZE))"
+PTROPT = -WF,-Dmagma_devptr_t="integer(kind=$(PTRSIZE))"
$(PTREXEC): $(PTROBJ)
-$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
`make.inc`:
#//////////////////////////////////////////////////////////////////////////////
# -- MAGMA (version 2.2.0) --
# Univ. of Tennessee, Knoxville
# Univ. of California, Berkeley
# Univ. of Colorado, Denver
# @date November 2016
#//////////////////////////////////////////////////////////////////////////////
# GPU_TARGET contains one or more of Fermi, Kepler, or Maxwell,
# to specify for which GPUs you want to compile MAGMA:
# Fermi - NVIDIA compute capability 2.x cards
# Kepler - NVIDIA compute capability 3.x cards
# Maxwell - NVIDIA compute capability 5.x cards
# The default set below is "Kepler".
# Note that NVIDIA no longer supports 1.x cards, as of CUDA 6.5.
# See http://developer.nvidia.com/cuda-gpus
#
GPU_TARGET ?= Kepler
# --------------------
# programs
CC = xlc_r
CXX = xlC_r
NVCC = nvcc -ccbin /gpfs/panther/local/apps/ibm/xlC/13.1.5/bin/xlC_r
FORT = xlf_r
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
# --------------------
# flags
# Use -fPIC to make shared (.so) and static (.a) library;
# can be commented out if making only static library.
#FPIC = -fPIC
CFLAGS = -O $(FPIC) -DNDEBUG -DNOCHANGE -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals
FFLAGS = -O $(FPIC) -qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals -WF,-DNDEBUG -WF,-DNOCHANGE
F90FLAGS = -O $(FPIC) -qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals -WF,-DNDEBUG -qlanglvl=extended
NVCCFLAGS = -O -DNDEBUG -DNOCHANGE -Xcompiler "-qsimd=auto" -Xcompiler "-qsmp=omp" -Xcompiler "-qarch=pwr8" -Xcompiler "-qtune=pwr8:smt8" -Xcompiler "-qcache=auto" -Xcompiler "-qmaxmem=-1" -Xcompiler "-qhot=level=2" -Xcompiler "-qnoipa" -Xcompiler "-qlibansi" -Xcompiler "-qfloat=subnormals"
LDFLAGS = $(CFLAGS)
# C++11 (gcc >= 4.7) is not required, but has benefits like atomic operations
CXXFLAGS := $(CFLAGS)
#CFLAGS += -qlanglvl=...
# --------------------
# libraries
# IBM ESSL (SMP and serial) with LAPACK, reference BLAS, and the XL SMP/Fortran runtime libraries
LIB = -lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath
LIB += -lcublas -lcusparse -lcudart -lcudadevrt
# --------------------
# directories
# define library directories preferably in your environment, or here.
CUDADIR ?= /usr/local/cuda
-include make.check-cuda
LIBDIR = -L$(CUDADIR)/lib64 -L/usr/lib64 -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lapack/XL -L/gpfs/panther/local/apps/ibm/xlsmp/4.1.5/lib -L/gpfs/panther/local/apps/ibm/xlf/15.1.5/lib
INC = -I$(CUDADIR)/include
The library builds and many of the testing drivers work fine, but there are some OpenMP (or linkage?) problems with one particular driver:
xlC_r -O -DNDEBUG -DNOCHANGE -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals -DHAVE_CUBLAS -DMIN_CUDA_ARCH=350 \
-o testing/testing_zheevd testing/testing_zheevd.o \
-L./testing -ltest \
-L./lib -lmagma \
-L./testing/lin -llapacktest \
-L/usr/local/cuda/lib64 -L/usr/lib64 -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lapack/XL -L/gpfs/panther/local/apps/ibm/xlsmp/4.1.5/lib -L/gpfs/panther/local/apps/ibm/xlf/15.1.5/lib -lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath -lcublas -lcusparse -lcudart -lcudadevrt
./lib/libmagma.a(dlaex3_m.o):(.bss+0x0): multiple definition of `__ompCritical::info'
./lib/libmagma.a(dlaex3.o):(.bss+0x0): first defined here
make: *** [testing/testing_zheevd] Error 1
Change `configure.ac` to assume that HWLOC is present:
--- /gpfs/fairthorpe/local/SCD/jpf02/vxn61-jpf02/CDS/spral/configure.ac 2016-10-20 16:30:21.000000000 +0100
+++ configure.ac 2017-02-09 13:18:44.575643000 +0000
@@ -72,12 +72,13 @@
)
# Check for hwloc
-PKG_PROG_PKG_CONFIG # initialise $PKG_CONFIG
-PKG_CONFIG="$PKG_CONFIG --static" # we will be linking statically
-PKG_CHECK_MODULES([HWLOC], [hwloc],
- AC_DEFINE(HAVE_HWLOC,1,[Define if you have hwloc library]),
- AC_MSG_WARN([hwloc not supplied: cannot detect NUMA regions])
- )
+#PKG_PROG_PKG_CONFIG # initialise $PKG_CONFIG
+#PKG_CONFIG="$PKG_CONFIG --static" # we will be linking statically
+#PKG_CHECK_MODULES([HWLOC], [hwloc],
+# AC_DEFINE(HAVE_HWLOC,1,[Define if you have hwloc library]),
+# AC_MSG_WARN([hwloc not supplied: cannot detect NUMA regions])
+# )
+AC_DEFINE(HAVE_HWLOC,1,[Define if you have hwloc library])
AS_IF([test "x$NVCC" != x], [
SPRAL_NVCC_LIB
Fix `Makefile.am` to avoid GNU OpenMP usage and some incompatible flags:
--- /gpfs/fairthorpe/local/SCD/jpf02/vxn61-jpf02/CDS/spral/Makefile.am 2016-10-20 16:30:21.000000000 +0100
+++ Makefile.am 2017-02-09 13:26:39.065125856 +0000
@@ -6,7 +6,7 @@
# NVCC setup
PTX_FLAGS = -v
#NVCCFLAGS = -Iinclude -arch=sm_20 -g -Xptxas="${PTX_FLAGS}"
-OPENMP_LIB = -lgomp # FIXME: autoconf this
+#OPENMP_LIB = -lgomp # FIXME: autoconf this
AM_NVCC_FLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src
AM_LD_FLAGS = -lcuda
NVCCLINK = \
@@ -16,8 +16,8 @@
# Include directory for standard C
AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src $(OPENMP_CFLAGS) \
- $(HWLOC_CFLAGS) -std=c99
-AM_CXXFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -std=c++11 \
+ $(HWLOC_CFLAGS)
+AM_CXXFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \
$(OPENMP_CXXFLAGS) $(GTG_INCLUDE) $(HWLOC_CFLAGS)
AM_FCFLAGS = $(OPENMP_CXXFLAGS) # assume CXX and FC use same flags...
If needed, `configure` will ask for newer `config.sub` and `config.guess` files, since support for the `ppc64le` architecture is lacking from the tools currently installed on Panther.
Configure like this:
LDFLAGS="-L/usr/lib64 -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lib -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lapack/XL -L/gpfs/panther/local/apps/ibm/xlsmp/4.1.5/lib -L/gpfs/panther/local/apps/ibm/xlf/15.1.5/lib" LIBS="-lhwloc" CPPFLAGS="-I/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/include" CC=xlc_r CFLAGS="-qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals" CXX=xlC_r CXXFLAGS="-qlanglvl=extended1y -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals" F77=xlf_r FFLAGS="-qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qassert=contiguous:refalign -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals" FC=xlf2008_r FCFLAGS="-qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals" NVCC="nvcc -ccbin `which xlC_r`" NVCCFLAGS='-arch sm_37 -Xcompiler "-qsimd=auto" -Xcompiler "-qsmp=omp" -Xcompiler "-qarch=pwr8" -Xcompiler "-qtune=pwr8:smt8" -Xcompiler "-qcache=auto" -Xcompiler "-qmaxmem=-1" -Xcompiler "-qhot=level=2" -Xcompiler "-qnoipa" -Xcompiler "-qlibansi" -Xcompiler "-qfloat=subnormals"' ./configure --prefix=$CDS_HOME/ppc64le --enable-dependency-tracking --with-blas="-lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath" --with-lapack="-lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath" --with-metis="-lmetis"
A couple of errors:
xlf2008_r -qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals -c -o src/ssids/fkeep.o src/ssids/fkeep.f90
"src/ssids/fkeep.f90", line 93.24: 1514-391 (S) A variable in the SHARED clause must not be a pointee, or a THREADLOCAL common block variable.
"src/ssids/fkeep.f90", line 111.6: 1515-019 (S) Syntax is incorrect.
"src/ssids/fkeep.f90", line 147.6: 1515-019 (S) Syntax is incorrect.
** spral_ssids_fkeep === End of Compilation 1 ===
1501-511 Compilation failed for file fkeep.f90.
The first error: removing `fkeep` from the `shared` clause makes the error go away, although doing so should itself cause an error, because of `default(none)`
...
The other errors are caused by OpenMP `taskgroup`-related directives not being supported properly...
...END OF XL USAGE FOR NOW...