Skip to content

Building on Panther (ppc64le) with IBM XL compilers

Vedran Novakovic edited this page Feb 10, 2017 · 1 revision

IBM XL C/C++ (V13.1.5) and Fortran (V15.1.5).

METIS

Change Makefile.in to:

# METIS build settings for the IBM XL C compiler on ppc64le.
# These assignments replace the corresponding ones in METIS's Makefile.in.

# Which compiler to use
CC = xlc_r

# What optimization level to use
OPTFLAGS = -O

# What options to be used by the compiler
# -qsmp=omp and -qarch=pwr8 / -qtune=pwr8:smt8 are the OpenMP and POWER8
# target selections; the remaining -q options are XL-specific optimization
# and code-generation settings (see the XL C/C++ compiler reference).
# The same option set is reused for the C compiles of the other packages
# built with XL in these notes.
COPTIONS = -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals

# What options to be used by the loader
# The link step receives exactly the compile options (recursive `=`, so it
# follows any later change to COPTIONS).
LDOPTIONS = $(COPTIONS)

# What archiving to use
AR = ar rv

# What to use for indexing the archive
RANLIB = ranlib

HWLOC

CC="nvcc -ccbin `which xlC_r`" CXX="nvcc -ccbin `which xlC_r`" ./configure --prefix=$CDS_HOME/ppc64le --enable-dependency-tracking --disable-cairo --disable-cpuid --disable-libxml2 --enable-static --disable-shared

MAGMA

Makefile changes:

--- magma-2.2.0/Makefile	2016-11-21 01:21:13.000000000 +0000
+++ Makefile	2017-02-09 11:51:44.083563083 +0000
@@ -60,8 +60,8 @@
 CXXFLAGS  += -DHAVE_CUBLAS
 
 # where testers look for MAGMA libraries
-RPATH      = -Wl,-rpath,../lib
-RPATH2     = -Wl,-rpath,../../lib
+#RPATH      = -Wl,-rpath,../lib
+#RPATH2     = -Wl,-rpath,../../lib
 
 codegen    = python tools/codegen.py
 
@@ -73,7 +73,7 @@
     GPU_TARGET += sm20
 endif
 ifneq ($(findstring Kepler, $(GPU_TARGET)),)
-    GPU_TARGET += sm30 sm35
+    GPU_TARGET += sm35
 endif
 ifneq ($(findstring Maxwell, $(GPU_TARGET)),)
     GPU_TARGET += sm50 sm52
@@ -156,7 +156,7 @@
 PTROBJ  = control/sizeptr.$(o_ext)
 PTREXEC = control/sizeptr
 PTRSIZE = $(shell if [ -x $(PTREXEC) ]; then $(PTREXEC); else echo 8; fi)
-PTROPT  = -Dmagma_devptr_t="integer(kind=$(PTRSIZE))"
+PTROPT  = -WF,-Dmagma_devptr_t="integer(kind=$(PTRSIZE))"
 
 $(PTREXEC): $(PTROBJ)
 	-$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<

make.inc:

#//////////////////////////////////////////////////////////////////////////////
#   -- MAGMA (version 2.2.0) --
#      Univ. of Tennessee, Knoxville
#      Univ. of California, Berkeley
#      Univ. of Colorado, Denver
#      @date November 2016
#//////////////////////////////////////////////////////////////////////////////

# GPU_TARGET contains one or more of Fermi, Kepler, or Maxwell,
# to specify for which GPUs you want to compile MAGMA:
#     Fermi   - NVIDIA compute capability 2.x cards
#     Kepler  - NVIDIA compute capability 3.x cards
#     Maxwell - NVIDIA compute capability 5.x cards
# The upstream default is "Fermi Kepler"; this make.inc instead falls back
# to Kepler only. `?=` assigns only when GPU_TARGET is not already set, so a
# value given in the environment or on the make command line takes precedence.
# Note that NVIDIA no longer supports 1.x cards, as of CUDA 6.5.
# See http://developer.nvidia.com/cuda-gpus
#
GPU_TARGET ?= Kepler

# --------------------
# programs
#
# Toolchain selection: IBM XL drivers for C, C++ and Fortran, and nvcc for
# the CUDA sources.

CC        = xlc_r
CXX       = xlC_r
# nvcc is pointed at the XL C++ driver through -ccbin. The path is absolute
# and specific to the XL 13.1.5 installation on Panther; adjust it for any
# other installation.
NVCC      = nvcc -ccbin /gpfs/panther/local/apps/ibm/xlC/13.1.5/bin/xlC_r
FORT      = xlf_r

# Static-library tooling: archiver, its flags, and the archive indexer.
ARCH      = ar
ARCHFLAGS = cr
RANLIB    = ranlib


# --------------------
# flags
#
# Compiler and linker flags for the XL toolchain. The C, Fortran and nvcc
# flag sets share the same core -q options (simd, smp=omp, arch/tune=pwr8,
# cache, maxmem, hot, noipa, libansi, float=subnormals).

# Use -fPIC to make shared (.so) and static (.a) library;
# can be commented out if making only static library.
# It is commented out here, so $(FPIC) below expands to nothing.
#FPIC      = -fPIC

# C flags; -DNDEBUG and -DNOCHANGE are passed directly as preprocessor defines.
CFLAGS    = -O $(FPIC) -DNDEBUG -DNOCHANGE -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals
# Fortran flags; preprocessor options and defines are given with the -WF,
# prefix (-WF,-qfpp, -WF,-DNDEBUG, ...) instead of a bare -D.
FFLAGS    = -O $(FPIC) -qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals -WF,-DNDEBUG -WF,-DNOCHANGE
# Same as FFLAGS except that it adds -qlanglvl=extended and omits
# -WF,-DNOCHANGE.
# NOTE(review): confirm the missing -WF,-DNOCHANGE is intentional.
F90FLAGS  = -O $(FPIC) -qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals -WF,-DNDEBUG -qlanglvl=extended
# nvcc flags; each XL -q option is forwarded to the host compiler through its
# own -Xcompiler argument. -qsaveopt is not forwarded.
NVCCFLAGS = -O -DNDEBUG -DNOCHANGE -Xcompiler "-qsimd=auto" -Xcompiler "-qsmp=omp" -Xcompiler "-qarch=pwr8" -Xcompiler "-qtune=pwr8:smt8" -Xcompiler "-qcache=auto" -Xcompiler "-qmaxmem=-1" -Xcompiler "-qhot=level=2" -Xcompiler "-qnoipa" -Xcompiler "-qlibansi" -Xcompiler "-qfloat=subnormals"
# The link step reuses the C compile flags.
LDFLAGS   = $(CFLAGS) 

# C++11 (gcc >= 4.7) is not required, but has benefits like atomic operations
# (comment inherited from the upstream gcc template; with XL a language level
# would be selected through -qlanglvl, as in the placeholder below).
# `:=` snapshots CFLAGS at this point, so any later `CFLAGS +=` does not
# propagate into CXXFLAGS.
CXXFLAGS := $(CFLAGS)
#CFLAGS   += -qlanglvl=...


# --------------------
# libraries

# IBM ESSL (-lesslsmp -lessl) with -llapack and -lrefblas, followed by the
# XL runtime libraries (-lxlsmp -lxlf90_r -lxlfmath). This replaces the
# upstream "gcc with OpenBLAS" setting.
# The ESSL libraries are listed both before and after -llapack, presumably
# so that references from LAPACK back into ESSL resolve when linking static
# archives; verify before reordering.
LIB       = -lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath
# CUDA libraries, appended to the list above.
LIB      += -lcublas -lcusparse -lcudart -lcudadevrt


# --------------------
# directories

# define library directories preferably in your environment, or here.
# `?=` keeps a CUDADIR that is already set and otherwise falls back to
# /usr/local/cuda.
CUDADIR ?= /usr/local/cuda
# The leading `-` makes this include optional: no error is raised if
# make.check-cuda does not exist.
-include make.check-cuda

# Library search path: CUDA, the system lib64, a LAPACK directory under
# .../ppc64le/lapack/XL, and the XL SMP (4.1.5) and XL Fortran (15.1.5)
# library directories. All non-CUDA paths are specific to Panther.
LIBDIR    = -L$(CUDADIR)/lib64 -L/usr/lib64 -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lapack/XL -L/gpfs/panther/local/apps/ibm/xlsmp/4.1.5/lib -L/gpfs/panther/local/apps/ibm/xlf/15.1.5/lib
INC       = -I$(CUDADIR)/include

The library builds and many of the testing drivers work fine, but one particular driver fails to link because of an OpenMP (or possibly linkage) problem:

xlC_r -O  -DNDEBUG -DNOCHANGE -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals -DHAVE_CUBLAS -DMIN_CUDA_ARCH=350   \
-o testing/testing_zheevd testing/testing_zheevd.o \
-L./testing -ltest \
-L./lib -lmagma \
-L./testing/lin -llapacktest \
-L/usr/local/cuda/lib64 -L/usr/lib64 -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lapack/XL -L/gpfs/panther/local/apps/ibm/xlsmp/4.1.5/lib -L/gpfs/panther/local/apps/ibm/xlf/15.1.5/lib -lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath -lcublas -lcusparse -lcudart -lcudadevrt
./lib/libmagma.a(dlaex3_m.o):(.bss+0x0): multiple definition of `__ompCritical::info'
./lib/libmagma.a(dlaex3.o):(.bss+0x0): first defined here
make: *** [testing/testing_zheevd] Error 1

SPRAL

Change configure.ac to assume that HWLOC is present:

--- /gpfs/fairthorpe/local/SCD/jpf02/vxn61-jpf02/CDS/spral/configure.ac	2016-10-20 16:30:21.000000000 +0100
+++ configure.ac	2017-02-09 13:18:44.575643000 +0000
@@ -72,12 +72,13 @@
    )
 
 # Check for hwloc
-PKG_PROG_PKG_CONFIG # initialise $PKG_CONFIG
-PKG_CONFIG="$PKG_CONFIG --static" # we will be linking statically
-PKG_CHECK_MODULES([HWLOC], [hwloc],
-   AC_DEFINE(HAVE_HWLOC,1,[Define if you have hwloc library]),
-   AC_MSG_WARN([hwloc not supplied: cannot detect NUMA regions])
-   )
+#PKG_PROG_PKG_CONFIG # initialise $PKG_CONFIG
+#PKG_CONFIG="$PKG_CONFIG --static" # we will be linking statically
+#PKG_CHECK_MODULES([HWLOC], [hwloc],
+#   AC_DEFINE(HAVE_HWLOC,1,[Define if you have hwloc library]),
+#   AC_MSG_WARN([hwloc not supplied: cannot detect NUMA regions])
+#   )
+AC_DEFINE(HAVE_HWLOC,1,[Define if you have hwloc library])
 
 AS_IF([test "x$NVCC" != x], [
    SPRAL_NVCC_LIB

Fix Makefile.am to avoid GNU OpenMP usage and some incompatible flags:

--- /gpfs/fairthorpe/local/SCD/jpf02/vxn61-jpf02/CDS/spral/Makefile.am	2016-10-20 16:30:21.000000000 +0100
+++ Makefile.am	2017-02-09 13:26:39.065125856 +0000
@@ -6,7 +6,7 @@
 # NVCC setup
 PTX_FLAGS = -v
 #NVCCFLAGS = -Iinclude -arch=sm_20 -g -Xptxas="${PTX_FLAGS}"
-OPENMP_LIB = -lgomp # FIXME: autoconf this
+#OPENMP_LIB = -lgomp # FIXME: autoconf this
 AM_NVCC_FLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src
 AM_LD_FLAGS = -lcuda
 NVCCLINK = \
@@ -16,8 +16,8 @@
 
 # Include directory for standard C
 AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src $(OPENMP_CFLAGS) \
-				$(HWLOC_CFLAGS) -std=c99
-AM_CXXFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src -std=c++11 \
+				$(HWLOC_CFLAGS)
+AM_CXXFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src \
 				  $(OPENMP_CXXFLAGS) $(GTG_INCLUDE) $(HWLOC_CFLAGS)
 AM_FCFLAGS = $(OPENMP_CXXFLAGS) # assume CXX and FC use same flags...

configure may ask for newer config.sub and config.guess files, because the versions currently installed on Panther lack support for the ppc64le architecture; if it does, replace them with up-to-date copies.

Configure like this:

LDFLAGS="-L/usr/lib64 -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lib -L/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/lapack/XL -L/gpfs/panther/local/apps/ibm/xlsmp/4.1.5/lib -L/gpfs/panther/local/apps/ibm/xlf/15.1.5/lib" LIBS="-lhwloc" CPPFLAGS="-I/gpfs/cds/local/SCD/jpf02/vxn61-jpf02/ppc64le/include" CC=xlc_r CFLAGS="-qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals" CXX=xlC_r CXXFLAGS="-qlanglvl=extended1y -qsaveopt -qsimd=auto -qsmp=omp -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qhot=level=2 -qnoipa -qlibansi -qfloat=subnormals" F77=xlf_r FFLAGS="-qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qassert=contiguous:refalign -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals" FC=xlf2008_r FCFLAGS="-qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals" NVCC="nvcc -ccbin `which xlC_r`" NVCCFLAGS='-arch sm_37 -Xcompiler "-qsimd=auto" -Xcompiler "-qsmp=omp" -Xcompiler "-qarch=pwr8" -Xcompiler "-qtune=pwr8:smt8" -Xcompiler "-qcache=auto" -Xcompiler "-qmaxmem=-1" -Xcompiler "-qhot=level=2" -Xcompiler "-qnoipa" -Xcompiler "-qlibansi" -Xcompiler "-qfloat=subnormals"' ./configure --prefix=$CDS_HOME/ppc64le --enable-dependency-tracking --with-blas="-lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath" --with-lapack="-lesslsmp -lessl -llapack -lesslsmp -lessl -lrefblas -lxlsmp -lxlf90_r -lxlfmath" --with-metis="-lmetis"

A couple of errors:

xlf2008_r   -qlistopt -qsaveopt -qthreaded -qnosave -qsimd=auto -qsmp=omp -qsclk=micro -qarch=pwr8 -qtune=pwr8:smt8 -qcache=auto -qmaxmem=-1 -qstacktemp=-1 -qtbtable=full -qwarn64 -WF,-qfpp -WF,-qppsuborigarg -qhot=level=2 -qnoipa -qlibansi -qlibmpi -qfloat=subnormals -c -o src/ssids/fkeep.o src/ssids/fkeep.f90
"src/ssids/fkeep.f90", line 93.24: 1514-391 (S) A variable in the SHARED clause must not be a pointee, or a THREADLOCAL common block variable.
"src/ssids/fkeep.f90", line 111.6: 1515-019 (S) Syntax is incorrect.
"src/ssids/fkeep.f90", line 147.6: 1515-019 (S) Syntax is incorrect.
** spral_ssids_fkeep   === End of Compilation 1 ===
1501-511  Compilation failed for file fkeep.f90.

The first error goes away when fkeep is removed from the shared clause, although removing it should itself cause an error because of default(none). The other errors relate to OpenMP taskgroup-related directives not being properly supported by the compiler.

...END OF XL USAGE FOR NOW...