From 46dca9b79425248d17f1d47cbaf7352f870de353 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:09:13 -0400 Subject: [PATCH 01/16] Create Intel_Parallelworks_CI.yaml --- .github/workflows/Intel_Parallelworks_CI.yaml | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 .github/workflows/Intel_Parallelworks_CI.yaml diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml new file mode 100644 index 0000000..6f144b5 --- /dev/null +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -0,0 +1,151 @@ +name: Compile SHiELD SOLO and run tests + +# This GitHub Action Workflow is running on the cloud devcimultiintel cluster +# The tests are run inside of a container with the following software/libraries: +# -intel: 2023.2.0 +# -hdf5: 1.14.0 +# -netcdf-c: 4.9.2 +# -netcdf-fortran: 4.6.0 +# -cmake +# -libyaml + +on: + pull_request: + branches: + - main + +jobs: + checkout: + runs-on: [self-hosted, devcimultiintel] + name: Checkout Code + steps: + # It can take a long time (5-15 minutes) to spinup nodes + # so this salloc will prompt 46 nodes to startup and stay active for 20 min + # this is enough nodes for the first 17 tests to run in parallel, and we + # have 17 runners configured. 
+ - run: salloc --partition=p2 -N 46 -J SB_$GITHUB_REF sleep 20m & + - run: /contrib/fv3/SHiELD_build_CI/checkout.sh $GITHUB_REF + + build: + runs-on: [self-hosted,devcimultiintel] + name: SOLO SHiELD build + needs: [checkout] + strategy: + fail-fast: true + max-parallel: 3 + matrix: + runpath: [/contrib/fv3/SHiELD_build_CI/] + runscript: [swcompile.sh, nhcompile.sh, hydrocompile.sh] + steps: + - env: + RUNPATH: ${{ matrix.runpath }} + RUNSCRIPT: ${{ matrix.runscript }} + run: $RUNPATH/$RUNSCRIPT $GITHUB_REF + + test: + runs-on: [self-hosted, devcimultiintel] + name: SOLO SHiELD test suite + needs: [checkout, build] + strategy: + fail-fast: false + max-parallel: 17 + matrix: + runpath: [/contrib/fv3/SHiELD_build_CI/] + runscript: + # These are placed in order of largest to smallest jobs + #layout 8,8 needs 8 nodes on dvcimultiintel cluster + - C512r20.solo.superC.sh + - C768.sw.BTwave.sh + #layout 4,8 needs 4 nodes on dvcimultiintel cluster + - C256r20.solo.superC.sh + - C384.sw.BLvortex.sh + #layout 4,4 needs 2 nodes on dvcimultiintel cluster + - C128r20.solo.superC.sh + - C128r3.solo.TC.d1.sh + - C128r3.solo.TC.h6.sh + - C128r3.solo.TC.sh + - C128r3.solo.TC.tr8.sh + - C192.sw.BLvortex.sh + - C192.sw.BTwave.sh + - C192.sw.modon.sh + - C384.sw.BTwave.sh + #layout 4,1 and 2,2 need 1 node on dvcimultiintel cluster + - C96.solo.BCdry.hyd.sh + - C96.solo.BCdry.sh + - C96.solo.BCmoist.hyd.d3.sh + - C96.solo.BCmoist.hyd.sh + - C96.solo.BCmoist.nhK.sh + - C96.solo.BCmoist.sh + - C96.solo.mtn_rest.hyd.diff2.sh + - C96.solo.mtn_rest.hyd.sh + - C96.solo.mtn_rest.nonmono.diff2.sh + - C96.solo.mtn_rest.sh + - C96.sw.BLvortex.sh + - C96.sw.BTwave.sh + - C96.sw.modon.sh + - C96.sw.RHwave.sh + - d96_1k.solo.mtn_rest_shear.olddamp.sh + - d96_1k.solo.mtn_rest_shear.sh + - d96_1k.solo.mtn_schar.mono.sh + - d96_1k.solo.mtn_schar.sh + - d96_2k.solo.bubble.n0.sh + - d96_2k.solo.bubble.nhK.sh + - d96_2k.solo.bubble.sh + - d96_500m.solo.mtn_schar.sh + steps: + # This will end the slurm 
job started in the checkout job + - run: scancel -n $GITHUB_REF + - env: + RUNPATH: ${{ matrix.runpath }} + RUNSCRIPT: ${{ matrix.runscript }} + run: $RUNPATH/$RUNSCRIPT $GITHUB_REF + shutdown: + runs-on: [self-hosted, devcimultiintel] + name: Shutdown Processes + if: always() + needs: [checkout, build, test] + strategy: + fail-fast: false + max-parallel: 17 + matrix: + test: + - C512r20.solo.superC + - C768.sw.BTwave + - C256r20.solo.superC + - C384.sw.BLvortex + - C128r20.solo.superC + - C128r3.solo.TC.d1 + - C128r3.solo.TC.h6 + - C128r3.solo.TC + - C128r3.solo.TC.tr8 + - C192.sw.BLvortex + - C192.sw.BTwave + - C192.sw.modon + - C384.sw.BTwave + - C96.solo.BCdry.hyd + - C96.solo.BCdry + - C96.solo.BCmoist.hyd.d3 + - C96.solo.BCmoist.hyd + - C96.solo.BCmoist.nhK + - C96.solo.BCmoist + - C96.solo.mtn_rest.hyd.diff2 + - C96.solo.mtn_rest.hyd + - C96.solo.mtn_rest.nonmono.diff2 + - C96.solo.mtn_rest + - C96.sw.BLvortex + - C96.sw.BTwave + - C96.sw.modon + - C96.sw.RHwave + - d96_1k.solo.mtn_rest_shear.olddamp + - d96_1k.solo.mtn_rest_shear + - d96_1k.solo.mtn_schar.mono + - d96_1k.solo.mtn_schar + - d96_2k.solo.bubble.n0 + - d96_2k.solo.bubble.nhK + - d96_2k.solo.bubble + - d96_500m.solo.mtn_schar + steps: + - run: scancel -n SB_$GITHUB_REF + - env: + TEST: ${{ matrix.test }} + run: scancel -n SB_$GITHUB_REF_$TEST From efd8d8a031564b550a0975328679d80a992934a9 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Wed, 4 Oct 2023 14:00:05 -0400 Subject: [PATCH 02/16] Create daily_cleanup_parallelworks.yaml --- .../workflows/daily_cleanup_parallelworks.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/daily_cleanup_parallelworks.yaml diff --git a/.github/workflows/daily_cleanup_parallelworks.yaml b/.github/workflows/daily_cleanup_parallelworks.yaml new file mode 100644 index 0000000..7f89005 --- /dev/null +++ b/.github/workflows/daily_cleanup_parallelworks.yaml @@ -0,0 
+1,17 @@ +name: Old Build Cleanup + +# This GitHub Action Workflow is running on the devcimultiintel cluster +# This will delete all build directories older than 30 days +# Build directories are on the cloud at /contrib/fv3/2023.2.0 + +on: + schedule: + # run daily at midnight + - cron: '0 0 * * *' + +jobs: + delete: + runs-on: [self-hosted, devcimultiintel] + name: Delete Builds + steps: + - run: find /contrib/fv3/2023.2.0/SHiELD_build/refs/pull -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -delete From edc1ac2596e46dc8e79a313141bb7499c2f7426f Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Wed, 4 Oct 2023 15:15:36 -0400 Subject: [PATCH 03/16] Update Intel_Parallelworks_CI.yaml --- .github/workflows/Intel_Parallelworks_CI.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index 6f144b5..d89e855 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -94,7 +94,7 @@ jobs: - d96_500m.solo.mtn_schar.sh steps: # This will end the slurm job started in the checkout job - - run: scancel -n $GITHUB_REF + - run: scancel -n SB_$GITHUB_REF - env: RUNPATH: ${{ matrix.runpath }} RUNSCRIPT: ${{ matrix.runscript }} From d34020145ac2a90fbc55943bfcc2fb3cea5ffa54 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:38:54 -0400 Subject: [PATCH 04/16] Update Intel_Parallelworks_CI.yaml --- .github/workflows/Intel_Parallelworks_CI.yaml | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index d89e855..a14ed78 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -13,21 +13,29 @@ on: pull_request:
branches: - main - + +#this should cancel in progress ci runs for the same PR +#(e.g. a second commit on the same PR comes in while CI is still running) +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: checkout: - runs-on: [self-hosted, devcimultiintel] + if: github.repository == 'NOAA-GFDL/SHiELD_build' + runs-on: [shieldbuild] name: Checkout Code steps: # It can take a long time (5-15 minutes) to spinup nodes # so this salloc will prompt 46 nodes to startup and stay active for 20 min # this is enough nodes for the first 17 tests to run in parallel, and we # have 17 runners configured. - - run: salloc --partition=p2 -N 46 -J SB_$GITHUB_REF sleep 20m & - - run: /contrib/fv3/SHiELD_build_CI/checkout.sh $GITHUB_REF + - run: salloc --partition=p2 -N 46 -J SB_$GITHUB_SHA sleep 20m & + - run: /contrib/fv3/SHiELD_build_CI/checkout.sh $GITHUB_REF $GITHUB_SHA build: - runs-on: [self-hosted,devcimultiintel] + if: github.repository == 'NOAA-GFDL/SHiELD_build' + runs-on: [shieldbuild] name: SOLO SHiELD build needs: [checkout] strategy: @@ -40,10 +48,11 @@ jobs: - env: RUNPATH: ${{ matrix.runpath }} RUNSCRIPT: ${{ matrix.runscript }} - run: $RUNPATH/$RUNSCRIPT $GITHUB_REF + run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA test: - runs-on: [self-hosted, devcimultiintel] + if: github.repository == 'NOAA-GFDL/SHiELD_build' + runs-on: [shieldbuild] name: SOLO SHiELD test suite needs: [checkout, build] strategy: @@ -94,15 +103,15 @@ jobs: - d96_500m.solo.mtn_schar.sh steps: # This will end the slurm job started in the checkout job - - run: scancel -n SB_$GITHUB_REF + - run: scancel -n SB_$GITHUB_SHA - env: RUNPATH: ${{ matrix.runpath }} RUNSCRIPT: ${{ matrix.runscript }} - run: $RUNPATH/$RUNSCRIPT $GITHUB_REF + run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA shutdown: - runs-on: [self-hosted, devcimultiintel] + if: always() && github.repository == 'NOAA-GFDL/SHiELD_build' + runs-on: [shieldbuild] name: Shutdown Processes - if: 
always() needs: [checkout, build, test] strategy: fail-fast: false @@ -145,7 +154,7 @@ jobs: - d96_2k.solo.bubble - d96_500m.solo.mtn_schar steps: - - run: scancel -n SB_$GITHUB_REF + - run: scancel -n $GITHUB_SHA - env: - TEST: ${{ matrix.test }} - run: scancel -n SB_$GITHUB_REF_$TEST + JOB: ${{ github.sha }}_${{ matrix.test }} + run: scancel -n $JOB From d26092fc3cc04c1a6bb256b38e2ff32dc5eaae43 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:25:21 -0400 Subject: [PATCH 05/16] Update daily_cleanup_parallelworks.yaml --- .github/workflows/daily_cleanup_parallelworks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/daily_cleanup_parallelworks.yaml b/.github/workflows/daily_cleanup_parallelworks.yaml index 7f89005..6d0b852 100644 --- a/.github/workflows/daily_cleanup_parallelworks.yaml +++ b/.github/workflows/daily_cleanup_parallelworks.yaml @@ -11,7 +11,7 @@ on: jobs: delete: - runs-on: [self-hosted, devcimultiintel] + runs-on: [shieldbuild] name: Delete Builds steps: - run: find /contrib/fv3/2023.2.0/SHiELD_build/refs/pull -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -delete From ea78a85e5a6aa68bb2feba0e8319a0bbc77edc0e Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:27:25 -0400 Subject: [PATCH 06/16] Update Intel_Parallelworks_CI.yaml --- .github/workflows/Intel_Parallelworks_CI.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index a14ed78..eccfaad 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -30,7 +30,7 @@ jobs: # so this salloc will prompt 46 nodes to startup and stay active for 20 min # this is enough nodes for the first 17 tests to run in parallel, and we # have 17 runners 
configured. - - run: salloc --partition=p2 -N 46 -J SB_$GITHUB_SHA sleep 20m & + - run: salloc --partition=p2 -N 46 -J $GITHUB_SHA sleep 20m & - run: /contrib/fv3/SHiELD_build_CI/checkout.sh $GITHUB_REF $GITHUB_SHA build: @@ -103,7 +103,7 @@ jobs: - d96_500m.solo.mtn_schar.sh steps: # This will end the slurm job started in the checkout job - - run: scancel -n SB_$GITHUB_SHA + - run: scancel -n $GITHUB_SHA - env: RUNPATH: ${{ matrix.runpath }} RUNSCRIPT: ${{ matrix.runscript }} From c9586898311776f21d4686b283bd093df62c8f21 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Wed, 24 Apr 2024 09:24:56 -0400 Subject: [PATCH 07/16] Update Intel_Parallelworks_CI.yaml --- .github/workflows/Intel_Parallelworks_CI.yaml | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index eccfaad..ebeac15 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -59,55 +59,55 @@ jobs: fail-fast: false max-parallel: 17 matrix: - runpath: [/contrib/fv3/SHiELD_build_CI/] - runscript: + runscript: [/contrib/fv3/SHiELD_build_CI/run_test.sh] + argument: # These are placed in order of largest to smallest jobs - #layout 8,8 needs 8 nodes on dvcimultiintel cluster - - C512r20.solo.superC.sh - - C768.sw.BTwave.sh - #layout 4,8 needs 4 nodes on dvcimultiintel cluster - - C256r20.solo.superC.sh - - C384.sw.BLvortex.sh - #layout 4,4 needs 2 nodes on dvcimultiintel cluster - - C128r20.solo.superC.sh - - C128r3.solo.TC.d1.sh - - C128r3.solo.TC.h6.sh - - C128r3.solo.TC.sh - - C128r3.solo.TC.tr8.sh - - C192.sw.BLvortex.sh - - C192.sw.BTwave.sh - - C192.sw.modon.sh - - C384.sw.BTwave.sh - #layout 4,1 and 2,2 need 1 node on dvcimultiintel cluster - - C96.solo.BCdry.hyd.sh - - C96.solo.BCdry.sh - - C96.solo.BCmoist.hyd.d3.sh - - C96.solo.BCmoist.hyd.sh - - 
C96.solo.BCmoist.nhK.sh - - C96.solo.BCmoist.sh - - C96.solo.mtn_rest.hyd.diff2.sh - - C96.solo.mtn_rest.hyd.sh - - C96.solo.mtn_rest.nonmono.diff2.sh - - C96.solo.mtn_rest.sh - - C96.sw.BLvortex.sh - - C96.sw.BTwave.sh - - C96.sw.modon.sh - - C96.sw.RHwave.sh - - d96_1k.solo.mtn_rest_shear.olddamp.sh - - d96_1k.solo.mtn_rest_shear.sh - - d96_1k.solo.mtn_schar.mono.sh - - d96_1k.solo.mtn_schar.sh - - d96_2k.solo.bubble.n0.sh - - d96_2k.solo.bubble.nhK.sh - - d96_2k.solo.bubble.sh - - d96_500m.solo.mtn_schar.sh + #layout 8,8 needs 8 nodes + - C512r20.solo.superC + - C768.sw.BTwave + #layout 4,8 needs 4 nodes + - C256r20.solo.superC + - C384.sw.BLvortex + #layout 4,4 needs 2 nodes + - C128r20.solo.superC + - C128r3.solo.TC.d1 + - C128r3.solo.TC.h6 + - C128r3.solo.TC + - C128r3.solo.TC.tr8 + - C192.sw.BLvortex + - C192.sw.BTwave + - C192.sw.modon + - C384.sw.BTwave + #layout 4,1 and 2,2 need 1 node + - C96.solo.BCdry.hyd + - C96.solo.BCdry + - C96.solo.BCmoist.hyd.d3 + - C96.solo.BCmoist.hyd + - C96.solo.BCmoist.nhK + - C96.solo.BCmoist + - C96.solo.mtn_rest.hyd.diff2 + - C96.solo.mtn_rest.hyd + - C96.solo.mtn_rest.nonmono.diff2 + - C96.solo.mtn_rest + - C96.sw.BLvortex + - C96.sw.BTwave + - C96.sw.modon + - C96.sw.RHwave + - d96_1k.solo.mtn_rest_shear.olddamp + - d96_1k.solo.mtn_rest_shear + - d96_1k.solo.mtn_schar.mono + - d96_1k.solo.mtn_schar + - d96_2k.solo.bubble.n0 + - d96_2k.solo.bubble.nhK + - d96_2k.solo.bubble + - d96_500m.solo.mtn_schar steps: # This will end the slurm job started in the checkout job - run: scancel -n $GITHUB_SHA - env: - RUNPATH: ${{ matrix.runpath }} RUNSCRIPT: ${{ matrix.runscript }} - run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA + ARG1: ${{ matrix.argument }} + run: $RUNSCRIPT $ARG1 $GITHUB_REF $GITHUB_SHA shutdown: if: always() && github.repository == 'NOAA-GFDL/SHiELD_build' runs-on: [shieldbuild] From ed90515e6ba6db96adabbe8934a3b351a7b4132a Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 14:08:28 +0000 
Subject: [PATCH 08/16] Add Parallelworks CI scripts to repo --- .github/.parallelworks/README.md | 6 ++ .github/.parallelworks/checkout.sh | 88 ++++++++++++++++++++++++++ .github/.parallelworks/hydrocompile.sh | 39 ++++++++++++ .github/.parallelworks/nhcompile.sh | 39 ++++++++++++ .github/.parallelworks/run_test.sh | 80 +++++++++++++++++++++++ .github/.parallelworks/swcompile.sh | 40 ++++++++++++ 6 files changed, 292 insertions(+) create mode 100644 .github/.parallelworks/README.md create mode 100755 .github/.parallelworks/checkout.sh create mode 100755 .github/.parallelworks/hydrocompile.sh create mode 100755 .github/.parallelworks/nhcompile.sh create mode 100755 .github/.parallelworks/run_test.sh create mode 100755 .github/.parallelworks/swcompile.sh diff --git a/.github/.parallelworks/README.md b/.github/.parallelworks/README.md new file mode 100644 index 0000000..4f1dfe6 --- /dev/null +++ b/.github/.parallelworks/README.md @@ -0,0 +1,6 @@ +# .parallelworks Directory + +The .parallelworks directory stores the CI scripts that reside on Parallelworks +These scripts are executed via the GitHub Actions Workflows in .github/workflows + +On Parallelworks these scripts are installed at: /contrib/fv3/SHiELD_build_CI diff --git a/.github/.parallelworks/checkout.sh b/.github/.parallelworks/checkout.sh new file mode 100755 index 0000000..eadf414 --- /dev/null +++ b/.github/.parallelworks/checkout.sh @@ -0,0 +1,88 @@ +#!/bin/sh -xe + +############################################################################## +## User set up variables +## Root directory for CI +dirRoot=/contrib/fv3 +## Intel version to be used +intelVersion=2023.2.0 +############################################################################## +## HPC-ME container +container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif +container_env_script=/contrib/containers/load_spack_noaa-intel.sh +############################################################################## +## Set up the directories +# 
First argument should be $GITHUB_REF which is the reference to the PR/branch +# to be checked out for SHiELD_build +if [ -z "$1" ] + then + echo "No branch/PR supplied; using main" + branch=main + else + echo Branch is ${1} + branch=${1} +fi +# Second Argument should be $GITHUB_SHA which is the commit hash of the +# branch or PR to trigger the CI, if run manually, you do not need a 2nd +# argument. This is needed in the circumstance where a PR is created, +# then the CI triggers, and before that CI has finished, the developer +# pushes a newer commit which triggers a second round of CI. We would +# like unique directories so that both CI runs do not interfere. +if [ -z "$2" ] + then + echo "No second argument" + commit="" + else + echo Commit is ${2} + commit=${2} +fi + +testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} +logDir=${testDir}/log +export MODULESHOME=/usr/share/lmod/lmod +#Define External Libs path +export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs +mkdir -p ${EXTERNAL_LIBS} +## create directories +rm -rf ${testDir} +mkdir -p ${logDir} +# salloc commands to start up +#2 tests layout 8,8 (16 nodes) +#2 tests layout 4,8 (8 nodes) +#9 tests layout 4,4 (18 nodes) +#5 tests layout 4,1 (5 nodes) +#17 tests layout 2,2 (17 nodes) +#salloc --partition=p2 -N 64 -J ${branch} sleep 20m & + +## clone code +cd ${testDir} +git clone --recursive https://github.com/NOAA-GFDL/SHiELD_build.git +## Check out the PR +cd ${testDir}/SHiELD_build && git fetch origin ${branch}:toMerge && git merge toMerge + +##checkout components +cd ${testDir}/SHiELD_build && ./CHECKOUT_code +#Check if we already have FMS compiled +grep -m 1 "fms_release" ${testDir}/SHiELD_build/CHECKOUT_code > ${logDir}/release.txt +source ${logDir}/release.txt +echo ${fms_release} +echo `cat ${EXTERNAL_LIBS}/FMSversion` +if [[ ${fms_release} != `cat ${EXTERNAL_LIBS}/FMSversion` ]] + then + #remove libFMS if it exists + if [ -d $EXTERNAL_LIBS/libFMS ] + then + rm -rf 
$EXTERNAL_LIBS/libFMS + fi + if [ -e $EXTERNAL_LIBS/FMSversion ] + then + rm $EXTERNAL_LIBS/FMSversion + fi + echo $fms_release > $EXTERNAL_LIBS/FMSversion + echo $container > $EXTERNAL_LIBS/FMScontainerversion + echo $container_env_script >> $EXTERNAL_LIBS/FMScontainerversion + # Build FMS + cd ${testDir}/SHiELD_build/Build + set -o pipefail + singularity exec -B /contrib ${container} ${container_env_script} "./BUILDlibfms intel" + fi diff --git a/.github/.parallelworks/hydrocompile.sh b/.github/.parallelworks/hydrocompile.sh new file mode 100755 index 0000000..6c3fb7c --- /dev/null +++ b/.github/.parallelworks/hydrocompile.sh @@ -0,0 +1,39 @@ +#!/bin/sh -xe + +############################################################################## +## User set up variables +## Root directory for CI +dirRoot=/contrib/fv3 +## Intel version to be used +intelVersion=2023.2.0 +############################################################################## +## HPC-ME container +container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif +container_env_script=/contrib/containers/load_spack_noaa-intel.sh +############################################################################## +## Set up the directories +if [ -z "$1" ] + then + echo "No branch/PR supplied; using main" + branch=main + else + echo Branch is ${1} + branch=${1} +fi +if [ -z "$2" ] + then + echo "No second argument" + commit=none + else + echo Commit is ${2} + commit=${2} +fi +testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} +logDir=${testDir}/log +# Set up build +cd ${testDir}/SHiELD_build/Build +#Define External Libs path +export EXTERNAL_LIBS=${dirRoot}/externallibs +# Build SHiELD +set -o pipefail +singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo hydro 64bit repro intel clean" diff --git a/.github/.parallelworks/nhcompile.sh b/.github/.parallelworks/nhcompile.sh new file mode 100755 index 0000000..b50c837 --- /dev/null +++ 
b/.github/.parallelworks/nhcompile.sh @@ -0,0 +1,39 @@ +#!/bin/sh -xe + +############################################################################## +## User set up variables +## Root directory for CI +dirRoot=/contrib/fv3 +## Intel version to be used +intelVersion=2023.2.0 +############################################################################## +## HPC-ME container +container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif +container_env_script=/contrib/containers/load_spack_noaa-intel.sh +############################################################################## +## Set up the directories +if [ -z "$1" ] + then + echo "No branch/PR supplied; using main" + branch=main + else + echo Branch is ${1} + branch=${1} +fi +if [ -z "$2" ] + then + echo "No second argument" + commit=none + else + echo Commit is ${2} + commit=${2} +fi +testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} +logDir=${testDir}/log +# Set up build +cd ${testDir}/SHiELD_build/Build +#Define External Libs path +export EXTERNAL_LIBS=${dirRoot}/externallibs +# Build SHiELD +set -o pipefail +singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo nh 64bit repro intel clean" diff --git a/.github/.parallelworks/run_test.sh b/.github/.parallelworks/run_test.sh new file mode 100755 index 0000000..a4e9d02 --- /dev/null +++ b/.github/.parallelworks/run_test.sh @@ -0,0 +1,80 @@ +#!/bin/bash -xe +ulimit -s unlimited +############################################################################## +## User set up variables +## Root directory for CI +dirRoot=/contrib/fv3 +## Intel version to be used +intelVersion=2023.2.0 +############################################################################## +## HPC-ME container +container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif +container_env_script=/contrib/containers/load_spack_noaa-intel.sh + +##Parse Arguments +#first argument should be the name of the test and is mandatory +if [ -z "$1"
] + then + echo "Please run this script with an argument indicating what test to run. For example:" + echo "./run_test.sh C128r20.solo.superC" + else + echo Test is ${1} + test=${1} +fi +#second argument is the branch name. This is optional. Default is main branch if none supplied +if [ -z "$2" ] + then + echo "No branch supplied; using main" + branch=main + else + echo Branch is ${2} + branch=${2} +fi +#third argument is the commit hash if running from CI. This is optional +if [ -z "$3" ] + then + echo "No commit being used in file path" + commit="" + else + echo Commit is ${3} + commit=${3} +fi + +## Set up the directories +MODULESHOME=/usr/share/lmod/lmod +testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} +logDir=${testDir}/log +baselineDir=${dirRoot}/baselines/intel/${intelVersion} + +## Run the CI Test +# Define the builddir testscriptdir and rundir +# Set the BUILDDIR for the test script to use +export BUILDDIR="${testDir}/SHiELD_build" +testscriptDir=${BUILDDIR}/RTS/CI +runDir=${BUILDDIR}/CI/BATCH-CI + +# Run CI test scripts +cd ${testscriptDir} +set -o pipefail +# Execute the test piping output to log file +./${test} " --partition=p2 --mpi=pmi2 --job-name=${commit}_${test} singularity exec -B /contrib ${container} ${container_env_script}" |& tee ${logDir}/run_${test}.log + +## Compare Restarts to Baseline +#The following tests are not expectred to have run-to-run reproducibility: +#d96_2k.solo.bubble +#d96_2k.solo.bubble.n0 +#d96_2k.solo.bubble.nhK +if [[ ${test} == "d96_2k.solo.bubble" || ${test} == "d96_2k.solo.bubble.n0" || ${test} == "d96_2k.solo.bubble.nhK" ]] + then + echo "${test} is not expected to reproduce so answers were not compared" + else + source $MODULESHOME/init/sh + export MODULEPATH=/mnt/shared/manual_modules:/usr/share/modulefiles/Linux:/usr/share/modulefiles/Core:/usr/share/lmod/lmod/modulefiles/Core:/apps/modules/modulefiles:/apps/modules/modulefamilies/intel + module load intel/2022.1.2 + module load netcdf + module 
load nccmp + for resFile in `ls ${baselineDir}/${test}` + do + nccmp -d ${baselineDir}/${test}/${resFile} ${runDir}/${test}/RESTART/${resFile} + done +fi diff --git a/.github/.parallelworks/swcompile.sh b/.github/.parallelworks/swcompile.sh new file mode 100755 index 0000000..4d1582f --- /dev/null +++ b/.github/.parallelworks/swcompile.sh @@ -0,0 +1,40 @@ +#!/bin/sh -xe + +############################################################################## +## User set up variables +## Root directory for CI +dirRoot=/contrib/fv3 +## Intel version to be used +intelVersion=2023.2.0 +############################################################################## +## HPC-ME container +container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif +container_env_script=/contrib/containers/load_spack_noaa-intel.sh +############################################################################## +## Set up the directories +if [ -z "$1" ] + then + echo "No branch/PR supplied; using main" + branch=main + else + echo Branch is ${1} + branch=${1} +fi +if [ -z "$2" ] + then + echo "No second argument" + commit=none + else + echo Commit is ${2} + commit=${2} +fi +testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} +logDir=${testDir}/log +# Set up build +cd ${testDir}/SHiELD_build/Build +#Define External Libs path +export EXTERNAL_LIBS=${dirRoot}/externallibs +# Build SHiELD +set -o pipefail +singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo sw 64bit repro intel clean" + From 8cf968520d13d63097579ca1c0e07973440ade8a Mon Sep 17 00:00:00 2001 From: Lauren Chilutti <60401591+laurenchilutti@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:55:47 -0400 Subject: [PATCH 09/16] Create Intel_Parallelworks_multbuild.yaml --- .../Intel_Parallelworks_multbuild.yaml | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 .github/workflows/Intel_Parallelworks_multbuild.yaml diff --git 
a/.github/workflows/Intel_Parallelworks_multbuild.yaml b/.github/workflows/Intel_Parallelworks_multbuild.yaml new file mode 100644 index 0000000..cab3867 --- /dev/null +++ b/.github/workflows/Intel_Parallelworks_multbuild.yaml @@ -0,0 +1,46 @@ +name: Test Multiple Compiles + +# This GitHub Action Workflow is running on the cloud shieldbuildciintel cluster +# The tests are run inside of a container with the following software/libraries: +# -intel: 2023.2.0 +# -hdf5: 1.14.0 +# -netcdf-c: 4.9.2 +# -netcdf-fortran: 4.6.0 +# -cmake +# -libyaml + +on: + workflow_run: + workflows: ["Compile SHiELD SOLO and run tests"] + branches: [main] + types: + - completed + +#this should cancel in progress ci runs for the same PR +#(e.g. a second commit on the same PR comes in while CI is still running) +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + compile: + if: github.repository == 'NOAA-GFDL/SHiELD_build' + runs-on: [shieldbuild] + name: Compile SHiELD + strategy: + fail-fast: false + max-parallel: 17 + matrix: + config: [shield, solo] + hyrdo: [sw, nh, hydro] + comp: [prod, repro, debug] + bit: [32bit, 64bit] + steps: + - env: + container: /contrib/containers/noaa-intel-prototype_2023.09.25.sif + container_env_script: /contrib/containers/load_spack_noaa-intel.sh + - run: | + cd /contrib/fv3/2023.2.0/SHiELD_build/$GITHUB_REF/$GITHUB_SHA/SHiELD_build/Build + export EXTERNAL_LIBS=/contrib/fv3/2023.2.0/SHiELD_build/externallibs + singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE ${{ matrix.config }} ${{ matrix.hydro }} ${{ matrix.comp }} ${{ matrix.bit }} intel clean" + From 0e6f603134d7364dc94b5f7a68225c501dd36189 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 14:58:16 +0000 Subject: [PATCH 10/16] update path to external libs --- .github/.parallelworks/hydrocompile.sh | 2 +- .github/.parallelworks/nhcompile.sh | 2 +- .github/.parallelworks/swcompile.sh | 2 +- 3 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/.github/.parallelworks/hydrocompile.sh b/.github/.parallelworks/hydrocompile.sh index 6c3fb7c..355f708 100755 --- a/.github/.parallelworks/hydrocompile.sh +++ b/.github/.parallelworks/hydrocompile.sh @@ -33,7 +33,7 @@ logDir=${testDir}/log # Set up build cd ${testDir}/SHiELD_build/Build #Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/externallibs +export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs # Build SHiELD set -o pipefail singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo hydro 64bit repro intel clean" diff --git a/.github/.parallelworks/nhcompile.sh b/.github/.parallelworks/nhcompile.sh index b50c837..187cfda 100755 --- a/.github/.parallelworks/nhcompile.sh +++ b/.github/.parallelworks/nhcompile.sh @@ -33,7 +33,7 @@ logDir=${testDir}/log # Set up build cd ${testDir}/SHiELD_build/Build #Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/externallibs +export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs # Build SHiELD set -o pipefail singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo nh 64bit repro intel clean" diff --git a/.github/.parallelworks/swcompile.sh b/.github/.parallelworks/swcompile.sh index 4d1582f..0cc44e5 100755 --- a/.github/.parallelworks/swcompile.sh +++ b/.github/.parallelworks/swcompile.sh @@ -33,7 +33,7 @@ logDir=${testDir}/log # Set up build cd ${testDir}/SHiELD_build/Build #Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/externallibs +export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs # Build SHiELD set -o pipefail singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo sw 64bit repro intel clean" From 97e694eef8c69e61a7f508715c5f5e988d2e94f9 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 16:50:49 +0000 Subject: [PATCH 11/16] compile with a matrix of options and cleanup argument parsing ---
.github/.parallelworks/checkout.sh | 52 ++++++------- .github/.parallelworks/compile.sh | 75 +++++++++++++++++++ .github/.parallelworks/hydrocompile.sh | 39 ---------- .github/.parallelworks/nhcompile.sh | 39 ---------- .github/.parallelworks/run_test.sh | 71 ++++++++++-------- .github/.parallelworks/swcompile.sh | 40 ---------- .github/workflows/Intel_Parallelworks_CI.yaml | 18 +++-- .../Intel_Parallelworks_multbuild.yaml | 46 ------------ 8 files changed, 154 insertions(+), 226 deletions(-) create mode 100755 .github/.parallelworks/compile.sh delete mode 100755 .github/.parallelworks/hydrocompile.sh delete mode 100755 .github/.parallelworks/nhcompile.sh delete mode 100755 .github/.parallelworks/swcompile.sh delete mode 100644 .github/workflows/Intel_Parallelworks_multbuild.yaml diff --git a/.github/.parallelworks/checkout.sh b/.github/.parallelworks/checkout.sh index eadf414..dfae993 100755 --- a/.github/.parallelworks/checkout.sh +++ b/.github/.parallelworks/checkout.sh @@ -11,32 +11,34 @@ intelVersion=2023.2.0 container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif container_env_script=/contrib/containers/load_spack_noaa-intel.sh ############################################################################## -## Set up the directories -# First argument should be $GITHUB_REF which is the reference to the PR/branch -# to be checked out for SHiELD_build -if [ -z "$1" ] - then - echo "No branch/PR supplied; using main" - branch=main - else - echo Branch is ${1} - branch=${1} -fi -# Second Argument should be $GITHUB_SHA which is the commit hash of the -# branch or PR to trigger the CI, if run manually, you do not need a 2nd -# argument. This is needed in the circumstance where a PR is created, -# then the CI triggers, and before that CI has finished, the developer -# pushes a newer commit which triggers a second round of CI. We would -# like unique directories so that both CI runs do not interfere. 
-if [ -z "$2" ] - then - echo "No second argument" - commit="" - else - echo Commit is ${2} - commit=${2} -fi +#Parse Arguments +branch=main +commit="" +while [[ $# -gt 0 ]]; do + case $1 in + -b|--branch) + branch="$2" + shift # past argument + shift # past value + ;; + -h|--hash) + commit="$2" + shift # past argument + shift # past value + ;; + *) + echo "unknown argument" + exit 1 + ;; + esac +done + +echo "branch is $branch" +echo "commit is $commit" + + +## Set up the directories testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} logDir=${testDir}/log export MODULESHOME=/usr/share/lmod/lmod diff --git a/.github/.parallelworks/compile.sh b/.github/.parallelworks/compile.sh new file mode 100755 index 0000000..1a98720 --- /dev/null +++ b/.github/.parallelworks/compile.sh @@ -0,0 +1,75 @@ +#!/bin/sh -xe + +############################################################################## +## User set up variables +## Root directory for CI +dirRoot=/contrib/fv3 +## Intel version to be used +intelVersion=2023.2.0 +############################################################################## +## HPC-ME container +container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif +container_env_script=/contrib/containers/load_spack_noaa-intel.sh +############################################################################## + +#Parse Arguments +branch=main +commit="" +while [[ $# -gt 0 ]]; do + case $1 in + -b|--branch) + branch="$2" + shift # past argument + shift # past value + ;; + -h|--hash) + commit="$2" + shift # past argument + shift # past value + ;; + -c|--config) + config="$2" + shift # past argument + shift # past value + -hy|--hydro) + hydro="$2" + shift # past argument + shift # past value + -b|--bit) + bit="$2" + shift # past argument + shift # past value + -m|--mode) + mode="$2" + shift # past argument + shift # past value + *) + echo "unknown argument" + exit 1 + ;; + esac +done + +if [ -z $mode | -z $bit | -z $hydro | -b $config ] + then 
+ echo "must specify config, hydro, bit, and mode options for compile" + exit 1 +fi + +echo "branch is $branch" +echo "commit is $commit" +echo "mode is $mode" +echo "bit is $bit" +echo "hydro is $hydro" +echo "config is $config" + +## Set up the directories +testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} +logDir=${testDir}/log +# Set up build +cd ${testDir}/SHiELD_build/Build +#Define External Libs path +export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs +# Build SHiELD +set -o pipefail +singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE ${config} ${hydro} ${bit} ${mode} intel clean" diff --git a/.github/.parallelworks/hydrocompile.sh b/.github/.parallelworks/hydrocompile.sh deleted file mode 100755 index 355f708..0000000 --- a/.github/.parallelworks/hydrocompile.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh -xe - -############################################################################## -## User set up variables -## Root directory for CI -dirRoot=/contrib/fv3 -## Intel version to be used -intelVersion=2023.2.0 -############################################################################## -## HPC-ME container -container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif -container_env_script=/contrib/containers/load_spack_noaa-intel.sh -############################################################################## -## Set up the directories -if [ -z "$1" ] - then - echo "No branch/PR supplied; using main" - branch=main - else - echo Branch is ${1} - branch=${1} -fi -if [ -z "$2" ] - then - echo "No second argument" - commit=none - else - echo Commit is ${2} - commit=${2} -fi -testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} -logDir=${testDir}/log -# Set up build -cd ${testDir}/SHiELD_build/Build -#Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs -# Build SHiELD -set -o pipefail -singularity exec -B /contrib 
${container} ${container_env_script} "./COMPILE solo hydro 64bit repro intel clean" diff --git a/.github/.parallelworks/nhcompile.sh b/.github/.parallelworks/nhcompile.sh deleted file mode 100755 index 187cfda..0000000 --- a/.github/.parallelworks/nhcompile.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh -xe - -############################################################################## -## User set up variables -## Root directory for CI -dirRoot=/contrib/fv3 -## Intel version to be used -intelVersion=2023.2.0 -############################################################################## -## HPC-ME container -container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif -container_env_script=/contrib/containers/load_spack_noaa-intel.sh -############################################################################## -## Set up the directories -if [ -z "$1" ] - then - echo "No branch/PR supplied; using main" - branch=main - else - echo Branch is ${1} - branch=${1} -fi -if [ -z "$2" ] - then - echo "No second argument" - commit=none - else - echo Commit is ${2} - commit=${2} -fi -testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} -logDir=${testDir}/log -# Set up build -cd ${testDir}/SHiELD_build/Build -#Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs -# Build SHiELD -set -o pipefail -singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo nh 64bit repro intel clean" diff --git a/.github/.parallelworks/run_test.sh b/.github/.parallelworks/run_test.sh index a4e9d02..c33fbc6 100755 --- a/.github/.parallelworks/run_test.sh +++ b/.github/.parallelworks/run_test.sh @@ -10,36 +10,45 @@ intelVersion=2023.2.0 ## HPC-ME container container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif container_env_script=/contrib/containers/load_spack_noaa-intel.sh +############################################################################## -##Parse Arguments -#first argument should 
be the name of the test and is mandatory -if [ -z "$1" ] - then - echo "Please run this script with an argument indicating what test to run. For example:" - echo "./run_test.sh C128r20.solo.superC" - else - echo Test is ${1} - test=${1} -fi -#second argument is the branch name. This is optional. Default is main branch if none supplied -if [ -z "$2" ] - then - echo "No branch supplied; using main" - branch=main - else - echo Branch is ${2} - branch=${2} -fi -#third argument is the commit hash if running from CI. This is optional -if [ -z "$3" ] +#Parse Arguments +branch=main +commit="" +while [[ $# -gt 0 ]]; do + case $1 in + -b|--branch) + branch="$2" + shift # past argument + shift # past value + ;; + -h|--hash) + commit="$2" + shift # past argument + shift # past value + ;; + -t|--test) + testname="$2" + shift # past argument + shift # past value + ;; + *) + echo "unknown argument" + exit 1 + ;; + esac +done + +if [ -z $testname ] then - echo "No commit being used in file path" - commit="" - else - echo Commit is ${3} - commit=${3} + echo "must specify a test name with -t" + exit 1 fi +echo "branch is $branch" +echo "commit is $commit" +echo "test is $testname" + ## Set up the directories MODULESHOME=/usr/share/lmod/lmod testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} @@ -57,24 +66,24 @@ runDir=${BUILDDIR}/CI/BATCH-CI cd ${testscriptDir} set -o pipefail # Execute the test piping output to log file -./${test} " --partition=p2 --mpi=pmi2 --job-name=${commit}_${test} singularity exec -B /contrib ${container} ${container_env_script}" |& tee ${logDir}/run_${test}.log +./${testname} " --partition=p2 --mpi=pmi2 --job-name=${commit}_${testname} singularity exec -B /contrib ${container} ${container_env_script}" |& tee ${logDir}/run_${testname}.log ## Compare Restarts to Baseline #The following tests are not expectred to have run-to-run reproducibility: #d96_2k.solo.bubble #d96_2k.solo.bubble.n0 #d96_2k.solo.bubble.nhK -if [[ ${test} == 
"d96_2k.solo.bubble" || ${test} == "d96_2k.solo.bubble.n0" || ${test} == "d96_2k.solo.bubble.nhK" ]] +if [[ ${testname} == "d96_2k.solo.bubble" || ${testname} == "d96_2k.solo.bubble.n0" || ${testname} == "d96_2k.solo.bubble.nhK" ]] then - echo "${test} is not expected to reproduce so answers were not compared" + echo "${testname} is not expected to reproduce so answers were not compared" else source $MODULESHOME/init/sh export MODULEPATH=/mnt/shared/manual_modules:/usr/share/modulefiles/Linux:/usr/share/modulefiles/Core:/usr/share/lmod/lmod/modulefiles/Core:/apps/modules/modulefiles:/apps/modules/modulefamilies/intel module load intel/2022.1.2 module load netcdf module load nccmp - for resFile in `ls ${baselineDir}/${test}` + for resFile in `ls ${baselineDir}/${testname}` do - nccmp -d ${baselineDir}/${test}/${resFile} ${runDir}/${test}/RESTART/${resFile} + nccmp -d ${baselineDir}/${testname}/${resFile} ${runDir}/${testname}/RESTART/${resFile} done fi diff --git a/.github/.parallelworks/swcompile.sh b/.github/.parallelworks/swcompile.sh deleted file mode 100755 index 0cc44e5..0000000 --- a/.github/.parallelworks/swcompile.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh -xe - -############################################################################## -## User set up variables -## Root directory for CI -dirRoot=/contrib/fv3 -## Intel version to be used -intelVersion=2023.2.0 -############################################################################## -## HPC-ME container -container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif -container_env_script=/contrib/containers/load_spack_noaa-intel.sh -############################################################################## -## Set up the directories -if [ -z "$1" ] - then - echo "No branch/PR supplied; using main" - branch=main - else - echo Branch is ${1} - branch=${1} -fi -if [ -z "$2" ] - then - echo "No second argument" - commit=none - else - echo Commit is ${2} - commit=${2} -fi 
-testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} -logDir=${testDir}/log -# Set up build -cd ${testDir}/SHiELD_build/Build -#Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs -# Build SHiELD -set -o pipefail -singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE solo sw 64bit repro intel clean" - diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index ebeac15..1c74089 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -31,7 +31,7 @@ jobs: # this is enough nodes for the first 17 tests to run in parallel, and we # have 17 runners configured. - run: salloc --partition=p2 -N 46 -J $GITHUB_SHA sleep 20m & - - run: /contrib/fv3/SHiELD_build_CI/checkout.sh $GITHUB_REF $GITHUB_SHA + - run: /contrib/fv3/SHiELD_build_CI/checkout.sh -b $GITHUB_REF -h $GITHUB_SHA build: if: github.repository == 'NOAA-GFDL/SHiELD_build' @@ -42,13 +42,19 @@ jobs: fail-fast: true max-parallel: 3 matrix: - runpath: [/contrib/fv3/SHiELD_build_CI/] - runscript: [swcompile.sh, nhcompile.sh, hydrocompile.sh] + runscript: [/contrib/fv3/SHiELD_build_CI/compile.sh] + config: [shield, solo] + hydro: [sw, nh, hydro] + bit: [32bit, 64bit] + mode: [repro, prod, debug] steps: - env: - RUNPATH: ${{ matrix.runpath }} RUNSCRIPT: ${{ matrix.runscript }} - run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA + CONFIG: ${{ matrix.config }} + HYDRO: ${{ matrix.hydro }} + BIT: ${{ matrix.bit }} + MODE: ${{ matrix.mode }} + run: $RUNSCRIPT -b $GITHUB_REF -h $GITHUB_SHA -c $CONFIG -hy $HYDRO -b $BIT -m $MODE test: if: github.repository == 'NOAA-GFDL/SHiELD_build' @@ -107,7 +113,7 @@ jobs: - env: RUNSCRIPT: ${{ matrix.runscript }} ARG1: ${{ matrix.argument }} - run: $RUNSCRIPT $ARG1 $GITHUB_REF $GITHUB_SHA + run: $RUNSCRIPT -t $ARG1 -b $GITHUB_REF -h $GITHUB_SHA shutdown: if: always() && github.repository == 
'NOAA-GFDL/SHiELD_build' runs-on: [shieldbuild] diff --git a/.github/workflows/Intel_Parallelworks_multbuild.yaml b/.github/workflows/Intel_Parallelworks_multbuild.yaml deleted file mode 100644 index cab3867..0000000 --- a/.github/workflows/Intel_Parallelworks_multbuild.yaml +++ /dev/null @@ -1,46 +0,0 @@ -name: Test Multiple Compiles - -# This GitHub Action Workflow is running on the cloud shieldbuildciintel cluster -# The tests are run inside of a container with the following software/libraries: -# -intel: 2023.2.0 -# -hdf5: 1.14.0 -# -netcdf-c: 4.9.2 -# -netcdf-fortran: 4.6.0 -# -cmake -# -libyaml - -on: - workflow_run: - workflows: ["Compile SHiELD SOLO and run tests"] - branches: [main] - types: - - completed - -#this should cancel in progress ci runs for the same PR -#(e.g. a second commit on the same PR comes in while CI is still running) -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - compile: - if: github.repository == 'NOAA-GFDL/SHiELD_build' - runs-on: [shieldbuild] - name: Compile SHiELD - strategy: - fail-fast: false - max-parallel: 17 - matrix: - config: [shield, solo] - hyrdo: [sw, nh, hydro] - comp: [prod, repro, debug] - bit: [32bit, 64bit] - steps: - - env: - container: /contrib/containers/noaa-intel-prototype_2023.09.25.sif - container_env_script: /contrib/containers/load_spack_noaa-intel.sh - - run: | - cd /contrib/fv3/2023.2.0/SHiELD_build/$GITHUB_REF/$GITHUB_SHA/SHiELD_build/Build - export EXTERNAL_LIBS=/contrib/fv3/2023.2.0/SHiELD_build/externallibs - singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE ${{ matrix.config }} ${{ matrix.hydro }} ${{ matrix.comp }} ${{ matrix.bit }} intel clean" - From 8609b67e748a860d571aaafee9f8b91fafc2b07b Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 16:58:52 +0000 Subject: [PATCH 12/16] fix typos in compile.sh --- .github/.parallelworks/compile.sh | 27 ++++++++++++++++----------- 1 file changed, 16 
insertions(+), 11 deletions(-) diff --git a/.github/.parallelworks/compile.sh b/.github/.parallelworks/compile.sh index 1a98720..c218c18 100755 --- a/.github/.parallelworks/compile.sh +++ b/.github/.parallelworks/compile.sh @@ -50,7 +50,7 @@ while [[ $# -gt 0 ]]; do esac done -if [ -z $mode | -z $bit | -z $hydro | -b $config ] +if [ -z $mode ] || [ -z $bit ] || [ -z $hydro ] || [ -z $config ] then echo "must specify config, hydro, bit, and mode options for compile" exit 1 @@ -63,13 +63,18 @@ echo "bit is $bit" echo "hydro is $hydro" echo "config is $config" -## Set up the directories -testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} -logDir=${testDir}/log -# Set up build -cd ${testDir}/SHiELD_build/Build -#Define External Libs path -export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs -# Build SHiELD -set -o pipefail -singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE ${config} ${hydro} ${bit} ${mode} intel clean" +if [ $hydro = "sw" ] && [ $config = "shield" ] + then + echo "this combination should not be tested" + else + ## Set up the directories + testDir=${dirRoot}/${intelVersion}/SHiELD_build/${branch}/${commit} + logDir=${testDir}/log + # Set up build + cd ${testDir}/SHiELD_build/Build + #Define External Libs path + export EXTERNAL_LIBS=${dirRoot}/${intelVersion}/SHiELD_build/externallibs + # Build SHiELD + set -o pipefail + singularity exec -B /contrib ${container} ${container_env_script} "./COMPILE ${config} ${hydro} ${bit} ${mode} intel clean" +fi From 5afbd70fc7f08d4aea7610fbdb98bd7e3a73358d Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 17:18:44 +0000 Subject: [PATCH 13/16] fixing arg parsing --- .github/.parallelworks/compile.sh | 8 ++++++-- .github/workflows/Intel_Parallelworks_CI.yaml | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/.parallelworks/compile.sh b/.github/.parallelworks/compile.sh index c218c18..ca3707d 100755 --- 
a/.github/.parallelworks/compile.sh +++ b/.github/.parallelworks/compile.sh @@ -31,18 +31,22 @@ while [[ $# -gt 0 ]]; do config="$2" shift # past argument shift # past value - -hy|--hydro) + ;; + --hydro) hydro="$2" shift # past argument shift # past value - -b|--bit) + ;; + --bit) bit="$2" shift # past argument shift # past value + ;; -m|--mode) mode="$2" shift # past argument shift # past value + ;; *) echo "unknown argument" exit 1 diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index 1c74089..5d182a9 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -54,7 +54,7 @@ jobs: HYDRO: ${{ matrix.hydro }} BIT: ${{ matrix.bit }} MODE: ${{ matrix.mode }} - run: $RUNSCRIPT -b $GITHUB_REF -h $GITHUB_SHA -c $CONFIG -hy $HYDRO -b $BIT -m $MODE + run: $RUNSCRIPT -b $GITHUB_REF -h $GITHUB_SHA -c $CONFIG --hydro $HYDRO --bit $BIT -m $MODE test: if: github.repository == 'NOAA-GFDL/SHiELD_build' From 7b25873c8fdbeb2c955faffc6f4c2fcebb62fae3 Mon Sep 17 00:00:00 2001 From: kaiyuan-cheng <74800123+kaiyuan-cheng@users.noreply.github.com> Date: Tue, 6 Feb 2024 14:39:45 -0500 Subject: [PATCH 14/16] Update CHECKOUT_code --- CHECKOUT_code | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHECKOUT_code b/CHECKOUT_code index e77e1f7..4014378 100755 --- a/CHECKOUT_code +++ b/CHECKOUT_code @@ -35,8 +35,8 @@ release="main" fv3_release=$release phy_release=$release -fms_release="2023.02" -drivers_release="2023.02" +fms_release="2023.04" +drivers_release=$release git clone -b ${fv3_release} https://github.com/NOAA-GFDL/GFDL_atmos_cubed_sphere git clone -b ${phy_release} https://github.com/NOAA-GFDL/SHiELD_physics From 9a31076d5efc7a774ea7b949766661ae2e5938de Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 17:47:05 +0000 Subject: [PATCH 15/16] update Intel_Parallelworks_CI.yaml to allow for up to 17 concurrent compiles. 
--- .github/workflows/Intel_Parallelworks_CI.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Intel_Parallelworks_CI.yaml b/.github/workflows/Intel_Parallelworks_CI.yaml index 5d182a9..794190f 100644 --- a/.github/workflows/Intel_Parallelworks_CI.yaml +++ b/.github/workflows/Intel_Parallelworks_CI.yaml @@ -40,7 +40,7 @@ jobs: needs: [checkout] strategy: fail-fast: true - max-parallel: 3 + max-parallel: 17 matrix: runscript: [/contrib/fv3/SHiELD_build_CI/compile.sh] config: [shield, solo] From ed636a04d4d66b05c7658893ba2da924feb6f330 Mon Sep 17 00:00:00 2001 From: Lauren Chilutti Date: Wed, 24 Apr 2024 19:05:13 +0000 Subject: [PATCH 16/16] updating all parallelworks CI files to be bash. --- .github/.parallelworks/checkout.sh | 2 +- .github/.parallelworks/compile.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/.parallelworks/checkout.sh b/.github/.parallelworks/checkout.sh index dfae993..8d991d9 100755 --- a/.github/.parallelworks/checkout.sh +++ b/.github/.parallelworks/checkout.sh @@ -1,4 +1,4 @@ -#!/bin/sh -xe +#!/bin/bash -xe ############################################################################## ## User set up variables diff --git a/.github/.parallelworks/compile.sh b/.github/.parallelworks/compile.sh index ca3707d..63e7852 100755 --- a/.github/.parallelworks/compile.sh +++ b/.github/.parallelworks/compile.sh @@ -1,4 +1,4 @@ -#!/bin/sh -xe +#!/bin/bash -xe ############################################################################## ## User set up variables