From a81adfc514138304636b93bbdb13ebbc18d8c109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 20 Dec 2022 15:45:37 +0100 Subject: [PATCH 1/2] update ingestion script for dataset tarballs --- scripts/ingest-tarball.sh | 55 ++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/scripts/ingest-tarball.sh b/scripts/ingest-tarball.sh index 8f66c0cb..5e6db948 100755 --- a/scripts/ingest-tarball.sh +++ b/scripts/ingest-tarball.sh @@ -1,26 +1,29 @@ #!/bin/bash # Ingest a tarball containing software, a compatibility layer, -# or init scripts to the EESSI CVMFS repository, and generate -# nested catalogs in a separate transaction. +# init scripts, or dataset(s) to the EESSI CVMFS repositories, +# and generate nested catalogs in a separate transaction, if necessary. # This script has to be run on a CVMFS publisher node. # This script assumes that the given tarball is named like: -# eessi-<version>-{compat,init,software}-[additional information]-<timestamp>.tar.gz -# It also assumes, and verifies, that the name of the top-level directory of the contents of the -# of the tarball matches <version>, and that name of the second level should is either compat, init, or software. +# eessi-<version>-{compat,init,software,dataset}-[additional information]-<timestamp>.tar.gz +# For non-dataset tarballs it also assumes, and verifies, that the name of the top-level directory +# of the contents of the tarball matches <version>, +# and that the name of the second level is either compat, init, or software. # Only if it passes these checks, the tarball gets ingested to the base dir in the repository specified below. 
-repo=pilot.eessi-hpc.org -basedir=versions +repo="pilot.eessi-hpc.org" +basedir="versions" +data_repo="data.eessi-hpc.org" +data_basedir="/" decompress="gunzip -c" # list of supported architectures for compat and software layers declare -A archs=(["aarch64"]= ["ppc64le"]= ["riscv64"]= ["x86_64"]=) # list of supported operating systems for compat and software layers declare -A oss=(["linux"]= ["macos"]=) # list of supported tarball content types -declare -A content_types=(["compat"]= ["init"]= ["software"]=) +declare -A content_types=(["compat"]= ["init"]= ["software"]= ["dataset"]=) function echo_green() { @@ -52,6 +55,18 @@ function check_repo_vars() { fi } +function check_no_version() { + if [ -z "${version}" ] + then + error "EESSI version cannot be derived from the filename." + fi + + if [ "${version}" != "noversion" ] + then + error "Tarballs that do not belong to a specific stack (e.g. dataset tarballs) should have 'noversion' in its filename." + fi +} + function check_version() { if [ -z "${version}" ] then @@ -198,6 +213,12 @@ function ingest_compat_tarball() { } +function ingest_dataset_tarball() { + repo="${data_repo}" + basedir="${data_basedir}" + cvmfs_ingest_tarball +} + # Check if a tarball has been specified if [ "$#" -ne 1 ]; then @@ -219,8 +240,16 @@ tar_first_file=$(tar tf "${tar_file}" | head -n 1) tar_top_level_dir=$(echo "${tar_first_file}" | cut -d/ -f1) tar_contents_type_dir=$(echo "${tar_first_file}" | head -n 2 | tail -n 1 | cut -d/ -f2) -# Do some checks, and ingest the tarball -check_repo_vars -check_version -check_contents_type -ingest_${tar_contents_type_dir}_tarball +if [ "${contents_type_dir}" == "dataset" ] +then + # For dataset tarballs, we have to skip some checks or do some different checks, + # as they do not belong to a specific stack version + check_no_version +else + check_version + check_contents_type + check_repo_vars +fi + +# Finally, run the ingestion function for the specific type of tarball 
+ingest_${contents_type_dir}_tarball From 500b582e3a0b1ff7ed857d44fd3af2adca460780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bob=20Dr=C3=B6ge?= Date: Tue, 20 Dec 2022 15:46:03 +0100 Subject: [PATCH 2/2] add tests for dataset tarballs --- scripts/test-ingest-tarball.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/test-ingest-tarball.sh b/scripts/test-ingest-tarball.sh index 202e9283..fcd1934d 100755 --- a/scripts/test-ingest-tarball.sh +++ b/scripts/test-ingest-tarball.sh @@ -48,6 +48,8 @@ tarballs_success=( "$tstdir/eessi-2000.01-compat-macos-x86_64-123456.tar.gz 2000.01 compat/macos/x86_64" "$tstdir/eessi-2000.01-init-123456.tar.gz 2000.01 init" "$tstdir/eessi-2000.01-software-123456.tar.gz 2000.01 software/linux/x86_64/intel/haswell" + "$tstdir/eessi-noversion-dataset-WRF-3.0-123456.tar.gz none none" + "$tstdir/eessi-noversion-dataset-WRF-and-GROMACS-123456.tar.gz none none" ) # Test that should return an error @@ -76,6 +78,10 @@ tarballs_fail=( # Invalid architecture "$tstdir/eessi-2000.01-compat-123456.tar.gz 2000.01 compat/linux/sparc" "$tstdir/eessi-2000.01-compat-123456.tar.gz 2000.01 compat" + # Wrong version for dataset + "$tstdir/eessi-undefinedversion-dataset-WRF-3.0-123456.tar.gz none none" + # Wrong content type for datasets + "$tstdir/eessi-undefinedversion-data-WRF-3.0-123456.tar.gz none none" ) # Run the tests that should succeed