Skip to content

Add functionality for ingesting dataset tarballs #138

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 42 additions & 13 deletions scripts/ingest-tarball.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
#!/bin/bash

# Ingest a tarball containing software, a compatibility layer,
# or init scripts to the EESSI CVMFS repository, and generate
# nested catalogs in a separate transaction.
# init scripts, or dataset(s) to the EESSI CVMFS repositories,
# and generate nested catalogs in a separate transaction, if necessary.
# This script has to be run on a CVMFS publisher node.

# This script assumes that the given tarball is named like:
# eessi-<version>-{compat,init,software}-[additional information]-<timestamp>.tar.gz
# It also assumes, and verifies, that the name of the top-level directory of the contents of the
# of the tarball matches <version>, and that name of the second level should is either compat, init, or software.
# eessi-<version>-{compat,init,software,dataset}-[additional information]-<timestamp>.tar.gz
# For non-dataset tarballs it also assumes, and verifies, that the name of the top-level directory
# of the contents of the tarball matches <version>,
# and that the name of the second-level directory is either compat, init, or software.

# Only if it passes these checks, the tarball gets ingested to the base dir in the repository specified below.

repo=pilot.eessi-hpc.org
basedir=versions
# default CVMFS repository and base directory that tarballs get ingested into
repo="pilot.eessi-hpc.org"
basedir="versions"
# repository and base directory that ingest_dataset_tarball switches to for dataset tarballs
data_repo="data.eessi-hpc.org"
data_basedir="/"
# command that decompresses a gzipped tarball to stdout
# NOTE(review): where this is consumed is not visible in this excerpt
decompress="gunzip -c"
# list of supported architectures for compat and software layers
# (associative array: the keys are the supported names, the values are left empty)
declare -A archs=(["aarch64"]= ["ppc64le"]= ["riscv64"]= ["x86_64"]=)
# list of supported operating systems for compat and software layers
# (associative array: the keys are the supported names, the values are left empty)
declare -A oss=(["linux"]= ["macos"]=)
# list of supported tarball content types
declare -A content_types=(["compat"]= ["init"]= ["software"]=)
declare -A content_types=(["compat"]= ["init"]= ["software"]= ["dataset"]=)


function echo_green() {
Expand Down Expand Up @@ -52,6 +55,18 @@ function check_repo_vars() {
fi
}

# Verify that the version derived from the tarball's filename is the
# 'noversion' placeholder, as expected for tarballs that do not belong to a
# specific stack (e.g. dataset tarballs).
# Globals: version (read)
# Both checks are evaluated independently; each failing check calls 'error'.
function check_no_version() {
  # A version field must have been derived from the filename at all ...
  [[ -n "${version}" ]] \
    || error "EESSI version cannot be derived from the filename."

  # ... and it has to be exactly the 'noversion' placeholder.
  [[ "${version}" == "noversion" ]] \
    || error "Tarballs that do not belong to a specific stack (e.g. dataset tarballs) should have 'noversion' in its filename."
}

function check_version() {
if [ -z "${version}" ]
then
Expand Down Expand Up @@ -198,6 +213,12 @@ function ingest_compat_tarball() {

}

# Ingest a dataset tarball: redirect the ingestion target to the data
# repository and its base directory, then call cvmfs_ingest_tarball.
# Globals: data_repo, data_basedir (read); repo, basedir (overwritten)
# Returns: the exit status of cvmfs_ingest_tarball
function ingest_dataset_tarball() {
  # Overwrite the global target settings; these are deliberately not local,
  # so that cvmfs_ingest_tarball sees them.
  basedir=${data_basedir}
  repo=${data_repo}
  cvmfs_ingest_tarball
}


# Check if a tarball has been specified
if [ "$#" -ne 1 ]; then
Expand All @@ -219,8 +240,16 @@ tar_first_file=$(tar tf "${tar_file}" | head -n 1)
tar_top_level_dir=$(echo "${tar_first_file}" | cut -d/ -f1)
tar_contents_type_dir=$(echo "${tar_first_file}" | head -n 2 | tail -n 1 | cut -d/ -f2)

# Do some checks, and ingest the tarball
check_repo_vars
check_version
check_contents_type
ingest_${tar_contents_type_dir}_tarball
# Run the checks that apply to this type of tarball
case "${contents_type_dir}" in
  dataset)
    # Dataset tarballs do not belong to a specific stack version, so the
    # regular checks are skipped and only the 'noversion' check is done.
    check_no_version
    ;;
  *)
    check_version
    check_contents_type
    check_repo_vars
    ;;
esac

# Finally, hand over to the ingestion function matching the tarball type
ingest_${contents_type_dir}_tarball
6 changes: 6 additions & 0 deletions scripts/test-ingest-tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ tarballs_success=(
"$tstdir/eessi-2000.01-compat-macos-x86_64-123456.tar.gz 2000.01 compat/macos/x86_64"
"$tstdir/eessi-2000.01-init-123456.tar.gz 2000.01 init"
"$tstdir/eessi-2000.01-software-123456.tar.gz 2000.01 software/linux/x86_64/intel/haswell"
"$tstdir/eessi-noversion-dataset-WRF-3.0-123456.tar.gz none none"
"$tstdir/eessi-noversion-dataset-WRF-and-GROMACS-123456.tar.gz none none"
)

# Test that should return an error
Expand Down Expand Up @@ -76,6 +78,10 @@ tarballs_fail=(
# Invalid architecture
"$tstdir/eessi-2000.01-compat-123456.tar.gz 2000.01 compat/linux/sparc"
"$tstdir/eessi-2000.01-compat-123456.tar.gz 2000.01 compat"
# Wrong version for dataset
"$tstdir/eessi-undefinedversion-dataset-WRF-3.0-123456.tar.gz none none"
# Wrong content type for datasets
"$tstdir/eessi-undefinedversion-data-WRF-3.0-123456.tar.gz none none"
)

# Run the tests that should succeed
Expand Down