From e756008b4bd2eabcc0c350416e540200fa1c7ee8 Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Fri, 26 Apr 2024 14:28:14 +0000 Subject: [PATCH 1/7] bugfix unpredictable outputname --- nextflow/main.nf | 4 ++++ nextflow/modules/CONCORDANCE/concordance.nf | 8 ++++---- nextflow/modules/CONCORDANCE/templates/concordance.sh | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/nextflow/main.nf b/nextflow/main.nf index 657dee2..a6ce09d 100755 --- a/nextflow/main.nf +++ b/nextflow/main.nf @@ -23,14 +23,18 @@ def split_samples( row ) { def sample1Metadata = [ "processStepId": row.processStepId, "dataId": row.data1Id, "build": row.build1, + "project": row.project1, "fileType": row.fileType1, + "fileprefix": row.fileprefix, "liftover": check_liftover(row.build1, row), "file": file(row.location1)] def sample2Metadata = [ "processStepId": row.processStepId, "dataId": row.data2Id, "build": row.build2, + "project": row.project2, "fileType": row.fileType2, + "fileprefix": row.fileprefix, "liftover": check_liftover(row.build2, row), "file": file(row.location2)] diff --git a/nextflow/modules/CONCORDANCE/concordance.nf b/nextflow/modules/CONCORDANCE/concordance.nf index bf93b17..1fd4afc 100755 --- a/nextflow/modules/CONCORDANCE/concordance.nf +++ b/nextflow/modules/CONCORDANCE/concordance.nf @@ -15,15 +15,15 @@ process CONCORDANCE { vcf1 = "${files[0]}" vcf2 = "${files[1]}" - sampleFile="${id}_${meta[0].dataId}_${meta[1].dataId}.sample" - variantFile="${id}_${meta[0].dataId}_${meta[1].dataId}.variants" + sampleFile="${meta[0].fileprefix}.sample" + variantFile="${meta[0].fileprefix}.variants" template 'concordance.sh' stub: - sampleFile="${id}_${meta[0].dataId}_${meta[1].dataId}.sample" - variantFile="${id}_${meta[0].dataId}_${meta[1].dataId}.variants" + sampleFile="${meta[0].fileprefix}.sample" + variantFile="${meta[0].fileprefix}.variants" """ touch "${sampleFile}" touch "${variantFile}" diff --git a/nextflow/modules/CONCORDANCE/templates/concordance.sh b/nextflow/modules/CONCORDANCE/templates/concordance.sh index 1f4dda3..5301ee8 100755 --- a/nextflow/modules/CONCORDANCE/templates/concordance.sh +++ b/nextflow/modules/CONCORDANCE/templates/concordance.sh @@ -30,5 +30,5 @@ set -eu -D2 VCF \ -ac \ --sampleMap "${mappingfile}" \ - -o "!{id}"_"!{meta[0].dataId}"_"!{meta[1].dataId}" \ + -o "!{meta[0].fileprefix}" \ -sva From e70400086aadd52f79245d432139aa5ed23595e8 Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Fri, 26 Apr 2024 14:34:15 +0000 Subject: [PATCH 2/7] new version on copyConcordanceCheckData.sh, and replaced ConcordanceCheck.sh --- bin/ConcordanceCheck.sh | 148 +++++++++++++++++--------------- bin/copyConcordanceCheckData.sh | 20 +++-- 2 files changed, 91 insertions(+), 77 deletions(-) diff --git a/bin/ConcordanceCheck.sh b/bin/ConcordanceCheck.sh index e388e57..d52981f 100755 --- a/bin/ConcordanceCheck.sh +++ b/bin/ConcordanceCheck.sh @@ -165,60 +165,42 @@ log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Successfully got exclusive log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Log files will be written to ${TMP_ROOT_DIR}/logs ..." concordanceCheckVersion=$(module list | grep -o -P 'ConcordanceCheck(.+)') -module load "${htsLibVersion}" -module load "${compareGenotypeCallsVersion}" -module load "${bedToolsVersion}" module list - concordanceDir="/groups/${GROUP}/${TMP_LFS}/concordance/" -ngsVcfDir="${concordanceDir}/ngs/" -arrayVcfDir="${concordanceDir}/array/" + +# header format of .sampleId.txt +##data1Id data2Id location1 location2 fileType1 fileType2 build1 build2 processStepId while IFS= read -r sampleSheet do log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing samplesheet ${sampleSheet} ..." - concordanceCheckId=$(basename "${sampleSheet}" .sampleId.txt) + filePrefix="$(basename "${sampleSheet}" .sampleId.txt)" + concordanceCheckId="${filePrefix}" + controlFileBase="${concordanceDir}/logs/" + export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}/${filePrefix}.${SCRIPT_NAME}" + logDir="${concordanceDir}/logs/${filePrefix}" + # shellcheck disable=SC2174 + mkdir -m 2770 -p "${logDir}" + mkdir -p "${concordanceDir}/jobs/${concordanceCheckId}" + log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "TEST ls ${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh" + if [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}.sh" ]] then - touch "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.started" - arrayId=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $1}') - arrayVcf="${arrayId}.FINAL.vcf" - ngsId=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $2}') - ngsVcf=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $4}') - ngsVcf="$(basename "${ngsVcf}")" - getNgsVcfExtension="${ngsVcf##*.}" - if [[ "${getNgsVcfExtension}" != "gz" ]] - then - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "VCF file ${ngsVcf} is not compressed. bgzip-ping now ..." - ml HTSlib - bgzip "${ngsVcfDir}/${ngsVcf}" - ngsVcf="${ngsVcf}.gz" - fi - bedType="$(zcat "${ngsVcfDir}/${ngsVcf}" | grep -m 1 -o -P 'intervals=\[[^\]]*.bed\]' | cut -d [ -f2 | cut -d ] -f1)" - bedDir="$(dirname "${bedType}")" - bedFile="${bedDir}/captured.merged.bed" - mkdir -p "${concordanceDir}/tmp/${concordanceCheckId}/" - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Calculating concordance over ${ngsVcf} compared to ${arrayVcf}." - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Using ${bedFile} to intersect the array vcf file." + log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh FOUND" + + printf '' > "${JOB_CONTROLE_FILE_BASE}.started" + + data1Id=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $1}') + data2Id=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $2}') + log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Calculating concordance over ${data1Id} compared to ${data2Id}." log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Output file name: ${concordanceCheckId}." - # - # Remove InDel calls from NGS VCF. - # - zcat "${ngsVcfDir}/${ngsVcf}" | grep '^#' > "${concordanceDir}/tmp/${concordanceCheckId}/${ngsId}.FINAL.vcf" - zcat "${ngsVcfDir}/${ngsVcf}" | grep -v '^#' | awk '{if (length($4)<2 && length($5)<2 ){print $0}}' >> "${concordanceDir}/tmp/${concordanceCheckId}/${ngsId}.FINAL.vcf" - bgzip -c "${concordanceDir}/tmp/${concordanceCheckId}/${ngsId}.FINAL.vcf" > "${concordanceDir}/tmp/${concordanceCheckId}/${ngsId}.FINAL.vcf.gz" - tabix -p vcf "${concordanceDir}/tmp/${concordanceCheckId}/${ngsId}.FINAL.vcf.gz" - bedtools intersect -a "${arrayVcfDir}/${arrayVcf}" -b "${bedFile}" -header \ - > "${concordanceDir}/tmp/${concordanceCheckId}/${arrayId}.FINAL.ExonFiltered.vcf" \ - 2> "${concordanceDir}/logs/${concordanceCheckId}.ConcordanceCheck.started" \ - || log4Bash 'FATAL' "${LINENO}" "${FUNCNAME:-main}" "${?}" "Failed to execute 'bedtools intersect' command." - -cat << EOH > "${concordanceDir}/jobs/${concordanceCheckId}.sh" + +cat << EOH > "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh" #!/bin/bash #SBATCH --job-name=Concordance_${concordanceCheckId} -#SBATCH --output=${concordanceDir}/jobs/${concordanceCheckId}.out -#SBATCH --error=${concordanceDir}/jobs/${concordanceCheckId}.err +#SBATCH --output=${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.out +#SBATCH --error=${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.err #SBATCH --time=00:30:00 #SBATCH --cpus-per-task 1 #SBATCH --mem 6gb @@ -226,46 +208,74 @@ cat << EOH > "${concordanceDir}/jobs/${concordanceCheckId}.sh" #SBATCH --export=NONE #SBATCH --get-user-env=60L +set -o pipefail set -eu - module load "${htsLibVersion}" - module load "${compareGenotypeCallsVersion}" - module load "${bedToolsVersion}" - bgzip -c "${concordanceDir}/tmp/${concordanceCheckId}/${arrayId}.FINAL.ExonFiltered.vcf" > "${concordanceDir}/tmp/${concordanceCheckId}/${arrayId}.FINAL.ExonFiltered.vcf.gz" - tabix -p vcf "${concordanceDir}/tmp/${concordanceCheckId}/${arrayId}.FINAL.ExonFiltered.vcf.gz" - - java -XX:ParallelGCThreads=1 -Djava.io.tmpdir="${concordanceDir}/temp/" -Xmx9g -jar ${EBROOTCOMPAREGENOTYPECALLS}/CompareGenotypeCalls.jar \\ - -d1 "${concordanceDir}/tmp/${concordanceCheckId}/${arrayId}.FINAL.ExonFiltered.vcf.gz" \\ - -D1 VCF \\ - -d2 "${concordanceDir}/tmp/${concordanceCheckId}/${ngsId}.FINAL.vcf.gz" \\ - -D2 VCF \\ - -ac \\ - --sampleMap "${sampleSheet}" \\ - -o "${concordanceDir}/tmp/${concordanceCheckId}" \\ - -sva - - mv -v "${concordanceDir}/tmp/${concordanceCheckId}.sample" "${concordanceDir}/results/" - mv -v "${concordanceDir}/tmp/${concordanceCheckId}.variants" "${concordanceDir}/results/" + +# Env vars. +export TMPDIR="${TMPDIR:-/tmp}" # Default to /tmp if "${TMPDIR}" was not defined. +SCRIPT_NAME="$(basename "${0}")" +SCRIPT_NAME="${SCRIPT_NAME%.*sh}" +INSTALLATION_DIR="$(cd -P "$(dirname "${0}")/.." && pwd)" +LIB_DIR="${INSTALLATION_DIR}/lib" +CFG_DIR="${INSTALLATION_DIR}/etc" +HOSTNAME_SHORT="$(hostname -s)" +ROLE_USER="$(whoami)" +REAL_USER="$(logname 2>/dev/null || echo 'no login name')" + +# +## +### Functions. +## +# +if [[ -f "${LIB_DIR}/sharedFunctions.bash" && -r "${LIB_DIR}/sharedFunctions.bash" ]] +then + # shellcheck source=lib/sharedFunctions.bash + source "${LIB_DIR}/sharedFunctions.bash" +else + printf '%s\n' "FATAL: cannot find or cannot access sharedFunctions.bash" + exit 1 +fi + module load "${ConcordanceCheckVersion}" + module load "${nextflowVersion}" + + "${EBROOTNEXTFLOW}/nextflow" run /apps/software/ConcordanceCheck/beta/nextflow/main.nf \\ + -main-script "${EBROOTCONCORDANCECHECK}/nextflow/main.nf" \\ + --samplesheet "${sampleSheet}" \\ + -work-dir "${concordanceDir}/tmp/" \\ + --output "${concordanceDir}/results/" \\ + -profile slurm \\ + || { + log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" "0" " Concordance pipeline crashed. Check ${concordanceDir}/jobs/${concordanceCheckId}.out" + tail -50 "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.out" >> "${JOB_CONTROLE_FILE_BASE}.started" + mv -v "${JOB_CONTROLE_FILE_BASE}."{started,failed} + exit 1 + } + echo "${concordanceCheckVersion}" > "${concordanceDir}/results/${concordanceCheckId}.ConcordanceCheckVersion" - echo "Finished" - if [[ -e "/groups/${GROUP}/${TMP_LFS}/concordance/logs/${concordanceCheckId}.ConcordanceCheck.started" ]] + + if [[ -e "${JOB_CONTROLE_FILE_BASE}.started" ]] then - mv "/groups/${GROUP}/${TMP_LFS}/concordance/logs/${concordanceCheckId}.ConcordanceCheck."{started,finished} + mv "${JOB_CONTROLE_FILE_BASE}."{started,finished} else - touch "/groups/${GROUP}/${TMP_LFS}/concordance/logs/${concordanceCheckId}.ConcordanceCheck.finished" + touch "${JOB_CONTROLE_FILE_BASE}.finished" fi - mv "${concordanceDir}/jobs/${concordanceCheckId}.sh."{started,finished} + mv "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh."{started,finished} EOH fi - - if [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}.sh.started" ]] && [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}.sh.finished" ]] + + if [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh.started" ]] && [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh.finished" ]] then - cd "${concordanceDir}/jobs/" + cd "${concordanceDir}/jobs/${concordanceCheckId}" sbatch "${concordanceCheckId}.sh" + sleep 3 touch "${concordanceCheckId}.sh.started" cd - fi done < <(find "${concordanceDir}/samplesheets/" -maxdepth 1 -type f -iname "*sampleId.txt") + +# Clean exit. +# +log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Finished successfully." trap - EXIT exit 0 - diff --git a/bin/copyConcordanceCheckData.sh b/bin/copyConcordanceCheckData.sh index cf02bc1..ea68637 100755 --- a/bin/copyConcordanceCheckData.sh +++ b/bin/copyConcordanceCheckData.sh @@ -223,15 +223,19 @@ else # filePrefix=$(basename "${sampleSheet%.sampleId.txt}") log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing run ${filePrefix} ..." - # shellcheck disable=SC2029 - ngsVcfId=$(ssh "${DATA_MANAGER}@${HOSTNAME_TMP}" "awk '{if (NR>1){print \$2}}' \"${sampleSheet}\"") - # shellcheck disable=SC2029 - if ssh "${DATA_MANAGER}@${HOSTNAME_TMP}" test -e "${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/logs/${filePrefix}.ConcordanceCheck.finished" + controlFileBase="${PRM_ROOT_DIR}/logs/${filePrefix}" + export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}/${filePrefix}.${SCRIPT_NAME}" + logDir="${controlFileBase}/${filePrefix}" + # shellcheck disable=SC2174 + mkdir -m 2770 -p "${logDir}" + + log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "checking if exist: ${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/logs/${filePrefix}/${filePrefix}.ConcordanceCheck.finished" + if ssh "${DATA_MANAGER}@${HOSTNAME_TMP}" test -e "${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/logs/${filePrefix}/${filePrefix}.ConcordanceCheck.finished" then - touch "${PRM_ROOT_DIR}/concordance/logs/${filePrefix}.copyConcordanceCheckData.started" + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/logs/${filePrefix}/${filePrefix}.ConcordanceCheck.finished" + touch "${JOB_CONTROLE_FILE_BASE}.started" rsync -av "${DATA_MANAGER}@${HOSTNAME_TMP}:/${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/results/${filePrefix}.*" "${PRM_ROOT_DIR}/concordance/results/" - log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "removing ${PRM_ROOT_DIR}/concordance/ngs/${ngsVcfId}.final.vcf.gz" - rm -f "${PRM_ROOT_DIR}/concordance/ngs/${ngsVcfId}.final.vcf.gz" + log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Copied ${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/results/${filePrefix}.*" cd "/groups/${group}/${DAT_LFS}/ConcordanceCheckOutput/" windowsPathDelimeter="\\" # @@ -244,7 +248,7 @@ else done < <(find "${PRM_ROOT_DIR}/concordance/results/" -maxdepth 1 -type f -iname "${filePrefix}.*") # shellcheck disable=SC2029 ssh "${DATA_MANAGER}@${HOSTNAME_TMP}" "mv \"${sampleSheet}\" \"${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/samplesheets/archive/\"" - mv "${PRM_ROOT_DIR}/concordance/logs/${filePrefix}.copyConcordanceCheckData."{started,finished} + mv "${JOB_CONTROLE_FILE_BASE}."{started,finished} else log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "concordanceCheck for ${filePrefix} not finished (yet)" From da69c6f3b326ffda5289e3128509bb0af355f5c3 Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Wed, 8 May 2024 11:51:58 +0000 Subject: [PATCH 3/7] replaced path with var --- bin/ConcordanceCheck-nf.sh | 281 ------------------------------------- bin/ConcordanceCheck.sh | 2 +- 2 files changed, 1 insertion(+), 282 deletions(-) delete mode 100755 bin/ConcordanceCheck-nf.sh diff --git a/bin/ConcordanceCheck-nf.sh b/bin/ConcordanceCheck-nf.sh deleted file mode 100755 index d52981f..0000000 --- a/bin/ConcordanceCheck-nf.sh +++ /dev/null @@ -1,281 +0,0 @@ -#!/bin/bash - -# -## -### Environment and Bash sanity. -## -# -if [[ "${BASH_VERSINFO[0]}" -lt 4 ]] -then - echo "Sorry, you need at least bash 4.x to use ${0}." >&2 - exit 1 -fi - -set -e # Exit if any subcommand or pipeline returns a non-zero exit status. -set -u # Raise exception if variable is unbound. Combined with set -e will halt execution when an unbound variable is encountered. - -umask 0027 - -# Env vars. -export TMPDIR="${TMPDIR:-/tmp}" # Default to /tmp if $TMPDIR was not defined. -SCRIPT_NAME="$(basename "${0}")" -SCRIPT_NAME="${SCRIPT_NAME%.*sh}" -INSTALLATION_DIR="$(cd -P "$(dirname "${0}")/.." && pwd)" -LIB_DIR="${INSTALLATION_DIR}/lib" -CFG_DIR="${INSTALLATION_DIR}/etc" -HOSTNAME_SHORT="$(hostname -s)" -ROLE_USER="$(whoami)" -REAL_USER="$(logname 2>/dev/null || echo 'no login name')" - -# -## -### Functions. -## -# - -if [[ -f "${LIB_DIR}/sharedFunctions.bash" && -r "${LIB_DIR}/sharedFunctions.bash" ]] -then - # shellcheck source=lib/sharedFunctions.bash - source "${LIB_DIR}/sharedFunctions.bash" -else - printf '%s\n' "FATAL: cannot find or cannot access sharedFunctions.bash" - exit 1 -fi - -function showHelp() { - # - # Display commandline help on STDOUT. - # - cat <.cfg for the group specified with -g - 2. .cfg for this server. E.g.:"${HOSTNAME_SHORT}.cfg" - 3. sharedConfig.cfg for all groups and all servers. - In addition the library sharedFunctions.bash is required and this one must be located in ${LIB_DIR}. - -====================================================================================================================== - -EOH - trap - EXIT - exit 0 -} - -# -## -### Main. -## -# - -# -# Get commandline arguments. -# -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Parsing commandline arguments ..." -while getopts ":g:l: h" opt -do - case "${opt}" in - h) - showHelp - ;; - g) - GROUP="${OPTARG}" - ;; - l) - l4b_log_level="${OPTARG^^}" - l4b_log_level_prio="${l4b_log_levels["${l4b_log_level}"]}" - ;; - \?) - log4Bash 'FATAL' "${LINENO}" "${FUNCNAME[0]:-main}" '1' "Invalid option -${OPTARG}. Try $(basename "${0}") -h for help." - ;; - :) - log4Bash 'FATAL' "${LINENO}" "${FUNCNAME[0]:-main}" '1' "Option -${OPTARG} requires an argument. Try $(basename "${0}") -h for help." - ;; - *) - log4Bash 'FATAL' "${LINENO}" "${FUNCNAME[0]:-main}" '1' "Unhandled option. Try $(basename "${0}") -h for help." - ;; - esac -done - -# -# Check commandline options. -# -if [[ -z "${GROUP:-}" ]] -then - log4Bash 'FATAL' "${LINENO}" "${FUNCNAME:-main}" '1' 'Must specify a group with -g' -fi - -# -# Source config files. -# -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Sourcing config files ..." -declare -a configFiles=( - "${CFG_DIR}/${GROUP}.cfg" - "${CFG_DIR}/${HOSTNAME_SHORT}.cfg" - "${CFG_DIR}/sharedConfig.cfg" - "${CFG_DIR}/ConcordanceCheck.cfg" - "${HOME}/molgenis.cfg" -) - -for configFile in "${configFiles[@]}"; do - if [[ -f "${configFile}" && -r "${configFile}" ]] - then - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Sourcing config file ${configFile} ..." - # - # In some Bash versions the source command does not work properly with process substitution. - # Therefore we source a first time with process substitution for proper error handling - # and a second time without just to make sure we can use the content from the sourced files. - # - # Disable shellcheck code syntax checking for config files. - # shellcheck source=/dev/null - mixed_stdouterr=$(source "${configFile}" 2>&1) || log4Bash 'FATAL' "${LINENO}" "${FUNCNAME:-main}" "${?}" "Cannot source ${configFile}." - # shellcheck source=/dev/null - source "${configFile}" # May seem redundant, but is a mandatory workaround for some Bash versions. - else - log4Bash 'FATAL' "${LINENO}" "${FUNCNAME:-main}" '1' "Config file ${configFile} missing or not accessible." - fi -done - -# -# Make sure to use an account for cron jobs and *without* write access to prm storage. -# - -if [[ "${ROLE_USER}" != "${ATEAMBOTUSER}" ]] -then - log4Bash 'FATAL' "${LINENO}" "${FUNCNAME:-main}" '1' "This script must be executed by user ${ATEAMBOTUSER}, but you are ${ROLE_USER} (${REAL_USER})." -fi - -lockFile="${TMP_ROOT_DIR}/logs/${SCRIPT_NAME}.lock" -thereShallBeOnlyOne "${lockFile}" -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Successfully got exclusive access to lock file ${lockFile} ..." -log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Log files will be written to ${TMP_ROOT_DIR}/logs ..." - -concordanceCheckVersion=$(module list | grep -o -P 'ConcordanceCheck(.+)') -module list - -concordanceDir="/groups/${GROUP}/${TMP_LFS}/concordance/" - -# header format of .sampleId.txt -##data1Id data2Id location1 location2 fileType1 fileType2 build1 build2 processStepId - -while IFS= read -r sampleSheet -do - log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing samplesheet ${sampleSheet} ..." - filePrefix="$(basename "${sampleSheet}" .sampleId.txt)" - concordanceCheckId="${filePrefix}" - controlFileBase="${concordanceDir}/logs/" - export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}/${filePrefix}.${SCRIPT_NAME}" - logDir="${concordanceDir}/logs/${filePrefix}" - # shellcheck disable=SC2174 - mkdir -m 2770 -p "${logDir}" - mkdir -p "${concordanceDir}/jobs/${concordanceCheckId}" - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "TEST ls ${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh" - - if [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}.sh" ]] - then - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh FOUND" - - printf '' > "${JOB_CONTROLE_FILE_BASE}.started" - - data1Id=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $1}') - data2Id=$(sed 1d "${sampleSheet}" | awk 'BEGIN {FS="\t"}{print $2}') - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Calculating concordance over ${data1Id} compared to ${data2Id}." - log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Output file name: ${concordanceCheckId}." - -cat << EOH > "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh" -#!/bin/bash -#SBATCH --job-name=Concordance_${concordanceCheckId} -#SBATCH --output=${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.out -#SBATCH --error=${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.err -#SBATCH --time=00:30:00 -#SBATCH --cpus-per-task 1 -#SBATCH --mem 6gb -#SBATCH --open-mode=append -#SBATCH --export=NONE -#SBATCH --get-user-env=60L - -set -o pipefail -set -eu - -# Env vars. -export TMPDIR="${TMPDIR:-/tmp}" # Default to /tmp if "${TMPDIR}" was not defined. -SCRIPT_NAME="$(basename "${0}")" -SCRIPT_NAME="${SCRIPT_NAME%.*sh}" -INSTALLATION_DIR="$(cd -P "$(dirname "${0}")/.." && pwd)" -LIB_DIR="${INSTALLATION_DIR}/lib" -CFG_DIR="${INSTALLATION_DIR}/etc" -HOSTNAME_SHORT="$(hostname -s)" -ROLE_USER="$(whoami)" -REAL_USER="$(logname 2>/dev/null || echo 'no login name')" - -# -## -### Functions. -## -# -if [[ -f "${LIB_DIR}/sharedFunctions.bash" && -r "${LIB_DIR}/sharedFunctions.bash" ]] -then - # shellcheck source=lib/sharedFunctions.bash - source "${LIB_DIR}/sharedFunctions.bash" -else - printf '%s\n' "FATAL: cannot find or cannot access sharedFunctions.bash" - exit 1 -fi - module load "${ConcordanceCheckVersion}" - module load "${nextflowVersion}" - - "${EBROOTNEXTFLOW}/nextflow" run /apps/software/ConcordanceCheck/beta/nextflow/main.nf \\ - -main-script "${EBROOTCONCORDANCECHECK}/nextflow/main.nf" \\ - --samplesheet "${sampleSheet}" \\ - -work-dir "${concordanceDir}/tmp/" \\ - --output "${concordanceDir}/results/" \\ - -profile slurm \\ - || { - log4Bash 'TRACE' "${LINENO}" "${FUNCNAME[0]:-main}" "0" " Concordance pipeline crashed. Check ${concordanceDir}/jobs/${concordanceCheckId}.out" - tail -50 "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.out" >> "${JOB_CONTROLE_FILE_BASE}.started" - mv -v "${JOB_CONTROLE_FILE_BASE}."{started,failed} - exit 1 - } - - echo "${concordanceCheckVersion}" > "${concordanceDir}/results/${concordanceCheckId}.ConcordanceCheckVersion" - - if [[ -e "${JOB_CONTROLE_FILE_BASE}.started" ]] - then - mv "${JOB_CONTROLE_FILE_BASE}."{started,finished} - else - touch "${JOB_CONTROLE_FILE_BASE}.finished" - fi - - mv "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh."{started,finished} -EOH - fi - - if [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh.started" ]] && [[ ! -f "${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh.finished" ]] - then - cd "${concordanceDir}/jobs/${concordanceCheckId}" - sbatch "${concordanceCheckId}.sh" - sleep 3 - touch "${concordanceCheckId}.sh.started" - cd - - fi -done < <(find "${concordanceDir}/samplesheets/" -maxdepth 1 -type f -iname "*sampleId.txt") - -# Clean exit. -# -log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Finished successfully." -trap - EXIT -exit 0 diff --git a/bin/ConcordanceCheck.sh b/bin/ConcordanceCheck.sh index d52981f..81b8fa7 100755 --- a/bin/ConcordanceCheck.sh +++ b/bin/ConcordanceCheck.sh @@ -238,7 +238,7 @@ fi module load "${ConcordanceCheckVersion}" module load "${nextflowVersion}" - "${EBROOTNEXTFLOW}/nextflow" run /apps/software/ConcordanceCheck/beta/nextflow/main.nf \\ + "${EBROOTNEXTFLOW}/nextflow" run "${EBROOTCONCORDANCECHECK}nextflow/main.nf" \\ -main-script "${EBROOTCONCORDANCECHECK}/nextflow/main.nf" \\ --samplesheet "${sampleSheet}" \\ -work-dir "${concordanceDir}/tmp/" \\ From d1b56557cbe4af7a24b36efd1b245487892267be Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Wed, 8 May 2024 12:26:27 +0000 Subject: [PATCH 4/7] slash --- bin/ConcordanceCheck.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/ConcordanceCheck.sh b/bin/ConcordanceCheck.sh index 81b8fa7..a3a95b0 100755 --- a/bin/ConcordanceCheck.sh +++ b/bin/ConcordanceCheck.sh @@ -238,8 +238,7 @@ fi module load "${ConcordanceCheckVersion}" module load "${nextflowVersion}" - "${EBROOTNEXTFLOW}/nextflow" run "${EBROOTCONCORDANCECHECK}nextflow/main.nf" \\ - -main-script "${EBROOTCONCORDANCECHECK}/nextflow/main.nf" \\ + "${EBROOTNEXTFLOW}/nextflow" run "${EBROOTCONCORDANCECHECK}/nextflow/main.nf" \\ --samplesheet "${sampleSheet}" \\ -work-dir "${concordanceDir}/tmp/" \\ --output "${concordanceDir}/results/" \\ From dd209fbc8359675e0edf1481c767f04c0ee9f12c Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Wed, 8 May 2024 14:37:25 +0000 Subject: [PATCH 5/7] fix logpaths --- bin/copyConcordanceCheckData.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/copyConcordanceCheckData.sh b/bin/copyConcordanceCheckData.sh index ea68637..72070fa 100755 --- a/bin/copyConcordanceCheckData.sh +++ b/bin/copyConcordanceCheckData.sh @@ -224,10 +224,9 @@ else filePrefix=$(basename "${sampleSheet%.sampleId.txt}") log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing run ${filePrefix} ..." controlFileBase="${PRM_ROOT_DIR}/logs/${filePrefix}" - export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}/${filePrefix}.${SCRIPT_NAME}" - logDir="${controlFileBase}/${filePrefix}" + export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}.${SCRIPT_NAME}" # shellcheck disable=SC2174 - mkdir -m 2770 -p "${logDir}" + mkdir -m 2770 -p "${controlFileBase}" log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "checking if exist: ${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/logs/${filePrefix}/${filePrefix}.ConcordanceCheck.finished" if ssh "${DATA_MANAGER}@${HOSTNAME_TMP}" test -e "${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/logs/${filePrefix}/${filePrefix}.ConcordanceCheck.finished" @@ -248,6 +247,7 @@ else done < <(find "${PRM_ROOT_DIR}/concordance/results/" -maxdepth 1 -type f -iname "${filePrefix}.*") # shellcheck disable=SC2029 ssh "${DATA_MANAGER}@${HOSTNAME_TMP}" "mv \"${sampleSheet}\" \"${TMP_ROOT_DIAGNOSTICS_DIR}/concordance/samplesheets/archive/\"" + log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "moved: ${JOB_CONTROLE_FILE_BASE}.{started,finished}" mv "${JOB_CONTROLE_FILE_BASE}."{started,finished} else From 5a5207b7f7f046701b70755fe5e887619fe4b60a Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Tue, 21 May 2024 14:23:33 +0000 Subject: [PATCH 6/7] relocated logfile for notifications --- bin/ConcordanceCheck.sh | 8 ++++---- bin/copyConcordanceCheckData.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/ConcordanceCheck.sh b/bin/ConcordanceCheck.sh index a3a95b0..ed25bb5 100755 --- a/bin/ConcordanceCheck.sh +++ b/bin/ConcordanceCheck.sh @@ -131,7 +131,7 @@ declare -a configFiles=( "${HOME}/molgenis.cfg" ) -for configFile in "${configFiles[@]}"; do +for configFile in "${configFiles[@]}"; do if [[ -f "${configFile}" && -r "${configFile}" ]] then log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "Sourcing config file ${configFile} ..." @@ -177,11 +177,11 @@ do log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing samplesheet ${sampleSheet} ..." filePrefix="$(basename "${sampleSheet}" .sampleId.txt)" concordanceCheckId="${filePrefix}" - controlFileBase="${concordanceDir}/logs/" - export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}/${filePrefix}.${SCRIPT_NAME}" + controlFileBase="${concordanceDir}/logs/concordance/" + export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}.${SCRIPT_NAME}" logDir="${concordanceDir}/logs/${filePrefix}" # shellcheck disable=SC2174 - mkdir -m 2770 -p "${logDir}" + mkdir -m 2770 -p "${controlFileBase}" mkdir -p "${concordanceDir}/jobs/${concordanceCheckId}" log4Bash 'DEBUG' "${LINENO}" "${FUNCNAME:-main}" '0' "TEST ls ${concordanceDir}/jobs/${concordanceCheckId}/${concordanceCheckId}.sh" diff --git a/bin/copyConcordanceCheckData.sh b/bin/copyConcordanceCheckData.sh index 72070fa..51b18ce 100755 --- a/bin/copyConcordanceCheckData.sh +++ b/bin/copyConcordanceCheckData.sh @@ -223,7 +223,7 @@ else # filePrefix=$(basename "${sampleSheet%.sampleId.txt}") log4Bash 'INFO' "${LINENO}" "${FUNCNAME:-main}" '0' "Processing run ${filePrefix} ..." - controlFileBase="${PRM_ROOT_DIR}/logs/${filePrefix}" + controlFileBase="${PRM_ROOT_DIR}/logs/concordance/" export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}.${SCRIPT_NAME}" # shellcheck disable=SC2174 mkdir -m 2770 -p "${controlFileBase}" From 6414ac25d3beaf07587d6d342ac58286201c6e00 Mon Sep 17 00:00:00 2001 From: Gerbenvandervries Date: Tue, 21 May 2024 14:47:31 +0000 Subject: [PATCH 7/7] removed logDir --- bin/ConcordanceCheck.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/ConcordanceCheck.sh b/bin/ConcordanceCheck.sh index ed25bb5..deaaa30 100755 --- a/bin/ConcordanceCheck.sh +++ b/bin/ConcordanceCheck.sh @@ -179,7 +179,6 @@ do concordanceCheckId="${filePrefix}" controlFileBase="${concordanceDir}/logs/concordance/" export JOB_CONTROLE_FILE_BASE="${controlFileBase}/${filePrefix}.${SCRIPT_NAME}" - logDir="${concordanceDir}/logs/${filePrefix}" # shellcheck disable=SC2174 mkdir -m 2770 -p "${controlFileBase}" mkdir -p "${concordanceDir}/jobs/${concordanceCheckId}"