diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 5edc533..9bf1f43 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -40,7 +40,11 @@ jobs: with: name: Slides path: | - slides/*.pdf + slides/Snakemake_HPC_Creators.pdf + slides/Snakemake_HPC_Users.pdf + slides/Snakemake_HPC_User_Creator_Combi.pdf + slides/Snakemake_HPC_Admins.pdf + release-please: diff --git a/config/config_Mainz_NHR.yaml b/config/config_Mainz_NHR.yaml index e680d1a..66e516d 100644 --- a/config/config_Mainz_NHR.yaml +++ b/config/config_Mainz_NHR.yaml @@ -51,10 +51,17 @@ course: # to introduce SLURM. It requires different account and partition # settings on every cluster. hello_world_script: "common/Hello_World_HPC_Mogon_NHR.tex" + cluster: + # the cluster name + name: "Mogon-NHR" + # the cluster distro + distro: "Alma-Linux" # the cluster account and default partition account: "nhr-workflow" partition: "smallcpu" # the local and remote storage prefixes on this cluster remote-job-local-storage-prefix: "/localscratch/$SLURM_JOB_ID" local-storage-prefix: "/dev/shm/$USER/snakemake" + # display program for SVG files + display_program: "display" diff --git a/images/humor/DALLE_LEGO-scientist-coding.jpg b/images/humor/DALLE_LEGO-scientist-coding.jpg new file mode 100644 index 0000000..75f51e8 Binary files /dev/null and b/images/humor/DALLE_LEGO-scientist-coding.jpg differ diff --git a/images/misc/cluster_kit.jpg b/images/misc/cluster_kit.jpg new file mode 100644 index 0000000..6b4751c Binary files /dev/null and b/images/misc/cluster_kit.jpg differ diff --git a/setup_creators/condarc_mogon b/setup_creators/condarc_mogon index ed22301..f21cdfa 100644 --- a/setup_creators/condarc_mogon +++ b/setup_creators/condarc_mogon @@ -4,10 +4,11 @@ channels: - conda-forge - bioconda - defaults - - r proxy_servers: http: http://webproxy.zdv.uni-mainz.de:8888 #https: https://webproxy.zdv.uni-mainz.de:8888 ssl_verify: false 
auto_update_conda: false always_yes: true # avoid confirmation(s) +channel_priority: strict +env_prompt: "($(basename {default_env})) " diff --git a/setup_creators/tutorial/profile/config.yaml b/setup_creators/tutorial/profile/Dresden_Barnard/config.yaml similarity index 100% rename from setup_creators/tutorial/profile/config.yaml rename to setup_creators/tutorial/profile/Dresden_Barnard/config.yaml diff --git a/setup_creators/tutorial/profile/Mogon_NHR/config.yaml b/setup_creators/tutorial/profile/Mogon_NHR/config.yaml new file mode 100644 index 0000000..5735202 --- /dev/null +++ b/setup_creators/tutorial/profile/Mogon_NHR/config.yaml @@ -0,0 +1,29 @@ +# Resource configuration for bioinformatics workflow on Mogon NHR +# +# This file defines resource allocations for various bioinformatics tools. +# Adjust these settings based on your specific dataset characteristics: +# - Input file sizes +# - Genome complexity +# - Coverage depth +# +# Remember to monitor job performance and adjust resources as needed. + +default-resources: + slurm_partition=smallcpu + +set-resources: + bwa_map: + runtime: 5 # Adjust based on your dataset size. Typical range: 30-120 minutes + mem_mb_per_cpu: 1800 # Increase for larger datasets, e.g., 3600 for 30x coverage + + samtools_sort: + runtime: 5 # Adjust based on your dataset size. Typical range: 20-60 minutes + mem_mb_per_cpu: 3600 # Sorting requires more memory. Increase for larger datasets + + samtools_index: + runtime: 2 # Indexing is usually quick. Adjust if needed for very large files + mem_mb_per_cpu: 1800 # Indexing typically requires less memory than sorting + + bcftools_call: + runtime: 15 # Variant calling can be time-consuming. 
Adjust based on genome size and coverage + mem_mb_per_cpu: 2400 # Increase for larger genomes or higher coverage datasets diff --git a/slides/Snakemake_HPC_User_Creator_Combi.tex b/slides/Snakemake_HPC_User_Creator_Combi.tex index 7cdae4b..63a37c9 100644 --- a/slides/Snakemake_HPC_User_Creator_Combi.tex +++ b/slides/Snakemake_HPC_User_Creator_Combi.tex @@ -49,6 +49,9 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \include{common/software_environment} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\include{common/HPC_101} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \include{users/Selecting_Workflows} @@ -70,9 +73,6 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \include{common/Plotting_DAGs} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\include{common/HPC_101} - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \include{creators/Decorating_the_Workflow} diff --git a/slides/common/HPC_101.tex b/slides/common/HPC_101.tex index fde9742..1ae7838 100644 --- a/slides/common/HPC_101.tex +++ b/slides/common/HPC_101.tex @@ -1,18 +1,20 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{How does Clustercomputing work?} +{ + \usebackgroundtemplate{ + \vbox to \paperheight{\vfil\hbox to \paperwidth{\hfil\includegraphics[height=\paperheight]{misc/cluster_kit}\hfil}\vfil} + % source: https://en.m.wikipedia.org/wiki/File:Text-x-python.svg + } + \frame{ + \frametitle{How does Cluster Computing work?} + \begin{mdframed}[tikzsetting={draw=white,fill=white,fill opacity=0.8, + line width=0pt},backgroundcolor=none,leftmargin=0, + rightmargin=150,innertopmargin=4pt,roundcorner=10pt] + \tableofcontents[currentsection,sections={1-4},hideothersubsections] + \end{mdframed} + } +} 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame} - \frametitle{Outline} - \begin{columns}[t] - \begin{column}{.5\textwidth} - \tableofcontents[sections={1-7},currentsection] - \end{column} - \begin{column}{.5\textwidth} - \tableofcontents[sections={8-15},currentsection] - \end{column} - \end{columns} -\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} @@ -26,6 +28,7 @@ \section{How does Clustercomputing work?} \begin{docs}[Objectives] \begin{enumerate} \item Get a feeling for the SLURM batch system. + \item We want to give you an idea of building a workflow with \emph{pure} batch system commands, first. \end{enumerate} \end{docs} \end{frame} @@ -106,6 +109,31 @@ \subsection*{The \slurm Scheduler} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \input{<++course.hello_world_script++>} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[fragile]% + \frametitle{Please Imagine \ldots} + Now, think of your analysis workflow: QC, preprocessing, processing, analysis, post-processing, plotting and \ldots \newline + \pause + All this \emph{can} be achieved with SLURM, too -- all with bash: + \begin{lstlisting}[language=Bash, style=Shell] +# First, do some pre-processing for the first job. +... +# Then, submit a job without dependencies. +jid1=$(sbatch --parsable ... job1.sh) +# NOTE: ALL 'job*.sh' scripts are bash scripts, +# with more logic than the "hello world" script. + +# Next, do some more logic as pre-processing for the +# follow-up jobs. ... +# multiple jobs can depend on a single job +jid2=$(sbatch --dependency=afterany:$jid1 ... job2.sh) +jid3=$(sbatch --dependency=afterany:$jid1 ... job3.sh) + \end{lstlisting} + etc. This can easily grow to a few thousand lines for \emph{every} workflow. 
+ \vfill +\end{frame} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{End of HPC Intro} diff --git a/slides/common/Plotting_DAGs.tex b/slides/common/Plotting_DAGs.tex index 8a829ac..f094560 100644 --- a/slides/common/Plotting_DAGs.tex +++ b/slides/common/Plotting_DAGs.tex @@ -1,18 +1,19 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Plotting DAGs} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame} - \frametitle{Outline} - \begin{columns}[t] - \begin{column}{.5\textwidth} - \tableofcontents[sections={1-7},currentsection] - \end{column} - \begin{column}{.5\textwidth} - \tableofcontents[sections={8-15},currentsection] - \end{column} - \end{columns} -\end{frame} +{ + \usebackgroundtemplate{ + \vbox to \paperheight{\vfil\hbox to \paperwidth{\hfil\includegraphics[height=\paperheight]{example_dags/rulegraph_complex.png}\hfil}\vfil} + % source: https://en.m.wikipedia.org/wiki/File:Text-x-python.svg + } + \frame{ + \frametitle{Plotting Workflow Graphs} + \begin{mdframed}[tikzsetting={draw=white,fill=white,fill opacity=0.8, + line width=0pt},backgroundcolor=none,leftmargin=0, + rightmargin=150,innertopmargin=4pt,roundcorner=10pt] + \tableofcontents[currentsection,sections={1-4},hideothersubsections] + \end{mdframed} + } +} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} diff --git a/slides/common/Sample_Data.tex b/slides/common/Sample_Data.tex index 4dc0338..db65d48 100644 --- a/slides/common/Sample_Data.tex +++ b/slides/common/Sample_Data.tex @@ -76,7 +76,7 @@ \section{Getting your Sample Data} \frametitle{\HandsOn{Obtaining Your Tutorial Sample Data}} Please run the \altverb{get_tutorial.sh} script you just copied: \begin{lstlisting}[language=Bash, style=Shell,basicstyle=\footnotesize] - $ bash get_tutorial.sh +$ bash get_tutorial.sh \end{lstlisting} \begin{task} This 
script will download and unpack the sample data for this course. Please take a look in this script to understand it. Where are your sample data after running this script? diff --git a/slides/common/mod_cs_mainz.tex b/slides/common/mod_cs_mainz.tex index 2cec0e6..0c63dc9 100644 --- a/slides/common/mod_cs_mainz.tex +++ b/slides/common/mod_cs_mainz.tex @@ -1,18 +1,20 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Using Code-Studio on \texttt{Mogon-NHR}} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame} - \frametitle{Outline} - \begin{columns}[t] - \begin{column}{.5\textwidth} - \tableofcontents[sections={1-9},currentsection] - \end{column} - \begin{column}{.5\textwidth} - \tableofcontents[sections={10-18},currentsection] - \end{column} - \end{columns} -\end{frame} +{ + \usebackgroundtemplate{ + \vbox to \paperheight{\vfil\hbox to \paperwidth{\hfil\includegraphics[height=\paperheight]{humor/DALLE_LEGO-scientist-coding.jpg}\hfil}\vfil} + % source: https://en.m.wikipedia.org/wiki/File:Text-x-python.svg + } + \frame{ + \frametitle{Using Code-Studio} + \begin{mdframed}[tikzsetting={draw=white,fill=white,fill opacity=0.8, + line width=0pt},backgroundcolor=none,leftmargin=0, + rightmargin=150,innertopmargin=4pt,roundcorner=10pt] + \tableofcontents[currentsection,sections={1-4},hideothersubsections] + \end{mdframed} + } + \vspace{12mm}\hfill{\tiny \lhref{https://zenodo.org/records/11147887}{from Ewa Bres \& Christian Bittner}} +} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} @@ -77,7 +79,7 @@ \section{Using Code-Studio on \texttt{Mogon-NHR}} \begin{itemize}[<+->] \item Navigate to \url{https://mod.hpc.uni-mainz.de} in your browser. \item Select the CodeServer - \item From the drop down menu select your account \verb-<<++cluster.account++>-. + \item From the drop down menu select your account "\verb,<++cluster.account++>,". 
\item Reserve the job to run for 8 hours. \item Number of Tasks needs to be 1. \item CPUs per Tasks needs to be 1. diff --git a/slides/creators/Python_in_Snakemake.tex b/slides/creators/Python_in_Snakemake.tex index e251b31..0f61930 100644 --- a/slides/creators/Python_in_Snakemake.tex +++ b/slides/creators/Python_in_Snakemake.tex @@ -363,15 +363,20 @@ \subsection{Running the final Workflow} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{\HandsOn{Executing this Workflow}} - Does our Workflow contain errors? We run a debug trial: - \begin{lstlisting}[language=Bash, style=Shell] + \begin{task} + Does our Workflow contain errors? We run a debug trial: + \begin{lstlisting}[language=Bash, style=Shell] $ snakemake --debug - \end{lstlisting} + \end{lstlisting} + \end{task} \pause Some targets are already present, we want the entire workflow again: \begin{lstlisting}[language=Bash, style=Shell] $ snakemake -c4 --forcerun +$ # or short with +$ snakemake -c4 -F \end{lstlisting} + \begin{question} What do you observe? Why \altverb{-c4}? \end{question} @@ -380,9 +385,9 @@ \subsection{Running the final Workflow} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{\HandsOn{Visualising the Output}} - On <++cluster.name++> the Linux distro is <++cluster.distro++>, which provides the \altverb{display}-program to display simple images. We shall invoke: + On <++cluster.name++> the Linux distro is "<++cluster.distro++>", which \newline provides the \altverb{<++cluster.display_program++>}-program to display simple images. We shall invoke: \begin{lstlisting}[language=Bash, style=Shell] -$ display plots/quals.svg +$ <++cluster.display_program++> plots/quals.svg \end{lstlisting} The figure has no axis-labels. \begin{question}