init for the new subworkflow generate-downstream-samplesheet

nf-core · Oct 29, 2024 · f2a4f6b · f2a4f6b
1 parent 85add1b
commit f2a4f6b
Show file tree

Hide file tree

Showing 5 changed files with 173 additions and 19 deletions.
diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -1,39 +1,49 @@
-//
-// Subworkflow with functionality specific to the nf-core/createtaxdb pipeline
-//
+
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW SPECIFIC FOR RNASEQ
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
 
 workflow SAMPLESHEET_RNASEQ {
     take:
     ch_reads
+    format // samplesheet file format, passed straight through to channelToSamplesheet
 
     main:
-    format = 'csv'
-
-    ch_list_for_samplesheet = ch_reads.map { meta, db ->
-        def tool = meta.tool
-        def db_name = meta.id + '-' + meta.tool
-        def db_params = ""
-        def db_type = ""
-        def db_path = file(params.outdir).toString() + '/' + meta.tool + '/' + db.getName()
-        [tool: tool, db_name: db_name, db_params: db_params, db_type: db_type, db_path: db_path]
-    }
 
-    if (params.build_bracken && params.build_kraken2) {
-        log.warn("Generated nf-core/taxprofiler samplesheet will only have a row for bracken. If Kraken2 is wished to be executed separately, duplicate row and update tool column to Kraken2!")
+    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
+        def out_path     = file(params.outdir).toString() + '/relative/custom/path/'
+        def sample       = meta.id
+        def fastqs       = [reads].flatten() // accept a bare path or a list of paths, so single-end lists work too
+        def fastq_1      = out_path + fastqs[0].getName()
+        def fastq_2      = !meta.single_end ? out_path + fastqs[1].getName() : "" // single-end rows leave fastq_2 empty
+        [sample: sample, fastq_1: fastq_1, fastq_2: fastq_2, strandedness: "auto"]
     }
 
-    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/databases-taxprofiler", format)
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/rnaseq", format)
 }
 
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW CALLING PIPELINE SPECIFIC SAMPLESHEET GENERATION
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
 workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
     take:
-    ch_databases
+    ch_reads // channel forwarded unchanged to SAMPLESHEET_RNASEQ, which destructures each item as (meta, reads)
 
     main:
     def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",")
 
-    if (downstreampipeline_names.contains('taxprofiler')) {
-        SAMPLESHEET_TAXPROFILER(ch_databases)
+    if (downstreampipeline_names.contains('rnaseq')) { // NOTE(review): exact match after split(","); entries are not trimmed
+        SAMPLESHEET_RNASEQ(
+            ch_reads,
+            params.generate_pipeline_samplesheets_format
+        )
     }
 }
 

diff --git a/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test b/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test
@@ -0,0 +1,79 @@
+
+nextflow_function {
+
+    name "Test Functions"
+    script "../main.nf"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    test("Test Function channelToSamplesheet - csv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                input[0] = Channel.of( // rows to write: one paired-end and one single-end sample
+                    [
+                        [sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
+                        strandedness: 'auto']
+                    ],
+                    [
+                        [sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto']
+                    ]
+                )
+                input[1] = "$outputDir/test.csv"
+                input[2] = "csv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+
+    test("Test Function channelToSamplesheet - tsv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                input[0] = Channel.of( // same rows as the csv test, written with the tsv format
+                    [
+                        [sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
+                        strandedness: 'auto']
+                    ],
+                    [
+                        [sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto']
+                    ]
+                )
+                input[1] = "$outputDir/test.tsv"
+                input[2] = "tsv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+}
diff --git a/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test b/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test
@@ -0,0 +1,43 @@
+nextflow_workflow {
+
+    name "Test Workflow GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    script "../main.nf"
+    workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    test("Test workflow rnaseq") {
+        when {
+            params {
+                outdir                                = "."
+                generate_pipeline_samplesheets        = 'rnaseq'
+                generate_pipeline_samplesheets_format = 'csv'
+            }
+            workflow {
+                """
+                input[0] = Channel.of( // [ meta, reads ] tuples, the shape SAMPLESHEET_RNASEQ destructures
+                    [
+                        [id: 'test-pe', single_end: false], [file('test_1.fastq.gz'), file('test_2.fastq.gz')]
+                    ],
+                    [
+                        [id: 'test-se', single_end: true], file('test_1.fastq.gz')
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    [
+                        "${params.outdir}/downstream_samplesheets/rnaseq.csv"
+                    ]).match()
+                }
+            )
+        }
+    }
+
+}
diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -11,6 +11,7 @@
 {% if multiqc %}include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'{% endif %}
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 {% if citations or multiqc %}include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_{{ short_name }}_pipeline'{% endif %}
+{% if downstream_samplesheet %}include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets'{% endif %}
 {%- endif %}
 
 /*
@@ -41,6 +42,16 @@ workflow {{ short_name|upper }} {
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
     {%- endif %}
 
+
+    {% if downstream_samplesheet %}
+    //
+    // SUBWORKFLOW: Generate downstream samplesheets
+    //
+    GENERATE_DOWNSTREAM_SAMPLESHEETS(
+        ch_samplesheet
+    )
+    {% endif %}
+
     //
     // Collate and save software versions
     //

diff --git a/nf_core/pipelines/create/template_features.yml b/nf_core/pipelines/create/template_features.yml
@@ -279,6 +279,17 @@ modules:
       - "modules.json"
   nfcore_pipelines: False
   custom_pipelines: True
+downstream_samplesheet:
+  skippable_paths:
+    - "subworkflows/local/generate_downstream_samplesheets/"
+  short_description: "Include a subworkflow to generate downstream samplesheets"
+  description: "The pipeline will include the generate_downstream_samplesheets subworkflow for the generation of a samplesheet for other downstream pipelines."
+  help_text: |
+    The pipeline will include the generate_downstream_samplesheets subworkflow.
+    generate_downstream_samplesheets is a subworkflow which provides a base template for generating samplesheets.
+    The subworkflow takes an input channel, converts it into the correct format and writes it to a samplesheet that can be used as an input for another pipeline.
+  nfcore_pipelines: True
+  custom_pipelines: True
 changelog:
   skippable_paths:
     - "CHANGELOG.md"