sanger-tol · DLBPointon · Sep 13, 2022 · Sep 9, 2022 · Sep 9, 2022 · Sep 13, 2022
diff --git a/assets/digest.as → assets/digest/digest.as b/assets/digest.as → assets/digest/digest.as
diff --git a/assets/treeval_test.yaml b/assets/treeval_test.yaml
@@ -6,11 +6,12 @@ assembly:
   dbVersion: "1"
   gevalType: DTOL
 reference_file: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa
+fasta: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa
 alignment:
   data_dir: /nfs/team135/dp24/treeval_testdata/gene_alignment_data/
   geneset: "Gae_host.Gae,CSKR_v2.CSKR"
 self_comp:
   motif_len: int
   mummer_chunk: int
 synteny:
-  synteny_genome_path: "/path/to/file"
+  synteny_genome_path: "/path/to/file"
diff --git a/conf/modules.config b/conf/modules.config
@@ -18,20 +18,9 @@ process {
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
 
-    withName: SAMPLESHEET_CHECK {
-        publishDir = [
-            path: { "${params.outdir}/pipeline_info" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
-    withName: FASTQC {
-        ext.args = '--quiet'
-    }
-
     withName: 'INSILICO_DIGEST:UCSC_BEDTOBIGBED' {
-        ext.args = "-as=$projectDir/assets/digest.as -type=bed4+1 -extraIndex=length"
+        ext.args        = { "-as=${projectDir}/assets/digest/digest.as -type=bed4+1 -extraIndex=length" }
+        ext.prefix      = { "${meta.id}" }
     }
 
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {

diff --git a/conf/test_genealignment.config b/conf/test_genealignment.config
@@ -0,0 +1,19 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running a minimal gene alignment test
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/treeval -profile test_genealignment,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name         = 'test_genealignment'
+    config_profile_description  = 'Minimal data set for gene alignments to input fasta'
+
+    input                       = "${projectDir}/assets/treeval_test.yaml"   // anchored to the pipeline directory so the test works from any launch directory
+    outdir                      = './testing/'                               // default output location, relative to the launch directory
+}
diff --git a/main.nf b/main.nf
@@ -17,7 +17,6 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/nextflow.config b/nextflow.config
@@ -93,7 +93,7 @@ profiles {
     singularity {
         singularity.enabled    = true
         singularity.autoMounts = true
-        docker.enabled         = true
+        docker.enabled         = false
         podman.enabled         = false
         shifter.enabled        = false
         charliecloud.enabled   = false
@@ -121,6 +121,7 @@ profiles {
     }
     test      { includeConfig 'conf/test.config'      }
     test_full { includeConfig 'conf/test_full.config' }
+    test_genealignment {includeConfig 'conf/test_genealignment.config' }
 }
 
 // Load igenomes.config if required

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -15,8 +15,8 @@
                 "input": {
                     "type": "string",
                     "format": "file-path",
-                    "mimetype": "text/csv",
-                    "pattern": "^\\S+\\.csv$",
+                    "mimetype": "text/yaml",
+                    "pattern": "^\\S+\\.yaml$",
                     "schema": "assets/schema_input.json",
                     "description": "Path to comma-separated file containing information about the samples in the experiment.",
                     "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/treeval/usage#samplesheet-input).",
@@ -54,15 +54,6 @@
                     "fa_icon": "fas fa-book",
                     "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
                 },
-                "fasta": {
-                    "type": "string",
-                    "format": "file-path",
-                    "mimetype": "text/plain",
-                    "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
-                    "description": "Path to FASTA genome file.",
-                    "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
-                    "fa_icon": "far fa-file-code"
-                },
                 "igenomes_base": {
                     "type": "string",
                     "format": "directory-path",

diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf
@@ -1,5 +1,5 @@
 include { SAMTOOLS_FAIDX        } from '../../modules/nf-core/modules/samtools/faidx/main'
-include { GENERATE_GENOME_FILE  } from '../../modules/local/genome_file_generator'
+include { GENERATE_GENOME_FILE  } from '../../modules/local/generate_genome_file'
 include { TO_FILE               } from '../../modules/local/to_file'
 
 workflow GENERATE_GENOME {

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf
@@ -9,27 +9,35 @@ include { MAKECMAP_RENAMECMAPIDS } from '../../modules/sanger-tol/nf-core-module
 include { MAKECMAP_CMAP2BED } from '../../modules/sanger-tol/nf-core-modules/makecmap/cmap2bed/main'
 include { UCSC_BEDTOBIGBED } from '../../modules/nf-core/modules/ucsc/bedtobigbed/main'
 
-
-
-nextflow.enable.dsl = 2
-
 workflow INSILICO_DIGEST {
+    take:
+    myid            // channel val(sample_id)
+    sizefile        // channel [id: sample_id], my.genome_file
+    sample          // channel [id: sample_id], reference_file
+    ch_enzyme       // channel val( "bspq1","bsss1","DLE1" )
 
     main:
-
-    sample = params.sample
-    sizefile = params.chromsize
-    myid = sample
-
-    ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )
     ch_versions = Channel.empty()
 
-    input_fasta = [
-        [ id: myid, single_end:false ], // meta map
-        file(params.fasta, checkIfExists: true)
-    ]
-
-    MAKECMAP_FA2CMAPMULTICOLOR ( input_fasta, ch_enzyme )
+    input_fasta = sample.map { data -> 
+                                tuple([
+                                    id               : data[0].id,
+                                    single_end       : false
+                                    ],
+                                    file(data[1])
+                                )}
+
+    input_fasta
+        .combine(ch_enzyme)
+        .multiMap { data -> 
+            fasta:      tuple( data[0],
+                                data[1]
+                            )
+            enzyme:     data[2]
+            }
+        .set { fa2c_input } 
+
+    MAKECMAP_FA2CMAPMULTICOLOR ( fa2c_input.fasta, fa2c_input.enzyme )
 
     ch_cmap    = MAKECMAP_FA2CMAPMULTICOLOR.out.cmap
     ch_cmapkey = MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey
@@ -64,10 +72,12 @@ workflow INSILICO_DIGEST {
 
     ch_bedfile = MAKECMAP_CMAP2BED.out.bedfile
 
-    UCSC_BEDTOBIGBED ( ch_bedfile, sizefile)
+    UCSC_BEDTOBIGBED ( ch_bedfile, sizefile.map {it[1]}) // .as file
     ch_version = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions)
 
     emit:
     versions = ch_version
 
+    //merge into main <-- 
+
 }
diff --git a/workflows/treeval.nf b/workflows/treeval.nf
@@ -27,7 +27,7 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { INPUT_READ        } from '../subworkflows/local/input_check'
+include { INPUT_READ        } from '../subworkflows/local/yaml_input'
 include { GENERATE_GENOME   } from '../subworkflows/local/generate_genome'
 include { INSILICO_DIGEST   } from '../subworkflows/local/insilico_digest'
 // include { GENE_ALIGNMENT    } from '../subworkflows/local/gene_alignment'
@@ -62,6 +62,7 @@ workflow TREEVAL {
     // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field
     //
     INPUT_READ ( params.input )
+    INPUT_READ.out.assembly_id
 
     //
     // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file
@@ -75,10 +76,13 @@ workflow TREEVAL {
     //
     //SUBWORKFLOW: 
     //
-    //INSILICO_DIGEST ( INPUT_READ.out.sample_id,
-    //                  GENERATE_GENOME.out.dot_genome,
-    //                  GENERATE_GENOME.out.reference_tuple )
-    //ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions)
+    ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )
+
+    INSILICO_DIGEST ( INPUT_READ.out.assembly_id,
+                      GENERATE_GENOME.out.dot_genome,
+                      GENERATE_GENOME.out.reference_tuple,
+                      ch_enzyme )
+    ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions)
 
     //
     //SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data
-Original file line number
+Diff line change
@@ Expand Up / @@ -17,7 +17,6 @@ nextflow.enable.dsl = 2 @@
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     */
-    params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
     /*
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ Expand Down @@