Merge pull request #27 from DLBPointon/digest_fix

Digest fix
sanger-tol · Sep 13, 2022 · dc0fc9d · dc0fc9d
2 parents 41acb28 + 39dfc96
commit dc0fc9d
Show file tree

Hide file tree

Showing 11 changed files with 62 additions and 92 deletions.
diff --git a/assets/digest.as → assets/digest/digest.as b/assets/digest.as → assets/digest/digest.as
diff --git a/assets/treeval_test.yaml b/assets/treeval_test.yaml
@@ -6,11 +6,12 @@ assembly:
   dbVersion: "1"
   gevalType: DTOL
 reference_file: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa
+fasta: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa
 alignment:
   data_dir: /nfs/team135/dp24/treeval_testdata/gene_alignment_data/
   geneset: "Gae_host.Gae,CSKR_v2.CSKR"
 self_comp:
   motif_len: int
   mummer_chunk: int
 synteny:
-  synteny_genome_path: "/path/to/file"
+  synteny_genome_path: "/path/to/file"
diff --git a/conf/modules.config b/conf/modules.config
@@ -18,20 +18,9 @@ process {
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
 
-    withName: SAMPLESHEET_CHECK {
-        publishDir = [
-            path: { "${params.outdir}/pipeline_info" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-
-    withName: FASTQC {
-        ext.args = '--quiet'
-    }
-
     withName: 'INSILICO_DIGEST:UCSC_BEDTOBIGBED' {
-        ext.args = "-as=$projectDir/assets/digest.as -type=bed4+1 -extraIndex=length"
+        ext.args        = { "-as=${projectDir}/assets/digest/digest.as -type=bed4+1 -extraIndex=length" }
+        ext.prefix      = { "${meta.id}" }
     }
 
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {

diff --git a/conf/test_genealignment.config b/conf/test_genealignment.config
@@ -0,0 +1,19 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/treeval -profile test_genealignment,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name         = 'test_genealignment'
+    config_profile_description  = 'Minimal data set for gene alignments to input fasta'
+
+    input                       = './assets/treeval_test.yaml'
+    outdir			= './testing/'
+}
diff --git a/main.nf b/main.nf
@@ -17,7 +17,6 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/nextflow.config b/nextflow.config
@@ -93,7 +93,7 @@ profiles {
     singularity {
         singularity.enabled    = true
         singularity.autoMounts = true
-        docker.enabled         = true
+        docker.enabled         = false
         podman.enabled         = false
         shifter.enabled        = false
         charliecloud.enabled   = false
@@ -121,6 +121,7 @@ profiles {
     }
     test      { includeConfig 'conf/test.config'      }
     test_full { includeConfig 'conf/test_full.config' }
+    test_genealignment {includeConfig 'conf/test_genealignment.config' }
 }
 
 // Load igenomes.config if required

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -54,15 +54,6 @@
                     "fa_icon": "fas fa-book",
                     "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
                 },
-                "fasta": {
-                    "type": "string",
-                    "format": "file-path",
-                    "mimetype": "text/plain",
-                    "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
-                    "description": "Path to FASTA genome file.",
-                    "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
-                    "fa_icon": "far fa-file-code"
-                },
                 "igenomes_base": {
                     "type": "string",
                     "format": "directory-path",

diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf
@@ -1,5 +1,5 @@
 include { SAMTOOLS_FAIDX        } from '../../modules/nf-core/modules/samtools/faidx/main'
-include { GENERATE_GENOME_FILE  } from '../../modules/local/genome_file_generator'
+include { GENERATE_GENOME_FILE  } from '../../modules/local/generate_genome_file'
 include { TO_FILE               } from '../../modules/local/to_file'
 
 workflow GENERATE_GENOME {

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf
@@ -9,27 +9,35 @@ include { MAKECMAP_RENAMECMAPIDS } from '../../modules/sanger-tol/nf-core-module
 include { MAKECMAP_CMAP2BED } from '../../modules/sanger-tol/nf-core-modules/makecmap/cmap2bed/main'
 include { UCSC_BEDTOBIGBED } from '../../modules/nf-core/modules/ucsc/bedtobigbed/main'
 
-
-
-nextflow.enable.dsl = 2
-
 workflow INSILICO_DIGEST {
+    take:
+    myid            // channel val(sample_id)
+    sizefile        // channel [id: sample_id], my.genome_file
+    sample          // channel [id: sample_id], reference_file
+    ch_enzyme       // channel val( "bspq1","bsss1","DLE1" )
 
     main:
-
-    sample = params.sample
-    sizefile = params.chromsize
-    myid = sample
-
-    ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )
     ch_versions = Channel.empty()
 
-    input_fasta = [
-        [ id: myid, single_end:false ], // meta map
-        file(params.fasta, checkIfExists: true)
-    ]
-
-    MAKECMAP_FA2CMAPMULTICOLOR ( input_fasta, ch_enzyme )
+    input_fasta = sample.map { data -> 
+                                tuple([
+                                    id               : data[0].id,
+                                    single_end       : false
+                                    ],
+                                    file(data[1])
+                                )}
+
+    input_fasta
+        .combine(ch_enzyme)
+        .multiMap { data -> 
+            fasta:      tuple( data[0],
+                                data[1]
+                            )
+            enzyme:     data[2]
+            }
+        .set { fa2c_input } 
+
+    MAKECMAP_FA2CMAPMULTICOLOR ( fa2c_input.fasta, fa2c_input.enzyme )
 
     ch_cmap    = MAKECMAP_FA2CMAPMULTICOLOR.out.cmap
     ch_cmapkey = MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey
@@ -64,10 +72,12 @@ workflow INSILICO_DIGEST {
 
     ch_bedfile = MAKECMAP_CMAP2BED.out.bedfile
 
-    UCSC_BEDTOBIGBED ( ch_bedfile, sizefile)
+    UCSC_BEDTOBIGBED ( ch_bedfile, sizefile.map {it[1]}) // .as file
     ch_version = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions)
 
     emit:
     versions = ch_version
 
+    //merge into main <-- 
+
 }
diff --git a/workflows/treeval.nf b/workflows/treeval.nf
@@ -27,7 +27,7 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample
 //
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
-include { INPUT_READ        } from '../subworkflows/local/input_check'
+include { INPUT_READ        } from '../subworkflows/local/yaml_input'
 include { GENERATE_GENOME   } from '../subworkflows/local/generate_genome'
 include { INSILICO_DIGEST   } from '../subworkflows/local/insilico_digest'
 // include { GENE_ALIGNMENT    } from '../subworkflows/local/gene_alignment'
@@ -62,6 +62,7 @@ workflow TREEVAL {
     // SUBWORKFLOW: reads the YAML and pushes each YAML field out into its own channel
     //
     INPUT_READ ( params.input )
+    INPUT_READ.out.assembly_id
 
     //
     // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file
@@ -75,10 +76,13 @@ workflow TREEVAL {
     //
     //SUBWORKFLOW: 
     //
-    //INSILICO_DIGEST ( INPUT_READ.out.sample_id,
-    //                  GENERATE_GENOME.out.dot_genome,
-    //                  GENERATE_GENOME.out.reference_tuple )
-    //ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions)
+    ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )
+
+    INSILICO_DIGEST ( INPUT_READ.out.assembly_id,
+                      GENERATE_GENOME.out.dot_genome,
+                      GENERATE_GENOME.out.reference_tuple,
+                      ch_enzyme )
+    ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions)
 
     //
     //SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data
-Original file line number
+Diff line change
@@ Expand Up / @@ -17,7 +17,6 @@ nextflow.enable.dsl = 2 @@
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     */
-    params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
     /*
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ Expand Down @@