From cc517e3bf23ef5d7e9bad09511f2e1ade7e16c7d Mon Sep 17 00:00:00 2001 From: dp24 Date: Fri, 9 Sep 2022 13:04:50 +0100 Subject: [PATCH 1/3] Changes to allow for the running of INSILICO_DIGEST --- assets/{ => digest}/digest.as | 0 assets/treeval_test.yaml | 3 +- conf/modules.config | 15 ++------- main.nf | 1 - nextflow.config | 3 +- nextflow_schema.json | 13 ++------ subworkflows/local/generate_genome.nf | 2 +- subworkflows/local/input_check.nf | 44 --------------------------- subworkflows/local/insilico_digest.nf | 44 ++++++++++++++++----------- workflows/treeval.nf | 14 ++++++--- 10 files changed, 44 insertions(+), 95 deletions(-) rename assets/{ => digest}/digest.as (100%) delete mode 100644 subworkflows/local/input_check.nf diff --git a/assets/digest.as b/assets/digest/digest.as similarity index 100% rename from assets/digest.as rename to assets/digest/digest.as diff --git a/assets/treeval_test.yaml b/assets/treeval_test.yaml index 5602a7a5..ba87cc7f 100644 --- a/assets/treeval_test.yaml +++ b/assets/treeval_test.yaml @@ -6,6 +6,7 @@ assembly: dbVersion: "1" gevalType: DTOL reference_file: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa +fasta: /lustre/scratch123/tol/teams/grit/geval_pipeline/geval_runs/DTOL/nxOscDoli1_1/data/DTOL_nxOscDoli1_1_FULL.fa alignment: data_dir: /nfs/team135/dp24/treeval_testdata/gene_alignment_data/ geneset: "Gae_host.Gae,CSKR_v2.CSKR" @@ -13,4 +14,4 @@ self_comp: motif_len: int mummer_chunk: int synteny: - synteny_genome_path: "/path/to/file" + synteny_genome_path: "/path/to/file" \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index e0c37535..dc7348aa 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,20 +18,9 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: FASTQC { - ext.args = '--quiet' - } - withName: 'INSILICO_DIGEST:UCSC_BEDTOBIGBED' { - ext.args = "-as=$projectDir/assets/digest.as -type=bed4+1 -extraIndex=length" + ext.args = { "-as=${projectDir}/assets/digest/digest.as -type=bed4+1 -extraIndex=length" } + ext.prefix = { "${meta.id}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/main.nf b/main.nf index 3032ab19..ce7fc1cc 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,6 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/nextflow.config b/nextflow.config index af258f1c..da7dd28c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -93,7 +93,7 @@ profiles { singularity { singularity.enabled = true singularity.autoMounts = true - docker.enabled = true + docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false @@ -121,6 +121,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + test_genealignment {includeConfig 'conf/test_genealignment.config' } } // Load igenomes.config if required diff --git a/nextflow_schema.json b/nextflow_schema.json index 98b2ee64..2737ecd9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,8 +15,8 @@ "input": { "type": "string", "format": "file-path", - "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", + "mimetype": "text/yaml", + "pattern": "^\\S+\\.yaml$", "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the 
experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/treeval/usage#samplesheet-input).", @@ -54,15 +54,6 @@ "fa_icon": "fas fa-book", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, - "fasta": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" - }, "igenomes_base": { "type": "string", "format": "directory-path", diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf index 4673e4e3..51ef1082 100644 --- a/subworkflows/local/generate_genome.nf +++ b/subworkflows/local/generate_genome.nf @@ -1,5 +1,5 @@ include { SAMTOOLS_FAIDX } from '../../modules/nf-core/modules/samtools/faidx/main' -include { GENERATE_GENOME_FILE } from '../../modules/local/genome_file_generator' +include { GENERATE_GENOME_FILE } from '../../modules/local/generate_genome_file' include { TO_FILE } from '../../modules/local/to_file' workflow GENERATE_GENOME { diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87f..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, 
"ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index b43ff683..4514ed05 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -9,27 +9,35 @@ include { MAKECMAP_RENAMECMAPIDS } from '../../modules/sanger-tol/nf-core-module include { MAKECMAP_CMAP2BED } from '../../modules/sanger-tol/nf-core-modules/makecmap/cmap2bed/main' include { UCSC_BEDTOBIGBED } from '../../modules/nf-core/modules/ucsc/bedtobigbed/main' - - -nextflow.enable.dsl = 2 - workflow INSILICO_DIGEST { + take: + myid // channel val(sample_id) + sizefile // channel [id: sample_id], my.genome_file + sample // channel [id: sample_id], reference_file + ch_enzyme // channel val( "bspq1","bsss1","DLE1" ) main: - - sample = params.sample - sizefile = params.chromsize - myid = sample - - ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) ch_versions = Channel.empty() - input_fasta = [ - [ id: myid, single_end:false ], // meta map - file(params.fasta, checkIfExists: true) - ] - - MAKECMAP_FA2CMAPMULTICOLOR ( input_fasta, ch_enzyme ) + input_fasta = sample.map { data -> + tuple([ + id : data[0].id, + single_end : false + ], + file(data[1]) + )} + + input_fasta + .combine(ch_enzyme) + .multiMap { data -> + fasta: tuple( data[0], + data[1] + ) + enzyme: data[2] + } + .set { fa2c_input } + + MAKECMAP_FA2CMAPMULTICOLOR ( fa2c_input.fasta, fa2c_input.enzyme ) ch_cmap = MAKECMAP_FA2CMAPMULTICOLOR.out.cmap ch_cmapkey = MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey @@ -59,12 +67,12 @@ workflow INSILICO_DIGEST { ch_renamedcmap = MAKECMAP_RENAMECMAPIDS.out.renamedcmap - MAKECMAP_CMAP2BED ( ch_renamedcmap, ch_renamedcmap.map { it[0].id } ) + MAKECMAP_CMAP2BED ( ch_renamedcmap, ch_renamedcmap.map { it[0].id } ) // <-- Error Here even though it says UCSC 
ch_version = ch_versions.mix(MAKECMAP_CMAP2BED.out.versions) ch_bedfile = MAKECMAP_CMAP2BED.out.bedfile - UCSC_BEDTOBIGBED ( ch_bedfile, sizefile) + UCSC_BEDTOBIGBED ( ch_bedfile, sizefile.map {it[1]}) ch_version = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) emit: diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 9de2ed3c..f89e4ca9 100644 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -27,7 +27,7 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_READ } from '../subworkflows/local/input_check' +include { INPUT_READ } from '../subworkflows/local/yaml_input' include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' include { INSILICO_DIGEST } from '../subworkflows/local/insilico_digest' // include { GENE_ALIGNMENT } from '../subworkflows/local/gene_alignment' @@ -62,6 +62,7 @@ workflow TREEVAL { // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field // INPUT_READ ( params.input ) + INPUT_READ.out.assembly_id // // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file @@ -75,10 +76,13 @@ workflow TREEVAL { // //SUBWORKFLOW: // - //INSILICO_DIGEST ( INPUT_READ.out.sample_id, - // GENERATE_GENOME.out.dot_genome, - // GENERATE_GENOME.out.reference_tuple ) - //ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions) + ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) + + INSILICO_DIGEST ( INPUT_READ.out.assembly_id, + GENERATE_GENOME.out.dot_genome, + GENERATE_GENOME.out.reference_tuple, + ch_enzyme ) + ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions) // //SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data From 97c4b01427febdb1114175f8a1516635aa6c137e Mon Sep 17 00:00:00 2001 From: dp24 Date: Fri, 9 Sep 2022 13:05:19 +0100 Subject: [PATCH 2/3] Adding test config --- conf/test_genealignment.config | 19 +++++++++++++++++++ 1 file 
changed, 19 insertions(+) create mode 100644 conf/test_genealignment.config diff --git a/conf/test_genealignment.config b/conf/test_genealignment.config new file mode 100644 index 00000000..0b985c33 --- /dev/null +++ b/conf/test_genealignment.config @@ -0,0 +1,19 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/treeval -profile test_genealignment,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'test_genealignment' + config_profile_description = 'Minimal data set for gene alignments to input fasta' + + input = './assets/treeval_test.yaml' + outdir = './testing/' +} From 227ea0b331413d90145e3f31b3050471621c187b Mon Sep 17 00:00:00 2001 From: dp24 Date: Tue, 13 Sep 2022 13:57:39 +0100 Subject: [PATCH 3/3] Updates to get digest running --- subworkflows/local/insilico_digest.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index 4514ed05..8662a73e 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -67,15 +67,17 @@ workflow INSILICO_DIGEST { ch_renamedcmap = MAKECMAP_RENAMECMAPIDS.out.renamedcmap - MAKECMAP_CMAP2BED ( ch_renamedcmap, ch_renamedcmap.map { it[0].id } ) // <-- Error Here even though it says UCSC + MAKECMAP_CMAP2BED ( ch_renamedcmap, ch_renamedcmap.map { it[0].id } ) ch_version = ch_versions.mix(MAKECMAP_CMAP2BED.out.versions) ch_bedfile = MAKECMAP_CMAP2BED.out.bedfile - UCSC_BEDTOBIGBED ( ch_bedfile, sizefile.map {it[1]}) + UCSC_BEDTOBIGBED ( ch_bedfile, sizefile.map {it[1]}) // .as file ch_version = 
ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) emit: versions = ch_version + //merge into main <-- + }