diff --git a/assets/local_testing/nxOsc-2023-05-02.dp.TEST.md b/assets/local_testing/nxOsc-2023-05-02.dp.TEST.md new file mode 100644 index 00000000..32fbd6d0 --- /dev/null +++ b/assets/local_testing/nxOsc-2023-05-02.dp.TEST.md @@ -0,0 +1,31 @@ +e769c449778489095a023896d05b87fa cds/CaenorhabditisElegans.WBcel235_cds.bigBed +29f4bb4aa841e754e6ad90a95c51a8ac cds/Gae_host.Gae_cds.bigBed +55e02bdabcbd4c03413d42026ac9e34a custom/software_versions.yml +d41d8cd98f00b204e9800998ecf8427e gap/Oscheius_DF5033_gaplen.bed +efa3906048c52a26a3b762142b138df2 gen/CaenorhabditisElegans.WBcel235_cdna.bigBed +6a1f75afdc99390c150a9abe204e856b generate/my.genome +ab841e49f59ff1dd51ed87191c2d7562 gen/Gae_host.Gae_cdna.bigBed +8b277d209db8bf97c46f118562c4b9b5 gen/OscheiusTipulae.ASM1342590v1_cdna.bigBed +1d1846bbab542500504b19bfc56cb9b2 insilico/BSPQI.bigBed +008e29071b2574e2ed50a2887f4a7fc5 insilico/BSSSI.bigBed +5f58843218b373c5addd38bc91e0d74d insilico/DLE1.bigBed +08d932ddcb01866d9cfa76dbcaf8c5f5 longread/Oscheius_DF5033.bigWig +36e4493afcd46a6c89d762fee08b2aa8 longread/Oscheius_DF5033_halfdepth.bed +7bd5f463e6cd75e876f648dce93411fc longread/Oscheius_DF5033_maxdepth.bed +82d251d88ee7d9bdbb29b68d3136b7ea longread/Oscheius_DF5033_zerodepth.bed +cf6a4dc883979ac9cafd75382aa16bdc pep/CaenorhabditisElegans.WBcel235_pep.gff.gz +84c1ad1989c7e9bcf13258b2774f4a25 pep/CaenorhabditisElegans.WBcel235_pep.gff.gz.tbi +c2cccc5ab38b0e6b4e12fea2c1151569 pep/Gae_host.Gae_pep.gff.gz +6a6522a6176761172a6313df9fc5b210 pep/Gae_host.Gae_pep.gff.gz.tbi +e012da1d0c2ea40171785ead8a294289 punchlist/CaenorhabditisElegans.WBcel235_cdna_punchlist.bed +d9da11fc3f6170a1c37c38765718ab47 punchlist/CaenorhabditisElegans.WBcel235_cds_punchlist.bed +31d4e0cec6ef4ec92d51336393a923be punchlist/CaenorhabditisElegans.WBcel235_rna_punchlist.bed +1ae4cbf700ff5b6d02c96631351f7eb8 punchlist/Gae_host.Gae_cdna_punchlist.bed +50f76662114c8a77e8604a5a539e1e9c punchlist/Gae_host.Gae_cds_punchlist.bed +c269f93c3a43697116b5aa75314e5e07 
punchlist/Gae_host.Gae_rna_punchlist.bed +e5fed140728b0f0d088d983a34868d8d punchlist/OscheiusTipulae.ASM1342590v1_cdna_punchlist.bed +779ad07ceefaca4657090c9f0322ddfd repeat/Oscheius_DF5033.bigWig +9d2cca3997c9a60f66516af739eb3719 repeat/Oscheius_DF5033_renamed.bed +bb92039394cc0f2e9e6809e78be4bc9e rna/CaenorhabditisElegans.WBcel235_rna.bigBed +4254dcb32d0aed160e03d3f6c02cf636 rna/Gae_host.Gae_rna.bigBed +b2d9bea322639d2b0954a0ccc7eed800 selfcomp/Oscheius_DF5033_selfcomp.bigBed diff --git a/assets/local_testing/nxOscDF5033.yaml b/assets/local_testing/nxOscDF5033.yaml new file mode 100644 index 00000000..27a9757b --- /dev/null +++ b/assets/local_testing/nxOscDF5033.yaml @@ -0,0 +1,27 @@ +assembly: + sizeClass: S # S if {genome => 4Gb} else L + level: scaffold + sample_id: Oscheius_DF5033 + latin_name: to_provide_taxonomic_rank + classT: nematode + asmVersion: Oscheius_DF5033_1 + dbVersion: "1" + gevalType: DTOL +reference_file: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta +assem_reads: + pacbio: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/ + hic: path + supplementary: path +alignment: + data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/ + common_name: "" # For future implementation (adding bee, wasp, ant etc) + geneset: "OscheiusTipulae.ASM1342590v1,CaenorhabditisElegans.WBcel235,Gae_host.Gae" + #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv" +self_comp: + motif_len: 0 + mummer_chunk: 10 +synteny: + synteny_genome_path: /lustre/scratch123/tol/resources/treeval/synteny/ +outdir: "NEEDS TESTING" +intron: + size: "50k" diff --git a/assets/local_testing/nxOscSUBSET.yaml b/assets/local_testing/nxOscSUBSET.yaml new file mode 100644 index 00000000..54d7aadf --- /dev/null +++ 
b/assets/local_testing/nxOscSUBSET.yaml @@ -0,0 +1,27 @@ +assembly: + sizeClass: S # S if {genome => 4Gb} else L + level: scaffold + sample_id: OscheiusSUBSET + latin_name: to_provide_taxonomic_rank + classT: nematode + asmVersion: OscheiusSUBSET_1 + dbVersion: "1" + gevalType: DTOL +reference_file: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/assembly/draft/SUBSET_genome/Oscheius_SUBSET.fasta +assem_reads: + pacbio: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/genomic_data/pacbio/ + hic: path + supplementary: path +alignment: + data_dir: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/gene_set/ + common_name: "" # For future implementation (adding bee, wasp, ant etc) + geneset: "Gae_host.Gae" + #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv" +self_comp: + motif_len: 0 + mummer_chunk: 4 +synteny: + synteny_genome_path: /lustre/scratch123/tol/resources/treeval/synteny/ +outdir: "NEEDS TESTING" +intron: + size: "50k" diff --git a/bin/cut_size.sh b/bin/cut_size.sh new file mode 100755 index 00000000..bdaf564b --- /dev/null +++ b/bin/cut_size.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +head -n 1 $1 | cut -f2 diff --git a/bin/gff_to_bed.sh b/bin/gff_to_bed.sh index 08577ee6..12f96690 100755 --- a/bin/gff_to_bed.sh +++ b/bin/gff_to_bed.sh @@ -14,5 +14,5 @@ if [ $1 == '-v']; then echo "$version" else - grep '##PAF' $1 | sed 's/##PAF\t//g'|awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100'|awk '$(NF+1) = ($10/($2*3))*100'|awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2.bed + grep '##PAF' $1 | sed 's/##PAF\t//g'|awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100'|awk '$(NF+1) = ($10/($2*3))*100'|awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2 fi diff --git a/bin/paf_to_bed12.sh 
b/bin/paf_to_bed12.sh index cd536f01..7de63334 100755 --- a/bin/paf_to_bed12.sh +++ b/bin/paf_to_bed12.sh @@ -14,5 +14,5 @@ if [ $1 == '-v']; then echo "$version" else - cat $1 | awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100' | awk '$(NF+1) = ($10/$2)*100' | awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2_punchlist.bed + cat $1 | awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100' | awk '$(NF+1) = ($10/$2)*100' | awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2 fi diff --git a/conf/base.config b/conf/base.config index b89b908a..b7f12dd4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -18,14 +18,14 @@ process { maxErrors = '-1' withName:SAMTOOLS_MERGE { - memory = { check_max( 50.GB * task.attempt, 'memory') } + memory = { check_max( 50.GB * task.attempt, 'memory' ) } } // RESOURCES: MEMORY INTENSIVE STEPS, SOFTWARE TO BE UPDATED TO COMBAT THIS withName: '.*:.*:SELFCOMP:(SELFCOMP_ALIGNMENTBLOCKS|SELFCOMP_MAPIDS|SELFCOMP_MUMMER2BED|SELFCOMP_SPLITFASTA|BEDTOOLS_MERGE)' { - cpus = { check_max( 10 * task.attempt, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory') } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { check_max( 10 * task.attempt, 'cpus' ) } + memory = { check_max( 120.GB * task.attempt, 'memory' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } } // RESOURCES: CHANGES TO FREQUENT FAILURES BELOW THIS MEM POINT diff --git a/conf/modules.config b/conf/modules.config index 366c7c6e..8171d8fb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -78,11 +78,11 @@ process { } withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):UCSC_BEDTOBIGBED' { - ext.prefix = { "${meta.id}_${meta.type}" } + ext.prefix = { "${meta.id}_${meta.type}" } } withName: '.*:.*:.*:PEP_ALIGNMENTS:BEDTOOLS_SORT' { - ext.prefix = { "${meta.id}_prot" } + 
ext.prefix = { "${meta.id}_prot" } } withName: '.*:.*:INSILICO_DIGEST:UCSC_BEDTOBIGBED' { @@ -95,6 +95,14 @@ process { ext.prefix = { "${meta.id}_selfcomp" } } + withName: '.*:.*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}_repeat_density" } + } + + withName: '.*:.*:GAP_FINDER:TABIX_BGZIPTABIX' { + ext.prefix = { "gap_${meta.id}" } + } + withName: '.*:.*:SYNTENY:MINIMAP2_ALIGN' { ext.args = '-t 8 -x asm10' ext.prefix = { "${meta.id}_synteny_${reference.getName().tokenize('.')[0]}" } @@ -130,16 +138,20 @@ process { withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { ext.args = "-d 50" - ext.prefix = { "${meta.id}_maxdepth" } + ext.prefix = { "maxdepth" } } withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { ext.args = "-d 50" - ext.prefix = { "${meta.id}_zerodepth" } + ext.prefix = { "zerodepth" } } withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' { - ext.args = "-k1,1 -k2,2n" + ext.args = "-k1,1 -k2,2n" ext.prefix = { "${meta.id}_sorted" } } + + withName: '.*:.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = 'coverage' + } } diff --git a/conf/test.config b/conf/test.config index dc6fd774..deab3488 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,24 +1,25 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests + Nextflow config file for running representative-size tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. + Defines input files and everything required to run a representative size pipeline test. 
Use as follows: - nextflow run nf-core/treeval -profile test_full, --outdir + nextflow run sanger-tol/treeval -profile test,singularity -entry FULL + + On LSF / tol farm: + bsub -Is -tty -e error -o out -n 2 -q oversubscribed -M4000 -R'select[mem>4000] rusage[mem=4000] span[hosts=1]' 'nextflow run main.nf -profile test,singularity,sanger' ---------------------------------------------------------------------------------------- */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal local test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' - // Genome references - genome = 'R64-1-1' + input = 'assets/local_testing/nxOscSUBSET.yaml' } diff --git a/conf/test_full.config b/conf/test_full.config index dc6fd774..7e5c94af 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -5,20 +5,17 @@ Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run nf-core/treeval -profile test_full, --outdir + nextflow run sanger-tol/treeval -profile test_full,singularity,sanger + + On LSF / tol farm: + bsub -Is -tty -e error -o out -n 2 -q oversubscribed -M4000 -R'select[mem>4000] rusage[mem=4000] span[hosts=1]' 'nextflow run main.nf -profile test_full,singularity,sanger' ---------------------------------------------------------------------------------------- */ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' - - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + config_profile_name = 'Full local test profile' + config_profile_description = 'Full test dataset to check pipeline function, using a current full local dataset' - // Genome references - genome = 'R64-1-1' + input = 'assets/local_testing/nxOscDF5033.yaml' } diff --git a/conf/test_genealignment.config b/conf/test_genealignment.config deleted file mode 100644 index 8cd9f116..00000000 --- a/conf/test_genealignment.config +++ /dev/null @@ -1,19 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/treeval -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'test_genealignment' - config_profile_description = 'Minimal data set for gene alignments to input fasta' - - input = "${projectDir}/assets/treeval_test.yaml" - outdir = './testing/' -} diff --git a/conf/test_selfcomp.config b/conf/test_selfcomp.config deleted file mode 100755 index 872d58c5..00000000 --- a/conf/test_selfcomp.config +++ /dev/null @@ -1,17 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - Use as follows: - nextflow run nf-core/treeval -profile test_selfcomp, --outdir ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'test_selfcomp' - config_profile_description = 'Minimal test dataset to check selfcomp pipeline function' - - input = "${projectDir}/assets/treeval_test.yaml" - outdir = './testing/' -} diff --git a/conf/test_synteny.config b/conf/test_synteny.config deleted file mode 100755 index 8c204e66..00000000 --- a/conf/test_synteny.config +++ /dev/null @@ -1,19 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/treeval -profile test_synteny, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'test_synteny' - config_profile_description = 'Minimal test dataset to check syntenypipeline function' - - input = "${projectDir}/assets/treeval_test.yaml" - outdir = './testing/' -} diff --git a/docs/output.md b/docs/output.md index e759ba01..a2bff499 100644 --- a/docs/output.md +++ b/docs/output.md @@ -58,7 +58,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -- `pipeline_info/` +- `treeval_info/` - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..7b5be982 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -135,7 +135,7 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") + def output_d = new File("${params.outdir}/treeval_info/") if (!output_d.exists()) { output_d.mkdirs() } diff --git a/main.nf b/main.nf index 2d7581e0..d26dce7f 100644 --- a/main.nf +++ b/main.nf @@ -32,13 +32,18 @@ WorkflowMain.initialise(workflow, params, log) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { TREEVAL } from './workflows/treeval' +include { TREEVAL } from './workflows/treeval' +include { TREEVAL_RAPID } from './workflows/treeval_rapid' // WORKFLOW: Run main nf-core/treeval analysis pipeline workflow NFCORE_TREEVAL { TREEVAL () } +workflow NFCORE_TREEVAL_RAPID { + TREEVAL_RAPID () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -49,10 +54,14 @@ workflow NFCORE_TREEVAL { // WORKFLOW: Execute a single named workflow for the pipeline // See: https://github.com/nf-core/rnaseq/issues/619 // -workflow { +workflow FULL { NFCORE_TREEVAL () } +workflow RAPID { + NFCORE_TREEVAL_RAPID () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git 
a/modules.json b/modules.json index 476d6228..9ef5d213 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", "installed_by": ["modules"] }, + "bedtools/genomecov": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, "bedtools/intersect": { "branch": "master", "git_sha": "da46ad4dfd38229e1514a81d3128ec7c30206f5c", @@ -25,11 +30,6 @@ "git_sha": "d3c433828498c6881adcc2ea3a93260fff1fe942", "installed_by": ["modules"] }, - "bedtools/genomecov": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, "bedtools/merge": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", @@ -95,11 +95,6 @@ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, - "seqtk/cutn": { - "branch": "master", - "git_sha": "fc2e38dcf6b3cdbe858a83a9457c1b1e018a33b5", - "installed_by": ["modules"] - }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", @@ -110,11 +105,17 @@ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] }, - "tabix/bgziptabix": { + "seqtk/cutn": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "fc2e38dcf6b3cdbe858a83a9457c1b1e018a33b5", "installed_by": ["modules"] }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "01b3b2509d76625b6d6cd613b349fb4777712a15", + "installed_by": ["modules"], + "patch": "modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff" + }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", diff --git a/modules/local/findhalfcoverage.nf b/modules/local/findhalfcoverage.nf index 12a57404..32ae9f35 100755 --- a/modules/local/findhalfcoverage.nf +++ b/modules/local/findhalfcoverage.nf @@ -21,9 +21,9 @@ process FINDHALFCOVERAGE { script: def args = 
task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "halfcoverage" """ - findHalfcoverage.py -c $bedfile -m $my_genome -d $depthgraph > ${prefix}_halfdepth.bed + findHalfcoverage.py -c $bedfile -m $my_genome -d $depthgraph > ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,9 +33,9 @@ process FINDHALFCOVERAGE { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "halfcoverage" """ - touch ${prefix}_halfdepth.bed + touch ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf index 0be29270..8e7acd13 100644 --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -11,14 +11,14 @@ process GAP_LENGTH { tuple val( meta ), path( file ) output: - tuple val( meta ), file( "*bed" ), emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bedgraph" ), emit: bed + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - add_len2gap.sh $file > ${prefix}_gaplen.bed + add_len2gap.sh $file > pretext_${prefix}_gap.bedgraph cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -31,7 +31,7 @@ process GAP_LENGTH { def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" """ - touch ${prefix}_gaplen.bed + touch ${prefix}_gap.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf new file mode 100644 index 00000000..a28fcfcf --- /dev/null +++ b/modules/local/get_largest_scaff.nf @@ -0,0 +1,21 @@ +process GET_LARGEST_SCAFF { + + tag "$meta.id" + label 'process_low' + + conda "conda-forge::coreutils=9.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val( meta ), path( file ) + + output: + env largest_scaff , emit: scaff_size + + script: + """ + largest_scaff=`cut_size.sh $file` + """ +} diff --git a/modules/local/getminmaxpunches.nf b/modules/local/getminmaxpunches.nf index 7e08b722..e455cb2f 100755 --- a/modules/local/getminmaxpunches.nf +++ b/modules/local/getminmaxpunches.nf @@ -1,5 +1,5 @@ process GETMINMAXPUNCHES{ - tag "${assembly_classT}" + tag "${meta.id}" label "process_single" conda "conda-forge::coreutils=9.1" diff --git a/modules/local/gff_to_bed.nf b/modules/local/gff_to_bed.nf index 45ad4b36..73808ef6 100644 --- a/modules/local/gff_to_bed.nf +++ b/modules/local/gff_to_bed.nf @@ -15,9 +15,10 @@ process GFF_TO_BED { path "versions.yml" , emit: versions script: + def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" - gff_to_bed.sh ${file} ${meta.id} + gff_to_bed.sh ${file} ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -27,9 +28,10 @@ process GFF_TO_BED { """ stub: + def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" def VERSION = "9.1" """ - touch ${meta.id}.bed + touch ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/makecmap_cmap2bed.nf b/modules/local/makecmap_cmap2bed.nf index 2f85b5c6..90addde9 100644 --- a/modules/local/makecmap_cmap2bed.nf +++ b/modules/local/makecmap_cmap2bed.nf @@ -23,7 +23,7 @@ process MAKECMAP_CMAP2BED { def prefix = task.ext.prefix ?: "${meta.id}" """ grep -v '#' $cmap > ${prefix}_${enzyme}_edited.cmap - cmap2bed.py -t ${prefix}_${enzyme}_edited.cmap -z $enzyme | sort -k1,1 -k2,2n > ${prefix}_${enzyme}.bed + cmap2bed.py -t ${prefix}_${enzyme}_edited.cmap -z $enzyme | sort -k1,1 -k2,2n > ${enzyme}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf index d3a3bbff..76267f79 100644 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -11,16 +11,17 @@ process TABIX_BGZIPTABIX { tuple val(meta), path(input) output: - tuple val(meta), path("*.gz"), path("*.tbi"), emit: gz_tbi + tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi + tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def args2 = meta.max_scaff == 'csi' ? 
"--csi" : '' + def prefix = task.ext.prefix ?: "${meta.id}" """ bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz tabix $args2 ${prefix}.${input.getExtension()}.gz @@ -34,8 +35,9 @@ process TABIX_BGZIPTABIX { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.gz - touch ${prefix}.gz.tbi + touch ${prefix}.${input.getExtension()}.gz + touch ${prefix}.${input.getExtension()}.gz.tbi + touch ${prefix}.${input.getExtension()}.gz.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml index 49c03289..2761e271 100644 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -37,9 +37,14 @@ output: type: file description: tabix index file pattern: "*.{gz.tbi}" + - csi: + type: file + description: tabix alternate index file + pattern: "*.{gz.csi}" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff b/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff new file mode 100644 index 00000000..55016165 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tabix-bgziptabix.diff @@ -0,0 +1,18 @@ +Changes in module 'nf-core/tabix/bgziptabix' +--- modules/nf-core/tabix/bgziptabix/main.nf ++++ modules/nf-core/tabix/bgziptabix/main.nf +@@ -19,9 +19,9 @@ + task.ext.when == null || task.ext.when + + script: +- def args = task.ext.args ?: '' +- def args2 = task.ext.args2 ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def args = task.ext.args ?: '' ++ def args2 = meta.max_scaff == 'csi' ? 
"--csi" : '' ++ def prefix = task.ext.prefix ?: "${meta.id}" + """ + bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz + tabix $args2 ${prefix}.${input.getExtension()}.gz + +************************************************************ diff --git a/nextflow.config b/nextflow.config index 798fdd1c..27a1f0c8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ params { igenomes_base = null igenomes_ignore = null outdir = "./results" - tracedir = "${params.outdir}/pipeline_info" + tracedir = "${params.outdir}/treeval_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -108,9 +108,8 @@ profiles { } full_s3_test { includeConfig 'conf/full_s3_test.config' } s3_test { includeConfig 'conf/s3_test.config' } + test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } - test_genealignment { includeConfig 'conf/test_genealignment.config' } - test_selfcomp { includeConfig 'conf/test_selfcomp.config' } } // Export these variables to prevent local Python/R libraries from conflicting with those in the container diff --git a/nextflow_schema.json b/nextflow_schema.json index 584e2d69..10299225 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -194,7 +194,7 @@ "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", + "default": "${params.outdir}/treeval_info", "fa_icon": "fas fa-cogs", "hidden": true }, diff --git a/subworkflows/local/gap_finder.nf b/subworkflows/local/gap_finder.nf index 97710347..9d26c21a 100644 --- a/subworkflows/local/gap_finder.nf +++ b/subworkflows/local/gap_finder.nf @@ -1,11 +1,14 @@ #!/usr/bin/env nextflow -include { SEQTK_CUTN } from '../../modules/nf-core/seqtk/cutn/main' -include { GAP_LENGTH } from '../../modules/local/gap_length' +include { SEQTK_CUTN } from '../../modules/nf-core/seqtk/cutn/main' +include { GAP_LENGTH } from 
'../../modules/local/gap_length' +include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff' +include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' workflow GAP_FINDER { take: reference_tuple // Channel [ val(meta), path(fasta) ] + dot_genome main: ch_versions = Channel.empty() @@ -19,14 +22,43 @@ workflow GAP_FINDER { ch_versions = ch_versions.mix( SEQTK_CUTN.out.versions ) // - // MODULE: ADD THE LENGTH OF GAP TO BED FILE + // MODULE: Cut out the largest scaffold size and use as comparator against 512MB + // This is the cut off for TABIX using tbi indexes + // + GET_LARGEST_SCAFF ( dot_genome ) + + + // + // LOGIC: Adding the largest scaffold size to the meta data so it can be used in the modules.config + // + SEQTK_CUTN.out.bed + .combine(GET_LARGEST_SCAFF.out.scaff_size.toInteger()) + .map {meta, row, scaff -> + tuple([ id : meta.id, + max_scaff : scaff >= 500000000 ? 'csi': '' + ], + file(row) + )} + .set { modified_bed_ch } + + // + // MODULE: ADD THE LENGTH OF GAP TO BED FILE - INPUT FOR PRETEXT MODULE // GAP_LENGTH ( SEQTK_CUTN.out.bed ) ch_versions = ch_versions.mix( GAP_LENGTH.out.versions ) + // + // MODULE: BGZIP AND TABIX THE GAP FILE + // + TABIX_BGZIPTABIX ( + modified_bed_ch + ) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) + emit: gap_file = GAP_LENGTH.out.bed + gap_tabix = TABIX_BGZIPTABIX.out.gz_csi versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index dd8b1324..f0a5fc4d 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -50,7 +50,7 @@ workflow INSILICO_DIGEST { ch_cmap = MAKECMAP_FA2CMAPMULTICOLOR.out.cmap ch_cmapkey = MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey - ch_version = ch_versions.mix(MAKECMAP_FA2CMAPMULTICOLOR.out.versions) + ch_versions = ch_versions.mix(MAKECMAP_FA2CMAPMULTICOLOR.out.versions) // // LOGIC: CREATES A TUPLE CONTAINING THE CMAP AND 
ORIGINAL GENOMIC LOCATIONS @@ -78,7 +78,7 @@ workflow INSILICO_DIGEST { // EMITS RENAMED CMAP // MAKECMAP_RENAMECMAPIDS ( ch_join.map { it[0] }, ch_join.map { it[1] } ) - ch_version = ch_versions.mix(MAKECMAP_RENAMECMAPIDS.out.versions) + ch_versions = ch_versions.mix(MAKECMAP_RENAMECMAPIDS.out.versions) ch_renamedcmap = MAKECMAP_RENAMECMAPIDS.out.renamedcmap @@ -87,7 +87,7 @@ workflow INSILICO_DIGEST { // EMITS BED FILE // MAKECMAP_CMAP2BED ( ch_renamedcmap, ch_renamedcmap.map { it[0].id } ) - ch_version = ch_versions.mix(MAKECMAP_CMAP2BED.out.versions) + ch_versions = ch_versions.mix(MAKECMAP_CMAP2BED.out.versions) ch_bedfile = MAKECMAP_CMAP2BED.out.bedfile combined_ch = ch_bedfile @@ -101,7 +101,7 @@ workflow INSILICO_DIGEST { UCSC_BEDTOBIGBED ( combined_ch.map { [it[0], it[1]] }, combined_ch.map { it[3] }, combined_ch.map { it[4] }) - ch_version = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) emit: insilico_digest_bb = UCSC_BEDTOBIGBED.out.bigbed diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 56cd3589..77ccc6fe 100644 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -20,7 +20,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// SUBWORKFLOW TREEVAL_FULL: Consisting of a mix of local and nf-core/modules // include { YAML_INPUT } from '../subworkflows/local/yaml_input' include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' @@ -50,7 +50,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft */ workflow TREEVAL { - + main: // // PRE-PIPELINE CHANNEL SETTING - channel setting for required files // @@ -78,24 +78,29 @@ workflow TREEVAL { // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field // YAML_INPUT ( input_ch ) - YAML_INPUT.out.assembly_dbVer.view() + // // SUBWORKFLOW: Takes input fasta file and 
sample ID to generate a my.genome file // - GENERATE_GENOME ( YAML_INPUT.out.assembly_id, YAML_INPUT.out.reference ) - ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) + GENERATE_GENOME ( YAML_INPUT.out.assembly_id, + YAML_INPUT.out.reference + ) + ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) // // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate // file with enzymatic digest sites. // ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) + + INSILICO_DIGEST ( YAML_INPUT.out.assembly_id, GENERATE_GENOME.out.dot_genome, GENERATE_GENOME.out.reference_tuple, ch_enzyme, - digest_asfile ) + digest_asfile + ) ch_versions = ch_versions.mix(INSILICO_DIGEST.out.versions) // @@ -119,30 +124,31 @@ workflow TREEVAL { YAML_INPUT.out.align_geneset, YAML_INPUT.out.align_common, YAML_INPUT.out.intron_size, - gene_alignment_asfiles, - YAML_INPUT.out.assembly_dbVer ) - + gene_alignment_asfiles + ) ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // REPEAT_DENSITY ( GENERATE_GENOME.out.reference_tuple, - GENERATE_GENOME.out.dot_genome ) - + GENERATE_GENOME.out.dot_genome + ) ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions) // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - GAP_FINDER ( GENERATE_GENOME.out.reference_tuple ) - + GAP_FINDER ( GENERATE_GENOME.out.reference_tuple, + GENERATE_GENOME.out.dot_genome + ) ch_versions = ch_versions.mix(GAP_FINDER.out.versions) // // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as // file to generate a file containing sites of self-complementary sequnce. 
// + SELFCOMP ( GENERATE_GENOME.out.reference_tuple, GENERATE_GENOME.out.dot_genome, YAML_INPUT.out.mummer_chunk, @@ -154,9 +160,11 @@ workflow TREEVAL { // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence // and generates a file of syntenic blocks. // + SYNTENY ( GENERATE_GENOME.out.reference_tuple, YAML_INPUT.out.synteny_path, - YAML_INPUT.out.assembly_classT) + YAML_INPUT.out.assembly_classT + ) ch_versions = ch_versions.mix(SYNTENY.out.versions) // @@ -165,7 +173,8 @@ workflow TREEVAL { LONGREAD_COVERAGE ( GENERATE_GENOME.out.reference_tuple, GENERATE_GENOME.out.dot_genome, YAML_INPUT.out.pacbio_reads, - YAML_INPUT.out.assembly_sizeClass ) + YAML_INPUT.out.assembly_sizeClass + ) ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions) // @@ -175,6 +184,60 @@ workflow TREEVAL { ch_versions.unique().collectFile(name: 'collated_versions.yml') ) } +// +// WORKFLOW: RAPID REQUIRED A SEVERELY TRUNCATED VERSION OF THE FULL WORKFLOW +// +workflow TREEVAL_RAPID { + take: + input_ch + + main: + ch_versions = Channel.empty() + + //input_ch = Channel.fromPath(params.input, checkIfExists: true) + // + // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field + // + YAML_INPUT ( input_ch ) + + // + // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file + // + GENERATE_GENOME ( YAML_INPUT.out.assembly_id, + YAML_INPUT.out.reference + ) + ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) + + // + // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS + // + GAP_FINDER ( GENERATE_GENOME.out.reference_tuple ) + ch_versions = ch_versions.mix(GAP_FINDER.out.versions) + +// TELO +// HIC + + // + // SUBWORKFLOW: Takes reference, pacbio reads + // + LONGREAD_COVERAGE ( GENERATE_GENOME.out.reference_tuple, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.pacbio_reads, + YAML_INPUT.out.assembly_sizeClass + ) + ch_versions = 
ch_versions.mix(LONGREAD_COVERAGE.out.versions) + + // + // SUBWORKFLOW: Collates version data from prior subworkflows + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + emit: + software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml + versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf new file mode 100644 index 00000000..553df1d5 --- /dev/null +++ b/workflows/treeval_rapid.nf @@ -0,0 +1,119 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowTreeval.initialise(params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW TREEVAL_FULL: Consisting of a mix of local and nf-core/modules +// +include { YAML_INPUT } from '../subworkflows/local/yaml_input' +include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' +include { INSILICO_DIGEST } from '../subworkflows/local/insilico_digest' +include { GENE_ALIGNMENT } from '../subworkflows/local/gene_alignment' +include { SELFCOMP } from '../subworkflows/local/selfcomp' +include { SYNTENY } from '../subworkflows/local/synteny' +include { REPEAT_DENSITY } from '../subworkflows/local/repeat_density' +include { 
GAP_FINDER } from '../subworkflows/local/gap_finder' +include { LONGREAD_COVERAGE } from '../subworkflows/local/longread_coverage' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow TREEVAL_RAPID { + + main: + ch_versions = Channel.empty() + + input_ch = Channel.fromPath(params.input, checkIfExists: true) + // + // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field + // + YAML_INPUT ( input_ch ) + + // + // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file + // + GENERATE_GENOME ( YAML_INPUT.out.assembly_id, + YAML_INPUT.out.reference + ) + ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) + + // + // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS + // + GAP_FINDER ( GENERATE_GENOME.out.reference_tuple ) + ch_versions = ch_versions.mix(GAP_FINDER.out.versions) + +// TELO +// HIC + + // + // SUBWORKFLOW: Takes reference, pacbio reads + // + LONGREAD_COVERAGE ( GENERATE_GENOME.out.reference_tuple, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.pacbio_reads, + YAML_INPUT.out.assembly_sizeClass + ) + ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions) + + // + // SUBWORKFLOW: Collates version data from prior subworkflows + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + emit: + software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml + versions_ch = 
CUSTOM_DUMPSOFTWAREVERSIONS.out.versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log) + } + NfcoreTemplate.summary(workflow, params, log) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ \ No newline at end of file