Skip to content

Commit

Permalink
Merge pull request #94 from sanger-tol/local_testing
Browse files Browse the repository at this point in the history
Local testing
  • Loading branch information
DLBPointon authored May 18, 2023
2 parents e047c5e + 3071064 commit 204d303
Show file tree
Hide file tree
Showing 32 changed files with 461 additions and 147 deletions.
31 changes: 31 additions & 0 deletions assets/local_testing/nxOsc-2023-05-02.dp.TEST.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
e769c449778489095a023896d05b87fa cds/CaenorhabditisElegans.WBcel235_cds.bigBed
29f4bb4aa841e754e6ad90a95c51a8ac cds/Gae_host.Gae_cds.bigBed
55e02bdabcbd4c03413d42026ac9e34a custom/software_versions.yml
d41d8cd98f00b204e9800998ecf8427e gap/Oscheius_DF5033_gaplen.bed
efa3906048c52a26a3b762142b138df2 gen/CaenorhabditisElegans.WBcel235_cdna.bigBed
6a1f75afdc99390c150a9abe204e856b generate/my.genome
ab841e49f59ff1dd51ed87191c2d7562 gen/Gae_host.Gae_cdna.bigBed
8b277d209db8bf97c46f118562c4b9b5 gen/OscheiusTipulae.ASM1342590v1_cdna.bigBed
1d1846bbab542500504b19bfc56cb9b2 insilico/BSPQI.bigBed
008e29071b2574e2ed50a2887f4a7fc5 insilico/BSSSI.bigBed
5f58843218b373c5addd38bc91e0d74d insilico/DLE1.bigBed
08d932ddcb01866d9cfa76dbcaf8c5f5 longread/Oscheius_DF5033.bigWig
36e4493afcd46a6c89d762fee08b2aa8 longread/Oscheius_DF5033_halfdepth.bed
7bd5f463e6cd75e876f648dce93411fc longread/Oscheius_DF5033_maxdepth.bed
82d251d88ee7d9bdbb29b68d3136b7ea longread/Oscheius_DF5033_zerodepth.bed
cf6a4dc883979ac9cafd75382aa16bdc pep/CaenorhabditisElegans.WBcel235_pep.gff.gz
84c1ad1989c7e9bcf13258b2774f4a25 pep/CaenorhabditisElegans.WBcel235_pep.gff.gz.tbi
c2cccc5ab38b0e6b4e12fea2c1151569 pep/Gae_host.Gae_pep.gff.gz
6a6522a6176761172a6313df9fc5b210 pep/Gae_host.Gae_pep.gff.gz.tbi
e012da1d0c2ea40171785ead8a294289 punchlist/CaenorhabditisElegans.WBcel235_cdna_punchlist.bed
d9da11fc3f6170a1c37c38765718ab47 punchlist/CaenorhabditisElegans.WBcel235_cds_punchlist.bed
31d4e0cec6ef4ec92d51336393a923be punchlist/CaenorhabditisElegans.WBcel235_rna_punchlist.bed
1ae4cbf700ff5b6d02c96631351f7eb8 punchlist/Gae_host.Gae_cdna_punchlist.bed
50f76662114c8a77e8604a5a539e1e9c punchlist/Gae_host.Gae_cds_punchlist.bed
c269f93c3a43697116b5aa75314e5e07 punchlist/Gae_host.Gae_rna_punchlist.bed
e5fed140728b0f0d088d983a34868d8d punchlist/OscheiusTipulae.ASM1342590v1_cdna_punchlist.bed
779ad07ceefaca4657090c9f0322ddfd repeat/Oscheius_DF5033.bigWig
9d2cca3997c9a60f66516af739eb3719 repeat/Oscheius_DF5033_renamed.bed
bb92039394cc0f2e9e6809e78be4bc9e rna/CaenorhabditisElegans.WBcel235_rna.bigBed
4254dcb32d0aed160e03d3f6c02cf636 rna/Gae_host.Gae_rna.bigBed
b2d9bea322639d2b0954a0ccc7eed800 selfcomp/Oscheius_DF5033_selfcomp.bigBed
27 changes: 27 additions & 0 deletions assets/local_testing/nxOscDF5033.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
assembly:
sizeClass: S # S if {genome <= 4Gb} else L
level: scaffold
sample_id: Oscheius_DF5033
latin_name: to_provide_taxonomic_rank
classT: nematode
asmVersion: Oscheius_DF5033_1
dbVersion: "1"
gevalType: DTOL
reference_file: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta
assem_reads:
pacbio: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
hic: path
supplementary: path
alignment:
data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/
common_name: "" # For future implementation (adding bee, wasp, ant etc)
geneset: "OscheiusTipulae.ASM1342590v1,CaenorhabditisElegans.WBcel235,Gae_host.Gae"
#Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
self_comp:
motif_len: 0
mummer_chunk: 10
synteny:
synteny_genome_path: /lustre/scratch123/tol/resources/treeval/synteny/
outdir: "NEEDS TESTING"
intron:
size: "50k"
27 changes: 27 additions & 0 deletions assets/local_testing/nxOscSUBSET.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
assembly:
sizeClass: S # S if {genome <= 4Gb} else L
level: scaffold
sample_id: OscheiusSUBSET
latin_name: to_provide_taxonomic_rank
classT: nematode
asmVersion: OscheiusSUBSET_1
dbVersion: "1"
gevalType: DTOL
reference_file: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/assembly/draft/SUBSET_genome/Oscheius_SUBSET.fasta
assem_reads:
pacbio: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/genomic_data/pacbio/
hic: path
supplementary: path
alignment:
data_dir: /lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_SUBSET/gene_set/
common_name: "" # For future implementation (adding bee, wasp, ant etc)
geneset: "Gae_host.Gae"
#Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
self_comp:
motif_len: 0
mummer_chunk: 4
synteny:
synteny_genome_path: /lustre/scratch123/tol/resources/treeval/synteny/
outdir: "NEEDS TESTING"
intron:
size: "50k"
3 changes: 3 additions & 0 deletions bin/cut_size.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# cut_size.sh - print the second tab-separated field of the first line of a file.
#
# Usage: cut_size.sh <file>
#
# The argument is quoted so that a path containing whitespace or glob
# characters is passed to `head` as a single, literal operand. The
# ${1:+...} form expands to nothing when no argument is supplied, so the
# script still falls back to reading standard input in that case.
head -n 1 ${1:+"$1"} | cut -f2
2 changes: 1 addition & 1 deletion bin/gff_to_bed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ if [ $1 == '-v'];
then
echo "$version"
else
grep '##PAF' $1 | sed 's/##PAF\t//g'|awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100'|awk '$(NF+1) = ($10/($2*3))*100'|awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2.bed
grep '##PAF' $1 | sed 's/##PAF\t//g'|awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100'|awk '$(NF+1) = ($10/($2*3))*100'|awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2
fi
2 changes: 1 addition & 1 deletion bin/paf_to_bed12.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ if [ $1 == '-v'];
then
echo "$version"
else
cat $1 | awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100' | awk '$(NF+1) = ($10/$2)*100' | awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2_punchlist.bed
cat $1 | awk 'BEGIN{FS="\t";}{a[$1]++;if(a[$1]==2)print v[$1] ORS $0;if(a[$1]>2)print;v[$1]=$0;}' | awk '$(NF+1) = ($10/$11)*100' | awk '$(NF+1) = ($10/$2)*100' | awk -vOFS='\t' '{print $6,$8,$9,$1,$2,$10,$(NF-1),$NF}' > $2
fi
8 changes: 4 additions & 4 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ process {
maxErrors = '-1'

withName:SAMTOOLS_MERGE {
memory = { check_max( 50.GB * task.attempt, 'memory') }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
}

// RESOURCES: MEMORY INTENSIVE STEPS, SOFTWARE TO BE UPDATED TO COMBAT THIS
withName: '.*:.*:SELFCOMP:(SELFCOMP_ALIGNMENTBLOCKS|SELFCOMP_MAPIDS|SELFCOMP_MUMMER2BED|SELFCOMP_SPLITFASTA|BEDTOOLS_MERGE)' {
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
memory = { check_max( 100.GB * task.attempt, 'memory') }
time = { check_max( 8.h * task.attempt, 'time' ) }
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
memory = { check_max( 120.GB * task.attempt, 'memory' ) }
time = { check_max( 12.h * task.attempt, 'time' ) }
}

// RESOURCES: CHANGES TO FREQUENT FAILURES BELOW THIS MEM POINT
Expand Down
22 changes: 17 additions & 5 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ process {
}

withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):UCSC_BEDTOBIGBED' {
ext.prefix = { "${meta.id}_${meta.type}" }
ext.prefix = { "${meta.id}_${meta.type}" }
}

withName: '.*:.*:.*:PEP_ALIGNMENTS:BEDTOOLS_SORT' {
ext.prefix = { "${meta.id}_prot" }
ext.prefix = { "${meta.id}_prot" }
}

withName: '.*:.*:INSILICO_DIGEST:UCSC_BEDTOBIGBED' {
Expand All @@ -95,6 +95,14 @@ process {
ext.prefix = { "${meta.id}_selfcomp" }
}

withName: '.*:.*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG' {
ext.prefix = { "${meta.id}_repeat_density" }
}

withName: '.*:.*:GAP_FINDER:TABIX_BGZIPTABIX' {
ext.prefix = { "gap_${meta.id}" }
}

withName: '.*:.*:SYNTENY:MINIMAP2_ALIGN' {
ext.args = '-t 8 -x asm10'
ext.prefix = { "${meta.id}_synteny_${reference.getName().tokenize('.')[0]}" }
Expand Down Expand Up @@ -130,16 +138,20 @@ process {

withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' {
ext.args = "-d 50"
ext.prefix = { "${meta.id}_maxdepth" }
ext.prefix = { "maxdepth" }
}

withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' {
ext.args = "-d 50"
ext.prefix = { "${meta.id}_zerodepth" }
ext.prefix = { "zerodepth" }
}

withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' {
ext.args = "-k1,1 -k2,2n"
ext.args = "-k1,1 -k2,2n"
ext.prefix = { "${meta.id}_sorted" }
}

withName: '.*:.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' {
ext.prefix = 'coverage'
}
}
23 changes: 12 additions & 11 deletions conf/test.config
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running full-size tests
Nextflow config file for running representative-size tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a full size pipeline test.
Defines input files and everything required to run a representative size pipeline test.
Use as follows:
nextflow run nf-core/treeval -profile test_full,<docker/singularity> --outdir <OUTDIR>
nextflow run sanger-tol/treeval -profile test,singularity -entry FULL
On LSF / tol farm:
bsub -Is -tty -e error -o out -n 2 -q oversubscribed -M4000 -R'select[mem>4000] rusage[mem=4000] span[hosts=1]' 'nextflow run main.nf -profile test,singularity,sanger'
----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Full test profile'
config_profile_description = 'Full test dataset to check pipeline function'
config_profile_name = 'Test profile'
config_profile_description = 'Minimal local test dataset to check pipeline function'

// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Genome references
genome = 'R64-1-1'
input = 'assets/local_testing/nxOscSUBSET.yaml'
}
17 changes: 7 additions & 10 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,17 @@
Defines input files and everything required to run a full size pipeline test.
Use as follows:
nextflow run nf-core/treeval -profile test_full,<docker/singularity> --outdir <OUTDIR>
nextflow run sanger-tol/treeval -profile test_full,singularity,sanger
On LSF / tol farm:
bsub -Is -tty -e error -o out -n 2 -q oversubscribed -M4000 -R'select[mem>4000] rusage[mem=4000] span[hosts=1]' 'nextflow run main.nf -profile test_full,singularity,sanger'
----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Full test profile'
config_profile_description = 'Full test dataset to check pipeline function'

// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
config_profile_name = 'Full local test profile'
config_profile_description = 'Full test dataset to check pipeline function, using a current full local dataset'

// Genome references
genome = 'R64-1-1'
input = 'assets/local_testing/nxOscDF5033.yaml'
}
19 changes: 0 additions & 19 deletions conf/test_genealignment.config

This file was deleted.

17 changes: 0 additions & 17 deletions conf/test_selfcomp.config

This file was deleted.

19 changes: 0 additions & 19 deletions conf/test_synteny.config

This file was deleted.

2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
<details markdown="1">
<summary>Output files</summary>

- `pipeline_info/`
- `treeval_info/`
- Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
- Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
- Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
Expand Down
2 changes: 1 addition & 1 deletion lib/NfcoreTemplate.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class NfcoreTemplate {
}

// Write summary e-mail HTML to a file
def output_d = new File("${params.outdir}/pipeline_info/")
def output_d = new File("${params.outdir}/treeval_info/")
if (!output_d.exists()) {
output_d.mkdirs()
}
Expand Down
13 changes: 11 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,18 @@ WorkflowMain.initialise(workflow, params, log)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { TREEVAL } from './workflows/treeval'
include { TREEVAL } from './workflows/treeval'
include { TREEVAL_RAPID } from './workflows/treeval_rapid'

// WORKFLOW: Run main nf-core/treeval analysis pipeline
workflow NFCORE_TREEVAL {
TREEVAL ()
}

workflow NFCORE_TREEVAL_RAPID {
TREEVAL_RAPID ()
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN ALL WORKFLOWS
Expand All @@ -49,10 +54,14 @@ workflow NFCORE_TREEVAL {
// WORKFLOW: Execute a single named workflow for the pipeline
// See: https://github.com/nf-core/rnaseq/issues/619
//
workflow {
workflow FULL {
NFCORE_TREEVAL ()
}

workflow RAPID {
NFCORE_TREEVAL_RAPID ()
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
Expand Down
Loading

0 comments on commit 204d303

Please sign in to comment.