diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c337315 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +*.code-workspace +*.pyc +*.pyo +.*.sw? +.DS_Store +.nextflow* +.nf-test.log +.nf-test/ +.screenrc +__pycache__ +output/ +results*/ +test.xml +test_output/ +tests/data/ +work/ +.github/CODEOWNERS-tmp +bin/python_modules/__init__.pyc +bin/python_modules/Options.pyc +bin/python_modules/Tabfile.pyc \ No newline at end of file diff --git a/conf/dkfz_cluster_hg37.config b/conf/dkfz_cluster_hg37.config index ed64735..7b00834 100644 --- a/conf/dkfz_cluster_hg37.config +++ b/conf/dkfz_cluster_hg37.config @@ -57,7 +57,6 @@ params { mapability_file = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz" replication_time_file = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda" gc_content_file = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt" - gene_annotation_file = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv" // get breakpoints/ PSCBS gaps centromer_file = "${params.data_path}/stats/hg19_gaps.txt" diff --git a/conf/dkfz_cluster_hg38.config b/conf/dkfz_cluster_hg38.config index 238e323..73552d3 100644 --- a/conf/dkfz_cluster_hg38.config +++ b/conf/dkfz_cluster_hg38.config @@ -58,7 +58,6 @@ params { mapability_file = "${params.data_path}/stats/mappability_m2e2/GRCh38_Mappability_Align_100mer_m2e2.ALT_HLA.bedGraph.gz" replication_time_file = "${params.data_path}/databases/ENCODE/RepliSeq/time_mean_10KB.Rda" gc_content_file = "${params.data_path}/stats/gc_content_hg38.txt" - gene_annotation_file = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.hg38.tsv" // get breakpoints/ PSCBS gaps centromer_file = "assets/hg38/gap_with_centromeres.header.txt" diff --git a/conf/modules.config b/conf/modules.config index 44c460f..3052f6b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -158,7 +158,7 @@ process { mode: params.publish_dir_mode ] } - withName: 'ESTIMATE_HDRSCORE' { + withName: 'ESTIMATE_HRDSCORE' { publishDir = [ path: {"${params.outdir}/${meta.id}"}, pattern: "*{.txt}", diff --git a/conf/test.config b/conf/test.config index 9018cfb..431bbfc 100644 --- a/conf/test.config +++ b/conf/test.config @@ -64,7 +64,6 @@ params { mapability_file = "${projectDir}/testdata/annotations/mappability5.test.bed.gz" replication_time_file = "${projectDir}/testdata/annotations/ReplicationTime_10cellines_mean_10KB.Rda" gc_content_file = "assets/hg19/hg19_GRch37_100genomes_gc_content_10kb.txt" - gene_annotation_file = "${projectDir}/testdata/annotations/druggable_genes.csv" // get breakpoints/ PSCBS gaps centromer_file = "assets/hg19/hg19_gaps.txt" diff --git a/conf/test_full.config b/conf/test_full.config index fe5265f..a4fdea6 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -58,7 +58,6 @@ params { mapability_file = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz" replication_time_file = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda" gc_content_file = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt" - gene_annotation_file = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv" // get breakpoints/ PSCBS gaps centromer_file = "${params.data_path}/stats/hg19_gaps.txt" diff --git a/conf/test_hg37.config b/conf/test_hg37.config index 94d511b..62a4290 100644 --- a/conf/test_hg37.config +++ b/conf/test_hg37.config @@ -63,7 +63,6 @@ params { mapability_file = "${projectDir}/testdata/annotations/mappability5.test.bed.gz" replication_time_file = "${projectDir}/testdata/annotations/ReplicationTime_10cellines_mean_10KB.Rda" gc_content_file = "assets/hg19/hg19_GRch37_100genomes_gc_content_10kb.txt" - gene_annotation_file = "${projectDir}/testdata/annotations/druggable_genes.csv" // get breakpoints/ PSCBS gaps centromer_file = "assets/hg19/hg19_gaps.txt" diff --git a/conf/test_nocontrol_hg37.config b/conf/test_nocontrol_hg37.config index 4680902..75b2267 100644 --- a/conf/test_nocontrol_hg37.config +++ b/conf/test_nocontrol_hg37.config @@ -57,7 +57,6 @@ params { mapability_file = "${params.data_path}/databases/UCSC/wgEncodeCrgMapabilityAlign100mer_chr.bedGraph.gz" replication_time_file = "${params.data_path}/databases/ENCODE/ReplicationTime_10cellines_mean_10KB.Rda" gc_content_file = "${params.data_path}/stats/hg19_GRch37_100genomes_gc_content_10kb.txt" - gene_annotation_file = "${params.data_path}/tools_data/ACEseq/INFORM_druggable_genes.csv" // get breakpoints/ PSCBS gaps centromer_file = "${params.data_path}/stats/hg19_gaps.txt" diff --git a/modules/local/estimate_hdrscore.nf b/modules/local/estimate_hrdscore.nf similarity index 91% rename from modules/local/estimate_hdrscore.nf rename to modules/local/estimate_hrdscore.nf index 2fd280f..93d357e 100644 --- a/modules/local/estimate_hdrscore.nf +++ b/modules/local/estimate_hrdscore.nf @@ -1,4 +1,4 @@ -process ESTIMATE_HDRSCORE { +process ESTIMATE_HRDSCORE { tag "$meta.id" label 'process_single' @@ -11,7 +11,6 @@ process ESTIMATE_HDRSCORE { each path(blacklist) each path(centromers) each path(cytobands) - val(chr_prefix) output: tuple val(meta), path("*.txt") , emit: txt @@ -22,7 +21,6 @@ process ESTIMATE_HDRSCORE { script: def prefix = task.ext.prefix ?: "${meta.id}" - def chrprefix = chr_prefix == "chr" ? "chr": "no" """ estimateHRDScore.sh \\ diff --git a/modules/local/generate_plots.nf b/modules/local/generate_plots.nf index 651b475..e1cf0f1 100644 --- a/modules/local/generate_plots.nf +++ b/modules/local/generate_plots.nf @@ -13,7 +13,7 @@ process GENERATE_PLOTS { output: path('*.png') - tuple val(meta), path('*.txt') , emit: hdr_estimate_files + tuple val(meta), path('*.txt') , emit: hrd_estimate_files tuple val(meta), path("*_cnv_parameter_*.txt") , emit: cnv_params path "versions.yml" , emit: versions diff --git a/subworkflows/local/hdr_estimation.nf b/subworkflows/local/hrd_estimation.nf similarity index 63% rename from subworkflows/local/hdr_estimation.nf rename to subworkflows/local/hrd_estimation.nf index 6edc0ee..79e48d8 100644 --- a/subworkflows/local/hdr_estimation.nf +++ b/subworkflows/local/hrd_estimation.nf @@ -3,9 +3,9 @@ // params.options = [:] -include { ESTIMATE_HDRSCORE } from '../../modules/local/estimate_hdrscore.nf' addParams( options: params.options ) +include { ESTIMATE_HRDSCORE } from '../../modules/local/estimate_hrdscore.nf' addParams( options: params.options ) -workflow HDR_ESTIMATION { +workflow HRD_ESTIMATION { take: json_report // channel: [val(meta), path(.json)] hdr_files // channel: [val(meta), [path(.txt), path(.txt)..]] @@ -13,25 +13,23 @@ workflow HDR_ESTIMATION { sexfile // channel: [val(meta), path(sexfile.txt)] centromers // channel: [centromers.txt] cytobands // channel: [cytobands.txt] - chrprefix // channel: [chromosome prefix value] main: versions = Channel.empty() // - // MODULE:ESTIMATE_HDRSCORE + // MODULE:ESTIMATE_HRDSCORE // // RUN parseJson.py input_ch = json_report.join(hdr_files) - ESTIMATE_HDRSCORE( + ESTIMATE_HRDSCORE( input_ch.join(sexfile), blacklist, centromers, - cytobands, - chrprefix + cytobands ) - versions = versions.mix(ESTIMATE_HDRSCORE.out.versions) + versions = versions.mix(ESTIMATE_HRDSCORE.out.versions) emit: versions diff --git a/subworkflows/local/purity_evaluation.nf b/subworkflows/local/purity_evaluation.nf index 31a4379..bbdf88b 100644 --- a/subworkflows/local/purity_evaluation.nf +++ b/subworkflows/local/purity_evaluation.nf @@ -69,7 +69,7 @@ workflow PURITY_EVALUATION { ch_input, chrlength ) - hdr_files = GENERATE_PLOTS.out.hdr_estimate_files + hrd_files = GENERATE_PLOTS.out.hrd_estimate_files versions = versions.mix(GENERATE_PLOTS.out.versions) // @@ -83,6 +83,6 @@ workflow PURITY_EVALUATION { emit: json_report - hdr_files + hrd_files versions } diff --git a/workflows/aceseq.nf b/workflows/aceseq.nf index 3b46c60..beb1251 100644 --- a/workflows/aceseq.nf +++ b/workflows/aceseq.nf @@ -77,7 +77,7 @@ include { SNV_CALLING } from '../subworkflows/local/snv_calling' include { PREPROCESSING } from '../subworkflows/local/preprocessing' include { SEGMENTATION } from '../subworkflows/local/segmentation' include { PURITY_EVALUATION } from '../subworkflows/local/purity_evaluation' -include { HDR_ESTIMATION } from '../subworkflows/local/hdr_estimation' +include { HRD_ESTIMATION } from '../subworkflows/local/hrd_estimation' include { PHASING_X } from '../subworkflows/local/phasing_x' include { PHASING_Y } from '../subworkflows/local/phasing_y' @@ -261,18 +261,18 @@ workflow ACESEQ { ch_versions = ch_versions.mix(PURITY_EVALUATION.out.versions) // - // SUBWORKFLOW: HDR_ESTIMATION: + // SUBWORKFLOW: HRD_ESTIMATION: // - HDR_ESTIMATION( + HRD_ESTIMATION( PURITY_EVALUATION.out.json_report, - PURITY_EVALUATION.out.hdr_files, + PURITY_EVALUATION.out.hrd_files, blacklist, SNV_CALLING.out.ch_sex, centromers, - cytobands, - chrprefix + cytobands ) + ch_versions = ch_versions.mix(HRD_ESTIMATION.out.versions) } else{ println "Only quality check is performed since runQualityCheckOnly is set to ${params.runQualityCheckOnly}"