From 136b5bc85d79616bf3fcf55e316d7014f8cf73ee Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Apr 2024 21:42:53 -0400 Subject: [PATCH 1/4] replace local getbasecountsmultisample with msk gbcms module --- conf/modules.config | 4 +- modules.json | 11 ++++ .../container/Dockerfile | 43 ------------- .../local/getbasecountsmultisample/main.nf | 54 ---------------- modules/msk/gbcms/environment.yml | 9 +++ modules/msk/gbcms/main.nf | 63 +++++++++++++++++++ modules/msk/gbcms/meta.yml | 59 +++++++++++++++++ modules/msk/gbcms/tests/main.nf.test | 41 ++++++++++++ modules/msk/gbcms/tests/main.nf.test.snap | 45 +++++++++++++ modules/msk/gbcms/tests/nextflow.config | 12 ++++ modules/msk/gbcms/tests/tags.yml | 2 + subworkflows/local/fillout.nf | 19 +++--- 12 files changed, 255 insertions(+), 107 deletions(-) delete mode 100755 modules/local/getbasecountsmultisample/container/Dockerfile delete mode 100644 modules/local/getbasecountsmultisample/main.nf create mode 100644 modules/msk/gbcms/environment.yml create mode 100644 modules/msk/gbcms/main.nf create mode 100644 modules/msk/gbcms/meta.yml create mode 100644 modules/msk/gbcms/tests/main.nf.test create mode 100644 modules/msk/gbcms/tests/main.nf.test.snap create mode 100644 modules/msk/gbcms/tests/nextflow.config create mode 100644 modules/msk/gbcms/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 4a9305c..00c113c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -474,8 +474,8 @@ process { ] } - withName: GETBASECOUNTSMULTISAMPLE { - ext.args = "--omaf --maq 20 --baq 20 --filter_improper_pair 0" + withName: GBCMS { + ext.args = { "--omaf --maq 20 --baq 20 --filter_improper_pair 0 --thread ${task.cpus}" } publishDir = [ enabled:false ] diff --git a/modules.json b/modules.json index 358c3c7..3dfd8ba 100644 --- a/modules.json +++ b/modules.json @@ -2,6 +2,17 @@ "name": "mskcc/forte", "homePage": "https://github.com/mskcc/forte", "repos": { + 
"git@github.com:mskcc-omics-workflows/modules.git": { + "modules": { + "msk": { + "gbcms": { + "branch": "main", + "git_sha": "89a863f0533911134095323ac28ca44d3e3d303d", + "installed_by": ["modules"] + } + } + } + }, "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { diff --git a/modules/local/getbasecountsmultisample/container/Dockerfile b/modules/local/getbasecountsmultisample/container/Dockerfile deleted file mode 100755 index c104f4f..0000000 --- a/modules/local/getbasecountsmultisample/container/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -FROM ubuntu:14.04 - -LABEL maintainer="Anne Marie (noronhaa@mskcc.org)" \ - version.image="1.2.4" \ - source.getBaseCountsMultiSample="https://github.com/zengzheng123/GetBaseCountsMultiSample/releases/tag/v1.2.2" \ - version.getBaseCountsMultiSample="1.2.4" -ENV GBCMS_VERSION 1.2.4 - -RUN apt-get update && \ - apt-get install --yes \ - procps \ - gcc \ - make \ - cmake \ - zlib1g-dev \ - libbz2-dev \ - liblzma-dev \ - libssl-dev \ - libcurl4-openssl-dev \ - g++ \ - git \ - wget \ - zip \ - python-pip && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN cd /tmp && \ - wget https://github.com/zengzheng123/GetBaseCountsMultiSample/archive/v${GBCMS_VERSION}.zip && \ - unzip v${GBCMS_VERSION}.zip && \ - # Install bamtools - cd /tmp/GetBaseCountsMultiSample-${GBCMS_VERSION}/bamtools-master && \ - rm -r build/ && \ - mkdir build && \ - cd build/ && \ - cmake -DCMAKE_CXX_FLAGS=-std=c++03 .. 
&& \ - make && \ - make install && \ - cp ../lib/libbamtools.so.2.3.0 /usr/lib/ && \ - # Install GetBaseCountsMultiSample itself - cd /tmp/GetBaseCountsMultiSample-${GBCMS_VERSION} && \ - make && \ - cp GetBaseCountsMultiSample /usr/bin/ diff --git a/modules/local/getbasecountsmultisample/main.nf b/modules/local/getbasecountsmultisample/main.nf deleted file mode 100644 index 64bdd36..0000000 --- a/modules/local/getbasecountsmultisample/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process GETBASECOUNTSMULTISAMPLE { - tag "$meta.id" - label 'process_single' - - conda "bioconda::bamtools=2.5.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://cmopipeline/getbasecountsmultisample:1.2.4' : - 'docker://cmopipeline/getbasecountsmultisample:1.2.4' }" - - input: - tuple val(meta), path(bam), path(bai), path(variants) - path(fasta) - path(fai) - - output: - tuple val(meta), path("*.gbcms.maf"), optional: true, emit: maf - tuple val(meta), path("*.gbcms.vcf"), optional: true, emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--omaf") ? "maf" : "vcf" - def variant_input = variants.getExtension() == "maf" ? 
"--maf ${variants}" : "--vcf ${variants}" - """ - GetBaseCountsMultiSample \\ - --thread ${task.cpus} \\ - --fasta ${fasta} \\ - ${variant_input} \\ - --bam ${prefix}:${bam} \\ - --output ${variants.getBaseName()}.gbcms.${extension} \\ - ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - GetBaseCountsMultiSample: \$(GetBaseCountsMultiSample -h | grep ^GetBaseCountsMultiSample | cut -f 2 -d" " ) - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.gbcms.maf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - GetBaseCountsMultiSample: \$(GetBaseCountsMultiSample -h | grep ^GetBaseCountsMultiSample | cut -f 2 -d" " ) - END_VERSIONS - """ -} diff --git a/modules/msk/gbcms/environment.yml b/modules/msk/gbcms/environment.yml new file mode 100644 index 0000000..8d01ad7 --- /dev/null +++ b/modules/msk/gbcms/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "gbcms" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "YOUR-TOOL-HERE" diff --git a/modules/msk/gbcms/main.nf b/modules/msk/gbcms/main.nf new file mode 100644 index 0000000..7a01062 --- /dev/null +++ b/modules/msk/gbcms/main.nf @@ -0,0 +1,63 @@ +process GBCMS { + tag "$meta.id" + label 'process_single' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'ghcr.io/msk-access/gbcms:1.2.5': + 'ghcr.io/msk-access/gbcms:1.2.5' }" + + input: + tuple val(meta), path(bam), path(bambai), path(variant_file), val(output) + path(fasta) + path(fastafai) + + output: + tuple val(meta), path('*.{vcf,maf}') , emit: variant_file + tuple val(meta), path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + script: + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GetBaseCountsMultiSample module does not support Conda. Please use Docker / Singularity instead." + } + def args = task.ext.args ?: '' + def sample = meta.sample + // determine if input file is a maf of vcf + + def input_ext = variant_file.getExtension() + def variant_input = '' + + if(input_ext == 'maf') { + variant_input = '--maf ' + variant_file + } + if(input_ext == 'vcf'){ + variant_input = '--vcf ' + variant_file + } + if(variant_input == ''){ + throw new Exception("Variant file must be maf or vcf, not ${input_ext}") + } + + """ + GetBaseCountsMultiSample --fasta ${fasta} \\ + ${variant_input} \\ + --output ${output} \\ + --bam $sample:${bam} $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + GetBaseCountsMultiSample: \$(echo \$(GetBaseCountsMultiSample --help) | grep -oP '[0-9]\\.[0-9]\\.[0-9]') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + + touch variant_file.maf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + GetBaseCountsMultiSample: 1.2.5 + END_VERSIONS + """ +} diff --git a/modules/msk/gbcms/meta.yml b/modules/msk/gbcms/meta.yml new file mode 100644 index 0000000..848f729 --- /dev/null +++ b/modules/msk/gbcms/meta.yml @@ -0,0 +1,59 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "gbcms" +description: This module wraps GetBaseCountsMultiSample, which calculates the base counts in multiple BAM files for all the sites in a given VCF file or 
MAF file +keywords: + - basecount + - bams + - vcf +tools: + - "gbcms": + description: "Calculate the base counts in multiple BAM files for all the sites in a given VCF file or MAF file" + homepage: "https://github.com/msk-access/GetBaseCountsMultiSample" + documentation: "https://github.com/msk-access/GetBaseCountsMultiSample/blob/master/README.md" + +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + + - fasta: + type: file + description: Input reference sequence file + pattern: "*.fasta" + + - fastafai: + type: file + description: Index of the reference Fasta + pattern: "*.fai" + - bam: + type: file + description: Input bam file, in the format of SAMPLE_NAME:BAM_FILE. This paramter need to be specified at least once + pattern: "*.bam" + - bambai: + type: file + description: Index of Bam + pattern: "*.bai" + - variant_file: + type: file + description: Input variant file in TCGA maf format. --maf or --vcf need to be specified at least once. 
But --maf and --vcf are mutually exclusive + pattern: "*.{maf,vcf}" + - output: + type: string + description: Output file + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - variant_file: + type: file + description: base counts in multiple BAM files for all the sites in a given VCF file or MAF file + pattern: "*.{vcf,maf}" + +authors: + - "@buehlere" diff --git a/modules/msk/gbcms/tests/main.nf.test b/modules/msk/gbcms/tests/main.nf.test new file mode 100644 index 0000000..ec1c85b --- /dev/null +++ b/modules/msk/gbcms/tests/main.nf.test @@ -0,0 +1,41 @@ +// nf-core modules test gbcms +nextflow_process { + + name "Test Process GBCMS" + script "../main.nf" + process "GBCMS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "modules_msk" + tag "gbcms" + + test("sarscov2 illumina paired-end [fastq]") { + + when { + process { + """ + input[0] = [ + [ id:'test', sample:'197' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + "variant_file.vcf" + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/msk/gbcms/tests/main.nf.test.snap b/modules/msk/gbcms/tests/main.nf.test.snap new file mode 100644 index 0000000..9b02782 --- /dev/null +++ b/modules/msk/gbcms/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "sarscov2 illumina paired-end [fastq]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "sample": "197" + }, + 
"variant_file.vcf:md5,28c8df33c7ea5ed5d1cf9997d8e00ffa" + ] + ], + "1": [ + [ + { + "id": "test", + "sample": "197" + }, + "versions.yml:md5,0ca451c1c6505a0a72541b6cc29e2bc0" + ] + ], + "variant_file": [ + [ + { + "id": "test", + "sample": "197" + }, + "variant_file.vcf:md5,28c8df33c7ea5ed5d1cf9997d8e00ffa" + ] + ], + "versions": [ + [ + { + "id": "test", + "sample": "197" + }, + "versions.yml:md5,0ca451c1c6505a0a72541b6cc29e2bc0" + ] + ] + } + ], + "timestamp": "2023-12-13T15:05:34.981489" + } +} \ No newline at end of file diff --git a/modules/msk/gbcms/tests/nextflow.config b/modules/msk/gbcms/tests/nextflow.config new file mode 100644 index 0000000..178a90d --- /dev/null +++ b/modules/msk/gbcms/tests/nextflow.config @@ -0,0 +1,12 @@ +params { + enable_conda = false +} + +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: "GBCMS" { + ext.args = '' + } + +} diff --git a/modules/msk/gbcms/tests/tags.yml b/modules/msk/gbcms/tests/tags.yml new file mode 100644 index 0000000..2c24c75 --- /dev/null +++ b/modules/msk/gbcms/tests/tags.yml @@ -0,0 +1,2 @@ +gbcms: + - "modules/msk/gbcms/**" diff --git a/subworkflows/local/fillout.nf b/subworkflows/local/fillout.nf index d1d1fb2..9453446 100755 --- a/subworkflows/local/fillout.nf +++ b/subworkflows/local/fillout.nf @@ -1,7 +1,6 @@ - -include { GETBASECOUNTSMULTISAMPLE } from '../../modules/local/getbasecountsmultisample/main' -include { COMBINE_FILLOUTS } from '../../modules/local/combine_fillout_maf/main' -include { MAF_REFORMAT } from '../../modules/local/reformat_fillout_maf/main' +include { GBCMS } from '../../modules/msk/gbcms/main' +include { COMBINE_FILLOUTS } from '../../modules/local/combine_fillout_maf/main' +include { MAF_REFORMAT } from '../../modules/local/reformat_fillout_maf/main' workflow FILLOUT { @@ -26,17 +25,21 @@ workflow FILLOUT { MAF_REFORMAT(maf_ch) ch_versions = ch_versions.mix(MAF_REFORMAT.out.versions.first()) - 
GETBASECOUNTSMULTISAMPLE( + GBCMS( bam .combine(bai,by:[0]) - .combine(MAF_REFORMAT.out.maf,by:[0]), + .combine(MAF_REFORMAT.out.maf,by:[0]) + .map{ meta, bam, bai, variants -> + [ meta, bam, bai, variants, "${variants.getBaseName()}.gbcms.maf"] + }, fasta, fai + ) - ch_versions = ch_versions.mix(GETBASECOUNTSMULTISAMPLE.out.versions.first()) + ch_versions = ch_versions.mix(GBCMS.out.versions.map{ meta, versions -> versions }.first()) /* GBCMS emits versions as [meta, versions.yml]; keep only the file so ch_versions stays a channel of paths */ COMBINE_FILLOUTS( - GETBASECOUNTSMULTISAMPLE.out.maf + GBCMS.out.variant_file .combine(maf_ch,by:[0]) ) ch_versions = ch_versions.mix(COMBINE_FILLOUTS.out.versions.first()) From 1bc87aaa069bde3c66a387ac2b0f1a20cbc97802 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Wed, 17 Apr 2024 21:43:36 -0400 Subject: [PATCH 2/4] remove singularity profile from static pytest command --- tests/test_fillout/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_fillout/test.yml b/tests/test_fillout/test.yml index 332bf8f..146e896 100755 --- a/tests/test_fillout/test.yml +++ b/tests/test_fillout/test.yml @@ -1,5 +1,5 @@ - name: run_test_fillouts - command: nextflow run ./tests/test_fillout/main.nf -profile test,singularity -entry test_rna_fillout -c ./tests/nextflow.config -c ./tests/test_fillout/nextflow.config + command: nextflow run ./tests/test_fillout/main.nf -profile test -entry test_rna_fillout -c ./tests/nextflow.config -c ./tests/test_fillout/nextflow.config tags: - test_fillouts exit_code: 0 From 34e77fe950d911607a5469db9b07e0523172a43d Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 30 Apr 2024 12:11:55 -0400 Subject: [PATCH 3/4] update gbcms module --- modules.json | 4 ++-- modules/msk/gbcms/main.nf | 20 ++++++++++---------- modules/msk/gbcms/tests/main.nf.test.snap | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules.json b/modules.json index 3dfd8ba..76ac589 100644 --- a/modules.json +++ b/modules.json @@ -6,8 +6,8 @@ "modules": { "msk": { "gbcms": { - "branch": "main", - "git_sha": 
"89a863f0533911134095323ac28ca44d3e3d303d", + "branch": "develop", + "git_sha": "e70681357ccd39e7a10d328ca696c074750d8163", "installed_by": ["modules"] } } diff --git a/modules/msk/gbcms/main.nf b/modules/msk/gbcms/main.nf index 7a01062..5a0ebbd 100644 --- a/modules/msk/gbcms/main.nf +++ b/modules/msk/gbcms/main.nf @@ -4,15 +4,15 @@ process GBCMS { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'ghcr.io/msk-access/gbcms:1.2.5': 'ghcr.io/msk-access/gbcms:1.2.5' }" - + input: tuple val(meta), path(bam), path(bambai), path(variant_file), val(output) - path(fasta) + path(fasta) path(fastafai) output: - tuple val(meta), path('*.{vcf,maf}') , emit: variant_file - tuple val(meta), path("versions.yml") , emit: versions + tuple val(meta), path('*.{vcf,maf}') , emit: variant_file + tuple val(meta), path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -22,14 +22,14 @@ process GBCMS { } def args = task.ext.args ?: '' def sample = meta.sample - // determine if input file is a maf of vcf + // determine if input file is a maf of vcf def input_ext = variant_file.getExtension() def variant_input = '' - + if(input_ext == 'maf') { variant_input = '--maf ' + variant_file - } + } if(input_ext == 'vcf'){ variant_input = '--vcf ' + variant_file } @@ -42,9 +42,9 @@ process GBCMS { ${variant_input} \\ --output ${output} \\ --bam $sample:${bam} $args - + cat <<-END_VERSIONS > versions.yml - "${task.process}": + "${task.process}": GetBaseCountsMultiSample: \$(echo \$(GetBaseCountsMultiSample --help) | grep -oP '[0-9]\\.[0-9]\\.[0-9]') END_VERSIONS """ @@ -56,7 +56,7 @@ process GBCMS { touch variant_file.maf cat <<-END_VERSIONS > versions.yml - "${task.process}": + "${task.process}": GetBaseCountsMultiSample: 1.2.5 END_VERSIONS """ diff --git a/modules/msk/gbcms/tests/main.nf.test.snap b/modules/msk/gbcms/tests/main.nf.test.snap index 9b02782..cd3ce8a 100644 --- 
a/modules/msk/gbcms/tests/main.nf.test.snap +++ b/modules/msk/gbcms/tests/main.nf.test.snap @@ -17,7 +17,7 @@ "id": "test", "sample": "197" }, - "versions.yml:md5,0ca451c1c6505a0a72541b6cc29e2bc0" + "versions.yml:md5,a94265ed3bc4b5631d85b9b9b5d2b7e5" ] ], "variant_file": [ @@ -35,11 +35,11 @@ "id": "test", "sample": "197" }, - "versions.yml:md5,0ca451c1c6505a0a72541b6cc29e2bc0" + "versions.yml:md5,a94265ed3bc4b5631d85b9b9b5d2b7e5" ] ] } ], "timestamp": "2023-12-13T15:05:34.981489" } -} \ No newline at end of file +} From e826931ad188b33e58febc2a8539bd6757e20399 Mon Sep 17 00:00:00 2001 From: Anne Marie Noronha Date: Tue, 30 Apr 2024 12:26:56 -0400 Subject: [PATCH 4/4] filter msk modules from editorconfig linting --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index b6b3190..6c1f76c 100644 --- a/.editorconfig +++ b/.editorconfig @@ -12,7 +12,7 @@ indent_style = space indent_size = 2 # These files are edited and tested upstream in nf-core/modules -[/modules/nf-core/**] +[/modules/{nf-core,msk}/**] charset = unset end_of_line = unset insert_final_newline = unset