diff --git a/.github/workflows/code-linting.yml b/.github/workflows/code-linting.yml index 4a111d9..fccb7e0 100644 --- a/.github/workflows/code-linting.yml +++ b/.github/workflows/code-linting.yml @@ -3,7 +3,7 @@ on: push: branches: [master] pull_request: - branches: [main] + branches: [main, develop] # Cancel if a newer run is started concurrency: diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml new file mode 100755 index 0000000..650be88 --- /dev/null +++ b/.github/workflows/pytest-workflow.yml @@ -0,0 +1,79 @@ +name: Pytest-workflow +on: + pull_request: + branches: [main, develop] + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-20.04 + + name: ${{ matrix.tags }} ${{ matrix.profile }} + strategy: + fail-fast: false + matrix: + tags: ["test_profile"] + profile: ["singularity"] + env: + NXF_ANSI_LOG: false + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + + - uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install Python dependencies + run: python -m pip install --upgrade pip pytest-workflow + + - uses: actions/cache@v2 + with: + path: /usr/local/bin/nextflow + key: ${{ runner.os }}-nextflow + restore-keys: | + ${{ runner.os }}-nextflow + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- https://get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v5 + with: + singularity-version: 3.7.1 + + # Test the module + - name: Run pytest-workflow + # only use one thread for pytest-workflow to avoid race condition on conda cache. 
+ run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof --git-aware --color=yes + + - name: Output log on failure + if: failure() + run: | + sudo apt-get update > /dev/null + sudo apt-get install bat > /dev/null + batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }} + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/work + !/home/runner/pytest_workflow_*/*/work/singularity diff --git a/README.md b/README.md index 1f303e1..6b09e09 100644 --- a/README.md +++ b/README.md @@ -56,10 +56,12 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool | umi2 | `str`/`int` | `NNNXX`/`3` | `''` | | strand | `str` | `yes`/`no`/`reverse` | `no` | | fastq_1 | `str` | `/path/to/*fastq.gz` | (none) | - | fastq_1 | `str` | `/path/to/*fastq.gz` | (none) | + | fastq_2 | `str` | `/path/to/*fastq.gz` | (none) | If you are running on juno, chain the `juno` profile (i.e. `-profile singularity,juno`) to take advantage of local resources on juno. +5. For more information on running the pipeline, please see the [detailed usage documentation](docs/usage.md). + ## Credits anoronh4/forte was originally written by Anne Marie Noronha . diff --git a/conf/juno.config b/conf/juno.config index 4149bdf..7a7bce8 100644 --- a/conf/juno.config +++ b/conf/juno.config @@ -12,6 +12,30 @@ process { beforeScript = "module load singularity/3.1.1; unset R_LIBS; catch_term () { echo 'caught USR2/TERM signal'; set +e; false; on_exit ; } ; trap catch_term USR2 TERM" errorStrategy = { task.attempt <= 3 ? 
'retry' : 'ignore' } maxRetries = 3 + + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { round_memory( check_max( 6.GB, 'memory' )/task.cpus, "down") } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { round_memory( check_max( 12.GB, 'memory' )/task.cpus, "down") } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { round_memory( check_max( 36.GB, 'memory' )/task.cpus, "down") } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { round_memory( check_max( 72.GB, 'memory' )/task.cpus, "down") } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { round_memory( check_max( 200.GB, 'memory' )/task.cpus, "down") } + } } params { diff --git a/conf/modules.config b/conf/modules.config index ae4358c..0bed051 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -122,10 +122,14 @@ process { } withName: FUSIONREPORT_DOWNLOAD { + ext.when = params.run_fusion_report + secret = ["COSMIC_PASSWD"] storeDir = { "${params.reference_base}/fusionreport" } + ext.args = "--cosmic_usr ${params.cosmic_usr} --cosmic_passwd \$COSMIC_PASSWD" } withName: FUSIONREPORT { + ext.when = params.run_fusion_report ext.args = { [ "-t ${params.fusion_report_cutoff}", @@ -235,7 +239,7 @@ process { ext.prefix = { "$meta.sample" } ext.args = { [ - "--outSAMattrRGline ${meta.read_group.collect{"ID:${it} SM:${meta.sample} PL:Illumina"}.join(" , ")}", + "--outSAMattrRGline ${meta.read_group.split(",").collect{"ID:${it} SM:${meta.sample} PL:Illumina"}.join(" , ")}", '--quantMode GeneCounts', '--twopassMode Basic', '--outSAMtype BAM SortedByCoordinate', diff --git a/conf/test.config b/conf/test.config index 4a13463..809c9cb 100644 --- 
a/conf/test.config +++ b/conf/test.config @@ -27,5 +27,6 @@ params { outdir = "results" genome = 'smallGRCh37' + run_fusion_report = false } diff --git a/docs/usage.md b/docs/usage.md index 7fd5fca..16fde16 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -82,6 +82,14 @@ When you run the above command, Nextflow automatically pulls the pipeline code f nextflow pull anoronh4/forte ``` +### Fusion-report annotation + +Annotation with the fusion-report module uses a reference database that is built within the Forte pipeline. Building the reference database requires access to COSMIC with a username and password. Once you have these two items, you can pass in the username on the command-line like so: `--cosmic_usr <username>` and you must set up the password using nextflow's `secrets` functionality, which allows for the secure transmission of sensitive information within the pipeline: + +```bash +nextflow secrets set COSMIC_PASSWD 'mycosmicpw' +``` + ### OncoKB annotation To enable OncoKB fusion annotation, you must have an [API token to access data from OncoKB](https://www.oncokb.org/apiAccess). 
Once you have obtained a token, it needs to be registered as a Nextflow Secret, which allows for the secure transmission of sensitive information within the pipeline: diff --git a/modules.json b/modules.json index 07bb886..9c20444 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "arriba": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] }, "cat/fastq": { @@ -22,12 +22,12 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e", + "git_sha": "20a508676f40d0fd3f911ac595af91ec845704c4", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { @@ -127,12 +127,12 @@ }, "umitools/dedup": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "6d9c7e43404e20a97d2f6f88548456afe78282e6", "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "6d9c7e43404e20a97d2f6f88548456afe78282e6", "installed_by": ["modules"] } } diff --git a/modules/local/fusioncatcher/detect/main.nf b/modules/local/fusioncatcher/detect/main.nf index d64a82e..aee0dae 100644 --- a/modules/local/fusioncatcher/detect/main.nf +++ b/modules/local/fusioncatcher/detect/main.nf @@ -2,7 +2,7 @@ process FUSIONCATCHER_DETECT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::fusioncatcher=1.33" : null) + conda "bioconda::fusioncatcher=1.33" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'docker.io/clinicalgenomics/fusioncatcher:1.33' : 'docker.io/clinicalgenomics/fusioncatcher:1.33' }" diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index e044e79..35891ed 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -2,7 +2,7 @@ process FUSIONCATCHER_DOWNLOAD { tag 'fusioncatcher_download' label 'process_medium' - conda (params.enable_conda ? "bioconda::fusioncatcher=1.33" : null) + conda "bioconda::fusioncatcher=1.33" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker.io/clinicalgenomics/fusioncatcher:1.33' : 'docker.io/clinicalgenomics/fusioncatcher:1.33' }" diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf index 77e114b..83f7aa5 100644 --- a/modules/local/fusionreport/download/main.nf +++ b/modules/local/fusionreport/download/main.nf @@ -3,25 +3,26 @@ process FUSIONREPORT_DOWNLOAD { label 'process_medium' // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? 'bioconda::star=2.7.9a' : null) + conda 'bioconda::star=2.7.9a' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'cmopipeline/fusion-report:0.0.1' : 'cmopipeline/fusion-report:0.0.1' }" //'docker.io/rannickscilifelab/fusion-report:2.1.5updated' : //'docker.io/rannickscilifelab/fusion-report:2.1.5updated' }" - - input: - val(username) - val(passwd) - output: path "db" , emit: reference - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' """ - fusion_report download --cosmic_usr $username --cosmic_passwd $passwd db + fusion_report download \\ + $args \\ + db cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/fusionreport/run/main.nf b/modules/local/fusionreport/run/main.nf index 6dfea59..ce9c10f 100644 --- a/modules/local/fusionreport/run/main.nf +++ b/modules/local/fusionreport/run/main.nf @@ -3,7 +3,7 @@ process FUSIONREPORT { label 'process_low' // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? 'bioconda::star=2.7.9a' : null) + conda 'bioconda::star=2.7.9a' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'cmopipeline/fusion-report:0.0.1' : 'cmopipeline/fusion-report:0.0.1' }" diff --git a/modules/local/htseq/count/main.nf b/modules/local/htseq/count/main.nf index 4404be9..c3f2a4d 100644 --- a/modules/local/htseq/count/main.nf +++ b/modules/local/htseq/count/main.nf @@ -2,7 +2,7 @@ process HTSEQ_COUNT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::htseq=2.0.2" : null) + conda "bioconda::htseq=2.0.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/htseq:2.0.2--py39h919a90d_0' : 'quay.io/biocontainers/htseq:2.0.2--py39h919a90d_0' }" diff --git a/modules/local/star/align/main.nf b/modules/local/star/align/main.nf index a210ab0..799dfca 100644 --- a/modules/local/star/align/main.nf +++ b/modules/local/star/align/main.nf @@ -2,7 +2,7 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" : null) + conda "bioconda::star=2.7.10a bioconda::samtools=1.16.1 conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' : 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' }" diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 9af06e2..9cfd783 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -1,7 +1,7 @@ process STARFUSION_BUILD { tag 'star-fusion' - conda (params.enable_conda ? "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a" : null) + conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a" container "docker.io/trinityctat/starfusion:1.10.1" input: diff --git a/modules/local/starfusion/detect/main.nf b/modules/local/starfusion/detect/main.nf index 1c837e6..7ee3e08 100644 --- a/modules/local/starfusion/detect/main.nf +++ b/modules/local/starfusion/detect/main.nf @@ -2,7 +2,7 @@ process STARFUSION { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 
"bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a" : null) + conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a" container "docker.io/trinityctat/starfusion:1.10.1" input: @@ -17,13 +17,14 @@ process STARFUSION { script: def prefix = task.ext.prefix ?: "${meta.id}" - def fasta = meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" + def reads_in = reads ? ( meta.single_end ? "--left_fq ${reads[0]}" : "--left_fq ${reads[0]} --right_fq ${reads[1]}" ) : '' + def junction_in = junction ? "-J $junction" : '' def args = task.ext.args ?: '' """ STAR-Fusion \\ --genome_lib_dir $reference \\ - $fasta \\ - -J $junction \\ + $reads_in \\ + $junction_in \\ --CPU $task.cpus \\ --examine_coding_effect \\ --output_dir . \\ diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf index 18dfee9..eafec75 100644 --- a/modules/local/starfusion/download/main.nf +++ b/modules/local/starfusion/download/main.nf @@ -1,7 +1,7 @@ process STARFUSION_DOWNLOAD { tag 'star-fusion' - conda (params.enable_conda ? "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a" : null) + conda "bioconda::dfam=3.3 bioconda::hmmer=3.3.2 bioconda::star-fusion=1.10.0 bioconda::trinity=date.2011_11_2 bioconda::samtools=1.9 bioconda::star=2.7.8a" container "docker.io/trinityctat/starfusion:1.10.1" input: diff --git a/modules/nf-core/arriba/main.nf b/modules/nf-core/arriba/main.nf index 8c4dfcb..e4b48be 100644 --- a/modules/nf-core/arriba/main.nf +++ b/modules/nf-core/arriba/main.nf @@ -2,7 +2,7 @@ process ARRIBA { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 
"bioconda::arriba=2.3.0" : null) + conda "bioconda::arriba=2.3.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/arriba:2.3.0--haa8aa89_0' : 'quay.io/biocontainers/arriba:2.3.0--haa8aa89_0' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index cebb6e0..800a609 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100644 new mode 100755 index 787bdb7..da03340 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,12 +1,16 @@ #!/usr/bin/env python -import platform -from textwrap import dedent + +"""Provide functions to merge multiple versions.yml files.""" + import yaml +import platform +from textwrap import dedent def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" html = [ dedent( """\\ @@ -45,47 +49,53 @@ def 
_make_versions_html(versions): return "\\n".join(html) -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + 
# aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 207258a..5eeb9b0 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + conda "bioconda::fastp=0.23.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" @@ -58,7 +58,6 @@ process FASTP { [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz fastp \\ - --stdout \\ --in1 ${prefix}.fastq.gz \\ --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 6f6fad7..197ea7c 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -9,7 +9,7 @@ tools: description: | A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. documentation: https://github.com/OpenGene/fastp - doi: https://doi.org/10.1093/bioinformatics/bty560 + doi: 10.1093/bioinformatics/bty560 licence: ["MIT"] input: - meta: diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf index 48559d8..68fc9b9 100644 --- a/modules/nf-core/umitools/dedup/main.nf +++ b/modules/nf-core/umitools/dedup/main.nf @@ -1,11 +1,11 @@ process UMITOOLS_DEDUP { tag "$meta.id" - label "process_medium" + label "process_single" - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + conda "bioconda::umi_tools=1.1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'quay.io/biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml index 56888e5..0719a95 100644 --- a/modules/nf-core/umitools/dedup/meta.yml +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -5,57 +5,57 @@ keywords: - deduplication tools: - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] input: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" - get_output_stats: - type: boolean - description: | - Whether or not to generate output stats. + type: boolean + description: | + Whether or not to generate output stats. 
output: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" + type: file + description: Reports the (binned) average edit distance between the UMIs at each position. + pattern: "*edit_distance.tsv" - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" + type: file + description: UMI-level summary statistics. + pattern: "*per_umi.tsv" - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" + type: file + description: Tabulates the counts for unique combinations of UMI and position. + pattern: "*per_position.tsv" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf index 22a405b..ba2826e 100644 --- a/modules/nf-core/umitools/extract/main.nf +++ b/modules/nf-core/umitools/extract/main.nf @@ -1,11 +1,11 @@ process UMITOOLS_EXTRACT { tag "$meta.id" - label "process_low" + label "process_single" - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + conda "bioconda::umi_tools=1.1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'quay.io/biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml index 7fc23f7..db64a0f 100644 --- a/modules/nf-core/umitools/extract/meta.yml +++ b/modules/nf-core/umitools/extract/meta.yml @@ -5,42 +5,42 @@ keywords: - extract tools: - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] input: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: list - description: | - List of input FASTQ files whose UMIs will be extracted. + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. output: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: file - description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. 
| For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. - pattern: "*.{fastq.gz}" + pattern: "*.{fastq.gz}" - log: - type: file - description: Logfile for umi_tools - pattern: "*.{log}" + type: file + description: Logfile for umi_tools + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 00ae547..78763d8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -36,8 +36,8 @@ params { dedup_umi_for_fusions = true run_oncokb_fusionannotator = false cosmic_usr = null - cosmic_passwd = null fusion_report_cutoff = 1 + run_fusion_report = true // QC // rseqc_modules can include ['bam_stat','inner_distance','infer_experiment','junction_annotation','junction_saturation','read_distribution','read_duplication','tin'] @@ -64,7 +64,6 @@ params { validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' - enable_conda = false // Max resource options // Defaults only, expecting to be overwritten @@ -80,7 +79,7 @@ includeConfig 'conf/base.config' profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { - params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false @@ -88,7 +87,7 @@ profiles { charliecloud.enabled = false } mamba { - params.enable_conda = true + conda.enabled = true conda.useMamba = true docker.enabled = false singularity.enabled = false @@ -225,3 +224,17 @@ def check_max(obj, type) { } } } + +def round_memory(obj,mode = "round") { + def amount = obj.toString().split(" ")[0] + def unit = obj.toString().split(" ")[1] + + if (mode == "up"){ + return (Math.ceil(amount.toDouble()).toInteger() + "." + unit) as nextflow.util.MemoryUnit + } else if (mode == "down"){ + return (Math.floor(amount.toDouble()).toInteger() + "." 
+ unit) as nextflow.util.MemoryUnit + } else { + return (Math.round(amount.toDouble()).toInteger() + "." + unit) as nextflow.util.MemoryUnit + } + +} diff --git a/subworkflows/local/fusion.nf b/subworkflows/local/fusion.nf index 90fdf1c..4d6b70b 100644 --- a/subworkflows/local/fusion.nf +++ b/subworkflows/local/fusion.nf @@ -48,9 +48,12 @@ workflow FUSION { ) ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions.first()) - reads_junction = reads.join( STAR_FOR_STARFUSION.out.junction ) + //reads_junction = reads.join( STAR_FOR_STARFUSION.out.junction,by:[0] ) - STARFUSION( reads_junction, starfusion_ref) + STARFUSION( + STAR_FOR_STARFUSION.out.junction.map{ meta, junction -> [ meta, [], junction ] }, + starfusion_ref + ) ch_versions = ch_versions.mix(STARFUSION.out.versions.first()) FUSIONCATCHER_DETECT( @@ -73,7 +76,8 @@ workflow FUSION { ).mix( STARFUSION.out.abridged .map{ meta, file -> [ meta, "starfusion", file ] } - ).groupTuple(by:[0],size:numcallers) + ) + .groupTuple(by:[0],size:numcallers) .map{ meta, caller, file -> def avg_weight = caller.collect({(100/caller.size()).toInteger()}) avg_weight[-1] = avg_weight[-1] + (100-avg_weight.sum()) diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 0d357e2..c2a62ce 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -63,10 +63,9 @@ workflow PREPARE_REFERENCES { fusioncatcher_ref = Channel.empty() } - cosmic_usr = params.cosmic_usr ?: "" - cosmic_passwd = params.cosmic_passwd ?: "" - FUSIONREPORT_DOWNLOAD(cosmic_usr,cosmic_passwd) - + //cosmic_usr = params.cosmic_usr ?: "" + //cosmic_passwd = params.cosmic_passwd ?: "" + FUSIONREPORT_DOWNLOAD() emit: star_index = star_index diff --git a/subworkflows/local/preprocess_reads.nf b/subworkflows/local/preprocess_reads.nf index 8daa7b1..eefadc0 100644 --- a/subworkflows/local/preprocess_reads.nf +++ b/subworkflows/local/preprocess_reads.nf @@ -53,12 +53,13 @@ 
workflow PREPROCESS_READS { trimmed_grouped_reads = trimmed_reads .map{ meta, reads -> def read_group = meta.read_group + def fastq_pair_id = meta.fastq_pair_id def meta_clone = meta.clone().findAll { !["read_group","fastq_pair_id"].contains(it.key) } meta_clone.id = meta.sample - [groupKey(meta_clone,meta.fq_num), reads, read_group] - }.groupTuple() - .map{ meta, reads, read_group -> - meta = meta + [read_group:read_group] + [groupKey(meta_clone,meta.fq_num), reads, read_group, fastq_pair_id] + }.groupTuple(by:[0]) + .map{ meta, reads, read_group, fastq_pair_id -> + meta = meta + [read_group:read_group.join(','), fastq_pair_id:fastq_pair_id.join(',')] [meta, reads.flatten()] } diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100755 index 0000000..bab5342 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,38 @@ +params { + outdir = "output/" + workflow_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + tracedir = "${params.outdir}/pipeline_info/${params.workflow_timestamp}" + reference_base = "output/references" + publish_dir_mode = "copy" + singularity_pull_docker_container = false +} + +process { + cpus = 2 + memory = 3.GB + time = 2.h +} + +if ("$PROFILE" == "singularity") { + singularity.enabled = true + singularity.autoMounts = true +} else if ("$PROFILE" == "conda") { + conda.enabled = true +} else if ("$PROFILE" == "mamba") { + conda.enabled = true + conda.useMamba = true +} else { + docker.enabled = true + docker.userEmulation = true + docker.runOptions = "--platform linux/x86_64" +} + +// Increase time available to build Conda environment +conda { createTimeout = "120 min" } + +// Load test_data.config containing paths to test data +// includeConfig 'test_data.config' + +manifest { + nextflowVersion = '!>=22.10.1' +} diff --git a/tests/small_test/test.yml b/tests/small_test/test.yml new file mode 100755 index 0000000..86dfa8b --- /dev/null +++ b/tests/small_test/test.yml @@ -0,0 +1,15 @@ +- name: run_test_profile + 
command: nextflow run ./main.nf -profile test,singularity -c ./tests/nextflow.config + tags: + - test_profile + files: + - path: output/analysis/SAMPLE_PAIRED_END/STAR/SAMPLE_PAIRED_END.Aligned.sortedByCoord.out.bam + md5sum: 01678eb7180f5b317048abb4cc06a3b1 + - path: output/analysis/SAMPLE_PAIRED_END_UMI/STAR/SAMPLE_PAIRED_END_UMI.Aligned.sortedByCoord.out.bam + - path: output/analysis/SAMPLE_SINGLE_END/STAR/SAMPLE_SINGLE_END.Aligned.sortedByCoord.out.bam + - path: output/analysis/SAMPLE_SINGLE_END/arriba/SAMPLE_SINGLE_END.fusions.discarded.tsv + md5sum: da3e17e01697fe9990fd545e1e26b822 + - path: output/analysis/SAMPLE_SINGLE_END/arriba/SAMPLE_SINGLE_END.fusions.tsv + md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70 + - path: output/pipeline_info/software_versions.yml + - path: output/analysis/SAMPLE_PAIRED_END_UMI/htseq/SAMPLE_PAIRED_END_UMI.htseq.count.txt