Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added parallel version of ltrharvest #4968

Merged
merged 3 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/ltrharvest/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the ltrharvest module.
name: "ltrharvest"

# Channels are listed in the order they appear in the module's environment.
channels:
  - conda-forge
  - bioconda
  - defaults

# The version is pinned so that the environment matches the 1.1 container
# images referenced by the module's main.nf.
dependencies:
  - "bioconda::ltr_harvest_parallel=1.1"
56 changes: 56 additions & 0 deletions modules/nf-core/ltrharvest/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// LTRHARVEST: runs LTR_HARVEST_parallel on a genome FASTA and emits the
// combined predictions as `<prefix>.gff3` and `<prefix>.scn`.
//
// Input:
//   tuple val(meta), path(fasta) - sample map (meta.id is used as tag and
//                                  default prefix) and the genome FASTA file
// Output:
//   gff3     - tuple of meta and the renamed combined GFF3 file
//   scn      - tuple of meta and the renamed combined SCN file
//   versions - versions.yml recording the LTR_HARVEST_parallel and gt versions
process LTRHARVEST {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ltr_harvest_parallel:1.1--hdfd78af_0':
        'biocontainers/ltr_harvest_parallel:1.1--hdfd78af_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("*.gff3") , emit: gff3
    tuple val(meta), path("*.scn")  , emit: scn
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output prefix come from task.ext,
    // falling back to no options and the sample id respectively.
    def args    = task.ext.args ?: ''
    def prefix  = task.ext.prefix ?: "${meta.id}"
    // The tool's combined outputs are named after the input FASTA
    // (`<fasta>.harvest.combine.*`); they are renamed to `<prefix>.*` so the
    // output globs above pick them up under the sample prefix.
    // The two tool entries are nested under the process name in versions.yml.
    """
    LTR_HARVEST_parallel \\
        -seq $fasta \\
        $args \\
        -threads $task.cpus

    mv "${fasta}.harvest.combine.gff3" \\
        "${prefix}.gff3"

    mv "${fasta}.harvest.combine.scn" \\
        "${prefix}.scn"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        LTR_HARVEST_parallel: \$(LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p')
        genometools: \$(gt --version | sed '1!d ; s/gt (GenomeTools) //')
    END_VERSIONS
    """

    stub:
    // Only the prefix is needed here: the stub creates empty placeholder
    // outputs and never passes extra arguments to the tool.
    def prefix  = task.ext.prefix ?: "${meta.id}"
    """
    touch "${prefix}.gff3"
    touch "${prefix}.scn"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        LTR_HARVEST_parallel: \$(LTR_HARVEST_parallel -h | sed -n '/Version/s/Version: //p')
        genometools: \$(gt --version | sed '1!d ; s/gt (GenomeTools) //')
    END_VERSIONS
    """
}
59 changes: 59 additions & 0 deletions modules/nf-core/ltrharvest/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for ltrharvest: description, wrapped tools, and the
# documented input/output channels.
name: "ltrharvest"
description: |
  Predicts LTR retrotransposons using the parallel version of GenomeTools gt-ltrharvest
  utility included in the EDTA toolchain
keywords:
  - genomics
  - genome
  - annotation
  - repeat
  - transposons
  - retrotransposons
tools:
  - "LTR_HARVEST_parallel":
      description: A Perl wrapper for LTR_harvest
      homepage: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel"
      documentation: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel"
      tool_dev_url: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel"
      licence: ["MIT"]
  - "gt":
      description: "The GenomeTools genome analysis system"
      homepage: "https://genometools.org/index.html"
      documentation: "https://genometools.org/documentation.html"
      tool_dev_url: "https://github.com/genometools/genometools"
      doi: "10.1109/TCBB.2013.68"
      licence: ["ISC"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1' ]`
  - fasta:
      type: file
      description: Input genome fasta
      pattern: "*.{fsa,fa,fasta}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1' ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - gff3:
      type: file
      description: Predicted LTR candidates in gff3 format
      pattern: "*.gff3"
  - scn:
      type: file
      description: Predicted LTR candidates in scn format
      pattern: "*.scn"
authors:
  - "@GallVp"
maintainers:
  - "@GallVp"
60 changes: 60 additions & 0 deletions modules/nf-core/ltrharvest/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// nf-test specification for the LTRHARVEST process defined in ../main.nf.
// It contains two tests: a real run on the chromosome 21 genome FASTA and a
// `-stub` run on the genome FASTA.
nextflow_process {

name "Test Process LTRHARVEST"
script "../main.nf"
process "LTRHARVEST"

tag "modules"
tag "modules_nfcore"
tag "ltrharvest"

// Real run: feeds the genome_21_fasta test file with meta id 'test'.
test("homo_sapiens-genome_21_fasta") {

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
]
"""
}
}

// The gff3 channel and the versions.yml text are compared against named
// snapshots ("gff3", "script_versions"); the scn file is only checked for
// one expected record line rather than snapshotted in full.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.gff3).match("gff3") },
{ assert path(process.out.scn[0][1]).text.contains("46510803 46520182 9380 46510803 46510940 138 46520042 46520182 141 86.52 0 chr21") },
{ assert snapshot(path(process.out.versions[0]).text).match("script_versions") }
)
}

}

// Stub run: same meta id, genome_fasta test file, executed with the -stub option.
test("homo_sapiens-genome_fasta-stub") {

options '-stub'

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
]
"""
}
}

// All output channels are snapshotted together, and the versions.yml text is
// additionally snapshotted under its own name ("stub_versions").
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
{ assert snapshot(path(process.out.versions[0]).text).match("stub_versions") }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the stub_versions and script_versions snapshots look the same. I think you can share those snapshots between test cases (.match("versions")) since the script and stub blocks use the exact same logic

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would also be ok to remove this check entirely or loosen it a bit to just check for existence, so you don't need to update the snapshots for minor version bumps... but it's fine if you want to be conservative here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ahvigil

Thank you for the feedback. It is not possible anymore to use the same snapshot name. nf-test doesn't allow that anymore.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ugh that's too bad but it seems you're right: askimed/nf-test#188

however, there is an open issue to provide an opt in for cases like this where it might be useful to remove repetition: askimed/nf-test#197. Hope they can implement something like that!

)
}

}

}
88 changes: 88 additions & 0 deletions modules/nf-core/ltrharvest/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions modules/nf-core/ltrharvest/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Path globs associated with the ltrharvest tag.
ltrharvest:
  - "modules/nf-core/ltrharvest/**"
Loading