Skip to content

Commit

Permalink
made it so that merging processes were obsolete by replacing nf-core …
Browse files Browse the repository at this point in the history
…star/align with enhanced version of star/align
  • Loading branch information
anoronh4 committed Mar 6, 2023
1 parent c90d2ca commit 9b40147
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 146 deletions.
127 changes: 66 additions & 61 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -150,25 +150,28 @@ process {
}

withName: STAR_FOR_ARRIBA {
ext.args = [
'--readFilesCommand zcat',
'--outSAMtype BAM Unsorted',
'--outSAMunmapped Within',
'--outBAMcompression 0',
'--outFilterMultimapNmax 50',
'--peOverlapNbasesMin 10',
'--alignSplicedMateMapLminOverLmate 0.5',
'--alignSJstitchMismatchNmax 5',
'-1 5 5',
'--chimSegmentMin 10',
'--chimOutType WithinBAM HardClip',
'--chimJunctionOverhangMin 10',
'--chimScoreDropMax 30',
'--chimScoreJunctionNonGTAG 0',
'--chimScoreSeparation 1',
'--chimSegmentReadGapMax 3',
'--chimMultimapNmax 50'
].join(' ').trim()
// STAR command-line arguments for the STAR_FOR_ARRIBA alignment.
// Written as a closure (rather than a plain joined string) because the
// read-group line below interpolates `meta`, which has to be resolved
// when the closure is evaluated for a task.
ext.args = {
[
// Read group: both ID and SM are set to meta.sample; platform is fixed to Illumina.
// The star/align module in this change only adds its own default read-group line
// when the args do not already contain '--outSAMattrRGline'.
"--outSAMattrRGline ID:${meta.sample} SM:${meta.sample} PL:Illumina",
'--readFilesCommand zcat',
'--outSAMtype BAM Unsorted',
'--outSAMunmapped Within',
'--outBAMcompression 0',
'--outFilterMultimapNmax 50',
'--peOverlapNbasesMin 10',
'--alignSplicedMateMapLminOverLmate 0.5',
'--alignSJstitchMismatchNmax 5',
// Not a separate option: after the list is joined with spaces these values
// directly follow '--alignSJstitchMismatchNmax 5' on the command line.
'-1 5 5',
'--chimSegmentMin 10',
'--chimOutType WithinBAM HardClip',
'--chimJunctionOverhangMin 10',
'--chimScoreDropMax 30',
'--chimScoreJunctionNonGTAG 0',
'--chimScoreSeparation 1',
'--chimSegmentReadGapMax 3',
'--chimMultimapNmax 50'
// Collapse the list into a single space-separated argument string.
].join(' ').trim()
}
publishDir = [
[
path: { "${params.outdir}/analysis/${meta.id}/arriba/STAR/log" },
Expand All @@ -186,30 +189,33 @@ process {
}

withName: STAR_FOR_STARFUSION {
ext.args = [
'--readFilesCommand zcat',
'--outSAMtype BAM Unsorted',
'--outReadsUnmapped None',
'--twopassMode Basic',
'--outSAMstrandField intronMotif',
'--outSAMunmapped Within',
'--chimSegmentMin 12',
'--chimJunctionOverhangMin 8',
'--chimOutJunctionFormat 1',
'--alignSJDBoverhangMin 10',
'--alignMatesGapMax 100000',
'--alignIntronMax 100000',
'--alignSJstitchMismatchNmax 5 -1 5 5',
'--chimMultimapScoreRange 3',
'--chimScoreJunctionNonGTAG -4',
'--chimMultimapNmax 20',
'--chimNonchimScoreDropMin 10',
'--peOverlapNbasesMin 12',
'--peOverlapMMp 0.1',
'--alignInsertionFlush Right',
'--alignSplicedMateMapLminOverLmate 0',
'--alignSplicedMateMapLmin 30'
].join(' ').trim()
// STAR command-line arguments for the STAR_FOR_STARFUSION alignment.
// Written as a closure because the read-group line below interpolates `meta`,
// which has to be resolved when the closure is evaluated for a task.
ext.args = {
[
// Read group: both ID and SM are set to meta.sample; platform is fixed to Illumina.
"--outSAMattrRGline ID:${meta.sample} SM:${meta.sample} PL:Illumina",
'--readFilesCommand zcat',
// NOTE(review): SAM/BAM output is switched off here, yet SAM-related options
// (--outSAMattrRGline, --outSAMstrandField, --outSAMunmapped) are still passed.
// Presumably only the chimeric junction output is consumed downstream — confirm.
'--outSAMtype None',
'--outReadsUnmapped None',
'--twopassMode Basic',
'--outSAMstrandField intronMotif',
'--outSAMunmapped Within',
'--chimSegmentMin 12',
'--chimJunctionOverhangMin 8',
'--chimOutJunctionFormat 1',
'--alignSJDBoverhangMin 10',
'--alignMatesGapMax 100000',
'--alignIntronMax 100000',
'--alignSJstitchMismatchNmax 5 -1 5 5',
'--chimMultimapScoreRange 3',
'--chimScoreJunctionNonGTAG -4',
'--chimMultimapNmax 20',
'--chimNonchimScoreDropMin 10',
'--peOverlapNbasesMin 12',
'--peOverlapMMp 0.1',
'--alignInsertionFlush Right',
'--alignSplicedMateMapLminOverLmate 0',
'--alignSplicedMateMapLmin 30'
// Collapse the list into a single space-separated argument string.
].join(' ').trim()
}
publishDir = [
[
path: { "${params.outdir}/analysis/${meta.id}/starfusion/STAR/log" },
Expand All @@ -227,31 +233,30 @@ process {

withName: STAR_ALIGN {
ext.prefix = { "$meta.sample" }
ext.args = [
'--quantMode GeneCounts',
'--twopassMode Basic',
'--outSAMtype BAM SortedByCoordinate',
'--readFilesCommand zcat',
'--runRNGseed 0',
'--outFilterMultimapNmax 20',
'--alignSJDBoverhangMin 1',
'--outSAMattributes NH HI AS NM MD',
'--quantTranscriptomeBan Singleend',
'--outSAMstrandField intronMotif',
params.save_unaligned ? '--outReadsUnmapped Fastx' : ''
].join(' ').trim()
// STAR command-line arguments for the main STAR_ALIGN alignment.
// Written as a closure because the read-group line and nothing else here
// depends on `meta`, which has to be resolved when the closure is evaluated
// for a task.
ext.args = {
[
// Builds one "ID:<rg> SM:<sample> PL:Illumina" entry per element of
// meta.read_group and joins the entries with " , ".
// Assumes meta.read_group is a collection of read-group IDs, one per
// input FASTQ (pair) — TODO confirm against the channel that populates meta.
"--outSAMattrRGline ${meta.read_group.collect{"ID:${it} SM:${meta.sample} PL:Illumina"}.join(" , ")}",
'--quantMode GeneCounts',
'--twopassMode Basic',
'--outSAMtype BAM SortedByCoordinate',
'--readFilesCommand zcat',
'--runRNGseed 0',
'--outFilterMultimapNmax 20',
'--alignSJDBoverhangMin 1',
'--outSAMattributes NH HI AS NM MD',
'--quantTranscriptomeBan Singleend',
'--outSAMstrandField intronMotif',
// Optional flag: contributes an empty string when params.save_unaligned is
// falsy; the resulting trailing space is removed by the trim() below.
params.save_unaligned ? '--outReadsUnmapped Fastx' : ''
// Collapse the list into a single space-separated argument string.
].join(' ').trim()
}
publishDir = [
[
path: { "${params.outdir}/analysis/${meta.sample}/STAR/log" },
mode: params.publish_dir_mode,
pattern: '*.{out,tab}'
],
[
path: {
meta.fq_num.toInteger() > 1 ?
"${params.outdir}/analysis/${meta.sample}/STAR/${meta.id}" :
"${params.outdir}/analysis/${meta.sample}/STAR/"
},
path: {"${params.outdir}/analysis/${meta.sample}/STAR/"},
mode: params.publish_dir_mode,
pattern: '*.bam',
saveAs: { filename -> meta.fq_num.toInteger() > 1 ? null : filename }
Expand Down
5 changes: 0 additions & 5 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,6 @@
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
},
"star/align": {
"branch": "master",
"git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01",
"installed_by": ["modules"]
},
"star/genomegenerate": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,14 @@ process STAR_ALIGN {
tuple val(meta), path(reads)
path index
path gtf
val star_ignore_sjdbgtf
val seq_platform
val seq_center

output:
tuple val(meta), path('*d.out.bam') , emit: bam
tuple val(meta), path('*Log.final.out') , emit: log_final
tuple val(meta), path('*Log.out') , emit: log_out
tuple val(meta), path('*Log.progress.out'), emit: log_progress
path "versions.yml" , emit: versions

tuple val(meta), path('*d.out.bam') , optional:true, emit: bam
tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted
tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript
tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted
Expand All @@ -36,20 +33,21 @@ process STAR_ALIGN {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
def seq_platform = seq_platform ? "'PL:$seq_platform'" : ""
def seq_center = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix' $seq_platform " : "--outSAMattrRGline ID:$prefix 'SM:$prefix' $seq_platform "
def reads1 = [], reads2 = []
meta.single_end ? reads.each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v }
def attrRG = args.contains("--outSAMattrRGline") ? '' : "--outSAMattrRGline ID:$prefix 'SM:$prefix'"
def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted'
def gtf_param = gtf ? "--sjdbGTFfile $gtf" : ''
def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : ''
"""
STAR \\
--genomeDir $index \\
--readFilesIn $reads \\
--readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\
--runThreadN $task.cpus \\
--outFileNamePrefix $prefix. \\
$out_sam_type \\
$ignore_gtf \\
$seq_center \\
$attrRG \\
$gtf_param \\
$args
$mv_unsorted_bam
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,4 @@ authors:
- "@kevinmenden"
- "@drpatelh"
- "@praveenraj2018"
- "@anoronh4"
31 changes: 3 additions & 28 deletions subworkflows/local/align_reads.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
include { STAR_ALIGN } from '../../modules/nf-core/star/align/main'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
include { STAR_ALIGN } from '../../modules/local/star/align/main'
include { UMITOOLS_DEDUP } from '../../modules/nf-core/umitools/dedup/main'
include {
SAMTOOLS_INDEX;
Expand All @@ -20,34 +19,11 @@ workflow ALIGN_READS {
STAR_ALIGN(
reads,
star_index,
gtf,
false,
false,
false
gtf
)
ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first())

star_align_bam = STAR_ALIGN.out.bam
.map{ meta, bam ->
def meta_clone = meta.clone().findAll { !["read_group","fastq_pair_id"].contains(it.key) }
meta_clone.id = meta.sample
[meta_clone, bam]
}.branch { meta, bam ->
needs_merge: meta.fq_num > 1
skips_merge: meta.fq_num == 1
}

SAMTOOLS_MERGE(
star_align_bam.needs_merge
.map{ meta, bam -> [groupKey(meta, meta.fq_num),bam] }
.groupTuple(),
[],
[]
)
ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first())

merged_bam = star_align_bam.skips_merge
.mix(SAMTOOLS_MERGE.out.bam)
merged_bam = STAR_ALIGN.out.bam

SAMTOOLS_INDEX(merged_bam)
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
Expand All @@ -69,7 +45,6 @@ workflow ALIGN_READS {
.filter{ meta, bam -> ! meta.has_umi }
)


emit:
bam = dedup_bam
bam_withdup = merged_bam
Expand Down
14 changes: 4 additions & 10 deletions subworkflows/local/fusion.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/nf-core/star/align/main'
include { STAR_ALIGN as STAR_FOR_ARRIBA } from '../../modules/local/star/align/main'
include { ARRIBA } from '../../modules/nf-core/arriba/main'
include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/nf-core/star/align/main'
include { STAR_ALIGN as STAR_FOR_STARFUSION } from '../../modules/local/star/align/main'
include { STARFUSION } from '../../modules/local/starfusion/detect/main'
include { FUSIONCATCHER_DETECT } from '../../modules/local/fusioncatcher/detect/main'
include { FUSIONREPORT } from '../../modules/local/fusionreport/run/main'
Expand All @@ -24,10 +24,7 @@ workflow FUSION {
STAR_FOR_ARRIBA(
reads,
star_index,
gtf,
false,
false,
false
gtf
)
ch_versions = ch_versions.mix(STAR_FOR_ARRIBA.out.versions.first())

Expand All @@ -47,10 +44,7 @@ workflow FUSION {
reads,
// use the star index in the starfusion reference to ensure compatibility
starfusion_ref.map{ file( it + "/ref_genome.fa.star.idx")},
gtf,
false,
false,
false
gtf
)
ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions.first())

Expand Down
34 changes: 6 additions & 28 deletions subworkflows/local/merge_reads.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
include { SAMTOOLS_BAM2FQ } from '../../modules/nf-core/samtools/bam2fq/main'
include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main'

workflow MERGE_READS {
take:
Expand All @@ -10,39 +9,18 @@ workflow MERGE_READS {
ch_versions = Channel.empty()

reads_ch = reads
.map{ meta, reads ->
def meta_clone = meta.clone().findAll { !["read_group","fastq_pair_id"].contains(it.key) }
meta_clone.id = meta.sample
[meta_clone, reads]
}.branch { meta, reads ->
needs_merge: ( meta.fq_num > 1 ) && ( ! ( meta.has_umi && params.dedup_umi_for_fusions ) )
needs_bam2fq: meta.has_umi && params.dedup_umi_for_fusions
skips_merge: true
}

.filter{ meta, reads -> ! ( meta.has_umi && params.dedup_umi_for_fusions) }

bam_ch = bam
.branch { meta, bam ->
needs_bam2fq: meta.has_umi && params.dedup_umi_for_fusions
skips_bam2fq: true
}


CAT_FASTQ(
reads_ch.needs_merge
.map{ meta, reads -> [ groupKey(meta, meta.fq_num), reads ] }
.groupTuple()
.map{ meta, reads -> [ meta, reads.flatten() ] }
)
ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first())
.filter{ meta, bam -> meta.has_umi && params.dedup_umi_for_fusions }

SAMTOOLS_BAM2FQ(
bam_ch.needs_bam2fq,
bam_ch,
true
)
ch_versions = ch_versions.mix(SAMTOOLS_BAM2FQ.out.versions.first())

merged_reads = reads_ch.skips_merge
.mix(CAT_FASTQ.out.reads)
merged_reads = reads_ch
.mix(
SAMTOOLS_BAM2FQ.out.reads
.map{ meta, reads ->
Expand All @@ -51,6 +29,6 @@ workflow MERGE_READS {
)

emit:
merged_reads = merged_reads
dedup_reads = merged_reads
ch_versions = ch_versions
}
Loading

0 comments on commit 9b40147

Please sign in to comment.