Skip to content

Commit

Permalink
add featurecounts to pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
anoronh4 committed Aug 31, 2023
1 parent ca13b87 commit 97fbb3b
Show file tree
Hide file tree
Showing 11 changed files with 286 additions and 20 deletions.
13 changes: 11 additions & 2 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,23 @@ use_filename_as_sample_name:
- kallisto

custom_data:
my_genstats:
htseq_expression_genstats:
plot_type: "generalstats"
pconfig:
- GeneCount:
title: "Gene Count"
format: "{:,.0f}"
description: "HTSeq: Number of genes with detected expression"
kallisto_expression_genstats:
plot_type: "generalstats"
pconfig:
- GeneCount:
title: "Gene Count"
format: "{:,.0f}"
description: "Kallisto: Number of genes with detected expression"

sp:
my_genstats:
htseq_expression_genstats:
fn: "*.htseq.summary.txt"
kallisto_expression_genstats:
fn: "*.kallisto.customsummary.txt"
70 changes: 70 additions & 0 deletions bin/count_features.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/local/bin/Rscript

# __author__ = "Anne Marie Noronha"
# __email__ = "noronhaa@mskcc.org"
# __version__ = "0.0.1"

usage <- function() {
message("Usage:")
message("count_features.R --abundance <abundance.tsv> --gtf <annotation.gtf> --sample <sampleid>")
}

args = commandArgs(TRUE)

if (is.null(args) | length(args)<1) {
usage()
quit()
}

#' Parse out options from a string without recourse to optparse
#'
#' @param x Long-form argument list like --opt1 val1 --opt2 val2
#'
#' @return named list of options and values similar to optparse

parse_args <- function(x){
args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1]
args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE))
# Ensure the option vectors are length 2 (key/ value) to catch empty ones
args_vals <- lapply(args_vals, function(z){ length(z) <- 2; z})

parsed_args <- structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) gsub('-','_',x[1])))
parsed_args[! is.na(parsed_args)]
}

args_opt <- parse_args(paste(args,collapse=" "))

abundance <- read.table(args_opt$abundance, header=T)
gtf <- args_opt$gtf
Sample <- args_opt$sample

gtf_df <- as.data.frame(rtracklayer::import(gtf))

gtf_df <- unique(gtf_df[,c("transcript_id","gene_id")])

abundance <- merge(
abundance,
gtf_df,
by.x="target_id",
by.y="transcript_id",
all.x = T,
all.y =F
)
TranscriptCount <- dim(abundance[abundance$est_counts > 0,])[[1]]

abundance_gene <- aggregate(
x = abundance$est_counts,
by = list(abundance$gene_id),
FUN = sum
)
GeneCount <- dim(abundance_gene[abundance_gene$x > 0,])[[1]]

out_df <- data.frame(Sample, TranscriptCount, GeneCount)
write.table(
out_df,
paste0(Sample,".kallisto.customsummary.txt"),
sep="\t",
row.names = F,
append = F,
quote = F
)
5 changes: 5 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,15 @@ process {
}

withName: HTSEQ_COUNT {
ext.when = params.expression_quantifier.replaceAll("\\s","").toLowerCase().split(",").contains("htseq")
ext.args = { "-s ${meta.single_end || meta.strand == "forward" ? "yes" : meta.strand == "reverse" ? "reverse" : "no" } -r pos" }
time = { check_max( 20.h * task.attempt, 'time' ) }
}

withName: SUBREAD_FEATURECOUNTS {
ext.when = ext.when = params.expression_quantifier.replaceAll("\\s","").toLowerCase().split(",").contains("featurecounts")
}

withName: KALLISTO_QUANT {
ext.prefix = { "$meta.sample" }
ext.args = {
Expand Down
6 changes: 6 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"]
},
"subread/featurecounts": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"],
"patch": "modules/nf-core/subread/featurecounts/subread-featurecounts.diff"
},
"ucsc/gtftogenepred": {
"branch": "master",
"git_sha": "511909dedab2a9955f2c3eae88abd3733edfd349",
Expand Down
47 changes: 47 additions & 0 deletions modules/local/count_features/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
process COUNT_FEATURES {
tag "$meta.id"
label "process_low"

/// must be using singularity 3.7+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-rtracklayer:1.60.0--r43ha9d7317_0' :
'https://depot.galaxyproject.org/singularity/bioconductor-rtracklayer:1.60.0--r43ha9d7317_0' }"

input:
tuple val(meta), path(abundance)
path(gtf)

output:
tuple val(meta), path("*.kallisto.customsummary.txt"), emit: kallisto_count_feature
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def sample = "${meta.sample}"
"""
count_features.R \\
--abundance ${abundance} \\
--gtf ${gtf} \\
--sample ${sample}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
R: \$(R --version | head -n1)
count_features.R: 0.0.1
END_VERSIONS
"""

stub:
def sample = meta.sample ?: ''
"""
touch ${sample}.kallisto.customsummary.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
R: \$(R --version | head -n1)
add_flags_and_cluster_information.R: 0.0.1
END_VERSIONS
"""
}
48 changes: 48 additions & 0 deletions modules/nf-core/subread/featurecounts/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions modules/nf-core/subread/featurecounts/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions modules/nf-core/subread/featurecounts/subread-featurecounts.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ params {
dedup_umi_for_kallisto = true
kallisto_fragment_len = 500
kallisto_fragment_sd = 150
// choose "htseq" or "featurecounts" or "htseq,featurecounts"
expression_quantifier = "featurecounts"

// MultiQC options
multiqc_config = null
Expand Down
33 changes: 26 additions & 7 deletions subworkflows/local/quantification.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
include { HTSEQ_COUNT } from '../../modules/local/htseq/count/main'
include { KALLISTO_QUANT } from '../../modules/nf-core/kallisto/quant/main'
include { HTSEQ_COUNT } from '../../modules/local/htseq/count/main'
include { KALLISTO_QUANT } from '../../modules/nf-core/kallisto/quant/main'
include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts/main'
include { COUNT_FEATURES } from '../../modules/local/count_features/main'


workflow QUANTIFICATION {
Expand All @@ -14,11 +16,19 @@ workflow QUANTIFICATION {

ch_versions = Channel.empty()


HTSEQ_COUNT(
bam.join(bai,by:[0]),
gtf
)
ch_versions = ch_versions.mix(HTSEQ_COUNT.out.versions)
ch_versions = ch_versions.mix(HTSEQ_COUNT.out.versions)


SUBREAD_FEATURECOUNTS(
bam,
gtf
)
ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions)

KALLISTO_QUANT(
reads,
Expand All @@ -28,9 +38,18 @@ workflow QUANTIFICATION {
)
ch_versions = ch_versions.mix(KALLISTO_QUANT.out.versions)

COUNT_FEATURES(
KALLISTO_QUANT.out.abundance,
gtf
)


emit:
htseq_counts = HTSEQ_COUNT.out.counts
htseq_summary = HTSEQ_COUNT.out.summary
kallisto_log = KALLISTO_QUANT.out.log
ch_versions = ch_versions
htseq_counts = HTSEQ_COUNT.out.counts
htseq_summary = HTSEQ_COUNT.out.summary
featurecounts_counts = SUBREAD_FEATURECOUNTS.out.counts
featurecounts_summary = SUBREAD_FEATURECOUNTS.out.summary
kallisto_log = KALLISTO_QUANT.out.log
kallisto_count_feature = COUNT_FEATURES.out.kallisto_count_feature
ch_versions = ch_versions
}
15 changes: 4 additions & 11 deletions workflows/forte.nf
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,10 @@ workflow FORTE {
ALIGN_READS.out.bam_dedup,
ALIGN_READS.out.bai_dedup,
QUANTIFICATION.out.kallisto_log
.mix(QUANTIFICATION.out.kallisto_count_feature)
.filter{meta, log ->
meta.has_umi && params.dedup_umi_for_kallisto
}.mix(ALIGN_READS.out.umitools_dedup_log)
.mix(
QUANTIFICATION.out.htseq_counts
.mix(QUANTIFICATION.out.htseq_summary)
.filter{ meta, htseq_file -> meta.has_umi }
),
}.mix(ALIGN_READS.out.umitools_dedup_log),
PREPARE_REFERENCES.out.refflat,
PREPARE_REFERENCES.out.rrna_interval_list,
PREPARE_REFERENCES.out.rseqc_bed,
Expand All @@ -163,13 +159,10 @@ workflow FORTE {
ALIGN_READS.out.bam_withdup,
ALIGN_READS.out.bai_withdup,
PREPROCESS_READS.out.fastp_json
.mix(
QUANTIFICATION.out.htseq_counts
.mix(QUANTIFICATION.out.htseq_summary)
.filter{ meta, htseq_file -> ! meta.has_umi }
).mix(ALIGN_READS.out.star_log_final)
.mix(ALIGN_READS.out.star_log_final)
.mix(
QUANTIFICATION.out.kallisto_log
.mix(QUANTIFICATION.out.kallisto_count_feature)
.filter{meta, log ->
! (meta.has_umi && params.dedup_umi_for_kallisto)
}
Expand Down

0 comments on commit 97fbb3b

Please # to comment.