Skip to content

Estimated filtering fix #813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
3.2.1

* Fixed a bug in `estimateReadFiltering` where the estimated number of filtered reads was typically too low.

3.2.0

* Added access in the Galaxy wrapper to the `--labels` option in most tools (issue #738)
Expand Down
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,5 @@ jobs:
displayName: Installing deepTools
- script: |
source activate foo
./.planemo.sh
./.planemo.sh $TESTGALAXY
displayName: Run planemo
18 changes: 9 additions & 9 deletions deeptools/estimateReadFiltering.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,43 +329,43 @@ def main(args=None):
# nFiltered
metric = 0.0
if totals[idx] > 0:
metric = blacklisted[idx] + float(nFiltered[idx]) / float(totals[idx]) * nFiltered[idx]
of.write("\t{}".format(round(metric, 1)))
metric = blacklisted[idx] + float(nFiltered[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# MAPQ
metric = 0.0
if totals[idx] > 0:
metric = float(MAPQs[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# samFlagInclude
metric = 0.0
if totals[idx] > 0:
metric = float(flagIncludes[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# samFlagExclude
metric = 0.0
if totals[idx] > 0:
metric = float(flagExcludes[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# Internally determined duplicates
metric = 0.0
if totals[idx] > 0:
metric = float(internalDupes[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# Externally marked duplicates
metric = 0.0
if totals[idx] > 0:
metric = float(externalDupes[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# Singletons
metric = 0.0
if totals[idx] > 0:
metric = float(singletons[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
# filterRNAstrand
metric = 0.0
if totals[idx] > 0:
metric = float(rnaStrand[idx]) / float(totals[idx]) * mapped[idx]
of.write("\t{}".format(round(metric, 1)))
of.write("\t{}".format(min(round(metric, 1), mapped[idx])))
of.write("\n")

if args.outFile is not None:
Expand Down
4 changes: 2 additions & 2 deletions deeptools/test/test_readFiltering.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_estimate_read_filtering_minimal():

def test_estimate_read_filtering_params():
"""
Minimal testing
--minMappingQuality 10 --samFlagExclude 512 --ignoreDuplicates -bl
"""
outfile = '/tmp/test_params.txt'
args = '-b {} --minMappingQuality 10 --samFlagExclude 512 --ignoreDuplicates -bl {} -o {}'.format(BAMFILE_FILTER, BEDFILE_FILTER, outfile).split()
Expand All @@ -48,7 +48,7 @@ def test_estimate_read_filtering_params():
_[0] = os.path.basename(_[0])
resp[1] = "\t".join(_)
expected = ['Sample\tTotal Reads\tMapped Reads\tAlignments in blacklisted regions\tEstimated mapped reads filtered\tBelow MAPQ\tMissing Flags\tExcluded Flags\tInternally-determined Duplicates\tMarked Duplicates\tSingletons\tWrong strand\n',
'test_filtering.bam\t193\t193\t7\t176.1\t41.4\t0.0\t186.5\t31.6\t0.0\t0.0\t0.0\n']
'test_filtering.bam\t193\t193\t7\t193\t41.4\t0.0\t186.5\t31.6\t0.0\t0.0\t0.0\n']
assert_equal(resp, expected)
unlink(outfile)

Expand Down
4 changes: 2 additions & 2 deletions galaxy/wrapper/bamCompare.xml
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@
<expand macro="skipNAs" />
<param argument="--skipZeroOverZero" type="select" label="Skip bins of no coverage"
help="Skip bins where BOTH files lack coverage.">
<option value="no" selected="true">No</option>
<option value="yes">Yes, skip them.</option>
<option value="" selected="true">No</option>
<option value="--skipZeroOverZero">Yes, skip them.</option>
</param>

<param argument="--ignoreForNormalization" type="text" value="" size="50"
Expand Down
14 changes: 2 additions & 12 deletions galaxy/wrapper/computeMatrix.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
--scoreFileName #echo ' '.join($files)#

--outFileName '$outFileName'
--samplesLabel #echo ' '.join($labels)#

@THREADS@

Expand Down Expand Up @@ -72,9 +73,6 @@
#if $advancedOpt.scale is not None and str($advancedOpt.scale) != '':
--scale $advancedOpt.scale
#end if
#if $advancedOpt.samplesLabel is not None and str($advancedOpt.samplesLabel) != '':
--samplesLabel $advancedOpt.samplesLabel
#end if

@ADVANCED_OPTS_GTF@

Expand All @@ -91,6 +89,7 @@
</repeat>

<expand macro="multiple_input_bigwigs" MIN="1" LABEL="Score file" TITLE="Score files"/>
<expand macro="custom_sample_labels" />

<conditional name="mode" >
<param name="mode_select" type="select"
Expand Down Expand Up @@ -207,15 +206,6 @@
bias the average values. (--maxThreshold)"/>
<param name="scale" type="float" optional="True" label="Scaling factor"
help="If set, all values are multiplied by this number. (--scale)"/>
<param argument="--samplesLabel" type="text" size="30"
label="Labels for the samples (each bigwig)"
help="The default is to use the history item label. The sample labels should be separated by
spaces and quoted if a label itself contains a space E.g. label-1 &quot;label 2&quot;">
<sanitizer>
<valid initial="string.printable">
</valid>
</sanitizer>
</param>

<expand macro="gtf_options" />

Expand Down
4 changes: 2 additions & 2 deletions galaxy/wrapper/deepTools_macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ is vital to you, select Yes below.">

<token name="@multiple_input_bigwigs@">
<![CDATA[
#if $custom_labels_conditional.custom_labels_select == "Yes":
#if $custom_sample_labels_conditional.custom_labels_select == "Yes":
#set custom_labels=labels
#end if
#set files=[]
Expand All @@ -533,7 +533,7 @@ is vital to you, select Yes below.">
#silent $labels.append("'%s'" % $identifier)
#end for
#end if
#if $custom_labels_conditional.custom_labels_select == "Yes":
#if $custom_sample_labels_conditional.custom_labels_select == "Yes":
#set labels=custom_labels
#end if
]]>
Expand Down
4 changes: 2 additions & 2 deletions galaxy/wrapper/test-data/estimateReadFiltering.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sample Total Reads Mapped Reads Alignments in blacklisted regions Estimated mapped reads filtered Below MAPQ Missing Flags Excluded Flags Internally-determined Duplicates Marked Duplicates Singletons Wrong strand
paired_chr2L.bam 12644 12589 0 1395.9 4149.0 0.0 0.0 0.0 0.0 55.0 0.0
paired_chr2L.bam 12644 12589 0 1395.9 4149.0 0.0 0.0 0.0 0.0 55.0 0.0
paired_chr2L.bam 12644 12589 0 4192.0 4149.0 0.0 0.0 0.0 0.0 55.0 0.0
paired_chr2L.bam 12644 12589 0 4192.0 4149.0 0.0 0.0 0.0 0.0 55.0 0.0