Skip to content

Commit

Permalink
PR comments - fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyasoifer committed Jun 25, 2022
1 parent 08f7a63 commit 89f425b
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 42 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@ report
jacoco.data
.gradle
build
*.log
*.swp
14 changes: 4 additions & 10 deletions src/main/java/picard/analysis/CollectQualityYieldMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.IOUtil;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
Expand Down Expand Up @@ -107,11 +106,7 @@ protected boolean usesNoRefReads() {
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
IOUtil.assertFileIsWritable(OUTPUT);
if (FLOW_MODE) {
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS, true);
} else {
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS);
}
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS, FLOW_MODE);
}

@Override
Expand All @@ -138,7 +133,7 @@ public static class QualityYieldMetricsCollector {

// If true, include bases from supplemental alignments in metrics. Setting to true may cause double-counting
// of bases if there are supplemental alignments in the input file.
private final boolean includeSupplementalAlignments;
public final boolean includeSupplementalAlignments;

// If true collects RLQ25/RLQ30
private final boolean flowMode;
Expand Down Expand Up @@ -256,8 +251,8 @@ public QualityYieldMetricsFlow(final boolean useOriginalBaseQualities, final His
@Override
public void calculateDerivedFields() {
super.calculateDerivedFields();
this.READ_LENGTH_AVG_Q_ABOVE_25 = histogramGenerator.calculateLQ(25, 1,5);
this.READ_LENGTH_AVG_Q_ABOVE_30 = histogramGenerator.calculateLQ(30, 1,5);
this.READ_LENGTH_AVG_Q_ABOVE_25 = histogramGenerator.calculateLQ(25, 1, 5);
this.READ_LENGTH_AVG_Q_ABOVE_30 = histogramGenerator.calculateLQ(30, 1, 5);
}

@Override
Expand Down Expand Up @@ -285,7 +280,6 @@ public QualityYieldMetrics() {
this(false);
}


public QualityYieldMetrics(final boolean useOriginalQualities) {
super();
this.useOriginalQualities = useOriginalQualities;
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/picard/sam/DuplicationMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,9 @@ public static void main(String[] args) {
}
}

/**
* Adds a duplicate read to the metrics
*/
public void addDuplicateReadToMetrics(final SAMRecord rec) {
// only update duplicate counts for "decider" reads, not tag-a-long reads
if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
Expand All @@ -255,8 +258,10 @@ public void addDuplicateReadToMetrics(final SAMRecord rec) {
}
}

/**
* Adds a read to the metrics
*/
public void addReadToLibraryMetrics(final SAMRecord rec) {

// First bring the simple metrics up to date
if (rec.getReadUnmappedFlag()) {
++UNMAPPED_READS;
Expand Down
37 changes: 36 additions & 1 deletion src/main/java/picard/sam/DuplicationMetricsFactory.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,39 @@
/*
* The MIT License
*
* Copyright (c) 2022 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;

/**
* Factory class that creates either regular or flow-based duplication metrics.
* Supports MarkDuplicates with --flowMode
*/
public class DuplicationMetricsFactory {

// create a DuplicationMetrics for a specific read group
/**
* Create empty regular or flow duplication metrics
* @param flowMetrics
* @return DuplicationMetrics
*/
public static DuplicationMetrics createMetrics(final boolean flowMetrics) {

// create based on the presence of flow order
Expand All @@ -13,6 +44,10 @@ public static DuplicationMetrics createMetrics(final boolean flowMetrics) {
}
}

/**
* Create non-flow duplication metrics
* @return a newly constructed DuplicationMetrics (the base, non-flow metrics class)
*/
public static DuplicationMetrics createMetrics() {
return new DuplicationMetrics();
}
Expand Down
29 changes: 27 additions & 2 deletions src/main/java/picard/sam/FlowBasedDuplicationMetrics.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,32 @@
/*
* The MIT License
*
* Copyright (c) 2022 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.sam;

import htsjdk.samtools.SAMRecord;
import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates;
import picard.util.MathUtil;

public class FlowBasedDuplicationMetrics extends DuplicationMetrics {
Expand Down Expand Up @@ -53,7 +78,7 @@ public void addDuplicateReadToMetrics(final SAMRecord rec) {

if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
if ( AbstractMarkDuplicatesCommandLineProgram.isSingleEndReadKnownFragment(rec) ) {
if ( ReadEndsForMarkDuplicates.isSingleEndReadKnownFragment(rec) ) {
++UNPAIRED_DUPS_WITH_TLEN;
} else {
++UNPAIRED_DUPS_WITHOUT_TLEN;
Expand All @@ -66,7 +91,7 @@ public void addReadToLibraryMetrics(final SAMRecord rec) {

super.addReadToLibraryMetrics(rec);

if (AbstractMarkDuplicatesCommandLineProgram.isSingleEndReadKnownFragment(rec)) {
if (ReadEndsForMarkDuplicates.isSingleEndReadKnownFragment(rec)) {
++UNPAIRED_WITH_TLEN;
}
}
Expand Down
10 changes: 8 additions & 2 deletions src/main/java/picard/sam/markduplicates/MarkDuplicates.java
Original file line number Diff line number Diff line change
Expand Up @@ -645,13 +645,19 @@ private void buildSortedReadEndLists(final boolean useBarcodes) {
}

/**
* Calculates score for the duplicate read
*
* This implementation delegates to DuplicateScoringStrategy.computeDuplicateScore with the
* configured DUPLICATE_SCORING_STRATEGY.
*
* @param rec - read
* @param pairedEnds - location of the read ends; not used by this implementation
* (NOTE(review): presumably kept for overriding implementations - confirm)
* @return - read score calculated according to the DUPLICATE_SCORING_STRATEGY:
* SUM_OF_BASE_QUALITIES, (default)
* TOTAL_MAPPED_REFERENCE_LENGTH,
* RANDOM
*
*/
public short getReadDuplicateScore(final SAMRecord rec, final ReadEndsForMarkDuplicates pairedEnds) {
return DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY);
}


/**
* Builds a read ends object that represents a single read.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
/*
* The MIT License
*
* Copyright (c) 2009-2022 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.sam.markduplicates;

import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -51,9 +75,8 @@ public MarkDuplicatesForFlowHelper(final MarkDuplicates md) {
}

private void validateFlowParameteres() {

if ( md.flowBasedArguments.UNPAIRED_END_UNCERTAINTY != 0 && !md.flowBasedArguments.USE_END_IN_UNPAIRED_READS ) {
throw new IllegalArgumentException("invalid parameter combination. UNPAIRED_END_UNCERTAINTY can not be specified when USE_END_IN_UNPAIRED_READS not specified");
throw new IllegalArgumentException("Invalid parameter combination. UNPAIRED_END_UNCERTAINTY can not be specified when USE_END_IN_UNPAIRED_READS not specified");
}
}

Expand Down Expand Up @@ -91,7 +114,7 @@ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean in

// this code does not support pairs at this time
if ( md.pairSort.iterator().hasNext() ) {
throw new IllegalArgumentException("flow based code does not support paired reads");
throw new IllegalArgumentException("Flow based code does not support paired reads");
}
md.pairSort.cleanup();
md.pairSort = null;
Expand Down Expand Up @@ -334,7 +357,7 @@ protected static int getReadEndCoordinate(final SAMRecord rec, final boolean sta
: coor;
}

// "know end" case
// "known end" case
if (flowBasedArguments.FLOW_Q_IS_KNOWN_END ? isAdapterClipped(rec) : isAdapterClippedWithQ(rec)) {
return unclippedCoor;
}
Expand Down
24 changes: 24 additions & 0 deletions src/main/java/picard/sam/markduplicates/MarkDuplicatesHelper.java
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
/*
* The MIT License
*
* Copyright (c) 2009-2022 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.sam.markduplicates;

import htsjdk.samtools.SAMFileHeader;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,26 +217,6 @@ public static DuplicationMetrics addReadToLibraryMetrics(final SAMRecord rec, fi
return metrics;
}

/**
* This method is used to generate the following two metrics:
* UNPAIRED_DUPS_WITH_TLEN
* UNPAIRED_DUPS_WITHOUT_TLEN
*
* Returns false for unmapped or paired reads. Otherwise returns true when the read is adapter
* clipped (per MarkDuplicatesForFlowHelper.isAdapterClipped), or when the alignment end
* (forward strand) or alignment start (reverse strand) differs from its unclipped counterpart.
*
* NOTE(review): the original description equates a true result with "the exact fragment length
* is known (i.e. it was not quality trimmed)" - confirm against the clipping conventions in use.
*
* @param rec the read to examine
* @return true if the read is mapped, unpaired and satisfies one of the clipping conditions above
*/
public static boolean isSingleEndReadKnownFragment(final SAMRecord rec) {
// unmapped or paired reads never qualify
if ( rec.getReadUnmappedFlag() || rec.getReadPairedFlag() ) {
return false;
// adapter-clipped reads qualify regardless of strand
} else if ( MarkDuplicatesForFlowHelper.isAdapterClipped(rec) ) {
return true;
// forward strand: compare the alignment end with the unclipped end
} else if ( !rec.getReadNegativeStrandFlag() ) {
return rec.getEnd() != rec.getUnclippedEnd();
// reverse strand: compare the alignment start with the unclipped start
} else {
return rec.getStart() != rec.getUnclippedStart();
}
}

/**
* Little class used to package up a header and an iterable/iterator.
*/
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/picard/sam/markduplicates/util/ReadEnds.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ abstract public class ReadEnds extends PhysicalLocationShort {
public int read1ReferenceIndex = -1;
public int read1Coordinate = -1;
public int read2ReferenceIndex = -1;
public int read2Coordinate = -1; // this field is overloaded for flow based processing as the end coordinate (paired reads not supported)
public int read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)

// Additional information used to detect optical dupes
public short readGroup = -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

package picard.sam.markduplicates.util;

import htsjdk.samtools.SAMRecord;
import picard.sam.markduplicates.MarkDuplicatesForFlowHelper;

/**
* Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar
*
Expand Down Expand Up @@ -83,4 +86,24 @@ public ReadEndsForMarkDuplicates clone() {
return new ReadEndsForMarkDuplicates(this);
}

/**
 * Decides whether a read counts towards the "with TLEN" unpaired metrics
 * (UNPAIRED_DUPS_WITH_TLEN as opposed to UNPAIRED_DUPS_WITHOUT_TLEN).
 *
 * A read qualifies only when it is mapped and unpaired, and additionally either
 * MarkDuplicatesForFlowHelper.isAdapterClipped(rec) holds, or the strand-appropriate alignment
 * boundary (end for forward-strand reads, start for reverse-strand reads) differs from its
 * unclipped value.
 *
 * NOTE(review): the previous description summarised this as "the exact fragment length is known
 * (i.e. it was not quality trimmed)" - confirm against the clipping conventions in use.
 *
 * @param rec the read to examine
 * @return true if the read is mapped, unpaired and meets one of the clipping conditions above
 */
public static boolean isSingleEndReadKnownFragment(final SAMRecord rec) {
    // Unmapped and paired reads never qualify.
    if (rec.getReadUnmappedFlag() || rec.getReadPairedFlag()) {
        return false;
    }

    // An adapter-clipped read qualifies regardless of strand.
    if (MarkDuplicatesForFlowHelper.isAdapterClipped(rec)) {
        return true;
    }

    // Otherwise compare the strand-appropriate boundary with its unclipped counterpart.
    return rec.getReadNegativeStrandFlag()
            ? rec.getStart() != rec.getUnclippedStart()
            : rec.getEnd() != rec.getUnclippedEnd();
}

}

0 comments on commit 89f425b

Please sign in to comment.