From ba747da72b4b62d3212be367d169765025ee8c43 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Sat, 25 Jun 2022 22:45:18 +0300 Subject: [PATCH] PR comments - fixed --- .gitignore | 1 - .../analysis/CollectQualityYieldMetrics.java | 14 ++----- .../java/picard/sam/DuplicationMetrics.java | 7 +++- .../picard/sam/DuplicationMetricsFactory.java | 37 ++++++++++++++++++- .../sam/FlowBasedDuplicationMetrics.java | 29 ++++++++++++++- .../sam/markduplicates/MarkDuplicates.java | 10 ++++- .../MarkDuplicatesForFlowHelper.java | 31 ++++++++++++++-- .../markduplicates/MarkDuplicatesHelper.java | 24 ++++++++++++ ...tractMarkDuplicatesCommandLineProgram.java | 20 ---------- .../sam/markduplicates/util/ReadEnds.java | 2 +- .../util/ReadEndsForMarkDuplicates.java | 23 ++++++++++++ 11 files changed, 156 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index a0c69cb2f0..1444682d32 100644 --- a/.gitignore +++ b/.gitignore @@ -15,5 +15,4 @@ report jacoco.data .gradle build -*.log *.swp diff --git a/src/main/java/picard/analysis/CollectQualityYieldMetrics.java b/src/main/java/picard/analysis/CollectQualityYieldMetrics.java index 20d2662dde..83879a475a 100644 --- a/src/main/java/picard/analysis/CollectQualityYieldMetrics.java +++ b/src/main/java/picard/analysis/CollectQualityYieldMetrics.java @@ -29,7 +29,6 @@ import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.util.IOUtil; -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; @@ -107,11 +106,7 @@ protected boolean usesNoRefReads() { @Override protected void setup(final SAMFileHeader header, final File samFile) { IOUtil.assertFileIsWritable(OUTPUT); - if (FLOW_MODE) { - this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, 
INCLUDE_SUPPLEMENTAL_ALIGNMENTS, true); - } else { - this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS); - } + this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS, FLOW_MODE); } @Override @@ -138,7 +133,7 @@ public static class QualityYieldMetricsCollector { // If true, include bases from supplemental alignments in metrics. Setting to true may cause double-counting // of bases if there are supplemental alignments in the input file. - private final boolean includeSupplementalAlignments; + public final boolean includeSupplementalAlignments; // If true collects RLQ25/RLQ30 private final boolean flowMode; @@ -256,8 +251,8 @@ public QualityYieldMetricsFlow(final boolean useOriginalBaseQualities, final His @Override public void calculateDerivedFields() { super.calculateDerivedFields(); - this.READ_LENGTH_AVG_Q_ABOVE_25 = histogramGenerator.calculateLQ(25, 1,5); - this.READ_LENGTH_AVG_Q_ABOVE_30 = histogramGenerator.calculateLQ(30, 1,5); + this.READ_LENGTH_AVG_Q_ABOVE_25 = histogramGenerator.calculateLQ(25, 1, 5); + this.READ_LENGTH_AVG_Q_ABOVE_30 = histogramGenerator.calculateLQ(30, 1, 5); } @Override @@ -285,7 +280,6 @@ public QualityYieldMetrics() { this(false); } - public QualityYieldMetrics(final boolean useOriginalQualities) { super(); this.useOriginalQualities = useOriginalQualities; diff --git a/src/main/java/picard/sam/DuplicationMetrics.java b/src/main/java/picard/sam/DuplicationMetrics.java index 4840623002..7c05325cff 100644 --- a/src/main/java/picard/sam/DuplicationMetrics.java +++ b/src/main/java/picard/sam/DuplicationMetrics.java @@ -242,6 +242,9 @@ public static void main(String[] args) { } } + /** + * Adds duplicated read to the metrics + */ public void addDuplicateReadToMetrics(final SAMRecord rec) { // only update duplicate counts for "decider" reads, not tag-a-long reads if 
(!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) { @@ -255,8 +258,10 @@ public void addDuplicateReadToMetrics(final SAMRecord rec) { } } + /** + * Adds a read to the metrics + */ public void addReadToLibraryMetrics(final SAMRecord rec) { - // First bring the simple metrics up to date if (rec.getReadUnmappedFlag()) { ++UNMAPPED_READS; diff --git a/src/main/java/picard/sam/DuplicationMetricsFactory.java b/src/main/java/picard/sam/DuplicationMetricsFactory.java index ad7299fb72..1b23c79c4b 100644 --- a/src/main/java/picard/sam/DuplicationMetricsFactory.java +++ b/src/main/java/picard/sam/DuplicationMetricsFactory.java @@ -1,8 +1,39 @@ +/* + * The MIT License + * + * Copyright (c) 2022 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ package picard.sam; +/** + * Factory class that creates either regular or flow-based duplication metrics. 
+ * Supports MarkDuplicates with --flowMode + */ public class DuplicationMetricsFactory { - // create a DuplicationMetrics for a specific read group + /** + * Create empty regular or flow duplication metrics + * @param flowMetrics + * @return DuplicationMetrics + */ public static DuplicationMetrics createMetrics(final boolean flowMetrics) { // create based on the presence of flow order @@ -13,6 +44,10 @@ public static DuplicationMetrics createMetrics(final boolean flowMetrics) { } } + /** + * Create non-flow duplication metrics + * @return a new, empty non-flow DuplicationMetrics + */ public static DuplicationMetrics createMetrics() { return new DuplicationMetrics(); } diff --git a/src/main/java/picard/sam/FlowBasedDuplicationMetrics.java b/src/main/java/picard/sam/FlowBasedDuplicationMetrics.java index ed25731c07..e51b41e724 100644 --- a/src/main/java/picard/sam/FlowBasedDuplicationMetrics.java +++ b/src/main/java/picard/sam/FlowBasedDuplicationMetrics.java @@ -1,7 +1,32 @@ +/* + * The MIT License + * + * Copyright (c) 2022 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + package picard.sam; import htsjdk.samtools.SAMRecord; import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram; +import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates; import picard.util.MathUtil; public class FlowBasedDuplicationMetrics extends DuplicationMetrics { @@ -53,7 +78,7 @@ public void addDuplicateReadToMetrics(final SAMRecord rec) { if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) { if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { - if ( AbstractMarkDuplicatesCommandLineProgram.isSingleEndReadKnownFragment(rec) ) { + if ( ReadEndsForMarkDuplicates.isSingleEndReadKnownFragment(rec) ) { ++UNPAIRED_DUPS_WITH_TLEN; } else { ++UNPAIRED_DUPS_WITHOUT_TLEN; @@ -66,7 +91,7 @@ public void addReadToLibraryMetrics(final SAMRecord rec) { super.addReadToLibraryMetrics(rec); - if (AbstractMarkDuplicatesCommandLineProgram.isSingleEndReadKnownFragment(rec)) { + if (ReadEndsForMarkDuplicates.isSingleEndReadKnownFragment(rec)) { ++UNPAIRED_WITH_TLEN; } } diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java index 4953d1f91e..b5b52d133b 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java @@ -645,13 +645,19 @@ private void buildSortedReadEndLists(final boolean useBarcodes) { } /** - * update score for pairedEnds + * Calculates score for the duplicate read + * @param rec - read + * @param pairedEnds - location of the read ends + * @return - read score calculated according to the DUPLICATE_SCORING_STRATEGY: + * SUM_OF_BASE_QUALITIES, (default) + * 
TOTAL_MAPPED_REFERENCE_LENGTH, + * RANDOM + * */ public short getReadDuplicateScore(final SAMRecord rec, final ReadEndsForMarkDuplicates pairedEnds) { return DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY); } - /** * Builds a read ends object that represents a single read. */ diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java index 0a26767a76..7a06380456 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesForFlowHelper.java @@ -1,3 +1,27 @@ +/* + * The MIT License + * + * Copyright (c) 2009-2022 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + package picard.sam.markduplicates; import com.google.common.annotations.VisibleForTesting; @@ -51,9 +75,8 @@ public MarkDuplicatesForFlowHelper(final MarkDuplicates md) { } private void validateFlowParameteres() { - if ( md.flowBasedArguments.UNPAIRED_END_UNCERTAINTY != 0 && !md.flowBasedArguments.USE_END_IN_UNPAIRED_READS ) { - throw new IllegalArgumentException("invalid parameter combination. UNPAIRED_END_UNCERTAINTY can not be specified when USE_END_IN_UNPAIRED_READS not specified"); + throw new IllegalArgumentException("Invalid parameter combination. UNPAIRED_END_UNCERTAINTY can not be specified when USE_END_IN_UNPAIRED_READS not specified"); } } @@ -91,7 +114,7 @@ public void generateDuplicateIndexes(final boolean useBarcodes, final boolean in // this code does support pairs at this time if ( md.pairSort.iterator().hasNext() ) { - throw new IllegalArgumentException("flow based code does not support paired reads"); + throw new IllegalArgumentException("Flow based code does not support paired reads"); } md.pairSort.cleanup(); md.pairSort = null; @@ -334,7 +357,7 @@ protected static int getReadEndCoordinate(final SAMRecord rec, final boolean sta : coor; } - // "know end" case + // "known end" case if (flowBasedArguments.FLOW_Q_IS_KNOWN_END ? 
isAdapterClipped(rec) : isAdapterClippedWithQ(rec)) { return unclippedCoor; } diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicatesHelper.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesHelper.java index 18462c452e..16caa589eb 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicatesHelper.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesHelper.java @@ -1,3 +1,27 @@ +/* + * The MIT License + * + * Copyright (c) 2009-2022 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + package picard.sam.markduplicates; import htsjdk.samtools.SAMFileHeader; diff --git a/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java index 8a839e9f79..c46bf9eb5a 100644 --- a/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java +++ b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java @@ -217,26 +217,6 @@ public static DuplicationMetrics addReadToLibraryMetrics(final SAMRecord rec, fi return metrics; } - /** - * This method is used to generate the following two metrics: - * UNPAIRED_DUPS_WITH_TLEN - * UNPAIRED_DUPS_WITHOUT_TLEN - * - * It will return true if and only if the read is single ended and the exact fragment length is - * known (i.e. it was not quality trimmed) - */ - public static boolean isSingleEndReadKnownFragment(final SAMRecord rec) { - if ( rec.getReadUnmappedFlag() || rec.getReadPairedFlag() ) { - return false; - } else if ( MarkDuplicatesForFlowHelper.isAdapterClipped(rec) ) { - return true; - } else if ( !rec.getReadNegativeStrandFlag() ) { - return rec.getEnd() != rec.getUnclippedEnd(); - } else { - return rec.getStart() != rec.getUnclippedStart(); - } - } - /** * Little class used to package up a header and an iterable/iterator. 
*/ diff --git a/src/main/java/picard/sam/markduplicates/util/ReadEnds.java b/src/main/java/picard/sam/markduplicates/util/ReadEnds.java index 0a2ff032a7..9d5dd3775b 100644 --- a/src/main/java/picard/sam/markduplicates/util/ReadEnds.java +++ b/src/main/java/picard/sam/markduplicates/util/ReadEnds.java @@ -35,7 +35,7 @@ abstract public class ReadEnds extends PhysicalLocationShort { public int read1ReferenceIndex = -1; public int read1Coordinate = -1; public int read2ReferenceIndex = -1; - public int read2Coordinate = -1; // this field is overloaded for flow based processing as the end coordinate (paired reads not supported) + public int read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported) // Additional information used to detect optical dupes public short readGroup = -1; diff --git a/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java index 6760f6f9dc..d521af853c 100644 --- a/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java +++ b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java @@ -24,6 +24,9 @@ package picard.sam.markduplicates.util; +import htsjdk.samtools.SAMRecord; +import picard.sam.markduplicates.MarkDuplicatesForFlowHelper; + /** * Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar * @@ -83,4 +86,24 @@ public ReadEndsForMarkDuplicates clone() { return new ReadEndsForMarkDuplicates(this); } + /** + * This method is used to generate the following two metrics: + * UNPAIRED_DUPS_WITH_TLEN + * UNPAIRED_DUPS_WITHOUT_TLEN + * + * It will return true if and only if the read is single ended and the exact fragment length is + * known (i.e. 
it was not quality trimmed) + */ + public static boolean isSingleEndReadKnownFragment(final SAMRecord rec) { + if ( rec.getReadUnmappedFlag() || rec.getReadPairedFlag() ) { + return false; + } else if ( MarkDuplicatesForFlowHelper.isAdapterClipped(rec) ) { + return true; + } else if ( !rec.getReadNegativeStrandFlag() ) { + return rec.getEnd() != rec.getUnclippedEnd(); + } else { + return rec.getStart() != rec.getUnclippedStart(); + } + } + } \ No newline at end of file