Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Picard modifications to support flow based sequencing #1813

Merged
merged 4 commits into from
Jun 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ report
jacoco.data
.gradle
build
*.swp
14 changes: 13 additions & 1 deletion src/main/java/picard/analysis/AlignmentSummaryMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,12 @@ public enum Category {UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR}
/** The standard deviation of the read lengths. Computed using all read lengths including clipped bases. */
public double SD_READ_LENGTH;

/** The median read length. Computed using all read lengths including clipped bases. */
/**
* The median read length of the set of reads examined. When looking at the data for a single lane with
* equal length reads this number is just the read length. When looking at data for merged lanes with
* differing read lengths this is the median read length of all reads. Computed using all bases in reads,
* including clipped bases.
*/
public double MEDIAN_READ_LENGTH;

/**
Expand All @@ -155,6 +160,13 @@ public enum Category {UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR}
/** The maximum read length. Computed using all read lengths including clipped bases. */
public double MAX_READ_LENGTH;

/**
* The mean aligned read length of the set of reads examined. When looking at the data for a single lane with
* equal length reads this number is just the read length. When looking at data for merged lanes with
* differing read lengths this is the mean read length of all reads. Clipped bases are not counted.
*/
public double MEAN_ALIGNED_READ_LENGTH;

/**
* The number of aligned reads whose mate pair was also aligned to the reference.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ public void finish() {
metrics.PCT_PF_READS_ALIGNED = MathUtil.divide(metrics.PF_READS_ALIGNED, (double) metrics.PF_READS);
metrics.PCT_READS_ALIGNED_IN_PAIRS = MathUtil.divide(metrics.READS_ALIGNED_IN_PAIRS, (double) metrics.PF_READS_ALIGNED);
metrics.PCT_PF_READS_IMPROPER_PAIRS = MathUtil.divide(metrics.PF_READS_IMPROPER_PAIRS, (double) metrics.PF_READS_ALIGNED);
metrics.MEAN_ALIGNED_READ_LENGTH = alignedReadLengthHistogram.getMean();
metrics.STRAND_BALANCE = MathUtil.divide(numPositiveStrand, (double) metrics.PF_READS_ALIGNED);
metrics.PCT_CHIMERAS = MathUtil.divide(chimeras, (double) chimerasDenominator);
metrics.PF_INDEL_RATE = MathUtil.divide(indels, (double) metrics.PF_ALIGNED_BASES);
Expand Down
108 changes: 100 additions & 8 deletions src/main/java/picard/analysis/CollectQualityYieldMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.help.HelpConstants;
Expand Down Expand Up @@ -92,6 +92,9 @@ public class CollectQualityYieldMetrics extends SinglePassSamProgram {
"of bases if there are supplemental alignments in the input file.")
public boolean INCLUDE_SUPPLEMENTAL_ALIGNMENTS = false;

@Argument(doc = "If true, calculates flow-specific READ_LENGTH_AVG_Q metrics.")
public boolean FLOW_MODE = false;

/**
* Ensure that we get all reads regardless of alignment status.
*/
Expand All @@ -103,7 +106,7 @@ protected boolean usesNoRefReads() {
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
IOUtil.assertFileIsWritable(OUTPUT);
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS);
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS, FLOW_MODE);
}

@Override
Expand Down Expand Up @@ -132,15 +135,30 @@ public static class QualityYieldMetricsCollector {
// of bases if there are supplemental alignments in the input file.
public final boolean includeSupplementalAlignments;

// If true collects RLQ25/RLQ30
private final boolean flowMode;
// The metrics to be accumulated
private final QualityYieldMetrics metrics = new QualityYieldMetrics();
private final QualityYieldMetrics metrics;

public QualityYieldMetricsCollector(final boolean useOriginalQualities,
final boolean includeSecondaryAlignments,
final boolean includeSupplementalAlignments){
this(useOriginalQualities, includeSecondaryAlignments, includeSupplementalAlignments, false);
}

public QualityYieldMetricsCollector(final boolean useOriginalQualities,
final boolean includeSecondaryAlignments,
final boolean includeSupplementalAlignments) {
final boolean includeSupplementalAlignments,
final boolean flowMode) {
this.useOriginalQualities = useOriginalQualities;
this.includeSecondaryAlignments = includeSecondaryAlignments;
this.includeSupplementalAlignments = includeSupplementalAlignments;
this.flowMode = flowMode;
if (flowMode){
this.metrics = new QualityYieldMetricsFlow(useOriginalQualities);
} else {
this.metrics = new QualityYieldMetrics(useOriginalQualities);
}
}

public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
Expand Down Expand Up @@ -187,12 +205,15 @@ public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
}
}
}

if (flowMode) {
((QualityYieldMetricsFlow)metrics).addRecordToHistogramGenerator(rec);
}
}

public void finish() {
metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20;
metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20;

metrics.calculateDerivedFields();
}

Expand All @@ -201,12 +222,69 @@ public void addMetricsToFile(final MetricsFile<QualityYieldMetrics, Integer> met
}
}

public static class QualityYieldMetricsFlow extends QualityYieldMetrics{
/** The length of the longest interval on the reads where the average quality per-base is above (Q30) */
@NoMergingIsDerived
public long READ_LENGTH_AVG_Q_ABOVE_30 = 0;

/** The length of the longest interval on the reads where the average quality per-base is above (Q25) */
@NoMergingIsDerived
public long READ_LENGTH_AVG_Q_ABOVE_25 = 0;

@MergingIsManual
protected final HistogramGenerator histogramGenerator;

public QualityYieldMetricsFlow(){
this(false);
}

public QualityYieldMetricsFlow(final boolean useOriginalBaseQualities){

super(useOriginalBaseQualities);
histogramGenerator=new HistogramGenerator(useOriginalQualities);
}

public QualityYieldMetricsFlow(final boolean useOriginalBaseQualities, final HistogramGenerator hg) {
histogramGenerator=hg;
}

@Override
public void calculateDerivedFields() {
super.calculateDerivedFields();
this.READ_LENGTH_AVG_Q_ABOVE_25 = histogramGenerator.calculateLQ(25, 1, 5);
this.READ_LENGTH_AVG_Q_ABOVE_30 = histogramGenerator.calculateLQ(30, 1, 5);
}

@Override
public MergeableMetricBase merge(final MergeableMetricBase other) {
if (!(other instanceof QualityYieldMetricsFlow)){
throw new PicardException("Only objects of the same type can be merged");
}
this.histogramGenerator.addOtherHistogramGenerator(((QualityYieldMetricsFlow)other).histogramGenerator);
super.merge(other);
return this;
}

protected void addRecordToHistogramGenerator(final SAMRecord rec) {
histogramGenerator.addRecord(rec);
}

}
/**
* A set of metrics used to describe the general quality of a BAM file
*/
@DocumentedFeature(groupName = HelpConstants.DOC_CAT_METRICS, summary = HelpConstants.DOC_CAT_METRICS_SUMMARY)
public static class QualityYieldMetrics extends MergeableMetricBase {

public QualityYieldMetrics() {
this(false);
}

public QualityYieldMetrics(final boolean useOriginalQualities) {
super();
this.useOriginalQualities = useOriginalQualities;
}

/**
* The total number of reads in the input file
*/
Expand All @@ -220,7 +298,7 @@ public static class QualityYieldMetrics extends MergeableMetricBase {
public long PF_READS = 0;

/**
* The average read length of all the reads (will be fixed for a lane)
* The average read length of all the reads
*/
@NoMergingIsDerived
public int READ_LENGTH = 0;
Expand Down Expand Up @@ -273,12 +351,26 @@ public static class QualityYieldMetrics extends MergeableMetricBase {
@MergeByAdding
public long PF_Q20_EQUIVALENT_YIELD = 0;

@MergeByAssertEquals
protected final boolean useOriginalQualities;

@Override
public void calculateDerivedFields() {
super.calculateDerivedFields();

this.READ_LENGTH = this.TOTAL_READS == 0 ? 0 : (int) (this.TOTAL_BASES / this.TOTAL_READS);
}
}

@Override
public MergeableMetricBase merge(final MergeableMetricBase other) {
if (!(other instanceof QualityYieldMetrics)){
throw new PicardException("Only objects of the same type can be merged");
}

final QualityYieldMetrics otherMetric = (QualityYieldMetrics) other;

super.merge(otherMetric);
calculateDerivedFields();
return this;
}
}
}
Loading