Skip to content

Commit

Permalink
Fix a bug with soft clips in LeftAlignIndels (#6792)
Browse files (browse the repository at this point in the history)
Fixes an "IllegalArgumentException: the range cannot contain negative indices" error in LeftAlignIndels

Resolves #6765
  • Loading branch information
davidbenjamin authored and mwalker174 committed Nov 3, 2020
1 parent 2d107f8 commit 69b4b3b
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -679,11 +679,16 @@ private static int lengthOnReference(final CigarElement element) {
* cigar will always mark the leftmost AT as deleted. If there is no indel in the original cigar or if the indel position
* is determined unambiguously (i.e. inserted/deleted sequence is not repeated), the original cigar is returned.
*
* Soft-clipped bases in the cigar are presumed to correspond to bases in the byte[] of read sequence. That is, this method
* assumes that inputs are precise about the distinction between hard clips (removed from the read sequence) and soft clips
* (kept in the read sequence but not aligned). For example, with the inputs {cigar: 2S2M2I, read sequence: TTAAAA, ref sequence: GGAA, read start: 2}
* the method lines up the AAAA (2M2I) of the read with the AA of the ref and left-aligns the indel to yield a cigar of
* 2S2I2M.
*
* @param cigar structure of the original alignment
* @param ref reference sequence the read is aligned to
* @param read read sequence
* @param readStart 0-based alignment start position on ref
* @param readStart 0-based position on ref of the first aligned base in the read sequence
* @return a non-null cigar, in which the indels are guaranteed to be placed at the leftmost possible position across a repeat (if any)
*/
public static CigarBuilder.Result leftAlignIndels(final Cigar cigar, final byte[] ref, final byte[] read, final int readStart) {
Expand Down Expand Up @@ -730,10 +735,12 @@ public static CigarBuilder.Result leftAlignIndels(final Cigar cigar, final byte[
if (emitIndel) { // some of this alignment block remains after left-alignment -- emit the indel
resultRightToLeft.add(new CigarElement(refIndelRange.size(), CigarOperator.DELETION));
resultRightToLeft.add(new CigarElement(readIndelRange.size(), CigarOperator.INSERTION));
refIndelRange.shiftEndLeft(refIndelRange.size()); // ref is empty and points to start of left-aligned indel
readIndelRange.shiftEndLeft(readIndelRange.size()); // read is empty and points to start of left-aligned indel
refIndelRange.shiftLeft(remainingBasesOnLeft + newMatchOnLeftDueToTrimming); // ref is empty and points to end of element preceding this match block
readIndelRange.shiftLeft(remainingBasesOnLeft + newMatchOnLeftDueToTrimming); // read is empty and points to end of element preceding this match block
refIndelRange.shiftEndLeft(refIndelRange.size()); // ref indel range is now empty and points to start of left-aligned indel
readIndelRange.shiftEndLeft(readIndelRange.size()); // read indel range is now empty and points to start of left-aligned indel

refIndelRange.shiftLeft(newMatchOnLeftDueToTrimming + (element.getOperator().consumesReferenceBases() ? remainingBasesOnLeft : 0));
readIndelRange.shiftLeft(newMatchOnLeftDueToTrimming + (element.getOperator().consumesReadBases() ? remainingBasesOnLeft : 0));
// now read and ref indel ranges are empty and point to end of element preceding this block
}
resultRightToLeft.add(new CigarElement(newMatchOnLeftDueToTrimming, CigarOperator.MATCH_OR_MISMATCH));
resultRightToLeft.add(new CigarElement(remainingBasesOnLeft, element.getOperator()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -786,24 +786,30 @@ public void testLeftAlignIndel(final String ref, final String read, final String
// given a read string and a reference string over the same context, test with different permutations of clipping
// and preceding/following reference bases
private void testWithClipsAndReferenceContext(final String refString, final String readString, final String originalCigar, final String expectedCigar) {
for (int leadingSoftClips : new int[] {0, 5}) {
for (int trailingSoftClips : new int[] {0, 5}) {
for (int extraRefInFront : new int[] {0, 10}) {
for (int extraRefInBack : new int[] {0, 10}) {
final byte[] readBases = new byte[readString.length() + leadingSoftClips + trailingSoftClips];
final byte[] refBases = new byte[refString.length() + leadingSoftClips + trailingSoftClips + extraRefInFront + extraRefInBack];
BaseUtils.fillWithRandomBases(readBases, 0, leadingSoftClips);
BaseUtils.fillWithRandomBases(readBases, leadingSoftClips + readString.length(), readBases.length);
System.arraycopy(readString.getBytes(), 0, readBases, leadingSoftClips, readString.length());

BaseUtils.fillWithRandomBases(refBases, 0, extraRefInFront + leadingSoftClips);
BaseUtils.fillWithRandomBases(refBases, extraRefInFront + leadingSoftClips + refString.length(), refBases.length);
System.arraycopy(refString.getBytes(), 0, refBases, extraRefInFront + leadingSoftClips, refString.length());
final String originalCigarWithClips = (leadingSoftClips > 0 ? leadingSoftClips + "S" : "") + originalCigar + (trailingSoftClips > 0 ? trailingSoftClips + "S" : "");
final String expectedCigarWithClips = (leadingSoftClips > 0 ? leadingSoftClips + "S" : "") + expectedCigar + (trailingSoftClips > 0 ? trailingSoftClips + "S" : "");

final Cigar result = AlignmentUtils.leftAlignIndels(TextCigarCodec.decode(originalCigarWithClips), refBases, readBases, extraRefInFront + leadingSoftClips).getCigar();
Assert.assertEquals(result.toString(), expectedCigarWithClips);
for (int leadingHardClips : new int[] {0, 5}) {
for (int trailingHardClips : new int[]{0, 5}) {
for (int leadingSoftClips : new int[]{0, 5}) {
for (int trailingSoftClips : new int[]{0, 5}) {
for (int extraRefInFront : new int[]{0, 10}) {
for (int extraRefInBack : new int[]{0, 10}) {
final byte[] readBases = new byte[readString.length() + leadingSoftClips + trailingSoftClips];
final byte[] refBases = new byte[refString.length() + extraRefInFront + extraRefInBack];
BaseUtils.fillWithRandomBases(readBases, 0, leadingSoftClips);
BaseUtils.fillWithRandomBases(readBases, leadingSoftClips + readString.length(), readBases.length);
System.arraycopy(readString.getBytes(), 0, readBases, leadingSoftClips, readString.length());

BaseUtils.fillWithRandomBases(refBases, 0, extraRefInFront);
BaseUtils.fillWithRandomBases(refBases, extraRefInFront + refString.length(), refBases.length);
System.arraycopy(refString.getBytes(), 0, refBases, extraRefInFront, refString.length());
final String originalCigarWithClips = (leadingHardClips > 0 ? leadingHardClips + "H" : "") + (leadingSoftClips > 0 ? leadingSoftClips + "S" : "")
+ originalCigar + (trailingSoftClips > 0 ? trailingSoftClips + "S" : "") + (trailingHardClips > 0 ? trailingHardClips + "H" : "");
final String expectedCigarWithClips = (leadingHardClips > 0 ? leadingHardClips + "H" : "") + (leadingSoftClips > 0 ? leadingSoftClips + "S" : "") +
expectedCigar + (trailingSoftClips > 0 ? trailingSoftClips + "S" : "") + (trailingHardClips > 0 ? trailingHardClips + "H" : "");

final Cigar result = AlignmentUtils.leftAlignIndels(TextCigarCodec.decode(originalCigarWithClips), refBases, readBases, extraRefInFront).getCigar();
Assert.assertEquals(result.toString(), expectedCigarWithClips);
}
}
}
}
}
Expand Down

0 comments on commit 69b4b3b

Please sign in to comment.