From 0d9440dab92ba857bda50afec2a00833a8bd4add Mon Sep 17 00:00:00 2001 From: Alexey Date: Sun, 27 Oct 2019 16:54:19 +0100 Subject: [PATCH 01/10] extract function --- source/stitchWindowAligns.cpp | 639 +++++++++++++++++++--------------- 1 file changed, 359 insertions(+), 280 deletions(-) diff --git a/source/stitchWindowAligns.cpp b/source/stitchWindowAligns.cpp index c8551bef..6f0f420c 100644 --- a/source/stitchWindowAligns.cpp +++ b/source/stitchWindowAligns.cpp @@ -5,306 +5,385 @@ #include #include -void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, uint tG2, Transcript trA, \ - uint Lread, uiWA* WA, char* R, Genome &mapGen, \ - Parameters& P, Transcript** wTr, uint* nWinTr, ReadAlign *RA) { - //recursively stitch aligns for one gene - //*nWinTr - number of transcripts for the current window - - if (iA>=nA && tR2==0) return; //no aligns in the transcript - if (iA>=nA) {//no more aligns to add, finalize the transcript - - //extend first - Transcript trAstep1; - - int vOrder[2]; //decide in which order to extend: extend the 5' of the read first - - #if EXTEND_ORDER==1 - if ( trA.roStr==0 ) {//decide in which order to extend: extend the 5' of the read first - vOrder[0]=0; vOrder[1]=1; - } else { - vOrder[0]=1; vOrder[1]=0; +void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, + Parameters &P, Genome &mapGen, char *R, uint Lread, + Transcript& trA, uint tG2, uint tR2, int Score) { + + // extend first + Transcript trAstep1; + + int vOrder[2]; // decide in which order to extend: extend the 5' of the read + // first + +#if EXTEND_ORDER == 1 + if (trA.roStr == 0) { // decide in which order to extend: extend the 5' of the read first + vOrder[0] = 0; + vOrder[1] = 1; + } else { + vOrder[0] = 1; + vOrder[1] = 0; + }; +#elif EXTEND_ORDER == 2 + vOrder[0] = 0; + vOrder[1] = 1; +#else +#error "EXTEND_ORDER value unrecognized" +#endif + + for (int iOrd = 0; iOrd < 2; iOrd++) { + + switch (vOrder[iOrd]) { + + case 0: // extend at start + + if (trA.rStart > + 0) { // if transcript does not start at base, extend to the read start + trAstep1.reset(); + uint imate = trA.exons[0][EX_iFrag]; + if (extendAlign(R, mapGen.G, trA.rStart - 1, trA.gStart - 1, -1, -1, + trA.rStart, tR2 - trA.rStart + 1, trA.nMM, + RA->outFilterMismatchNmaxTotal, + P.outFilterMismatchNoverLmax, + P.alignEndsType.ext[imate][(int)(trA.Str != imate)], + &trAstep1)) { // if could extend + + trA.add(&trAstep1); + Score += trAstep1.maxScore; + + trA.exons[0][EX_R] = trA.rStart = trA.rStart - trAstep1.extendL; + trA.exons[0][EX_G] = trA.gStart = trA.gStart - trAstep1.extendL; + trA.exons[0][EX_L] += trAstep1.extendL; }; - #elif EXTEND_ORDER==2 - vOrder[0]=0; vOrder[1]=1; - #else - #error "EXTEND_ORDER value unrecognized" - #endif - - for (int iOrd=0;iOrd<2;iOrd++) { - - switch (vOrder[iOrd]) { - - case 0: //extend at start - - if (trA.rStart>0) {// if transcript does not start at base, extend to the read start - trAstep1.reset(); - uint imate=trA.exons[0][EX_iFrag]; - if ( extendAlign(R, mapGen.G, trA.rStart-1, trA.gStart-1, -1, -1, trA.rStart, tR2-trA.rStart+1, \ - trA.nMM, RA->outFilterMismatchNmaxTotal, P.outFilterMismatchNoverLmax, \ - P.alignEndsType.ext[imate][(int)(trA.Str!=imate)], &trAstep1) ) {//if could extend - - trA.add(&trAstep1); - Score += trAstep1.maxScore; - - trA.exons[0][EX_R] = trA.rStart = trA.rStart - trAstep1.extendL; - trA.exons[0][EX_G] = trA.gStart = trA.gStart - trAstep1.extendL; - trA.exons[0][EX_L] += trAstep1.extendL; - - }; - //TODO penalize the unmapped bases at the start - }; - break; - - case 1: //extend at end - - if ( tR2outFilterMismatchNmaxTotal, P.outFilterMismatchNoverLmax, \ - P.alignEndsType.ext[imate][(int)(imate==trA.Str)], &trAstep1) ) {//if could extend - - trA.add(&trAstep1); - Score += trAstep1.maxScore; - - tR2 += trAstep1.extendL; - tG2 += trAstep1.extendL; - - trA.exons[trA.nExons-1][EX_L] += trAstep1.extendL;//extend the length of the last exon - - }; - //TODO penalize unmapped bases at the end - }; + // TODO penalize the unmapped bases at the start + }; + break; + + case 1: // extend at end + + if (tR2 < Lread) { // extend alignment to the read end + trAstep1.reset(); + uint imate = trA.exons[trA.nExons - 1][EX_iFrag]; + if (extendAlign(R, mapGen.G, tR2 + 1, tG2 + 1, +1, +1, Lread - tR2 - 1, + tR2 - trA.rStart + 1, trA.nMM, + RA->outFilterMismatchNmaxTotal, + P.outFilterMismatchNoverLmax, + P.alignEndsType.ext[imate][(int)(imate == trA.Str)], + &trAstep1)) { // if could extend + + trA.add(&trAstep1); + Score += trAstep1.maxScore; + + tR2 += trAstep1.extendL; + tG2 += trAstep1.extendL; + + trA.exons[trA.nExons - 1][EX_L] += + trAstep1.extendL; // extend the length of the last exon }; - }; - - if (!P.alignSoftClipAtReferenceEnds.yes && \ - ( (trA.exons[trA.nExons-1][EX_G] + Lread-trA.exons[trA.nExons-1][EX_R]) > (mapGen.chrStart[trA.Chr]+mapGen.chrLength[trA.Chr]) || \ - trA.exons[0][EX_G]<(mapGen.chrStart[trA.Chr]+trA.exons[0][EX_R]) ) ) { - return; //no soft clipping past the ends of the chromosome - }; - + // TODO penalize unmapped bases at the end + }; + }; + }; + + if (!P.alignSoftClipAtReferenceEnds.yes && + ((trA.exons[trA.nExons - 1][EX_G] + Lread - + trA.exons[trA.nExons - 1][EX_R]) > + (mapGen.chrStart[trA.Chr] + mapGen.chrLength[trA.Chr]) || + trA.exons[0][EX_G] < (mapGen.chrStart[trA.Chr] + trA.exons[0][EX_R]))) { + return; // no soft clipping past the ends of the chromosome + }; + + trA.rLength = 0; + for (uint isj = 0; isj < trA.nExons; isj++) { + trA.rLength += trA.exons[isj][EX_L]; + }; + trA.gLength = tG2 + 1 - trA.gStart; + + // check exons lengths including repeats, do not report a transcript with + // short exons + for (uint isj = 0; isj < trA.nExons - 1; + isj++) { // check exons for min length, if they are not annotated and + // precede a junction + if (trA.canonSJ[isj] >= 0) { // junction + if (trA.sjAnnot[isj] == 1) { // sjdb + if ((trA.exons[isj][EX_L] < P.alignSJDBoverhangMin && + (isj == 0 || trA.canonSJ[isj - 1] == -3 || + (trA.sjAnnot[isj - 1] == 0 && trA.canonSJ[isj - 1] >= 0))) || + (trA.exons[isj + 1][EX_L] < P.alignSJDBoverhangMin && + (isj == trA.nExons - 2 || trA.canonSJ[isj + 1] == -3 || + (trA.sjAnnot[isj + 1] == 0 && trA.canonSJ[isj + 1] >= 0)))) + return; + } else { // non-sjdb + if (trA.exons[isj][EX_L] < P.alignSJoverhangMin + trA.shiftSJ[isj][0] || + trA.exons[isj + 1][EX_L] < + P.alignSJoverhangMin + trA.shiftSJ[isj][1]) + return; + }; + }; + }; + if (trA.nExons > 1 && trA.sjAnnot[trA.nExons - 2] == 1 && + trA.exons[trA.nExons - 1][EX_L] < P.alignSJDBoverhangMin) + return; // this exon was not checkedin the cycle above + + // filter strand consistency + uint sjN = 0; + trA.intronMotifs[0] = 0; + trA.intronMotifs[1] = 0; + trA.intronMotifs[2] = 0; + trA.sjYes = false; + for (uint iex = 0; iex < trA.nExons - 1; iex++) { + if (trA.canonSJ[iex] >= 0) { // junctions - others are indels + sjN++; + trA.intronMotifs[trA.sjStr[iex]]++; + trA.sjYes = true; + }; + }; - trA.rLength = 0; - for (uint isj=0;isj=0 ) {//junction - if (trA.sjAnnot[isj]==1) {//sjdb - if ( ( trA.exons[isj][EX_L] < P.alignSJDBoverhangMin && (isj==0 || trA.canonSJ[isj-1]==-3 || (trA.sjAnnot[isj-1]==0 && trA.canonSJ[isj-1]>=0) ) )\ - || ( trA.exons[isj+1][EX_L] < P.alignSJDBoverhangMin && (isj==trA.nExons-2 || trA.canonSJ[isj+1]==-3 || (trA.sjAnnot[isj+1]==0 && trA.canonSJ[isj+1]>=0) ) ) )return; - } else {//non-sjdb - if ( trA.exons[isj][EX_L] < P.alignSJoverhangMin + trA.shiftSJ[isj][0] \ - || trA.exons[isj+1][EX_L] < P.alignSJoverhangMin + trA.shiftSJ[isj][1] ) return; - }; - }; - }; - if (trA.nExons>1 && trA.sjAnnot[trA.nExons-2]==1 && trA.exons[trA.nExons-1][EX_L] < P.alignSJDBoverhangMin) return; //this exon was not checkedin the cycle above - - //filter strand consistency - uint sjN=0; - trA.intronMotifs[0]=0;trA.intronMotifs[1]=0;trA.intronMotifs[2]=0; - trA.sjYes=false; - for (uint iex=0;iex=0) - {//junctions - others are indels - sjN++; - trA.intronMotifs[trA.sjStr[iex]]++; - trA.sjYes=true; - }; - }; + if (trA.intronMotifs[1] > 0 && trA.intronMotifs[2] == 0) + trA.sjMotifStrand = 1; + else if (trA.intronMotifs[1] == 0 && trA.intronMotifs[2] > 0) + trA.sjMotifStrand = 2; + else + trA.sjMotifStrand = 0; - if (trA.intronMotifs[1]>0 && trA.intronMotifs[2]==0) - trA.sjMotifStrand=1; - else if (trA.intronMotifs[1]==0 && trA.intronMotifs[2]>0) - trA.sjMotifStrand=2; - else - trA.sjMotifStrand=0; + if (trA.intronMotifs[1] > 0 && trA.intronMotifs[2] > 0 && + P.outFilterIntronStrands == "RemoveInconsistentStrands") + return; - if (trA.intronMotifs[1]>0 && trA.intronMotifs[2]>0 && P.outFilterIntronStrands=="RemoveInconsistentStrands") - return; + if (sjN > 0 && trA.sjMotifStrand == 0 && + P.outSAMstrandField.type == 1) { // strand not defined for a junction + return; + }; - if (sjN>0 && trA.sjMotifStrand==0 && P.outSAMstrandField.type==1) {//strand not defined for a junction - return; - }; + if (P.outFilterIntronMotifs == "None") { // no filtering - if (P.outFilterIntronMotifs=="None") {//no filtering - - } else if (P.outFilterIntronMotifs=="RemoveNoncanonical") { - for (uint iex=0;iexlogMain, EXIT_CODE_INPUT_FILES, P); + } else if (P.outFilterIntronMotifs == "RemoveNoncanonical") { + for (uint iex = 0; iex < trA.nExons - 1; iex++) { + if (trA.canonSJ[iex] == 0) + return; + }; + } else if (P.outFilterIntronMotifs == "RemoveNoncanonicalUnannotated") { + for (uint iex = 0; iex < trA.nExons - 1; iex++) { + if (trA.canonSJ[iex] == 0 && trA.sjAnnot[iex] == 0) + return; + }; + } else { + ostringstream errOut; + errOut << "EXITING because of FATAL INPUT error: unrecognized value of " + "--outFilterIntronMotifs=" + << P.outFilterIntronMotifs << "\n"; + errOut << "SOLUTION: re-run STAR with --outFilterIntronMotifs = None -OR- " + "RemoveNoncanonical -OR- RemoveNoncanonicalUnannotated\n"; + exitWithError(errOut.str(), std::cerr, P.inOut->logMain, + EXIT_CODE_INPUT_FILES, P); + }; + + { // check mapped length for each mate + uint nsj = 0, exl = 0; + for (uint iex = 0; iex < trA.nExons; iex++) { // + exl += trA.exons[iex][EX_L]; + if (iex == trA.nExons - 1 || + trA.canonSJ[iex] == -3) { // mate is completed, make the checks + if (nsj > 0 && + (exl < P.alignSplicedMateMapLmin || + exl < (uint)(P.alignSplicedMateMapLminOverLmate * + RA->readLength[trA.exons[iex][EX_iFrag]]))) { + return; // do not record this transcript }; + exl = 0; + nsj = 0; + } else if (trA.canonSJ[iex] >= 0) { + nsj++; + }; + }; + }; + + if (P.outFilterBySJoutStage == 2) { // junctions have to be present in the filtered set P.sjnovel + for (uint iex = 0; iex < trA.nExons - 1; iex++) { + if (trA.canonSJ[iex] >= 0 && trA.sjAnnot[iex] == 0) { + uint jS = trA.exons[iex][EX_G] + trA.exons[iex][EX_L]; + uint jE = trA.exons[iex + 1][EX_G] - 1; + if (binarySearch2(jS, jE, P.sjNovelStart, P.sjNovelEnd, P.sjNovelN) < 0) + return; + }; + }; + }; + + if (trA.exons[0][EX_iFrag] != + trA.exons[trA.nExons - 1] + [EX_iFrag]) { // check for correct overlap between mates + if (trA.exons[trA.nExons - 1][EX_G] + trA.exons[trA.nExons - 1][EX_L] <= + trA.exons[0][EX_G]) + return; // to avoid negative insert size + uint iexM2 = trA.nExons; + for (uint iex = 0; iex < trA.nExons - 1; + iex++) { // find the first exon of the second mate + if (trA.canonSJ[iex] == -3) { // + iexM2 = iex + 1; + break; + }; + }; - {//check mapped length for each mate - uint nsj=0,exl=0; - for (uint iex=0;iex0 && (exlreadLength[trA.exons[iex][EX_iFrag]])) ) { - return; //do not record this transcript - }; - exl=0;nsj=0; - } else if (trA.canonSJ[iex]>=0) { - nsj++; - }; - }; + if (trA.exons[iexM2 - 1][EX_G] + trA.exons[iexM2 - 1][EX_L] > + trA.exons[iexM2] + [EX_G]) { // mates overlap - check consistency of junctions + + if (trA.exons[0][EX_G] > trA.exons[iexM2][EX_G] + trA.exons[0][EX_R] + + P.alignEndsProtrude.nBasesMax) + return; // LeftMateStart > RightMateStart + allowance + if (trA.exons[iexM2 - 1][EX_G] + trA.exons[iexM2 - 1][EX_L] > + trA.exons[trA.nExons - 1][EX_G] + Lread - + trA.exons[trA.nExons - 1][EX_R] + P.alignEndsProtrude.nBasesMax) + return; // LeftMateEnd > RightMateEnd +allowance + + // check for junctions consistency + uint iex1 = 1, iex2 = iexM2 + 1; // last exons of the junction + for (; iex1 < iexM2; + iex1++) { // find first junction that overlaps 2nd mate + if (trA.exons[iex1][EX_G] >= + trA.exons[iex2 - 1][EX_G] + trA.exons[iex2 - 1][EX_L]) + break; + }; + while (iex1 < iexM2 && + iex2 < trA.nExons) { // cycle through all overlapping exons + if (trA.canonSJ[iex1 - 1] < 0) { // skip non-junctions + iex1++; + continue; }; - - if (P.outFilterBySJoutStage==2) {//junctions have to be present in the filtered set P.sjnovel - for (uint iex=0;iex=0 && trA.sjAnnot[iex]==0) { - uint jS=trA.exons[iex][EX_G]+trA.exons[iex][EX_L]; - uint jE=trA.exons[iex+1][EX_G]-1; - if ( binarySearch2(jS,jE,P.sjNovelStart,P.sjNovelEnd,P.sjNovelN) < 0 ) return; - }; - }; + if (trA.canonSJ[iex2 - 1] < 0) { // skip non-junctions + iex2++; + continue; }; - if ( trA.exons[0][EX_iFrag]!=trA.exons[trA.nExons-1][EX_iFrag] ) {//check for correct overlap between mates - if (trA.exons[trA.nExons-1][EX_G]+trA.exons[trA.nExons-1][EX_L] <= trA.exons[0][EX_G]) return; //to avoid negative insert size - uint iexM2=trA.nExons; - for (uint iex=0;iex trA.exons[iexM2][EX_G] ) {//mates overlap - check consistency of junctions - - if (trA.exons[0][EX_G] > \ - trA.exons[iexM2][EX_G]+trA.exons[0][EX_R]+P.alignEndsProtrude.nBasesMax) return; //LeftMateStart > RightMateStart + allowance - if (trA.exons[iexM2-1][EX_G]+trA.exons[iexM2-1][EX_L] > \ - trA.exons[trA.nExons-1][EX_G]+Lread-trA.exons[trA.nExons-1][EX_R]+P.alignEndsProtrude.nBasesMax) return; //LeftMateEnd > RightMateEnd +allowance - - //check for junctions consistency - uint iex1=1, iex2=iexM2+1; //last exons of the junction - for (; iex1= trA.exons[iex2-1][EX_G] + trA.exons[iex2-1][EX_L]) break; - }; - while (iex1maxScoreMate[trA.iFrag] = max(RA->maxScoreMate[trA.iFrag], Score); + } else { + trA.iFrag = -1; + }; + + // Variation + Score += trA.variationAdjust(mapGen, R); + + trA.maxScore = Score; + + // transcript has been finalized, compare the score and record + if (Score + P.outFilterMultimapScoreRange >= wTr[0]->maxScore || + (trA.iFrag >= 0 && + Score + P.outFilterMultimapScoreRange >= RA->maxScoreMate[trA.iFrag]) || + P.pCh.segmentMin > 0) { + // only record the transcripts within the window that are in the Score range + // OR within the score range of each mate + // OR all transcript if chimeric detection is activated + + // if (P.alignEndsType.in=="EndToEnd") {//check that the + // alignment is end-to-end + // uint rTotal=trA.rLength+trA.lIns; + // // for (uint iex=1;iexreadLength[0]+RA->readLength[1])) || + // (trA.iFrag>=0 && rTotalreadLength[trA.iFrag])) + // return; + // }; + + uint iTr = 0; // transcript insertion/replacement place + + trA.mappedLength = 0; + for (uint iex = 0; iex < trA.nExons; iex++) { // caclulate total mapped length + trA.mappedLength += trA.exons[iex][EX_L]; + }; - //calculate some final values for the transcript + while (iTr < *nWinTr) { // scan through all recorded transcripts for this + // window - check for duplicates + + // another way to calculate uOld, uNew: w/o gMap + uint nOverlap = blocksOverlap(trA, *wTr[iTr]); + uint uNew = trA.mappedLength - nOverlap; + uint uOld = wTr[iTr]->mappedLength - nOverlap; + + if (uNew == 0 && Score < wTr[iTr]->maxScore) { // new transript is a subset of the old ones + break; + } else if (uOld == 0) { // old transcript is a subset of the new one, + // remove old transcript + Transcript *pTr = wTr[iTr]; + for (uint ii = iTr + 1; ii < *nWinTr; ii++) + wTr[ii - 1] = wTr[ii]; // shift transcripts + (*nWinTr)--; + wTr[*nWinTr] = pTr; + } else if (uOld > 0 && + (uNew > 0 || + Score >= wTr[iTr]->maxScore)) { // check next transcript + iTr++; + }; + }; - trA.roStart = (trA.roStr == 0) ? trA.rStart : Lread - trA.rStart - trA.rLength; - trA.maxScore=Score; + if (iTr == *nWinTr) { // insert the new transcript + for (iTr = 0; iTr < *nWinTr; iTr++) { // find inseriton location + if (Score > wTr[iTr]->maxScore || + (Score == wTr[iTr]->maxScore && trA.gLength < wTr[iTr]->gLength)) + break; + }; + + Transcript *pTr = wTr[*nWinTr]; + for (int ii = *nWinTr; ii > int(iTr); ii--) { // shift all the transcript pointers down from iTr + wTr[ii] = wTr[ii - 1]; + }; + wTr[iTr] = pTr; // the new transcript pointer is now at *nWinTr+1, move it + // into the iTr + *(wTr[iTr]) = trA; + if (*nWinTr < P.alignTranscriptsPerWindowNmax) { + (*nWinTr)++; // increment number of transcripts per window; + } else { + //"WARNING: too many recorded transcripts per window: + // iRead="<iRead<< "\n"; + }; + }; + }; - if (trA.exons[0][EX_iFrag]==trA.exons[trA.nExons-1][EX_iFrag]) {//mark single fragment transcripts - trA.iFrag=trA.exons[0][EX_iFrag]; - RA->maxScoreMate[trA.iFrag] = max (RA->maxScoreMate[trA.iFrag] , Score); - } else { - trA.iFrag=-1; - }; + return; +} - //Variation - Score+=trA.variationAdjust(mapGen, R); - - trA.maxScore=Score; - - // transcript has been finalized, compare the score and record - if ( Score+P.outFilterMultimapScoreRange >= wTr[0]->maxScore \ - || ( trA.iFrag>=0 && Score+P.outFilterMultimapScoreRange >= RA->maxScoreMate[trA.iFrag] ) \ - || P.pCh.segmentMin>0) { - //only record the transcripts within the window that are in the Score range - //OR within the score range of each mate - //OR all transcript if chimeric detection is activated - -// if (P.alignEndsType.in=="EndToEnd") {//check that the alignment is end-to-end -// uint rTotal=trA.rLength+trA.lIns; -// // for (uint iex=1;iexreadLength[0]+RA->readLength[1])) || (trA.iFrag>=0 && rTotalreadLength[trA.iFrag])) return; -// }; - - uint iTr=0; //transcript insertion/replacement place - - trA.mappedLength=0; - for (uint iex=0;iexmappedLength-nOverlap; - - if (uNew==0 && Score < wTr[iTr]->maxScore) {//new transript is a subset of the old ones - break; - } else if (uOld==0) {//old transcript is a subset of the new one, remove old transcript - Transcript *pTr=wTr[iTr]; - for (uint ii=iTr+1;ii<*nWinTr;ii++) wTr[ii-1]=wTr[ii]; //shift transcripts - (*nWinTr)--; - wTr[*nWinTr]=pTr; - } else if (uOld>0 && (uNew>0 || Score >= wTr[iTr]->maxScore) ) {//check next transcript - iTr++; - }; - - }; - - if (iTr==*nWinTr) {//insert the new transcript - for (iTr=0;iTr<*nWinTr;iTr++) {//find inseriton location - if (Score>wTr[iTr]->maxScore || (Score==wTr[iTr]->maxScore && trA.gLengthgLength) ) break; - }; - - Transcript *pTr=wTr[*nWinTr]; - for (int ii=*nWinTr; ii> int(iTr); ii--) {//shift all the transcript pointers down from iTr - wTr[ii]=wTr[ii-1]; - }; - wTr[iTr]=pTr; //the new transcript pointer is now at *nWinTr+1, move it into the iTr - *(wTr[iTr])=trA; - if (*nWinTr=nA && tR2==0) return; //no aligns in the transcript - return; + if (iA>=nA) {//no more aligns to add, finalize the transcript + finalizeTranscript(RA, wTr, nWinTr, P, mapGen, R, Lread, trA, tG2, tR2, Score); + return; }; /////////////////////////////////////////////////////////////////////////////////// From cc69f344c0cb67030bf1710dddc4d4fadfcb5b2f Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 28 Oct 2019 10:30:18 +0100 Subject: [PATCH 02/10] add const --- source/extendAlign.cpp | 2 +- source/extendAlign.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/extendAlign.cpp b/source/extendAlign.cpp index c3a61444..c24ca93d 100644 --- a/source/extendAlign.cpp +++ b/source/extendAlign.cpp @@ -3,7 +3,7 @@ #include "Transcript.h" #include "extendAlign.h" -bool extendAlign( char* R, char* G, uint rStart, uint gStart, int dR, int dG, uint L, uint Lprev, uint nMMprev, uint nMMmax, double pMMmax, bool extendToEnd, Transcript* trA ) { +bool extendAlign(const char* R, const char* G, uint rStart, uint gStart, int dR, int dG, uint L, uint Lprev, uint nMMprev, uint nMMmax, double pMMmax, bool extendToEnd, Transcript* trA ) { // find the maximum score diff --git a/source/extendAlign.h b/source/extendAlign.h index 1d07a194..0ac45ed5 100644 --- a/source/extendAlign.h +++ b/source/extendAlign.h @@ -2,5 +2,5 @@ #include "Parameters.h" #include "Transcript.h" -bool extendAlign( char* R, char* G, uint rStart, uint gStart, int dR, int dG, uint L, uint Lprev, uint nMMprev, uint nMMmax, double pMMmax, bool extendToEnd, Transcript* trA ); +bool extendAlign(const char* R, const char* G, uint rStart, uint gStart, int dR, int dG, uint L, uint Lprev, uint nMMprev, uint nMMmax, double pMMmax, bool extendToEnd, Transcript* trA ); From 910d7bd1ac2d175644cda13eab29939cce73baad Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 28 Oct 2019 11:15:56 +0100 Subject: [PATCH 03/10] add const --- source/stitchAlignToTranscript.cpp | 5 ++++- source/stitchAlignToTranscript.h | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/source/stitchAlignToTranscript.cpp b/source/stitchAlignToTranscript.cpp index 84bb427a..49c67a21 100644 --- a/source/stitchAlignToTranscript.cpp +++ b/source/stitchAlignToTranscript.cpp @@ -6,7 +6,10 @@ // #include "stitchGapIndel.cpp" -intScore stitchAlignToTranscript(uint rAend, uint gAend, uint rBstart, uint gBstart, uint L, uint iFragB, uint sjAB, Parameters& P, char* R, Genome &mapGen, Transcript *trA, const uint outFilterMismatchNmaxTotal) { +intScore stitchAlignToTranscript(uint rAend, uint gAend, uint rBstart, + uint gBstart, uint L, uint iFragB, uint sjAB, + const Parameters& P, char* R, Genome &mapGen, + Transcript *trA, const uint outFilterMismatchNmaxTotal) { //stitch together A and B, extend in the gap, returns max score if (trA->nExons>=MAX_N_EXONS) diff --git a/source/stitchAlignToTranscript.h b/source/stitchAlignToTranscript.h index 0365d9c7..4df5864b 100644 --- a/source/stitchAlignToTranscript.h +++ b/source/stitchAlignToTranscript.h @@ -1,7 +1,10 @@ +#include "Genome.h" #include "IncludeDefine.h" #include "Parameters.h" #include "Transcript.h" -#include "Genome.h" - -intScore stitchAlignToTranscript(uint rAend, uint gAend, uint rBstart, uint gBstart, uint L, uint iFragB, uint sjAB, Parameters& P, char* R, Genome &mapGen, Transcript *trA, uint outFilterMismatchNmaxTotal); +intScore stitchAlignToTranscript(uint rAend, uint gAend, uint rBstart, + uint gBstart, uint L, uint iFragB, uint sjAB, + const Parameters &P, const char *R, + const Genome &mapGen, Transcript *trA, + uint outFilterMismatchNmaxTotal); From be82e127a630c32be9b9b6aa20846cf0c2879118 Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 1 Nov 2019 23:22:43 +0100 Subject: [PATCH 04/10] update const declarations --- source/ErrorWarning.cpp | 4 ++-- source/ErrorWarning.h | 2 +- source/Transcript.h | 2 +- source/Transcript_variationAdjust.cpp | 2 +- source/stitchAlignToTranscript.cpp | 2 +- source/stitchWindowAligns.cpp | 10 +++++----- source/stitchWindowAligns.h | 13 +++++++------ 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/source/ErrorWarning.cpp b/source/ErrorWarning.cpp index a23d1949..e4a9f7fc 100644 --- a/source/ErrorWarning.cpp +++ b/source/ErrorWarning.cpp @@ -5,7 +5,7 @@ #include "TimeFunctions.h" #include "GlobalVariables.h" -void exitWithError(string messageOut, ostream &streamOut1, ostream &streamOut2, int errorInt, Parameters &P) { +void exitWithError(string messageOut, ostream &streamOut1, ostream &streamOut2, int errorInt, const Parameters &P) { time_t timeCurrent; time( &timeCurrent); if (P.runThreadN>1) pthread_mutex_lock(&g_threadChunks.mutexError); @@ -30,4 +30,4 @@ void warningMessage(string messageOut, ostream &streamOut1, ostream &streamOut2, if (streamOut2.good()) { streamOut2 << "WARNING: " << messageOut << "\t\t " << timeMonthDayTime(timeCurrent) <> &sjOut, bool &annotYes); diff --git a/source/Transcript_variationAdjust.cpp b/source/Transcript_variationAdjust.cpp index 0e569593..7f7716cf 100644 --- a/source/Transcript_variationAdjust.cpp +++ b/source/Transcript_variationAdjust.cpp @@ -1,7 +1,7 @@ #include "Transcript.h" #include "serviceFuns.cpp" -int Transcript::variationAdjust(const Genome &mapGen, char *R) +int Transcript::variationAdjust(const Genome &mapGen, const char *R) { Variation &Var=*mapGen.Var; diff --git a/source/stitchAlignToTranscript.cpp b/source/stitchAlignToTranscript.cpp index 49c67a21..c1bd08f7 100644 --- a/source/stitchAlignToTranscript.cpp +++ b/source/stitchAlignToTranscript.cpp @@ -8,7 +8,7 @@ intScore stitchAlignToTranscript(uint rAend, uint gAend, uint rBstart, uint gBstart, uint L, uint iFragB, uint sjAB, - const Parameters& P, char* R, Genome &mapGen, + const Parameters& P, const char* R, const Genome &mapGen, Transcript *trA, const uint outFilterMismatchNmaxTotal) { //stitch together A and B, extend in the gap, returns max score diff --git a/source/stitchWindowAligns.cpp b/source/stitchWindowAligns.cpp index 6f0f420c..b083d359 100644 --- a/source/stitchWindowAligns.cpp +++ b/source/stitchWindowAligns.cpp @@ -7,8 +7,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, - Parameters &P, Genome &mapGen, char *R, uint Lread, - Transcript& trA, uint tG2, uint tR2, int Score) { + const Parameters &P, const Genome &mapGen, const char *R, uint Lread, + Transcript trA, uint tG2, uint tR2, int Score) { // extend first Transcript trAstep1; @@ -373,9 +373,9 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, return; } -void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, uint tG2, Transcript trA, \ - uint Lread, uiWA* WA, char* R, Genome &mapGen, \ - Parameters& P, Transcript** wTr, uint* nWinTr, ReadAlign *RA) { +void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, uint tG2, const Transcript& trA, + uint Lread, uiWA* WA, char* R, const Genome &mapGen, + const Parameters& P, Transcript** wTr, uint* nWinTr, ReadAlign *RA) { //recursively stitch aligns for one gene //*nWinTr - number of transcripts for the current window diff --git a/source/stitchWindowAligns.h b/source/stitchWindowAligns.h index 0091411c..07193fc4 100644 --- a/source/stitchWindowAligns.h +++ b/source/stitchWindowAligns.h @@ -1,12 +1,13 @@ #include "IncludeDefine.h" #include "Parameters.h" +#include "ReadAlign.h" #include "Transcript.h" #include "extendAlign.h" #include "stitchAlignToTranscript.h" -#include "ReadAlign.h" -void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, uint tG2, Transcript trA, \ - uint Lread, uiWA* WA, char* R, Genome &mapGen, \ - Parameters& P, Transcript** wTr, uint* nWinTr, ReadAlign *RA); - //recursively stitch aligns for one gene - //*nWinTr - number of transcripts for the current window +void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, + uint tG2, const Transcript &trA, uint Lread, uiWA *WA, + char *R, const Genome &mapGen, const Parameters &P, + Transcript **wTr, uint *nWinTr, ReadAlign *RA); +// recursively stitch aligns for one gene +//*nWinTr - number of transcripts for the current window From 5f6ffb4cea1565204ee5ea0ee69aa86d03bc7662 Mon Sep 17 00:00:00 2001 From: Alexey Date: Tue, 29 Oct 2019 12:51:38 +0100 Subject: [PATCH 05/10] avoid conversion from int to char for known output --- source/ReadAlign_outputTranscriptSAM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/ReadAlign_outputTranscriptSAM.cpp b/source/ReadAlign_outputTranscriptSAM.cpp index c893084c..432fecab 100644 --- a/source/ReadAlign_outputTranscriptSAM.cpp +++ b/source/ReadAlign_outputTranscriptSAM.cpp @@ -232,7 +232,7 @@ uint ReadAlign::outputTranscriptSAM(Transcript const &trOut, uint nTrOut, uint i } else if (mateChr Date: Thu, 31 Oct 2019 12:57:47 +0100 Subject: [PATCH 06/10] style --- source/ReadAlign_assignAlignToWindow.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/source/ReadAlign_assignAlignToWindow.cpp b/source/ReadAlign_assignAlignToWindow.cpp index c1f6fe6b..1fc4cd96 100644 --- a/source/ReadAlign_assignAlignToWindow.cpp +++ b/source/ReadAlign_assignAlignToWindow.cpp @@ -13,10 +13,15 @@ void ReadAlign::assignAlignToWindow(uint a1, uint aLength, uint aStr, uint aNrep {//do not check for overlap if this is an sj-align uint iA; for (iA=0; iA=WA[iW][iA][WA_rStart] && aRstart=WA[iW][iA][WA_rStart] && aRstart+aLength= align[WA_rStart] && + aRstart < align[WA_rStart]+align[WA_Length]) + || (aRstart+aLength>=align[WA_rStart] + && aRstart+aLength Date: Fri, 1 Nov 2019 23:32:34 +0100 Subject: [PATCH 07/10] style --- source/stitchWindowAligns.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/source/stitchWindowAligns.cpp b/source/stitchWindowAligns.cpp index b083d359..0b2a5b56 100644 --- a/source/stitchWindowAligns.cpp +++ b/source/stitchWindowAligns.cpp @@ -386,7 +386,6 @@ void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, ui return; }; - /////////////////////////////////////////////////////////////////////////////////// int dScore=0; Transcript trAi=trA; //trA copy with this align included, to be used in the 1st recursive call of StitchAlign if (trA.nExons>0) {//stitch, a transcript has already been originated @@ -400,16 +399,11 @@ void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, ui trAi.exons[0][EX_L]=WA[iA][WA_Length]; trAi.exons[0][EX_iFrag]=WA[iA][WA_iFrag]; trAi.exons[0][EX_sjA]=WA[iA][WA_sjA]; - trAi.nExons=1; //recorded first exon - - for (uint ii=0;ii-1000000) {//include this align @@ -419,8 +413,6 @@ void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, ui if ( WA[iA][WA_Anchor]>0 ) trAi.nAnchor++; //anchor piece piece stitchWindowAligns(iA+1, nA, Score+dScore, WAincl, WA[iA][WA_rStart]+WA[iA][WA_Length]-1, WA[iA][WA_gStart]+WA[iA][WA_Length]-1, trAi, Lread, WA, R, mapGen, P, wTr, nWinTr, RA); - } else { - }; //also run a transcript w/o including this align From 0b8be682bf9567d927f6c670c5d17bba0d842b62 Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 1 Nov 2019 23:43:14 +0100 Subject: [PATCH 08/10] add a predicate function [isAlignToSkip] to skip alignments early * [stitchWindowAligns()] used to create a new [Transcript] instance irregardless of if it will be successfully stitched to the transcript. Early check allows to check for most common reasons to reject the alignment. * To ease reading, `WA[iA]` is aliased as `align`. * minor formatting --- source/stitchWindowAligns.cpp | 92 +++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/source/stitchWindowAligns.cpp b/source/stitchWindowAligns.cpp index 0b2a5b56..bef54951 100644 --- a/source/stitchWindowAligns.cpp +++ b/source/stitchWindowAligns.cpp @@ -5,6 +5,32 @@ #include #include +// check for conditions used in stitchAlignToTranscript for early exit, +// so there is no need to create additional instance of [Transcript]. +bool isAlignToSkip(uint rAend, uint gAend, uint rBstart, uint gBstart, uint L, + uint iFragB, uint sjAB, const Genome &mapGen, + const Transcript &trA) { + if (trA.nExons >= MAX_N_EXONS) + return true; + + if (sjAB != ((uint)-1) && trA.exons[trA.nExons - 1][EX_sjA] == sjAB && + trA.exons[trA.nExons - 1][EX_iFrag] == iFragB && rBstart == rAend + 1 && + gAend + 1 < gBstart) { + // simple stitching if junction belongs to a database + // check for too large repeats around non-canonical junction + return (mapGen.sjdbMotif[sjAB] == 0 && + (L <= mapGen.sjdbShiftRight[sjAB] || + trA.exons[trA.nExons - 1][EX_L] <= mapGen.sjdbShiftLeft[sjAB])); + } else { // general stitching + uint gBend = gBstart + L - 1; + uint rBend = rBstart + L - 1; + // stitch aligns on the same fragment + // check if r-overlapping fully + return ((rBend <= rAend) || + (gBend <= gAend)) && + (trA.exons[trA.nExons - 1][EX_iFrag] == iFragB); + } +} void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, const Parameters &P, const Genome &mapGen, const char *R, uint Lread, @@ -386,39 +412,45 @@ void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, ui return; }; - int dScore=0; - Transcript trAi=trA; //trA copy with this align included, to be used in the 1st recursive call of StitchAlign - if (trA.nExons>0) {//stitch, a transcript has already been originated - - dScore=stitchAlignToTranscript(tR2, tG2, WA[iA][WA_rStart], WA[iA][WA_gStart], WA[iA][WA_Length], WA[iA][WA_iFrag], WA[iA][WA_sjA], P, R, mapGen, &trAi, RA->outFilterMismatchNmaxTotal); + const auto& align = WA[iA]; + if (trA.nExons == 0 || + !isAlignToSkip(tR2, tG2, align[WA_rStart], align[WA_gStart], + align[WA_Length], align[WA_iFrag], align[WA_sjA], mapGen, trA)) { + int dScore=0; + Transcript trAi=trA; //trA copy with this align included, to be used in the 1st recursive call of StitchAlign + if (trA.nExons > 0) {//stitch, a transcript has already been originated + dScore=stitchAlignToTranscript(tR2, tG2, align[WA_rStart], align[WA_gStart], + align[WA_Length], align[WA_iFrag], align[WA_sjA], + P, R, mapGen, &trAi, RA->outFilterMismatchNmaxTotal); //TODO check if the new stitching creates too many MM, quit this transcript if so - - } else { //this is the first align in the transcript - trAi.exons[0][EX_R]=trAi.rStart=WA[iA][WA_rStart]; //transcript start/end - trAi.exons[0][EX_G]=trAi.gStart=WA[iA][WA_gStart]; - trAi.exons[0][EX_L]=WA[iA][WA_Length]; - trAi.exons[0][EX_iFrag]=WA[iA][WA_iFrag]; - trAi.exons[0][EX_sjA]=WA[iA][WA_sjA]; - trAi.nExons=1; //recorded first exon - trAi.nMatch=WA[iA][WA_Length]; //# of matches - - for (uint ii=0;ii-1000000) {//include this align + } else { //this is the first align in the transcript + trAi.exons[0][EX_R]=trAi.rStart=align[WA_rStart]; //transcript start/end + trAi.exons[0][EX_G]=trAi.gStart=align[WA_gStart]; + trAi.exons[0][EX_L]=align[WA_Length]; + trAi.exons[0][EX_iFrag]=align[WA_iFrag]; + trAi.exons[0][EX_sjA]=align[WA_sjA]; + trAi.nExons=1; //recorded first exon + trAi.nMatch=align[WA_Length]; //# of matches + + for (uint ii=0;ii-1000000) {//include this align WAincl[iA]=true; - - if ( WA[iA][WA_Nrep]==1 ) trAi.nUnique++; //unique piece - if ( WA[iA][WA_Anchor]>0 ) trAi.nAnchor++; //anchor piece piece - - stitchWindowAligns(iA+1, nA, Score+dScore, WAincl, WA[iA][WA_rStart]+WA[iA][WA_Length]-1, WA[iA][WA_gStart]+WA[iA][WA_Length]-1, trAi, Lread, WA, R, mapGen, P, wTr, nWinTr, RA); - }; - + if (align[WA_Nrep] == 1) trAi.nUnique++; //unique piece + if (align[WA_Anchor] > 0) trAi.nAnchor++; //anchor piece piece + stitchWindowAligns(iA+1, nA, Score+dScore, WAincl, + align[WA_rStart]+align[WA_Length]-1, + align[WA_gStart]+align[WA_Length]-1, + trAi, Lread, WA, R, mapGen, P, wTr, nWinTr, RA); + } + } //also run a transcript w/o including this align - if (WA[iA][WA_Anchor]!=2 || trA.nAnchor>0) {//only allow exclusion if this is not the last anchor, or other anchors have been used - WAincl[iA]=false; - stitchWindowAligns(iA+1, nA, Score, WAincl, tR2, tG2, trA, Lread, WA, R, mapGen, P, wTr, nWinTr, RA); + if (align[WA_Anchor] != 2 || trA.nAnchor > 0) { + //only allow exclusion if this is not the last anchor, or other anchors have been used + WAincl[iA] = false; + stitchWindowAligns(iA+1, nA, Score, WAincl, tR2, tG2, trA, Lread, WA, R, + mapGen, P, wTr, nWinTr, RA); }; return; }; From 5f4dff2cc8370b15fbdfb595dfe16d79aee37993 Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 1 Nov 2019 13:35:26 +0100 Subject: [PATCH 09/10] add const - const char* R - const uiWA* WA --- source/stitchWindowAligns.cpp | 2 +- source/stitchWindowAligns.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/stitchWindowAligns.cpp b/source/stitchWindowAligns.cpp index bef54951..c0bf5e44 100644 --- a/source/stitchWindowAligns.cpp +++ b/source/stitchWindowAligns.cpp @@ -400,7 +400,7 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, } void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, uint tG2, const Transcript& trA, - uint Lread, uiWA* WA, char* R, const Genome &mapGen, + uint Lread, const uiWA* WA, const char* R, const Genome &mapGen, const Parameters& P, Transcript** wTr, uint* nWinTr, ReadAlign *RA) { //recursively stitch aligns for one gene //*nWinTr - number of transcripts for the current window diff --git a/source/stitchWindowAligns.h b/source/stitchWindowAligns.h index 07193fc4..cf32029e 100644 --- a/source/stitchWindowAligns.h +++ b/source/stitchWindowAligns.h @@ -6,8 +6,8 @@ #include "stitchAlignToTranscript.h" void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, - uint tG2, const Transcript &trA, uint Lread, uiWA *WA, - char *R, const Genome &mapGen, const Parameters &P, + uint tG2, const Transcript &trA, uint Lread, const uiWA *WA, + const char *R, const Genome &mapGen, const Parameters &P, Transcript **wTr, uint *nWinTr, ReadAlign *RA); // recursively stitch aligns for one gene //*nWinTr - number of transcripts for the current window From 2395ee1cda55b15972d75d477197fe99fbe548c9 Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 1 Nov 2019 23:46:43 +0100 Subject: [PATCH 10/10] adjust style --- source/stitchWindowAligns.cpp | 90 ++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/source/stitchWindowAligns.cpp b/source/stitchWindowAligns.cpp index c0bf5e44..418e329b 100644 --- a/source/stitchWindowAligns.cpp +++ b/source/stitchWindowAligns.cpp @@ -33,17 +33,19 @@ bool isAlignToSkip(uint rAend, uint gAend, uint rBstart, uint gBstart, uint L, } void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, - const Parameters &P, const Genome &mapGen, const char *R, uint Lread, - Transcript trA, uint tG2, uint tR2, int Score) { + const Parameters &P, const Genome &mapGen, + const char *R, uint Lread, Transcript trA, uint tG2, + uint tR2, int Score) { // extend first Transcript trAstep1; - + int vOrder[2]; // decide in which order to extend: extend the 5' of the read // first #if EXTEND_ORDER == 1 - if (trA.roStr == 0) { // decide in which order to extend: extend the 5' of the read first + if (trA.roStr == 0) { + // decide in which order to extend: extend the 5' of the read first vOrder[0] = 0; vOrder[1] = 1; } else { @@ -62,9 +64,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, switch (vOrder[iOrd]) { case 0: // extend at start - - if (trA.rStart > - 0) { // if transcript does not start at base, extend to the read start + if (trA.rStart > 0) { + // if transcript does not start at base, extend to the read start trAstep1.reset(); uint imate = trA.exons[0][EX_iFrag]; if (extendAlign(R, mapGen.G, trA.rStart - 1, trA.gStart - 1, -1, -1, @@ -102,9 +103,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, tR2 += trAstep1.extendL; tG2 += trAstep1.extendL; - - trA.exons[trA.nExons - 1][EX_L] += - trAstep1.extendL; // extend the length of the last exon + // extend the length of the last exon + trA.exons[trA.nExons - 1][EX_L] += trAstep1.extendL; }; // TODO penalize unmapped bases at the end }; @@ -114,7 +114,7 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, if (!P.alignSoftClipAtReferenceEnds.yes && ((trA.exons[trA.nExons - 1][EX_G] + Lread - trA.exons[trA.nExons - 1][EX_R]) > - (mapGen.chrStart[trA.Chr] + mapGen.chrLength[trA.Chr]) || + (mapGen.chrStart[trA.Chr] + mapGen.chrLength[trA.Chr]) || trA.exons[0][EX_G] < (mapGen.chrStart[trA.Chr] + trA.exons[0][EX_R]))) { return; // no soft clipping past the ends of the chromosome }; @@ -127,9 +127,9 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, // check exons lengths including repeats, do not report a transcript with // short exons - for (uint isj = 0; isj < trA.nExons - 1; - isj++) { // check exons for min length, if they are not annotated and - // precede a junction + for (uint isj = 0; isj < trA.nExons - 1; isj++) { + // check exons for min length, if they are not annotated + // and precede a junction if (trA.canonSJ[isj] >= 0) { // junction if (trA.sjAnnot[isj] == 1) { // sjdb if ((trA.exons[isj][EX_L] < P.alignSJDBoverhangMin && @@ -142,7 +142,7 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, } else { // non-sjdb if (trA.exons[isj][EX_L] < P.alignSJoverhangMin + trA.shiftSJ[isj][0] || trA.exons[isj + 1][EX_L] < - P.alignSJoverhangMin + trA.shiftSJ[isj][1]) + P.alignSJoverhangMin + trA.shiftSJ[isj][1]) return; }; }; @@ -176,8 +176,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, P.outFilterIntronStrands == "RemoveInconsistentStrands") return; - if (sjN > 0 && trA.sjMotifStrand == 0 && - P.outSAMstrandField.type == 1) { // strand not defined for a junction + if (sjN > 0 && trA.sjMotifStrand == 0 && P.outSAMstrandField.type == 1) { + // strand not defined for a junction return; }; @@ -196,10 +196,10 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, } else { ostringstream errOut; errOut << "EXITING because of FATAL INPUT error: unrecognized value of " - "--outFilterIntronMotifs=" + "--outFilterIntronMotifs=" << P.outFilterIntronMotifs << "\n"; errOut << "SOLUTION: re-run STAR with --outFilterIntronMotifs = None -OR- " - "RemoveNoncanonical -OR- RemoveNoncanonicalUnannotated\n"; + "RemoveNoncanonical -OR- RemoveNoncanonicalUnannotated\n"; exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P); }; @@ -224,7 +224,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, }; }; - if (P.outFilterBySJoutStage == 2) { // junctions have to be present in the filtered set P.sjnovel + if (P.outFilterBySJoutStage == 2) { + // junctions have to be present in the filtered set P.sjnovel for (uint iex = 0; iex < trA.nExons - 1; iex++) { if (trA.canonSJ[iex] >= 0 && trA.sjAnnot[iex] == 0) { uint jS = trA.exons[iex][EX_G] + trA.exons[iex][EX_L]; @@ -235,9 +236,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, }; }; - if (trA.exons[0][EX_iFrag] != - trA.exons[trA.nExons - 1] - [EX_iFrag]) { // check for correct overlap between mates + if (trA.exons[0][EX_iFrag] != trA.exons[trA.nExons - 1][EX_iFrag]) { + // check for correct overlap between mates if (trA.exons[trA.nExons - 1][EX_G] + trA.exons[trA.nExons - 1][EX_L] <= trA.exons[0][EX_G]) return; // to avoid negative insert size @@ -251,15 +251,15 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, }; if (trA.exons[iexM2 - 1][EX_G] + trA.exons[iexM2 - 1][EX_L] > - trA.exons[iexM2] - [EX_G]) { // mates overlap - check consistency of junctions + trA.exons[iexM2][EX_G]) { + // mates overlap - check consistency of junctions if (trA.exons[0][EX_G] > trA.exons[iexM2][EX_G] + trA.exons[0][EX_R] + - P.alignEndsProtrude.nBasesMax) + P.alignEndsProtrude.nBasesMax) return; // LeftMateStart > RightMateStart + allowance if (trA.exons[iexM2 - 1][EX_G] + trA.exons[iexM2 - 1][EX_L] > trA.exons[trA.nExons - 1][EX_G] + Lread - - trA.exons[trA.nExons - 1][EX_R] + P.alignEndsProtrude.nBasesMax) + trA.exons[trA.nExons - 1][EX_R] + P.alignEndsProtrude.nBasesMax) return; // LeftMateEnd > RightMateEnd +allowance // check for junctions consistency @@ -270,8 +270,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, trA.exons[iex2 - 1][EX_G] + trA.exons[iex2 - 1][EX_L]) break; }; - while (iex1 < iexM2 && - iex2 < trA.nExons) { // cycle through all overlapping exons + while (iex1 < iexM2 && iex2 < trA.nExons) { + // cycle through all overlapping exons if (trA.canonSJ[iex1 - 1] < 0) { // skip non-junctions iex1++; continue; @@ -304,7 +304,8 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, // calculate some final values for the transcript - trA.roStart = (trA.roStr == 0) ? trA.rStart : Lread - trA.rStart - trA.rLength; + trA.roStart = + (trA.roStr == 0) ? trA.rStart : Lread - trA.rStart - trA.rLength; trA.maxScore = Score; if (trA.exons[0][EX_iFrag] == @@ -344,31 +345,33 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, uint iTr = 0; // transcript insertion/replacement place + // caclulate total mapped length trA.mappedLength = 0; - for (uint iex = 0; iex < trA.nExons; iex++) { // caclulate total mapped length + for (uint iex = 0; iex < trA.nExons; iex++) { trA.mappedLength += trA.exons[iex][EX_L]; }; - while (iTr < *nWinTr) { // scan through all recorded transcripts for this - // window - check for duplicates + // scan through all recorded transcripts for this window - + // check for duplicates + while (iTr < *nWinTr) { // another way to calculate uOld, uNew: w/o gMap uint nOverlap = blocksOverlap(trA, *wTr[iTr]); uint uNew = trA.mappedLength - nOverlap; uint uOld = wTr[iTr]->mappedLength - nOverlap; - if (uNew == 0 && Score < wTr[iTr]->maxScore) { // new transript is a subset of the old ones + if (uNew == 0 && Score < wTr[iTr]->maxScore) { + // new transript is a subset of the old ones break; - } else if (uOld == 0) { // old transcript is a subset of the new one, - // remove old transcript + } else if (uOld == 0) { + // old transcript is a subset of the new one, remove old transcript Transcript *pTr = wTr[iTr]; for (uint ii = iTr + 1; ii < *nWinTr; ii++) wTr[ii - 1] = wTr[ii]; // shift transcripts (*nWinTr)--; wTr[*nWinTr] = pTr; - } else if (uOld > 0 && - (uNew > 0 || - Score >= wTr[iTr]->maxScore)) { // check next transcript + } else if (uOld > 0 && (uNew > 0 || Score >= wTr[iTr]->maxScore)) { + // check next transcript iTr++; }; }; @@ -381,11 +384,12 @@ void finalizeTranscript(ReadAlign *RA, Transcript **wTr, uint *nWinTr, }; Transcript *pTr = wTr[*nWinTr]; - for (int ii = *nWinTr; ii > int(iTr); ii--) { // shift all the transcript pointers down from iTr + for (int ii = *nWinTr; ii > int(iTr); ii--) { + // shift all the transcript pointers down from iTr wTr[ii] = wTr[ii - 1]; }; - wTr[iTr] = pTr; // the new transcript pointer is now at *nWinTr+1, move it - // into the iTr + // the new transcript pointer is now at *nWinTr+1, move it into the iTr + wTr[iTr] = pTr; *(wTr[iTr]) = trA; if (*nWinTr < P.alignTranscriptsPerWindowNmax) { (*nWinTr)++; // increment number of transcripts per window; @@ -454,5 +458,3 @@ void stitchWindowAligns(uint iA, uint nA, int Score, bool WAincl[], uint tR2, ui }; return; }; - -