@@ -317,7 +317,7 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
317
317
auto & bandwidth = aligner.config ().bandwidth ;
318
318
auto & aligner_config = aligner.config ();
319
319
libdivide::divider<int32_t > gapExtDivisor (static_cast <int32_t >(mopts.gapExtendPenalty ));
320
- const int32_t minAcceptedScore = scoreStatus_.getCutoff (read .length ()); // mopts.minScoreFraction * mopts.matchScore * readLen;
320
+ const int32_t minAcceptedScore = scoreStatus_.getCutoff (read .length () - maxSoftclipLen ); // mopts.minScoreFraction * mopts.matchScore * readLen;
321
321
logger_->debug (" \t\t NOTE mems.size(): {}, isRev: {}, minAcceptedScore: {}" , mems.size (), !isFw, minAcceptedScore);
322
322
// compute the maximum gap length that would be allowed given the length of read aligned so far and the current
323
323
// alignment score.
@@ -476,6 +476,8 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
476
476
fillRefSeqBufferReverse (allRefSeq, refAccPos, refWindowStart,
477
477
refWindowLength, refSeqBuffer_);
478
478
479
+ int32_t numSoftClipped = 0 ;
480
+
479
481
if (refSeqBuffer_.length () > 0 ) {
480
482
logger_->debug (" \t\t\t CASE 1: some reference bases left to align" );
481
483
auto readWindow = readView.substr (0 , firstMemStart_read).to_string ();
@@ -578,49 +580,52 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
578
580
// computeCIGAR ? addCigar(cigarGen, ez, true) : firstMemStart_read - num_soft_clipped;
579
581
580
582
decltype (alignmentScore) part_score = ez.max ;
581
- int32_t oldRemainedSoftClipLen = remainedSoftClipLen;
582
- logger_->debug (" \t\t\t\t remainedSoftClipLen={}->{};" , oldRemainedSoftClipLen, remainedSoftClipLen);
583
- remainedSoftClipLen -= readWindow.length () - (ez.max_q + 1 );
584
- if (remainedSoftClipLen < 0 || ez.mqe + aligner_config.end_bonus > ez.max )
583
+ numSoftClipped = readWindow.length () - (ez.max_q + 1 );
584
+ if (remainedSoftClipLen < numSoftClipped || ez.mqe + aligner_config.end_bonus > ez.max )
585
585
{
586
586
part_score = ez.mqe ;
587
587
// openGapLen = readWindow.length();
588
588
openGapLen = ez.mqe_t + 1 ;
589
+ numSoftClipped = 0 ;
589
590
}
590
591
else
591
592
{
592
593
part_score = ez.max ;
593
594
openGapLen = ez.max_t + 1 ;
594
- cigarGen.add_item (readWindow.length () - (ez.max_q + 1 ), ' S' );
595
-
595
+ cigarGen.add_item (numSoftClipped, ' S' );
596
596
}
597
+ remainedSoftClipLen -= numSoftClipped;
598
+ logger_->debug (" \t\t\t\t remainedSoftClipLen={}->{};" , remainedSoftClipLen + numSoftClipped, remainedSoftClipLen);
599
+
597
600
alignmentScore += part_score;
598
601
addCigar (cigarGen, ez, true );
599
602
// logger_->debug("score : {}", std::max(ez.mqe, ez.mte));
600
603
} else { // EHSAN-TODO: check if this is necessary
601
604
logger_->debug (" \t\t\t CASE 2: no reference bases left (start)" );
602
605
// overhangingStart = true;
603
606
// do any special soft clipping penalty here if we want
604
- remainedSoftClipLen - =
607
+ numSoftClipped =
605
608
allowSoftclip
606
609
? firstMemStart_read
607
610
: 0 ;
608
611
alignmentScore +=
609
- allowSoftclip && remainedSoftClipLen >= 0
612
+ allowSoftclip && remainedSoftClipLen >= numSoftClipped
610
613
? 0
611
614
: (-1 * mopts.gapOpenPenalty +
612
615
-1 * mopts.gapExtendPenalty * firstMemStart_read);
613
616
openGapLen = firstMemStart_read;
614
617
615
618
if (mopts.computeCIGAR ) {
616
- if (allowSoftclip && remainedSoftClipLen >= 0 ) {
619
+ if (allowSoftclip && remainedSoftClipLen >= numSoftClipped ) {
617
620
cigarGen.add_item (firstMemStart_read, ' S' );
618
621
openGapLen = 0 ;
619
622
} else {
620
623
cigarGen.add_item (firstMemStart_read, ' I' );
624
+ numSoftClipped = 0 ;
621
625
}
622
626
}
623
627
}
628
+ arOut.softclip_start = numSoftClipped;
624
629
logger_->debug (" \t\t\t score_sofar: {}" , alignmentScore);
625
630
logger_->debug (" \t\t\t cigar_sofar: {}" , cigarGen.get_cigar ());
626
631
}
@@ -778,6 +783,8 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
778
783
auto readWindow = readView.substr (prevMemEnd_read + 1 ).to_string ();
779
784
fillRefSeqBuffer (allRefSeq, refAccPos, refTailStart, refLen, refSeqBuffer_);
780
785
786
+ int32_t numSoftClipped = 0 ;
787
+
781
788
logger_->debug (" \t\t\t gapRead : {}, refLen : {}, refBuffer_.size() : {}, refTotalLength : {}" , gapRead, refLen, refSeqBuffer_.size (), refTotalLength);
782
789
if (refLen > 0 ) {
783
790
logger_->debug (" \t\t\t CASE 1: some reference bases left to align" );
@@ -850,20 +857,24 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
850
857
// }
851
858
852
859
decltype (alignmentScore) part_score = ez.max ;
853
- int32_t oldRemainedSoftClipLen = remainedSoftClipLen;
854
- remainedSoftClipLen -= readWindow.length () - (ez.max_q + 1 );
855
- logger_->debug (" \t\t\t\t remainedSoftClipLen={}->{};" , oldRemainedSoftClipLen, remainedSoftClipLen);
856
- if (remainedSoftClipLen < 0 || ez.mqe + aligner_config.end_bonus > ez.max )
860
+
861
+ numSoftClipped = readWindow.length () - (ez.max_q + 1 );
862
+ addCigar (cigarGen, ez, false );
863
+
864
+ if (remainedSoftClipLen < numSoftClipped || ez.mqe + aligner_config.end_bonus > ez.max )
857
865
{
858
866
part_score = ez.mqe ;
867
+ numSoftClipped = 0 ;
859
868
}
860
869
else
861
870
{
862
871
part_score = ez.max ;
863
- cigarGen.add_item (readWindow. length () - (ez. max_q + 1 ) , ' S' );
872
+ cigarGen.add_item (numSoftClipped , ' S' );
864
873
}
874
+ remainedSoftClipLen -= numSoftClipped;
875
+ logger_->debug (" \t\t\t\t remainedSoftClipLen={}->{};" , remainedSoftClipLen + numSoftClipped, remainedSoftClipLen);
876
+
865
877
alignmentScore += part_score;
866
- addCigar (cigarGen, ez, false );
867
878
868
879
// NOTE: pre soft-clip code for adjusting the alignment score.
869
880
// int32_t alnCost = allowOverhangSoftclip ? std::max(ez.mqe, ez.mte)
@@ -874,23 +885,25 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
874
885
logger_->debug (" \t\t\t CASE 2: no reference bases left (end)" );
875
886
// overhangingEnd = true;
876
887
// do any special soft clipping penalty here if we want
877
- remainedSoftClipLen - =
888
+ numSoftClipped =
878
889
allowSoftclip
879
890
? readWindow.length ()
880
891
: 0 ;
881
892
alignmentScore +=
882
- allowSoftclip && remainedSoftClipLen >= 0
893
+ allowSoftclip && remainedSoftClipLen >= numSoftClipped
883
894
? 0
884
895
: (-1 * mopts.gapOpenPenalty +
885
896
-1 * mopts.gapExtendPenalty * readWindow.length ());
886
897
if (mopts.computeCIGAR ) {
887
- if (allowSoftclip && remainedSoftClipLen >= 0 ) {
898
+ if (allowSoftclip && remainedSoftClipLen >= numSoftClipped ) {
888
899
cigarGen.add_item (readWindow.length (), ' S' );
889
900
} else {
890
901
cigarGen.add_item (readWindow.length (), ' I' );
902
+ numSoftClipped = 0 ;
891
903
}
892
904
}
893
905
}
906
+ arOut.softclip_end = numSoftClipped;
894
907
logger_->debug (" \t\t\t score_sofar: {}" , alignmentScore);
895
908
logger_->debug (" \t\t\t cigar_sofar: {}" , cigarGen.get_cigar ());
896
909
}
@@ -962,14 +975,15 @@ int32_t PuffAligner::calculateAlignments(std::string& read_left, std::string& re
962
975
alignRead (read_orphan, rc_orphan, jointHit.orphanClust ()->mems , jointHit.orphanClust ()->queryChainHash ,
963
976
jointHit.orphanClust ()->perfectChain ,
964
977
jointHit.orphanClust ()->isFw , tid, orphan_aln_cache, hctr, ar_orphan, verbose);
978
+ auto orphan_total_softclip_len = ar_orphan.softclip_start + ar_orphan.softclip_end ;
965
979
jointHit.alignmentScore =
966
- ar_orphan.score > threshold (read_orphan.length ()) ? ar_orphan.score : invalidScore;
980
+ ar_orphan.score > threshold (read_orphan.length () - orphan_total_softclip_len ) ? ar_orphan.score : invalidScore;
967
981
jointHit.orphanClust ()->cigar = (mopts.computeCIGAR ) ? ar_orphan.cigar : " " ;
968
982
jointHit.orphanClust ()->openGapLen = ar_orphan.openGapLen ;
969
983
jointHit.orphanClust ()->softClipStart = ar_orphan.softclip_start ;
970
984
// jointHit.orphanClust()->coverage = jointHit.alignmentScore;
971
985
if (jointHit.alignmentScore < 0 and verbose) {
972
- std::cerr << read_orphan.length () << " " << threshold (read_orphan.length ()) << " " << ar_left.score << " \n " ;
986
+ std::cerr << read_orphan.length () << " " << threshold (read_orphan.length () - orphan_total_softclip_len ) << " " << ar_left.score << " \n " ;
973
987
}
974
988
return jointHit.alignmentScore ;
975
989
} else {
@@ -982,8 +996,10 @@ int32_t PuffAligner::calculateAlignments(std::string& read_left, std::string& re
982
996
alignRead (read_right, read_right_rc_, jointHit.rightClust ->mems , jointHit.rightClust ->queryChainHash , jointHit.rightClust ->perfectChain ,
983
997
jointHit.rightClust ->isFw , tid, alnCacheRight, hctr, ar_right, verbose);
984
998
985
- jointHit.alignmentScore = ar_left.score > threshold (read_left.length ()) ? ar_left.score : invalidScore;
986
- jointHit.mateAlignmentScore = ar_right.score > threshold (read_right.length ()) ? ar_right.score : invalidScore;
999
+ auto left_total_softclip_len = ar_left.softclip_start + ar_left.softclip_end ;
1000
+ jointHit.alignmentScore = ar_left.score > threshold (read_left.length () - left_total_softclip_len) ? ar_left.score : invalidScore;
1001
+ auto right_total_softclip_len = ar_right.softclip_start + ar_right.softclip_end ;
1002
+ jointHit.mateAlignmentScore = ar_right.score > threshold (read_right.length () - right_total_softclip_len) ? ar_right.score : invalidScore;
987
1003
/*
988
1004
jointHit.alignmentScore = (score_left == invalidScore or score_right == invalidScore)?
989
1005
invalidScore : score_left + score_right;
@@ -1031,13 +1047,14 @@ int32_t PuffAligner::calculateAlignments(std::string& read, pufferfish::util::Jo
1031
1047
ar_left.score = invalidScore;
1032
1048
const auto & oc = jointHit.orphanClust ();
1033
1049
alignRead (read , read_left_rc_, oc->mems , oc->queryChainHash , oc->perfectChain , oc->isFw , tid, alnCacheLeft, hctr, ar_left, verbose);
1050
+ auto total_softclip_len = ar_left.softclip_start + ar_left.softclip_end ;
1034
1051
jointHit.alignmentScore =
1035
- ar_left.score > threshold (read .length ()) ? ar_left.score : invalidScore;
1052
+ ar_left.score > threshold (read .length () - total_softclip_len ) ? ar_left.score : invalidScore;
1036
1053
jointHit.orphanClust ()->cigar = (mopts.computeCIGAR ) ? ar_left.cigar : " " ;
1037
1054
jointHit.orphanClust ()->openGapLen = ar_left.openGapLen ;
1038
1055
// jointHit.orphanClust()->coverage = jointHit.alignmentScore;
1039
1056
if (jointHit.alignmentScore < 0 and verbose) {
1040
- std::cerr << read .length () << " " << threshold (read .length ()) << " " << ar_left.score << " \n " ;
1057
+ std::cerr << read .length () << " " << threshold (read .length () - total_softclip_len ) << " " << ar_left.score << " \n " ;
1041
1058
}
1042
1059
return jointHit.alignmentScore ;
1043
1060
}
0 commit comments