Skip to content

Commit cf54093

Browse files
author
Hossein Asghari
committed
better separate overhang and general cases; handle complex general and overhang cases happening together
1 parent 9c3e422 commit cf54093

File tree

1 file changed

+45
-17
lines changed

1 file changed

+45
-17
lines changed

src/PuffAligner.cpp

+45-17
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,8 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
281281
bool invalidStart = (signedRefStartPos < 0);
282282
bool invalidEnd = (signedRefEndPos > refTotalLength);
283283

284-
uint32_t maxSoftclipLenGeneral = mopts.maxSoftclipFractionGeneral * readLen;
285-
uint32_t maxSoftclipLenOverhang = mopts.maxSoftclipFractionOverhang * readLen;
284+
int32_t maxSoftclipLenGeneral = mopts.maxSoftclipFractionGeneral * readLen;
285+
int32_t maxSoftclipLenOverhang = mopts.maxSoftclipFractionOverhang * readLen;
286286

287287
if (mopts.end2end) {
288288
maxSoftclipLenGeneral = 0;
@@ -484,11 +484,14 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
484484
logger_->debug("\t\t\tread: [{}]", readWindow);
485485
logger_->debug("\t\t\tref: [{}]", refSeqBuffer_);
486486

487+
bool isOverhang = (readStartPosOnRef < 0);
488+
auto remainedSoftClipLen = isOverhang ? remainedSoftClipLenOverhang : remainedSoftClipLenGeneral;
489+
487490
bandwidth = maxGaps;
488491
logger_->debug("\t\t\tksw2_parameters: bandwidth={}, end_bonus={}, zdrop={}", bandwidth, aligner_config.end_bonus, aligner_config.dropoff);
489492
auto cutoff = minAcceptedScore - mopts.matchScore * read.length();
490493
aligner(readWindow.data(), readWindow.length(), refSeqBuffer_.data(),
491-
refSeqBuffer_.length(), &ez, cutoff, remainedSoftClipLenGeneral,
494+
refSeqBuffer_.length(), &ez, cutoff, remainedSoftClipLen,
492495
ksw2pp::EnumToType<ksw2pp::KSW2AlignmentType::EXTENSION>());
493496
logger_->debug("\t\t\tksw2_results:");
494497
logger_->debug("\t\t\t\tmax={}, max_q={}, max_t={}", ez.max, ez.max_q, ez.max_t);
@@ -521,8 +524,10 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
521524
// https://github.com/COMBINE-lab/salmon/issues/475).
522525

523526
decltype(alignmentScore) part_score = ez.max;
527+
524528
numSoftClipped = readWindow.length() - (ez.max_q + 1);
525-
if(remainedSoftClipLenGeneral < numSoftClipped || ez.mqe + aligner_config.end_bonus > ez.max)
529+
530+
if(remainedSoftClipLen < numSoftClipped || ez.mqe + aligner_config.end_bonus > ez.max)
526531
{
527532
part_score = ez.mqe;
528533
openGapLen = ez.mqe_t + 1;
@@ -534,9 +539,6 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
534539
openGapLen = ez.max_t + 1;
535540
cigarGen.add_item(numSoftClipped, 'S');
536541
}
537-
remainedSoftClipLenGeneral -= numSoftClipped;
538-
remainedSoftClipLenOverhang -= numSoftClipped;
539-
logger_->debug("\t\t\t\tremainedSoftClipLen={}->{};", remainedSoftClipLenGeneral + numSoftClipped, remainedSoftClipLenGeneral);
540542

541543
alignmentScore += part_score;
542544
addCigar(cigarGen, ez, true);
@@ -567,7 +569,16 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
567569
cigarGen.add_item(firstMemStart_read, cigar_char);
568570
}
569571
}
572+
573+
// both of the thresholds are updated in case the 5'-end was overhang and later the 3'-end is going to be
574+
// generally soft-clipped. this would be a complex scenario and we check the general threshold for both sides
575+
// in this case to avoid having soft-clipped lengths that are no allowed
576+
remainedSoftClipLenGeneral -= numSoftClipped;
577+
remainedSoftClipLenOverhang -= numSoftClipped;
578+
logger_->debug("\t\t\t#soft-clipped={}", numSoftClipped);
579+
570580
arOut.softclip_start = numSoftClipped;
581+
571582
logger_->debug("\t\t\tscore_sofar: {}", alignmentScore);
572583
logger_->debug("\t\t\tcigar_sofar: {}", cigarGen.get_cigar());
573584
}
@@ -729,11 +740,21 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
729740
logger_->debug("\t\t\tCASE 1: some reference bases left to align");
730741
logger_->debug("\t\t\tread: [{}]", readWindow);
731742
logger_->debug("\t\t\tref: [{}]", refSeqBuffer_);
743+
744+
bool isOverhang = (gapRead > refLen);
745+
746+
// if the 5'-end was soft-clip generally (not in overhanging), we will use the general threshold regardless
747+
// that's because in complex General-Overhang or Overhang-General scenarios, we do not want to allow soft-cliping
748+
// above the specified limit so all the soft-clips in these scenarios are considered general
749+
// the code will automatically consider aligning to the end of 3'-end here as well if the limit is reached
750+
auto remainedSoftClipLen = (isOverhang && (remainedSoftClipLenGeneral == maxSoftclipLenGeneral))
751+
? remainedSoftClipLenOverhang
752+
: remainedSoftClipLenGeneral;
732753

733754
logger_->debug("\t\t\tksw2_parameters: bandwidth={}, end_bonus={}, zdrop={}", bandwidth, aligner_config.end_bonus, aligner_config.dropoff);
734755
auto cutoff = minAcceptedScore - alignmentScore - mopts.matchScore * readWindow.length();
735756
aligner(readWindow.data(), readWindow.length(), refSeqBuffer_.data(),
736-
refLen, &ez, cutoff, remainedSoftClipLenGeneral,
757+
refLen, &ez, cutoff, remainedSoftClipLen,
737758
ksw2pp::EnumToType<ksw2pp::KSW2AlignmentType::EXTENSION>());
738759
logger_->debug("\t\t\tksw2_results:");
739760
logger_->debug("\t\t\t\tmax={}, max_q={}, max_t={}", ez.max, ez.max_q, ez.max_t);
@@ -750,11 +771,11 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
750771
if (ez.mqe != KSW_NEG_INF) ez.stopped = 0;
751772

752773
decltype(alignmentScore) part_score = ez.max;
753-
754-
numSoftClipped = readWindow.length() - (ez.max_q + 1);
755774
addCigar(cigarGen, ez, false);
756775

757-
if(remainedSoftClipLenGeneral < numSoftClipped || ez.mqe + aligner_config.end_bonus > ez.max)
776+
numSoftClipped = readWindow.length() - (ez.max_q + 1);
777+
778+
if(remainedSoftClipLen < numSoftClipped || ez.mqe + aligner_config.end_bonus > ez.max)
758779
{
759780
part_score = ez.mqe;
760781
numSoftClipped = 0;
@@ -764,10 +785,7 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
764785
part_score = ez.max;
765786
cigarGen.add_item(numSoftClipped, 'S');
766787
}
767-
remainedSoftClipLenGeneral -= numSoftClipped;
768-
remainedSoftClipLenOverhang -= numSoftClipped;
769-
logger_->debug("\t\t\t\tremainedSoftClipLen={}->{};", remainedSoftClipLenGeneral + numSoftClipped, remainedSoftClipLenGeneral);
770-
788+
771789
alignmentScore += part_score;
772790

773791
// NOTE: pre soft-clip code for adjusting the alignment score.
@@ -783,14 +801,20 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
783801
mopts.allowSoftclip
784802
? readWindow.length()
785803
: 0;
804+
805+
// again, general threshold will be used if 5'-end was softclip in general case and not overhang
806+
auto remainedSoftClipLen = (remainedSoftClipLenGeneral == maxSoftclipLenGeneral)
807+
? remainedSoftClipLenOverhang
808+
: remainedSoftClipLenGeneral;
809+
786810
alignmentScore +=
787-
mopts.allowSoftclip && remainedSoftClipLenOverhang >= numSoftClipped
811+
mopts.allowSoftclip && remainedSoftClipLen >= numSoftClipped
788812
? 0
789813
: (-1 * mopts.gapOpenPenalty +
790814
-1 * mopts.gapExtendPenalty * readWindow.length());
791815

792816
char cigar_char;
793-
if (mopts.allowSoftclip && remainedSoftClipLenOverhang >= numSoftClipped) {
817+
if (mopts.allowSoftclip && remainedSoftClipLen >= numSoftClipped) {
794818
cigar_char = 'S';
795819
} else {
796820
cigar_char = 'I';
@@ -801,6 +825,10 @@ bool PuffAligner::alignRead(std::string& read, std::string& read_rc, const std::
801825
cigarGen.add_item(readWindow.length(), cigar_char);
802826
}
803827
}
828+
remainedSoftClipLenGeneral -= numSoftClipped;
829+
remainedSoftClipLenOverhang -= numSoftClipped;
830+
logger_->debug("\t\t\t#soft-clipped={}", numSoftClipped);
831+
804832
arOut.softclip_end = numSoftClipped;
805833
logger_->debug("\t\t\tscore_sofar: {}", alignmentScore);
806834
logger_->debug("\t\t\tcigar_sofar: {}", cigarGen.get_cigar());

0 commit comments

Comments
 (0)