From 64a1cd7a710999696ddc1c9ba7d66f64c5575f23 Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Wed, 12 Feb 2025 12:19:54 -0800 Subject: [PATCH 1/2] fix (callVariant): when aligning a variant bubble, do not let in-bridge nodes merge with their outgoing nodes outside of the variant bubble, to avoid a complexity explosion --- CHANGELOG.md | 2 ++ moPepGen/svgraph/ThreeFrameTVG.py | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0f6da99..be5522fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Fixed issue that `callVariant` fails on transcripts with SEC very close to the start codon. +- Fixed issue with extremely long run-time with complex alt splice events. #892 + ## [1.4.3] - 2025-01-18 ### Fixed diff --git a/moPepGen/svgraph/ThreeFrameTVG.py b/moPepGen/svgraph/ThreeFrameTVG.py index 5a3efaa7..244fc051 100644 --- a/moPepGen/svgraph/ThreeFrameTVG.py +++ b/moPepGen/svgraph/ThreeFrameTVG.py @@ -1418,17 +1418,19 @@ def is_candidate_out_node(x:TVGNode, y:TVGNode): # the input node itself. return node, set() - visited:Set[TVGNode] = {node} + visited:Dict[str, TVGNode] = {node.id: node} # When a new farthest node is found, the downstream nodes of the old # farthest node are put into this `exceptions` container, so they # can be visited again. 
exceptions:Set[TVGNode] = set() + non_members:Set[str] = set() while queue: cur:TVGNode = queue.popleft() if cur is None: continue if cur.reading_frame_index != node.reading_frame_index: + non_members.add(cur.id) continue if subgraph_checker: @@ -1437,7 +1439,7 @@ def is_candidate_out_node(x:TVGNode, y:TVGNode): subgraph_checker = False visited_len_before = len(visited) - visited.add(cur) + visited[cur.id] = cur visited_len_after = len(visited) if visited_len_before == visited_len_after: if cur is farthest and cur is not node: @@ -1497,7 +1499,8 @@ def is_candidate_out_node(x:TVGNode, y:TVGNode): queue.append(farthest) exceptions.add(cur) continue - return farthest, visited + members = {v for k,v in visited.items() if k not in non_members} + return farthest, members def first_node_is_smaller(self, first:TVGNode, second:TVGNode) -> bool: """ Check if the first node is larger """ @@ -1628,6 +1631,11 @@ def align_variants(self, node:TVGNode) -> Tuple[TVGNode, TVGNode]: new_bridge = bridge_in.copy() for edge in bridge_in.out_edges: self.add_edge(new_bridge, edge.out_node, edge.type) + # Here we want to limit the process within the variant bubble. + # In-bridge nodes should not be merged with their outgoing nodes + # that do not belong to the bubble. + if edge.out_node not in members: + end_nodes.add(edge.out_node) bridge_map[new_bridge] = bridge_in trash.add(bridge_in) @@ -1670,7 +1678,7 @@ def align_variants(self, node:TVGNode) -> Tuple[TVGNode, TVGNode]: for out_edge in copy.copy(cur.out_edges): out_node:TVGNode = out_edge.out_node - # So this is the case that some of the end_nodes are in end_nodes + # So this is the case that some of the out_nodes are in end_nodes # but not the others. 
if out_node in end_nodes: new_node = cur.copy() From c2c033fc704595352693607315695a1ffd15a8de Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Thu, 13 Feb 2025 09:46:21 -0800 Subject: [PATCH 2/2] doc (moPepGen): fuzz test results updated --- docs/files/fuzz_test_history.tsv | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/files/fuzz_test_history.tsv b/docs/files/fuzz_test_history.tsv index ec702a11..af0e5f87 100644 --- a/docs/files/fuzz_test_history.tsv +++ b/docs/files/fuzz_test_history.tsv @@ -32,3 +32,6 @@ v1.2.1 e7a4096 2023-11-28 comprehensive 522929 0 0 0:00:00.378236 15.50100807513 v1.4.2 c2da21c 2025-01-19 snv 270 0 0 0:00:00.170374 0.37595620821363007 0:00:56.681483 105.92716856631384 v1.4.2 c2da21c 2025-01-19 indel 275 0 0 0:00:00.170908 0.3764300020522951 0:00:40.836362 93.63957347416282 v1.4.2 c2da21c 2025-01-19 comprehensive 471 0 0 0:00:00.352437 0.6980351919379612 0:00:38.006367 140.52343056870941 +v1.4.3 64a1cd7 2025-02-12 snv 3615 0 0 0:00:00.164877 0.5071353395324305 0:00:57.359060 118.20011598924994 +v1.4.3 64a1cd7 2025-02-12 indel 3623 0 0 0:00:00.228542 1.0644126008878305 0:00:39.258896 93.83931730856402 +v1.4.3 64a1cd7 2025-02-12 comprehensive 6985 0 0 0:00:00.372239 0.8090526920972508 0:00:35.994138 142.17524693622445