itertools.pairwise substitute for Python<3.10

turboderp · turboderp · commit 069719635788 · 2024-06-24T02:29:15.000+02:00
diff --git a/exllamav2/compat.py b/exllamav2/compat.py
@@ -1,5 +1,16 @@
 from __future__ import annotations
 import torch
+import itertools
+
+# Use itertools.pairwise when available; otherwise fall back to a tee/zip emulation (Python <3.10)
+
+try:
+    pairwise = itertools.pairwise
+except AttributeError:
+    def pairwise(iterable):
+        a, b = itertools.tee(iterable)
+        next(b, None)
+        return zip(a, b)
 
 # On some setups Torch will attempt to use GPU peer-to-peer copies even when they are not supported. This is either
 # a driver issue, a bug in Torch, or both. Either way, the result is that .to() will create an empty tensor on the
diff --git a/exllamav2/generator/dynamic.py b/exllamav2/generator/dynamic.py
@@ -8,6 +8,7 @@
 from exllamav2.ext import exllamav2_ext as ext_c, none_tensor
 from concurrent.futures import ThreadPoolExecutor
 
+from exllamav2.compat import pairwise
 import torch
 import random
 import numpy as np
@@ -1331,7 +1332,7 @@ def __eq__(self, other):
             rotation = [r * self.page_size for r in rotation]
             for cache, buffer in zip(cache_tensors, defrag_buffers):
                 buffer[:, :, :, :].copy_(cache[:, rotation[0] : rotation[0] + self.page_size, :, :])
-                for a, b in itertools.pairwise(rotation):
+                for a, b in pairwise(rotation):
                     cache[:, a : a + self.page_size, :, :].copy_(cache[:, b : b + self.page_size, :, :])
                 cache[:, rotation[-1] : rotation[-1] + self.page_size, :, :].copy_(buffer[:, :, :, :])
 
@@ -2392,7 +2393,7 @@ def allocate_pages(self):
             # Metrics
 
             self.total_pages += len(seq.allocated_pages)
-            for page_a, page_b in itertools.pairwise(seq.allocated_pages):
+            for page_a, page_b in pairwise(seq.allocated_pages):
                 if page_b.page_index != page_a.page_index + 1:
                     self.non_sequential_pages += 1