[Feat] proof building (#7)

Jackmin801 · web-flow · commit e71402aa28a0 · 2025-02-14T14:05:03.000-08:00
* port proof building from genesys pr

* add tests

* use union instead of pipe
diff --git a/tests/test_poly.py b/tests/test_poly.py
@@ -0,0 +1,196 @@
+import pytest
+import torch
+import base64
+from toploc.poly import (
+    find_injective_modulus,
+    build_proofs,
+    build_proofs_base64,
+    ProofPoly,
+)
+
+
+def test_find_injective_modulus():
+    """The returned modulus must map every input to a distinct residue."""
+    # A seeded generator keeps any failure reproducible; unique() guarantees the
+    # inputs are distinct, since no modulus can separate repeated values.
+    gen = torch.Generator().manual_seed(0)
+    x = torch.randint(0, 4_000_000_000, (100,), generator=gen).unique().tolist()
+    modulus = find_injective_modulus(x)
+    assert isinstance(modulus, int)
+    # Candidates are searched from 65497 down to (but excluding) 2**15
+    assert 2**15 < modulus <= 65497
+    # Check that all values are unique under modulus
+    residues = {value % modulus for value in x}
+    assert len(residues) == len(x)
+
+
+@pytest.fixture
+def sample_poly():
+    """A four-coefficient polynomial with modulus 65497 shared by the tests."""
+    coeffs = [1, 2, 3, 4]
+    return ProofPoly(coeffs, 65497)
+
+
+def test_proof_poly_init(sample_poly):
+    """The constructor stores coefficients and modulus unchanged."""
+    stored = (sample_poly.coeffs, sample_poly.modulus)
+    assert stored == ([1, 2, 3, 4], 65497)
+
+
+def test_proof_poly_call(sample_poly):
+    """Calling the polynomial evaluates it at the given point."""
+    point = 42
+    # Reference value: sum of c_k * point**k, reduced modulo 65497
+    expected = sum(c * point**k for k, c in enumerate([1, 2, 3, 4])) % 65497
+    value = sample_poly(point)
+    assert isinstance(value, int)
+    assert value == expected
+
+
+def test_proof_poly_len(sample_poly):
+    """len() reports the number of coefficients."""
+    n_coeffs = len(sample_poly)
+    assert n_coeffs == 4
+
+
+def test_proof_poly_null():
+    """Null polynomials are all-zero and carry modulus 0."""
+    size = 5
+    poly = ProofPoly.null(size)
+    assert len(poly) == size
+    assert poly.modulus == 0
+    assert poly.coeffs == [0 for _ in range(size)]
+
+
+def test_proof_poly_from_points_list():
+    """Test creation from list points"""
+    x = [1, 2, 3]
+    y = [4, 5, 6]
+    poly = ProofPoly.from_points(x, y)
+    assert isinstance(poly, ProofPoly)
+    # One coefficient per point, and the polynomial must pass through each point
+    assert len(poly.coeffs) == len(x)
+    for xi, yi in zip(x, y):
+        assert poly(xi) == yi
+
+
+def test_proof_poly_from_points_tensor():
+    """Interpolating tensor inputs yields a polynomial through every point."""
+    xs = torch.tensor([1, 2, 3])
+    ys = torch.tensor([4, 5, 6])
+    poly = ProofPoly.from_points(xs, ys)
+    assert isinstance(poly, ProofPoly)
+    assert len(poly.coeffs) == 3
+    for point, expected in ((1, 4), (2, 5), (3, 6)):
+        assert poly(point) == expected
+
+
+def test_proof_poly_from_points_bfloat16():
+    """bfloat16 tensors are accepted as the y coordinates."""
+    indices = torch.tensor([1, 2, 3])
+    values = torch.tensor([4, 5, 6], dtype=torch.bfloat16)
+    poly = ProofPoly.from_points(indices, values)
+    assert isinstance(poly, ProofPoly)
+    assert len(poly.coeffs) == 3
+
+
+def test_proof_poly_to_base64(sample_poly):
+    """The base64 form is a str that decodes strictly back to the raw bytes."""
+    encoded = sample_poly.to_base64()
+    assert isinstance(encoded, str)
+    # validate=True rejects non-alphabet characters instead of discarding them
+    decoded = base64.b64decode(encoded, validate=True)
+    assert decoded == sample_poly.to_bytes()
+
+
+def test_proof_poly_to_bytes(sample_poly):
+    """Test bytes conversion"""
+    byte_data = sample_poly.to_bytes()
+    assert isinstance(byte_data, bytes)
+    # Layout: 2-byte big-endian modulus (65497 == 0xFFD9), then one 2-byte
+    # big-endian word per coefficient.
+    assert byte_data == b"\xff\xd9\x00\x01\x00\x02\x00\x03\x00\x04"
+
+
+def test_proof_poly_from_bytes(sample_poly):
+    """to_bytes followed by from_bytes restores coefficients and modulus."""
+    restored = ProofPoly.from_bytes(sample_poly.to_bytes())
+    assert restored.coeffs == sample_poly.coeffs
+    assert restored.modulus == sample_poly.modulus
+
+
+def test_proof_poly_from_base64(sample_poly):
+    """to_base64 followed by from_base64 restores coefficients and modulus."""
+    restored = ProofPoly.from_base64(sample_poly.to_base64())
+    assert restored.coeffs == sample_poly.coeffs
+    assert restored.modulus == sample_poly.modulus
+
+
+def test_proof_poly_repr(sample_poly):
+    """repr() is a string mentioning both the modulus and the coefficients."""
+    text = repr(sample_poly)
+    assert isinstance(text, str)
+    for fragment in ("65497", "[1, 2, 3, 4]"):
+        assert fragment in text
+
+
+@pytest.fixture
+def sample_activations():
+    """One (3, 16) bfloat16 tensor followed by seven (16,) bfloat16 tensors."""
+    dim = 16
+    first = torch.randn(3, dim, dtype=torch.bfloat16)
+    rest = [torch.randn(dim, dtype=torch.bfloat16) for _ in range(3 * 2 + 1)]
+    return [first, *rest]
+
+
+def test_build_proofs(sample_activations):
+    """Eight activations with batches of two give five byte proofs."""
+    result = build_proofs(sample_activations, decode_batching_size=2, topk=5)
+    assert isinstance(result, list)
+    for item in result:
+        assert isinstance(item, bytes)
+    assert len(result) == 5
+
+
+def test_build_proofs_base64(sample_activations):
+    """Eight activations with batches of two give five base64 string proofs."""
+    result = build_proofs_base64(sample_activations, decode_batching_size=2, topk=5)
+    assert isinstance(result, list)
+    for item in result:
+        assert isinstance(item, str)
+    # Verify each proof is valid base64
+    for item in result:
+        base64.b64decode(item)
+    assert len(result) == 5
+
+
+def test_build_proofs_skip_prefill(sample_activations):
+    """With skip_prefill only the four decode-batch proofs are produced."""
+    options = {"decode_batching_size": 2, "topk": 5, "skip_prefill": True}
+    result = build_proofs(sample_activations, **options)
+    assert isinstance(result, list)
+    for item in result:
+        assert isinstance(item, bytes)
+    assert len(result) == 4
+
+
+def test_build_proofs_error_handling():
+    """Inputs that cannot be proven yield only null proofs."""
+    # The first activation has zero elements, so topk=5 cannot be satisfied
+    empty_prefill = torch.randn(0, 16, dtype=torch.bfloat16)
+    decode_step = torch.randn(16, dtype=torch.bfloat16)
+    result = build_proofs(
+        [empty_prefill, decode_step], decode_batching_size=2, topk=5
+    )
+    assert isinstance(result, list)
+    for item in result:
+        assert isinstance(item, bytes)
+
+    expected = ProofPoly.null(5).to_bytes()
+    for item in result:
+        assert item == expected
+
+
+def test_build_proofs_edge_cases(sample_activations):
+    """Exercise minimal topk, an oversized batch and a prefill-only input."""
+    # Test with minimal topk
+    assert len(build_proofs(sample_activations, decode_batching_size=2, topk=1)) > 0
+
+    # Test with large batching size
+    large_batch = build_proofs(sample_activations, decode_batching_size=10, topk=5)
+    assert len(large_batch) > 0
+
+    # Test with only one prefill activation
+    prefill_only = sample_activations[:1]
+    assert len(build_proofs(prefill_only, decode_batching_size=2, topk=5)) == 1
+
+    # Test with only one activation and skip_prefill
+    skipped = build_proofs(
+        prefill_only, decode_batching_size=2, topk=5, skip_prefill=True
+    )
+    assert len(skipped) == 0
diff --git a/toploc/__init__.py b/toploc/__init__.py
@@ -1 +1,4 @@
+from toploc.poly import ProofPoly, build_proofs, build_proofs_base64  # noqa: F401
+from toploc.utils import sha256sum  # noqa: F401
+
 __version__ = "0.0.0.dev1"
diff --git a/toploc/ndd.py b/toploc/ndd.py
@@ -1,3 +1,4 @@
+# TODO: Deprecate this file and move to C
 MOD_N = 65497
 
 
diff --git a/toploc/poly.py b/toploc/poly.py
@@ -0,0 +1,139 @@
+from typing import Union
+import base64
+from toploc.C.csrc.ndd import compute_newton_coefficients, evaluate_polynomial
+import torch
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def find_injective_modulus(x: list[int]) -> int:
+    """Return the largest modulus in (2**15, 65497] under which ``x`` is injective.
+
+    Candidates are tried from 65497 downwards; the first one for which all
+    values of ``x`` have pairwise distinct residues is returned.
+
+    Args:
+        x: Integers that must stay distinct after reduction.
+
+    Returns:
+        The first (largest) candidate modulus that keeps ``x`` collision free.
+
+    Raises:
+        ValueError: If no candidate works. This includes any ``x`` containing
+            repeated values, since equal inputs collide under every modulus.
+    """
+    n_values = len(x)
+    # Repeated inputs can never be separated, so skip the ~32k futile candidates
+    if len(set(x)) == n_values:
+        for modulus in range(65497, 2**15, -1):
+            if len({value % modulus for value in x}) == n_values:
+                return modulus
+    raise ValueError("No injective modulus found!")
+
+
+class ProofPoly:
+    """Polynomial stored as a coefficient list together with a modulus.
+
+    The modulus is applied to evaluation points (see ``__call__`` and
+    ``from_points``). The binary form produced by ``to_bytes`` is the modulus
+    followed by every coefficient, each as a 2-byte big-endian unsigned integer.
+    """
+
+    def __init__(self, coeffs: list[int], modulus: int):
+        self.coeffs = coeffs
+        self.modulus = modulus
+
+    def __call__(self, x: int):
+        """Evaluate the polynomial at ``x`` after reducing ``x`` by the modulus."""
+        # Null polynomials carry modulus 0, for which ``x % 0`` would raise
+        # ZeroDivisionError; in that case the point is passed through unreduced.
+        point = x % self.modulus if self.modulus else x
+        return evaluate_polynomial(self.coeffs, point)
+
+    def __len__(self) -> int:
+        """Return the number of coefficients."""
+        return len(self.coeffs)
+
+    @classmethod
+    def null(cls, length: int) -> "ProofPoly":
+        """Return a placeholder with ``length`` zero coefficients and modulus 0."""
+        return cls([0] * length, 0)
+
+    @classmethod
+    def from_points(
+        cls, x: Union[list[int], torch.Tensor], y: Union[list[int], torch.Tensor]
+    ) -> "ProofPoly":
+        """Build a polynomial from the points ``(x[i], y[i])``.
+
+        The x values are reduced by an injective modulus before the
+        coefficients are computed with ``compute_newton_coefficients``.
+
+        Args:
+            x: Point abscissas; tensors are converted with ``tolist()``.
+            y: Point ordinates. bfloat16 tensors are reinterpreted as their
+                uint16 bit patterns; other tensors are converted with
+                ``tolist()``.
+
+        Raises:
+            NotImplementedError: If ``y`` is a float32 tensor.
+            ValueError: If no injective modulus exists for ``x``.
+        """
+        if isinstance(x, torch.Tensor):
+            x = x.tolist()
+        if isinstance(y, torch.Tensor):
+            if y.dtype == torch.bfloat16:
+                # Reinterpret the raw 16 bits so every value is an integer
+                y = y.view(dtype=torch.uint16).tolist()
+            elif y.dtype == torch.float32:
+                raise NotImplementedError(
+                    "float32 not supported yet because interpolate has hardcode prime"
+                )
+            else:
+                y = y.tolist()
+        modulus = find_injective_modulus(x)
+        x = [i % modulus for i in x]
+        return cls(compute_newton_coefficients(x, y), modulus)
+
+    def to_base64(self) -> str:
+        """Return ``to_bytes()`` encoded as a base64 string."""
+        return base64.b64encode(self.to_bytes()).decode("utf-8")
+
+    def to_bytes(self) -> bytes:
+        """Serialise as the modulus followed by the coefficients.
+
+        Every value is written as a 2-byte big-endian unsigned integer, so
+        ``int.to_bytes`` raises OverflowError for values outside 0..65535.
+        """
+        return self.modulus.to_bytes(2, byteorder="big", signed=False) + b"".join(
+            coeff.to_bytes(2, byteorder="big", signed=False) for coeff in self.coeffs
+        )
+
+    @classmethod
+    def from_bytes(cls, byte_data: bytes) -> "ProofPoly":
+        """Parse the layout written by ``to_bytes``.
+
+        Raises:
+            ValueError: If ``byte_data`` lacks the 2-byte modulus or does not
+                consist of whole 2-byte words (for example truncated input).
+        """
+        if len(byte_data) < 2 or len(byte_data) % 2:
+            raise ValueError(
+                "ProofPoly bytes must hold a 2-byte modulus followed by 2-byte "
+                f"coefficients, got {len(byte_data)} bytes"
+            )
+        modulus = int.from_bytes(byte_data[:2], byteorder="big", signed=False)
+        coeffs = [
+            int.from_bytes(byte_data[i : i + 2], byteorder="big", signed=False)
+            for i in range(2, len(byte_data), 2)
+        ]
+        return cls(coeffs, modulus)
+
+    @classmethod
+    def from_base64(cls, base64_encoded: str) -> "ProofPoly":
+        """Decode a base64 string and parse it with ``from_bytes``."""
+        byte_data = base64.b64decode(base64_encoded)
+        return cls.from_bytes(byte_data)
+
+    def __repr__(self) -> str:
+        return f"ProofPoly[{self.modulus}]({self.coeffs})"
+
+
+def build_proofs(
+    activations: list[torch.Tensor],
+    decode_batching_size: int,
+    topk: int,
+    skip_prefill: bool = False,
+) -> list[bytes]:
+    """Build proofs for ``activations`` and return each one serialised to bytes.
+
+    All arguments are forwarded unchanged to ``_build_proofs``.
+    """
+    polys = _build_proofs(activations, decode_batching_size, topk, skip_prefill)
+    return [poly.to_bytes() for poly in polys]
+
+
+def build_proofs_base64(
+    activations: list[torch.Tensor],
+    decode_batching_size: int,
+    topk: int,
+    skip_prefill: bool = False,
+) -> list[str]:
+    """Build proofs for ``activations`` and return each one as a base64 string.
+
+    All arguments are forwarded unchanged to ``_build_proofs``.
+    """
+    polys = _build_proofs(activations, decode_batching_size, topk, skip_prefill)
+    return [poly.to_base64() for poly in polys]
+
+
+def _topk_proof(flat_view: torch.Tensor, topk: int) -> ProofPoly:
+    """Build one proof from the ``topk`` largest-magnitude entries of ``flat_view``."""
+    topk_indices = flat_view.abs().topk(topk).indices
+    return ProofPoly.from_points(topk_indices, flat_view[topk_indices])
+
+
+def _build_proofs(
+    activations: list[torch.Tensor],
+    decode_batching_size: int,
+    topk: int,
+    skip_prefill: bool = False,
+) -> list[ProofPoly]:
+    """Build one proof for the prefill activation and one per decode batch.
+
+    ``activations[0]`` is the prefill activation; the remaining entries are
+    grouped into consecutive batches of ``decode_batching_size``. Each proof is
+    built from the ``topk`` largest-magnitude entries of the flattened
+    tensor(s), using their indices as x and their values as y.
+
+    Args:
+        activations: Prefill activation followed by the decode activations.
+        decode_batching_size: Number of decode activations per proof.
+        topk: Number of entries selected for each proof.
+        skip_prefill: When True, no proof is produced for ``activations[0]``.
+
+    Returns:
+        The proofs in order (prefill first unless skipped). If anything fails
+        while building them, the same number of null proofs is returned.
+
+    Raises:
+        ValueError: If ``decode_batching_size`` is smaller than 1.
+    """
+    if decode_batching_size < 1:
+        raise ValueError("decode_batching_size must be a positive integer")
+
+    # Slicing never raises, so the batches (and hence the number of proofs the
+    # caller expects) are known before any tensor work is attempted.
+    decode_batches = [
+        activations[start : start + decode_batching_size]
+        for start in range(1, len(activations), decode_batching_size)
+    ]
+    expected = len(decode_batches) + (0 if skip_prefill else 1)
+
+    # In order to not crash, we return null proofs if there is an error
+    try:
+        proofs = []
+        # Prefill
+        if not skip_prefill:
+            proofs.append(_topk_proof(activations[0].view(-1), topk))
+        # Batched Decode
+        for batch in decode_batches:
+            flat_view = torch.cat([act.view(-1) for act in batch])
+            proofs.append(_topk_proof(flat_view, topk))
+    except Exception as e:
+        logger.exception("Error building proofs: %s", e)
+        # One independent null proof per proof that should have been produced
+        proofs = [ProofPoly.null(topk) for _ in range(expected)]
+
+    return proofs
diff --git a/toploc/utils.py b/toploc/utils.py
@@ -0,0 +1,18 @@
+import hashlib
+
+
+def sha256sum(filename: str, chunk_size: int = 65536) -> str:
+    """Return the hex SHA-256 digest of a file, read in fixed-size chunks.
+
+    Args:
+        filename (str): Path to the file.
+        chunk_size (int, optional): Number of bytes requested per read.
+            Defaults to 65536 (64 KB).
+
+    Returns:
+        str: The SHA-256 hash of the file as a hexadecimal string.
+    """
+    digest = hashlib.sha256()
+    # buffering=0 opens the file unbuffered, so each read goes to the raw file
+    with open(filename, "rb", buffering=0) as stream:
+        while True:
+            block = stream.read(chunk_size)
+            if block == b"":
+                break
+            digest.update(block)
+    return digest.hexdigest()

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# TODO: Deprecate this file and move to C`
`1`	`2`	`MOD_N = 65497`
`2`	`3`
`3`	`4`