Skip to content

Commit e8e2b7e

Browse files
authored
missing cast and additional numpy 2.x fix
1 parent 225ec48 commit e8e2b7e

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

gguf-py/gguf/quants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
2727
def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
    """Convert an array of floats to bfloat16 bit patterns.

    The input is converted to float32 (no copy when it already is float32),
    reinterpreted as raw uint32 bits, and reduced to the upper 16 bits using
    round-to-nearest-even.

    Special cases handled on the raw bits before rounding:
      * NaNs are forced quiet (mantissa bit 22 set, low 16 bits cleared) so
        that rounding can never turn a NaN into an infinity.
      * Values with a zero exponent field (zeros and subnormals) are flushed
        to a zero that keeps the original sign bit.

    Args:
        n: Array of values convertible to float32.

    Returns:
        Array of the same shape with dtype uint16 holding the bfloat16 bits.
    """
    n = n.astype(np.float32, copy=False).view(np.uint32)
    # force nan to quiet; every operand is an explicit uint32 so the result
    # dtype does not depend on NumPy's Python-int promotion rules
    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
    # flush subnormals to zero (keep only the sign bit)
    n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
    # round to nearest even: add 0x7fff plus the lowest kept bit, then drop
    # the low 16 bits; the sum is done in uint64 so it cannot wrap
    n = (n.astype(np.uint64) + (0x7fff + ((n >> 16) & 1))) >> 16
    return n.astype(np.uint16)
3636

3737

0 commit comments

Comments
 (0)