Skip to content

Commit 46054d1

Browse files
authored
Truncate the intermediate FP32 representation when converting BF16 weights to BF16
1 parent 6a52bfe commit 46054d1

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

convert-hf-to-gguf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,8 @@ def write_tensors(self):
295295

296296
if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
297297
if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
298-
data = gguf.quantize_bf16(data)
299-
assert data.dtype == np.int16
298+
data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
299+
assert data.dtype in (np.int16, np.uint16)
300300
data_qtype = gguf.GGMLQuantizationType.BF16
301301

302302
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):

0 commit comments

Comments (0)