From 72264fa11746b91be2f36b9d4ac49bfb39615e16 Mon Sep 17 00:00:00 2001 From: Eric Buehler Date: Mon, 24 Feb 2025 11:50:39 -0500 Subject: [PATCH] Remove SUPPORTS_I8MM --- candle-core/src/quantized/iq_quants/mod.rs | 2 -- candle-core/src/quantized/k_quants/mod.rs | 12 ------------ candle-core/src/quantized/quants.rs | 3 --- 3 files changed, 17 deletions(-) diff --git a/candle-core/src/quantized/iq_quants/mod.rs b/candle-core/src/quantized/iq_quants/mod.rs index 4821b2f36..fe18254d9 100644 --- a/candle-core/src/quantized/iq_quants/mod.rs +++ b/candle-core/src/quantized/iq_quants/mod.rs @@ -32,7 +32,6 @@ impl GgmlType for BlockIQ4xs { const DTYPE: GgmlDType = GgmlDType::Iq4Xs; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = false; fn to_float(xs: &[Self], mut ys: &mut [f32]) -> Result<()> { let k = ys.len(); @@ -179,7 +178,6 @@ impl GgmlType for BlockIQ4nl { const DTYPE: GgmlDType = GgmlDType::Iq4Nl; const BLCK_SIZE: usize = QK4_NL; type VecDotType = BlockQ8_0; - const SUPPORTS_I8MM: bool = false; fn to_float(xs: &[Self], mut ys: &mut [f32]) -> Result<()> { let k = ys.len(); diff --git a/candle-core/src/quantized/k_quants/mod.rs b/candle-core/src/quantized/k_quants/mod.rs index 5a8c886e7..c009dab56 100644 --- a/candle-core/src/quantized/k_quants/mod.rs +++ b/candle-core/src/quantized/k_quants/mod.rs @@ -138,7 +138,6 @@ impl GgmlType for BlockQ4_0 { const DTYPE: GgmlDType = GgmlDType::Q4_0; const BLCK_SIZE: usize = QK4_0; type VecDotType = BlockQ8_0; - const SUPPORTS_I8MM: bool = true; // https://github.com/ggerganov/llama.cpp/blob/468ea24fb4633a0d681f7ac84089566c1c6190cb/ggml.c#L1525 fn to_float(xs: &[Self], ys: &mut [f32]) -> Result<()> { @@ -239,7 +238,6 @@ impl GgmlType for BlockQ4_1 { const DTYPE: GgmlDType = GgmlDType::Q4_1; const BLCK_SIZE: usize = QK4_1; type VecDotType = BlockQ8_1; - const SUPPORTS_I8MM: bool = false; fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { Self::vec_dot_unopt(n, xs, ys) @@ -335,7 +333,6 @@ impl GgmlType for BlockQ5_0 { const DTYPE: GgmlDType = GgmlDType::Q5_0; const BLCK_SIZE: usize = QK5_0; type VecDotType = BlockQ8_0; - const SUPPORTS_I8MM: bool = false; fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { let qk = Self::BLCK_SIZE; @@ -438,7 +435,6 @@ impl GgmlType for BlockQ5_1 { const DTYPE: GgmlDType = GgmlDType::Q5_1; const BLCK_SIZE: usize = QK5_1; type VecDotType = BlockQ8_1; - const SUPPORTS_I8MM: bool = false; fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { Self::vec_dot_unopt(n, xs, ys) @@ -547,7 +543,6 @@ impl GgmlType for BlockQ8_0 { const DTYPE: GgmlDType = GgmlDType::Q8_0; const BLCK_SIZE: usize = QK8_0; type VecDotType = BlockQ8_0; - const SUPPORTS_I8MM: bool = true; // https://github.com/ggerganov/llama.cpp/blob/468ea24fb4633a0d681f7ac84089566c1c6190cb/ggml.c#L1619 fn to_float(xs: &[Self], ys: &mut [f32]) -> Result<()> { @@ -638,7 +633,6 @@ impl GgmlType for BlockQ8_1 { const DTYPE: GgmlDType = GgmlDType::Q8_1; const BLCK_SIZE: usize = QK8_1; type VecDotType = BlockQ8_1; - const SUPPORTS_I8MM: bool = false; fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { Self::vec_dot_unopt(n, xs, ys) @@ -685,7 +679,6 @@ impl GgmlType for BlockQ2K { const DTYPE: GgmlDType = GgmlDType::Q2K; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = true; #[allow(unreachable_code)] fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { @@ -863,7 +856,6 @@ impl GgmlType for BlockQ3K { const DTYPE: GgmlDType = GgmlDType::Q3K; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = false; #[allow(unreachable_code)] fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { @@ -1142,7 +1134,6 @@ impl GgmlType for BlockQ4K { const DTYPE: GgmlDType = GgmlDType::Q4K; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = true; #[allow(unreachable_code)] fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { @@ -1337,7 +1328,6 @@ impl GgmlType for BlockQ5K { const DTYPE: GgmlDType = GgmlDType::Q5K; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = true; #[allow(unreachable_code)] fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { @@ -1559,7 +1549,6 @@ impl GgmlType for BlockQ6K { const DTYPE: GgmlDType = GgmlDType::Q6K; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = true; #[allow(unreachable_code)] fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { @@ -1744,7 +1733,6 @@ impl GgmlType for BlockQ8K { const DTYPE: GgmlDType = GgmlDType::Q8K; const BLCK_SIZE: usize = QK_K; type VecDotType = BlockQ8K; - const SUPPORTS_I8MM: bool = false; #[allow(unreachable_code)] fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { diff --git a/candle-core/src/quantized/quants.rs b/candle-core/src/quantized/quants.rs index ba065b0e5..163af5bef 100644 --- a/candle-core/src/quantized/quants.rs +++ b/candle-core/src/quantized/quants.rs @@ -7,7 +7,6 @@ pub trait GgmlType: Sized + Clone + Send + Sync { const DTYPE: GgmlDType; const BLCK_SIZE: usize; type VecDotType: GgmlType; - const SUPPORTS_I8MM: bool; // This is only safe for types that include immediate values such as float/int/... fn zeros() -> Self { @@ -28,7 +27,6 @@ impl GgmlType for f32 { const DTYPE: GgmlDType = GgmlDType::F32; const BLCK_SIZE: usize = 1; type VecDotType = f32; - const SUPPORTS_I8MM: bool = false; fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { Self::vec_dot_unopt(n, xs, ys) @@ -67,7 +65,6 @@ impl GgmlType for f16 { const DTYPE: GgmlDType = GgmlDType::F16; const BLCK_SIZE: usize = 1; type VecDotType = f16; - const SUPPORTS_I8MM: bool = false; fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result { Self::vec_dot_unopt(n, xs, ys)