Skip to content

Commit

Permalink
Remove SUPPORTS_I8MM
Browse files (browse the repository at this point in the history)
  • Loading branch information
EricLBuehler committed Feb 24, 2025
1 parent 919dddc commit 72264fa
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 17 deletions.
2 changes: 0 additions & 2 deletions candle-core/src/quantized/iq_quants/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ impl GgmlType for BlockIQ4xs {
const DTYPE: GgmlDType = GgmlDType::Iq4Xs;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = false;

fn to_float(xs: &[Self], mut ys: &mut [f32]) -> Result<()> {
let k = ys.len();
Expand Down Expand Up @@ -179,7 +178,6 @@ impl GgmlType for BlockIQ4nl {
const DTYPE: GgmlDType = GgmlDType::Iq4Nl;
const BLCK_SIZE: usize = QK4_NL;
type VecDotType = BlockQ8_0;
const SUPPORTS_I8MM: bool = false;

fn to_float(xs: &[Self], mut ys: &mut [f32]) -> Result<()> {
let k = ys.len();
Expand Down
12 changes: 0 additions & 12 deletions candle-core/src/quantized/k_quants/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ impl GgmlType for BlockQ4_0 {
const DTYPE: GgmlDType = GgmlDType::Q4_0;
const BLCK_SIZE: usize = QK4_0;
type VecDotType = BlockQ8_0;
const SUPPORTS_I8MM: bool = true;

// https://github.com/ggerganov/llama.cpp/blob/468ea24fb4633a0d681f7ac84089566c1c6190cb/ggml.c#L1525
fn to_float(xs: &[Self], ys: &mut [f32]) -> Result<()> {
Expand Down Expand Up @@ -239,7 +238,6 @@ impl GgmlType for BlockQ4_1 {
const DTYPE: GgmlDType = GgmlDType::Q4_1;
const BLCK_SIZE: usize = QK4_1;
type VecDotType = BlockQ8_1;
const SUPPORTS_I8MM: bool = false;

fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Self::vec_dot_unopt(n, xs, ys)
Expand Down Expand Up @@ -335,7 +333,6 @@ impl GgmlType for BlockQ5_0 {
const DTYPE: GgmlDType = GgmlDType::Q5_0;
const BLCK_SIZE: usize = QK5_0;
type VecDotType = BlockQ8_0;
const SUPPORTS_I8MM: bool = false;

fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
let qk = Self::BLCK_SIZE;
Expand Down Expand Up @@ -438,7 +435,6 @@ impl GgmlType for BlockQ5_1 {
const DTYPE: GgmlDType = GgmlDType::Q5_1;
const BLCK_SIZE: usize = QK5_1;
type VecDotType = BlockQ8_1;
const SUPPORTS_I8MM: bool = false;

fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Self::vec_dot_unopt(n, xs, ys)
Expand Down Expand Up @@ -547,7 +543,6 @@ impl GgmlType for BlockQ8_0 {
const DTYPE: GgmlDType = GgmlDType::Q8_0;
const BLCK_SIZE: usize = QK8_0;
type VecDotType = BlockQ8_0;
const SUPPORTS_I8MM: bool = true;

// https://github.com/ggerganov/llama.cpp/blob/468ea24fb4633a0d681f7ac84089566c1c6190cb/ggml.c#L1619
fn to_float(xs: &[Self], ys: &mut [f32]) -> Result<()> {
Expand Down Expand Up @@ -638,7 +633,6 @@ impl GgmlType for BlockQ8_1 {
const DTYPE: GgmlDType = GgmlDType::Q8_1;
const BLCK_SIZE: usize = QK8_1;
type VecDotType = BlockQ8_1;
const SUPPORTS_I8MM: bool = false;

fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Self::vec_dot_unopt(n, xs, ys)
Expand Down Expand Up @@ -685,7 +679,6 @@ impl GgmlType for BlockQ2K {
const DTYPE: GgmlDType = GgmlDType::Q2K;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = true;

#[allow(unreachable_code)]
fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Expand Down Expand Up @@ -863,7 +856,6 @@ impl GgmlType for BlockQ3K {
const DTYPE: GgmlDType = GgmlDType::Q3K;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = false;

#[allow(unreachable_code)]
fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Expand Down Expand Up @@ -1142,7 +1134,6 @@ impl GgmlType for BlockQ4K {
const DTYPE: GgmlDType = GgmlDType::Q4K;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = true;

#[allow(unreachable_code)]
fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Expand Down Expand Up @@ -1337,7 +1328,6 @@ impl GgmlType for BlockQ5K {
const DTYPE: GgmlDType = GgmlDType::Q5K;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = true;

#[allow(unreachable_code)]
fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Expand Down Expand Up @@ -1559,7 +1549,6 @@ impl GgmlType for BlockQ6K {
const DTYPE: GgmlDType = GgmlDType::Q6K;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = true;

#[allow(unreachable_code)]
fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Expand Down Expand Up @@ -1744,7 +1733,6 @@ impl GgmlType for BlockQ8K {
const DTYPE: GgmlDType = GgmlDType::Q8K;
const BLCK_SIZE: usize = QK_K;
type VecDotType = BlockQ8K;
const SUPPORTS_I8MM: bool = false;

#[allow(unreachable_code)]
fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Expand Down
3 changes: 0 additions & 3 deletions candle-core/src/quantized/quants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ pub trait GgmlType: Sized + Clone + Send + Sync {
const DTYPE: GgmlDType;
const BLCK_SIZE: usize;
type VecDotType: GgmlType;
const SUPPORTS_I8MM: bool;

// This is only safe for types that include immediate values such as float/int/...
fn zeros() -> Self {
Expand All @@ -28,7 +27,6 @@ impl GgmlType for f32 {
const DTYPE: GgmlDType = GgmlDType::F32;
const BLCK_SIZE: usize = 1;
type VecDotType = f32;
const SUPPORTS_I8MM: bool = false;

fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Self::vec_dot_unopt(n, xs, ys)
Expand Down Expand Up @@ -67,7 +65,6 @@ impl GgmlType for f16 {
const DTYPE: GgmlDType = GgmlDType::F16;
const BLCK_SIZE: usize = 1;
type VecDotType = f16;
const SUPPORTS_I8MM: bool = false;

fn vec_dot(n: usize, xs: &[Self], ys: &[Self::VecDotType]) -> Result<f32> {
Self::vec_dot_unopt(n, xs, ys)
Expand Down

0 comments on commit 72264fa

Please sign in to comment.