Skip to content

Commit

Permalink
Merge branch 'main' into ivarflakstad/metal-prng
Browse files Browse the repository at this point in the history
  • Loading branch information
ivarflakstad committed Jan 12, 2024
2 parents 87efb5d + 4191518 commit e63bb86
Show file tree
Hide file tree
Showing 53 changed files with 1,035 additions and 1,051 deletions.
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version: 2
updates:
- package-ecosystem: "cargo"
directory: "/"
schedule:
interval: "weekly"
open-pull-requests-limit: 5
14 changes: 11 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ license = "MIT OR Apache-2.0"
accelerate-src = { version = "0.3.2" }
anyhow = { version = "1", features = ["backtrace"] }
byteorder = "1.4.3"
candle = { path = "./candle-core", package = "candle-core" }
candle-datasets = { path = "./candle-datasets" }
candle-flash-attn = { path = "./candle-flash-attn" }
candle-kernels = { path = "./candle-kernels" }
candle-metal-kernels = { path = "./candle-metal-kernels" }
candle-nn = { path = "./candle-nn" }
candle-onnx = { path = "./candle-onnx" }
candle-transformers = { path = "./candle-transformers" }
clap = { version = "4.2.4", features = ["derive"] }
criterion = { version = "0.5.1", default-features=false }
cudarc = { version = "0.9.14", features = ["f16"] }
cudarc = { version = "0.10.0", features = ["f16"] }
gemm = { version = "0.16.6", features = ["wasm-simd128-enable"] }
hf-hub = "0.3.0"
half = { version = "2.3.1", features = ["num-traits", "use-intrinsics", "rand_distr"] }
Expand All @@ -42,7 +50,7 @@ imageproc = { version = "0.23.0", default-features = false }
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
libc = { version = "0.2.147" }
log = "0.4"
memmap2 = { version = "0.7.1", features = ["stable_deref_trait"] }
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
num_cpus = "1.15.0"
num-traits = "0.2.15"
parquet = { version = "45.0.0" }
Expand All @@ -55,7 +63,7 @@ serde = { version = "1.0.171", features = ["derive"] }
serde_plain = "1.0.2"
serde_json = "1.0.99"
thiserror = "1"
tokenizers = { version = "0.13.4", default-features = false }
tokenizers = { version = "0.15.0", default-features = false }
tracing = "0.1.37"
tracing-chrome = "0.7.1"
tracing-subscriber = "0.3.7"
Expand Down
10 changes: 5 additions & 5 deletions candle-book/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ readme = "README.md"

[dependencies]
accelerate-src = { workspace = true, optional = true }
candle = { path = "../candle-core", version = "0.3.3", package = "candle-core" }
candle-datasets = { path = "../candle-datasets", version = "0.3.3" }
candle-nn = { path = "../candle-nn", version = "0.3.3" }
candle-transformers = { path = "../candle-transformers", version = "0.3.3" }
candle-flash-attn = { path = "../candle-flash-attn", version = "0.3.3", optional = true }
candle = { workspace = true }
candle-datasets = { workspace = true }
candle-nn = { workspace = true }
candle-transformers = { workspace = true }
candle-flash-attn = { workspace = true, optional = true }
safetensors = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions candle-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ readme = "README.md"
[dependencies]
accelerate-src = { workspace = true, optional = true }
byteorder = { workspace = true }
candle-kernels = { path = "../candle-kernels", version = "0.3.3", optional = true }
candle-metal-kernels = { path = "../candle-metal-kernels", version = "0.3.3", optional = true }
candle-kernels = { workspace = true, optional = true }
candle-metal-kernels = { workspace = true, optional = true }
metal = { workspace = true, optional = true}
cudarc = { workspace = true, optional = true }
gemm = { workspace = true }
Expand Down
20 changes: 13 additions & 7 deletions candle-core/benches/benchmarks/matmul.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
use crate::benchmarks::{bench_name, device, BenchDevice};
use candle_core::{DType, Tensor};
use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
use candle_core::{DType, Device, Tensor};
use criterion::{black_box, criterion_group, Criterion, Throughput};
use std::time::Instant;

/// Multiplies `a` by the transpose of `b` and discards the product.
///
/// Panics if either the transpose or the matmul returns an error.
fn run(a: &Tensor, b: &Tensor) {
    let b_transposed = b.t().unwrap();
    a.matmul(&b_transposed).unwrap();
}

fn criterion_benchmark(c: &mut Criterion) {
fn run_bench(c: &mut Criterion, device: &Device) {
let b = 1;
let m = 1;
let n = 2048;
let k = 2048;

let device = device().unwrap();
let dtype = DType::F32;
let lhs = Tensor::zeros((b, m, k), dtype, &device).unwrap();
let rhs = Tensor::zeros((b, n, k), dtype, &device).unwrap();
let lhs = Tensor::zeros((b, m, k), dtype, device).unwrap();
let rhs = Tensor::zeros((b, n, k), dtype, device).unwrap();

let flops = b * m * n * k;

let mut group = c.benchmark_group(bench_name("matmul"));
let mut group = c.benchmark_group(device.bench_name("matmul"));
group.throughput(Throughput::Bytes(flops as u64));
group.bench_function("iter", move |b| {
b.iter_custom(|iters| {
Expand All @@ -35,4 +34,11 @@ fn criterion_benchmark(c: &mut Criterion) {
group.finish();
}

/// Criterion entry point: runs the matmul benchmark once per device
/// reported by `BenchDeviceHandler`.
///
/// Panics if the device list cannot be built.
fn criterion_benchmark(c: &mut Criterion) {
    let devices = BenchDeviceHandler::new().unwrap().devices;
    devices
        .into_iter()
        .for_each(|device| run_bench(c, &device));
}

criterion_group!(benches, criterion_benchmark);
50 changes: 29 additions & 21 deletions candle-core/benches/benchmarks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use candle_core::{Device, Result};

/// Benchmark-only helpers implemented for the devices the benches run on.
pub(crate) trait BenchDevice {
/// Called by the benchmarks after the timed loop and before the elapsed
/// time is read.
// NOTE(review): presumably blocks until queued device work has finished;
// the implementation is not fully visible here — confirm.
fn sync(&self) -> Result<()>;

/// Returns `name` prefixed with a label identifying the device, so that
/// benchmark groups for different devices get distinct names.
fn bench_name<S: Into<String>>(&self, name: S) -> String;
}

impl BenchDevice for Device {
Expand All @@ -25,32 +27,38 @@ impl BenchDevice for Device {
}
}
}
}

pub(crate) fn device() -> Result<Device> {
if cfg!(feature = "metal") {
Device::new_metal(0)
} else if cfg!(feature = "cuda") {
Device::new_cuda(0)
} else {
Ok(Device::Cpu)
fn bench_name<S: Into<String>>(&self, name: S) -> String {
match self {
Device::Cpu => {
let cpu_type = if cfg!(feature = "accelerate") {
"accelerate"
} else if cfg!(feature = "mkl") {
"mkl"
} else {
"cpu"
};
format!("{}_{}", cpu_type, name.into())
}
Device::Cuda(_) => format!("cuda_{}", name.into()),
Device::Metal(_) => format!("metal_{}", name.into()),
}
}
}

pub(crate) fn bench_name<S: Into<String>>(name: S) -> String {
format!("{}_{}", device_variant(), name.into())
/// Holds the list of devices that each benchmark is run against.
struct BenchDeviceHandler {
// Devices in the order the benchmarks iterate over them.
devices: Vec<Device>,
}

const fn device_variant() -> &'static str {
if cfg!(feature = "metal") {
"metal"
} else if cfg!(feature = "cuda") {
"cuda"
} else if cfg!(feature = "accelerate") {
"accelerate"
} else if cfg!(feature = "mkl") {
"mkl"
} else {
"cpu"
impl BenchDeviceHandler {
    /// Builds the list of devices to benchmark.
    ///
    /// At most one accelerator is selected: Metal device 0 when the `metal`
    /// feature is enabled, otherwise CUDA device 0 when the `cuda` feature is
    /// enabled. The CPU device is always appended last.
    ///
    /// # Errors
    ///
    /// Propagates the error if the selected accelerator cannot be created.
    pub fn new() -> Result<Self> {
        let accelerator = if cfg!(feature = "metal") {
            Some(Device::new_metal(0)?)
        } else if cfg!(feature = "cuda") {
            Some(Device::new_cuda(0)?)
        } else {
            None
        };
        let devices = accelerator
            .into_iter()
            .chain(std::iter::once(Device::Cpu))
            .collect();
        Ok(Self { devices })
    }
}
25 changes: 15 additions & 10 deletions candle-core/benches/benchmarks/random.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::benchmarks::{bench_name, device, BenchDevice};
use crate::benchmarks::{BenchDevice, BenchDeviceHandler};
use candle_core::{DType, Device, Tensor};
use criterion::{black_box, criterion_group, Criterion, Throughput};
use std::time::Instant;
Expand All @@ -11,48 +11,53 @@ fn rand_normal(a: &Tensor) {
a.randn_like(100.0, 15.0).unwrap();
}

fn criterion_benchmark(c: &mut Criterion) {
fn run_random_bench(c: &mut Criterion, device: &Device) {
let b = 1;

let rows = 2048;
let cols = 2048;

let d = device().unwrap();
let dtype = DType::F32;
let tensor = Tensor::zeros((b, rows, cols), dtype, &d).unwrap();
let tensor = Tensor::zeros((b, rows, cols), dtype, device).unwrap();

let flops = b * rows * cols * dtype.size_in_bytes();

let mut group = c.benchmark_group(bench_name("random_uniform"));
let mut group = c.benchmark_group(device.bench_name("random_uniform"));
group.throughput(Throughput::Bytes(flops as u64));
group.bench_function("iter", move |benches| {
benches.iter_custom(|iters| {
let start = Instant::now();
for _i in 0..iters {
rand_uniform(black_box(&tensor));
}
d.sync().unwrap();
device.sync().unwrap();
start.elapsed()
})
});
group.finish();

let d = device().unwrap();
let tensor = Tensor::zeros((b, rows, cols), dtype, &d).unwrap();
let tensor = Tensor::zeros((b, rows, cols), dtype, device).unwrap();

let mut group = c.benchmark_group(bench_name("random_normal"));
let mut group = c.benchmark_group(device.bench_name("random_normal"));
group.throughput(Throughput::Bytes(flops as u64));
group.bench_function("iter", move |benches| {
benches.iter_custom(|iters| {
let start = Instant::now();
for _i in 0..iters {
rand_normal(black_box(&tensor));
}
d.sync().unwrap();
device.sync().unwrap();
start.elapsed()
})
});
group.finish();
}

fn criterion_benchmark(c: &mut Criterion) {
let handler = BenchDeviceHandler::new().unwrap();
for device in handler.devices {
run_random_bench(c, &device);
}
}

criterion_group!(benches, criterion_benchmark);
25 changes: 24 additions & 1 deletion candle-core/examples/tensor-tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ enum Command {
},

Quantize {
/// The input file, in gguf format.
/// The input file(s), in safetensors format.
in_file: Vec<std::path::PathBuf>,

/// The output file, in gguf format.
Expand All @@ -117,6 +117,15 @@ enum Command {
#[arg(long, value_enum, default_value_t = QuantizationMode::Llama)]
mode: QuantizationMode,
},

Dequantize {
/// The input file, in gguf format.
in_file: std::path::PathBuf,

/// The output file, in safetensors format.
#[arg(long)]
out_file: std::path::PathBuf,
},
}

#[derive(Parser, Debug, Clone)]
Expand Down Expand Up @@ -285,6 +294,19 @@ fn run_quantize_safetensors(
Ok(())
}

/// Converts a gguf file into a safetensors file by dequantizing every tensor
/// on the CPU.
///
/// Each tensor listed in the gguf content is loaded from `in_file`,
/// dequantized, and stored under its original name; the resulting map is then
/// saved to `out_file`.
///
/// # Errors
///
/// Propagates any error from opening or reading the input, loading or
/// dequantizing a tensor, or saving the output.
fn run_dequantize(in_file: std::path::PathBuf, out_file: std::path::PathBuf) -> Result<()> {
    let mut reader = std::fs::File::open(in_file)?;
    let content = gguf_file::Content::read(&mut reader)?;
    let mut dequantized = std::collections::HashMap::new();
    // Only the tensor names are needed here; the tensor data is fetched from the file.
    for name in content.tensor_infos.keys() {
        let tensor = content
            .tensor(&mut reader, name)?
            .dequantize(&Device::Cpu)?;
        dequantized.insert(name.to_string(), tensor);
    }
    candle_core::safetensors::save(&dequantized, out_file)?;
    Ok(())
}

fn run_quantize(
in_files: &[std::path::PathBuf],
out_file: std::path::PathBuf,
Expand Down Expand Up @@ -379,6 +401,7 @@ fn main() -> anyhow::Result<()> {
quantization,
mode,
} => run_quantize(&in_file, out_file, quantization, mode)?,
Command::Dequantize { in_file, out_file } => run_dequantize(in_file, out_file)?,
}
Ok(())
}
Loading

0 comments on commit e63bb86

Please sign in to comment.