From 277d4d1579f4a1ae861ab5725c5cf3262af1cb38 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 16:10:55 +0100 Subject: [PATCH 01/25] add booth encoding file --- cryptography/bls12_381/src/booth_encoding.rs | 90 ++++++++++++++++++++ cryptography/bls12_381/src/lib.rs | 1 + 2 files changed, 91 insertions(+) create mode 100644 cryptography/bls12_381/src/booth_encoding.rs diff --git a/cryptography/bls12_381/src/booth_encoding.rs b/cryptography/bls12_381/src/booth_encoding.rs new file mode 100644 index 00000000..ef5df325 --- /dev/null +++ b/cryptography/bls12_381/src/booth_encoding.rs @@ -0,0 +1,90 @@ +use std::ops::Neg; + +use blstrs::{G1Projective, Scalar}; +use ff::PrimeField; + +use crate::G1Point; +// TODO: Link to halo2 file + docs + comments +pub fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 { + // Booth encoding: + // * step by `window` size + // * slice by size of `window + 1`` + // * each window overlap by 1 bit + // * append a zero bit to the least significant end + // Indexing rule for example window size 3 where we slice by 4 bits: + // `[0, +1, +1, +2, +2, +3, +3, +4, -4, -3, -3 -2, -2, -1, -1, 0]`` + // So we can reduce the bucket size without preprocessing scalars + // and remembering them as in classic signed digit encoding + + let skip_bits = (window_index * window_size).saturating_sub(1); + let skip_bytes = skip_bits / 8; + + // fill into a u32 + let mut v: [u8; 4] = [0; 4]; + for (dst, src) in v.iter_mut().zip(el.iter().skip(skip_bytes)) { + *dst = *src + } + let mut tmp = u32::from_le_bytes(v); + + // pad with one 0 if slicing the least significant window + if window_index == 0 { + tmp <<= 1; + } + + // remove further bits + tmp >>= skip_bits - (skip_bytes * 8); + // apply the booth window + tmp &= (1 << (window_size + 1)) - 1; + + let sign = tmp & (1 << window_size) == 0; + + // div ceil by 2 + tmp = (tmp + 1) >> 1; + + // find the booth action index + if sign { + tmp as i32 + } else { + ((!(tmp - 1) & ((1 << window_size) - 1)) as i32).neg() + } +} + +#[test] +fn smoke_scalar_mul() { + use group::prime::PrimeCurveAffine; + let gen = G1Point::generator(); + let s = -Scalar::ONE; + + let res = gen * s; + + let got = mul(&s, &gen, 4); + + assert_eq!(G1Point::from(res), got) +} + +fn mul(scalar: &Scalar, point: &G1Point, window: usize) -> G1Point { + let u = scalar.to_bytes_le(); + let n = Scalar::NUM_BITS as usize / window + 1; + + let table = (0..=1 << (window - 1)) + .map(|i| point * Scalar::from(i as u64)) + .collect::>(); + + let mut acc: G1Projective = G1Point::default().into(); + for i in (0..n).rev() { + for _ in 0..window { + acc = acc + acc; + } + + let idx = get_booth_index(i as usize, window, u.as_ref()); + + if idx.is_negative() { + acc += table[idx.unsigned_abs() as usize].neg(); + } + if idx.is_positive() { + acc += table[idx.unsigned_abs() as usize]; + } + } + + acc.into() +} diff --git a/cryptography/bls12_381/src/lib.rs b/cryptography/bls12_381/src/lib.rs index 2d9095ff..e21a5a26 100644 --- a/cryptography/bls12_381/src/lib.rs +++ b/cryptography/bls12_381/src/lib.rs @@ -1,4 +1,5 @@ pub mod batch_inversion; +mod booth_encoding; pub mod fixed_base_msm; pub mod lincomb; From 5806a861cd62fd46ec731637852d080a2a3e39b8 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 16:12:18 +0100 Subject: [PATCH 02/25] temp: fix --- cryptography/bls12_381/src/booth_encoding.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cryptography/bls12_381/src/booth_encoding.rs b/cryptography/bls12_381/src/booth_encoding.rs index ef5df325..51a90f70 100644 --- a/cryptography/bls12_381/src/booth_encoding.rs +++ b/cryptography/bls12_381/src/booth_encoding.rs @@ -1,7 +1,7 @@ use std::ops::Neg; use blstrs::{G1Projective, Scalar}; -use ff::PrimeField; +use ff::{Field, PrimeField}; use crate::G1Point; // TODO: Link to halo2 file + docs + comments From 231b79d30d8a42c3a17a02afabfbffd04f28c003 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 16:14:47 +0100 Subject: [PATCH 03/25] add batch_add module --- cryptography/bls12_381/src/batch_add.rs | 243 ++++++++++++++++++++++++ cryptography/bls12_381/src/lib.rs | 1 + 2 files changed, 244 insertions(+) create mode 100644 cryptography/bls12_381/src/batch_add.rs diff --git a/cryptography/bls12_381/src/batch_add.rs b/cryptography/bls12_381/src/batch_add.rs new file mode 100644 index 00000000..f79be827 --- /dev/null +++ b/cryptography/bls12_381/src/batch_add.rs @@ -0,0 +1,243 @@ +use crate::batch_inversion::batch_inverse; +use blstrs::{Fp, G1Affine}; + +/// Adds multiple points together in affine representation, batching the inversions +pub fn batch_addition(mut points: Vec) -> G1Affine { + #[inline(always)] + fn point_add(p1: G1Affine, p2: G1Affine, inv: &blstrs::Fp) -> G1Affine { + use ff::Field; + + let lambda = (p2.y() - p1.y()) * inv; + let x = lambda.square() - p1.x() - p2.x(); + let y = lambda * (p1.x() - x) - p1.y(); + G1Affine::from_raw_unchecked(x, y, false) + } + + if points.is_empty() { + use group::prime::PrimeCurveAffine; + return G1Affine::identity(); + } + + let mut stride = 1; + + let mut new_differences = Vec::with_capacity(points.len()); + + while stride < points.len() { + new_differences.clear(); + + for i in (0..points.len()).step_by(stride * 2) { + if i + stride < points.len() { + new_differences.push(points[i + stride].x() - points[i].x()); + } + } + batch_inverse(&mut new_differences); + for (i, inv) in new_differences.iter().enumerate() { + let p1 = points[i * stride * 2]; + let p2 = points[i * stride * 2 + stride]; + points[i * stride * 2] = point_add(p1, p2, inv); + } + + stride *= 2; + } + + points[0] +} +// This method assumes that adjacent points are not the same +// This will lead to an inversion by zero +pub fn batch_addition_mut(points: &mut [G1Affine]) -> G1Affine { + fn point_add(p1: G1Affine, p2: G1Affine, inv: &blstrs::Fp) -> G1Affine { + use ff::Field; + + let lambda = (p2.y() - p1.y()) * inv; + let x = lambda.square() - p1.x() - p2.x(); + let y = lambda * (p1.x() - x) - p1.y(); + G1Affine::from_raw_unchecked(x, y, false) + } + + if points.is_empty() { + use group::prime::PrimeCurveAffine; + return G1Affine::identity(); + } + + let mut stride = 1; + + let mut new_differences = Vec::with_capacity(points.len()); + while stride < points.len() { + new_differences.clear(); + + for i in (0..points.len()).step_by(stride * 2) { + if i + stride < points.len() { + new_differences.push(points[i + stride].x() - points[i].x()); + } + } + batch_inverse(&mut new_differences); + for (i, inv) in new_differences.iter().enumerate() { + let p1 = points[i * stride * 2]; + let p2 = points[i * stride * 2 + stride]; + points[i * stride * 2] = point_add(p1, p2, inv); + } + + stride *= 2; + } + + points[0] +} + +pub fn naive_multi_batch_addition(mut multi_points: Vec>) -> Vec { + // TODO: Try using a scratch pad here + multi_points + .into_iter() + .map(|points| batch_addition(points)) + .collect() +} + +// Similar to batch addition, however we amortize across different batches +// TODO: Clean up -- This has a greater complexity than the regular algorithm +// TODO so we want to check if it makes a difference in our usecase. +pub fn multi_batch_addition(mut multi_points: Vec>) -> Vec { + #[inline(always)] + fn point_add_double(p1: G1Affine, p2: G1Affine, inv: &blstrs::Fp) -> G1Affine { + use ff::Field; + + let lambda = if p1 == p2 { + p1.x().square().mul3() * inv + } else { + (p2.y() - p1.y()) * inv + }; + + let x = lambda.square() - p1.x() - p2.x(); + let y = lambda * (p1.x() - x) - p1.y(); + G1Affine::from_raw_unchecked(x, y, false) + } + #[inline(always)] + // Note: We do not handle the case where p1 == -p2 + fn choose_add_or_double(p1: G1Affine, p2: G1Affine) -> Fp { + use ff::Field; + + if p1 == p2 { + p2.y().double() + } else { + p1.x() - p2.x() + } + } + let total_num_points: usize = multi_points.iter().map(|p| p.len()).sum(); + // let mut scratchpad = Vec::with_capacity(total_num_points); + + // Find the largest buckets, this will be the bottleneck for the number of iterations + let mut max_bucket_length = 0; + for i in 0..multi_points.len() { + max_bucket_length = std::cmp::max(max_bucket_length, multi_points[i].len()); + } + + let mut new_differences = Vec::with_capacity(max_bucket_length); + // (a,b) ; a is the length before adding points and b is the length after adding points. so a range + let mut collected_points = vec![(0, 0); multi_points.len()]; // We want to know how many points each bucket has accumulated + let mut multi_strides = vec![1; multi_points.len()]; // We want to know the stride for each bucket + let mut bucket_complete = vec![false; multi_points.len()]; // We want to know if a bucket is complete + // Iterate over each bucket + let max_num_iterations = max_bucket_length.next_power_of_two().ilog2(); + for _ in 0..max_num_iterations { + new_differences.clear(); + // Iterate over each bucket + for i in 0..multi_points.len() { + if bucket_complete[i] { + continue; + } + let points = &multi_points[i]; + let stride = multi_strides[i]; + let old_diff_len = new_differences.len(); + + // Skip the bucket if the stride is too long, + // This happens if the buckets are not evenly distributed + // in terms of points. + if stride < points.len() { + for k in (0..points.len()).step_by(stride * 2) { + if k + stride < points.len() { + new_differences.push(choose_add_or_double(points[k + stride], points[k])); + // new_differences.push(points[k + stride].x() - points[k].x()); + } + } + + // Save the number of points going into this bucket for the batch inversion + collected_points[i] = (old_diff_len, new_differences.len()); + } else { + collected_points[i] = (old_diff_len, new_differences.len()); + bucket_complete[i] = true; + } + } + + // We have iterated over each bucket, so now we need to do a batch inversion + batch_inverse(&mut new_differences); + // Now we update each bucket using the batch inversion we have computed and the collected points + for i in 0..multi_points.len() { + if bucket_complete[i] { + continue; + } + let points = &mut multi_points[i]; + let stride = multi_strides[i]; + let (start, end) = collected_points[i]; + for (k, new_difference_offset) in (start..end).enumerate() { + let inv = &new_differences[new_difference_offset]; + let p1 = points[k * stride * 2]; + let p2 = points[k * stride * 2 + stride]; + points[k * stride * 2] = point_add_double(p1, p2, inv); + } + + // Update the stride for this bucket + multi_strides[i] *= 2; + } + } + + // multi_points.into_iter().map(|points| points[0]).collect() + // TODO: using this incase we have empty vectors passed in + multi_points + .into_iter() + .map(|points| points.get(0).copied().unwrap_or(G1Affine::default())) + .collect() +} + +#[cfg(test)] +mod tests { + + use super::{batch_addition, multi_batch_addition}; + use blstrs::{G1Affine, G1Projective}; + use group::Group; + + #[test] + fn test_batch_addition() { + let num_points = 100; + let points: Vec = (0..num_points) + .map(|_| G1Projective::random(&mut rand::thread_rng()).into()) + .collect(); + + let expected_result: G1Affine = points + .iter() + .fold(G1Projective::identity(), |acc, p| acc + p) + .into(); + + let got_result = batch_addition(points.clone()); + assert_eq!(expected_result, got_result); + } + + #[test] + fn test_multi_batch_addition() { + let num_points = 100; + let num_sets = 5; + let random_sets_of_points: Vec> = (0..num_sets) + .map(|_| { + (0..num_points) + .map(|_| G1Projective::random(&mut rand::thread_rng()).into()) + .collect() + }) + .collect(); + let random_sets_of_points_clone = random_sets_of_points.clone(); + + let expected_results: Vec<_> = random_sets_of_points + .into_iter() + .map(|points| batch_addition(points)) + .collect(); + + let got_results = multi_batch_addition(random_sets_of_points_clone); + assert_eq!(got_results, expected_results); + } +} diff --git a/cryptography/bls12_381/src/lib.rs b/cryptography/bls12_381/src/lib.rs index e21a5a26..0214b9b6 100644 --- a/cryptography/bls12_381/src/lib.rs +++ b/cryptography/bls12_381/src/lib.rs @@ -1,3 +1,4 @@ +mod batch_add; pub mod batch_inversion; mod booth_encoding; pub mod fixed_base_msm; From 296cf21e1ea059c57ab848d57ca52f6d8be1e2c3 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 16:32:21 +0100 Subject: [PATCH 04/25] put booth testing code under cfg --- cryptography/bls12_381/src/booth_encoding.rs | 70 +++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/cryptography/bls12_381/src/booth_encoding.rs b/cryptography/bls12_381/src/booth_encoding.rs index 51a90f70..67f4e12c 100644 --- a/cryptography/bls12_381/src/booth_encoding.rs +++ b/cryptography/bls12_381/src/booth_encoding.rs @@ -1,9 +1,5 @@ use std::ops::Neg; -use blstrs::{G1Projective, Scalar}; -use ff::{Field, PrimeField}; - -use crate::G1Point; // TODO: Link to halo2 file + docs + comments pub fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i32 { // Booth encoding: @@ -49,42 +45,52 @@ pub fn get_booth_index(window_index: usize, window_size: usize, el: &[u8]) -> i3 } } -#[test] -fn smoke_scalar_mul() { - use group::prime::PrimeCurveAffine; - let gen = G1Point::generator(); - let s = -Scalar::ONE; +#[cfg(test)] +mod tests { + use std::ops::Neg; - let res = gen * s; + use super::get_booth_index; + use crate::G1Point; + use blstrs::{G1Projective, Scalar}; + use ff::{Field, PrimeField}; - let got = mul(&s, &gen, 4); + #[test] + fn smoke_scalar_mul() { + use group::prime::PrimeCurveAffine; + let gen = G1Point::generator(); + let s = -Scalar::ONE; - assert_eq!(G1Point::from(res), got) -} + let res = gen * s; -fn mul(scalar: &Scalar, point: &G1Point, window: usize) -> G1Point { - let u = scalar.to_bytes_le(); - let n = Scalar::NUM_BITS as usize / window + 1; + let got = mul(&s, &gen, 4); - let table = (0..=1 << (window - 1)) - .map(|i| point * Scalar::from(i as u64)) - .collect::>(); + assert_eq!(G1Point::from(res), got) + } - let mut acc: G1Projective = G1Point::default().into(); - for i in (0..n).rev() { - for _ in 0..window { - acc = acc + acc; - } + fn mul(scalar: &Scalar, point: &G1Point, window: usize) -> G1Point { + let u = scalar.to_bytes_le(); + let n = Scalar::NUM_BITS as usize / window + 1; - let idx = get_booth_index(i as usize, window, u.as_ref()); + let table = (0..=1 << (window - 1)) + .map(|i| point * Scalar::from(i as u64)) + .collect::>(); - if idx.is_negative() { - acc += table[idx.unsigned_abs() as usize].neg(); - } - if idx.is_positive() { - acc += table[idx.unsigned_abs() as usize]; + let mut acc: G1Projective = G1Point::default().into(); + for i in (0..n).rev() { + for _ in 0..window { + acc = acc + acc; + } + + let idx = get_booth_index(i as usize, window, u.as_ref()); + + if idx.is_negative() { + acc += table[idx.unsigned_abs() as usize].neg(); + } + if idx.is_positive() { + acc += table[idx.unsigned_abs() as usize]; + } } - } - acc.into() + acc.into() + } } From d72fd3e5a5f2b08ba18a315470bded5568ccd809 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 16:32:38 +0100 Subject: [PATCH 05/25] initial msm code --- cryptography/bls12_381/src/lib.rs | 2 +- cryptography/bls12_381/src/msm.rs | 445 ++++++++++++++++++++++++++++++ 2 files changed, 446 insertions(+), 1 deletion(-) create mode 100644 cryptography/bls12_381/src/msm.rs diff --git a/cryptography/bls12_381/src/lib.rs b/cryptography/bls12_381/src/lib.rs index 0214b9b6..aa33901b 100644 --- a/cryptography/bls12_381/src/lib.rs +++ b/cryptography/bls12_381/src/lib.rs @@ -3,7 +3,7 @@ pub mod batch_inversion; mod booth_encoding; pub mod fixed_base_msm; pub mod lincomb; - +mod msm; // Re-exporting the blstrs crate // Re-export ff and group, so other crates do not need to directly import(and independently version) them diff --git a/cryptography/bls12_381/src/msm.rs b/cryptography/bls12_381/src/msm.rs new file mode 100644 index 00000000..7f30422e --- /dev/null +++ b/cryptography/bls12_381/src/msm.rs @@ -0,0 +1,445 @@ +use blstrs::G1Affine; +use blstrs::G1Projective; +use blstrs::Scalar; +use ff::PrimeField; +use group::Group; + +use crate::booth_encoding::get_booth_index; +use crate::g1_batch_normalize; +use crate::G1Point; + +#[derive(Debug, Clone, Copy)] +pub struct Info { + base_idx: u64, + bucket_idx: u64, + // We use precomputations which removes the window_idx + // window_idx: u64, + sign: bool, +} + +pub fn precompute( + window_size: usize, + number_of_windows: usize, + points: &[G1Point], +) -> Vec { + // For each point, we compute number_of_windows-1 points + let mut results = Vec::new(); + for point in points { + // First add the original point + results.push(point.into()); + + // Then scale each successive point by 2^window_size + for _ in 0..number_of_windows - 1 { + let mut last_point_scaled_window_size: G1Projective = *results.last().unwrap(); + for _ in 0..window_size { + last_point_scaled_window_size = last_point_scaled_window_size.double() + } + results.push(last_point_scaled_window_size) + } + } + g1_batch_normalize(&results) +} + +// Note: This does not work if the input points are [P, -P] for example +// We could iterate for that case, but its unlikely given the points are random +pub fn msm_best2( + coeffs: &[Scalar], + bases_precomputed: &[G1Point], + window_size: usize, +) -> G1Projective { + // assert_eq!(coeffs.len(), bases.len()); + + let c = window_size; + + // coeffs to byte representation + let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_bytes_le()).collect(); + + // Information on the points we want to add + let mut all_information = vec![vec![]; 1 << (c - 1)]; + + // number of windows + let number_of_windows = Scalar::NUM_BITS as usize / c + 1; + + for window_idx in 0..number_of_windows { + for (base_idx, coeff) in coeffs.iter().enumerate() { + let buck_idx = get_booth_index(window_idx, c, coeff.as_ref()); + + if buck_idx != 0 { + // parse bucket index + let sign = buck_idx.is_positive(); + let buck_idx = buck_idx.unsigned_abs() as usize - 1; + // + // Since we are using precomputed points, the base_idx is augmented + // + // We need to modify the base index to take into account: + // - The window, so we fetch the precomputed base for that window + // - The position of the point in the precomputed bases, + // relative to the original bases vector + // + // If you imagine we had: + // [P1, P2, P3] + // precomp = [P1, c*P1,..., (num_window-1)*c*P1, P2,...] + // + // The index of P1, P2, etc can be computed by: + // augmented_base_idx = base_idx * num_windows + // Then in order to get the correct point, we do: + // augmented_base_idx += window_idx + let base_idx = (base_idx * number_of_windows) + window_idx; + + let info = Info { + bucket_idx: buck_idx as u64, + sign, + base_idx: base_idx as u64, + }; + + all_information[buck_idx].push(info); + } + } + } + + // All of the above costs about 200 microseconds on 64 points. + // Using a vector is about 3 times faster, but the points are not ordered by bucket index + // so we could try and do a second pass on the vector to see if thats quicker for small numPoints + // + // Note: for duplicate points, we could either put them in the running sum + // or use the optimized formulas + let mut all_points = Vec::new(); + let mut bucket_indices = Vec::new(); + for (bucket_idx, points) in all_information.into_iter().enumerate() { + if points.is_empty() { + continue; + } + + // batch add each bucket + let res: Vec<_> = points + .into_iter() + .map(|point_info| { + let mut p = bases_precomputed[point_info.base_idx as usize]; + if !point_info.sign { + p = -p; + } + p + }) + .collect(); + // TODO: We should make sure that we cannot get two points being added together or + // TODO: have the formula deal with it + all_points.push(res); + bucket_indices.push((bucket_idx + 1) as u64); // Add one here since the zeroth bucket will bucket_1, bucket_K eventually translates to K * sum_of_bucket + } + + let buckets_added = crate::batch_add::multi_batch_addition(all_points); + + subsum_accumulation(&bucket_indices, &buckets_added) + // Now we have all of the information needed + // The precomputations that we did, effectively allowed us + // to remove the notion of a "window" -- there is only + // one window, effectively. + // + // Note: For 64 points, this is about 3200 elements. + // + // Do some more preprocessing to reduce the work needed +} + +pub fn msm_best2_noinfo( + coeffs: &[Scalar], + bases_precomputed: &[G1Point], + window_size: usize, +) -> G1Projective { + // assert_eq!(coeffs.len(), bases.len()); + + let c = window_size; + + // coeffs to byte representation + let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_bytes_le()).collect(); + + // Information on the points we want to add + let mut all_information = vec![vec![]; 1 << (c - 1)]; + + // number of windows + let number_of_windows = Scalar::NUM_BITS as usize / c + 1; + + for window_idx in 0..number_of_windows { + for (base_idx, coeff) in coeffs.iter().enumerate() { + let buck_idx = get_booth_index(window_idx, c, coeff.as_ref()); + + if buck_idx != 0 { + // parse bucket index + let sign = buck_idx.is_positive(); + let buck_idx = buck_idx.unsigned_abs() as usize - 1; + // + // Since we are using precomputed points, the base_idx is augmented + // + // We need to modify the base index to take into account: + // - The window, so we fetch the precomputed base for that window + // - The position of the point in the precomputed bases, + // relative to the original bases vector + // + // If you imagine we had: + // [P1, P2, P3] + // precomp = [P1, c*P1,..., (num_window-1)*c*P1, P2,...] + // + // The index of P1, P2, etc can be computed by: + // augmented_base_idx = base_idx * num_windows + // Then in order to get the correct point, we do: + // augmented_base_idx += window_idx + let base_idx = (base_idx * number_of_windows) + window_idx; + + let point = if sign { + bases_precomputed[base_idx as usize] + } else { + -bases_precomputed[base_idx as usize] + }; + + all_information[buck_idx].push(point); + } + } + } + + // All of the above costs about 200 microseconds on 64 points. + // Using a vector is about 3 times faster, but the points are not ordered by bucket index + // so we could try and do a second pass on the vector to see if thats quicker for small numPoints + // + // Note: for duplicate points, we could either put them in the running sum + // or use the optimized formulas + // let mut all_points = Vec::new(); + // let mut bucket_indices = Vec::new(); + // TODO: This should return the points too, ie skip the empty buckets + let bucket_indices: Vec<_> = all_information + .iter() + .enumerate() + .filter(|(_, points)| !points.is_empty()) + .map(|(index, _)| (index + 1) as u64) + .collect(); + + let buckets_added = crate::batch_add::multi_batch_addition(all_information); + + subsum_accumulation(&bucket_indices, &buckets_added) +} + +// Algorithm1 from the LFG paper +// TODO: Fix later, this algorithm is broken in the POC and the paper +// fn subsum_accumulation(b: &[u64], s: &[G1Affine]) -> G1Projective { +// assert_eq!(b.len(), s.len(), "Input arrays must have the same length"); +// let d = *b.iter().max().unwrap() as usize; + +// // Define a length-(d + 1) array tmp = [0] × (d + 1) +// let mut tmp_d = vec![G1Projective::identity(); d + 1]; +// let mut tmp = G1Projective::identity(); + +// // Iterate from |B| to 1 by -1 +// for i in (1..b.len()).rev() { +// // tmp[0] = tmp[0] + S_i +// tmp += s[i]; + +// // k = b_i - b_{i-1} +// let k = (b[i] - b[i - 1]) as usize; + +// // if k >= 1 then tmp[k] = tmp[k] + tmp[0] +// // if k >= 1 { +// // let t0 = tmp_d[0]; +// // tmp_d[k] += t0; +// // } +// tmp_d[k] += tmp; +// } + +// // The original paper has a bug and does not deal with the case +// // when there is only 1 point +// if b.len() == 1 { +// tmp_d[(b[0] - 1) as usize] = s[0].into() +// } + +// // Now do running sum stuff +// // summation by parts +// // e.g. 3a + 2b + 1c = a + +// // (a) + b + +// // ((a) + b) + c +// let mut running_sum = G1Projective::identity(); +// let mut res = G1Projective::identity(); +// // for i in (0..d).rev() { +// // running_sum += &tmp_d[i]; +// // res += &running_sum; +// // } +// // We can use d to skip top buckets that are empty (done above) +// tmp_d.into_iter().rev().for_each(|b| { +// running_sum += &b; +// res += &running_sum; +// }); +// res +// } + +// This is poormans version of Algorithm 1 from LFG +// +// It seems to be faster, but thats likely because the actual one is not implemented +// correctly and does not have the short cuts for bucket sizes 0 and 1 +fn subsum_accumulation(b: &[u64], s: &[G1Affine]) -> G1Projective { + // If we only have one, then we can return the scalar multiplication + // This is an assumption that LFG was making too. + if b.len() == 0 { + return G1Projective::identity(); + } + if b.len() == 1 { + return s[0] * Scalar::from(b[0]); + } + + // Now do running sum stuff + // summation by parts but it does not need to be continuos + let mut running_sum = G1Projective::identity(); + let mut res = G1Projective::identity(); + + s.into_iter().enumerate().rev().for_each(|(index, point)| { + running_sum += point; + res += &running_sum; + + // Check that we are not at the last point + if index > 0 { + // We cannot fail here since we know the length of b is atleast 2 + let diff = b[index] - b[index - 1] - 1; // Note the -1 because if we have 2a + 1b, the diff will be 0 and the for loop will be skipped + // Before going to the next point, we need to account + // for the possible difference in scalars. + // ie we could be doing 3 * a + 1 * b + for _ in 0..diff { + res += running_sum + } + } + }); + res +} + +// summation by parts +// e.g. 3a + 2b + 1c = a + +// (a) + b + +// ((a) + b) + c +// +// Note: This assumes the points are in ascending order. +// ie 1 * points[0] + 2 * points[1] + ... + n * points[n-1] +#[inline(always)] +fn horners_rule_sum(points: &[G1Point]) -> G1Projective { + let mut running_sum = G1Projective::identity(); + let mut res = G1Projective::identity(); + points.into_iter().rev().for_each(|b| { + running_sum += b; + res += &running_sum; + }); + res +} + +#[cfg(test)] +mod test { + + use crate::{ + msm::{horners_rule_sum, msm_best2, precompute}, + G1Point, G1Projective, Scalar, + }; + + use blstrs::G1Affine; + use group::{prime::PrimeCurveAffine, Group}; + + use super::subsum_accumulation; + + #[test] + fn subsum_smoke_test() { + let result = subsum_accumulation(&[1], &[G1Affine::generator()]); + assert_eq!(G1Projective::generator(), result); + + let result = subsum_accumulation(&[2], &[G1Affine::generator()]); + assert_eq!(G1Projective::generator() * Scalar::from(2u64), result); + + let result = subsum_accumulation(&[1, 2], &[G1Affine::generator(), G1Affine::generator()]); + assert_eq!(G1Projective::generator() * Scalar::from(3u64), result); + + let result = subsum_accumulation(&[1, 3], &[G1Affine::generator(), G1Affine::generator()]); + assert_eq!(G1Projective::generator() * Scalar::from(4u64), result); + + let result = + subsum_accumulation(&[1, 300], &[-G1Affine::generator(), G1Affine::generator()]); + assert_eq!(G1Projective::generator() * Scalar::from(299u64), result); + + let result = subsum_accumulation( + &[1, 2, 3, 4, 10, 22, 100], + &[ + G1Affine::generator(), + G1Affine::generator(), + G1Affine::generator(), + G1Affine::generator(), + G1Affine::generator(), + G1Affine::generator(), + G1Affine::generator(), + ], + ); + assert_eq!( + G1Projective::generator() * Scalar::from(1 + 2 + 3 + 4 + 10 + 22 + 100), + result + ); + } + + #[test] + fn horners_sum_smoke_test() { + let result = horners_rule_sum(&[G1Affine::generator()]); + assert_eq!(G1Projective::generator(), result); + + let result = horners_rule_sum(&[ + -G1Affine::generator(), + G1Affine::generator(), + G1Affine::generator(), + ]); + assert_eq!( + G1Projective::generator() * Scalar::from(3u64) + + G1Projective::generator() * Scalar::from(2u64) + + -G1Projective::generator(), + result + ); + } + + #[test] + fn smoke_test_msm_best2() { + use crate::ff::PrimeField; + let window_size = 7; + let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; + + let precomp_bases = precompute(window_size, number_of_windows, &[G1Point::generator()]); + let scalar = -Scalar::from(2); + + let res = msm_best2(&[scalar], &precomp_bases, window_size); + assert_eq!(res, G1Projective::generator() * scalar); + } + + #[test] + fn smoke_test_msm_best2_neg() { + use crate::ff::PrimeField; + let window_size = 7; + let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; + + let input_points = vec![G1Point::generator(), G1Point::generator()]; + let input_scalars = vec![-Scalar::from(1), -Scalar::from(2)]; + let precomp_bases = precompute(window_size, number_of_windows, &input_points); + + let res = msm_best2(&input_scalars, &precomp_bases, window_size); + assert_eq!(res, naive_msm(&input_points, &input_scalars)); + } + + #[test] + fn smoke_test_msm_best2_double_scalar() { + use crate::ff::PrimeField; + let window_size = 7; + let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; + + let point_b: G1Affine = (G1Projective::generator() + G1Projective::generator()).into(); + let point_c: G1Affine = + (G1Projective::generator().double() + G1Projective::generator().double()).into(); + let input_points = vec![G1Point::generator(), point_b, point_c]; + let input_scalars = vec![Scalar::from(1), Scalar::from(2), Scalar::from(3u64)]; + let precomp_bases = precompute(window_size, number_of_windows, &input_points); + + let res = msm_best2(&input_scalars, &precomp_bases, window_size); + assert_eq!(res, naive_msm(&input_points, &input_scalars)); + } + + fn naive_msm(points: &[G1Point], scalars: &[Scalar]) -> G1Projective { + assert!(points.len() == scalars.len()); + let mut result = G1Projective::identity(); + for (scalar, point) in scalars.into_iter().zip(points) { + result += point * scalar + } + result + } +} From 3eca854200419c4ef789b4f4a5170633cc32b015 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 16:51:24 +0100 Subject: [PATCH 06/25] add benchmarks --- cryptography/bls12_381/benches/benchmark.rs | 16 +++++++++++++++- cryptography/bls12_381/src/lib.rs | 4 ++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cryptography/bls12_381/benches/benchmark.rs b/cryptography/bls12_381/benches/benchmark.rs index 7db62c3b..4ab34db5 100644 --- a/cryptography/bls12_381/benches/benchmark.rs +++ b/cryptography/bls12_381/benches/benchmark.rs @@ -5,6 +5,7 @@ use crate_crypto_internal_eth_kzg_bls12_381::{ g1_batch_normalize, g2_batch_normalize, group::Group, lincomb::{g1_lincomb, g1_lincomb_unsafe, g2_lincomb, g2_lincomb_unsafe}, + msm::{msm_best2, msm_best2_noinfo, precompute}, G1Projective, G2Projective, Scalar, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -28,12 +29,25 @@ pub fn fixed_base_msm(c: &mut Criterion) { .into_iter() .map(|p| p.into()) .collect(); - let fbm = FixedBaseMSM::new(generators, UsePrecomp::Yes { width: 8 }); + let fbm = FixedBaseMSM::new(generators.clone(), UsePrecomp::Yes { width: 8 }); let scalars: Vec<_> = random_scalars(length); c.bench_function("bls12_381 fixed_base_msm length=64 width=8", |b| { b.iter(|| fbm.msm(scalars.clone())) }); + + use crate_crypto_internal_eth_kzg_bls12_381::ff::PrimeField; + let window_size = (f64::from(length as u32)).ln().ceil() as usize + 2; + let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; + + let precomp_bases = precompute(window_size, number_of_windows, &generators); + + c.bench_function("bls12_381 fixed_base_msm best2 no info", |b| { + b.iter(|| msm_best2_noinfo(&scalars, &precomp_bases, window_size)) + }); + c.bench_function("bls12_381 fixed_base_msm using pippenger bes32", |b| { + b.iter(|| msm_best2(&scalars, &precomp_bases, window_size)) + }); } pub fn bench_msm(c: &mut Criterion) { diff --git a/cryptography/bls12_381/src/lib.rs b/cryptography/bls12_381/src/lib.rs index aa33901b..f0527015 100644 --- a/cryptography/bls12_381/src/lib.rs +++ b/cryptography/bls12_381/src/lib.rs @@ -3,8 +3,8 @@ pub mod batch_inversion; mod booth_encoding; pub mod fixed_base_msm; pub mod lincomb; -mod msm; -// Re-exporting the blstrs crate +pub mod msm; // TODO: rename since msm is ambiguous + // Re-exporting the blstrs crate // Re-export ff and group, so other crates do not need to directly import(and independently version) them pub use ff; From d1265879f06c8abfc89ce56988a2940142375acc Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 17:07:53 +0100 Subject: [PATCH 07/25] add scratchpad --- cryptography/bls12_381/src/batch_add.rs | 6 ++-- cryptography/bls12_381/src/batch_inversion.rs | 32 +++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/cryptography/bls12_381/src/batch_add.rs b/cryptography/bls12_381/src/batch_add.rs index f79be827..1009b7d4 100644 --- a/cryptography/bls12_381/src/batch_add.rs +++ b/cryptography/bls12_381/src/batch_add.rs @@ -1,4 +1,4 @@ -use crate::batch_inversion::batch_inverse; +use crate::batch_inversion::{batch_inverse, batch_inverse_scratch_pad}; use blstrs::{Fp, G1Affine}; /// Adds multiple points together in affine representation, batching the inversions @@ -121,7 +121,7 @@ pub fn multi_batch_addition(mut multi_points: Vec>) -> Vec>) -> Vec(v: &mut [F]) { - // Montgomery’s Trick and Fast Implementation of Masked AES + let mut scratch_pad = Vec::with_capacity(v.len()); + batch_inverse_scratch_pad(v, &mut scratch_pad); +} + +/// Given a vector of field elements {v_i}, compute the vector {v_i^(-1)} +/// +/// A scratchpad is used to avoid excessive allocations in the case that this method is +/// called repeatedly. +/// +/// Panics if any of the elements are zero +pub fn batch_inverse_scratch_pad(v: &mut [F], scratchpad: &mut Vec) { + // Montgomery's Trick and Fast Implementation of Masked AES // Genelle, Prouff and Quisquater // Section 3.2 // but with an optimization to multiply every element in the returned vector by coeff + // Clear the scratchpad and ensure it has enough capacity + scratchpad.clear(); + scratchpad.reserve(v.len()); + // First pass: compute [a, ab, abc, ...] - let mut prod = Vec::with_capacity(v.len()); let mut tmp = F::ONE; - for f in v.iter().filter(|f| !f.is_zero_vartime()) { + for f in v.iter() { tmp.mul_assign(f); - prod.push(tmp); + scratchpad.push(tmp); } - assert_eq!(prod.len(), v.len(), "inversion by zero is not allowed"); - // Invert `tmp`. tmp = tmp .invert() @@ -25,14 +39,12 @@ pub fn batch_inverse(v: &mut [F]) { .iter_mut() // Backwards .rev() - // Ignore normalized elements - .filter(|f| !f.is_zero_vartime()) // Backwards, skip last element, fill in one for last term. - .zip(prod.into_iter().rev().skip(1).chain(Some(F::ONE))) + .zip(scratchpad.iter().rev().skip(1).chain(Some(&F::ONE))) { // tmp := tmp * f; f := tmp * s = 1/f let new_tmp = tmp * *f; - *f = tmp * s; + *f = tmp * *s; tmp = new_tmp; } } From 7589402fb772c596b9af5c8e5e5e4beb042c561a Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 17:18:17 +0100 Subject: [PATCH 08/25] remove now unused method: scratchpad does not make it faster than multi_batch method --- cryptography/bls12_381/src/batch_add.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cryptography/bls12_381/src/batch_add.rs b/cryptography/bls12_381/src/batch_add.rs index 1009b7d4..6a26d636 100644 --- a/cryptography/bls12_381/src/batch_add.rs +++ b/cryptography/bls12_381/src/batch_add.rs @@ -83,14 +83,6 @@ pub fn batch_addition_mut(points: &mut [G1Affine]) -> G1Affine { points[0] } -pub fn naive_multi_batch_addition(mut multi_points: Vec>) -> Vec { - // TODO: Try using a scratch pad here - multi_points - .into_iter() - .map(|points| batch_addition(points)) - .collect() -} - // Similar to batch addition, however we amortize across different batches // TODO: Clean up -- This has a greater complexity than the regular algorithm // TODO so we want to check if it makes a difference in our usecase. From a4b66f3bf35693637f438217a4978b285f6f4911 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 17:47:44 +0100 Subject: [PATCH 09/25] fix: remove empty buckets --- cryptography/bls12_381/src/msm.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cryptography/bls12_381/src/msm.rs b/cryptography/bls12_381/src/msm.rs index 7f30422e..9a646b96 100644 --- a/cryptography/bls12_381/src/msm.rs +++ b/cryptography/bls12_381/src/msm.rs @@ -203,12 +203,11 @@ pub fn msm_best2_noinfo( // or use the optimized formulas // let mut all_points = Vec::new(); // let mut bucket_indices = Vec::new(); - // TODO: This should return the points too, ie skip the empty buckets - let bucket_indices: Vec<_> = all_information - .iter() + let (bucket_indices, all_information): (Vec<_>, Vec<_>) = all_information + .into_iter() .enumerate() .filter(|(_, points)| !points.is_empty()) - .map(|(index, _)| (index + 1) as u64) + .map(|(index, points)| (((index + 1) as u64), points)) .collect(); let buckets_added = crate::batch_add::multi_batch_addition(all_information); From a8eb053d179ff7dc9c4e691341a91deef8edfdf6 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 19:24:49 +0100 Subject: [PATCH 10/25] use iterator --- cryptography/bls12_381/src/msm.rs | 45 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/cryptography/bls12_381/src/msm.rs b/cryptography/bls12_381/src/msm.rs index 9a646b96..8cd32f63 100644 --- a/cryptography/bls12_381/src/msm.rs +++ b/cryptography/bls12_381/src/msm.rs @@ -103,29 +103,28 @@ pub fn msm_best2( // // Note: for duplicate points, we could either put them in the running sum // or use the optimized formulas - let mut all_points = Vec::new(); - let mut bucket_indices = Vec::new(); - for (bucket_idx, points) in all_information.into_iter().enumerate() { - if points.is_empty() { - continue; - } - - // batch add each bucket - let res: Vec<_> = points - .into_iter() - .map(|point_info| { - let mut p = bases_precomputed[point_info.base_idx as usize]; - if !point_info.sign { - p = -p; - } - p - }) - .collect(); - // TODO: We should make sure that we cannot get two points being added together or - // TODO: have the formula deal with it - all_points.push(res); - bucket_indices.push((bucket_idx + 1) as u64); // Add one here since the zeroth bucket will bucket_1, bucket_K eventually translates to K * sum_of_bucket - } + let (all_points, bucket_indices): (Vec>, Vec) = all_information + .into_iter() + .enumerate() + .filter_map(|(bucket_idx, points)| { + if points.is_empty() { + None + } else { + let res: Vec<_> = points + .into_iter() + .map(|point_info| { + let mut p = bases_precomputed[point_info.base_idx as usize]; + if !point_info.sign { + p = -p; + } + p + }) + .collect(); + + Some((res, (bucket_idx + 1) as u64)) + } + }) + .unzip(); let buckets_added = crate::batch_add::multi_batch_addition(all_points); From 6ca6224839651531c2e10eda9fb1bad9003eb523 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 19:29:03 +0100 Subject: [PATCH 11/25] rename module --- .../{msm.rs => fixed_base_msm_pippenger.rs} | 110 +----------------- cryptography/bls12_381/src/lib.rs | 4 +- 2 files changed, 3 insertions(+), 111 deletions(-) rename cryptography/bls12_381/src/{msm.rs => fixed_base_msm_pippenger.rs} (74%) diff --git a/cryptography/bls12_381/src/msm.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs similarity index 74% rename from cryptography/bls12_381/src/msm.rs rename to cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index 8cd32f63..b6734779 100644 --- a/cryptography/bls12_381/src/msm.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -8,15 +8,6 @@ use crate::booth_encoding::get_booth_index; use crate::g1_batch_normalize; use crate::G1Point; -#[derive(Debug, Clone, Copy)] -pub struct Info { - base_idx: u64, - bucket_idx: u64, - // We use precomputations which removes the window_idx - // window_idx: u64, - sign: bool, -} - pub fn precompute( window_size: usize, number_of_windows: usize, @@ -40,105 +31,6 @@ pub fn precompute( g1_batch_normalize(&results) } -// Note: This does not work if the input points are [P, -P] for example -// We could iterate for that case, but its unlikely given the points are random -pub fn msm_best2( - coeffs: &[Scalar], - bases_precomputed: &[G1Point], - window_size: usize, -) -> G1Projective { - // assert_eq!(coeffs.len(), bases.len()); - - let c = window_size; - - // coeffs to byte representation - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_bytes_le()).collect(); - - // Information on the points we want to add - let mut all_information = vec![vec![]; 1 << (c - 1)]; - - // number of windows - let number_of_windows = Scalar::NUM_BITS as usize / c + 1; - - for window_idx in 0..number_of_windows { - for (base_idx, coeff) in coeffs.iter().enumerate() { - let buck_idx = get_booth_index(window_idx, c, coeff.as_ref()); - - if buck_idx != 0 { - // parse bucket index - let sign = buck_idx.is_positive(); - let buck_idx = buck_idx.unsigned_abs() as usize - 1; - // - // Since we are using precomputed points, the base_idx is augmented - // - // We need to modify the base index to take into account: - // - The window, so we fetch the precomputed base for that window - // - The position of the point in the precomputed bases, - // relative to the original bases vector - // - // If you imagine we had: - // [P1, P2, P3] - // precomp = [P1, c*P1,..., (num_window-1)*c*P1, P2,...] - // - // The index of P1, P2, etc can be computed by: - // augmented_base_idx = base_idx * num_windows - // Then in order to get the correct point, we do: - // augmented_base_idx += window_idx - let base_idx = (base_idx * number_of_windows) + window_idx; - - let info = Info { - bucket_idx: buck_idx as u64, - sign, - base_idx: base_idx as u64, - }; - - all_information[buck_idx].push(info); - } - } - } - - // All of the above costs about 200 microseconds on 64 points. - // Using a vector is about 3 times faster, but the points are not ordered by bucket index - // so we could try and do a second pass on the vector to see if thats quicker for small numPoints - // - // Note: for duplicate points, we could either put them in the running sum - // or use the optimized formulas - let (all_points, bucket_indices): (Vec>, Vec) = all_information - .into_iter() - .enumerate() - .filter_map(|(bucket_idx, points)| { - if points.is_empty() { - None - } else { - let res: Vec<_> = points - .into_iter() - .map(|point_info| { - let mut p = bases_precomputed[point_info.base_idx as usize]; - if !point_info.sign { - p = -p; - } - p - }) - .collect(); - - Some((res, (bucket_idx + 1) as u64)) - } - }) - .unzip(); - - let buckets_added = crate::batch_add::multi_batch_addition(all_points); - - subsum_accumulation(&bucket_indices, &buckets_added) - // Now we have all of the information needed - // The precomputations that we did, effectively allowed us - // to remove the notion of a "window" -- there is only - // one window, effectively. - // - // Note: For 64 points, this is about 3200 elements. - // - // Do some more preprocessing to reduce the work needed -} - pub fn msm_best2_noinfo( coeffs: &[Scalar], bases_precomputed: &[G1Point], @@ -325,7 +217,7 @@ fn horners_rule_sum(points: &[G1Point]) -> G1Projective { mod test { use crate::{ - msm::{horners_rule_sum, msm_best2, precompute}, + fixed_base_msm_pippenger::{horners_rule_sum, msm_best2_noinfo as msm_best2, precompute}, G1Point, G1Projective, Scalar, }; diff --git a/cryptography/bls12_381/src/lib.rs b/cryptography/bls12_381/src/lib.rs index f0527015..7d349584 100644 --- a/cryptography/bls12_381/src/lib.rs +++ b/cryptography/bls12_381/src/lib.rs @@ -2,15 +2,15 @@ mod batch_add; pub mod batch_inversion; mod booth_encoding; pub mod fixed_base_msm; +pub mod fixed_base_msm_pippenger; pub mod lincomb; -pub mod msm; // TODO: rename since msm is ambiguous - // Re-exporting the blstrs crate // Re-export ff and group, so other crates do not need to directly import(and independently version) them pub use ff; pub use group; use group::{prime::PrimeCurveAffine, Curve}; +// Re-exporting the blstrs crate pub type G1Point = blstrs::G1Affine; pub type G1Projective = blstrs::G1Projective; From 1b308bb7817190235cde43b2a89de9d1c7438765 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 19:34:50 +0100 Subject: [PATCH 12/25] add struct for fixed base pippenger --- .../bls12_381/src/fixed_base_msm_pippenger.rs | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index b6734779..8bd5ad8b 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -8,6 +8,31 @@ use crate::booth_encoding::get_booth_index; use crate::g1_batch_normalize; use crate::G1Point; +pub struct FixedBaseMSMPippenger { + precomputed_points: Vec, + number_of_windows: usize, + window_size: usize, +} + +impl FixedBaseMSMPippenger { + pub fn new(points: &[G1Affine]) -> FixedBaseMSMPippenger { + // The +2 was empirically seen to give better results + let window_size = (f64::from(points.len() as u32)).ln().ceil() as usize + 2; + let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; + let precomputed_points = precompute(window_size, number_of_windows, points); + + FixedBaseMSMPippenger { + precomputed_points, + number_of_windows, + window_size, + } + } + + pub fn msm(&self, scalars: &[Scalar]) -> G1Projective { + msm_best2_noinfo(scalars, &self.precomputed_points, self.window_size) + } +} + pub fn precompute( window_size: usize, number_of_windows: usize, From 350d3f4de7b6950104ccffc03136a1c3e744b435 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Mon, 2 Sep 2024 19:52:10 +0100 Subject: [PATCH 13/25] remove commented line --- cryptography/bls12_381/src/batch_add.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/cryptography/bls12_381/src/batch_add.rs b/cryptography/bls12_381/src/batch_add.rs index 6a26d636..e962e986 100644 --- a/cryptography/bls12_381/src/batch_add.rs +++ b/cryptography/bls12_381/src/batch_add.rs @@ -146,7 +146,6 @@ pub fn multi_batch_addition(mut multi_points: Vec>) -> Vec Date: Mon, 2 Sep 2024 19:53:09 +0100 Subject: [PATCH 14/25] update benchmarks --- cryptography/bls12_381/benches/benchmark.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/cryptography/bls12_381/benches/benchmark.rs b/cryptography/bls12_381/benches/benchmark.rs index 4ab34db5..36a29a7d 100644 --- a/cryptography/bls12_381/benches/benchmark.rs +++ b/cryptography/bls12_381/benches/benchmark.rs @@ -2,10 +2,10 @@ use crate_crypto_internal_eth_kzg_bls12_381::{ batch_inversion, ff::Field, fixed_base_msm::{FixedBaseMSM, UsePrecomp}, + fixed_base_msm_pippenger::FixedBaseMSMPippenger, g1_batch_normalize, g2_batch_normalize, group::Group, lincomb::{g1_lincomb, g1_lincomb_unsafe, g2_lincomb, g2_lincomb_unsafe}, - msm::{msm_best2, msm_best2_noinfo, precompute}, G1Projective, G2Projective, Scalar, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -36,18 +36,12 @@ pub fn fixed_base_msm(c: &mut Criterion) { b.iter(|| fbm.msm(scalars.clone())) }); - use crate_crypto_internal_eth_kzg_bls12_381::ff::PrimeField; - let window_size = (f64::from(length as u32)).ln().ceil() as usize + 2; - let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; + let fixed_base_pip = FixedBaseMSMPippenger::new(&generators); - let precomp_bases = precompute(window_size, number_of_windows, &generators); - - c.bench_function("bls12_381 fixed_base_msm best2 no info", |b| { - b.iter(|| msm_best2_noinfo(&scalars, &precomp_bases, window_size)) - }); - c.bench_function("bls12_381 fixed_base_msm using pippenger bes32", |b| { - b.iter(|| msm_best2(&scalars, &precomp_bases, window_size)) - }); + c.bench_function( + "bls12_381 fixed_base_msm best2 no info (fixed base pip)", + |b| b.iter(|| fixed_base_pip.msm(&scalars)), + ); } pub fn bench_msm(c: &mut Criterion) { From aa2a8bccda22dc0a421ee816f56a1ebb914aa0f2 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 11:04:05 +0100 Subject: [PATCH 15/25] commit bos-coster --- .../bls12_381/src/fixed_base_msm_pippenger.rs | 208 +++++++++++++++++- 1 file changed, 206 insertions(+), 2 deletions(-) diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index 8bd5ad8b..a917bda5 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -1,6 +1,7 @@ use blstrs::G1Affine; use blstrs::G1Projective; use blstrs::Scalar; +use ff::Field; use ff::PrimeField; use group::Group; @@ -127,8 +128,195 @@ pub fn msm_best2_noinfo( .collect(); let buckets_added = crate::batch_add::multi_batch_addition(all_information); + let res = subsum_accumulation(&bucket_indices, &buckets_added); + res +} - subsum_accumulation(&bucket_indices, &buckets_added) +pub fn multi_msm( + matrix_coeffs: &[&[Scalar]], + bases_precomputed: &[G1Point], + window_size: usize, +) -> Vec { + // assert_eq!(coeffs.len(), bases.len()); + + let c = window_size; + + // coeffs to byte representation + let matrix_coeffs: Vec<_> = matrix_coeffs + .iter() + .map(|a| { + a.iter() + .map(|coeff| coeff.to_bytes_le()) + .collect::>() + }) + .collect(); + + // Information on the points we want to add + let mut all_information = vec![vec![]; (1 << (c - 1)) * matrix_coeffs.len()]; + + // number of windows + let number_of_windows = Scalar::NUM_BITS as usize / c + 1; + + for window_idx in 0..number_of_windows { + for (msm_index, coeffs) in matrix_coeffs.iter().enumerate() { + for (base_idx, coeff) in coeffs.iter().enumerate() { + let buck_idx = get_booth_index(window_idx, c, coeff.as_ref()); + + if buck_idx != 0 { + // parse bucket index + let sign = buck_idx.is_positive(); + let buck_idx = buck_idx.unsigned_abs() as usize - 1; + // + // Since we are using precomputed points, the base_idx is augmented + // + // We need to modify the base index to take into account: + // - The window, so we fetch the precomputed base for that window + // - The position of the point in the precomputed bases, + // relative to the original bases vector + // + // If you imagine we had: + // [P1, P2, P3] + // precomp = [P1, c*P1,..., (num_window-1)*c*P1, P2,...] + // + // The index of P1, P2, etc can be computed by: + // augmented_base_idx = base_idx * num_windows + // Then in order to get the correct point, we do: + // augmented_base_idx += window_idx + let base_idx = (base_idx * number_of_windows) + window_idx; + + let point = if sign { + bases_precomputed[base_idx as usize] + } else { + -bases_precomputed[base_idx as usize] + }; + + all_information[buck_idx + (msm_index * (1 << (c - 1)))].push(point); + } + } + } + } + + // All of the above costs about 200 microseconds on 64 points. + // Using a vector is about 3 times faster, but the points are not ordered by bucket index + // so we could try and do a second pass on the vector to see if thats quicker for small numPoints + // + // Note: for duplicate points, we could either put them in the running sum + // or use the optimized formulas + let (chunked_bucket_indices, all_information): (Vec>, Vec<_>) = all_information + .chunks(1 << (c - 1)) + .into_iter() + .map(|chunk| { + let chunked_indices: Vec = chunk + .iter() + .enumerate() + .filter(|(_, points)| !points.is_empty()) + .map(|(index, _)| (index + 1) as u64) + .collect(); + + let all_info: Vec<_> = chunk + .iter() + .filter(|points| !points.is_empty()) + .flat_map(|points| points.iter().cloned()) // Flatten the points directly + .collect(); + + (chunked_indices, all_info) + }) + .collect(); + + let buckets_added = crate::batch_add::multi_batch_addition(all_information); + + let mut result = Vec::new(); + let mut start = 0; + for bucket_indices in chunked_bucket_indices { + result.push(subsum_accumulation( + &bucket_indices, + &buckets_added[start..start + bucket_indices.len()], + )); + start += bucket_indices.len() + } + result +} + +use ruint::aliases::*; +use ruint::Uint; +use std::cmp::Reverse; +use std::collections::BinaryHeap; + +#[derive(Debug, Clone, Copy)] +struct PointScalar { + scalar: U256, + point: G1Projective, +} + +impl PartialEq for PointScalar { + fn eq(&self, other: &Self) -> bool { + self.scalar == other.scalar && self.point == other.point + } +} + +impl Eq for PointScalar {} + +impl PartialOrd for PointScalar { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PointScalar { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.scalar.cmp(&other.scalar) + } +} + +pub fn bos_coster(scalars: &[U256], points: &[G1Projective]) -> G1Projective { + if scalars.len() != points.len() { + panic!("Mismatch between number of scalars and points"); + } + + let mut heap = BinaryHeap::new(); + for (scalar, point) in scalars.into_iter().zip(points.iter()) { + if *scalar != U256::ZERO { + heap.push(PointScalar { + scalar: *scalar, + point: *point, + }); + } + } + + while heap.len() > 1 { + let PointScalar { + scalar: n1, + point: p1, + } = heap.pop().unwrap(); + let PointScalar { + scalar: n2, + point: p2, + } = heap.pop().unwrap(); + + let p_sum = p1 + &p2; + let n_diff = n1 - n2; + + if n_diff > U256::ZERO { + heap.push(PointScalar { + scalar: n_diff, + point: p1.clone(), + }); + } + heap.push(PointScalar { + scalar: n2, + point: p_sum, + }); + } + + if let Some(PointScalar { + scalar: n, + point: p, + }) = heap.pop() + { + p * Scalar::from_bytes_be(&n.to_be_bytes()).unwrap() + } else { + G1Projective::identity() // Identity point + } } // Algorithm1 from the LFG paper @@ -248,8 +436,9 @@ mod test { use blstrs::G1Affine; use group::{prime::PrimeCurveAffine, Group}; + use ruint::aliases::U256; - use super::subsum_accumulation; + use super::{bos_coster, subsum_accumulation, DebugPoint}; #[test] fn subsum_smoke_test() { @@ -332,6 +521,21 @@ mod test { assert_eq!(res, naive_msm(&input_points, &input_scalars)); } + #[test] + fn smoke_test_bos_coster() { + let input_points = vec![G1Point::generator(), G1Point::generator()]; + + // let input_scalars = vec![Scalar::from(1), Scalar::from(2)]; + let input_scalars = vec![U256::from(100), U256::from(200)]; + + let res = crate::fixed_base_msm_pippenger::bos_coster( + &input_scalars, + &[G1Projective::generator(), G1Projective::generator()], + ); + let input_scalars = vec![Scalar::from(100), Scalar::from(200)]; + assert_eq!(res, naive_msm(&input_points, &input_scalars)); + } + #[test] fn smoke_test_msm_best2_double_scalar() { use crate::ff::PrimeField; From e0b44d05da067d99a088fcd71cd320bcff5f0b59 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 11:09:11 +0100 Subject: [PATCH 16/25] bos-coster benchmarks --- cryptography/bls12_381/benches/benchmark.rs | 23 ++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cryptography/bls12_381/benches/benchmark.rs b/cryptography/bls12_381/benches/benchmark.rs index 36a29a7d..2e000af8 100644 --- a/cryptography/bls12_381/benches/benchmark.rs +++ b/cryptography/bls12_381/benches/benchmark.rs @@ -1,3 +1,4 @@ +use blstrs::Scalar; use crate_crypto_internal_eth_kzg_bls12_381::{ batch_inversion, ff::Field, @@ -6,9 +7,10 @@ use crate_crypto_internal_eth_kzg_bls12_381::{ g1_batch_normalize, g2_batch_normalize, group::Group, lincomb::{g1_lincomb, g1_lincomb_unsafe, g2_lincomb, g2_lincomb_unsafe}, - G1Projective, G2Projective, Scalar, + G1Projective, G2Projective, }; use criterion::{criterion_group, criterion_main, Criterion}; +use ruint::aliases::U256; pub fn batch_inversion(c: &mut Criterion) { const NUM_ELEMENTS: usize = 8192; @@ -45,15 +47,26 @@ pub fn fixed_base_msm(c: &mut Criterion) { } pub fn bench_msm(c: &mut Criterion) { - const NUM_G1_ELEMENTS: usize = 4096; + const NUM_G1_ELEMENTS: usize = 64; let polynomial_4096 = random_scalars(NUM_G1_ELEMENTS); - let g1_elements = random_g1_points(NUM_G1_ELEMENTS); - let g1_elements = g1_batch_normalize(&g1_elements); + let g1_elements_proj = random_g1_points(NUM_G1_ELEMENTS); + let g1_elements = g1_batch_normalize(&g1_elements_proj); c.bench_function(&format!("g1 msm of size {}", NUM_G1_ELEMENTS), |b| { b.iter(|| g1_lincomb_unsafe(&g1_elements, &polynomial_4096)) }); + + let scalars_u256: Vec<_> = polynomial_4096 + .iter() + .map(|p| U256::from_be_bytes(p.to_bytes_be())) + .collect(); + + c.bench_function( + &format!("bos-coster msm of size {}", NUM_G1_ELEMENTS), + |b| b.iter(|| bos_coster(&scalars_u256, &g1_elements_proj)), + ); + c.bench_function(&format!("g1 (safe) msm of size {}", NUM_G1_ELEMENTS), |b| { b.iter(|| g1_lincomb(&g1_elements, &polynomial_4096)) }); @@ -94,5 +107,5 @@ fn random_g2_points(size: usize) -> Vec { points } -criterion_group!(benches, batch_inversion, fixed_base_msm, bench_msm); +criterion_group!(benches, /*batch_inversion, fixed_base_msm, fixed_base_msm */ bench_msm); criterion_main!(benches); From 67964cc28ca6cfb4d81ea4e437733644f2e46062 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 11:15:07 +0100 Subject: [PATCH 17/25] remove bos-coster --- cryptography/bls12_381/benches/benchmark.rs | 11 -- .../bls12_381/src/fixed_base_msm_pippenger.rs | 100 +----------------- 2 files changed, 1 insertion(+), 110 deletions(-) diff --git a/cryptography/bls12_381/benches/benchmark.rs b/cryptography/bls12_381/benches/benchmark.rs index 2e000af8..cf4a90fa 100644 --- a/cryptography/bls12_381/benches/benchmark.rs +++ b/cryptography/bls12_381/benches/benchmark.rs @@ -10,7 +10,6 @@ use crate_crypto_internal_eth_kzg_bls12_381::{ G1Projective, G2Projective, }; use criterion::{criterion_group, criterion_main, Criterion}; -use ruint::aliases::U256; pub fn batch_inversion(c: &mut Criterion) { const NUM_ELEMENTS: usize = 8192; @@ -57,16 +56,6 @@ pub fn bench_msm(c: &mut Criterion) { b.iter(|| g1_lincomb_unsafe(&g1_elements, &polynomial_4096)) }); - let scalars_u256: Vec<_> = polynomial_4096 - .iter() - .map(|p| U256::from_be_bytes(p.to_bytes_be())) - .collect(); - - c.bench_function( - &format!("bos-coster msm of size {}", NUM_G1_ELEMENTS), - |b| b.iter(|| bos_coster(&scalars_u256, &g1_elements_proj)), - ); - c.bench_function(&format!("g1 (safe) msm of size {}", NUM_G1_ELEMENTS), |b| { b.iter(|| g1_lincomb(&g1_elements, &polynomial_4096)) }); diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index a917bda5..8df83bcb 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -237,88 +237,6 @@ pub fn multi_msm( result } -use ruint::aliases::*; -use ruint::Uint; -use std::cmp::Reverse; -use std::collections::BinaryHeap; - -#[derive(Debug, Clone, Copy)] -struct PointScalar { - scalar: U256, - point: G1Projective, -} - -impl PartialEq for PointScalar { - fn eq(&self, other: &Self) -> bool { - self.scalar == other.scalar && self.point == other.point - } -} - -impl Eq for PointScalar {} - -impl PartialOrd for PointScalar { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for PointScalar { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.scalar.cmp(&other.scalar) - } -} - -pub fn bos_coster(scalars: &[U256], points: &[G1Projective]) -> G1Projective { - if scalars.len() != points.len() { - panic!("Mismatch between number of scalars and points"); - } - - let mut heap = BinaryHeap::new(); - for (scalar, point) in scalars.into_iter().zip(points.iter()) { - if *scalar != U256::ZERO { - heap.push(PointScalar { - scalar: *scalar, - point: *point, - }); - } - } - - while heap.len() > 1 { - let PointScalar { - scalar: n1, - point: p1, - } = heap.pop().unwrap(); - let PointScalar { - scalar: n2, - point: p2, - } = heap.pop().unwrap(); - - let p_sum = p1 + &p2; - let n_diff = n1 - n2; - - if n_diff > U256::ZERO { - heap.push(PointScalar { - scalar: n_diff, - point: p1.clone(), - }); - } - heap.push(PointScalar { - scalar: n2, - point: p_sum, - }); - } - - if let Some(PointScalar { - scalar: n, - point: p, - }) = heap.pop() - { - p * Scalar::from_bytes_be(&n.to_be_bytes()).unwrap() - } else { - G1Projective::identity() // Identity point - } -} - // Algorithm1 from the LFG paper // TODO: Fix later, this algorithm is broken in the POC and the paper // fn subsum_accumulation(b: &[u64], s: &[G1Affine]) -> G1Projective { @@ -436,9 +354,8 @@ mod test { use blstrs::G1Affine; use group::{prime::PrimeCurveAffine, Group}; - use ruint::aliases::U256; - use super::{bos_coster, subsum_accumulation, DebugPoint}; + use super::subsum_accumulation; #[test] fn subsum_smoke_test() { @@ -521,21 +438,6 @@ mod test { assert_eq!(res, naive_msm(&input_points, &input_scalars)); } - #[test] - fn smoke_test_bos_coster() { - let input_points = vec![G1Point::generator(), G1Point::generator()]; - - // let input_scalars = vec![Scalar::from(1), Scalar::from(2)]; - let input_scalars = vec![U256::from(100), U256::from(200)]; - - let res = crate::fixed_base_msm_pippenger::bos_coster( - &input_scalars, - &[G1Projective::generator(), G1Projective::generator()], - ); - let input_scalars = vec![Scalar::from(100), Scalar::from(200)]; - assert_eq!(res, naive_msm(&input_points, &input_scalars)); - } - #[test] fn smoke_test_msm_best2_double_scalar() { use crate::ff::PrimeField; From 0e7ec3ff68ce1e170edd5a603b297dadcc8af5e5 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 11:51:44 +0100 Subject: [PATCH 18/25] rename msm method --- cryptography/bls12_381/src/fixed_base_msm_pippenger.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index 8df83bcb..73f68a12 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -30,7 +30,7 @@ impl FixedBaseMSMPippenger { } pub fn msm(&self, scalars: &[Scalar]) -> G1Projective { - msm_best2_noinfo(scalars, &self.precomputed_points, self.window_size) + pippenger_fixed_base_msm(scalars, &self.precomputed_points, self.window_size) } } @@ -57,7 +57,7 @@ pub fn precompute( g1_batch_normalize(&results) } -pub fn msm_best2_noinfo( +pub fn pippenger_fixed_base_msm( coeffs: &[Scalar], bases_precomputed: &[G1Point], window_size: usize, @@ -348,7 +348,9 @@ fn horners_rule_sum(points: &[G1Point]) -> G1Projective { mod test { use crate::{ - fixed_base_msm_pippenger::{horners_rule_sum, msm_best2_noinfo as msm_best2, precompute}, + fixed_base_msm_pippenger::{ + horners_rule_sum, pippenger_fixed_base_msm as msm_best2, precompute, + }, G1Point, G1Projective, Scalar, }; From cb4dbbd76714a328aa0f189a387da1b86a551a08 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 15:00:28 +0100 Subject: [PATCH 19/25] fix: subsum calculation was not taking into account when the last bucket was not b_1 --- .../bls12_381/src/fixed_base_msm_pippenger.rs | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index 73f68a12..7068244e 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -8,7 +8,7 @@ use group::Group; use crate::booth_encoding::get_booth_index; use crate::g1_batch_normalize; use crate::G1Point; - +#[derive(Debug, Clone)] pub struct FixedBaseMSMPippenger { precomputed_points: Vec, number_of_windows: usize, @@ -293,6 +293,8 @@ pub fn multi_msm( // It seems to be faster, but thats likely because the actual one is not implemented // correctly and does not have the short cuts for bucket sizes 0 and 1 fn subsum_accumulation(b: &[u64], s: &[G1Affine]) -> G1Projective { + assert_eq!(b.len(), s.len()); + // If we only have one, then we can return the scalar multiplication // This is an assumption that LFG was making too. if b.len() == 0 { @@ -321,6 +323,14 @@ fn subsum_accumulation(b: &[u64], s: &[G1Affine]) -> G1Projective { for _ in 0..diff { res += running_sum } + } else { + //Check the diff between the last scalar and 1 + // This is so that we "finish" the horner sum. + + let diff = b[index] - 1; + for _ in 0..diff { + res += running_sum + } } }); res @@ -395,6 +405,23 @@ mod test { ); } + fn naive_subsum_accumulation(b: &[u64], s: &[G1Affine]) -> G1Projective { + let mut res = G1Projective::identity(); + for (scalar, point) in b.iter().zip(s) { + res += G1Projective::from(point) * Scalar::from(*scalar) + } + res + } + + #[test] + fn subsum_regression_test() { + let indices = [2, 3]; + let points = vec![G1Affine::generator(); 2]; + let got = subsum_accumulation(&indices, &points); + let expected = naive_subsum_accumulation(&indices, &points); + assert_eq!(got, expected); + } + #[test] fn horners_sum_smoke_test() { let result = horners_rule_sum(&[G1Affine::generator()]); From 3574e14ecc298b73bd5cac3cdc9ec7316af0ec3e Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 17:18:14 +0100 Subject: [PATCH 20/25] use fixed base msm --- cryptography/bls12_381/src/fixed_base_msm.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cryptography/bls12_381/src/fixed_base_msm.rs b/cryptography/bls12_381/src/fixed_base_msm.rs index 491ffc01..e3e77150 100644 --- a/cryptography/bls12_381/src/fixed_base_msm.rs +++ b/cryptography/bls12_381/src/fixed_base_msm.rs @@ -1,4 +1,4 @@ -use crate::{G1Projective, Scalar}; +use crate::{fixed_base_msm_pippenger::FixedBaseMSMPippenger, G1Projective, Scalar}; use blstrs::{Fp, G1Affine}; /// FixedBaseMSMPrecomp computes a multi scalar multiplication using pre-computations. @@ -28,7 +28,9 @@ pub enum UsePrecomp { #[derive(Debug)] pub enum FixedBaseMSM { Precomp(FixedBaseMSMPrecomp), - NoPrecomp(Vec), + // TODO: We are hijacking the NoPrecomp variant to store the + // TODO: new pippenger algorithm. + NoPrecomp(FixedBaseMSMPippenger), } impl FixedBaseMSM { @@ -37,18 +39,14 @@ impl FixedBaseMSM { UsePrecomp::Yes { width } => { FixedBaseMSM::Precomp(FixedBaseMSMPrecomp::new(generators, width)) } - UsePrecomp::No => FixedBaseMSM::NoPrecomp(generators), + UsePrecomp::No => FixedBaseMSM::NoPrecomp(FixedBaseMSMPippenger::new(&generators)), } } pub fn msm(&self, scalars: Vec) -> G1Projective { match self { FixedBaseMSM::Precomp(precomp) => precomp.msm(scalars), - FixedBaseMSM::NoPrecomp(generators) => { - use crate::lincomb::g1_lincomb; - g1_lincomb(generators, &scalars) - .expect("number of generators and scalars should be equal") - } + FixedBaseMSM::NoPrecomp(precomp) => precomp.msm(&scalars), } } } From bd4cd72206483bf486870f10034f1ff1d66d1332 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 17:20:06 +0100 Subject: [PATCH 21/25] update benchmarks --- cryptography/bls12_381/benches/benchmark.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cryptography/bls12_381/benches/benchmark.rs b/cryptography/bls12_381/benches/benchmark.rs index cf4a90fa..25803359 100644 --- a/cryptography/bls12_381/benches/benchmark.rs +++ b/cryptography/bls12_381/benches/benchmark.rs @@ -39,10 +39,9 @@ pub fn fixed_base_msm(c: &mut Criterion) { let fixed_base_pip = FixedBaseMSMPippenger::new(&generators); - c.bench_function( - "bls12_381 fixed_base_msm best2 no info (fixed base pip)", - |b| b.iter(|| fixed_base_pip.msm(&scalars)), - ); + c.bench_function("bls12_381 fixed based pippenger algorithm", |b| { + b.iter(|| fixed_base_pip.msm(&scalars)) + }); } pub fn bench_msm(c: &mut Criterion) { @@ -96,5 +95,11 @@ fn random_g2_points(size: usize) -> Vec { points } -criterion_group!(benches, /*batch_inversion, fixed_base_msm, fixed_base_msm */ bench_msm); +criterion_group!( + benches, + batch_inversion, + fixed_base_msm, + bench_msm, + fixed_base_msm +); criterion_main!(benches); From f8c3276634b24be650d51e90c26fcb3a5f2131d7 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 17:21:00 +0100 Subject: [PATCH 22/25] small cleanup --- cryptography/bls12_381/src/fixed_base_msm_pippenger.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index 7068244e..81b94874 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -1,7 +1,6 @@ use blstrs::G1Affine; use blstrs::G1Projective; use blstrs::Scalar; -use ff::Field; use ff::PrimeField; use group::Group; @@ -11,7 +10,6 @@ use crate::G1Point; #[derive(Debug, Clone)] pub struct FixedBaseMSMPippenger { precomputed_points: Vec, - number_of_windows: usize, window_size: usize, } @@ -24,7 +22,6 @@ impl FixedBaseMSMPippenger { FixedBaseMSMPippenger { precomputed_points, - number_of_windows, window_size, } } From 6850fec5d5f9031ca2aabaa7a10286e7bc2357d2 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Tue, 3 Sep 2024 17:23:12 +0100 Subject: [PATCH 23/25] remove TODO: We can assume that users will pass in empty vectors --- cryptography/bls12_381/src/batch_add.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/cryptography/bls12_381/src/batch_add.rs b/cryptography/bls12_381/src/batch_add.rs index e962e986..ecf5a3a0 100644 --- a/cryptography/bls12_381/src/batch_add.rs +++ b/cryptography/bls12_381/src/batch_add.rs @@ -179,8 +179,6 @@ pub fn multi_batch_addition(mut multi_points: Vec>) -> Vec Date: Wed, 4 Sep 2024 18:48:50 +0100 Subject: [PATCH 24/25] use -3: batch_addition does not allow us to have duplicate points or points that are negations of each other --- cryptography/kzg_multi_open/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cryptography/kzg_multi_open/src/lib.rs b/cryptography/kzg_multi_open/src/lib.rs index 6fac861d..68aa2211 100644 --- a/cryptography/kzg_multi_open/src/lib.rs +++ b/cryptography/kzg_multi_open/src/lib.rs @@ -28,7 +28,7 @@ pub(crate) fn create_insecure_commit_opening_keys( let g1_gen = G1Projective::generator(); let mut g1_points = Vec::new(); - let secret = -Scalar::from(1 as u64); + let secret = -Scalar::from(3 as u64); let mut current_secret_pow = Scalar::ONE; for _ in 0..num_coefficients_in_polynomial { g1_points.push(g1_gen * current_secret_pow); @@ -39,7 +39,7 @@ pub(crate) fn create_insecure_commit_opening_keys( let ck = CommitKey::new(g1_points.clone()); let mut g2_points = Vec::new(); - let secret = -Scalar::from(1 as u64); + let secret = -Scalar::from(3 as u64); let mut current_secret_pow = Scalar::ONE; let g2_gen = G2Projective::generator(); // The setup needs 65 g1 elements for the opening key, in order From 328fe47468145403220edc2b39340d15b0b5165e Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Wed, 4 Sep 2024 18:57:27 +0100 Subject: [PATCH 25/25] check for invariants in the constructor --- .../bls12_381/src/fixed_base_msm_pippenger.rs | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs index 81b94874..8e59e653 100644 --- a/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs +++ b/cryptography/bls12_381/src/fixed_base_msm_pippenger.rs @@ -1,3 +1,5 @@ +use std::collections::HashSet; + use blstrs::G1Affine; use blstrs::G1Projective; use blstrs::Scalar; @@ -20,6 +22,34 @@ impl FixedBaseMSMPippenger { let number_of_windows = Scalar::NUM_BITS as usize / window_size + 1; let precomputed_points = precompute(window_size, number_of_windows, points); + // Check that the points are not duplicated since the batch addition formula + // assumes that all points are distinct. + let points_set: HashSet<_> = precomputed_points + .iter() + .map(|p| p.to_uncompressed()) + .collect(); + assert_eq!( + points_set.len(), + precomputed_points.len(), + "precomputed points should not contain any duplicates. Each input point is scaled by 2^k, so if points were generated by going s^k*G, please ensure s is not a power of 2" + ); + + // Check that no point is the negation of each other + // as the batch addition formula assumes that distinct points have + // distinct `x` coordinates. + for i in 0..precomputed_points.len() { + for k in 0..precomputed_points.len() { + if i == k { + continue; + } + // Check that point is not the negation of other point + assert!( + precomputed_points[i] != -precomputed_points[k], + "point and its negation are not allowed in the same MSM. Each input point is scaled by 2^k, so if points were generated by going s^k*G, please ensure s is not a power of 2" + ) + } + } + FixedBaseMSMPippenger { precomputed_points, window_size,