From f89331c06ee1c7b277f16cb983e676e0926148a3 Mon Sep 17 00:00:00 2001
From: Luke Parker <lukeparker5132@gmail.com>
Date: Mon, 13 Feb 2023 16:01:18 -0500
Subject: [PATCH 1/2] Optimize scalar multiplication with a 4-bit window

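This moves from 255 doublings and 255 additions to 259 doublings and 71
additions (7 of each to build the 16-entry table, then 252 doublings and 64
additions in the main loop). If a doubling costs roughly half as much as an
addition, which is roughly the case as far as I can tell, this shifts the
function from executing in (255 + (255 * 2)) = 765 time units to
(259 + (71 * 2)) = 401 time units, a 48% reduction in running time (about a
1.9x speedup).

Note: the table is indexed directly by nibbles of the scalar and the
`conditional_select` from the previous loop is gone, so the new loop's memory
accesses depend on the scalar.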
---
 src/curves.rs | 114 +++++++++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 48 deletions(-)

diff --git a/src/curves.rs b/src/curves.rs
index 41ccb9a..e15975b 100644
--- a/src/curves.rs
+++ b/src/curves.rs
@@ -466,32 +466,41 @@ macro_rules! new_curve_impl {
             type Output = $name;
 
             fn mul(self, other: &'b $scalar) -> Self::Output {
-                // TODO: make this faster
-
-                let mut acc = $name::identity();
-
-                // This is a simple double-and-add implementation of point
-                // multiplication, moving from most significant to least
-                // significant bit of the scalar.
-                //
-                // We don't use `PrimeFieldBits::.to_le_bits` here, because that would
-                // force users of this crate to depend on `bitvec` where they otherwise
-                // might not need to.
-                //
-                // NOTE: We skip the leading bit because it's always unset (we are turning
-                // the 32-byte repr into 256 bits, and $scalar::NUM_BITS = 255).
-                for bit in other
-                    .to_repr()
-                    .iter()
-                    .rev()
-                    .flat_map(|byte| (0..8).rev().map(move |i| Choice::from((byte >> i) & 1u8)))
-                    .skip(1)
-                {
-                    acc = acc.double();
-                    acc = $name::conditional_select(&acc, &(acc + self), bit);
+                // Create a table out of the point
+                // TODO: This can be made more efficient with a 5-bit window
+                let mut arr = [$name::identity(); 16];
+                arr[1] = *self;
+                for i in 2 .. 16 {
+                    arr[i] = if (i % 2) == 0 {
+                        arr[i / 2].double()
+                    } else {
+                        arr[i - 1] + arr[1]
+                    };
+                }
+
+                let mut res = $name::identity();
+                let mut first = true;
+                // Iterate from the most significant byte to the least significant byte
+                for byte in other.to_repr().iter().rev() {
+                    // Shift the result over 4 bits
+                    if !first {
+                        for _ in 0 .. 4 {
+                            res = res.double();
+                        }
+                    }
+                    first = false;
+
+                    // Add the top-nibble from this byte into the result
+                    res += arr[usize::from(byte >> 4)];
+                    // Shift the result over
+                    for _ in 0 .. 4 {
+                        res = res.double();
+                    }
+                    // Add the bottom-nibble from this byte into the result
+                    res += arr[usize::from(byte & 0b1111)];
                 }
 
-                acc
+                res
             }
         }
 
@@ -581,32 +590,41 @@ macro_rules! new_curve_impl {
             type Output = $name;
 
             fn mul(self, other: &'b $scalar) -> Self::Output {
-                // TODO: make this faster
-
-                let mut acc = $name::identity();
-
-                // This is a simple double-and-add implementation of point
-                // multiplication, moving from most significant to least
-                // significant bit of the scalar.
-                //
-                // We don't use `PrimeFieldBits::.to_le_bits` here, because that would
-                // force users of this crate to depend on `bitvec` where they otherwise
-                // might not need to.
-                //
-                // NOTE: We skip the leading bit because it's always unset (we are turning
-                // the 32-byte repr into 256 bits, and $scalar::NUM_BITS = 255).
-                for bit in other
-                    .to_repr()
-                    .iter()
-                    .rev()
-                    .flat_map(|byte| (0..8).rev().map(move |i| Choice::from((byte >> i) & 1u8)))
-                    .skip(1)
-                {
-                    acc = acc.double();
-                    acc = $name::conditional_select(&acc, &(acc + self), bit);
+                // Create a table out of the point
+                // TODO: This can be made more efficient with a 5-bit window
+                let mut arr = [$name::identity(); 16];
+                arr[1] = (*self).into();
+                for i in 2 .. 16 {
+                    arr[i] = if (i % 2) == 0 {
+                        arr[i / 2].double()
+                    } else {
+                        arr[i - 1] + arr[1]
+                    };
+                }
+
+                let mut res = $name::identity();
+                let mut first = true;
+                // Iterate from the most significant byte to the least significant byte
+                for byte in other.to_repr().iter().rev() {
+                    // Shift the result over 4 bits
+                    if !first {
+                        for _ in 0 .. 4 {
+                            res = res.double();
+                        }
+                    }
+                    first = false;
+
+                    // Add the top-nibble from this byte into the result
+                    res += arr[usize::from(byte >> 4)];
+                    // Shift the result over
+                    for _ in 0 .. 4 {
+                        res = res.double();
+                    }
+                    // Add the bottom-nibble from this byte into the result
+                    res += arr[usize::from(byte & 0b1111)];
                 }
 
-                acc
+                res
             }
         }
 

From a46b5be95cacbff54d06aad8d3bbcba42e05d616 Mon Sep 17 00:00:00 2001
From: Luke Parker <lukeparker5132@gmail.com>
Date: Thu, 18 May 2023 18:26:06 -0400
Subject: [PATCH 2/2] Add zeroize

---
 Cargo.toml       | 1 +
 src/curves.rs    | 2 +-
 src/fields/fp.rs | 2 +-
 src/fields/fq.rs | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 93091a5..60771f3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,6 +50,7 @@ group = { version = "0.13", default-features = false }
 rand = { version = "0.8", default-features = false }
 static_assertions = "1.1.0"
 subtle = { version = "2.3", default-features = false }
+zeroize = { version = "^1.5", default-features = false, features = ["derive"] }
 
 # alloc dependencies
 blake2b_simd = { version = "1", optional = true, default-features = false }
diff --git a/src/curves.rs b/src/curves.rs
index e15975b..3b03441 100644
--- a/src/curves.rs
+++ b/src/curves.rs
@@ -30,7 +30,7 @@ macro_rules! new_curve_impl {
     (($($privacy:tt)*), $name:ident, $name_affine:ident, $iso:ident, $base:ident, $scalar:ident,
      $curve_id:literal, $a_raw:expr, $b_raw:expr, $curve_type:ident) => {
         /// Represents a point in the projective coordinate space.
-        #[derive(Copy, Clone, Debug)]
+        #[derive(Copy, Clone, Debug, zeroize::Zeroize)]
         #[cfg_attr(feature = "repr-c", repr(C))]
         $($privacy)* struct $name {
             x: $base,
diff --git a/src/fields/fp.rs b/src/fields/fp.rs
index 3264cba..35ab1ff 100644
--- a/src/fields/fp.rs
+++ b/src/fields/fp.rs
@@ -24,7 +24,7 @@ use crate::arithmetic::SqrtTables;
 // The internal representation of this type is four 64-bit unsigned
 // integers in little-endian order. `Fp` values are always in
 // Montgomery form; i.e., Fp(a) = aR mod p, with R = 2^256.
-#[derive(Clone, Copy, Eq)]
+#[derive(Clone, Copy, Eq, zeroize::Zeroize)]
 #[repr(transparent)]
 pub struct Fp(pub(crate) [u64; 4]);
 
diff --git a/src/fields/fq.rs b/src/fields/fq.rs
index 8177fa4..2ccabe8 100644
--- a/src/fields/fq.rs
+++ b/src/fields/fq.rs
@@ -24,7 +24,7 @@ use crate::arithmetic::SqrtTables;
 // The internal representation of this type is four 64-bit unsigned
 // integers in little-endian order. `Fq` values are always in
 // Montgomery form; i.e., Fq(a) = aR mod q, with R = 2^256.
-#[derive(Clone, Copy, Eq)]
+#[derive(Clone, Copy, Eq, zeroize::Zeroize)]
 #[repr(transparent)]
 pub struct Fq(pub(crate) [u64; 4]);