Add in signed overloads for writing integers.

Alexhuszagh · Alexhuszagh · commit 84c1c497edd8 · 2024-12-08T14:46:28.000-06:00
Fixes #191. Fixes apache/datafusion#13686
diff --git a/CHANGELOG b/CHANGELOG
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.0.5] 2024-12-08
+
+### Fixed
+
+- Only require 19 digits for writing `i64` and not 20 (#191).
+
 ## [1.0.4] 2024-12-07
 
 ### Changed
diff --git a/lexical-core/Cargo.toml b/lexical-core/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT/Apache-2.0"
 name = "lexical-core"
 readme = "README.md"
 repository = "https://github.com/Alexhuszagh/rust-lexical"
-version = "1.0.3"
+version = "1.0.5"
 rust-version = "1.63.0"
 exclude = [
     "assets/*",
@@ -19,30 +19,30 @@ exclude = [
 ]
 
 [dependencies.lexical-util]
-version = "1.0.4"
+version = "1.0.5"
 default-features = false
 path = "../lexical-util"
 
 [dependencies.lexical-parse-integer]
-version = "1.0.3"
+version = "1.0.5"
 optional = true
 default-features = false
 path = "../lexical-parse-integer"
 
 [dependencies.lexical-parse-float]
-version = "1.0.3"
+version = "1.0.5"
 optional = true
 default-features = false
 path = "../lexical-parse-float"
 
 [dependencies.lexical-write-integer]
-version = "1.0.3"
+version = "1.0.5"
 optional = true
 default-features = false
 path = "../lexical-write-integer"
 
 [dependencies.lexical-write-float]
-version = "1.0.3"
+version = "1.0.5"
 optional = true
 default-features = false
 path = "../lexical-write-float"
diff --git a/lexical-parse-float/Cargo.toml b/lexical-parse-float/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT/Apache-2.0"
 name = "lexical-parse-float"
 readme = "README.md"
 repository = "https://github.com/Alexhuszagh/rust-lexical"
-version = "1.0.3"
+version = "1.0.5"
 rust-version = "1.63.0"
 exclude = [
     "assets/*",
@@ -19,13 +19,13 @@ exclude = [
 ]
 
 [dependencies.lexical-util]
-version = "1.0.4"
+version = "1.0.5"
 path = "../lexical-util"
 default-features = false
 features = ["parse-floats"]
 
 [dependencies.lexical-parse-integer]
-version = "1.0.3"
+version = "1.0.5"
 path = "../lexical-parse-integer"
 default-features = false
 features = []
diff --git a/lexical-parse-integer/Cargo.toml b/lexical-parse-integer/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT/Apache-2.0"
 name = "lexical-parse-integer"
 readme = "README.md"
 repository = "https://github.com/Alexhuszagh/rust-lexical"
-version = "1.0.3"
+version = "1.0.5"
 rust-version = "1.63.0"
 exclude = [
     "assets/*",
@@ -22,7 +22,7 @@ exclude = [
 static_assertions = "1"
 
 [dependencies.lexical-util]
-version = "1.0.4"
+version = "1.0.5"
 path = "../lexical-util"
 default-features = false
 features = ["parse-integers"]
diff --git a/lexical-util/Cargo.toml b/lexical-util/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT/Apache-2.0"
 name = "lexical-util"
 readme = "README.md"
 repository = "https://github.com/Alexhuszagh/rust-lexical"
-version = "1.0.4"
+version = "1.0.5"
 rust-version = "1.63.0"
 exclude = [
     "assets/*",
diff --git a/lexical-write-float/Cargo.toml b/lexical-write-float/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT/Apache-2.0"
 name = "lexical-write-float"
 readme = "README.md"
 repository = "https://github.com/Alexhuszagh/rust-lexical"
-version = "1.0.4"
+version = "1.0.5"
 rust-version = "1.63.0"
 exclude = [
     "assets/*",
@@ -19,13 +19,13 @@ exclude = [
 ]
 
 [dependencies.lexical-util]
-version = "1.0.4"
+version = "1.0.5"
 path = "../lexical-util"
 default-features = false
 features = ["write-floats"]
 
 [dependencies.lexical-write-integer]
-version = "1.0.3"
+version = "1.0.5"
 path = "../lexical-write-integer"
 default-features = false
 features = []
diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs
@@ -139,7 +139,7 @@ pub fn write_exponent<const FORMAT: u128>(
     bytes[*cursor] = exponent_character;
     *cursor += 1;
     let positive_exp: u32 = write_exponent_sign::<FORMAT>(bytes, cursor, exp);
-    *cursor += positive_exp.write_exponent::<FORMAT>(&mut bytes[*cursor..]);
+    *cursor += positive_exp.write_exponent_signed::<FORMAT>(&mut bytes[*cursor..]);
 }
 
 /// Detect the notation to use for the float formatter and call the appropriate
diff --git a/lexical-write-integer/Cargo.toml b/lexical-write-integer/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT/Apache-2.0"
 name = "lexical-write-integer"
 readme = "README.md"
 repository = "https://github.com/Alexhuszagh/rust-lexical"
-version = "1.0.4"
+version = "1.0.5"
 rust-version = "1.63.0"
 exclude = [
     "assets/*",
@@ -22,7 +22,7 @@ exclude = [
 static_assertions = "1.1.0"
 
 [dependencies.lexical-util]
-version = "1.0.4"
+version = "1.0.5"
 path = "../lexical-util"
 default-features = false
 features = ["write-integers"]
diff --git a/lexical-write-integer/src/api.rs b/lexical-write-integer/src/api.rs
@@ -55,15 +55,15 @@ where
         let unsigned = Unsigned::as_cast(value.wrapping_neg());
         buffer[0] = b'-';
         let buffer = &mut buffer[1..];
-        unsigned.write_mantissa::<FORMAT>(buffer) + 1
+        unsigned.write_mantissa_signed::<FORMAT>(buffer) + 1
     } else if cfg!(feature = "format") && format.required_mantissa_sign() {
         let unsigned = Unsigned::as_cast(value);
         buffer[0] = b'+';
         let buffer = &mut buffer[1..];
-        unsigned.write_mantissa::<FORMAT>(buffer) + 1
+        unsigned.write_mantissa_signed::<FORMAT>(buffer) + 1
     } else {
         let unsigned = Unsigned::as_cast(value);
-        unsigned.write_mantissa::<FORMAT>(buffer)
+        unsigned.write_mantissa_signed::<FORMAT>(buffer)
     }
 }
 
diff --git a/lexical-write-integer/src/decimal.rs b/lexical-write-integer/src/decimal.rs
@@ -239,14 +239,19 @@ unsafe impl DecimalCount for usize {
 
 /// Write integer to decimal string.
 pub trait Decimal: DecimalCount {
-    /// # Safety
+    fn decimal(self, buffer: &mut [u8]) -> usize;
+
+    /// Specialized overload if the type is signed.
     ///
-    /// Safe as long as buffer is at least [`FORMATTED_SIZE`] elements long,
-    /// (or [`FORMATTED_SIZE_DECIMAL`] for decimal), and the radix is valid.
+    /// # Panics
     ///
-    /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE
-    /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
-    fn decimal(self, buffer: &mut [u8]) -> usize;
+    /// If the data originally provided was unsigned and therefore
+    /// has more digits than the signed variant. This only affects
+    /// `i64` (see #191).
+    #[inline(always)]
+    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
+        self.decimal(buffer)
+    }
 }
 
 // Implement decimal for type.
@@ -265,10 +270,21 @@ decimal_impl! {
     u8; from_u8
     u16; from_u16
     u32; from_u32
-    u64; from_u64
     u128; from_u128
 }
 
+impl Decimal for u64 {
+    #[inline(always)]
+    fn decimal(self, buffer: &mut [u8]) -> usize {
+        jeaiii::from_u64(self, buffer)
+    }
+
+    #[inline(always)]
+    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
+        jeaiii::from_i64(self, buffer)
+    }
+}
+
 impl Decimal for usize {
     #[inline(always)]
     fn decimal(self, buffer: &mut [u8]) -> usize {
@@ -281,4 +297,16 @@ impl Decimal for usize {
             _ => unimplemented!(),
         }
     }
+
+    #[inline(always)]
+    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
+        match usize::BITS {
+            8 => (self as u8).decimal_signed(buffer),
+            16 => (self as u16).decimal_signed(buffer),
+            32 => (self as u32).decimal_signed(buffer),
+            64 => (self as u64).decimal_signed(buffer),
+            128 => (self as u128).decimal_signed(buffer),
+            _ => unimplemented!(),
+        }
+    }
 }
diff --git a/lexical-write-integer/src/jeaiii.rs b/lexical-write-integer/src/jeaiii.rs
@@ -297,11 +297,16 @@ pub fn from_u32(n: u32, buffer: &mut [u8]) -> usize {
 /// Optimized jeaiii algorithm for u64.
 #[inline(always)]
 #[allow(clippy::collapsible_else_if)] // reason = "branching is fine-tuned for performance"
-pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
+fn from_u64_impl(n: u64, buffer: &mut [u8], is_signed: bool) -> usize {
     // NOTE: Like before, this optimizes better for large and small
     // values if there's a flat comparison with larger values first.
     const FACTOR: u64 = 100_0000_0000;
-    let buffer = &mut buffer[..20];
+    // NOTE `i64` takes a max of 19 digits, while `u64` takes a max of 20.
+    let buffer = if is_signed {
+        &mut buffer[..19]
+    } else {
+        &mut buffer[..20]
+    };
     if n < 1_0000 {
         // 1 to 4 digits
         if n >= 100 {
@@ -326,7 +331,7 @@ pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
             write_digits!(@5-6 buffer, n)
         }
     } else {
-        // 11-20 digits, can do in 2 steps
+        // 11-20 digits, can do in 2 steps (11-19 if is signed).
         // NOTE: `hi` has to be in `[0, 2^31)`, while `lo` is in `[0, 10^11)`
         // So, we can use our `from_u64_small` for hi. For our `lo`, we always
         // need to write 10 digits. However, the `jeaiii` algorithm is too
@@ -340,6 +345,23 @@ pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
     }
 }
 
+/// Optimized jeaiii algorithm for u64.
+#[inline(always)]
+pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
+    from_u64_impl(n, buffer, false)
+}
+
+/// Optimized jeaiii algorithm for i64, which must be positive.
+///
+/// The value **MUST** have originally come from an `i64`: only the
+/// first `19` bytes of `buffer` are used for the bounds check, so this
+/// will panic if a 20-digit value (`>= 10^19`) is passed to the function.
+#[inline(always)]
+pub fn from_i64(n: u64, buffer: &mut [u8]) -> usize {
+    debug_assert!(n < 1000_0000_0000_0000_0000u64);
+    from_u64_impl(n, buffer, true)
+}
+
 /// Optimized jeaiii algorithm for u128.
 #[inline(always)]
 #[allow(clippy::collapsible_else_if)] // reason = "branching is fine-tuned for performance"
diff --git a/lexical-write-integer/src/write.rs b/lexical-write-integer/src/write.rs
diff --git a/lexical-write-integer/tests/decimal_tests.rs b/lexical-write-integer/tests/decimal_tests.rs
diff --git a/lexical/Cargo.toml b/lexical/Cargo.toml

Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,7 @@ pub fn write_exponent<const FORMAT: u128>(`
`139`	`139`	`bytes[*cursor] = exponent_character;`
`140`	`140`	`*cursor += 1;`
`141`	`141`	`let positive_exp: u32 = write_exponent_sign::<FORMAT>(bytes, cursor, exp);`
`142`		`- *cursor += positive_exp.write_exponent::<FORMAT>(&mut bytes[*cursor..]);`
	`142`	`+ *cursor += positive_exp.write_exponent_signed::<FORMAT>(&mut bytes[*cursor..]);`
`143`	`143`	`}`
`144`	`144`
`145`	`145`	`/// Detect the notation to use for the float formatter and call the appropriate`