Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize from/assign str for power of 2 bases #78

Merged
merged 2 commits into from
Feb 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions arbi/scripts/size_in_radix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
###############################################################################
# Copyright 2025 Owain Davies
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
# This file is part of "arbi", an Arbitrary Precision Integer library for Rust.
###############################################################################

from decimal import getcontext, Decimal, ROUND_CEILING

# Some high enough precision :)
getcontext().prec = 100


# Returns ceil( (ln(2) / ln(base)) * 2^digit_bits ) as a Python int.
#
# The ratio ln(2) / ln(base) is evaluated with Decimal arithmetic (at whatever
# precision the active decimal context provides), scaled by 2^digit_bits, and
# then rounded towards +infinity.
#
# Useful for computing the needed capacity for Arbi::to_string()-type functions
def log2_div_logb(base: int, digit_bits: int = 32) -> int:
    scale = Decimal(1 << digit_bits)
    ratio = Decimal(2).ln() / Decimal(base).ln()

    # Round up so the scaled constant never falls below the true value.
    return int((ratio * scale).to_integral_exact(rounding=ROUND_CEILING))


# Returns ceil( (ln(base) / ln(2^digit_bits)) * 2^digit_bits ) as a Python int.
#
# The ratio ln(base) / ln(2^digit_bits) is evaluated with Decimal arithmetic
# (at whatever precision the active decimal context provides), scaled by
# 2^digit_bits, and then rounded towards +infinity.
#
# Useful for computing the needed capacity for Arbi::from_str()-type functions
def logb_div_logab(base: int, digit_bits: int = 32) -> int:
    two_pow_bits = Decimal(1 << digit_bits)
    ratio = Decimal(base).ln() / two_pow_bits.ln()

    # Round up so the scaled constant never falls below the true value.
    return int((ratio * two_pow_bits).to_integral_exact(rounding=ROUND_CEILING))


def print_arrays():
    # Prints, to stdout, Rust `const` array definitions holding the scaled
    # logarithm constants for every base index in [0, 36], once for 32-bit and
    # once for 64-bit digits.

    def print_array(prefix: str, digit_bits: int, func, start_base: int):
        # Emits one array named `{prefix}_{digit_bits}` with 37 entries.
        # Indices below `start_base` are written as an all-zero hex literal;
        # the remaining indices hold `func(base, digit_bits)` in hex.
        pad = " " * 4
        zero_literal = "0" * (digit_bits // 4)  # one hex digit per 4 bits

        print(f"const {prefix}_{digit_bits}: [u{digit_bits}; 37] = [")
        for base in range(37):
            if base < start_base:
                entry = zero_literal
            else:
                entry = f"{func(base, digit_bits):x}"
            print(f"{pad}0x{entry},")
        print("];\n")

    # (array name prefix, generator function, first base that gets a value)
    tables = (
        ("SCALED_LOG2_DIV_LOG", log2_div_logb, 3),
        ("SCALED_LOGB_DIV_LOGAB", logb_div_logab, 2),
    )
    for digit_bits in (32, 64):
        for prefix, func, start_base in tables:
            print_array(prefix, digit_bits, func, start_base)


if __name__ == "__main__":
print_arrays()
101 changes: 81 additions & 20 deletions arbi/src/assign_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ SPDX-License-Identifier: Apache-2.0 OR MIT
*/

use crate::from_string::{configs::BASE_MBS, BaseMbs, ParseError};
use crate::uints::UnsignedUtilities;
use crate::Base;
use crate::{Arbi, Digit};
use crate::{Arbi, Base, Digit};

impl Arbi {
/// Assign the integer value the provided string represents to this `Arbi`
Expand Down Expand Up @@ -137,20 +135,85 @@ impl Arbi {
return Ok(());
}

// Get configuration for this base
let base_val = base.value() as u32;
let BaseMbs { mbs, base_pow_mbs } = BASE_MBS[base_val as usize];
let base_value = base.value() as u32;

// Reserve estimated capacity
let estimate = usize::div_ceil_(base_digits.len(), mbs);
self.vec
.reserve(estimate.saturating_sub(self.vec.capacity()));
let capacity = Self::size_with_size_base_maybe_over_by_one(
base_value,
base_digits.len(),
);
self.vec.clear();
self.neg = has_minus_sign;
self.vec.reserve(capacity);

#[cfg(debug_assertions)]
let initial_capacity = self.vec.capacity();

self.neg = has_minus_sign;

if base_value.is_power_of_two() {
self.assign_str_radix_algo_pow2(base_digits, base_value)?
} else {
self.assign_str_radix_algo_generic(base_digits, base_value)?
}

#[cfg(debug_assertions)]
{
debug_assert_eq!(self.vec.capacity(), initial_capacity);
debug_assert!(
self.vec.len() == capacity || self.vec.len() == capacity - 1
);
}

Ok(())
}

/// Bit-packing parser for power-of-two bases.
///
/// Walks `base_digits` from the last byte to the first, so the least
/// significant base-`base` digit is consumed first, and packs each digit's
/// `base.trailing_zeros()` bits into `Digit`-sized words that are appended to
/// `self.vec`. This function only pushes onto `self.vec`; it does not clear
/// it beforehand.
///
/// # Errors
/// Returns `ParseError::InvalidDigit` as soon as a byte is not a valid digit
/// in `base`. Words pushed before that point are left in `self.vec`.
fn assign_str_radix_algo_pow2(
    &mut self,
    base_digits: &[u8],
    base: u32,
) -> Result<(), ParseError> {
    debug_assert!(!base_digits.is_empty());
    debug_assert!((2..=36).contains(&base) && base.is_power_of_two());

    // Width, in bits, of a single base-`base` digit.
    let width: u32 = base.trailing_zeros();
    // Number of bits of `word` that are already occupied.
    let mut filled: u32 = 0;
    // The `Digit` currently being assembled.
    let mut word: Digit = 0;

    for &byte in base_digits.iter().rev() {
        let value = (byte as char)
            .to_digit(base)
            .ok_or(ParseError::InvalidDigit)? as Digit;

        word |= value << filled;
        filled += width;
        if filled < Digit::BITS {
            continue;
        }

        // The word is full: emit it and start the next one.
        self.vec.push(word);
        filled -= Digit::BITS;
        // If the digit fit exactly, `filled` is now 0 and the shift below
        // yields 0. Otherwise the top `filled` bits of `value` did not fit
        // into the word just pushed and become the low bits of the next one.
        word = value >> (width - filled);
    }

    // Emit the final, partially filled word unless it carries no bits.
    if word != 0 {
        self.vec.push(word);
    }

    Ok(())
}

/* Generic Algorithm (works for all valid bases) */
fn assign_str_radix_algo_generic(
&mut self,
base_digits: &[u8],
base: u32,
) -> Result<(), ParseError> {
debug_assert!(!base_digits.is_empty());
debug_assert!((2..=36).contains(&base));

let BaseMbs { mbs, base_pow_mbs } = BASE_MBS[base as usize];
let n_base = base_digits.len();
let rem_batch_size = n_base % mbs;
let mut pos = 0;
Expand All @@ -162,9 +225,9 @@ impl Arbi {
// Convert batch substring to integer value
let end = pos + rem_batch_size;
while pos < end {
match (base_digits[pos] as char).to_digit(base_val) {
Some(digit) => {
batch = digit + batch * base_val;
match (base_digits[pos] as char).to_digit(base) {
Some(base_digit) => {
batch = base_digit + batch * base;
pos += 1;
}
None => return Err(ParseError::InvalidDigit),
Expand All @@ -181,9 +244,9 @@ impl Arbi {
// Convert batch substring to integer value
let end = pos + mbs;
while pos < end {
match (base_digits[pos] as char).to_digit(base_val) {
Some(digit) => {
batch = digit + batch * base_val;
match (base_digits[pos] as char).to_digit(base) {
Some(base_digit) => {
batch = base_digit + batch * base;
pos += 1;
}
None => return Err(ParseError::InvalidDigit),
Expand All @@ -192,10 +255,8 @@ impl Arbi {
Self::imul1add1(self, base_pow_mbs, Some(batch));
}

#[cfg(debug_assertions)]
debug_assert_eq!(self.vec.capacity(), initial_capacity);

self.trim();

Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion arbi/src/multiplication.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ impl Arbi {
///
/// ## Complexity
/// \\( O(n) \\)
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn imul1add1(x: &mut Self, v: Digit, k: Option<Digit>) {
let mut k: Digit = k.unwrap_or(0);
for d in &mut x.vec {
Expand Down
14 changes: 1 addition & 13 deletions arbi/src/size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@ impl Arbi {
/// # Examples
/// ```
/// use arbi::{Arbi, Digit};
///
/// let zero = Arbi::zero();
/// assert_eq!(zero.size(), 0);
///
/// let mut a = Arbi::from(Digit::MAX);
/// assert_eq!(a.size(), 1);
///
/// a += 1;
/// assert_eq!(a.size(), 2);
/// ```
Expand All @@ -42,10 +39,8 @@ impl Arbi {
/// # Examples
/// ```
/// use arbi::{Arbi, BitCount, Digit};
///
/// let zero = Arbi::zero();
/// assert_eq!(zero.size_bits(), 0);
///
/// let mut a = Arbi::from(Digit::MAX);
/// assert_eq!(a.size_bits(), Digit::BITS as BitCount);
/// a += 1;
Expand Down Expand Up @@ -106,15 +101,12 @@ impl Arbi {
/// # Examples
/// ```
/// use arbi::Arbi;
///
/// let mut zero = Arbi::zero();
/// assert_eq!(zero.size_base_mut(10), 0);
/// assert_eq!(zero, 0);
///
/// let mut one = Arbi::one();
/// assert_eq!(one.size_base_mut(10), 1);
/// assert_eq!(one, 1);
///
/// let mut a = Arbi::from_str_radix("123456789", 10).unwrap();
/// assert_eq!(a.size_base_mut(10), 9);
/// assert_eq!(a, 9);
Expand Down Expand Up @@ -148,13 +140,10 @@ impl Arbi {
/// # Examples
/// ```
/// use arbi::Arbi;
///
/// let zero = Arbi::zero();
/// assert_eq!(zero.size_base_ref(10), 0);
///
/// let one = Arbi::one();
/// assert_eq!(one.size_base_ref(10), 1);
///
/// let a = Arbi::from_str_radix("123456789", 10).unwrap();
/// assert_eq!(a.size_base_ref(10), 9);
/// ```
Expand All @@ -176,7 +165,7 @@ impl Arbi {

pub(crate) fn size_radix_no_check(&mut self, base: u32) -> BitCount {
let mut count: BitCount = 0;
while self > 0 {
while self.size() != 0 {
Self::div_algo_digit_inplace(self, base as Digit);
count += 1;
}
Expand All @@ -192,7 +181,6 @@ impl Arbi {
}
if base.is_power_of_two() {
let bit_length = self.size_bits();
// let base_log2 = base.ilog2();
let base_log2 = u32::ilog2_(base);
return Some(BitCount::div_ceil_(
bit_length,
Expand Down
Loading