From e68bb192f2f44d0bbd4b5ac10cd343bd057251dd Mon Sep 17 00:00:00 2001 From: nicoo Date: Sun, 21 Jun 2020 16:50:11 +0200 Subject: [PATCH] factor::numeric: Add a 32b Montgomery variant [WiP] ~32% faster --- src/uu/factor/src/numeric.rs | 108 +++++++++++++++++++++++++++++++++++ src/uu/factor/src/rho.rs | 13 ++++- 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/src/uu/factor/src/numeric.rs b/src/uu/factor/src/numeric.rs index f1b447583..118b4d9fa 100644 --- a/src/uu/factor/src/numeric.rs +++ b/src/uu/factor/src/numeric.rs @@ -216,6 +216,114 @@ pub(crate) fn modular_inverse(a: T) -> T { t } +#[derive(Clone, Copy, Debug)] +pub(crate) struct Montgomery32 { + a: u32, + n: u32, +} + +impl Montgomery32 { + /// computes x/R mod n efficiently + fn reduce(&self, x: u64) -> u32 { + debug_assert!(x < (self.n as u64) << 32); + // TODO: optimiiiiiiise + let Montgomery32 { a, n } = self; + let m = (x as u32).wrapping_mul(*a); + let nm = (*n as u64) * (m as u64); + let (xnm, overflow) = x.overflowing_add(nm); // x + n*m + debug_assert_eq!(xnm % (1 << 32), 0); + + // (x + n*m) / R + // in case of overflow, this is (2¹²⁸ + xnm)/2⁶⁴ - n = xnm/2⁶⁴ + (2⁶⁴ - n) + let y = (xnm >> 32) as u32 + if !overflow { 0 } else { n.wrapping_neg() }; + + if y >= *n { + y - n + } else { + y + } + } +} + +impl Arithmetic for Montgomery32 { + // Montgomery transform, R=2⁶⁴ + // Provides fast arithmetic mod n (n odd, u64) + type I = u32; + + fn new(n: u64) -> Self { + debug_assert!(n < (1 << 32)); + let n = n as u32; + let a = modular_inverse(n).wrapping_neg(); + debug_assert_eq!(n.wrapping_mul(a), 1_u32.wrapping_neg()); + Montgomery32 { a, n } + } + + fn modulus(&self) -> u64 { + self.n as u64 + } + + fn from_u64(&self, x: u64) -> Self::I { + assert!(x < self.n as u64); + let r = ((x << 32) % self.n as u64) as u32; + debug_assert_eq!(x, self.to_u64(r)); + r + } + + fn to_u64(&self, n: Self::I) -> u64 { + self.reduce(n as u64) as u64 + } + + fn add(&self, a: Self::I, b: Self::I) -> Self::I { + let (r, overflow) = a.overflowing_add(b); + + // In case of overflow, a+b = 2⁶⁴ + r = (2⁶⁴ - n) + r (working mod n) + let r = if !overflow { + r + } else { + r + self.n.wrapping_neg() + }; + + // Normalise to [0; n[ + let r = if r < self.n { r } else { r - self.n }; + + // Check that r (reduced back to the usual representation) equals + // a+b % n + #[cfg(debug_assertions)] + { + let a_r = self.to_u64(a); + let b_r = self.to_u64(b); + let r_r = self.to_u64(r); + let r_2 = (((a_r as u128) + (b_r as u128)) % (self.n as u128)) as u64; + debug_assert_eq!( + r_r, r_2, + "[{}] = {} ≠ {} = {} + {} = [{}] + [{}] mod {}; a = {}", + r, r_r, r_2, a_r, b_r, a, b, self.n, self.a + ); + } + r + } + + fn mul(&self, a: Self::I, b: Self::I) -> Self::I { + let r = self.reduce((a as u64) * (b as u64)); + + // Check that r (reduced back to the usual representation) equals + // a*b % n + #[cfg(debug_assertions)] + { + let a_r = self.to_u64(a); + let b_r = self.to_u64(b); + let r_r = self.to_u64(r); + let r_2 = (a_r * b_r) % (self.n as u64); + debug_assert_eq!( + r_r, r_2, + "[{}] = {} ≠ {} = {} * {} = [{}] * [{}] mod {}; a = {}", + r, r_r, r_2, a_r, b_r, a, b, self.n, self.a + ); + } + r + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/uu/factor/src/rho.rs b/src/uu/factor/src/rho.rs index 9a53a40f4..d2023f70f 100644 --- a/src/uu/factor/src/rho.rs +++ b/src/uu/factor/src/rho.rs @@ -51,8 +51,11 @@ fn find_divisor(n: A) -> u64 { fn _factor(num: u64) -> Factors { // Shadow the name, so the recursion automatically goes from “Big” arithmetic to small. let _factor = |n| { - // TODO: Optimise with 32 and 64b versions - _factor::(n) + if n < (1 << 32) { + _factor::(n) + } else { + _factor::(n) + } }; if num == 1 { @@ -75,5 +78,9 @@ fn _factor(num: u64) -> Factors { } pub(crate) fn factor(n: u64) -> Factors { - _factor::(n) + if n < (1 << 32) { + _factor::(n) + } else { + _factor::(n) + } }