From e1dac4695ee7ff501c88df759af3398e6c9fc056 Mon Sep 17 00:00:00 2001 From: kwantam Date: Fri, 15 May 2015 19:26:31 -0400 Subject: [PATCH] improved Sieve implementation ; add `cargo update` This commit adds `cargo update` to the distclean target in the makefile. This updates the Cargo.lock file when clearing the deps directory. In addition, it adds a faster implementation of the Sieve of Eratosthenes for use by `src/factor/gen_table.rs` and `test/factor.rs`. --- Makefile | 2 +- src/factor/gen_table.rs | 16 +++- src/factor/sieve.rs | 193 ++++++++++++++++++++++++++++++++++++---- test/factor.rs | 4 +- 4 files changed, 190 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 4874f0e1e..af75961e5 100644 --- a/Makefile +++ b/Makefile @@ -329,7 +329,7 @@ clean: $(RM) -rf $(BUILDDIR) $(TEMPDIR) distclean: clean - cd $(BASEDIR)/deps && $(CARGO) clean + cd $(BASEDIR)/deps && $(CARGO) clean && $(CARGO) update $(BUILDDIR): mkdir -p $(BUILDDIR)/gen diff --git a/src/factor/gen_table.rs b/src/factor/gen_table.rs index 02826d0af..f4ad03cce 100644 --- a/src/factor/gen_table.rs +++ b/src/factor/gen_table.rs @@ -15,6 +15,8 @@ //! 2 has no multiplicative inverse mode 2^64 because 2 | 2^64, //! and in any case divisibility by two is trivial by checking the LSB. +#![cfg_attr(test, allow(dead_code))] + use sieve::Sieve; use std::env::args; use std::num::Wrapping; @@ -73,7 +75,7 @@ fn main() { let mut cols = 3; // we want a total of n + 1 values - let mut primes = Sieve::new().take(n + 1); + let mut primes = Sieve::odd_primes().take(n + 1); // in each iteration of the for loop, we use the value yielded // by the previous iteration. This leaves one value left at the @@ -97,16 +99,22 @@ fn main() { } #[test] -fn test_generator_and_inverter() { +fn test_inverter() { let num = 10000; - let invs = Sieve::new().map(|x| inv_mod_u64(x).unwrap()); - assert!(Sieve::new().zip(invs).take(num).all(|(x, y)| { + let invs = Sieve::odd_primes().map(|x| inv_mod_u64(x).unwrap()); + assert!(Sieve::odd_primes().zip(invs).take(num).all(|(x, y)| { let Wrapping(z) = Wrapping(x) * Wrapping(y); is_prime(x) && z == 1 })); } +#[test] +fn test_generator() { + let prime_10001 = Sieve::primes().skip(10000).next(); + assert_eq!(prime_10001, Some(104743)); +} + const MAX_WIDTH: usize = 102; const PREAMBLE: &'static str = r##"/* diff --git a/src/factor/sieve.rs b/src/factor/sieve.rs index d0d5893fe..6b389ec39 100644 --- a/src/factor/sieve.rs +++ b/src/factor/sieve.rs @@ -7,13 +7,17 @@ * that was distributed with this source code. */ -use std::iter::repeat; +use std::iter::{Chain, Cycle, Map}; +use std::slice::Iter; -// A lazy Sieve of Eratosthenes -// Not particularly efficient, but fine for generating a few thousand primes. +/// A lazy Sieve of Eratosthenes. +/// +/// This is a reasonably efficient implementation based on +/// O'Neill, M. E. "[The Genuine Sieve of Eratosthenes.](http://dx.doi.org/10.1017%2FS0956796808007004)" +/// Journal of Functional Programming, Volume 19, Issue 1, 2009, pp. 95--106. pub struct Sieve { - inner: Box>, - filts: Vec, + inner: Wheel, + filts: PrimeHeap, } impl Iterator for Sieve { @@ -27,8 +31,25 @@ impl Iterator for Sieve { #[inline] fn next(&mut self) -> Option { while let Some(n) = self.inner.next() { - if self.filts.iter().all(|&x| n % x != 0) { - self.filts.push(n); + let mut prime = true; + while let Some((next, inc)) = self.filts.peek() { + // need to keep checking the min element of the heap + // until we've found an element that's greater than n + if next > n { + break; // next heap element is bigger than n + } + + if next == n { + // n == next, and is composite. + prime = false; + } + // Increment the element in the prime heap. + self.filts.replace((next + inc, inc)); + } + + if prime { + // this is a prime; add it to the heap + self.filts.insert(n); return Some(n); } } @@ -37,17 +58,155 @@ impl Iterator for Sieve { } impl Sieve { + fn new() -> Sieve { + Sieve { inner: Wheel::new(), filts: PrimeHeap::new() } + } + + #[allow(dead_code)] #[inline] - pub fn new() -> Sieve { - fn next(s: &mut u64, t: u64) -> Option { - let ret = Some(*s); - *s = *s + t; - ret - } - let next = next; + pub fn primes() -> PrimeSieve { + fn deref(x: &u64) -> u64 { *x } + let deref = deref as fn(&u64) -> u64; + INIT_PRIMES.iter().map(deref).chain(Sieve::new()) + } - let odds_by_3 = Box::new(repeat(2).scan(3, next)) as Box>; - - Sieve { inner: odds_by_3, filts: Vec::new() } + #[allow(dead_code)] + #[inline] + pub fn odd_primes() -> PrimeSieve { + fn deref(x: &u64) -> u64 { *x } + let deref = deref as fn(&u64) -> u64; + (&INIT_PRIMES[1..]).iter().map(deref).chain(Sieve::new()) + } +} + +pub type PrimeSieve = Chain, fn(&u64) -> u64>, Sieve>; + +/// An iterator that generates an infinite list of numbers that are +/// not divisible by any of 2, 3, 5, or 7. +struct Wheel { + next: u64, + increment: Cycle>, +} + +impl Iterator for Wheel { + type Item = u64; + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (1, None) + } + + #[inline] + fn next (&mut self) -> Option { + let increment = self.increment.next().unwrap(); // infinite iterator, no check necessary + let ret = self.next; + self.next = ret + increment; + Some(ret) + } +} + +impl Wheel { + #[inline] + fn new() -> Wheel { + Wheel { next: 11u64, increment: WHEEL_INCS.iter().cycle() } + } +} + +/// The increments of a wheel of circumference 210 +/// (i.e., a wheel that skips all multiples of 2, 3, 5, 7) +const WHEEL_INCS: &'static [u64] = &[2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,2,4,6,2,6,4,2,4,2,10,2,10]; +const INIT_PRIMES: &'static [u64] = &[2, 3, 5, 7]; + +/// A min-heap of "infinite lists" of prime multiples, where a list is +/// represented as (head, increment). +#[derive(Debug)] +struct PrimeHeap { + data: Vec<(u64, u64)>, +} + +impl PrimeHeap { + fn new() -> PrimeHeap { + PrimeHeap { data: Vec::new() } + } + + fn peek(&self) -> Option<(u64, u64)> { + if let Some(&(x, y)) = self.data.get(0) { + Some((x, y)) + } else { + None + } + } + + fn insert(&mut self, next: u64) { + let mut idx = self.data.len(); + let key = next * next; + + let item = (key, next); + self.data.push(item); + loop { + // break if we've bubbled to the top + if idx == 0 { + break; + } + + let paridx = (idx - 1) / 2; + let (k, _) = self.data[paridx]; + if key < k { + // bubble up, found a smaller key + self.data.swap(idx, paridx); + idx = paridx; + } else { + // otherwise, parent is smaller, so we're done + break; + } + } + } + + fn remove(&mut self) -> (u64, u64) { + let ret = self.data.swap_remove(0); + + let mut idx = 0; + let len = self.data.len(); + let (key, _) = self.data[0]; + loop { + let child1 = 2*idx + 1; + let child2 = 2*idx + 2; + + // no more children + if child1 >= len { + break; + } + + // find lesser child + let (c1key, _) = self.data[child1]; + let (minidx, minkey) = if child2 >= len { + (child1, c1key) + } else { + let (c2key, _) = self.data[child2]; + if c1key < c2key { + (child1, c1key) + } else { + (child2, c2key) + } + }; + + if minkey < key { + self.data.swap(minidx, idx); + idx = minidx; + continue; + } + + // smaller than both children, so done + break; + } + + ret + } + + /// More efficient than inserting and removing in two steps + /// because we save one traversal of the heap. + fn replace(&mut self, next: (u64, u64)) -> (u64, u64) { + self.data.push(next); + self.remove() } } diff --git a/test/factor.rs b/test/factor.rs index 872ea0d27..a8e8103d8 100644 --- a/test/factor.rs +++ b/test/factor.rs @@ -27,9 +27,7 @@ const PROGNAME: &'static str = "./factor"; #[test] fn test_random() { - let mut primes = Sieve::new().take(NUM_PRIMES - 1).collect::>(); - primes.push(2); - let primes = primes; + let primes = Sieve::primes().take(NUM_PRIMES).collect::>(); let mut rng = weak_rng(); let mut rand_gt = move |min: u64| {