diff --git a/Cargo.lock b/Cargo.lock index 997d52fab..0674d3de0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,6 +43,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "array-init" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6945cc5422176fc5e602e590c2878d2c2acd9a4fe20a4baa7c28022521698ec6" + [[package]] name = "arrayvec" version = "0.4.12" @@ -279,6 +285,7 @@ dependencies = [ "uu_expand", "uu_expr", "uu_factor", + "uu_factor_benches", "uu_false", "uu_fmt", "uu_fold", @@ -2029,17 +2036,26 @@ name = "uu_factor" version = "0.0.6" dependencies = [ "coz", - "criterion", "num-traits", "paste", "quickcheck", "rand 0.7.3", - "rand_chacha", "smallvec", "uucore", "uucore_procs", ] +[[package]] +name = "uu_factor_benches" +version = "0.0.0" +dependencies = [ + "array-init", + "criterion", + "rand 0.7.3", + "rand_chacha", + "uu_factor", +] + [[package]] name = "uu_false" version = "0.0.6" diff --git a/Cargo.toml b/Cargo.toml index 7c1a771fd..745393260 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -324,6 +324,9 @@ wc = { optional=true, version="0.0.6", package="uu_wc", path="src/uu/wc" } who = { optional=true, version="0.0.6", package="uu_who", path="src/uu/who" } whoami = { optional=true, version="0.0.6", package="uu_whoami", path="src/uu/whoami" } yes = { optional=true, version="0.0.6", package="uu_yes", path="src/uu/yes" } + +factor_benches = { optional = true, version = "0.0.0", package = "uu_factor_benches", path = "tests/benches/factor" } + # # * pinned transitive dependencies # Not needed for now. Keep as examples: diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md new file mode 100644 index 000000000..e174d62b7 --- /dev/null +++ b/src/uu/factor/BENCHMARKING.md @@ -0,0 +1,116 @@ +# Benchmarking `factor` + +The benchmarks for `factor` are located under `tests/benches/factor` +and can be invoked with `cargo bench` in that directory. + +They are located outside the `uu_factor` crate, as they do not comply +with the project's minimum supported Rust version, *i.e.* may require +a newer version of `rustc`. + + +## Microbenchmarking deterministic functions + +We currently use [`criterion`] to benchmark deterministic functions, +such as `gcd` and `table::factor`. + +However, µbenchmarks are by nature unstable: not only are they specific to +the hardware, operating system version, etc., but they are noisy and affected +by other tasks on the system (browser, compile jobs, etc.), which can cause +`criterion` to report spurious performance improvements and regressions. + +This can be mitigated by getting as close to [idealised conditions][lemire] +as possible: +- minimize the amount of computation and I/O running concurrently to the + benchmark, *i.e.* close your browser and IM clients, don't compile at the + same time, etc. ; +- ensure the CPU's [frequency stays constant] during the benchmark ; +- [isolate a **physical** core], set it to `nohz_full`, and pin the benchmark + to it, so it won't be preempted in the middle of a measurement ; +- disable ASLR by running `setarch -R cargo bench`, so we can compare results + across multiple executions. + + +[`criterion`]: https://bheisler.github.io/criterion.rs/book/index.html +[lemire]: https://lemire.me/blog/2018/01/16/microbenchmarking-calls-for-idealized-conditions/ +[isolate a **physical** core]: https://pyperf.readthedocs.io/en/latest/system.html#isolate-cpus-on-linux +[frequency stays constant]: XXXTODO + + +### Guidance for designing µbenchmarks + +*Note:* this guidance is specific to `factor` and takes its application domain +into account; do not expect it to generalise to other projects. It is based +on Daniel Lemire's [*Microbenchmarking calls for idealized conditions*][lemire], +which I recommend reading if you want to add benchmarks to `factor`. + +1. Select a small, self-contained, deterministic component + `gcd` and `table::factor` are good example of such: + - no I/O or access to external data structures ; + - no call into other components ; + - behaviour is deterministic: no RNG, no concurrency, ... ; + - the test's body is *fast* (~100ns for `gcd`, ~10µs for `factor::table`), + so each sample takes a very short time, minimizing variability and + maximizing the numbers of samples we can take in a given time. + +2. Benchmarks are immutable (once merged in `uutils`) + Modifying a benchmark means previously-collected values cannot meaningfully + be compared, silently giving nonsensical results. If you must modify an + existing benchmark, rename it. + +3. Test common cases + We are interested in overall performance, rather than specific edge-cases; + use **reproducibly-randomised inputs**, sampling from either all possible + input values or some subset of interest. + +4. Use [`criterion`], `criterion::black_box`, ... + `criterion` isn't perfect, but it is also much better than ad-hoc + solutions in each benchmark. + + +## Wishlist + +### Configurable statistical estimators + +`criterion` always uses the arithmetic average as estimator; in µbenchmarks, +where the code under test is fully deterministic and the measurements are +subject to additive, positive noise, [the minimum is more appropriate][lemire]. + + +### CI & reproducible performance testing + +Measuring performance on real hardware is important, as it relates directly +to what users of `factor` experience; however, such measurements are subject +to the constraints of the real-world, and aren't perfectly reproducible. +Moreover, the mitigations for it (described above) aren't achievable in +virtualized, multi-tenant environments such as CI. + +Instead, we could run the µbenchmarks in a simulated CPU with [`cachegrind`], +measure execution “time” in that model (in CI), and use it to detect and report +performance improvements and regressions. + +[`iai`] is an implementation of this idea for Rust. + +[`cachegrind`]: https://www.valgrind.org/docs/manual/cg-manual.html +[`iai`]: https://bheisler.github.io/criterion.rs/book/iai/iai.html + + +### Comparing randomised implementations across multiple inputs + +`factor` is a challenging target for system benchmarks as it combines two +characteristics: + +1. integer factoring algorithms are randomised, with large variance in + execution time ; + +2. various inputs also have large differences in factoring time, that + corresponds to no natural, linear ordering of the inputs. + + +If (1) was untrue (i.e. if execution time wasn't random), we could faithfully +compare 2 implementations (2 successive versions, or `uutils` and GNU) using +a scatter plot, where each axis corresponds to the perf. of one implementation. + +Similarly, without (2) we could plot numbers on the X axis and their factoring +time on the Y axis, using multiple lines for various quantiles. The large +differences in factoring times for successive numbers, mean that such a plot +would be unreadable. diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index c4e7e8469..eb34519f1 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -17,20 +17,15 @@ num-traits = "0.2.13" # used in src/numerics.rs, which is included by build.rs [dependencies] coz = { version = "0.1.3", optional = true } num-traits = "0.2.13" # Needs at least version 0.2.13 for "OverflowingAdd" -rand = { version="0.7", features=["small_rng"] } -smallvec = { version="0.6.14, < 1.0" } -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } -uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } +rand = { version = "0.7", features = ["small_rng"] } +smallvec = { version = "0.6.14, < 1.0" } +uucore = { version = ">=0.0.8", package = "uucore", path = "../../uucore" } +uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" } [dev-dependencies] -criterion = "0.3" paste = "0.1.18" quickcheck = "0.9.2" -rand_chacha = "0.2.2" -[[bench]] -name = "gcd" -harness = false [[bin]] name = "factor" diff --git a/src/uu/factor/src/cli.rs b/src/uu/factor/src/cli.rs index fb7b3f192..ee4c8a4c4 100644 --- a/src/uu/factor/src/cli.rs +++ b/src/uu/factor/src/cli.rs @@ -13,13 +13,13 @@ use std::error::Error; use std::io::{self, stdin, stdout, BufRead, Write}; mod factor; -pub(crate) use factor::*; +pub use factor::*; use uucore::InvalidEncodingHandling; mod miller_rabin; pub mod numeric; mod rho; -mod table; +pub mod table; static SYNTAX: &str = "[OPTION] [NUMBER]..."; static SUMMARY: &str = "Print the prime factors of the given number(s). diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index ebe06a1c5..b279de7fc 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -161,7 +161,7 @@ pub fn factor(mut n: u64) -> Factors { return factors; } - let (factors, n) = table::factor(n, factors); + table::factor(&mut n, &mut factors); #[allow(clippy::let_and_return)] let r = if n < (1 << 32) { @@ -239,9 +239,13 @@ mod tests { } #[cfg(test)] -impl quickcheck::Arbitrary for Factors { - fn arbitrary(gen: &mut G) -> Self { - use rand::Rng; +use rand::{ + distributions::{Distribution, Standard}, + Rng, +}; +#[cfg(test)] +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Factors { let mut f = Factors::one(); let mut g = 1u64; let mut n = u64::MAX; @@ -252,7 +256,7 @@ impl quickcheck::Arbitrary for Factors { // See Generating Random Factored Numbers, Easily, J. Cryptology (2003) 'attempt: loop { while n > 1 { - n = gen.gen_range(1, n); + n = rng.gen_range(1, n); if miller_rabin::is_prime(n) { if let Some(h) = g.checked_mul(n) { f.push(n); @@ -269,6 +273,13 @@ impl quickcheck::Arbitrary for Factors { } } +#[cfg(test)] +impl quickcheck::Arbitrary for Factors { + fn arbitrary(g: &mut G) -> Self { + g.gen() + } +} + #[cfg(test)] impl std::ops::BitXor for Factors { type Output = Self; diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index 94ad6df4c..518d4f241 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -8,15 +8,13 @@ // spell-checker: ignore (ToDO) INVS -use std::num::Wrapping; - use crate::Factors; include!(concat!(env!("OUT_DIR"), "/prime_table.rs")); -pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { +pub fn factor(num: &mut u64, factors: &mut Factors) { for &(prime, inv, ceil) in P_INVS_U64 { - if num == 1 { + if *num == 1 { break; } @@ -27,11 +25,11 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { // for a nice explanation. let mut k = 0; loop { - let Wrapping(x) = Wrapping(num) * Wrapping(inv); + let x = num.wrapping_mul(inv); // While prime divides num if x <= ceil { - num = x; + *num = x; k += 1; #[cfg(feature = "coz")] coz::progress!("factor found"); @@ -43,6 +41,61 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { } } } - - (factors, num) +} + +pub const CHUNK_SIZE: usize = 8; +pub fn factor_chunk(n_s: &mut [u64; CHUNK_SIZE], f_s: &mut [Factors; CHUNK_SIZE]) { + for &(prime, inv, ceil) in P_INVS_U64 { + if n_s[0] == 1 && n_s[1] == 1 && n_s[2] == 1 && n_s[3] == 1 { + break; + } + + for (num, factors) in n_s.iter_mut().zip(f_s.iter_mut()) { + if *num == 1 { + continue; + } + let mut k = 0; + loop { + let x = num.wrapping_mul(inv); + + // While prime divides num + if x <= ceil { + *num = x; + k += 1; + } else { + if k > 0 { + factors.add(prime, k); + } + break; + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Factors; + use quickcheck::quickcheck; + use rand::{rngs::SmallRng, Rng, SeedableRng}; + + quickcheck! { + fn chunk_vs_iter(seed: u64) -> () { + let mut rng = SmallRng::seed_from_u64(seed); + let mut n_c: [u64; CHUNK_SIZE] = rng.gen(); + let mut f_c: [Factors; CHUNK_SIZE] = rng.gen(); + + let mut n_i = n_c.clone(); + let mut f_i = f_c.clone(); + for (n, f) in n_i.iter_mut().zip(f_i.iter_mut()) { + factor(n, f); + } + + factor_chunk(&mut n_c, &mut f_c); + + assert_eq!(n_i, n_c); + assert_eq!(f_i, f_c); + } + } } diff --git a/tests/benches/factor/Cargo.toml b/tests/benches/factor/Cargo.toml new file mode 100644 index 000000000..b3b718477 --- /dev/null +++ b/tests/benches/factor/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "uu_factor_benches" +version = "0.0.0" +authors = ["nicoo "] +license = "MIT" +description = "Benchmarks for the uu_factor integer factorization tool" +homepage = "https://github.com/uutils/coreutils" +edition = "2018" + +[dependencies] +uu_factor = { path = "../../../src/uu/factor" } + +[dev-dependencies] +array-init = "2.0.0" +criterion = "0.3" +rand = "0.7" +rand_chacha = "0.2.2" + + +[[bench]] +name = "gcd" +harness = false + +[[bench]] +name = "table" +harness = false diff --git a/src/uu/factor/benches/gcd.rs b/tests/benches/factor/benches/gcd.rs similarity index 100% rename from src/uu/factor/benches/gcd.rs rename to tests/benches/factor/benches/gcd.rs diff --git a/tests/benches/factor/benches/table.rs b/tests/benches/factor/benches/table.rs new file mode 100644 index 000000000..0b31b2b4c --- /dev/null +++ b/tests/benches/factor/benches/table.rs @@ -0,0 +1,78 @@ +use array_init::array_init; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use std::convert::TryInto; +use uu_factor::{table::*, Factors}; + +fn table(c: &mut Criterion) { + #[cfg(target_os = "linux")] + check_personality(); + + const INPUT_SIZE: usize = 128; + assert!( + INPUT_SIZE % CHUNK_SIZE == 0, + "INPUT_SIZE ({}) is not divisible by CHUNK_SIZE ({})", + INPUT_SIZE, + CHUNK_SIZE + ); + let inputs = { + // Deterministic RNG; use an explicitely-named RNG to guarantee stability + use rand::{RngCore, SeedableRng}; + use rand_chacha::ChaCha8Rng; + const SEED: u64 = 0xdead_bebe_ea75_cafe; + let mut rng = ChaCha8Rng::seed_from_u64(SEED); + + std::iter::repeat_with(move || array_init::<_, _, INPUT_SIZE>(|_| rng.next_u64())) + }; + + let mut group = c.benchmark_group("table"); + group.throughput(Throughput::Elements(INPUT_SIZE as _)); + for a in inputs.take(10) { + let a_str = format!("{:?}", a); + group.bench_with_input(BenchmarkId::new("factor_chunk", &a_str), &a, |b, &a| { + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); + for (n_s, f_s) in n_s.chunks_mut(CHUNK_SIZE).zip(f_s.chunks_mut(CHUNK_SIZE)) { + factor_chunk(n_s.try_into().unwrap(), f_s.try_into().unwrap()) + } + }) + }); + group.bench_with_input(BenchmarkId::new("factor", &a_str), &a, |b, &a| { + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); + for (n, f) in n_s.iter_mut().zip(f_s.iter_mut()) { + factor(n, f) + } + }) + }); + } + group.finish() +} + +#[cfg(target_os = "linux")] +fn check_personality() { + use std::fs; + const ADDR_NO_RANDOMIZE: u64 = 0x0040000; + const PERSONALITY_PATH: &'static str = "/proc/self/personality"; + + let p_string = fs::read_to_string(PERSONALITY_PATH) + .expect(&format!("Couldn't read '{}'", PERSONALITY_PATH)) + .strip_suffix("\n") + .unwrap() + .to_owned(); + + let personality = u64::from_str_radix(&p_string, 16).expect(&format!( + "Expected a hex value for personality, got '{:?}'", + p_string + )); + if personality & ADDR_NO_RANDOMIZE == 0 { + eprintln!( + "WARNING: Benchmarking with ASLR enabled (personality is {:x}), results might not be reproducible.", + personality + ); + } +} + +criterion_group!(benches, table); +criterion_main!(benches);