1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-30 04:27:45 +00:00

Speed up sum by using reasonable read buffer sizes. (#3741)

* Speed up sum by using reasonable read buffer sizes.

Use a 4K read buffer for each of the checksum functions, which seems
reasonable. This improves the performance of BSD checksums on
odyssey1024.txt from 399ms to 325ms on my laptop, and of SysV
checksums from 242ms to 67ms.

* Add BENCHMARKING.md for `sum`.

* Add comment regarding block sizes.

* Improve portability of BENCHMARKING.md

* Make `div_ceil` const and enhance comment.
This commit is contained in:
Owen Anderson 2022-07-28 05:38:09 -07:00 committed by GitHub
parent 4e72e284b5
commit 8bdee49cdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 6 deletions

View file

@ -0,0 +1,23 @@
## Benchmarking `sum`
<!-- spell-checker:ignore wikidatawiki -->
Large sample files can for example be found in the [Wikipedia database dumps](https://dumps.wikimedia.org/wikidatawiki/latest/), usually sized at multiple gigabytes and comprising more than 100M lines.
After you have obtained and uncompressed such a file, you need to build `sum` in release mode
```shell
$ cargo build --release --package uu_sum
```
and then you can time how long it takes to checksum the file by running
```shell
$ time ./target/release/sum wikidatawiki-20211001-pages-logging.xml
```
For more systematic measurements that include warm-ups, repetitions and comparisons, [Hyperfine](https://github.com/sharkdp/hyperfine) can be helpful. For example, to compare this implementation to the one provided by your distribution, run
```shell
$ hyperfine "./target/release/sum wikidatawiki-20211001-pages-logging.xml" "sum wikidatawiki-20211001-pages-logging.xml"
```

View file

@ -23,14 +23,21 @@ static USAGE: &str = "{} [OPTION]... [FILE]...";
static SUMMARY: &str = "Checksum and count the blocks in a file.\n\
With no FILE, or when FILE is -, read standard input.";
// This can be replaced with usize::div_ceil once it is stabilized.
// This implementation approach is optimized for when `b` is a constant,
// particularly a power of two (the division and modulo then compile to
// shifts/masks). Unlike the naive `(a + b - 1) / b`, this form cannot
// overflow when `a` is close to `usize::MAX`.
const fn div_ceil(a: usize, b: usize) -> usize {
    // Round up: add one extra block whenever there is a nonzero remainder.
    a / b + if a % b != 0 { 1 } else { 0 }
}
fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
let mut buf = [0; 1024];
let mut blocks_read = 0;
let mut buf = [0; 4096];
let mut bytes_read = 0;
let mut checksum: u16 = 0;
loop {
match reader.read(&mut buf) {
Ok(n) if n != 0 => {
blocks_read += 1;
bytes_read += n;
for &byte in buf[..n].iter() {
checksum = (checksum >> 1) + ((checksum & 1) << 15);
checksum = checksum.wrapping_add(u16::from(byte));
@ -40,18 +47,20 @@ fn bsd_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
}
}
// Report blocks read in terms of 1024-byte blocks.
let blocks_read = div_ceil(bytes_read, 1024);
(blocks_read, checksum)
}
fn sysv_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
let mut buf = [0; 512];
let mut blocks_read = 0;
let mut buf = [0; 4096];
let mut bytes_read = 0;
let mut ret = 0u32;
loop {
match reader.read(&mut buf) {
Ok(n) if n != 0 => {
blocks_read += 1;
bytes_read += n;
for &byte in buf[..n].iter() {
ret = ret.wrapping_add(u32::from(byte));
}
@ -63,6 +72,8 @@ fn sysv_sum(mut reader: Box<dyn Read>) -> (usize, u16) {
ret = (ret & 0xffff) + (ret >> 16);
ret = (ret & 0xffff) + (ret >> 16);
// Report blocks read in terms of 512-byte blocks.
let blocks_read = div_ceil(bytes_read, 512);
(blocks_read, ret as u16)
}