1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00

Merge pull request #3653 from jfinkels/dd-concise-byte-counts

dd: correct rendering of SI and IEC byte counts
This commit is contained in:
jfinkels 2022-12-03 11:48:19 -05:00 committed by GitHub
commit 4a4bb550e1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 207 additions and 50 deletions

17
Cargo.lock generated
View file

@ -178,16 +178,6 @@ version = "3.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
[[package]]
name = "byte-unit"
version = "4.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "581ad4b3d627b0c09a0ccb2912148f839acaca0b93cf54cbe42b6c674e86079c"
dependencies = [
"serde",
"utf8-width",
]
[[package]]
name = "bytecount"
version = "0.6.3"
@ -2184,12 +2174,6 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
[[package]]
name = "uu_arch"
version = "0.0.16"
@ -2356,7 +2340,6 @@ dependencies = [
name = "uu_dd"
version = "0.0.16"
dependencies = [
"byte-unit",
"clap",
"gcd",
"libc",

View file

@ -15,7 +15,6 @@ edition = "2021"
path = "src/dd.rs"
[dependencies]
byte-unit = "4.0"
clap = { version = "4.0", features = ["wrap_help", "cargo"] }
gcd = "2.0"
libc = "0.2"

View file

@ -21,6 +21,8 @@ use progress::{gen_prog_updater, ProgUpdate, ReadStat, StatusLevel, WriteStat};
mod blocks;
use blocks::conv_block_unblock_helper;
mod numbers;
use std::cmp;
use std::env;
use std::ffi::OsString;

157
src/uu/dd/src/numbers.rs Normal file
View file

@ -0,0 +1,157 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
/// Functions for formatting a number as a magnitude and a unit suffix.
/// The first ten powers of 1024.
const IEC_BASES: [u128; 10] = [
1,
1_024,
1_048_576,
1_073_741_824,
1_099_511_627_776,
1_125_899_906_842_624,
1_152_921_504_606_846_976,
1_180_591_620_717_411_303_424,
1_208_925_819_614_629_174_706_176,
1_237_940_039_285_380_274_899_124_224,
];
const IEC_SUFFIXES: [&str; 9] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
/// The first ten powers of 1000.
const SI_BASES: [u128; 10] = [
1,
1_000,
1_000_000,
1_000_000_000,
1_000_000_000_000,
1_000_000_000_000_000,
1_000_000_000_000_000_000,
1_000_000_000_000_000_000_000,
1_000_000_000_000_000_000_000_000,
1_000_000_000_000_000_000_000_000_000,
];
const SI_SUFFIXES: [&str; 9] = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
/// A SuffixType determines whether the suffixes are 1000 or 1024 based.
#[derive(Clone, Copy)]
pub(crate) enum SuffixType {
Iec,
Si,
}
impl SuffixType {
fn base_and_suffix(&self, n: u128) -> (u128, &'static str) {
let (bases, suffixes) = match self {
Self::Iec => (IEC_BASES, IEC_SUFFIXES),
Self::Si => (SI_BASES, SI_SUFFIXES),
};
let mut i = 0;
while bases[i + 1] - bases[i] < n && i < suffixes.len() {
i += 1;
}
(bases[i], suffixes[i])
}
}
/// Convert a number into a magnitude and a multi-byte unit suffix.
///
/// The returned string has a maximum length of 5 chars, for example: "1.1kB", "999kB", "1MB".
pub(crate) fn to_magnitude_and_suffix(n: u128, suffix_type: SuffixType) -> String {
let (base, suffix) = suffix_type.base_and_suffix(n);
// TODO To match dd on my machine, we would need to round like
// this:
//
// 1049 => 1.0 kB
// 1050 => 1.0 kB # why is this different?
// 1051 => 1.1 kB
// ...
// 1149 => 1.1 kB
// 1150 => 1.2 kB
// ...
// 1250 => 1.2 kB
// 1251 => 1.3 kB
// ..
// 10500 => 10 kB
// 10501 => 11 kB
//
let quotient = (n as f64) / (base as f64);
if quotient < 10.0 {
format!("{:.1} {}", quotient, suffix)
} else {
format!("{} {}", quotient.round(), suffix)
}
}
#[cfg(test)]
mod tests {
use crate::numbers::{to_magnitude_and_suffix, SuffixType};
#[test]
fn test_to_magnitude_and_suffix_powers_of_1024() {
assert_eq!(to_magnitude_and_suffix(1024, SuffixType::Iec), "1.0 KiB");
assert_eq!(to_magnitude_and_suffix(2048, SuffixType::Iec), "2.0 KiB");
assert_eq!(to_magnitude_and_suffix(4096, SuffixType::Iec), "4.0 KiB");
assert_eq!(
to_magnitude_and_suffix(1024 * 1024, SuffixType::Iec),
"1.0 MiB"
);
assert_eq!(
to_magnitude_and_suffix(2 * 1024 * 1024, SuffixType::Iec),
"2.0 MiB"
);
assert_eq!(
to_magnitude_and_suffix(1024 * 1024 * 1024, SuffixType::Iec),
"1.0 GiB"
);
assert_eq!(
to_magnitude_and_suffix(34 * 1024 * 1024 * 1024, SuffixType::Iec),
"34 GiB"
);
}
#[test]
fn test_to_magnitude_and_suffix_not_powers_of_1024() {
assert_eq!(to_magnitude_and_suffix(1, SuffixType::Si), "1.0 B");
assert_eq!(to_magnitude_and_suffix(999, SuffixType::Si), "999 B");
assert_eq!(to_magnitude_and_suffix(1000, SuffixType::Si), "1.0 kB");
assert_eq!(to_magnitude_and_suffix(1001, SuffixType::Si), "1.0 kB");
assert_eq!(to_magnitude_and_suffix(1023, SuffixType::Si), "1.0 kB");
assert_eq!(to_magnitude_and_suffix(1025, SuffixType::Si), "1.0 kB");
assert_eq!(to_magnitude_and_suffix(10_001, SuffixType::Si), "10 kB");
assert_eq!(to_magnitude_and_suffix(999_000, SuffixType::Si), "999 kB");
assert_eq!(to_magnitude_and_suffix(999_001, SuffixType::Si), "1.0 MB");
assert_eq!(to_magnitude_and_suffix(999_999, SuffixType::Si), "1.0 MB");
assert_eq!(to_magnitude_and_suffix(1_000_000, SuffixType::Si), "1.0 MB");
assert_eq!(to_magnitude_and_suffix(1_000_001, SuffixType::Si), "1.0 MB");
assert_eq!(to_magnitude_and_suffix(1_100_000, SuffixType::Si), "1.1 MB");
assert_eq!(to_magnitude_and_suffix(1_100_001, SuffixType::Si), "1.1 MB");
assert_eq!(to_magnitude_and_suffix(1_900_000, SuffixType::Si), "1.9 MB");
assert_eq!(to_magnitude_and_suffix(1_900_001, SuffixType::Si), "1.9 MB");
assert_eq!(to_magnitude_and_suffix(9_900_000, SuffixType::Si), "9.9 MB");
assert_eq!(to_magnitude_and_suffix(9_900_001, SuffixType::Si), "9.9 MB");
assert_eq!(
to_magnitude_and_suffix(999_000_000, SuffixType::Si),
"999 MB"
);
assert_eq!(
to_magnitude_and_suffix(999_000_001, SuffixType::Si),
"1.0 GB"
);
assert_eq!(
to_magnitude_and_suffix(1_000_000_000, SuffixType::Si),
"1.0 GB"
);
assert_eq!(
to_magnitude_and_suffix(1_000_000_001, SuffixType::Si),
"1.0 GB"
);
}
}

View file

@ -13,7 +13,7 @@ use std::io::Write;
use std::sync::mpsc;
use std::time::Duration;
use byte_unit::Byte;
use crate::numbers::{to_magnitude_and_suffix, SuffixType};
// On Linux, we register a signal handler that prints progress updates.
#[cfg(target_os = "linux")]
@ -129,22 +129,19 @@ impl ProgUpdate {
/// let mut cursor = Cursor::new(vec![]);
/// let rewrite = false;
/// prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
/// assert_eq!(cursor.get_ref(), b"0 bytes copied, 1.0 s, 0 B/s\n");
/// assert_eq!(cursor.get_ref(), b"0 bytes copied, 1.0 s, 0.0 B/s\n");
/// ```
fn write_prog_line(&self, w: &mut impl Write, rewrite: bool) -> std::io::Result<()> {
let btotal_metric = Byte::from_bytes(self.write_stat.bytes_total)
.get_appropriate_unit(false)
.format(0);
let btotal_bin = Byte::from_bytes(self.write_stat.bytes_total)
.get_appropriate_unit(true)
.format(0);
let safe_millis = std::cmp::max(1, self.duration.as_millis());
let transfer_rate = Byte::from_bytes(1000 * (self.write_stat.bytes_total / safe_millis))
.get_appropriate_unit(false)
.format(1);
// The total number of bytes written as a string, in SI and IEC format.
let btotal = self.write_stat.bytes_total;
let btotal_metric = to_magnitude_and_suffix(btotal, SuffixType::Si);
let btotal_bin = to_magnitude_and_suffix(btotal, SuffixType::Iec);
// Compute the throughput (bytes per second) as a string.
let duration = self.duration.as_secs_f64();
let safe_millis = std::cmp::max(1, self.duration.as_millis());
let rate = 1000 * (btotal / safe_millis);
let transfer_rate = to_magnitude_and_suffix(rate, SuffixType::Si);
// If we are rewriting the progress line, do write a carriage
// return (`\r`) at the beginning and don't write a newline
@ -206,7 +203,7 @@ impl ProgUpdate {
/// let mut iter = cursor.get_ref().split(|v| *v == b'\n');
/// assert_eq!(iter.next().unwrap(), b"0+0 records in");
/// assert_eq!(iter.next().unwrap(), b"0+0 records out");
/// assert_eq!(iter.next().unwrap(), b"0 bytes copied, 1.0 s, 0 B/s");
/// assert_eq!(iter.next().unwrap(), b"0 bytes copied, 1.0 s, 0.0 B/s");
/// assert_eq!(iter.next().unwrap(), b"");
/// assert!(iter.next().is_none());
/// ```
@ -564,22 +561,20 @@ mod tests {
// $ : | dd
// 0 bytes copied, 7.9151e-05 s, 0.0 kB/s
//
// The throughput still does not match GNU dd. For the ones
// that include the concise byte counts, the format is also
// not right.
assert_eq!(cursor.get_ref(), b"0 bytes copied, 1.0 s, 0 B/s\n");
// The throughput still does not match GNU dd.
assert_eq!(cursor.get_ref(), b"0 bytes copied, 1.0 s, 0.0 B/s\n");
let prog_update = prog_update_write(999);
let mut cursor = Cursor::new(vec![]);
prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
assert_eq!(cursor.get_ref(), b"999 bytes copied, 1.0 s, 0 B/s\n");
assert_eq!(cursor.get_ref(), b"999 bytes copied, 1.0 s, 0.0 B/s\n");
let prog_update = prog_update_write(1000);
let mut cursor = Cursor::new(vec![]);
prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
assert_eq!(
cursor.get_ref(),
b"1000 bytes (1000 B) copied, 1.0 s, 1000 B/s\n"
b"1000 bytes (1.0 kB) copied, 1.0 s, 1.0 kB/s\n"
);
let prog_update = prog_update_write(1023);
@ -587,15 +582,15 @@ mod tests {
prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
assert_eq!(
cursor.get_ref(),
b"1023 bytes (1 KB) copied, 1.0 s, 1000 B/s\n"
b"1023 bytes (1.0 kB) copied, 1.0 s, 1.0 kB/s\n"
);
let prog_update = prog_update_write(1024);
let mut cursor = Cursor::new(vec![]);
prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
assert_eq!(
std::str::from_utf8(cursor.get_ref()).unwrap(),
"1024 bytes (1 KB, 1024 B) copied, 1.0 s, 1000 B/s\n"
cursor.get_ref(),
b"1024 bytes (1.0 kB, 1.0 KiB) copied, 1.0 s, 1.0 kB/s\n"
);
}
@ -614,7 +609,7 @@ mod tests {
let mut iter = cursor.get_ref().split(|v| *v == b'\n');
assert_eq!(iter.next().unwrap(), b"0+0 records in");
assert_eq!(iter.next().unwrap(), b"0+0 records out");
assert_eq!(iter.next().unwrap(), b"0 bytes copied, 1.0 s, 0 B/s");
assert_eq!(iter.next().unwrap(), b"0 bytes copied, 1.0 s, 0.0 B/s");
assert_eq!(iter.next().unwrap(), b"");
assert!(iter.next().is_none());
}
@ -633,10 +628,10 @@ mod tests {
prog_update.write_prog_line(&mut cursor, rewrite).unwrap();
prog_update.write_transfer_stats(&mut cursor, true).unwrap();
let mut iter = cursor.get_ref().split(|v| *v == b'\n');
assert_eq!(iter.next().unwrap(), b"\r0 bytes copied, 1.0 s, 0 B/s");
assert_eq!(iter.next().unwrap(), b"\r0 bytes copied, 1.0 s, 0.0 B/s");
assert_eq!(iter.next().unwrap(), b"0+0 records in");
assert_eq!(iter.next().unwrap(), b"0+0 records out");
assert_eq!(iter.next().unwrap(), b"0 bytes copied, 1.0 s, 0 B/s");
assert_eq!(iter.next().unwrap(), b"0 bytes copied, 1.0 s, 0.0 B/s");
assert_eq!(iter.next().unwrap(), b"");
assert!(iter.next().is_none());
}

View file

@ -259,7 +259,7 @@ fn test_final_stats_noxfer() {
fn test_final_stats_unspec() {
new_ucmd!()
.run()
.stderr_only("0+0 records in\n0+0 records out\n0 bytes copied, 0.0 s, 0 B/s\n")
.stderr_only("0+0 records in\n0+0 records out\n0 bytes copied, 0.0 s, 0.0 B/s\n")
.success();
}
@ -375,7 +375,7 @@ fn test_null_stats() {
new_ucmd!()
.args(&["if=null.txt"])
.run()
.stderr_only("0+0 records in\n0+0 records out\n0 bytes copied, 0.0 s, 0 B/s\n")
.stderr_only("0+0 records in\n0+0 records out\n0 bytes copied, 0.0 s, 0.0 B/s\n")
.success();
}
@ -1239,22 +1239,43 @@ fn test_bytes_oseek_seek_not_additive() {
}
#[test]
fn test_final_stats_si_iec() {
fn test_final_stats_less_than_one_kb_si() {
let result = new_ucmd!().pipe_in("0".repeat(999)).succeeds();
let s = result.stderr_str();
assert!(s.starts_with("1+1 records in\n1+1 records out\n999 bytes copied,"));
}
#[test]
fn test_final_stats_less_than_one_kb_iec() {
let result = new_ucmd!().pipe_in("0".repeat(1000)).succeeds();
let s = result.stderr_str();
assert!(s.starts_with("1+1 records in\n1+1 records out\n1000 bytes (1000 B) copied,"));
assert!(s.starts_with("1+1 records in\n1+1 records out\n1000 bytes (1.0 kB) copied,"));
let result = new_ucmd!().pipe_in("0".repeat(1023)).succeeds();
let s = result.stderr_str();
assert!(s.starts_with("1+1 records in\n1+1 records out\n1023 bytes (1 KB) copied,"));
assert!(s.starts_with("1+1 records in\n1+1 records out\n1023 bytes (1.0 kB) copied,"));
}
#[test]
fn test_final_stats_more_than_one_kb() {
let result = new_ucmd!().pipe_in("0".repeat(1024)).succeeds();
let s = result.stderr_str();
assert!(s.starts_with("2+0 records in\n2+0 records out\n1024 bytes (1 KB, 1024 B) copied,"));
assert!(s.starts_with("2+0 records in\n2+0 records out\n1024 bytes (1.0 kB, 1.0 KiB) copied,"));
}
#[test]
fn test_final_stats_three_char_limit() {
let result = new_ucmd!().pipe_in("0".repeat(10_000)).succeeds();
let s = result.stderr_str();
assert!(
s.starts_with("19+1 records in\n19+1 records out\n10000 bytes (10 kB, 9.8 KiB) copied,")
);
let result = new_ucmd!().pipe_in("0".repeat(100_000)).succeeds();
let s = result.stderr_str();
assert!(
s.starts_with("195+1 records in\n195+1 records out\n100000 bytes (100 kB, 98 KiB) copied,")
);
}
#[test]