From 847378f8d28cb6e96eea4010946bfdf3f8e27569 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 18 Nov 2022 20:20:30 -0500 Subject: [PATCH 1/2] uucore: create parse_size::Parser struct Add a `uucore::parse_size::Parser` struct which will allow future commits to add fields that change the behavior of `parse_size()`. --- src/uucore/src/lib/parser/parse_size.rs | 137 +++++++++++++++--------- 1 file changed, 88 insertions(+), 49 deletions(-) diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index afe64ae67..fe13545b5 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -10,6 +10,93 @@ use std::fmt; use crate::display::Quotable; +/// Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). +/// +/// The [`Parser::parse`] function performs the parse. +#[derive(Default)] +pub struct Parser { +} + +impl Parser { + /// Parse a size string into a number of bytes. + /// + /// A size string comprises an integer and an optional unit. The unit + /// may be K, M, G, T, P, E, Z or Y (powers of 1024), or KB, MB, + /// etc. (powers of 1000), or b which is 512. + /// Binary prefixes can be used, too: KiB=K, MiB=M, and so on. + /// + /// # Errors + /// + /// Will return `ParseSizeError` if it's not possible to parse this + /// string into a number, e.g. if the string does not begin with a + /// numeral, or if the unit is not one of the supported units described + /// in the preceding section. + /// + /// # Examples + /// + /// ```rust + /// use uucore::parse_size::parse_size; + /// assert_eq!(Ok(123), parse_size("123")); + /// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 + /// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 + /// ``` + pub fn parse(&self, size: &str) -> Result<u64, ParseSizeError> { + if size.is_empty() { + return Err(ParseSizeError::parse_failure(size)); + } + // Get the numeric part of the size argument. 
For example, if the + // argument is "123K", then the numeric part is "123". + let numeric_string: String = size.chars().take_while(|c| c.is_ascii_digit()).collect(); + let number: u64 = if !numeric_string.is_empty() { + match numeric_string.parse() { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::parse_failure(size)), + } + } else { + 1 + }; + + // Get the alphabetic units part of the size argument and compute + // the factor it represents. For example, if the argument is "123K", + // then the unit part is "K" and the factor is 1024. This may be the + // empty string, in which case, the factor is 1. + // + // The lowercase "b" (used by `od`, `head`, `tail`, etc.) means + // "block" and the Posix block size is 512. The uppercase "B" + // means "byte". + let unit = &size[numeric_string.len()..]; + let (base, exponent): (u128, u32) = match unit { + "" => (1, 0), + "b" => (512, 1), + "KiB" | "kiB" | "K" | "k" => (1024, 1), + "MiB" | "miB" | "M" | "m" => (1024, 2), + "GiB" | "giB" | "G" | "g" => (1024, 3), + "TiB" | "tiB" | "T" | "t" => (1024, 4), + "PiB" | "piB" | "P" | "p" => (1024, 5), + "EiB" | "eiB" | "E" | "e" => (1024, 6), + "ZiB" | "ziB" | "Z" | "z" => (1024, 7), + "YiB" | "yiB" | "Y" | "y" => (1024, 8), + "KB" | "kB" => (1000, 1), + "MB" | "mB" => (1000, 2), + "GB" | "gB" => (1000, 3), + "TB" | "tB" => (1000, 4), + "PB" | "pB" => (1000, 5), + "EB" | "eB" => (1000, 6), + "ZB" | "zB" => (1000, 7), + "YB" | "yB" => (1000, 8), + _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)), + _ => return Err(ParseSizeError::invalid_suffix(size)), + }; + let factor = match u64::try_from(base.pow(exponent)) { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::size_too_big(size)), + }; + number + .checked_mul(factor) + .ok_or_else(|| ParseSizeError::size_too_big(size)) + } +} + /// Parse a size string into a number of bytes. /// /// A size string comprises an integer and an optional unit. 
The unit @@ -33,55 +120,7 @@ use crate::display::Quotable; /// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 /// ``` pub fn parse_size(size: &str) -> Result<u64, ParseSizeError> { - if size.is_empty() { - return Err(ParseSizeError::parse_failure(size)); - } - // Get the numeric part of the size argument. For example, if the - // argument is "123K", then the numeric part is "123". - let numeric_string: String = size.chars().take_while(|c| c.is_ascii_digit()).collect(); - let number: u64 = if !numeric_string.is_empty() { - match numeric_string.parse() { - Ok(n) => n, - Err(_) => return Err(ParseSizeError::parse_failure(size)), - } - } else { - 1 - }; - - // Get the alphabetic units part of the size argument and compute - // the factor it represents. For example, if the argument is "123K", - // then the unit part is "K" and the factor is 1024. This may be the - // empty string, in which case, the factor is 1. - let unit = &size[numeric_string.len()..]; - let (base, exponent): (u128, u32) = match unit { - "" => (1, 0), - "b" => (512, 1), // (`od`, `head` and `tail` use "b") - "KiB" | "kiB" | "K" | "k" => (1024, 1), - "MiB" | "miB" | "M" | "m" => (1024, 2), - "GiB" | "giB" | "G" | "g" => (1024, 3), - "TiB" | "tiB" | "T" | "t" => (1024, 4), - "PiB" | "piB" | "P" | "p" => (1024, 5), - "EiB" | "eiB" | "E" | "e" => (1024, 6), - "ZiB" | "ziB" | "Z" | "z" => (1024, 7), - "YiB" | "yiB" | "Y" | "y" => (1024, 8), - "KB" | "kB" => (1000, 1), - "MB" | "mB" => (1000, 2), - "GB" | "gB" => (1000, 3), - "TB" | "tB" => (1000, 4), - "PB" | "pB" => (1000, 5), - "EB" | "eB" => (1000, 6), - "ZB" | "zB" => (1000, 7), - "YB" | "yB" => (1000, 8), - _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)), - _ => return Err(ParseSizeError::invalid_suffix(size)), - }; - let factor = match u64::try_from(base.pow(exponent)) { - Ok(n) => n, - Err(_) => return Err(ParseSizeError::size_too_big(size)), - }; - number - .checked_mul(factor) - .ok_or_else(|| 
ParseSizeError::size_too_big(size)) + Parser::default().parse(size) } #[derive(Debug, PartialEq, Eq)] From 701550d76b4700123f52f193ebbf55d95f898081 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 18 Nov 2022 20:27:42 -0500 Subject: [PATCH 2/2] dd: allow B as a suffix for count, seek, and skip Allow uppercase "B" on its own as a unit specifier for the `count`, `seek`, and `skip` arguments to `dd`. For example, $ printf "abcdef" | dd count=3B status=none abc --- src/uu/dd/src/parseargs.rs | 7 ++++-- src/uucore/src/lib/parser/parse_size.rs | 3 +++ tests/by-util/test_dd.rs | 30 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/uu/dd/src/parseargs.rs b/src/uu/dd/src/parseargs.rs index ecca62182..4ae6cf922 100644 --- a/src/uu/dd/src/parseargs.rs +++ b/src/uu/dd/src/parseargs.rs @@ -14,7 +14,7 @@ use crate::conversion_tables::ConversionTable; use std::error::Error; use uucore::display::Quotable; use uucore::error::UError; -use uucore::parse_size::ParseSizeError; +use uucore::parse_size::{ParseSizeError, Parser as SizeParser}; use uucore::show_warning; /// Parser Errors describe errors with parser input @@ -499,8 +499,11 @@ fn parse_bytes_only(s: &str) -> Result<u64, ParseError> { /// assert_eq!(parse_bytes_no_x("2k", "2k").unwrap(), 2 * 1024); /// ``` fn parse_bytes_no_x(full: &str, s: &str) -> Result<u64, ParseError> { + let parser = SizeParser { + capital_b_bytes: true, + }; let (num, multiplier) = match (s.find('c'), s.rfind('w'), s.rfind('b')) { - (None, None, None) => match uucore::parse_size::parse_size(s) { + (None, None, None) => match parser.parse(s) { Ok(n) => (n, 1), Err(ParseSizeError::InvalidSuffix(_)) | Err(ParseSizeError::ParseFailure(_)) => { return Err(ParseError::InvalidNumber(full.to_string())) diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index fe13545b5..4ec8268de 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -15,6 +15,8 @@ 
use crate::display::Quotable; /// The [`Parser::parse`] function performs the parse. #[derive(Default)] pub struct Parser { + /// Whether to treat the suffix "B" as meaning "bytes". + pub capital_b_bytes: bool, } impl Parser { @@ -67,6 +69,7 @@ impl Parser { let unit = &size[numeric_string.len()..]; let (base, exponent): (u128, u32) = match unit { "" => (1, 0), + "B" if self.capital_b_bytes => (1, 0), "b" => (512, 1), "KiB" | "kiB" | "K" | "k" => (1024, 1), "MiB" | "miB" | "M" | "m" => (1024, 2), diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index 69a941443..1bf64a0b3 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -1295,3 +1295,33 @@ fn test_big_multiplication() { .fails() .stderr_contains("invalid number"); } + +/// Test for count, seek, and skip given in units of bytes. +#[test] +fn test_bytes_suffix() { + new_ucmd!() + .args(&["count=3B", "status=none"]) + .pipe_in("abcdef") + .succeeds() + .stdout_only("abc"); + new_ucmd!() + .args(&["skip=3B", "status=none"]) + .pipe_in("abcdef") + .succeeds() + .stdout_only("def"); + new_ucmd!() + .args(&["iseek=3B", "status=none"]) + .pipe_in("abcdef") + .succeeds() + .stdout_only("def"); + new_ucmd!() + .args(&["seek=3B", "status=none"]) + .pipe_in("abcdef") + .succeeds() + .stdout_only("\0\0\0abcdef"); + new_ucmd!() + .args(&["oseek=3B", "status=none"]) + .pipe_in("abcdef") + .succeeds() + .stdout_only("\0\0\0abcdef"); +}