diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index afe64ae67..fe13545b5 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -10,6 +10,93 @@ use std::fmt; use crate::display::Quotable; +/// Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). +/// +/// The [`Parser::parse`] function performs the parse. +#[derive(Default)] +pub struct Parser { +} + +impl Parser { + /// Parse a size string into a number of bytes. + /// + /// A size string comprises an integer and an optional unit. The unit + /// may be K, M, G, T, P, E, Z or Y (powers of 1024), or KB, MB, + /// etc. (powers of 1000), or b which is 512. + /// Binary prefixes can be used, too: KiB=K, MiB=M, and so on. + /// + /// # Errors + /// + /// Will return `ParseSizeError` if it's not possible to parse this + /// string into a number, e.g. if the string does not begin with a + /// numeral, or if the unit is not one of the supported units described + /// in the preceding section. + /// + /// # Examples + /// + /// ```rust + /// use uucore::parse_size::parse_size; + /// assert_eq!(Ok(123), parse_size("123")); + /// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 + /// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 + /// ``` + pub fn parse(&self, size: &str) -> Result { + if size.is_empty() { + return Err(ParseSizeError::parse_failure(size)); + } + // Get the numeric part of the size argument. For example, if the + // argument is "123K", then the numeric part is "123". + let numeric_string: String = size.chars().take_while(|c| c.is_ascii_digit()).collect(); + let number: u64 = if !numeric_string.is_empty() { + match numeric_string.parse() { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::parse_failure(size)), + } + } else { + 1 + }; + + // Get the alphabetic units part of the size argument and compute + // the factor it represents. For example, if the argument is "123K", + // then the unit part is "K" and the factor is 1024. This may be the + // empty string, in which case, the factor is 1. + // + // The lowercase "b" (used by `od`, `head`, `tail`, etc.) means + // "block" and the Posix block size is 512. The uppercase "B" + // means "byte". + let unit = &size[numeric_string.len()..]; + let (base, exponent): (u128, u32) = match unit { + "" => (1, 0), + "b" => (512, 1), + "KiB" | "kiB" | "K" | "k" => (1024, 1), + "MiB" | "miB" | "M" | "m" => (1024, 2), + "GiB" | "giB" | "G" | "g" => (1024, 3), + "TiB" | "tiB" | "T" | "t" => (1024, 4), + "PiB" | "piB" | "P" | "p" => (1024, 5), + "EiB" | "eiB" | "E" | "e" => (1024, 6), + "ZiB" | "ziB" | "Z" | "z" => (1024, 7), + "YiB" | "yiB" | "Y" | "y" => (1024, 8), + "KB" | "kB" => (1000, 1), + "MB" | "mB" => (1000, 2), + "GB" | "gB" => (1000, 3), + "TB" | "tB" => (1000, 4), + "PB" | "pB" => (1000, 5), + "EB" | "eB" => (1000, 6), + "ZB" | "zB" => (1000, 7), + "YB" | "yB" => (1000, 8), + _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)), + _ => return Err(ParseSizeError::invalid_suffix(size)), + }; + let factor = match u64::try_from(base.pow(exponent)) { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::size_too_big(size)), + }; + number + .checked_mul(factor) + .ok_or_else(|| ParseSizeError::size_too_big(size)) + } +} + /// Parse a size string into a number of bytes. /// /// A size string comprises an integer and an optional unit. The unit @@ -33,55 +120,7 @@ use crate::display::Quotable; /// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 /// ``` pub fn parse_size(size: &str) -> Result { - if size.is_empty() { - return Err(ParseSizeError::parse_failure(size)); - } - // Get the numeric part of the size argument. For example, if the - // argument is "123K", then the numeric part is "123". - let numeric_string: String = size.chars().take_while(|c| c.is_ascii_digit()).collect(); - let number: u64 = if !numeric_string.is_empty() { - match numeric_string.parse() { - Ok(n) => n, - Err(_) => return Err(ParseSizeError::parse_failure(size)), - } - } else { - 1 - }; - - // Get the alphabetic units part of the size argument and compute - // the factor it represents. For example, if the argument is "123K", - // then the unit part is "K" and the factor is 1024. This may be the - // empty string, in which case, the factor is 1. - let unit = &size[numeric_string.len()..]; - let (base, exponent): (u128, u32) = match unit { - "" => (1, 0), - "b" => (512, 1), // (`od`, `head` and `tail` use "b") - "KiB" | "kiB" | "K" | "k" => (1024, 1), - "MiB" | "miB" | "M" | "m" => (1024, 2), - "GiB" | "giB" | "G" | "g" => (1024, 3), - "TiB" | "tiB" | "T" | "t" => (1024, 4), - "PiB" | "piB" | "P" | "p" => (1024, 5), - "EiB" | "eiB" | "E" | "e" => (1024, 6), - "ZiB" | "ziB" | "Z" | "z" => (1024, 7), - "YiB" | "yiB" | "Y" | "y" => (1024, 8), - "KB" | "kB" => (1000, 1), - "MB" | "mB" => (1000, 2), - "GB" | "gB" => (1000, 3), - "TB" | "tB" => (1000, 4), - "PB" | "pB" => (1000, 5), - "EB" | "eB" => (1000, 6), - "ZB" | "zB" => (1000, 7), - "YB" | "yB" => (1000, 8), - _ if numeric_string.is_empty() => return Err(ParseSizeError::parse_failure(size)), - _ => return Err(ParseSizeError::invalid_suffix(size)), - }; - let factor = match u64::try_from(base.pow(exponent)) { - Ok(n) => n, - Err(_) => return Err(ParseSizeError::size_too_big(size)), - }; - number - .checked_mul(factor) - .ok_or_else(|| ParseSizeError::size_too_big(size)) + Parser::default().parse(size) } #[derive(Debug, PartialEq, Eq)]