1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00

uucore: add new module "parse_size"

This adds a function to parse size strings, e.g. "2KiB" or "3MB".
It is based on similar functions used by head/tail/truncate, etc.
This commit is contained in:
Jan Scheer 2021-05-28 17:02:25 +02:00
parent 41539df91d
commit 0c502f587b
3 changed files with 248 additions and 2 deletions

View file

@ -19,10 +19,10 @@ pub extern crate winapi;
//## internal modules
mod macros; // crate macros (macro_rules-type; exported to `crate::...`)
mod features; // feature-gated code modules
mod macros; // crate macros (macro_rules-type; exported to `crate::...`)
mod mods; // core cross-platform modules
mod parser; // string parsing moduls
// * cross-platform modules
pub use crate::mods::backup_control;
@ -31,6 +31,9 @@ pub use crate::mods::os;
pub use crate::mods::panic;
pub use crate::mods::ranges;
// * string parsing modules
pub use crate::parser::parse_size;
// * feature-gated modules
#[cfg(feature = "encoding")]
pub use crate::features::encoding;

View file

@ -0,0 +1 @@
pub mod parse_size;

View file

@ -0,0 +1,242 @@
use std::convert::TryFrom;
use std::error::Error;
use std::fmt;
/// Parse a size string into a number of bytes.
///
/// A size string comprises an integer and an optional unit. The unit
/// may be K, M, G, T, P, E, Z or Y (powers of 1024), or KB, MB,
/// etc. (powers of 1000), or b which is 512.
/// Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
///
/// # Errors
///
/// Will return `ParseSizeError` if its not possible to parse this
/// string into a number, e.g. if the string does not begin with a
/// numeral, or if the unit is not one of the supported units described
/// in the preceding section.
///
/// # Examples
///
/// ```rust
/// use uucore::parse_size::parse_size;
/// assert_eq!(Ok(123), parse_size("123"));
/// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000
/// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024
/// ```
pub fn parse_size(size: &str) -> Result<usize, ParseSizeError> {
if size.is_empty() {
return Err(ParseSizeError::parse_failure(size));
}
// Get the numeric part of the size argument. For example, if the
// argument is "123K", then the numeric part is "123".
let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect();
let number: usize = if !numeric_string.is_empty() {
match numeric_string.parse() {
Ok(n) => n,
Err(_) => return Err(ParseSizeError::parse_failure(size)),
}
} else {
1
};
// Get the alphabetic units part of the size argument and compute
// the factor it represents. For example, if the argument is "123K",
// then the unit part is "K" and the factor is 1024. This may be the
// empty string, in which case, the factor is 1.
let unit = &size[numeric_string.len()..];
let (base, exponent): (u128, u32) = match unit {
"" => (1, 0),
"b" => (512, 1), // (`head` and `tail` use "b")
"KiB" | "K" | "k" => (1024, 1),
"MiB" | "M" | "m" => (1024, 2),
"GiB" | "G" | "g" => (1024, 3),
"TiB" | "T" | "t" => (1024, 4),
"PiB" | "P" | "p" => (1024, 5),
"EiB" | "E" | "e" => (1024, 6),
"ZiB" | "Z" | "z" => (1024, 7),
"YiB" | "Y" | "y" => (1024, 8),
"KB" | "kB" => (1000, 1),
"MB" | "mB" => (1000, 2),
"GB" | "gB" => (1000, 3),
"TB" | "tB" => (1000, 4),
"PB" | "pB" => (1000, 5),
"EB" | "eB" => (1000, 6),
"ZB" | "zB" => (1000, 7),
"YB" | "yB" => (1000, 8),
_ => return Err(ParseSizeError::parse_failure(size)),
};
let factor = match usize::try_from(base.pow(exponent)) {
Ok(n) => n,
Err(_) => return Err(ParseSizeError::size_too_big(size)),
};
match number.checked_mul(factor) {
Some(n) => Ok(n),
None => Err(ParseSizeError::size_too_big(size)),
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum ParseSizeError {
ParseFailure(String), // Syntax
SizeTooBig(String), // Overflow
}
impl Error for ParseSizeError {
fn description(&self) -> &str {
match *self {
ParseSizeError::ParseFailure(ref s) => &*s,
ParseSizeError::SizeTooBig(ref s) => &*s,
}
}
}
impl fmt::Display for ParseSizeError {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
let s = match self {
ParseSizeError::ParseFailure(s) => s,
ParseSizeError::SizeTooBig(s) => s,
};
write!(f, "{}", s)
}
}
impl ParseSizeError {
fn parse_failure(s: &str) -> ParseSizeError {
// has to be handled in the respective uutils because strings differ, e.g.
// truncate: Invalid number: fb
// tail: invalid number of bytes: fb
ParseSizeError::ParseFailure(format!("{}", s))
}
fn size_too_big(s: &str) -> ParseSizeError {
// has to be handled in the respective uutils because strings differ, e.g.
// truncate: Invalid number: 1Y: Value too large to be stored in data type
// tail: invalid number of bytes: 1Y: Value too large to be stored in data type
ParseSizeError::SizeTooBig(format!(
"{}: Value too large to be stored in data type",
s
))
}
}
#[cfg(test)]
mod tests {
use super::*;
fn variant_eq(a: &ParseSizeError, b: &ParseSizeError) -> bool {
std::mem::discriminant(a) == std::mem::discriminant(b)
}
#[test]
fn all_suffixes() {
// Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000).
// Binary prefixes can be used, too: KiB=K, MiB=M, and so on.
let suffixes = [
('K', 1u32),
('M', 2u32),
('G', 3u32),
('T', 4u32),
('P', 5u32),
('E', 6u32),
#[cfg(target_pointer_width = "128")]
('Z', 7u32), // ParseSizeError::SizeTooBig on x64
#[cfg(target_pointer_width = "128")]
('Y', 8u32), // ParseSizeError::SizeTooBig on x64
];
for &(c, exp) in &suffixes {
let s = format!("2{}B", c); // KB
assert_eq!(Ok((2 * (1000 as u128).pow(exp)) as usize), parse_size(&s));
let s = format!("2{}", c); // K
assert_eq!(Ok((2 * (1024 as u128).pow(exp)) as usize), parse_size(&s));
let s = format!("2{}iB", c); // KiB
assert_eq!(Ok((2 * (1024 as u128).pow(exp)) as usize), parse_size(&s));
// suffix only
let s = format!("{}B", c); // KB
assert_eq!(Ok(((1000 as u128).pow(exp)) as usize), parse_size(&s));
let s = format!("{}", c); // K
assert_eq!(Ok(((1024 as u128).pow(exp)) as usize), parse_size(&s));
let s = format!("{}iB", c); // KiB
assert_eq!(Ok(((1024 as u128).pow(exp)) as usize), parse_size(&s));
}
}
#[test]
#[cfg(not(target_pointer_width = "128"))]
fn overflow_x64() {
assert!(parse_size("10000000000000000000000").is_err());
assert!(parse_size("1000000000T").is_err());
assert!(parse_size("100000P").is_err());
assert!(parse_size("100E").is_err());
assert!(parse_size("1Z").is_err());
assert!(parse_size("1Y").is_err());
assert!(variant_eq(
&parse_size("1Z").unwrap_err(),
&ParseSizeError::SizeTooBig(String::new())
));
assert_eq!(
ParseSizeError::SizeTooBig(
"1Y: Value too large to be stored in data type".to_string()
),
parse_size("1Y").unwrap_err()
);
}
#[test]
#[cfg(target_pointer_width = "32")]
fn overflow_x32() {
assert!(variant_eq(
&parse_size("1T").unwrap_err(),
&ParseSizeError::SizeTooBig(String::new())
));
assert!(variant_eq(
&parse_size("1000G").unwrap_err(),
&ParseSizeError::SizeTooBig(String::new())
));
}
#[test]
fn invalid_syntax() {
let test_strings = ["328hdsf3290", "5MiB nonsense", "5mib", "biB", "-", ""];
for &test_string in &test_strings {
assert_eq!(
parse_size(test_string).unwrap_err(),
ParseSizeError::ParseFailure(format!("{}", test_string))
);
}
}
#[test]
fn b_suffix() {
assert_eq!(Ok(3 * 512), parse_size("3b")); // b is 512
}
#[test]
fn no_suffix() {
assert_eq!(Ok(1234), parse_size("1234"));
assert_eq!(Ok(0), parse_size("0"));
}
#[test]
fn kilobytes_suffix() {
assert_eq!(Ok(123 * 1000), parse_size("123KB")); // KB is 1000
assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000
assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024
assert_eq!(Ok(0), parse_size("0K"));
assert_eq!(Ok(0), parse_size("0KB"));
assert_eq!(Ok(1000), parse_size("KB"));
assert_eq!(Ok(1024), parse_size("K"));
}
#[test]
fn megabytes_suffix() {
assert_eq!(Ok(123 * 1024 * 1024), parse_size("123M"));
assert_eq!(Ok(123 * 1000 * 1000), parse_size("123MB"));
assert_eq!(Ok(1024 * 1024), parse_size("M"));
assert_eq!(Ok(1000 * 1000), parse_size("MB"));
}
}