1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00

truncate: refactor parse_size() function

Change the interface provided by the `parse_size()` function to reduce
its responsibilities to just a single task: parsing a number of bytes
from a string of the form '123KB', etc. Previously, the function was
also responsible for deciding which mode truncate would operate in.

Furthermore, this commit simplifies the code for parsing the number and
unit to be less verbose and use less mutable state.

Finally, this commit adds some unit tests for the `parse_size()`
function.
This commit is contained in:
Jeffrey Finkelstein 2021-05-19 22:23:28 -04:00
parent 0c6a848314
commit 63b496eaa8
2 changed files with 122 additions and 59 deletions

View file

@ -133,7 +133,35 @@ fn truncate(
filenames: Vec<String>, filenames: Vec<String>,
) { ) {
let (modsize, mode) = match size { let (modsize, mode) = match size {
Some(size_string) => parse_size(&size_string), Some(size_string) => {
// Trim any whitespace.
let size_string = size_string.trim();
// Get the modifier character from the size string, if any. For
// example, if the argument is "+123", then the modifier is '+'.
let c = size_string.chars().next().unwrap();
let mode = match c {
'+' => TruncateMode::Extend,
'-' => TruncateMode::Reduce,
'<' => TruncateMode::AtMost,
'>' => TruncateMode::AtLeast,
'/' => TruncateMode::RoundDown,
'*' => TruncateMode::RoundUp,
_ => TruncateMode::Absolute, /* assume that the size is just a number */
};
// If there was a modifier character, strip it.
let size_string = match mode {
TruncateMode::Absolute => size_string,
_ => &size_string[1..],
};
let num_bytes = match parse_size(size_string) {
Ok(b) => b,
Err(_) => crash!(1, "Invalid number: {}", size_string),
};
(num_bytes, mode)
}
None => (0, TruncateMode::Reference), None => (0, TruncateMode::Reference),
}; };
@ -208,64 +236,89 @@ fn truncate(
} }
} }
fn parse_size(size: &str) -> (u64, TruncateMode) { /// Parse a size string into a number of bytes.
let clean_size = size.replace(" ", ""); ///
let mode = match clean_size.chars().next().unwrap() { /// A size string comprises an integer and an optional unit. The unit
'+' => TruncateMode::Extend, /// may be K, M, G, T, P, E, Z, or Y (powers of 1024) or KB, MB,
'-' => TruncateMode::Reduce, /// etc. (powers of 1000).
'<' => TruncateMode::AtMost, ///
'>' => TruncateMode::AtLeast, /// # Errors
'/' => TruncateMode::RoundDown, ///
'*' => TruncateMode::RoundUp, /// This function returns an error if the string does not begin with a
_ => TruncateMode::Absolute, /* assume that the size is just a number */ /// numeral, or if the unit is not one of the supported units described
/// in the preceding section.
///
/// # Examples
///
/// ```rust,ignore
/// assert_eq!(parse_size("123").unwrap(), 123);
/// assert_eq!(parse_size("123K").unwrap(), 123 * 1024);
/// assert_eq!(parse_size("123KB").unwrap(), 123 * 1000);
/// ```
fn parse_size(size: &str) -> Result<u64, ()> {
// Get the numeric part of the size argument. For example, if the
// argument is "123K", then the numeric part is "123".
let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect();
let number: u64 = match numeric_string.parse() {
Ok(n) => n,
Err(_) => return Err(()),
}; };
let bytes = {
let mut slice = if mode == TruncateMode::Absolute { // Get the alphabetic units part of the size argument and compute
&clean_size // the factor it represents. For example, if the argument is "123K",
} else { // then the unit part is "K" and the factor is 1024. This may be the
&clean_size[1..] // empty string, in which case, the factor is 1.
}; let n = numeric_string.len();
if slice.chars().last().unwrap().is_alphabetic() { let (base, exponent): (u64, u32) = match &size[n..] {
slice = &slice[..slice.len() - 1]; "" => (1, 0),
if !slice.is_empty() && slice.chars().last().unwrap().is_alphabetic() { "K" | "k" => (1024, 1),
slice = &slice[..slice.len() - 1]; "M" | "m" => (1024, 2),
} "G" | "g" => (1024, 3),
} "T" | "t" => (1024, 4),
slice "P" | "p" => (1024, 5),
} "E" | "e" => (1024, 6),
.to_owned(); "Z" | "z" => (1024, 7),
let mut number: u64 = match bytes.parse() { "Y" | "y" => (1024, 8),
Ok(num) => num, "KB" | "kB" => (1000, 1),
Err(e) => crash!(1, "'{}' is not a valid number: {}", size, e), "MB" | "mB" => (1000, 2),
"GB" | "gB" => (1000, 3),
"TB" | "tB" => (1000, 4),
"PB" | "pB" => (1000, 5),
"EB" | "eB" => (1000, 6),
"ZB" | "zB" => (1000, 7),
"YB" | "yB" => (1000, 8),
_ => return Err(()),
}; };
if clean_size.chars().last().unwrap().is_alphabetic() { let factor = base.pow(exponent);
number *= match clean_size.chars().last().unwrap().to_ascii_uppercase() { Ok(number * factor)
'B' => match clean_size }
.chars()
.nth(clean_size.len() - 2) #[cfg(test)]
.unwrap() mod tests {
.to_ascii_uppercase() use crate::parse_size;
{
'K' => 1000u64, #[test]
'M' => 1000u64.pow(2), fn test_parse_size_zero() {
'G' => 1000u64.pow(3), assert_eq!(parse_size("0").unwrap(), 0);
'T' => 1000u64.pow(4), assert_eq!(parse_size("0K").unwrap(), 0);
'P' => 1000u64.pow(5), assert_eq!(parse_size("0KB").unwrap(), 0);
'E' => 1000u64.pow(6), }
'Z' => 1000u64.pow(7),
'Y' => 1000u64.pow(8), #[test]
letter => crash!(1, "'{}B' is not a valid suffix.", letter), fn test_parse_size_without_factor() {
}, assert_eq!(parse_size("123").unwrap(), 123);
'K' => 1024u64, }
'M' => 1024u64.pow(2),
'G' => 1024u64.pow(3), #[test]
'T' => 1024u64.pow(4), fn test_parse_size_kilobytes() {
'P' => 1024u64.pow(5), assert_eq!(parse_size("123K").unwrap(), 123 * 1024);
'E' => 1024u64.pow(6), assert_eq!(parse_size("123KB").unwrap(), 123 * 1000);
'Z' => 1024u64.pow(7), }
'Y' => 1024u64.pow(8),
letter => crash!(1, "'{}' is not a valid suffix.", letter), #[test]
}; fn test_parse_size_megabytes() {
} assert_eq!(parse_size("123").unwrap(), 123);
(number, mode) assert_eq!(parse_size("123M").unwrap(), 123 * 1024 * 1024);
assert_eq!(parse_size("123MB").unwrap(), 123 * 1000 * 1000);
}
} }

View file

@ -235,3 +235,13 @@ fn test_size_and_reference() {
actual actual
); );
} }
#[test]
fn test_invalid_numbers() {
// TODO For compatibility with GNU, `truncate -s 0X` should cause
// the same error as `truncate -s 0X file`, but currently it returns
// a different error.
new_ucmd!().args(&["-s", "0X", "file"]).fails().stderr_contains("Invalid number: 0X");
new_ucmd!().args(&["-s", "0XB", "file"]).fails().stderr_contains("Invalid number: 0XB");
new_ucmd!().args(&["-s", "0B", "file"]).fails().stderr_contains("Invalid number: 0B");
}