From 0c502f587bf8db66b07fe614dbaca1bd3bf71a97 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 28 May 2021 17:02:25 +0200 Subject: [PATCH 01/20] uucore: add new module "parse_size" This adds a function to parse size strings, e.g. "2KiB" or "3MB". It is based on similar functions used by head/tail/truncate, etc. --- src/uucore/src/lib/lib.rs | 7 +- src/uucore/src/lib/parser.rs | 1 + src/uucore/src/lib/parser/parse_size.rs | 242 ++++++++++++++++++++++++ 3 files changed, 248 insertions(+), 2 deletions(-) create mode 100644 src/uucore/src/lib/parser.rs create mode 100644 src/uucore/src/lib/parser/parse_size.rs diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index c17f14516..a60af57fa 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -19,10 +19,10 @@ pub extern crate winapi; //## internal modules -mod macros; // crate macros (macro_rules-type; exported to `crate::...`) - mod features; // feature-gated code modules +mod macros; // crate macros (macro_rules-type; exported to `crate::...`) mod mods; // core cross-platform modules +mod parser; // string parsing moduls // * cross-platform modules pub use crate::mods::backup_control; @@ -31,6 +31,9 @@ pub use crate::mods::os; pub use crate::mods::panic; pub use crate::mods::ranges; +// * string parsing modules +pub use crate::parser::parse_size; + // * feature-gated modules #[cfg(feature = "encoding")] pub use crate::features::encoding; diff --git a/src/uucore/src/lib/parser.rs b/src/uucore/src/lib/parser.rs new file mode 100644 index 000000000..21adefa1a --- /dev/null +++ b/src/uucore/src/lib/parser.rs @@ -0,0 +1 @@ +pub mod parse_size; diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs new file mode 100644 index 000000000..a7260306c --- /dev/null +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -0,0 +1,242 @@ +use std::convert::TryFrom; +use std::error::Error; +use std::fmt; + +/// Parse a size string into a number of bytes. +/// +/// A size string comprises an integer and an optional unit. The unit +/// may be K, M, G, T, P, E, Z or Y (powers of 1024), or KB, MB, +/// etc. (powers of 1000), or b which is 512. +/// Binary prefixes can be used, too: KiB=K, MiB=M, and so on. +/// +/// # Errors +/// +/// Will return `ParseSizeError` if it’s not possible to parse this +/// string into a number, e.g. if the string does not begin with a +/// numeral, or if the unit is not one of the supported units described +/// in the preceding section. +/// +/// # Examples +/// +/// ```rust +/// use uucore::parse_size::parse_size; +/// assert_eq!(Ok(123), parse_size("123")); +/// assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 +/// assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 +/// ``` +pub fn parse_size(size: &str) -> Result { + if size.is_empty() { + return Err(ParseSizeError::parse_failure(size)); + } + // Get the numeric part of the size argument. For example, if the + // argument is "123K", then the numeric part is "123". + let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect(); + let number: usize = if !numeric_string.is_empty() { + match numeric_string.parse() { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::parse_failure(size)), + } + } else { + 1 + }; + + // Get the alphabetic units part of the size argument and compute + // the factor it represents. For example, if the argument is "123K", + // then the unit part is "K" and the factor is 1024. This may be the + // empty string, in which case, the factor is 1. + let unit = &size[numeric_string.len()..]; + let (base, exponent): (u128, u32) = match unit { + "" => (1, 0), + "b" => (512, 1), // (`head` and `tail` use "b") + "KiB" | "K" | "k" => (1024, 1), + "MiB" | "M" | "m" => (1024, 2), + "GiB" | "G" | "g" => (1024, 3), + "TiB" | "T" | "t" => (1024, 4), + "PiB" | "P" | "p" => (1024, 5), + "EiB" | "E" | "e" => (1024, 6), + "ZiB" | "Z" | "z" => (1024, 7), + "YiB" | "Y" | "y" => (1024, 8), + "KB" | "kB" => (1000, 1), + "MB" | "mB" => (1000, 2), + "GB" | "gB" => (1000, 3), + "TB" | "tB" => (1000, 4), + "PB" | "pB" => (1000, 5), + "EB" | "eB" => (1000, 6), + "ZB" | "zB" => (1000, 7), + "YB" | "yB" => (1000, 8), + _ => return Err(ParseSizeError::parse_failure(size)), + }; + let factor = match usize::try_from(base.pow(exponent)) { + Ok(n) => n, + Err(_) => return Err(ParseSizeError::size_too_big(size)), + }; + match number.checked_mul(factor) { + Some(n) => Ok(n), + None => Err(ParseSizeError::size_too_big(size)), + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum ParseSizeError { + ParseFailure(String), // Syntax + SizeTooBig(String), // Overflow +} + +impl Error for ParseSizeError { + fn description(&self) -> &str { + match *self { + ParseSizeError::ParseFailure(ref s) => &*s, + ParseSizeError::SizeTooBig(ref s) => &*s, + } + } +} + +impl fmt::Display for ParseSizeError { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let s = match self { + ParseSizeError::ParseFailure(s) => s, + ParseSizeError::SizeTooBig(s) => s, + }; + write!(f, "{}", s) + } +} + +impl ParseSizeError { + fn parse_failure(s: &str) -> ParseSizeError { + // has to be handled in the respective uutils because strings differ, e.g. + // truncate: Invalid number: ‘fb’ + // tail: invalid number of bytes: ‘fb’ + ParseSizeError::ParseFailure(format!("‘{}’", s)) + } + + fn size_too_big(s: &str) -> ParseSizeError { + // has to be handled in the respective uutils because strings differ, e.g. + // truncate: Invalid number: ‘1Y’: Value too large to be stored in data type + // tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type + ParseSizeError::SizeTooBig(format!( + "‘{}’: Value too large to be stored in data type", + s + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn variant_eq(a: &ParseSizeError, b: &ParseSizeError) -> bool { + std::mem::discriminant(a) == std::mem::discriminant(b) + } + + #[test] + fn all_suffixes() { + // Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000). + // Binary prefixes can be used, too: KiB=K, MiB=M, and so on. + let suffixes = [ + ('K', 1u32), + ('M', 2u32), + ('G', 3u32), + ('T', 4u32), + ('P', 5u32), + ('E', 6u32), + #[cfg(target_pointer_width = "128")] + ('Z', 7u32), // ParseSizeError::SizeTooBig on x64 + #[cfg(target_pointer_width = "128")] + ('Y', 8u32), // ParseSizeError::SizeTooBig on x64 + ]; + + for &(c, exp) in &suffixes { + let s = format!("2{}B", c); // KB + assert_eq!(Ok((2 * (1000 as u128).pow(exp)) as usize), parse_size(&s)); + let s = format!("2{}", c); // K + assert_eq!(Ok((2 * (1024 as u128).pow(exp)) as usize), parse_size(&s)); + let s = format!("2{}iB", c); // KiB + assert_eq!(Ok((2 * (1024 as u128).pow(exp)) as usize), parse_size(&s)); + + // suffix only + let s = format!("{}B", c); // KB + assert_eq!(Ok(((1000 as u128).pow(exp)) as usize), parse_size(&s)); + let s = format!("{}", c); // K + assert_eq!(Ok(((1024 as u128).pow(exp)) as usize), parse_size(&s)); + let s = format!("{}iB", c); // KiB + assert_eq!(Ok(((1024 as u128).pow(exp)) as usize), parse_size(&s)); + } + } + + #[test] + #[cfg(not(target_pointer_width = "128"))] + fn overflow_x64() { + assert!(parse_size("10000000000000000000000").is_err()); + assert!(parse_size("1000000000T").is_err()); + assert!(parse_size("100000P").is_err()); + assert!(parse_size("100E").is_err()); + assert!(parse_size("1Z").is_err()); + assert!(parse_size("1Y").is_err()); + + assert!(variant_eq( + &parse_size("1Z").unwrap_err(), + &ParseSizeError::SizeTooBig(String::new()) + )); + + assert_eq!( + ParseSizeError::SizeTooBig( + "‘1Y’: Value too large to be stored in data type".to_string() + ), + parse_size("1Y").unwrap_err() + ); + } + + #[test] + #[cfg(target_pointer_width = "32")] + fn overflow_x32() { + assert!(variant_eq( + &parse_size("1T").unwrap_err(), + &ParseSizeError::SizeTooBig(String::new()) + )); + assert!(variant_eq( + &parse_size("1000G").unwrap_err(), + &ParseSizeError::SizeTooBig(String::new()) + )); + } + + #[test] + fn invalid_syntax() { + let test_strings = ["328hdsf3290", "5MiB nonsense", "5mib", "biB", "-", ""]; + for &test_string in &test_strings { + assert_eq!( + parse_size(test_string).unwrap_err(), + ParseSizeError::ParseFailure(format!("‘{}’", test_string)) + ); + } + } + + #[test] + fn b_suffix() { + assert_eq!(Ok(3 * 512), parse_size("3b")); // b is 512 + } + + #[test] + fn no_suffix() { + assert_eq!(Ok(1234), parse_size("1234")); + assert_eq!(Ok(0), parse_size("0")); + } + + #[test] + fn kilobytes_suffix() { + assert_eq!(Ok(123 * 1000), parse_size("123KB")); // KB is 1000 + assert_eq!(Ok(9 * 1000), parse_size("9kB")); // kB is 1000 + assert_eq!(Ok(2 * 1024), parse_size("2K")); // K is 1024 + assert_eq!(Ok(0), parse_size("0K")); + assert_eq!(Ok(0), parse_size("0KB")); + assert_eq!(Ok(1000), parse_size("KB")); + assert_eq!(Ok(1024), parse_size("K")); + } + + #[test] + fn megabytes_suffix() { + assert_eq!(Ok(123 * 1024 * 1024), parse_size("123M")); + assert_eq!(Ok(123 * 1000 * 1000), parse_size("123MB")); + assert_eq!(Ok(1024 * 1024), parse_size("M")); + assert_eq!(Ok(1000 * 1000), parse_size("MB")); + } +} From b1b3475e11ff4ef947dfbb8740c7e405ff7776b9 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 28 May 2021 19:57:50 +0200 Subject: [PATCH 02/20] truncate: use "parse_size" from uucore --- src/uu/truncate/src/truncate.rs | 101 ++---------------------- src/uucore/src/lib/parser/parse_size.rs | 5 ++ 2 files changed, 13 insertions(+), 93 deletions(-) diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 03b18723c..86a0c9ffc 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -11,9 +11,11 @@ extern crate uucore; use clap::{App, Arg}; +use std::convert::TryFrom; use std::fs::{metadata, OpenOptions}; use std::io::ErrorKind; use std::path::Path; +use uucore::parse_size::parse_size; #[derive(Eq, PartialEq)] enum TruncateMode { @@ -159,14 +161,14 @@ fn truncate( }; let num_bytes = match parse_size(size_string) { Ok(b) => b, - Err(_) => crash!(1, "Invalid number: ‘{}’", size_string), + Err(e) => crash!(1, "Invalid number: {}", e.to_string()), }; (num_bytes, mode) } None => (0, TruncateMode::Reference), }; - let refsize = match reference { + let refsize: usize = match reference { Some(ref rfilename) => { match mode { // Only Some modes work with a reference @@ -176,7 +178,7 @@ fn truncate( _ => crash!(1, "you must specify a relative ‘--size’ with ‘--reference’"), }; match metadata(rfilename) { - Ok(meta) => meta.len(), + Ok(meta) => usize::try_from(meta.len()).unwrap(), Err(f) => match f.kind() { ErrorKind::NotFound => { crash!(1, "cannot stat '{}': No such file or directory", rfilename) @@ -199,14 +201,14 @@ fn truncate( let fsize = match reference { Some(_) => refsize, None => match metadata(filename) { - Ok(meta) => meta.len(), + Ok(meta) => usize::try_from(meta.len()).unwrap(), Err(f) => { show_warning!("{}", f.to_string()); continue; } }, }; - let tsize: u64 = match mode { + let tsize: usize = match mode { TruncateMode::Absolute => modsize, TruncateMode::Reference => fsize, TruncateMode::Extend => fsize + modsize, @@ -216,7 +218,7 @@ fn truncate( TruncateMode::RoundDown => fsize - fsize % modsize, TruncateMode::RoundUp => fsize + fsize % modsize, }; - match file.set_len(tsize) { + match file.set_len(u64::try_from(tsize).unwrap()) { Ok(_) => {} Err(f) => crash!(1, "{}", f.to_string()), }; @@ -225,90 +227,3 @@ fn truncate( } } } - -/// Parse a size string into a number of bytes. -/// -/// A size string comprises an integer and an optional unit. The unit -/// may be K, M, G, T, P, E, Z, or Y (powers of 1024) or KB, MB, -/// etc. (powers of 1000). -/// -/// # Errors -/// -/// This function returns an error if the string does not begin with a -/// numeral, or if the unit is not one of the supported units described -/// in the preceding section. -/// -/// # Examples -/// -/// ```rust,ignore -/// assert_eq!(parse_size("123").unwrap(), 123); -/// assert_eq!(parse_size("123K").unwrap(), 123 * 1024); -/// assert_eq!(parse_size("123KB").unwrap(), 123 * 1000); -/// ``` -fn parse_size(size: &str) -> Result { - // Get the numeric part of the size argument. For example, if the - // argument is "123K", then the numeric part is "123". - let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect(); - let number: u64 = match numeric_string.parse() { - Ok(n) => n, - Err(_) => return Err(()), - }; - - // Get the alphabetic units part of the size argument and compute - // the factor it represents. For example, if the argument is "123K", - // then the unit part is "K" and the factor is 1024. This may be the - // empty string, in which case, the factor is 1. - let n = numeric_string.len(); - let (base, exponent): (u64, u32) = match &size[n..] { - "" => (1, 0), - "K" | "k" => (1024, 1), - "M" | "m" => (1024, 2), - "G" | "g" => (1024, 3), - "T" | "t" => (1024, 4), - "P" | "p" => (1024, 5), - "E" | "e" => (1024, 6), - "Z" | "z" => (1024, 7), - "Y" | "y" => (1024, 8), - "KB" | "kB" => (1000, 1), - "MB" | "mB" => (1000, 2), - "GB" | "gB" => (1000, 3), - "TB" | "tB" => (1000, 4), - "PB" | "pB" => (1000, 5), - "EB" | "eB" => (1000, 6), - "ZB" | "zB" => (1000, 7), - "YB" | "yB" => (1000, 8), - _ => return Err(()), - }; - let factor = base.pow(exponent); - Ok(number * factor) -} - -#[cfg(test)] -mod tests { - use crate::parse_size; - - #[test] - fn test_parse_size_zero() { - assert_eq!(parse_size("0").unwrap(), 0); - assert_eq!(parse_size("0K").unwrap(), 0); - assert_eq!(parse_size("0KB").unwrap(), 0); - } - - #[test] - fn test_parse_size_without_factor() { - assert_eq!(parse_size("123").unwrap(), 123); - } - - #[test] - fn test_parse_size_kilobytes() { - assert_eq!(parse_size("123K").unwrap(), 123 * 1024); - assert_eq!(parse_size("123KB").unwrap(), 123 * 1000); - } - - #[test] - fn test_parse_size_megabytes() { - assert_eq!(parse_size("123").unwrap(), 123); - assert_eq!(parse_size("123M").unwrap(), 123 * 1024 * 1024); - assert_eq!(parse_size("123MB").unwrap(), 123 * 1000 * 1000); - } -} diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index a7260306c..e8ede8cad 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + use std::convert::TryFrom; use std::error::Error; use std::fmt; From 0bf14da490c49bacfa078d8acfee14def48ed701 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 28 May 2021 21:12:03 +0200 Subject: [PATCH 03/20] tail: use "parse_size" from uucore --- src/uu/tail/src/tail.rs | 124 +++++-------------------------------- tests/by-util/test_tail.rs | 36 ----------- 2 files changed, 16 insertions(+), 144 deletions(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 15a819d35..a4634714c 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -21,13 +21,13 @@ use chunks::ReverseChunks; use clap::{App, Arg}; use std::collections::VecDeque; -use std::error::Error; use std::fmt; use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write}; use std::path::Path; use std::thread::sleep; use std::time::Duration; +use uucore::parse_size::parse_size; use uucore::ringbuffer::RingBuffer; pub mod options { @@ -47,8 +47,8 @@ pub mod options { static ARG_FILES: &str = "files"; enum FilterMode { - Bytes(u64), - Lines(u64, u8), // (number of lines, delimiter) + Bytes(usize), + Lines(usize, u8), // (number of lines, delimiter) } struct Settings { @@ -174,31 +174,31 @@ pub fn uumain(args: impl uucore::Args) -> i32 { match matches.value_of(options::LINES) { Some(n) => { let mut slice: &str = n; - if slice.chars().next().unwrap_or('_') == '+' { - settings.beginning = true; + let c = slice.chars().next().unwrap_or('_'); + if c == '+' || c == '-' { slice = &slice[1..]; + if c == '+' { + settings.beginning = true; + } } match parse_size(slice) { Ok(m) => settings.mode = FilterMode::Lines(m, b'\n'), - Err(e) => { - show_error!("{}", e.to_string()); - return 1; - } + Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), } } None => { if let Some(n) = matches.value_of(options::BYTES) { let mut slice: &str = n; - if slice.chars().next().unwrap_or('_') == '+' { - settings.beginning = true; + let c = slice.chars().next().unwrap_or('_'); + if c == '+' || c == '-' { slice = &slice[1..]; + if c == '+' { + settings.beginning = true; + } } match parse_size(slice) { Ok(m) => settings.mode = FilterMode::Bytes(m), - Err(e) => { - show_error!("{}", e.to_string()); - return 1; - } + Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), } } } @@ -264,98 +264,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { 0 } -#[derive(Debug, PartialEq, Eq)] -pub enum ParseSizeErr { - ParseFailure(String), - SizeTooBig(String), -} - -impl Error for ParseSizeErr { - fn description(&self) -> &str { - match *self { - ParseSizeErr::ParseFailure(ref s) => &*s, - ParseSizeErr::SizeTooBig(ref s) => &*s, - } - } -} - -impl fmt::Display for ParseSizeErr { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - let s = match self { - ParseSizeErr::ParseFailure(s) => s, - ParseSizeErr::SizeTooBig(s) => s, - }; - write!(f, "{}", s) - } -} - -impl ParseSizeErr { - fn parse_failure(s: &str) -> ParseSizeErr { - ParseSizeErr::ParseFailure(format!("invalid size: '{}'", s)) - } - - fn size_too_big(s: &str) -> ParseSizeErr { - ParseSizeErr::SizeTooBig(format!( - "invalid size: '{}': Value too large to be stored in data type", - s - )) - } -} - -pub type ParseSizeResult = Result; - -pub fn parse_size(mut size_slice: &str) -> Result { - let mut base = if size_slice.chars().last().unwrap_or('_') == 'B' { - size_slice = &size_slice[..size_slice.len() - 1]; - 1000u64 - } else { - 1024u64 - }; - - let exponent = if !size_slice.is_empty() { - let mut has_suffix = true; - let exp = match size_slice.chars().last().unwrap_or('_') { - 'K' | 'k' => 1u64, - 'M' => 2u64, - 'G' => 3u64, - 'T' => 4u64, - 'P' => 5u64, - 'E' => 6u64, - 'Z' | 'Y' => { - return Err(ParseSizeErr::size_too_big(size_slice)); - } - 'b' => { - base = 512u64; - 1u64 - } - _ => { - has_suffix = false; - 0u64 - } - }; - if has_suffix { - size_slice = &size_slice[..size_slice.len() - 1]; - } - exp - } else { - 0u64 - }; - - let mut multiplier = 1u64; - for _ in 0u64..exponent { - multiplier *= base; - } - if base == 1000u64 && exponent == 0u64 { - // sole B is not a valid suffix - Err(ParseSizeErr::parse_failure(size_slice)) - } else { - let value: Option = size_slice.parse().ok(); - value - .map(|v| Ok((multiplier as i64 * v.abs()) as u64)) - .unwrap_or_else(|| Err(ParseSizeErr::parse_failure(size_slice))) - } -} - fn follow(readers: &mut [BufReader], filenames: &[String], settings: &Settings) { assert!(settings.follow); let mut last = readers.len() - 1; @@ -469,7 +377,7 @@ fn bounded_tail(file: &mut File, settings: &Settings) { /// If any element of `iter` is an [`Err`], then this function panics. fn unbounded_tail_collect( iter: impl Iterator>, - count: u64, + count: usize, beginning: bool, ) -> VecDeque where diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index f3c9a7b11..27e94a78f 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -1,6 +1,5 @@ extern crate tail; -use self::tail::parse_size; use crate::common::util::*; use std::char::from_digit; use std::io::Write; @@ -236,41 +235,6 @@ fn test_bytes_big() { } } -#[test] -fn test_parse_size() { - // No suffix. - assert_eq!(Ok(1234), parse_size("1234")); - - // kB is 1000 - assert_eq!(Ok(9 * 1000), parse_size("9kB")); - - // K is 1024 - assert_eq!(Ok(2 * 1024), parse_size("2K")); - - let suffixes = [ - ('M', 2u32), - ('G', 3u32), - ('T', 4u32), - ('P', 5u32), - ('E', 6u32), - ]; - - for &(c, exp) in &suffixes { - let s = format!("2{}B", c); - assert_eq!(Ok(2 * (1000 as u64).pow(exp)), parse_size(&s)); - - let s = format!("2{}", c); - assert_eq!(Ok(2 * (1024 as u64).pow(exp)), parse_size(&s)); - } - - // Sizes that are too big. - assert!(parse_size("1Z").is_err()); - assert!(parse_size("1Y").is_err()); - - // Bad number - assert!(parse_size("328hdsf3290").is_err()); -} - #[test] fn test_lines_with_size_suffix() { const FILE: &'static str = "test_lines_with_size_suffix.txt"; From 1c41efd73290aea82994f08b40e7e35f97c8745c Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 31 May 2021 09:35:46 +0200 Subject: [PATCH 04/20] stdbuf: use "parse_size" from uucore --- src/uu/stdbuf/src/stdbuf.rs | 45 ++++-------------------------------- tests/by-util/test_stdbuf.rs | 9 +++++++- 2 files changed, 13 insertions(+), 41 deletions(-) diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index 485b3c70e..e39af3816 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -19,14 +19,14 @@ use std::path::PathBuf; use std::process::Command; use tempfile::tempdir; use tempfile::TempDir; +use uucore::parse_size::parse_size; use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Run COMMAND, with modified buffering operations for its standard streams.\n\n\ Mandatory arguments to long options are mandatory for short options too."; -static LONG_HELP: &str = - "If MODE is 'L' the corresponding stream will be line buffered.\n\ +static LONG_HELP: &str = "If MODE is 'L' the corresponding stream will be line buffered.\n\ This option is invalid with standard input.\n\n\ If MODE is '0' the corresponding stream will be unbuffered.\n\n\ Otherwise MODE is a number which may be followed by one of the following:\n\n\ @@ -57,7 +57,7 @@ const STDBUF_INJECT: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libstdbuf enum BufferType { Default, Line, - Size(u64), + Size(usize), } struct ProgramOptions { @@ -106,41 +106,6 @@ fn preload_strings() -> (&'static str, &'static str) { crash!(1, "Command not supported for this operating system!") } -fn parse_size(size: &str) -> Option { - let ext = size.trim_start_matches(|c: char| c.is_digit(10)); - let num = size.trim_end_matches(char::is_alphabetic); - let mut recovered = num.to_owned(); - recovered.push_str(ext); - if recovered != size { - return None; - } - let buf_size: u64 = match num.parse().ok() { - Some(m) => m, - None => return None, - }; - let (power, base): (u32, u64) = match ext { - "" => (0, 0), - "KB" => (1, 1024), - "K" => (1, 1000), - "MB" => (2, 1024), - "M" => (2, 1000), - "GB" => (3, 1024), - "G" => (3, 1000), - "TB" => (4, 1024), - "T" => (4, 1000), - "PB" => (5, 1024), - "P" => (5, 1000), - "EB" => (6, 1024), - "E" => (6, 1000), - "ZB" => (7, 1024), - "Z" => (7, 1000), - "YB" => (8, 1024), - "Y" => (8, 1000), - _ => return None, - }; - Some(buf_size * base.pow(power)) -} - fn check_option(matches: &ArgMatches, name: &str) -> Result { match matches.value_of(name) { Some(value) => match value { @@ -155,8 +120,8 @@ fn check_option(matches: &ArgMatches, name: &str) -> Result { let size = match parse_size(x) { - Some(m) => m, - None => return Err(ProgramOptionsError(format!("invalid mode {}", x))), + Ok(m) => m, + Err(e) => return Err(ProgramOptionsError(format!("invalid mode {}", e))), }; Ok(BufferType::Size(size)) } diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index 2e09601ce..e5d784edb 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -57,8 +57,15 @@ fn test_stdbuf_line_buffering_stdin_fails() { #[cfg(not(target_os = "windows"))] #[test] fn test_stdbuf_invalid_mode_fails() { + // TODO: GNU's `stdbuf` (8.32) does not return "\nTry 'stdbuf --help' for more information." + // for invalid modes. new_ucmd!() .args(&["-i", "1024R", "head"]) .fails() - .stderr_is("stdbuf: invalid mode 1024R\nTry 'stdbuf --help' for more information."); + .stderr_contains("stdbuf: invalid mode ‘1024R’"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["--error", "1Y", "head"]) + .fails() + .stderr_contains("stdbuf: invalid mode ‘1Y’: Value too large to be stored in data type"); } From 3a6605844ffe0db223e086b77c408535a32ec078 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 31 May 2021 09:54:31 +0200 Subject: [PATCH 05/20] uucore: move 'parse_time' to 'parser' "parse_time" only uses stdlib and does not need to be feature gated. A more suitable place is the newly created "src/uucore/src/lib/parser/" --- src/uu/sleep/Cargo.toml | 2 +- src/uu/timeout/Cargo.toml | 2 +- src/uucore/Cargo.toml | 1 - src/uucore/src/lib/features.rs | 2 -- src/uucore/src/lib/lib.rs | 3 +-- src/uucore/src/lib/parser.rs | 1 + src/uucore/src/lib/{features => parser}/parse_time.rs | 0 7 files changed, 4 insertions(+), 7 deletions(-) rename src/uucore/src/lib/{features => parser}/parse_time.rs (100%) diff --git a/src/uu/sleep/Cargo.toml b/src/uu/sleep/Cargo.toml index fe7ee2941..618ea7e28 100644 --- a/src/uu/sleep/Cargo.toml +++ b/src/uu/sleep/Cargo.toml @@ -16,7 +16,7 @@ path = "src/sleep.rs" [dependencies] clap = "2.33" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["parse_time"] } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/timeout/Cargo.toml b/src/uu/timeout/Cargo.toml index 206a98c08..5116a163c 100644 --- a/src/uu/timeout/Cargo.toml +++ b/src/uu/timeout/Cargo.toml @@ -18,7 +18,7 @@ path = "src/timeout.rs" clap = "2.33" getopts = "0.2.18" libc = "0.2.42" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["parse_time", "process", "signals"] } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["process", "signals"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 482252680..0c11d2c15 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -44,7 +44,6 @@ entries = ["libc"] fs = ["libc"] fsext = ["libc", "time"] mode = ["libc"] -parse_time = [] perms = ["libc"] process = ["libc"] ringbuffer = [] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 310a41fe1..c1e1ec31e 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -6,8 +6,6 @@ pub mod encoding; pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; -#[cfg(feature = "parse_time")] -pub mod parse_time; #[cfg(feature = "ringbuffer")] pub mod ringbuffer; #[cfg(feature = "zero-copy")] diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index a60af57fa..69819fd05 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -33,6 +33,7 @@ pub use crate::mods::ranges; // * string parsing modules pub use crate::parser::parse_size; +pub use crate::parser::parse_time; // * feature-gated modules #[cfg(feature = "encoding")] @@ -41,8 +42,6 @@ pub use crate::features::encoding; pub use crate::features::fs; #[cfg(feature = "fsext")] pub use crate::features::fsext; -#[cfg(feature = "parse_time")] -pub use crate::features::parse_time; #[cfg(feature = "ringbuffer")] pub use crate::features::ringbuffer; #[cfg(feature = "zero-copy")] diff --git a/src/uucore/src/lib/parser.rs b/src/uucore/src/lib/parser.rs index 21adefa1a..d09777e10 100644 --- a/src/uucore/src/lib/parser.rs +++ b/src/uucore/src/lib/parser.rs @@ -1 +1,2 @@ pub mod parse_size; +pub mod parse_time; diff --git a/src/uucore/src/lib/features/parse_time.rs b/src/uucore/src/lib/parser/parse_time.rs similarity index 100% rename from src/uucore/src/lib/features/parse_time.rs rename to src/uucore/src/lib/parser/parse_time.rs From f9a088cecf4beb8138a61849ed5273bdd2b3a428 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 31 May 2021 15:22:37 +0200 Subject: [PATCH 06/20] du: use "parse_size" from uucore * fix stderr to be the same than GNU's `du` in case of invalid SIZE --- src/uu/du/src/du.rs | 101 +++++++++++---------------------------- tests/by-util/test_du.rs | 17 +++++++ 2 files changed, 45 insertions(+), 73 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 6bd4f23e4..8394741cc 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -14,6 +14,7 @@ use chrono::prelude::DateTime; use chrono::Local; use clap::{App, Arg}; use std::collections::HashSet; +use std::convert::TryFrom; use std::env; use std::fs; use std::io::{stderr, ErrorKind, Result, Write}; @@ -26,6 +27,7 @@ use std::os::windows::fs::MetadataExt; use std::os::windows::io::AsRawHandle; use std::path::PathBuf; use std::time::{Duration, UNIX_EPOCH}; +use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::InvalidEncodingHandling; #[cfg(windows)] use winapi::shared::minwindef::{DWORD, LPVOID}; @@ -211,64 +213,31 @@ fn get_file_info(path: &PathBuf) -> Option { result } -fn unit_string_to_number(s: &str) -> Option { - let mut offset = 0; - let mut s_chars = s.chars().rev(); - - let (mut ch, multiple) = match s_chars.next() { - Some('B') | Some('b') => ('B', 1000u64), - Some(ch) => (ch, 1024u64), - None => return None, - }; - if ch == 'B' { - ch = s_chars.next()?; - offset += 1; - } - ch = ch.to_ascii_uppercase(); - - let unit = UNITS - .iter() - .rev() - .find(|&&(unit_ch, _)| unit_ch == ch) - .map(|&(_, val)| { - // we found a match, so increment offset - offset += 1; - val - }) - .or_else(|| if multiple == 1024 { Some(0) } else { None })?; - - let number = s[..s.len() - offset].parse::().ok()?; - - Some(number * multiple.pow(unit)) -} - -fn translate_to_pure_number(s: &Option<&str>) -> Option { - match *s { - Some(ref s) => unit_string_to_number(s), - None => None, - } -} - -fn read_block_size(s: Option<&str>) -> u64 { - match translate_to_pure_number(&s) { - Some(v) => v, - None => { - if let Some(value) = s { - show_error!("invalid --block-size argument '{}'", value); - }; - - for env_var in &["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { - let env_size = env::var(env_var).ok(); - if let Some(quantity) = translate_to_pure_number(&env_size.as_deref()) { - return quantity; +fn read_block_size(s: Option<&str>) -> usize { + if let Some(size_arg) = s { + match parse_size(size_arg) { + Ok(v) => v, + Err(e) => match e { + ParseSizeError::ParseFailure(_) => { + crash!(1, "invalid suffix in --block-size argument '{}'", size_arg) + } + ParseSizeError::SizeTooBig(_) => { + crash!(1, "--block-size argument '{}' too large", size_arg) + } + }, + } + } else { + for env_var in &["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { + if let Ok(env_size) = env::var(env_var) { + if let Ok(v) = parse_size(&env_size) { + return v; } } - - if env::var("POSIXLY_CORRECT").is_ok() { - 512 - } else { - 1024 - } + } + if env::var("POSIXLY_CORRECT").is_ok() { + 512 + } else { + 1024 } } } @@ -595,7 +564,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } }; - let block_size = read_block_size(matches.value_of(options::BLOCK_SIZE)); + let block_size = u64::try_from(read_block_size(matches.value_of(options::BLOCK_SIZE))).unwrap(); let multiplier: u64 = if matches.is_present(options::SI) { 1000 @@ -733,26 +702,12 @@ mod test_du { #[allow(unused_imports)] use super::*; - #[test] - fn test_translate_to_pure_number() { - let test_data = [ - (Some("10".to_string()), Some(10)), - (Some("10K".to_string()), Some(10 * 1024)), - (Some("5M".to_string()), Some(5 * 1024 * 1024)), - (Some("900KB".to_string()), Some(900 * 1000)), - (Some("BAD_STRING".to_string()), None), - ]; - for it in test_data.iter() { - assert_eq!(translate_to_pure_number(&it.0.as_deref()), it.1); - } - } - #[test] fn test_read_block_size() { let test_data = [ - (Some("10".to_string()), 10), + (Some("1024".to_string()), 1024), + (Some("K".to_string()), 1024), (None, 1024), - (Some("BAD_STRING".to_string()), 1024), ]; for it in test_data.iter() { assert_eq!(read_block_size(it.0.as_deref()), it.1); diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index c5d262c3b..a8a3049f7 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -71,6 +71,23 @@ fn _du_basics_subdir(s: &str) { } } +#[test] +fn test_du_invalid_size() { + new_ucmd!() + .arg("--block-size=1fb4t") + .arg("/tmp") + .fails() + .code_is(1) + .stderr_only("du: invalid suffix in --block-size argument '1fb4t'"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .arg("--block-size=1Y") + .arg("/tmp") + .fails() + .code_is(1) + .stderr_only("du: --block-size argument '1Y' too large"); +} + #[test] fn test_du_basics_bad_name() { new_ucmd!() From 84f2bff778b93dcc7a8ae401db1eb62aa723f8a2 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 1 Jun 2021 09:30:43 +0200 Subject: [PATCH 07/20] head: use "parse_size" from uucore --- src/uu/head/src/head.rs | 9 +-- src/uu/head/src/parse.rs | 135 +++++-------------------------------- tests/by-util/test_head.rs | 25 +++++++ 3 files changed, 42 insertions(+), 127 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 3602b4a73..78d769020 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -107,12 +107,7 @@ where { match parse::parse_num(src) { Ok((n, last)) => Ok((closure(n), last)), - Err(reason) => match reason { - parse::ParseError::Syntax => Err(format!("'{}'", src)), - parse::ParseError::Overflow => { - Err(format!("'{}': Value too large for defined datatype", src)) - } - }, + Err(e) => Err(e.to_string()), } } @@ -473,7 +468,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let args = match HeadOptions::get_from(args) { Ok(o) => o, Err(s) => { - crash!(EXIT_FAILURE, "head: {}", s); + crash!(EXIT_FAILURE, "{}", s); } }; match uu_head(&args) { diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 0cf20be42..3b788f819 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -1,5 +1,5 @@ -use std::convert::TryFrom; use std::ffi::OsString; +use uucore::parse_size::{parse_size, ParseSizeError}; #[derive(PartialEq, Debug)] pub enum ParseError { @@ -92,92 +92,25 @@ pub fn parse_obsolete(src: &str) -> Option } /// Parses an -c or -n argument, /// the bool specifies whether to read from the end -pub fn parse_num(src: &str) -> Result<(usize, bool), ParseError> { - let mut num_start = 0; - let mut chars = src.char_indices(); - let (mut chars, all_but_last) = match chars.next() { - Some((_, c)) => { - if c == '-' { - num_start += 1; - (chars, true) - } else { - (src.char_indices(), false) - } - } - None => return Err(ParseError::Syntax), - }; - let mut num_end = 0usize; - let mut last_char = 0 as char; - let mut num_count = 0usize; - for (n, c) in &mut chars { - if c.is_numeric() { - num_end = n; - num_count += 1; - } else { - last_char = c; - break; +pub fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { + let mut size_string = src.trim(); + let mut all_but_last = false; + + if let Some(c) = size_string.chars().next() { + if c == '-' { + size_string = &size_string[1..]; + all_but_last = true; } + } else { + return Err(ParseSizeError::ParseFailure(src.to_string())); } - let num = if num_count > 0 { - match src[num_start..=num_end].parse::() { - Ok(n) => Some(n), - Err(_) => return Err(ParseError::Overflow), - } - } else { - None - }; - - if last_char == 0 as char { - if let Some(n) = num { - Ok((n, all_but_last)) - } else { - Err(ParseError::Syntax) - } - } else { - let base: u128 = match chars.next() { - Some((_, c)) => { - let b = match c { - 'B' if last_char != 'b' => 1000, - 'i' if last_char != 'b' => { - if let Some((_, 'B')) = chars.next() { - 1024 - } else { - return Err(ParseError::Syntax); - } - } - _ => return Err(ParseError::Syntax), - }; - if chars.next().is_some() { - return Err(ParseError::Syntax); - } else { - b - } - } - None => 1024, - }; - let mul = match last_char.to_lowercase().next().unwrap() { - 'b' => 512, - 'k' => base.pow(1), - 'm' => base.pow(2), - 'g' => base.pow(3), - 't' => base.pow(4), - 'p' => base.pow(5), - 'e' => base.pow(6), - 'z' => base.pow(7), - 'y' => base.pow(8), - _ => return Err(ParseError::Syntax), - }; - let mul = match usize::try_from(mul) { - Ok(n) => n, - Err(_) => return Err(ParseError::Overflow), - }; - match num.unwrap_or(1).checked_mul(mul) { - Some(n) => Ok((n, all_but_last)), - None => Err(ParseError::Overflow), - } + match parse_size(&size_string) { + Ok(n) => Ok((n, all_but_last)), + Err(e) => Err(e), } } + #[cfg(test)] mod tests { use super::*; @@ -195,44 +128,6 @@ mod tests { Some(Ok(src.iter().map(|s| s.to_string()).collect())) } #[test] - #[cfg(not(target_pointer_width = "128"))] - fn test_parse_overflow_x64() { - assert_eq!(parse_num("1Y"), Err(ParseError::Overflow)); - assert_eq!(parse_num("1Z"), Err(ParseError::Overflow)); - assert_eq!(parse_num("100E"), Err(ParseError::Overflow)); - assert_eq!(parse_num("100000P"), Err(ParseError::Overflow)); - assert_eq!(parse_num("1000000000T"), Err(ParseError::Overflow)); - assert_eq!( - parse_num("10000000000000000000000"), - Err(ParseError::Overflow) - ); - } - #[test] - #[cfg(target_pointer_width = "32")] - fn test_parse_overflow_x32() { - assert_eq!(parse_num("1T"), Err(ParseError::Overflow)); - assert_eq!(parse_num("1000G"), Err(ParseError::Overflow)); - } - #[test] - fn test_parse_bad_syntax() { - assert_eq!(parse_num("5MiB nonsense"), Err(ParseError::Syntax)); - assert_eq!(parse_num("Nonsense string"), Err(ParseError::Syntax)); - assert_eq!(parse_num("5mib"), Err(ParseError::Syntax)); - assert_eq!(parse_num("biB"), Err(ParseError::Syntax)); - assert_eq!(parse_num("-"), Err(ParseError::Syntax)); - assert_eq!(parse_num(""), Err(ParseError::Syntax)); - } - #[test] - fn test_parse_numbers() { - assert_eq!(parse_num("k"), Ok((1024, false))); - assert_eq!(parse_num("MiB"), Ok((1024 * 1024, false))); - assert_eq!(parse_num("-5"), Ok((5, true))); - assert_eq!(parse_num("b"), Ok((512, false))); - assert_eq!(parse_num("-2GiB"), Ok((2 * 1024 * 1024 * 1024, true))); - assert_eq!(parse_num("5M"), Ok((5 * 1024 * 1024, false))); - assert_eq!(parse_num("5MB"), Ok((5 * 1000 * 1000, false))); - } - #[test] fn test_parse_numbers_obsolete() { assert_eq!(obsolete("-5"), obsolete_result(&["-n", "5"])); assert_eq!(obsolete("-100"), obsolete_result(&["-n", "100"])); diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index cf7c9c2ee..349fc05d3 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -242,3 +242,28 @@ hello ", ); } +#[test] +fn test_head_invalid_num() { + new_ucmd!() + .args(&["-c", "1024R", "emptyfile.txt"]) + .fails() + .stderr_is("head: invalid number of bytes: ‘1024R’"); + new_ucmd!() + .args(&["-n", "1024R", "emptyfile.txt"]) + .fails() + .stderr_is("head: invalid number of lines: ‘1024R’"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["-c", "1Y", "emptyfile.txt"]) + .fails() + .stderr_is( + "head: invalid number of bytes: ‘1Y’: Value too large to be stored in data type", + ); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["-n", "1Y", "emptyfile.txt"]) + .fails() + .stderr_is( + "head: invalid number of lines: ‘1Y’: Value too large to be stored in data type", + ); +} From a3e047ff166205b066174032274594a2eeab2a50 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 1 Jun 2021 10:22:44 +0200 Subject: [PATCH 08/20] uucore: add more tests to parse_size --- src/uucore/src/lib/parser/parse_size.rs | 32 ++++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index e8ede8cad..4339cd7fb 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -53,14 +53,14 @@ pub fn parse_size(size: &str) -> Result { let (base, exponent): (u128, u32) = match unit { "" => (1, 0), "b" => (512, 1), // (`head` and `tail` use "b") - "KiB" | "K" | "k" => (1024, 1), - "MiB" | "M" | "m" => (1024, 2), - "GiB" | "G" | "g" => (1024, 3), - "TiB" | "T" | "t" => (1024, 4), - "PiB" | "P" | "p" => (1024, 5), - "EiB" | "E" | "e" => (1024, 6), - "ZiB" | "Z" | "z" => (1024, 7), - "YiB" | "Y" | "y" => (1024, 8), + "KiB" | "kiB" | "K" | "k" => (1024, 1), + "MiB" | "miB" | "M" | "m" => (1024, 2), + "GiB" | "giB" | "G" | "g" => (1024, 3), + "TiB" | "tiB" | "T" | "t" => (1024, 4), + "PiB" | "piB" | "P" | "p" => (1024, 5), + "EiB" | "eiB" | "E" | "e" => (1024, 6), + "ZiB" | "ziB" | "Z" | "z" => (1024, 7), + "YiB" | "yiB" | "Y" | "y" => (1024, 8), "KB" | "kB" => (1000, 1), "MB" | "mB" => (1000, 2), "GB" | "gB" => (1000, 3), @@ -152,19 +152,23 @@ mod tests { for &(c, exp) in &suffixes { let s = format!("2{}B", c); // KB - assert_eq!(Ok((2 * (1000 as u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1000_u128).pow(exp)) as usize), parse_size(&s)); let s = format!("2{}", c); // K - assert_eq!(Ok((2 * (1024 as u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as usize), parse_size(&s)); let s = format!("2{}iB", c); // KiB - assert_eq!(Ok((2 * (1024 as u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as usize), parse_size(&s)); + let s = format!("2{}iB", c.to_lowercase()); // kiB + assert_eq!(Ok((2 * (1024_u128).pow(exp)) as usize), parse_size(&s)); // suffix only let s = format!("{}B", c); // KB - assert_eq!(Ok(((1000 as u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1000_u128).pow(exp)) as usize), parse_size(&s)); let s = format!("{}", c); // K - assert_eq!(Ok(((1024 as u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as usize), parse_size(&s)); let s = format!("{}iB", c); // KiB - assert_eq!(Ok(((1024 as u128).pow(exp)) as usize), parse_size(&s)); + assert_eq!(Ok(((1024_u128).pow(exp)) as usize), parse_size(&s)); + let s = format!("{}iB", c.to_lowercase()); // kiB + assert_eq!(Ok(((1024_u128).pow(exp)) as usize), parse_size(&s)); } } From 3c7175f00d784768a793a4141c067650bb0ef150 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 1 Jun 2021 12:17:11 +0200 Subject: [PATCH 09/20] head/tail: add fixes and tests for bytes/lines NUM arg (undocumented sign) * change tail bytes/lines clap parsing to fix posix override behavior * change tail bytes/lines NUM parsing logic to be consistent with head --- src/uu/head/src/parse.rs | 7 ++-- src/uu/tail/src/tail.rs | 74 ++++++++++++++++++++------------------ tests/by-util/test_head.rs | 22 ++++++++++++ tests/by-util/test_tail.rs | 48 +++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 36 deletions(-) diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 3b788f819..b395b330b 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -97,9 +97,12 @@ pub fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { let mut all_but_last = false; if let Some(c) = size_string.chars().next() { - if c == '-' { + if c == '+' || c == '-' { + // head: '+' is not documented (8.32 man pages) size_string = &size_string[1..]; - all_but_last = true; + if c == '-' { + all_but_last = true; + } } } else { return Err(ParseSizeError::ParseFailure(src.to_string())); diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index a4634714c..acaad8c30 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -27,7 +27,7 @@ use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write}; use std::path::Path; use std::thread::sleep; use std::time::Duration; -use uucore::parse_size::parse_size; +use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::ringbuffer::RingBuffer; pub mod options { @@ -42,10 +42,9 @@ pub mod options { pub static PID: &str = "pid"; pub static SLEEP_INT: &str = "sleep-interval"; pub static ZERO_TERM: &str = "zero-terminated"; + pub static ARG_FILES: &str = "files"; } -static ARG_FILES: &str = "files"; - enum FilterMode { Bytes(usize), Lines(usize, u8), // (number of lines, delimiter) @@ -84,6 +83,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(options::BYTES) .takes_value(true) .allow_hyphen_values(true) + .overrides_with_all(&[options::BYTES, options::LINES]) .help("Number of bytes to print"), ) .arg( @@ -98,6 +98,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(options::LINES) .takes_value(true) .allow_hyphen_values(true) + .overrides_with_all(&[options::BYTES, options::LINES]) .help("Number of lines to print"), ) .arg( @@ -137,7 +138,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("Line delimiter is NUL, not newline"), ) .arg( - Arg::with_name(ARG_FILES) + Arg::with_name(options::ARG_FILES) .multiple(true) .takes_value(true) .min_values(1), @@ -171,38 +172,21 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } - match matches.value_of(options::LINES) { - Some(n) => { - let mut slice: &str = n; - let c = slice.chars().next().unwrap_or('_'); - if c == '+' || c == '-' { - slice = &slice[1..]; - if c == '+' { - settings.beginning = true; - } - } - match parse_size(slice) { - Ok(m) => settings.mode = FilterMode::Lines(m, b'\n'), - Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), - } + let mode_and_beginning = if let Some(arg) = matches.value_of(options::BYTES) { + match parse_num(arg) { + Ok((n, beginning)) => (FilterMode::Bytes(n), beginning), + Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), } - None => { - if let Some(n) = matches.value_of(options::BYTES) { - let mut slice: &str = n; - let c = slice.chars().next().unwrap_or('_'); - if c == '+' || c == '-' { - slice = &slice[1..]; - if c == '+' { - settings.beginning = true; - } - } - match parse_size(slice) { - Ok(m) => settings.mode = FilterMode::Bytes(m), - Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), - } - } + } else if let Some(arg) = matches.value_of(options::LINES) { + match parse_num(arg) { + Ok((n, beginning)) => (FilterMode::Lines(n, b'\n'), beginning), + Err(e) => crash!(1, "invalid number of lines: {}", e.to_string()), } + } else { + (FilterMode::Lines(10, b'\n'), false) }; + settings.mode = mode_and_beginning.0; + settings.beginning = mode_and_beginning.1; if matches.is_present(options::ZERO_TERM) { if let FilterMode::Lines(count, _) = settings.mode { @@ -215,7 +199,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { || matches.is_present(options::verbosity::SILENT); let files: Vec = matches - .values_of(ARG_FILES) + .values_of(options::ARG_FILES) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); @@ -422,3 +406,25 @@ fn print_byte(stdout: &mut T, ch: u8) { crash!(1, "{}", err); } } + +fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { + let mut size_string = src.trim(); + let mut starting_with = false; + + if let Some(c) = size_string.chars().next() { + if c == '+' || c == '-' { + // tail: '-' is not documented (8.32 man pages) + size_string = &size_string[1..]; + if c == '+' { + starting_with = true; + } + } + } else { + return Err(ParseSizeError::ParseFailure(src.to_string())); + } + + match parse_size(&size_string) { + Ok(n) => Ok((n, starting_with)), + Err(e) => Err(e), + } +} diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 349fc05d3..f26447636 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -267,3 +267,25 @@ fn test_head_invalid_num() { "head: invalid number of lines: ‘1Y’: Value too large to be stored in data type", ); } + +#[test] +fn test_head_num_with_undocumented_sign_bytes() { + // tail: '-' is not documented (8.32 man pages) + // head: '+' is not documented (8.32 man pages) + const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz"; + new_ucmd!() + .args(&["-c", "5"]) + .pipe_in(ALPHABET) + .succeeds() + .stdout_is("abcde"); + new_ucmd!() + .args(&["-c", "-5"]) + .pipe_in(ALPHABET) + .succeeds() + .stdout_is("abcdefghijklmnopqrstu"); + new_ucmd!() + .args(&["-c", "+5"]) + .pipe_in(ALPHABET) + .succeeds() + .stdout_is("abcde"); +} diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 6227ac60b..9d0462c7a 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -352,3 +352,51 @@ fn test_positive_zero_lines() { .succeeds() .stdout_is("a\nb\nc\nd\ne\n"); } + +#[test] +fn test_tail_invalid_num() { + new_ucmd!() + .args(&["-c", "1024R", "emptyfile.txt"]) + .fails() + .stderr_is("tail: invalid number of bytes: ‘1024R’"); + new_ucmd!() + .args(&["-n", "1024R", "emptyfile.txt"]) + .fails() + .stderr_is("tail: invalid number of lines: ‘1024R’"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["-c", "1Y", "emptyfile.txt"]) + .fails() + .stderr_is( + "tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type", + ); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["-n", "1Y", "emptyfile.txt"]) + .fails() + .stderr_is( + "tail: invalid number of lines: ‘1Y’: Value too large to be stored in data type", + ); +} + +#[test] +fn test_tail_num_with_undocumented_sign_bytes() { + // tail: '-' is not documented (8.32 man pages) + // head: '+' is not documented (8.32 man pages) + const ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz"; + new_ucmd!() + .args(&["-c", "5"]) + .pipe_in(ALPHABET) + .succeeds() + .stdout_is("vwxyz"); + new_ucmd!() + .args(&["-c", "-5"]) + .pipe_in(ALPHABET) + .succeeds() + .stdout_is("vwxyz"); + new_ucmd!() + .args(&["-c", "+5"]) + .pipe_in(ALPHABET) + .succeeds() + .stdout_is("efghijklmnopqrstuvwxyz"); +} From a900c7421aaeef2bf4cb225409b3ccd6d8926a9f Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 1 Jun 2021 22:07:29 +0200 Subject: [PATCH 10/20] od: use "parse_size" from uucore --- src/uu/od/src/parse_nrofbytes.rs | 51 +++---------------------- src/uucore/src/lib/parser/parse_size.rs | 40 ++++++++++++++++++- 2 files changed, 44 insertions(+), 47 deletions(-) diff --git a/src/uu/od/src/parse_nrofbytes.rs b/src/uu/od/src/parse_nrofbytes.rs index d2ba1527b..9223d7e53 100644 --- a/src/uu/od/src/parse_nrofbytes.rs +++ b/src/uu/od/src/parse_nrofbytes.rs @@ -1,14 +1,18 @@ pub fn parse_number_of_bytes(s: &str) -> Result { let mut start = 0; let mut len = s.len(); - let mut radix = 10; + let mut radix = 16; let mut multiply = 1; if s.starts_with("0x") || s.starts_with("0X") { start = 2; - radix = 16; } else if s.starts_with('0') { radix = 8; + } else { + return match uucore::parse_size::parse_size(&s[start..]) { + Ok(n) => Ok(n), + Err(_) => Err("parse failed"), + }; } let mut ends_with = s.chars().rev(); @@ -75,22 +79,6 @@ fn parse_number_of_bytes_str(s: &str) -> Result { #[test] fn test_parse_number_of_bytes() { - // normal decimal numbers - assert_eq!(0, parse_number_of_bytes_str("0").unwrap()); - assert_eq!(5, parse_number_of_bytes_str("5").unwrap()); - assert_eq!(999, parse_number_of_bytes_str("999").unwrap()); - assert_eq!(2 * 512, parse_number_of_bytes_str("2b").unwrap()); - assert_eq!(2 * 1024, parse_number_of_bytes_str("2k").unwrap()); - assert_eq!(4 * 1024, parse_number_of_bytes_str("4K").unwrap()); - assert_eq!(2 * 1048576, parse_number_of_bytes_str("2m").unwrap()); - assert_eq!(4 * 1048576, parse_number_of_bytes_str("4M").unwrap()); - assert_eq!(1073741824, parse_number_of_bytes_str("1G").unwrap()); - assert_eq!(2000, parse_number_of_bytes_str("2kB").unwrap()); - assert_eq!(4000, parse_number_of_bytes_str("4KB").unwrap()); - assert_eq!(2000000, parse_number_of_bytes_str("2mB").unwrap()); - assert_eq!(4000000, parse_number_of_bytes_str("4MB").unwrap()); - assert_eq!(2000000000, parse_number_of_bytes_str("2GB").unwrap()); - // octal input assert_eq!(8, parse_number_of_bytes_str("010").unwrap()); assert_eq!(8 * 512, parse_number_of_bytes_str("010b").unwrap()); @@ -103,31 +91,4 @@ fn test_parse_number_of_bytes() { assert_eq!(27, parse_number_of_bytes_str("0x1b").unwrap()); assert_eq!(16 * 1024, parse_number_of_bytes_str("0x10k").unwrap()); assert_eq!(16 * 1048576, parse_number_of_bytes_str("0x10m").unwrap()); - - // invalid input - parse_number_of_bytes_str("").unwrap_err(); - parse_number_of_bytes_str("-1").unwrap_err(); - parse_number_of_bytes_str("1e2").unwrap_err(); - parse_number_of_bytes_str("xyz").unwrap_err(); - parse_number_of_bytes_str("b").unwrap_err(); - parse_number_of_bytes_str("1Y").unwrap_err(); - parse_number_of_bytes_str("∞").unwrap_err(); -} - -#[test] -#[cfg(target_pointer_width = "64")] -fn test_parse_number_of_bytes_64bits() { - assert_eq!(1099511627776, parse_number_of_bytes_str("1T").unwrap()); - assert_eq!(1125899906842624, parse_number_of_bytes_str("1P").unwrap()); - assert_eq!( - 1152921504606846976, - parse_number_of_bytes_str("1E").unwrap() - ); - - assert_eq!(2000000000000, parse_number_of_bytes_str("2TB").unwrap()); - assert_eq!(2000000000000000, parse_number_of_bytes_str("2PB").unwrap()); - assert_eq!( - 2000000000000000000, - parse_number_of_bytes_str("2EB").unwrap() - ); } diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 4339cd7fb..8b3e0bf03 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -52,7 +52,7 @@ pub fn parse_size(size: &str) -> Result { let unit = &size[numeric_string.len()..]; let (base, exponent): (u128, u32) = match unit { "" => (1, 0), - "b" => (512, 1), // (`head` and `tail` use "b") + "b" => (512, 1), // (`od`, `head` and `tail` use "b") "KiB" | "kiB" | "K" | "k" => (1024, 1), "MiB" | "miB" | "M" | "m" => (1024, 2), "GiB" | "giB" | "G" | "g" => (1024, 3), @@ -210,7 +210,18 @@ mod tests { #[test] fn invalid_syntax() { - let test_strings = ["328hdsf3290", "5MiB nonsense", "5mib", "biB", "-", ""]; + let test_strings = [ + "328hdsf3290", + "5MiB nonsense", + "5mib", + "biB", + "-", + "+", + "", + "-1", + "1e2", + "∞", + ]; for &test_string in &test_strings { assert_eq!( parse_size(test_string).unwrap_err(), @@ -228,6 +239,8 @@ mod tests { fn no_suffix() { assert_eq!(Ok(1234), parse_size("1234")); assert_eq!(Ok(0), parse_size("0")); + assert_eq!(Ok(5), parse_size("5")); + assert_eq!(Ok(999), parse_size("999")); } #[test] @@ -239,6 +252,8 @@ mod tests { assert_eq!(Ok(0), parse_size("0KB")); assert_eq!(Ok(1000), parse_size("KB")); assert_eq!(Ok(1024), parse_size("K")); + assert_eq!(Ok(2000), parse_size("2kB")); + assert_eq!(Ok(4000), parse_size("4KB")); } #[test] @@ -247,5 +262,26 @@ mod tests { assert_eq!(Ok(123 * 1000 * 1000), parse_size("123MB")); assert_eq!(Ok(1024 * 1024), parse_size("M")); assert_eq!(Ok(1000 * 1000), parse_size("MB")); + assert_eq!(Ok(2 * 1_048_576), parse_size("2m")); + assert_eq!(Ok(4 * 1_048_576), parse_size("4M")); + assert_eq!(Ok(2_000_000), parse_size("2mB")); + assert_eq!(Ok(4_000_000), parse_size("4MB")); + } + + #[test] + fn gigabytes_suffix() { + assert_eq!(Ok(1_073_741_824), parse_size("1G")); + assert_eq!(Ok(2_000_000_000), parse_size("2GB")); + } + + #[test] + #[cfg(target_pointer_width = "64")] + fn x64() { + assert_eq!(Ok(1_099_511_627_776), parse_size("1T")); + assert_eq!(Ok(1_125_899_906_842_624), parse_size("1P")); + assert_eq!(Ok(1_152_921_504_606_846_976), parse_size("1E")); + assert_eq!(Ok(2_000_000_000_000), parse_size("2TB")); + assert_eq!(Ok(2_000_000_000_000_000), parse_size("2PB")); + assert_eq!(Ok(2_000_000_000_000_000_000), parse_size("2EB")); } } From 6b8de1dd8bfd3935467c5c2679c3b2bf0a6478cf Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 2 Jun 2021 04:16:41 +0200 Subject: [PATCH 11/20] sort: use "parse_size" from uucore * make parsing of SIZE argument consistent with GNU's behavior * add error handling * add tests --- src/uu/sort/src/sort.rs | 100 +++++++++++++++++++++++++++---------- tests/by-util/test_sort.rs | 45 +++++++++++++++-- 2 files changed, 115 insertions(+), 30 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index ab3b06451..208010d09 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -43,6 +43,7 @@ use std::ops::Range; use std::path::Path; use std::path::PathBuf; use unicode_width::UnicodeWidthStr; +use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::InvalidEncodingHandling; static NAME: &str = "sort"; @@ -159,32 +160,31 @@ pub struct GlobalSettings { } impl GlobalSettings { - /// Interpret this `&str` as a number with an optional trailing si unit. - /// - /// If there is no trailing si unit, the implicit unit is K. - /// The suffix B causes the number to be interpreted as a byte count. - fn parse_byte_count(input: &str) -> usize { - const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']; + /// Parse a SIZE string into a number of bytes. + /// A size string comprises an integer and an optional unit. + /// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b which is 1. + /// Default is K. + fn parse_byte_count(input: &str) -> Result { + // GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y + // GNU sort (8.32) invalid: b, B, 1B, p, e, z, y + const ALLOW_LIST: &[char] = &[ + 'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y', + ]; + let mut size_string = input.trim().to_string(); - let input = input.trim(); - - let (num_str, si_unit) = - if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) { - let mut chars = input.chars(); - let si_suffix = chars.next_back().unwrap().to_ascii_uppercase(); - let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap(); - let num_str = chars.as_str(); - (num_str, si_unit) - } else { - (input, 1) - }; - - let num_usize: usize = num_str - .trim() - .parse() - .unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e)); - - num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32)) + if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c)) + || size_string.ends_with(|c: char| c.is_digit(10)) + { + // b 1, K 1024 (default) + if size_string.ends_with(|c: char| c.is_digit(10)) { + size_string.push('K'); + } else if size_string.ends_with('b') { + size_string.pop(); + } + parse_size(&size_string) + } else { + Err(ParseSizeError::ParseFailure("invalid suffix".to_string())) + } } fn out_writer(&self) -> BufWriter> { @@ -1148,7 +1148,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.buffer_size = matches .value_of(OPT_BUF_SIZE) - .map(GlobalSettings::parse_byte_count) + .map(|v| match GlobalSettings::parse_byte_count(v) { + Ok(n) => n, + Err(ParseSizeError::ParseFailure(_)) => crash!(2, "invalid -S argument '{}'", v), + Err(ParseSizeError::SizeTooBig(_)) => crash!(2, "-S argument '{}' too large", v), + }) .unwrap_or(DEFAULT_BUF_SIZE); settings.tmp_dir = matches @@ -1640,4 +1644,48 @@ mod tests { // How big is a selection? Constant cost all lines pay when we need selections. assert_eq!(std::mem::size_of::(), 24); } + + #[test] + fn test_parse_byte_count() { + let valid_input = [ + ("0", 0), + ("50K", 50 * 1024), + ("50k", 50 * 1024), + ("1M", 1024 * 1024), + ("100M", 100 * 1024 * 1024), + #[cfg(not(target_pointer_width = "32"))] + ("1000G", 1000 * 1024 * 1024 * 1024), + #[cfg(not(target_pointer_width = "32"))] + ("10T", 10 * 1024 * 1024 * 1024 * 1024), + ("1b", 1), + ("1024b", 1024), + ("1024Mb", 1024 * 1024 * 1024), // TODO: This might not be what GNU `sort` does? + ("1", 1024), // K is default + ("50", 50 * 1024), + ("K", 1024), + ("k", 1024), + ("m", 1024 * 1024), + #[cfg(not(target_pointer_width = "32"))] + ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ]; + for (input, expected_output) in &valid_input { + assert_eq!( + GlobalSettings::parse_byte_count(input), + Ok(*expected_output) + ); + } + + // SizeTooBig + let invalid_input = ["500E", "1Y"]; + for input in &invalid_input { + #[cfg(not(target_pointer_width = "128"))] + assert!(GlobalSettings::parse_byte_count(input).is_err()); + } + + // ParseFailure + let invalid_input = ["nonsense", "1B", "B", "b", "p", "e", "z", "y"]; + for input in &invalid_input { + assert!(GlobalSettings::parse_byte_count(input).is_err()); + } + } } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index d2b447925..054789edf 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -21,9 +21,7 @@ fn test_helper(file_name: &str, possible_args: &[&str]) { #[test] fn test_buffer_sizes() { - let buffer_sizes = [ - "0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y", - ]; + let buffer_sizes = ["0", "50K", "50k", "1M", "100M"]; for buffer_size in &buffer_sizes { new_ucmd!() .arg("-n") @@ -32,6 +30,20 @@ fn test_buffer_sizes() { .arg("ext_sort.txt") .succeeds() .stdout_is_fixture("ext_sort.expected"); + + #[cfg(not(target_pointer_width = "32"))] + { + let buffer_sizes = ["1000G", "10T"]; + for buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg(buffer_size) + .arg("ext_sort.txt") + .succeeds() + .stdout_is_fixture("ext_sort.expected"); + } + } } } @@ -43,11 +55,36 @@ fn test_invalid_buffer_size() { .arg("-S") .arg(invalid_buffer_size) .fails() + .code_is(2) .stderr_only(format!( - "sort: failed to parse buffer size `{}`: invalid digit found in string", + "sort: invalid -S argument '{}'", invalid_buffer_size )); } + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .arg("-n") + .arg("-S") + .arg("1Y") + .arg("ext_sort.txt") + .fails() + .code_is(2) + .stderr_only("sort: -S argument '1Y' too large"); + + #[cfg(target_pointer_width = "32")] + { + let buffer_sizes = ["1000G", "10T"]; + for buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg(buffer_size) + .arg("ext_sort.txt") + .fails() + .code_is(2) + .stderr_only(format!("sort: -S argument '{}' too large", buffer_size)); + } + } } #[test] From 2f5f7c6fa1f79a3917300e62555e812be3faa87b Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 2 Jun 2021 18:37:21 +0200 Subject: [PATCH 12/20] split: use "parse_size" from uucore * make stderr of parsing SIZE/NUMBER argument consistent with GNU's behavior * add error handling * add tests --- src/uu/split/src/split.rs | 56 +++++++++---------------- src/uucore/src/lib/parser/parse_size.rs | 4 +- tests/by-util/test_split.rs | 46 ++++++++++++++++++++ 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 85ed5f183..1fe35795e 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -13,11 +13,13 @@ extern crate uucore; mod platform; use clap::{App, Arg}; +use std::convert::TryFrom; use std::env; use std::fs::File; use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write}; use std::path::Path; use std::{char, fs::remove_file}; +use uucore::parse_size::{parse_size, ParseSizeError}; static NAME: &str = "split"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -232,10 +234,9 @@ struct LineSplitter { impl LineSplitter { fn new(settings: &Settings) -> LineSplitter { LineSplitter { - lines_per_split: settings - .strategy_param - .parse() - .unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)), + lines_per_split: settings.strategy_param.parse().unwrap_or_else(|_| { + crash!(1, "invalid number of lines: ‘{}’", settings.strategy_param) + }), } } } @@ -277,40 +278,23 @@ struct ByteSplitter { impl ByteSplitter { fn new(settings: &Settings) -> ByteSplitter { - // These multipliers are the same as supported by GNU coreutils. - let modifiers: Vec<(&str, u128)> = vec![ - ("K", 1024u128), - ("M", 1024 * 1024), - ("G", 1024 * 1024 * 1024), - ("T", 1024 * 1024 * 1024 * 1024), - ("P", 1024 * 1024 * 1024 * 1024 * 1024), - ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), - ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), - ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), - ("KB", 1000), - ("MB", 1000 * 1000), - ("GB", 1000 * 1000 * 1000), - ("TB", 1000 * 1000 * 1000 * 1000), - ("PB", 1000 * 1000 * 1000 * 1000 * 1000), - ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000), - ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), - ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), - ]; - - // This sequential find is acceptable since none of the modifiers are - // suffixes of any other modifiers, a la Huffman codes. - let (suffix, multiplier) = modifiers - .iter() - .find(|(suffix, _)| settings.strategy_param.ends_with(suffix)) - .unwrap_or(&("", 1)); - - // Try to parse the actual numeral. - let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] - .parse::() - .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e)); + let size_string = &settings.strategy_param; + let size_num = match parse_size(&size_string) { + Ok(n) => n, + Err(e) => match e { + ParseSizeError::ParseFailure(_) => { + crash!(1, "invalid number of bytes: {}", e.to_string()) + } + ParseSizeError::SizeTooBig(_) => crash!( + 1, + "invalid number of bytes: ‘{}’: Value too large for defined data type", + size_string + ), + }, + }; ByteSplitter { - bytes_per_split: n * multiplier, + bytes_per_split: u128::try_from(size_num).unwrap(), } } } diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 8b3e0bf03..4a31d753a 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -117,7 +117,9 @@ impl ParseSizeError { fn size_too_big(s: &str) -> ParseSizeError { // has to be handled in the respective uutils because strings differ, e.g. // truncate: Invalid number: ‘1Y’: Value too large to be stored in data type - // tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type + // tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type + // split: invalid number of bytes: ‘1Y’: Value too large for defined data type + // etc. ParseSizeError::SizeTooBig(format!( "‘{}’: Value too large to be stored in data type", s diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 85b28326b..1727072ca 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -285,3 +285,49 @@ fn test_filter_command_fails() { ucmd.args(&["--filter=/a/path/that/totally/does/not/exist", name]) .fails(); } + +#[test] +fn test_split_lines_number() { + // Test if stdout/stderr for '--lines' option is correct + new_ucmd!() + .args(&["--lines", "2"]) + .pipe_in("abcde") + .succeeds() + .no_stderr() + .no_stdout(); + new_ucmd!() + .args(&["--lines", "2fb"]) + .pipe_in("abcde") + .fails() + .code_is(1) + .stderr_only("split: invalid number of lines: ‘2fb’"); +} + +#[test] +fn test_split_invalid_bytes_size() { + new_ucmd!() + .args(&["-b", "1024R"]) + .fails() + .code_is(1) + .stderr_only("split: invalid number of bytes: ‘1024R’"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["-b", "1Y"]) + .fails() + .code_is(1) + .stderr_only("split: invalid number of bytes: ‘1Y’: Value too large for defined data type"); + #[cfg(target_pointer_width = "32")] + { + let sizes = ["1000G", "10T"]; + for size in &sizes { + new_ucmd!() + .args(&["-b", size]) + .fails() + .code_is(1) + .stderr_only(format!( + "split: invalid number of bytes: ‘{}’: Value too large for defined data type", + size + )); + } + } +} From 5898b99627ebb5fac6e14d381abc907879d33ee2 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 2 Jun 2021 22:08:42 +0200 Subject: [PATCH 13/20] truncate: add error handling for SIZE argument * add tests for SIZE argument * fix clap argument handling for --size and --reference --- src/uu/truncate/src/truncate.rs | 22 +++++++-------- tests/by-util/test_truncate.rs | 48 ++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 79aa4f3f1..57cc280c3 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -61,10 +61,9 @@ pub mod options { pub static NO_CREATE: &str = "no-create"; pub static REFERENCE: &str = "reference"; pub static SIZE: &str = "size"; + pub static ARG_FILES: &str = "files"; } -static ARG_FILES: &str = "files"; - fn get_usage() -> String { format!("{0} [OPTION]... [FILE]...", executable!()) } @@ -116,21 +115,23 @@ pub fn uumain(args: impl uucore::Args) -> i32 { Arg::with_name(options::REFERENCE) .short("r") .long(options::REFERENCE) + .required_unless(options::SIZE) .help("base the size of each file on the size of RFILE") .value_name("RFILE") ) .arg( Arg::with_name(options::SIZE) .short("s") - .long("size") + .long(options::SIZE) + .required_unless(options::REFERENCE) .help("set or adjust the size of each file according to SIZE, which is in bytes unless --io-blocks is specified") .value_name("SIZE") ) - .arg(Arg::with_name(ARG_FILES).multiple(true).takes_value(true).min_values(1)) + .arg(Arg::with_name(options::ARG_FILES).value_name("FILE").multiple(true).takes_value(true).required(true).min_values(1)) .get_matches_from(args); let files: Vec = matches - .values_of(ARG_FILES) + .values_of(options::ARG_FILES) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); @@ -152,8 +153,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { crash!( 1, "cannot stat '{}': No such file or directory", - reference.unwrap() - ); + reference.unwrap_or("".to_string()) + ); // TODO: fix '--no-create' see test_reference and test_truncate_bytes_size } _ => crash!(1, "{}", e.to_string()), } @@ -209,8 +210,7 @@ fn truncate_reference_and_size( } _ => m, }, - // TODO: handle errors ParseFailure(String)/SizeTooBig(String) - Err(_) => crash!(1, "Invalid number: ‘{}’", size_string), + Err(e) => crash!(1, "Invalid number: {}", e.to_string()), }; let fsize = usize::try_from(metadata(rfilename)?.len()).unwrap(); let tsize = mode.to_size(fsize); @@ -265,7 +265,7 @@ fn truncate_size_only( ) -> std::io::Result<()> { let mode = match parse_mode_and_size(size_string) { Ok(m) => m, - Err(_) => crash!(1, "Invalid number: ‘{}’", size_string), + Err(e) => crash!(1, "Invalid number: {}", e.to_string()), }; for filename in &filenames { let fsize = usize::try_from(metadata(filename)?.len()).unwrap(); @@ -294,7 +294,7 @@ fn truncate( } (Some(rfilename), None) => truncate_reference_file_only(&rfilename, filenames, create), (None, Some(size_string)) => truncate_size_only(&size_string, filenames, create), - (None, None) => crash!(1, "you must specify either --reference or --size"), + (None, None) => crash!(1, "you must specify either --reference or --size"), // this case cannot happen anymore because it's handled by clap } } diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 5c3f169a1..72f7f780b 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -48,7 +48,7 @@ fn test_reference() { // manpage: "A FILE argument that does not exist is created." // TODO: 'truncate' does not create the file in this case, // but should because '--no-create' wasn't specified. - at.touch(FILE1); // TODO: remove this when fixed + at.touch(FILE1); // TODO: remove this when 'no-create' is fixed scene.ucmd().arg("-s").arg("+5KB").arg(FILE1).succeeds(); scene @@ -240,11 +240,18 @@ fn test_size_and_reference() { ); } +#[test] +fn test_error_filename_only() { + // truncate: you must specify either ‘--size’ or ‘--reference’ + new_ucmd!().args(&["file"]).fails().stderr_contains( + "error: The following required arguments were not provided: + --reference + --size ", + ); +} + #[test] fn test_invalid_numbers() { - // TODO For compatibility with GNU, `truncate -s 0X` should cause - // the same error as `truncate -s 0X file`, but currently it returns - // a different error. new_ucmd!() .args(&["-s", "0X", "file"]) .fails() @@ -274,3 +281,36 @@ fn test_reference_with_size_file_not_found() { .fails() .stderr_contains("cannot stat 'a': No such file or directory"); } + +#[test] +fn test_truncate_bytes_size() { + // TODO: this should succeed without error, uncomment when '--no-create' is fixed + // new_ucmd!() + // .args(&["--no-create", "--size", "K", "file"]) + // .succeeds(); + new_ucmd!() + .args(&["--size", "1024R", "file"]) + .fails() + .code_is(1) + .stderr_only("truncate: Invalid number: ‘1024R’"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&["--size", "1Y", "file"]) + .fails() + .code_is(1) + .stderr_only("truncate: Invalid number: ‘1Y’: Value too large for defined data type"); + #[cfg(target_pointer_width = "32")] + { + let sizes = ["1000G", "10T"]; + for size in &sizes { + new_ucmd!() + .args(&["--size", size, "file"]) + .fails() + .code_is(1) + .stderr_only(format!( + "truncate: Invalid number: ‘{}’: Value too large for defined data type", + size + )); + } + } +} From ad26b7a042cabaf50b7b2ba93e980b050dc0bb59 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Thu, 3 Jun 2021 20:37:29 +0200 Subject: [PATCH 14/20] head/tail/split: make error handling of NUM/SIZE arguments more consistent * add tests for each flag that takes NUM/SIZE arguments * fix bug in tail where 'quiet' and 'verbose' flags did not override each other POSIX style --- src/uu/head/src/head.rs | 2 +- src/uu/head/src/parse.rs | 5 +- src/uu/split/src/split.rs | 13 +---- src/uu/tail/src/tail.rs | 18 +++---- src/uucore/src/lib/parser/parse_size.rs | 63 +++++++++++++++++++------ tests/by-util/test_head.rs | 23 ++++++--- tests/by-util/test_tail.rs | 25 ++++++---- 7 files changed, 93 insertions(+), 56 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index d67cc5b07..80f816d0f 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -76,7 +76,7 @@ fn app<'a>() -> App<'a, 'a> { .arg( Arg::with_name(options::QUIET_NAME) .short("q") - .long("--quiet") + .long("quiet") .visible_alias("silent") .help("never print headers giving file names") .overrides_with_all(&[options::VERBOSE_NAME, options::QUIET_NAME]), diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 50cdfc439..6631dba0e 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -108,10 +108,7 @@ pub fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { return Err(ParseSizeError::ParseFailure(src.to_string())); } - match parse_size(&size_string) { - Ok(n) => Ok((n, all_but_last)), - Err(e) => Err(e), - } + parse_size(&size_string).map(|n| (n, all_but_last)) } #[cfg(test)] diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 1fe35795e..ca9c68c74 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -19,7 +19,7 @@ use std::fs::File; use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write}; use std::path::Path; use std::{char, fs::remove_file}; -use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::parse_size::parse_size; static NAME: &str = "split"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -281,16 +281,7 @@ impl ByteSplitter { let size_string = &settings.strategy_param; let size_num = match parse_size(&size_string) { Ok(n) => n, - Err(e) => match e { - ParseSizeError::ParseFailure(_) => { - crash!(1, "invalid number of bytes: {}", e.to_string()) - } - ParseSizeError::SizeTooBig(_) => crash!( - 1, - "invalid number of bytes: ‘{}’: Value too large for defined data type", - size_string - ), - }, + Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), }; ByteSplitter { diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index acaad8c30..76c799621 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -33,7 +33,6 @@ use uucore::ringbuffer::RingBuffer; pub mod options { pub mod verbosity { pub static QUIET: &str = "quiet"; - pub static SILENT: &str = "silent"; pub static VERBOSE: &str = "verbose"; } pub static BYTES: &str = "bytes"; @@ -77,6 +76,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let app = App::new(executable!()) .version(crate_version!()) .about("output the last part of files") + // TODO: add usage .arg( Arg::with_name(options::BYTES) .short("c") @@ -111,13 +111,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { Arg::with_name(options::verbosity::QUIET) .short("q") .long(options::verbosity::QUIET) + .visible_alias("silent") + .overrides_with_all(&[options::verbosity::QUIET, options::verbosity::VERBOSE]) .help("never output headers giving file names"), ) - .arg( - Arg::with_name(options::verbosity::SILENT) - .long(options::verbosity::SILENT) - .help("synonym of --quiet"), - ) .arg( Arg::with_name(options::SLEEP_INT) .short("s") @@ -129,6 +126,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { Arg::with_name(options::verbosity::VERBOSE) .short("v") .long(options::verbosity::VERBOSE) + .overrides_with_all(&[options::verbosity::QUIET, options::verbosity::VERBOSE]) .help("always output headers giving file names"), ) .arg( @@ -195,8 +193,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let verbose = matches.is_present(options::verbosity::VERBOSE); - let quiet = matches.is_present(options::verbosity::QUIET) - || matches.is_present(options::verbosity::SILENT); + let quiet = matches.is_present(options::verbosity::QUIET); let files: Vec = matches .values_of(options::ARG_FILES) @@ -423,8 +420,5 @@ fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { return Err(ParseSizeError::ParseFailure(src.to_string())); } - match parse_size(&size_string) { - Ok(n) => Ok((n, starting_with)), - Err(e) => Err(e), - } + parse_size(&size_string).map(|n| (n, starting_with)) } diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 4a31d753a..c7825d4e3 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -75,10 +75,9 @@ pub fn parse_size(size: &str) -> Result { Ok(n) => n, Err(_) => return Err(ParseSizeError::size_too_big(size)), }; - match number.checked_mul(factor) { - Some(n) => Ok(n), - None => Err(ParseSizeError::size_too_big(size)), - } + number + .checked_mul(factor) + .ok_or(ParseSizeError::size_too_big(size)) } #[derive(Debug, PartialEq, Eq)] @@ -108,22 +107,58 @@ impl fmt::Display for ParseSizeError { impl ParseSizeError { fn parse_failure(s: &str) -> ParseSizeError { - // has to be handled in the respective uutils because strings differ, e.g. - // truncate: Invalid number: ‘fb’ - // tail: invalid number of bytes: ‘fb’ + // stderr on linux (GNU coreutils 8.32) + // has to be handled in the respective uutils because strings differ, e.g.: + // + // `NUM` + // head: invalid number of bytes: ‘1fb’ + // tail: invalid number of bytes: ‘1fb’ + // + // `SIZE` + // split: invalid number of bytes: ‘1fb’ + // truncate: Invalid number: ‘1fb’ + // + // `MODE` + // stdbuf: invalid mode ‘1fb’ + // + // `SIZE` + // sort: invalid suffix in --buffer-size argument '1fb' + // sort: invalid --buffer-size argument 'fb' + // + // `SIZE` + // du: invalid suffix in --buffer-size argument '1fb' + // du: invalid suffix in --threshold argument '1fb' + // du: invalid --buffer-size argument 'fb' + // du: invalid --threshold argument 'fb' + // + // `BYTES` + // od: invalid suffix in --read-bytes argument '1fb' + // od: invalid --read-bytes argument argument 'fb' + // --skip-bytes + // --width + // --strings + // etc. ParseSizeError::ParseFailure(format!("‘{}’", s)) } fn size_too_big(s: &str) -> ParseSizeError { - // has to be handled in the respective uutils because strings differ, e.g. - // truncate: Invalid number: ‘1Y’: Value too large to be stored in data type - // tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type + // stderr on linux (GNU coreutils 8.32) + // has to be handled in the respective uutils because strings differ, e.g.: + // + // head: invalid number of bytes: ‘1Y’: Value too large for defined data type + // tail: invalid number of bytes: ‘1Y’: Value too large for defined data type // split: invalid number of bytes: ‘1Y’: Value too large for defined data type + // truncate: Invalid number: ‘1Y’: Value too large for defined data type + // stdbuf: invalid mode ‘1Y’: Value too large for defined data type + // sort: -S argument '1Y' too large + // du: -B argument '1Y' too large + // od: -N argument '1Y' too large // etc. - ParseSizeError::SizeTooBig(format!( - "‘{}’: Value too large to be stored in data type", - s - )) + // + // stderr on macos (brew - GNU coreutils 8.32) also differs for the same version, e.g.: + // ghead: invalid number of bytes: ‘1Y’: Value too large to be stored in data type + // gtail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type + ParseSizeError::SizeTooBig(format!("‘{}’: Value too large for defined data type", s)) } } diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 99e6518fa..6a2cdf1cd 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -244,6 +244,7 @@ hello ", ); } + #[test] fn test_head_invalid_num() { new_ucmd!() @@ -258,16 +259,26 @@ fn test_head_invalid_num() { new_ucmd!() .args(&["-c", "1Y", "emptyfile.txt"]) .fails() - .stderr_is( - "head: invalid number of bytes: ‘1Y’: Value too large to be stored in data type", - ); + .stderr_is("head: invalid number of bytes: ‘1Y’: Value too large for defined data type"); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .args(&["-n", "1Y", "emptyfile.txt"]) .fails() - .stderr_is( - "head: invalid number of lines: ‘1Y’: Value too large to be stored in data type", - ); + .stderr_is("head: invalid number of lines: ‘1Y’: Value too large for defined data type"); + #[cfg(target_pointer_width = "32")] + { + let sizes = ["1000G", "10T"]; + for size in &sizes { + new_ucmd!() + .args(&["-c", size]) + .fails() + .code_is(1) + .stderr_only(format!( + "head: invalid number of bytes: ‘{}’: Value too large for defined data type", + size + )); + } + } } #[test] diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 43e4aaa0c..c296e2763 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -284,12 +284,11 @@ fn test_multiple_input_files_with_suppressed_headers() { #[test] fn test_multiple_input_quiet_flag_overrides_verbose_flag_for_suppressing_headers() { - // TODO: actually the later one should win, i.e. -qv should lead to headers being printed, -vq to them being suppressed new_ucmd!() .arg(FOOBAR_TXT) .arg(FOOBAR_2_TXT) - .arg("-q") .arg("-v") + .arg("-q") .run() .stdout_is_fixture("foobar_multiple_quiet.expected"); } @@ -367,16 +366,26 @@ fn test_tail_invalid_num() { new_ucmd!() .args(&["-c", "1Y", "emptyfile.txt"]) .fails() - .stderr_is( - "tail: invalid number of bytes: ‘1Y’: Value too large to be stored in data type", - ); + .stderr_is("tail: invalid number of bytes: ‘1Y’: Value too large for defined data type"); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .args(&["-n", "1Y", "emptyfile.txt"]) .fails() - .stderr_is( - "tail: invalid number of lines: ‘1Y’: Value too large to be stored in data type", - ); + .stderr_is("tail: invalid number of lines: ‘1Y’: Value too large for defined data type"); + #[cfg(target_pointer_width = "32")] + { + let sizes = ["1000G", "10T"]; + for size in &sizes { + new_ucmd!() + .args(&["-c", size]) + .fails() + .code_is(1) + .stderr_only(format!( + "tail: invalid number of bytes: ‘{}’: Value too large for defined data type", + size + )); + } + } } #[test] From db3ee61742138e7bc1dd14bb3a1bd67db418f5a5 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Thu, 3 Jun 2021 21:00:03 +0200 Subject: [PATCH 15/20] du/sort/od/stdbuf: make error handling of SIZE/BYTES/MODE arguments more consistent * od: add stderr info for not yet implemented '--strings' flag --- src/uu/du/src/du.rs | 31 ++++++++------- src/uu/od/src/od.rs | 65 +++++++++++++++++++------------- src/uu/od/src/parse_nrofbytes.rs | 45 +++++++++++----------- src/uu/sort/src/sort.rs | 24 ++++++++---- src/uu/stdbuf/src/stdbuf.rs | 11 ++---- tests/by-util/test_du.rs | 2 +- tests/by-util/test_od.rs | 34 +++++++++++++++++ tests/by-util/test_sort.rs | 9 +++-- tests/by-util/test_stdbuf.rs | 25 ++++++------ 9 files changed, 153 insertions(+), 93 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 5dfb41d82..170e057b5 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -42,7 +42,7 @@ mod options { pub const NULL: &str = "0"; pub const ALL: &str = "all"; pub const APPARENT_SIZE: &str = "apparent-size"; - pub const BLOCK_SIZE: &str = "B"; + pub const BLOCK_SIZE: &str = "block-size"; pub const BYTES: &str = "b"; pub const TOTAL: &str = "c"; pub const MAX_DEPTH: &str = "d"; @@ -211,18 +211,11 @@ fn get_file_info(path: &PathBuf) -> Option { } fn read_block_size(s: Option<&str>) -> usize { - if let Some(size_arg) = s { - match parse_size(size_arg) { - Ok(v) => v, - Err(e) => match e { - ParseSizeError::ParseFailure(_) => { - crash!(1, "invalid suffix in --block-size argument '{}'", size_arg) - } - ParseSizeError::SizeTooBig(_) => { - crash!(1, "--block-size argument '{}' too large", size_arg) - } - }, - } + if let Some(s) = s { + parse_size(s).map_or_else( + |e| crash!(1, "{}", format_error_message(e, s, options::BLOCK_SIZE)), + |n| n, + ) } else { for env_var in &["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { if let Ok(env_size) = env::var(env_var) { @@ -389,7 +382,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .arg( Arg::with_name(options::BLOCK_SIZE) .short("B") - .long("block-size") + .long(options::BLOCK_SIZE) .value_name("SIZE") .help( "scale sizes by SIZE before printing them. \ @@ -694,6 +687,16 @@ Try '{} --help' for more information.", 0 } +fn format_error_message(error: ParseSizeError, s: &str, option: &str) -> String { + // NOTE: + // GNU's du echos affected flag, -B or --block-size (-t or --threshold), depending user's selection + // GNU's du distinguishs between "invalid (suffix in) argument" + match error { + ParseSizeError::ParseFailure(_) => format!("invalid --{} argument '{}'", option, s), + ParseSizeError::SizeTooBig(_) => format!("--{} argument '{}' too large", option, s), + } +} + #[cfg(test)] mod test_du { #[allow(unused_imports)] diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index 7359047b2..9b98fd515 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -43,6 +43,7 @@ use crate::partialreader::*; use crate::peekreader::*; use crate::prn_char::format_ascii_dump; use clap::{self, AppSettings, Arg, ArgMatches}; +use uucore::parse_size::ParseSizeError; use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -130,15 +131,16 @@ impl OdOptions { } }; - let mut skip_bytes = match matches.value_of(options::SKIP_BYTES) { - None => 0, - Some(s) => match parse_number_of_bytes(&s) { - Ok(i) => i, - Err(_) => { - return Err(format!("Invalid argument --skip-bytes={}", s)); - } - }, - }; + if matches.occurrences_of(options::STRINGS) > 0 { + crash!(1, "Option '{}' not yet implemented.", options::STRINGS); + } + + let mut skip_bytes = matches.value_of(options::SKIP_BYTES).map_or(0, |s| { + parse_number_of_bytes(s).map_or_else( + |e| crash!(1, "{}", format_error_message(e, s, options::SKIP_BYTES)), + |n| n, + ) + }); let mut label: Option = None; @@ -161,11 +163,16 @@ impl OdOptions { } }; - let mut line_bytes = match matches.value_of(options::WIDTH) { - None => 16, - Some(_) if matches.occurrences_of(options::WIDTH) == 0 => 16, - Some(s) => s.parse::().unwrap_or(0), - }; + let mut line_bytes = matches.value_of(options::WIDTH).map_or(16, |s| { + if matches.occurrences_of(options::WIDTH) == 0 { + return 16; + }; + parse_number_of_bytes(s).map_or_else( + |e| crash!(1, "{}", format_error_message(e, s, options::WIDTH)), + |n| n, + ) + }); + let min_bytes = formats.iter().fold(1, |max, next| { cmp::max(max, next.formatter_item_info.byte_size) }); @@ -176,15 +183,12 @@ impl OdOptions { let output_duplicates = matches.is_present(options::OUTPUT_DUPLICATES); - let read_bytes = match matches.value_of(options::READ_BYTES) { - None => None, - Some(s) => match parse_number_of_bytes(&s) { - Ok(i) => Some(i), - Err(_) => { - return Err(format!("Invalid argument --read-bytes={}", s)); - } - }, - }; + let read_bytes = matches.value_of(options::READ_BYTES).map(|s| { + parse_number_of_bytes(s).map_or_else( + |e| crash!(1, "{}", format_error_message(e, s, options::READ_BYTES)), + |n| n, + ) + }); let radix = match matches.value_of(options::ADDRESS_RADIX) { None => Radix::Octal, @@ -265,7 +269,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .short("S") .long(options::STRINGS) .help( - "output strings of at least BYTES graphic chars. 3 is assumed when \ + "NotImplemented: output strings of at least BYTES graphic chars. 3 is assumed when \ BYTES is not specified.", ) .default_value("3") @@ -466,8 +470,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let od_options = match OdOptions::new(clap_matches, args) { Err(s) => { - show_usage_error!("{}", s); - return 1; + crash!(1, "{}", s); } Ok(o) => o, }; @@ -649,3 +652,13 @@ fn open_input_peek_reader( let pr = PartialReader::new(mf, skip_bytes, read_bytes); PeekReader::new(pr) } + +fn format_error_message(error: ParseSizeError, s: &str, option: &str) -> String { + // NOTE: + // GNU's od echos affected flag, -N or --read-bytes (-j or --skip-bytes, etc.), depending user's selection + // GNU's od distinguishs between "invalid (suffix in) argument" + match error { + ParseSizeError::ParseFailure(_) => format!("invalid --{} argument '{}'", option, s), + ParseSizeError::SizeTooBig(_) => format!("--{} argument '{}' too large", option, s), + } +} diff --git a/src/uu/od/src/parse_nrofbytes.rs b/src/uu/od/src/parse_nrofbytes.rs index 9223d7e53..e51e15d39 100644 --- a/src/uu/od/src/parse_nrofbytes.rs +++ b/src/uu/od/src/parse_nrofbytes.rs @@ -1,4 +1,6 @@ -pub fn parse_number_of_bytes(s: &str) -> Result { +use uucore::parse_size::{parse_size, ParseSizeError}; + +pub fn parse_number_of_bytes(s: &str) -> Result { let mut start = 0; let mut len = s.len(); let mut radix = 16; @@ -9,10 +11,7 @@ pub fn parse_number_of_bytes(s: &str) -> Result { } else if s.starts_with('0') { radix = 8; } else { - return match uucore::parse_size::parse_size(&s[start..]) { - Ok(n) => Ok(n), - Err(_) => Err("parse failed"), - }; + return parse_size(&s[start..]); } let mut ends_with = s.chars().rev(); @@ -60,35 +59,33 @@ pub fn parse_number_of_bytes(s: &str) -> Result { Some('P') => 1000 * 1000 * 1000 * 1000 * 1000, #[cfg(target_pointer_width = "64")] Some('E') => 1000 * 1000 * 1000 * 1000 * 1000 * 1000, - _ => return Err("parse failed"), + _ => return Err(ParseSizeError::ParseFailure(s.to_string())), } } _ => {} } - match usize::from_str_radix(&s[start..len], radix) { - Ok(i) => Ok(i * multiply), - Err(_) => Err("parse failed"), - } -} - -#[allow(dead_code)] -fn parse_number_of_bytes_str(s: &str) -> Result { - parse_number_of_bytes(&String::from(s)) + let factor = match usize::from_str_radix(&s[start..len], radix) { + Ok(f) => f, + Err(e) => return Err(ParseSizeError::ParseFailure(e.to_string())), + }; + factor + .checked_mul(multiply) + .ok_or(ParseSizeError::SizeTooBig(s.to_string())) } #[test] fn test_parse_number_of_bytes() { // octal input - assert_eq!(8, parse_number_of_bytes_str("010").unwrap()); - assert_eq!(8 * 512, parse_number_of_bytes_str("010b").unwrap()); - assert_eq!(8 * 1024, parse_number_of_bytes_str("010k").unwrap()); - assert_eq!(8 * 1048576, parse_number_of_bytes_str("010m").unwrap()); + assert_eq!(8, parse_number_of_bytes("010").unwrap()); + assert_eq!(8 * 512, parse_number_of_bytes("010b").unwrap()); + assert_eq!(8 * 1024, parse_number_of_bytes("010k").unwrap()); + assert_eq!(8 * 1048576, parse_number_of_bytes("010m").unwrap()); // hex input - assert_eq!(15, parse_number_of_bytes_str("0xf").unwrap()); - assert_eq!(15, parse_number_of_bytes_str("0XF").unwrap()); - assert_eq!(27, parse_number_of_bytes_str("0x1b").unwrap()); - assert_eq!(16 * 1024, parse_number_of_bytes_str("0x10k").unwrap()); - assert_eq!(16 * 1048576, parse_number_of_bytes_str("0x10m").unwrap()); + assert_eq!(15, parse_number_of_bytes("0xf").unwrap()); + assert_eq!(15, parse_number_of_bytes("0XF").unwrap()); + assert_eq!(27, parse_number_of_bytes("0x1b").unwrap()); + assert_eq!(16 * 1024, parse_number_of_bytes("0x10k").unwrap()); + assert_eq!(16 * 1048576, parse_number_of_bytes("0x10m").unwrap()); } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 208010d09..c148d06c7 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1148,12 +1148,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.buffer_size = matches .value_of(OPT_BUF_SIZE) - .map(|v| match GlobalSettings::parse_byte_count(v) { - Ok(n) => n, - Err(ParseSizeError::ParseFailure(_)) => crash!(2, "invalid -S argument '{}'", v), - Err(ParseSizeError::SizeTooBig(_)) => crash!(2, "-S argument '{}' too large", v), - }) - .unwrap_or(DEFAULT_BUF_SIZE); + .map_or(DEFAULT_BUF_SIZE, |s| { + GlobalSettings::parse_byte_count(s).map_or_else( + |e| crash!(2, "{}", format_error_message(e, s, OPT_BUF_SIZE)), + |n| n, + ) + }); settings.tmp_dir = matches .value_of(OPT_TMP_DIR) @@ -1555,6 +1555,16 @@ fn open(path: impl AsRef) -> Box { } } +fn format_error_message(error: ParseSizeError, s: &str, option: &str) -> String { + // NOTE: + // GNU's sort echos affected flag, -S or --buffer-size, depending user's selection + // GNU's sort distinguishs between "invalid (suffix in) argument" + match error { + ParseSizeError::ParseFailure(_) => format!("invalid --{} argument '{}'", option, s), + ParseSizeError::SizeTooBig(_) => format!("--{} argument '{}' too large", option, s), + } +} + #[cfg(test)] mod tests { @@ -1659,7 +1669,7 @@ mod tests { ("10T", 10 * 1024 * 1024 * 1024 * 1024), ("1b", 1), ("1024b", 1024), - ("1024Mb", 1024 * 1024 * 1024), // TODO: This might not be what GNU `sort` does? + ("1024Mb", 1024 * 1024 * 1024), // NOTE: This might not be how GNU `sort` behaves for 'Mb' ("1", 1024), // K is default ("50", 50 * 1024), ("K", 1024), diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index e39af3816..d073c1bd1 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -118,13 +118,10 @@ fn check_option(matches: &ArgMatches, name: &str) -> Result { - let size = match parse_size(x) { - Ok(m) => m, - Err(e) => return Err(ProgramOptionsError(format!("invalid mode {}", e))), - }; - Ok(BufferType::Size(size)) - } + x => parse_size(x).map_or_else( + |e| crash!(125, "invalid mode {}", e), + |m| Ok(BufferType::Size(m)), + ), }, None => Ok(BufferType::Default), } diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 8ec0a9c0c..066ab5c9b 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -80,7 +80,7 @@ fn test_du_invalid_size() { .arg("/tmp") .fails() .code_is(1) - .stderr_only("du: invalid suffix in --block-size argument '1fb4t'"); + .stderr_only("du: invalid --block-size argument '1fb4t'"); #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .arg("--block-size=1Y") diff --git a/tests/by-util/test_od.rs b/tests/by-util/test_od.rs index c21c683dc..3d34e47a1 100644 --- a/tests/by-util/test_od.rs +++ b/tests/by-util/test_od.rs @@ -804,3 +804,37 @@ fn test_traditional_only_label() { ", )); } + +#[test] +fn test_od_invalid_bytes() { + const INVALID_SIZE: &str = "1fb4t"; + const BIG_SIZE: &str = "1Y"; + + let input: [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let options = [ + "--read-bytes", + "--skip-bytes", + "--width", + // "--strings", // TODO: consider testing here once '--strings' is implemented + ]; + for option in &options { + new_ucmd!() + .arg(format!("{}={}", option, INVALID_SIZE)) + .run_piped_stdin(&input[..]) + .failure() + .code_is(1) + .stderr_only(format!( + "od: invalid {} argument '{}'", + option, INVALID_SIZE + )); + + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .arg(format!("{}={}", option, BIG_SIZE)) + .run_piped_stdin(&input[..]) + .failure() + .code_is(1) + .stderr_only(format!("od: {} argument '{}' too large", option, BIG_SIZE)); + } +} diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 054789edf..eab256980 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -57,7 +57,7 @@ fn test_invalid_buffer_size() { .fails() .code_is(2) .stderr_only(format!( - "sort: invalid -S argument '{}'", + "sort: invalid --buffer-size argument '{}'", invalid_buffer_size )); } @@ -69,7 +69,7 @@ fn test_invalid_buffer_size() { .arg("ext_sort.txt") .fails() .code_is(2) - .stderr_only("sort: -S argument '1Y' too large"); + .stderr_only("sort: --buffer-size argument '1Y' too large"); #[cfg(target_pointer_width = "32")] { @@ -82,7 +82,10 @@ fn test_invalid_buffer_size() { .arg("ext_sort.txt") .fails() .code_is(2) - .stderr_only(format!("sort: -S argument '{}' too large", buffer_size)); + .stderr_only(format!( + "sort: --buffer-size argument '{}' too large", + buffer_size + )); } } } diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index e5d784edb..fc1b9324a 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -57,15 +57,18 @@ fn test_stdbuf_line_buffering_stdin_fails() { #[cfg(not(target_os = "windows"))] #[test] fn test_stdbuf_invalid_mode_fails() { - // TODO: GNU's `stdbuf` (8.32) does not return "\nTry 'stdbuf --help' for more information." - // for invalid modes. - new_ucmd!() - .args(&["-i", "1024R", "head"]) - .fails() - .stderr_contains("stdbuf: invalid mode ‘1024R’"); - #[cfg(not(target_pointer_width = "128"))] - new_ucmd!() - .args(&["--error", "1Y", "head"]) - .fails() - .stderr_contains("stdbuf: invalid mode ‘1Y’: Value too large to be stored in data type"); + let options = ["--input", "--output", "--error"]; + for option in &options { + new_ucmd!() + .args(&[*option, "1024R", "head"]) + .fails() + .code_is(125) + .stderr_only("stdbuf: invalid mode ‘1024R’"); + #[cfg(not(target_pointer_width = "128"))] + new_ucmd!() + .args(&[*option, "1Y", "head"]) + .fails() + .code_is(125) + .stderr_contains("stdbuf: invalid mode ‘1Y’: Value too large for defined data type"); + } } From f8e96150f81d20848b5285acd6b4ef95045a31d4 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 4 Jun 2021 00:49:06 +0200 Subject: [PATCH 16/20] fix clippy warnings and spelling * add some missing LICENSE headers --- src/uu/du/src/du.rs | 20 +++++++++++--------- src/uu/head/src/head.rs | 5 +++++ src/uu/head/src/parse.rs | 5 +++++ src/uu/od/src/od.rs | 2 +- src/uu/od/src/parse_nrofbytes.rs | 6 +++--- src/uu/sort/src/sort.rs | 2 +- src/uu/tail/src/tail.rs | 1 - src/uu/truncate/src/truncate.rs | 2 +- src/uucore/src/lib/lib.rs | 2 +- src/uucore/src/lib/parser/parse_size.rs | 4 +++- tests/by-util/test_du.rs | 5 +++++ tests/by-util/test_head.rs | 7 ++++++- tests/by-util/test_od.rs | 5 +++++ tests/by-util/test_sort.rs | 5 +++++ tests/by-util/test_split.rs | 5 +++++ tests/by-util/test_tail.rs | 7 +++++++ tests/by-util/test_truncate.rs | 7 +++++++ 17 files changed, 71 insertions(+), 19 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 25ef47af3..c2d764ebf 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -1,9 +1,9 @@ -// This file is part of the uutils coreutils package. -// -// (c) Derek Chiang -// -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. +// * This file is part of the uutils coreutils package. +// * +// * (c) Derek Chiang +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. #[macro_use] extern crate uucore; @@ -25,6 +25,8 @@ use std::os::unix::fs::MetadataExt; use std::os::windows::fs::MetadataExt; #[cfg(windows)] use std::os::windows::io::AsRawHandle; +#[cfg(windows)] +use std::path::Path; use std::path::PathBuf; use std::time::{Duration, UNIX_EPOCH}; use uucore::parse_size::{parse_size, ParseSizeError}; @@ -161,7 +163,7 @@ fn birth_u64(meta: &Metadata) -> Option { } #[cfg(windows)] -fn get_size_on_disk(path: &PathBuf) -> u64 { +fn get_size_on_disk(path: &Path) -> u64 { let mut size_on_disk = 0; // bind file so it stays in scope until end of function @@ -193,7 +195,7 @@ fn get_size_on_disk(path: &PathBuf) -> u64 { } #[cfg(windows)] -fn get_file_info(path: &PathBuf) -> Option { +fn get_file_info(path: &Path) -> Option { let mut result = None; let file = match fs::File::open(path) { @@ -709,7 +711,7 @@ Try '{} --help' for more information.", fn format_error_message(error: ParseSizeError, s: &str, option: &str) -> String { // NOTE: // GNU's du echos affected flag, -B or --block-size (-t or --threshold), depending user's selection - // GNU's du distinguishs between "invalid (suffix in) argument" + // GNU's du does distinguish between "invalid (suffix in) argument" match error { ParseSizeError::ParseFailure(_) => format!("invalid --{} argument '{}'", option, s), ParseSizeError::SizeTooBig(_) => format!("--{} argument '{}' too large", option, s), diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index d671ed665..2c13ed37d 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + // spell-checker:ignore (vars) zlines use clap::{crate_version, App, Arg}; diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 6631dba0e..7e36594b5 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + use std::ffi::OsString; use uucore::parse_size::{parse_size, ParseSizeError}; diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index 080630b71..1a592f622 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -643,7 +643,7 @@ fn open_input_peek_reader( fn format_error_message(error: ParseSizeError, s: &str, option: &str) -> String { // NOTE: // GNU's od echos affected flag, -N or --read-bytes (-j or --skip-bytes, etc.), depending user's selection - // GNU's od distinguishs between "invalid (suffix in) argument" + // GNU's od does distinguish between "invalid (suffix in) argument" match error { ParseSizeError::ParseFailure(_) => format!("invalid --{} argument '{}'", option, s), ParseSizeError::SizeTooBig(_) => format!("--{} argument '{}' too large", option, s), diff --git a/src/uu/od/src/parse_nrofbytes.rs b/src/uu/od/src/parse_nrofbytes.rs index e51e15d39..d6329c60a 100644 --- a/src/uu/od/src/parse_nrofbytes.rs +++ b/src/uu/od/src/parse_nrofbytes.rs @@ -71,7 +71,7 @@ pub fn parse_number_of_bytes(s: &str) -> Result { }; factor .checked_mul(multiply) - .ok_or(ParseSizeError::SizeTooBig(s.to_string())) + .ok_or_else(|| ParseSizeError::SizeTooBig(s.to_string())) } #[test] @@ -80,12 +80,12 @@ fn test_parse_number_of_bytes() { assert_eq!(8, parse_number_of_bytes("010").unwrap()); assert_eq!(8 * 512, parse_number_of_bytes("010b").unwrap()); assert_eq!(8 * 1024, parse_number_of_bytes("010k").unwrap()); - assert_eq!(8 * 1048576, parse_number_of_bytes("010m").unwrap()); + assert_eq!(8 * 1_048_576, parse_number_of_bytes("010m").unwrap()); // hex input assert_eq!(15, parse_number_of_bytes("0xf").unwrap()); assert_eq!(15, parse_number_of_bytes("0XF").unwrap()); assert_eq!(27, parse_number_of_bytes("0x1b").unwrap()); assert_eq!(16 * 1024, parse_number_of_bytes("0x10k").unwrap()); - assert_eq!(16 * 1048576, parse_number_of_bytes("0x10m").unwrap()); + assert_eq!(16 * 1_048_576, parse_number_of_bytes("0x10m").unwrap()); } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index fca974dbd..9ec90519f 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1567,7 +1567,7 @@ fn open(path: impl AsRef) -> Box { fn format_error_message(error: ParseSizeError, s: &str, option: &str) -> String { // NOTE: // GNU's sort echos affected flag, -S or --buffer-size, depending user's selection - // GNU's sort distinguishs between "invalid (suffix in) argument" + // GNU's sort does distinguish between "invalid (suffix in) argument" match error { ParseSizeError::ParseFailure(_) => format!("invalid --{} argument '{}'", option, s), ParseSizeError::SizeTooBig(_) => format!("--{} argument '{}' too large", option, s), diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 76c799621..75cc43db1 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -5,7 +5,6 @@ // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// * // spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 78167ed77..8f02be874 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -152,7 +152,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { crash!( 1, "cannot stat '{}': No such file or directory", - reference.unwrap_or("".to_string()) + reference.unwrap_or_else(|| "".to_string()) ); // TODO: fix '--no-create' see test_reference and test_truncate_bytes_size } _ => crash!(1, "{}", e.to_string()), diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 84adeeb34..f765b7b3e 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -22,7 +22,7 @@ pub extern crate winapi; mod features; // feature-gated code modules mod macros; // crate macros (macro_rules-type; exported to `crate::...`) mod mods; // core cross-platform modules -mod parser; // string parsing moduls +mod parser; // string parsing modules // * cross-platform modules pub use crate::mods::backup_control; diff --git a/src/uucore/src/lib/parser/parse_size.rs b/src/uucore/src/lib/parser/parse_size.rs index 12c410e57..58213adef 100644 --- a/src/uucore/src/lib/parser/parse_size.rs +++ b/src/uucore/src/lib/parser/parse_size.rs @@ -3,6 +3,8 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. +// spell-checker:ignore (ToDO) hdsf ghead gtail + use std::convert::TryFrom; use std::error::Error; use std::fmt; @@ -77,7 +79,7 @@ pub fn parse_size(size: &str) -> Result { }; number .checked_mul(factor) - .ok_or(ParseSizeError::size_too_big(size)) + .ok_or_else(|| ParseSizeError::size_too_big(size)) } #[derive(Debug, PartialEq, Eq)] diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 2c656ced5..e4dd95ae1 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + // spell-checker:ignore (paths) sublink subwords use crate::common::util::*; diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 6a2cdf1cd..2c4b66696 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -1,4 +1,9 @@ -// spell-checker:ignore (words) bogusfile emptyfile +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore (words) bogusfile emptyfile abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstu use crate::common::util::*; diff --git a/tests/by-util/test_od.rs b/tests/by-util/test_od.rs index 3d34e47a1..53b471dba 100644 --- a/tests/by-util/test_od.rs +++ b/tests/by-util/test_od.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + extern crate unindent; use self::unindent::*; diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 204145719..3d1cfe120 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + // spell-checker:ignore (words) ints use crate::common::util::*; diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 53b545200..a1350534f 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -1,3 +1,8 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + extern crate rand; extern crate regex; diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index c296e2763..8478944e2 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -1,3 +1,10 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile + extern crate tail; use crate::common::util::*; diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 72f7f780b..2da59035e 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -1,3 +1,10 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +// spell-checker:ignore (words) RFILE + use crate::common::util::*; use std::io::{Seek, SeekFrom, Write}; From 81e07a6a4dc829bd57b2527b50048cc3f8a5e61c Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 4 Jun 2021 17:22:45 +0200 Subject: [PATCH 17/20] od: replace 'piped_stdin' to make test stable --- tests/by-util/test_od.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/by-util/test_od.rs b/tests/by-util/test_od.rs index 53b471dba..33d7d4dc4 100644 --- a/tests/by-util/test_od.rs +++ b/tests/by-util/test_od.rs @@ -815,7 +815,10 @@ fn test_od_invalid_bytes() { const INVALID_SIZE: &str = "1fb4t"; const BIG_SIZE: &str = "1Y"; - let input: [u8; 8] = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + // NOTE: + // GNU's od (8.32) with option '--width' does not accept 'Y' as valid suffix. + // According to the man page it should be valid in the same way it is valid for + // '--read-bytes' and '--skip-bytes'. let options = [ "--read-bytes", @@ -826,8 +829,8 @@ fn test_od_invalid_bytes() { for option in &options { new_ucmd!() .arg(format!("{}={}", option, INVALID_SIZE)) - .run_piped_stdin(&input[..]) - .failure() + .arg("file") + .fails() .code_is(1) .stderr_only(format!( "od: invalid {} argument '{}'", @@ -837,8 +840,8 @@ fn test_od_invalid_bytes() { #[cfg(not(target_pointer_width = "128"))] new_ucmd!() .arg(format!("{}={}", option, BIG_SIZE)) - .run_piped_stdin(&input[..]) - .failure() + .arg("file") + .fails() .code_is(1) .stderr_only(format!("od: {} argument '{}' too large", option, BIG_SIZE)); } From 884f5701251af39b29b6c305ce455fb5bc80ecca Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Sun, 6 Jun 2021 21:55:39 +0200 Subject: [PATCH 18/20] du/od/sort: refactor - replace map_or_else with unwrap_or_else --- src/uu/du/src/du.rs | 6 ++---- src/uu/od/src/od.rs | 20 ++++++++------------ src/uu/sort/src/sort.rs | 6 ++---- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index c2d764ebf..93f2fe5bb 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -229,10 +229,8 @@ fn get_file_info(path: &Path) -> Option { fn read_block_size(s: Option<&str>) -> usize { if let Some(s) = s { - parse_size(s).map_or_else( - |e| crash!(1, "{}", format_error_message(e, s, options::BLOCK_SIZE)), - |n| n, - ) + parse_size(s) + .unwrap_or_else(|e| crash!(1, "{}", format_error_message(e, s, options::BLOCK_SIZE))) } else { for env_var in &["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { if let Ok(env_size) = env::var(env_var) { diff --git a/src/uu/od/src/od.rs b/src/uu/od/src/od.rs index 1a592f622..d5124e93c 100644 --- a/src/uu/od/src/od.rs +++ b/src/uu/od/src/od.rs @@ -134,10 +134,9 @@ impl OdOptions { } let mut skip_bytes = matches.value_of(options::SKIP_BYTES).map_or(0, |s| { - parse_number_of_bytes(s).map_or_else( - |e| crash!(1, "{}", format_error_message(e, s, options::SKIP_BYTES)), - |n| n, - ) + parse_number_of_bytes(s).unwrap_or_else(|e| { + crash!(1, "{}", format_error_message(e, s, options::SKIP_BYTES)) + }) }); let mut label: Option = None; @@ -165,10 +164,8 @@ impl OdOptions { if matches.occurrences_of(options::WIDTH) == 0 { return 16; }; - parse_number_of_bytes(s).map_or_else( - |e| crash!(1, "{}", format_error_message(e, s, options::WIDTH)), - |n| n, - ) + parse_number_of_bytes(s) + .unwrap_or_else(|e| crash!(1, "{}", format_error_message(e, s, options::WIDTH))) }); let min_bytes = formats.iter().fold(1, |max, next| { @@ -182,10 +179,9 @@ impl OdOptions { let output_duplicates = matches.is_present(options::OUTPUT_DUPLICATES); let read_bytes = matches.value_of(options::READ_BYTES).map(|s| { - parse_number_of_bytes(s).map_or_else( - |e| crash!(1, "{}", format_error_message(e, s, options::READ_BYTES)), - |n| n, - ) + parse_number_of_bytes(s).unwrap_or_else(|e| { + crash!(1, "{}", format_error_message(e, s, options::READ_BYTES)) + }) }); let radix = match matches.value_of(options::ADDRESS_RADIX) { diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 9ec90519f..850f039b3 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1158,10 +1158,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.buffer_size = matches .value_of(OPT_BUF_SIZE) .map_or(DEFAULT_BUF_SIZE, |s| { - GlobalSettings::parse_byte_count(s).map_or_else( - |e| crash!(2, "{}", format_error_message(e, s, OPT_BUF_SIZE)), - |n| n, - ) + GlobalSettings::parse_byte_count(s) + .unwrap_or_else(|e| crash!(2, "{}", format_error_message(e, s, OPT_BUF_SIZE))) }); settings.tmp_dir = matches From 0033928128777e6423cc17ff47dad2e74864a6ce Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Sun, 6 Jun 2021 22:56:11 +0200 Subject: [PATCH 19/20] sort: fix broken tests for '--batch-size' (replace 'B' with 'b') https://github.com/uutils/coreutils/pull/2297#issuecomment-855460881 --- tests/by-util/test_sort.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 898bb3568..2894d3007 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -849,7 +849,7 @@ fn sort_multiple() { #[test] fn sort_empty_chunk() { new_ucmd!() - .args(&["-S", "40B"]) + .args(&["-S", "40b"]) .pipe_in("a\na\n") .succeeds() .stdout_is("a\na\n"); @@ -889,12 +889,7 @@ fn test_compress_fail() { #[test] fn test_merge_batches() { new_ucmd!() - .args(&[ - "ext_sort.txt", - "-n", - "-S", - "150B", - ]) + .args(&["ext_sort.txt", "-n", "-S", "150b"]) .succeeds() .stdout_only_fixture("ext_sort.expected"); } From 1b824f491460e7f7274ce597224e4d3a86e41cc9 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 9 Jun 2021 15:56:29 +0200 Subject: [PATCH 20/20] fix clippy warnings --- src/uu/head/src/parse.rs | 2 +- src/uu/split/src/split.rs | 2 +- src/uu/tail/src/tail.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/head/src/parse.rs b/src/uu/head/src/parse.rs index 7e36594b5..f6f291814 100644 --- a/src/uu/head/src/parse.rs +++ b/src/uu/head/src/parse.rs @@ -113,7 +113,7 @@ pub fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { return Err(ParseSizeError::ParseFailure(src.to_string())); } - parse_size(&size_string).map(|n| (n, all_but_last)) + parse_size(size_string).map(|n| (n, all_but_last)) } #[cfg(test)] diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index b905a4592..0d5543d8b 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -278,7 +278,7 @@ struct ByteSplitter { impl ByteSplitter { fn new(settings: &Settings) -> ByteSplitter { let size_string = &settings.strategy_param; - let size_num = match parse_size(&size_string) { + let size_num = match parse_size(size_string) { Ok(n) => n, Err(e) => crash!(1, "invalid number of bytes: {}", e.to_string()), }; diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 75cc43db1..8950886a2 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -419,5 +419,5 @@ fn parse_num(src: &str) -> Result<(usize, bool), ParseSizeError> { return Err(ParseSizeError::ParseFailure(src.to_string())); } - parse_size(&size_string).map(|n| (n, starting_with)) + parse_size(size_string).map(|n| (n, starting_with)) }