From f8006f47df1b8acf75364d3260d95361e192af01 Mon Sep 17 00:00:00 2001 From: Daniel Rocco Date: Sun, 14 Feb 2021 03:04:29 -0500 Subject: [PATCH] numfmt: handle leading whitespace & implied padding (#1721) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align with GNU numfmt by trimming leading whitespace from supplied values. If the user did not specify a padding, calculate an implied padding from the leading whitespace and the value. Also track closer to GNU numfmt’s error message format. --- src/uu/numfmt/src/numfmt.rs | 116 ++++++++++++++++++++++++++++------- tests/by-util/test_numfmt.rs | 111 +++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 23 deletions(-) diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index 35584849b..3865619ea 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -126,7 +126,7 @@ fn parse_suffix(s: &str) -> Result<(f64, Option)> { Some('Z') => Ok(Some((RawSuffix::Z, with_i))), Some('Y') => Ok(Some((RawSuffix::Y, with_i))), Some('0'..='9') => Ok(None), - _ => Err("Failed to parse suffix"), + _ => Err(format!("invalid suffix in input: ‘{}’", s)), }?; let suffix_len = match suffix { @@ -137,7 +137,7 @@ fn parse_suffix(s: &str) -> Result<(f64, Option)> { let number = s[..s.len() - suffix_len] .parse::() - .map_err(|err| err.to_string())?; + .map_err(|_| format!("invalid number: ‘{}’", s))?; Ok((number, suffix)) } @@ -244,19 +244,37 @@ fn transform_to(s: f64, opts: &Transform) -> Result { }) } -fn format_string(source: &str, options: &NumfmtOptions) -> Result { +fn format_string( + source: &str, + options: &NumfmtOptions, + implicit_padding: Option, +) -> Result { let number = transform_to( transform_from(source, &options.transform.from)?, &options.transform.to, )?; - Ok(match options.padding { + Ok(match implicit_padding.unwrap_or(options.padding) { p if p == 0 => number, p if p > 0 => format!("{:>padding$}", number, padding = p as usize), p => format!("{: Result<()> { + let (prefix, field, suffix) = extract_field(&s)?; + + let implicit_padding = match !prefix.is_empty() && options.padding == 0 { + true => Some((prefix.len() + field.len()) as isize), + false => None, + }; + + let field = format_string(field, options, implicit_padding)?; + println!("{}{}", field, suffix); + + Ok(()) +} + fn parse_options(args: &ArgMatches) -> Result { let from = parse_unit(args.value_of(options::FROM).unwrap())?; let to = parse_unit(args.value_of(options::TO).unwrap())?; @@ -287,10 +305,57 @@ fn parse_options(args: &ArgMatches) -> Result { }) } +/// Extract the field to convert from `line`. +/// +/// The field is the first sequence of non-whitespace characters in `line`. +/// +/// Returns a [`Result`] of `(prefix: &str, field: &str, suffix: &str)`, where +/// `prefix` contains any leading whitespace, `field` is the field to convert, +/// and `suffix` is everything after the field. `prefix` and `suffix` may be +/// empty. +/// +/// Returns an [`Err`] if `line` is empty or consists only of whitespace. +/// +/// Examples: +/// +/// ``` +/// use uu_numfmt::extract_field; +/// +/// assert_eq!("1K", extract_field("1K").unwrap().1); +/// +/// let (prefix, field, suffix) = extract_field(" 1K qux").unwrap(); +/// assert_eq!(" ", prefix); +/// assert_eq!("1K", field); +/// assert_eq!(" qux", suffix); +/// +/// assert!(extract_field("").is_err()); +/// ``` +pub fn extract_field(line: &str) -> Result<(&str, &str, &str)> { + let start = line + .find(|c: char| !c.is_whitespace()) + .ok_or("invalid number: ‘’")?; + + let prefix = &line[..start]; + + let mut field = &line[start..]; + + let suffix = match field.find(|c: char| c.is_whitespace()) { + Some(i) => { + let suffix = &field[i..]; + field = &field[..i]; + suffix + } + None => "", + }; + + Ok((prefix, field, suffix)) +} + fn handle_args<'a>(args: impl Iterator, options: NumfmtOptions) -> Result<()> { for l in args { - println!("{}", format_string(l, &options)?) + format_and_print(l, &options)?; } + Ok(()) } @@ -300,16 +365,14 @@ fn handle_stdin(options: NumfmtOptions) -> Result<()> { let mut lines = locked_stdin.lines(); for l in lines.by_ref().take(options.header) { - l.map(|s| println!("{}", s)).map_err(|e| e.to_string())? + l.map(|s| println!("{}", s)).map_err(|e| e.to_string())?; } for l in lines { - l.map_err(|e| e.to_string()).and_then(|l| { - let l = format_string(l.as_ref(), &options)?; - println!("{}", l); - Ok(()) - })? + l.map_err(|e| e.to_string()) + .and_then(|l| format_and_print(&l, &options))?; } + Ok(()) } @@ -326,34 +389,38 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .long(options::FROM) .help("auto-scale input numbers to UNITs; see UNIT below") .value_name("UNIT") - .default_value(options::FROM_DEFAULT) + .default_value(options::FROM_DEFAULT), ) .arg( Arg::with_name(options::TO) .long(options::TO) .help("auto-scale output numbers to UNITs; see UNIT below") .value_name("UNIT") - .default_value(options::TO_DEFAULT) + .default_value(options::TO_DEFAULT), ) .arg( Arg::with_name(options::PADDING) .long(options::PADDING) - .help("pad the output to N characters; positive N will right-align; negative N will left-align; padding is ignored if the output is wider than N") - .value_name("N") + .help( + "pad the output to N characters; positive N will \ + right-align; negative N will left-align; padding is \ + ignored if the output is wider than N; the default is \ + to automatically pad if a whitespace is found", + ) + .value_name("N"), ) .arg( Arg::with_name(options::HEADER) .long(options::HEADER) - .help("print (without converting) the first N header lines; N defaults to 1 if not specified") + .help( + "print (without converting) the first N header lines; \ + N defaults to 1 if not specified", + ) .value_name("N") .default_value(options::HEADER_DEFAULT) - .hide_default_value(true) - ) - .arg( - Arg::with_name(options::NUMBER) - .hidden(true) - .multiple(true) + .hide_default_value(true), ) + .arg(Arg::with_name(options::NUMBER).hidden(true).multiple(true)) .get_matches_from(args); let options = parse_options(&matches).unwrap(); @@ -364,7 +431,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { }; match result { - Err(e) => crash!(1, "{}", e), + Err(e) => { + show_info!("{}", e); + exit!(1); + } _ => 0, } } diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index 3aa7ab53b..625da766b 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -145,3 +145,114 @@ fn test_si_to_iec() { .run() .stdout_is("13.9T\n"); } + +#[test] +fn test_should_report_invalid_empty_number_on_empty_stdin() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in("\n") + .run() + .stderr_is("numfmt: invalid number: ‘’\n"); +} + +#[test] +fn test_should_report_invalid_empty_number_on_blank_stdin() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in(" \t \n") + .run() + .stderr_is("numfmt: invalid number: ‘’\n"); +} + +#[test] +fn test_should_report_invalid_suffix_on_stdin() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in("1k") + .run() + .stderr_is("numfmt: invalid suffix in input: ‘1k’\n"); + + // GNU numfmt reports this one as “invalid number” + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in("NaN") + .run() + .stderr_is("numfmt: invalid suffix in input: ‘NaN’\n"); +} + +#[test] +fn test_should_report_invalid_number_with_interior_junk() { + // GNU numfmt reports this as “invalid suffix” + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in("1x0K") + .run() + .stderr_is("numfmt: invalid number: ‘1x0K’\n"); +} + +#[test] +fn test_should_skip_leading_space_from_stdin() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in(" 2Ki") + .run() + .stdout_is("2048\n"); + + // multiline + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in("\t1Ki\n 2K") + .run() + .stdout_is("1024\n2000\n"); +} + +#[test] +fn test_should_convert_only_first_number_in_line() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in("1Ki 2M 3G") + .run() + .stdout_is("1024 2M 3G\n"); +} + +#[test] +fn test_leading_whitespace_should_imply_padding() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in(" 1K") + .run() + .stdout_is(" 1000\n"); + + + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in(" 202Ki") + .run() + .stdout_is(" 206848\n"); +} + +#[test] +fn test_should_calculate_implicit_padding_per_line() { + new_ucmd!() + .args(&["--from=auto"]) + .pipe_in(" 1Ki\n 2K") + .run() + .stdout_is(" 1024\n 2000\n"); +} + +#[test] +fn test_leading_whitespace_in_free_argument_should_imply_padding() { + new_ucmd!() + .args(&["--from=auto", " 1Ki"]) + .run() + .stdout_is(" 1024\n"); +} + +#[test] +fn test_should_calculate_implicit_padding_per_free_argument() { + new_ucmd!() + .args(&["--from=auto", " 1Ki", " 2K"]) + .pipe_in(" 1Ki\n 2K") + .run() + .stdout_is(" 1024\n 2000\n"); +}