From 2ac5dc0a70b03d68faba89486b9fc15490affee0 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 11 Sep 2021 12:49:12 -0400 Subject: [PATCH] seq: compute correct width for scientific notation Change the way `seq` computes the number of digits needed to print a number so that it works for inputs given in scientific notation. Specifically, this commit parses the input string to determine whether it is an integer, a float in decimal notation, or a float in scientific notation, and then computes the number of integral digits and the number of fractional digits based on that. This also supports floating point negative zero, expressed in both decimal and scientific notation. --- src/uu/seq/src/digits.rs | 190 ++++++++++++++++++++++++++++++++++++++ src/uu/seq/src/seq.rs | 72 ++++++++++++--- tests/by-util/test_seq.rs | 134 +++++++++++++++++++++++++++ 3 files changed, 381 insertions(+), 15 deletions(-) create mode 100644 src/uu/seq/src/digits.rs diff --git a/src/uu/seq/src/digits.rs b/src/uu/seq/src/digits.rs new file mode 100644 index 000000000..bde933978 --- /dev/null +++ b/src/uu/seq/src/digits.rs @@ -0,0 +1,190 @@ +//! Counting number of digits needed to represent a number. +//! +//! The [`num_integral_digits`] and [`num_fractional_digits`] functions +//! count the number of digits needed to represent a number in decimal +//! notation (like "123.456"). +use std::convert::TryInto; +use std::num::ParseIntError; + +use uucore::display::Quotable; + +/// The number of digits after the decimal point in a given number. +/// +/// The input `s` is a string representing a number, either an integer +/// or a floating point number in either decimal notation or scientific +/// notation. This function returns the number of digits after the +/// decimal point needed to print the number in decimal notation. +/// +/// # Examples +/// +/// ```rust,ignore +/// assert_eq!(num_fractional_digits("123.45e-1").unwrap(), 3); +/// ``` +pub fn num_fractional_digits(s: &str) -> Result { + match (s.find('.'), s.find('e')) { + // For example, "123456". + (None, None) => Ok(0), + + // For example, "123e456". + (None, Some(j)) => { + let exponent: i64 = s[j + 1..].parse()?; + if exponent < 0 { + Ok(-exponent as usize) + } else { + Ok(0) + } + } + + // For example, "123.456". + (Some(i), None) => Ok(s.len() - (i + 1)), + + // For example, "123.456e789". + (Some(i), Some(j)) if i < j => { + // Because of the match guard, this subtraction will not underflow. + let num_digits_between_decimal_point_and_e = (j - (i + 1)) as i64; + let exponent: i64 = s[j + 1..].parse()?; + if num_digits_between_decimal_point_and_e < exponent { + Ok(0) + } else { + Ok((num_digits_between_decimal_point_and_e - exponent) + .try_into() + .unwrap()) + } + } + _ => crash!( + 1, + "invalid floating point argument: {}\n Try '{} --help' for more information.", + s.quote(), + uucore::execution_phrase() + ), + } +} + +/// The number of digits before the decimal point in a given number. +/// +/// The input `s` is a string representing a number, either an integer +/// or a floating point number in either decimal notation or scientific +/// notation. This function returns the number of digits before the +/// decimal point needed to print the number in decimal notation. +/// +/// # Examples +/// +/// ```rust,ignore +/// assert_eq!(num_fractional_digits("123.45e-1").unwrap(), 2); +/// ``` +pub fn num_integral_digits(s: &str) -> Result { + match (s.find('.'), s.find('e')) { + // For example, "123456". + (None, None) => Ok(s.len()), + + // For example, "123e456". + (None, Some(j)) => { + let exponent: i64 = s[j + 1..].parse()?; + let total = j as i64 + exponent; + if total < 1 { + Ok(1) + } else { + Ok(total.try_into().unwrap()) + } + } + + // For example, "123.456". + (Some(i), None) => Ok(i), + + // For example, "123.456e789". + (Some(i), Some(j)) => { + let exponent: i64 = s[j + 1..].parse()?; + let minimum: usize = { + let integral_part: f64 = crash_if_err!(1, s[..j].parse()); + if integral_part == -0.0 && integral_part.is_sign_negative() { + 2 + } else { + 1 + } + }; + + let total = i as i64 + exponent; + if total < minimum as i64 { + Ok(minimum) + } else { + Ok(total.try_into().unwrap()) + } + } + } +} + +#[cfg(test)] +mod tests { + + mod test_num_integral_digits { + use crate::num_integral_digits; + + #[test] + fn test_integer() { + assert_eq!(num_integral_digits("123").unwrap(), 3); + } + + #[test] + fn test_decimal() { + assert_eq!(num_integral_digits("123.45").unwrap(), 3); + } + + #[test] + fn test_scientific_no_decimal_positive_exponent() { + assert_eq!(num_integral_digits("123e4").unwrap(), 3 + 4); + } + + #[test] + fn test_scientific_with_decimal_positive_exponent() { + assert_eq!(num_integral_digits("123.45e6").unwrap(), 3 + 6); + } + + #[test] + fn test_scientific_no_decimal_negative_exponent() { + assert_eq!(num_integral_digits("123e-4").unwrap(), 1); + } + + #[test] + fn test_scientific_with_decimal_negative_exponent() { + assert_eq!(num_integral_digits("123.45e-6").unwrap(), 1); + assert_eq!(num_integral_digits("123.45e-1").unwrap(), 2); + } + } + + mod test_num_fractional_digits { + use crate::num_fractional_digits; + + #[test] + fn test_integer() { + assert_eq!(num_fractional_digits("123").unwrap(), 0); + } + + #[test] + fn test_decimal() { + assert_eq!(num_fractional_digits("123.45").unwrap(), 2); + } + + #[test] + fn test_scientific_no_decimal_positive_exponent() { + assert_eq!(num_fractional_digits("123e4").unwrap(), 0); + } + + #[test] + fn test_scientific_with_decimal_positive_exponent() { + assert_eq!(num_fractional_digits("123.45e6").unwrap(), 0); + assert_eq!(num_fractional_digits("123.45e1").unwrap(), 1); + } + + #[test] + fn test_scientific_no_decimal_negative_exponent() { + assert_eq!(num_fractional_digits("123e-4").unwrap(), 4); + assert_eq!(num_fractional_digits("123e-1").unwrap(), 1); + } + + #[test] + fn test_scientific_with_decimal_negative_exponent() { + assert_eq!(num_fractional_digits("123.45e-6").unwrap(), 8); + assert_eq!(num_fractional_digits("123.45e-1").unwrap(), 3); + } + } +} diff --git a/src/uu/seq/src/seq.rs b/src/uu/seq/src/seq.rs index 0cea90838..18f181aa8 100644 --- a/src/uu/seq/src/seq.rs +++ b/src/uu/seq/src/seq.rs @@ -14,6 +14,11 @@ use num_traits::{Num, ToPrimitive}; use std::cmp; use std::io::{stdout, ErrorKind, Write}; use std::str::FromStr; + +mod digits; +use crate::digits::num_fractional_digits; +use crate::digits::num_integral_digits; + use uucore::display::Quotable; static ABOUT: &str = "Display numbers from FIRST to LAST, in steps of INCREMENT."; @@ -155,20 +160,49 @@ pub fn uumain(args: impl uucore::Args) -> i32 { }; let mut largest_dec = 0; + let mut padding = 0; let first = if numbers.len() > 1 { let slice = numbers[0]; - let len = slice.len(); - let dec = slice.find('.').unwrap_or(len); - largest_dec = len - dec; + largest_dec = num_fractional_digits(slice).unwrap_or_else(|_| { + crash!( + 1, + "invalid floating point argument: {}\n Try '{} --help' for more information.", + slice.quote(), + uucore::execution_phrase() + ) + }); + padding = num_integral_digits(slice).unwrap_or_else(|_| { + crash!( + 1, + "invalid floating point argument: {}\n Try '{} --help' for more information.", + slice.quote(), + uucore::execution_phrase() + ) + }); crash_if_err!(1, slice.parse()) } else { Number::BigInt(BigInt::one()) }; let increment = if numbers.len() > 2 { let slice = numbers[1]; - let len = slice.len(); - let dec = slice.find('.').unwrap_or(len); - largest_dec = cmp::max(largest_dec, len - dec); + let dec = num_fractional_digits(slice).unwrap_or_else(|_| { + crash!( + 1, + "invalid floating point argument: {}\n Try '{} --help' for more information.", + slice.quote(), + uucore::execution_phrase() + ) + }); + let int_digits = num_integral_digits(slice).unwrap_or_else(|_| { + crash!( + 1, + "invalid floating point argument: {}\n Try '{} --help' for more information.", + slice.quote(), + uucore::execution_phrase() + ) + }); + largest_dec = cmp::max(largest_dec, dec); + padding = cmp::max(padding, int_digits); crash_if_err!(1, slice.parse()) } else { Number::BigInt(BigInt::one()) @@ -183,16 +217,18 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let last: Number = { let slice = numbers[numbers.len() - 1]; + let int_digits = num_integral_digits(slice).unwrap_or_else(|_| { + crash!( + 1, + "invalid floating point argument: {}\n Try '{} --help' for more information.", + slice.quote(), + uucore::execution_phrase() + ) + }); + padding = cmp::max(padding, int_digits); crash_if_err!(1, slice.parse()) }; - if largest_dec > 0 { - largest_dec -= 1; - } - let padding = first - .num_digits() - .max(increment.num_digits()) - .max(last.num_digits()); let result = match (first, last, increment) { (Number::MinusZero, Number::BigInt(last), Number::BigInt(increment)) => print_seq_integers( (BigInt::zero(), increment, last), @@ -286,18 +322,24 @@ fn print_seq( let mut stdout = stdout.lock(); let (first, increment, last) = range; let mut i = 0isize; + let is_first_minus_zero = first == -0.0 && first.is_sign_negative(); let mut value = first + i as f64 * increment; let mut is_first_iteration = true; while !done_printing(&value, &increment, &last) { if !is_first_iteration { write!(stdout, "{}", separator)?; } + let mut width = padding; + if is_first_iteration && is_first_minus_zero { + write!(stdout, "-")?; + width -= 1; + } is_first_iteration = false; let istr = format!("{:.*}", largest_dec, value); let ilen = istr.len(); let before_dec = istr.find('.').unwrap_or(ilen); - if pad && before_dec < padding { - for _ in 0..(padding - before_dec) { + if pad && before_dec < width { + for _ in 0..(width - before_dec) { write!(stdout, "0")?; } } diff --git a/tests/by-util/test_seq.rs b/tests/by-util/test_seq.rs index 65f2451f0..7a58a950a 100644 --- a/tests/by-util/test_seq.rs +++ b/tests/by-util/test_seq.rs @@ -23,6 +23,56 @@ fn test_rejects_non_floats() { )); } +#[test] +fn test_invalid_float() { + new_ucmd!() + .args(&["1e2.3"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); + new_ucmd!() + .args(&["1e2.3", "2"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); + new_ucmd!() + .args(&["1", "1e2.3"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); + new_ucmd!() + .args(&["1e2.3", "2", "3"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); + new_ucmd!() + .args(&["1", "1e2.3", "3"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); + new_ucmd!() + .args(&["1", "2", "1e2.3"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); +} + +#[test] +fn test_width_invalid_float() { + new_ucmd!() + .args(&["-w", "1e2.3"]) + .fails() + .no_stdout() + .stderr_contains("invalid floating point argument: '1e2.3'") + .stderr_contains("for more information."); +} + // ---- Tests for the big integer based path ---- #[test] @@ -178,6 +228,90 @@ fn test_width_negative_zero() { .no_stderr(); } +#[test] +fn test_width_negative_zero_scientific_notation() { + new_ucmd!() + .args(&["-w", "-0e0", "1"]) + .succeeds() + .stdout_is("-0\n01\n") + .no_stderr(); + + new_ucmd!() + .args(&["-w", "-0e+1", "1"]) + .succeeds() + .stdout_is("-00\n001\n") + .no_stderr(); + + new_ucmd!() + .args(&["-w", "-0.000e0", "1"]) + .succeeds() + .stdout_is("-0.000\n01.000\n") + .no_stderr(); + + new_ucmd!() + .args(&["-w", "-0.000e-2", "1"]) + .succeeds() + .stdout_is("-0.00000\n01.00000\n") + .no_stderr(); + + new_ucmd!() + .args(&["-w", "-0.000e5", "1"]) + .succeeds() + .stdout_is("-000000\n0000001\n") + .no_stderr(); + + new_ucmd!() + .args(&["-w", "-0.000e5", "1"]) + .succeeds() + .stdout_is("-000000\n0000001\n") + .no_stderr(); +} + +#[test] +fn test_width_decimal_scientific_notation_increment() { + new_ucmd!() + .args(&["-w", ".1", "1e-2", ".11"]) + .succeeds() + .stdout_is("0.10\n0.11\n") + .no_stderr(); + + new_ucmd!() + .args(&["-w", ".0", "1.500e-1", ".2"]) + .succeeds() + .stdout_is("0.0000\n0.1500\n") + .no_stderr(); +} + +/// Test that trailing zeros in the start argument contribute to precision. +#[test] +fn test_width_decimal_scientific_notation_trailing_zeros_start() { + new_ucmd!() + .args(&["-w", ".1000", "1e-2", ".11"]) + .succeeds() + .stdout_is("0.1000\n0.1100\n") + .no_stderr(); +} + +/// Test that trailing zeros in the increment argument contribute to precision. +#[test] +fn test_width_decimal_scientific_notation_trailing_zeros_increment() { + new_ucmd!() + .args(&["-w", "1e-1", "0.0100", ".11"]) + .succeeds() + .stdout_is("0.1000\n0.1100\n") + .no_stderr(); +} + +/// Test that trailing zeros in the end argument do not contribute to width. +#[test] +fn test_width_decimal_scientific_notation_trailing_zeros_end() { + new_ucmd!() + .args(&["-w", "1e-1", "1e-2", ".1100"]) + .succeeds() + .stdout_is("0.10\n0.11\n") + .no_stderr(); +} + // TODO This is duplicated from `test_yes.rs`; refactor them. /// Run `seq`, capture some of the output, close the pipe, and verify it. fn run(args: &[&str], expected: &[u8]) {