From 2afa9cd1a0ee42bf6ba85c7dc77590f37c39a41c Mon Sep 17 00:00:00 2001 From: Eli Youngs Date: Sun, 6 Feb 2022 14:26:59 -0800 Subject: [PATCH 1/4] printf - Update %g formatting to match GNU --- .../tokenize/num_format/formatters/decf.rs | 123 +++++++++++++----- tests/by-util/test_printf.rs | 66 +++++++++- 2 files changed, 156 insertions(+), 33 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs index 52b8515c9..3d420ecdb 100644 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs +++ b/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs @@ -5,21 +5,88 @@ use super::super::format_field::FormatField; use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; -fn get_len_fmt_primitive(fmt: &FormatPrimitive) -> usize { - let mut len = 0; - if let Some(ref s) = fmt.prefix { - len += s.len(); +const SIGNIFICANT_FIGURES: usize = 6; + +fn round_to_significance(input: &str, significant_figures: usize) -> u32 { + if significant_figures < input.len() { + let digits = &input[..significant_figures + 1]; + let float_representation = digits.parse::().unwrap(); + (float_representation / 10.0).round() as u32 + } else { + input.parse::().unwrap_or(0) } - if let Some(ref s) = fmt.pre_decimal { - len += s.len(); +} + +fn round(mut format: FormatPrimitive) -> FormatPrimitive { + let mut significant_figures = SIGNIFICANT_FIGURES; + + if format.pre_decimal.is_some() { + let input = format.pre_decimal.as_ref().unwrap(); + let rounded = round_to_significance(input, significant_figures); + let mut rounded_str = rounded.to_string(); + significant_figures -= rounded_str.len(); + + if significant_figures == 0 { + if let Some(digits) = &format.post_decimal { + if digits.chars().next().unwrap_or('0') >= '5' { + let rounded = rounded + 1; + rounded_str = rounded.to_string(); + } + } + } + format.pre_decimal = Some(rounded_str); } - if let Some(ref s) = fmt.post_decimal { - len += s.len(); + + if significant_figures == 0 { + format.post_decimal = Some(String::new()); + } else if let Some(input) = format.post_decimal { + let leading_zeroes = input.len() - input.trim_start_matches('0').len(); + + let rounded_str = if leading_zeroes <= significant_figures { + let mut post_decimal = String::with_capacity(significant_figures); + for _ in 0..leading_zeroes { + post_decimal.push('0'); + } + + significant_figures -= leading_zeroes; + let rounded = round_to_significance(&input[leading_zeroes..], significant_figures); + post_decimal.push_str(&rounded.to_string()); + post_decimal + } else { + input[..significant_figures].to_string() + }; + format.post_decimal = Some(rounded_str); } - if let Some(ref s) = fmt.suffix { - len += s.len(); + format +} + +fn truncate(mut format: FormatPrimitive) -> FormatPrimitive { + if let Some(ref post_dec) = format.post_decimal { + let trimmed = post_dec.trim_end_matches('0'); + + if trimmed.is_empty() { + format.post_decimal = Some("".into()); + if format.suffix == Some("e+00".into()) { + format.suffix = Some("".into()); + } + } else if trimmed.len() != post_dec.len() { + format.post_decimal = Some(trimmed.to_owned()); + } + } + format +} + +fn is_float_magnitude(suffix: &Option) -> bool { + match suffix { + Some(exponent) => { + if exponent.chars().nth(1) == Some('-') { + exponent < &"e-05".into() + } else { + exponent < &"e+06".into() + } + } + None => true, } - len } pub struct Decf; @@ -46,34 +113,26 @@ impl Formatter for Decf { None, false, ); - let mut f_sci = get_primitive_dec( + let mut f_dec = get_primitive_dec( initial_prefix, &str_in[initial_prefix.offset..], &analysis, second_field as usize, Some(*field.field_char == 'G'), ); - // strip trailing zeroes - if let Some(ref post_dec) = f_sci.post_decimal { - let trimmed = post_dec.trim_end_matches('0'); - if trimmed.len() != post_dec.len() { - f_sci.post_decimal = Some(trimmed.to_owned()); - } + + if is_float_magnitude(&f_dec.suffix) { + f_dec = get_primitive_dec( + initial_prefix, + &str_in[initial_prefix.offset..], + &analysis, + second_field as usize, + None, + ); } - let f_fl = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - None, - ); - Some( - if get_len_fmt_primitive(&f_fl) >= get_len_fmt_primitive(&f_sci) { - f_sci - } else { - f_fl - }, - ) + + f_dec = truncate(round(f_dec)); + Some(f_dec) } fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { primitive_to_str_common(prim, &field) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 72ca4535a..0f2ad9ce3 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -289,7 +289,7 @@ fn sub_num_dec_trunc() { new_ucmd!() .args(&["pi is ~ %g", "3.1415926535"]) .succeeds() - .stdout_only("pi is ~ 3.141593"); + .stdout_only("pi is ~ 3.14159"); } #[cfg_attr(not(feature = "test_unimplemented"), ignore)] @@ -469,3 +469,67 @@ fn sub_float_leading_zeroes() { .succeeds() .stdout_only("001.000000"); } + +#[test] +fn sub_general_float() { + new_ucmd!() + .args(&["%g", "1.1"]) + .succeeds() + .stdout_only("1.1"); +} + +#[test] +fn sub_general_truncate_to_integer() { + new_ucmd!() + .args(&["%g", "1.0"]) + .succeeds() + .stdout_only("1"); +} + +#[test] +fn sub_general_prefix_with_spaces() { + new_ucmd!() + .args(&["%5g", "1.1"]) + .succeeds() + .stdout_only(" 1.1"); +} + +#[test] +fn sub_general_large_integer() { + new_ucmd!() + .args(&["%g", "1000000"]) + .succeeds() + .stdout_only("1e+06"); +} + +#[test] +fn sub_general_round_scientific_notation() { + new_ucmd!() + .args(&["%g", "123456789"]) + .succeeds() + .stdout_only("1.23457e+08"); +} + +#[test] +fn sub_general_scientific_leading_zeroes() { + new_ucmd!() + .args(&["%g", "1000010"]) + .succeeds() + .stdout_only("1.00001e+06"); +} + +#[test] +fn sub_general_round_float() { + new_ucmd!() + .args(&["%g", "12345.6789"]) + .succeeds() + .stdout_only("12345.7"); +} + +#[test] +fn sub_general_round_float_to_integer() { + new_ucmd!() + .args(&["%g", "123456.7"]) + .succeeds() + .stdout_only("123457"); +} From e23219289aac55bad22808676861dab41720c96b Mon Sep 17 00:00:00 2001 From: Eli Youngs Date: Sun, 6 Feb 2022 14:52:20 -0800 Subject: [PATCH 2/4] Simplify test cases --- tests/by-util/test_printf.rs | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 0f2ad9ce3..4f1b52b32 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -487,19 +487,11 @@ fn sub_general_truncate_to_integer() { } #[test] -fn sub_general_prefix_with_spaces() { +fn sub_general_scientific_notation() { new_ucmd!() - .args(&["%5g", "1.1"]) + .args(&["%g", "1000010"]) .succeeds() - .stdout_only(" 1.1"); -} - -#[test] -fn sub_general_large_integer() { - new_ucmd!() - .args(&["%g", "1000000"]) - .succeeds() - .stdout_only("1e+06"); + .stdout_only("1.00001e+06"); } #[test] @@ -510,14 +502,6 @@ fn sub_general_round_scientific_notation() { .stdout_only("1.23457e+08"); } -#[test] -fn sub_general_scientific_leading_zeroes() { - new_ucmd!() - .args(&["%g", "1000010"]) - .succeeds() - .stdout_only("1.00001e+06"); -} - #[test] fn sub_general_round_float() { new_ucmd!() From 565af8237b17c4c658f3b965c0a28b414b0de9d3 Mon Sep 17 00:00:00 2001 From: Eli Youngs Date: Sun, 6 Feb 2022 23:57:54 -0800 Subject: [PATCH 3/4] Fix formatting --- tests/by-util/test_printf.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 4f1b52b32..cb7126b9b 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -480,10 +480,7 @@ fn sub_general_float() { #[test] fn sub_general_truncate_to_integer() { - new_ucmd!() - .args(&["%g", "1.0"]) - .succeeds() - .stdout_only("1"); + new_ucmd!().args(&["%g", "1.0"]).succeeds().stdout_only("1"); } #[test] From a0bd88b51b80a235ddfe26c7fd5f3ceac0e22042 Mon Sep 17 00:00:00 2001 From: Eli Youngs Date: Sat, 19 Mar 2022 14:47:14 -0700 Subject: [PATCH 4/4] Add comments and an additional test --- .../tokenize/num_format/formatters/decf.rs | 125 ++++++++++++------ tests/by-util/test_printf.rs | 8 ++ 2 files changed, 93 insertions(+), 40 deletions(-) diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs index 3d420ecdb..07e1b3f47 100644 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs +++ b/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs @@ -7,8 +7,18 @@ use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnaly const SIGNIFICANT_FIGURES: usize = 6; +// Parse a numeric string as the nearest integer with a given significance. +// This is a helper function for round(). +// Examples: +// round_to_significance("456", 1) == 500 +// round_to_significance("456", 2) == 460 +// round_to_significance("456", 9) == 456 fn round_to_significance(input: &str, significant_figures: usize) -> u32 { if significant_figures < input.len() { + // If the input has too many digits, use a float intermediary + // to round it before converting to an integer. Otherwise, + // converting straight to integer will truncate. + // There might be a cleaner way to do this... let digits = &input[..significant_figures + 1]; let float_representation = digits.parse::().unwrap(); (float_representation / 10.0).round() as u32 @@ -17,16 +27,45 @@ fn round_to_significance(input: &str, significant_figures: usize) -> u32 { } } -fn round(mut format: FormatPrimitive) -> FormatPrimitive { - let mut significant_figures = SIGNIFICANT_FIGURES; +// Removing trailing zeroes, expressing the result as an integer where +// possible. This is a helper function for round(). +fn truncate(mut format: FormatPrimitive) -> FormatPrimitive { + if let Some(ref post_dec) = format.post_decimal { + let trimmed = post_dec.trim_end_matches('0'); + if trimmed.is_empty() { + // If there are no nonzero digits after the decimal point, + // use integer formatting by clearing post_decimal and suffix. + format.post_decimal = Some("".into()); + if format.suffix == Some("e+00".into()) { + format.suffix = Some("".into()); + } + } else if trimmed.len() != post_dec.len() { + // Otherwise, update the format to remove only the trailing + // zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were + // no trailing zeroes, do nothing. + format.post_decimal = Some(trimmed.to_owned()); + } + } + format +} + +// Round a format to six significant figures and remove trailing zeroes. +fn round(mut format: FormatPrimitive) -> FormatPrimitive { + let mut significant_digits_remaining = SIGNIFICANT_FIGURES; + + // First, take as many significant digits as possible from pre_decimal, if format.pre_decimal.is_some() { let input = format.pre_decimal.as_ref().unwrap(); - let rounded = round_to_significance(input, significant_figures); + let rounded = round_to_significance(input, significant_digits_remaining); let mut rounded_str = rounded.to_string(); - significant_figures -= rounded_str.len(); + significant_digits_remaining -= rounded_str.len(); - if significant_figures == 0 { + // If the pre_decimal has exactly enough significant digits, + // round the input to the nearest integer. If the first + // post_decimal digit is 5 or higher, round up by incrementing + // the pre_decimal number. Otherwise, use the pre_decimal as-is. + if significant_digits_remaining == 0 { if let Some(digits) = &format.post_decimal { if digits.chars().next().unwrap_or('0') >= '5' { let rounded = rounded + 1; @@ -37,46 +76,51 @@ fn round(mut format: FormatPrimitive) -> FormatPrimitive { format.pre_decimal = Some(rounded_str); } - if significant_figures == 0 { + // If no significant digits remain, or there's no post_decimal to + // round, return the rounded pre_decimal value with no post_decimal. + // Otherwise, round the post_decimal to the remaining significance. + if significant_digits_remaining == 0 { format.post_decimal = Some(String::new()); } else if let Some(input) = format.post_decimal { let leading_zeroes = input.len() - input.trim_start_matches('0').len(); + let digits = &input[leading_zeroes..]; - let rounded_str = if leading_zeroes <= significant_figures { - let mut post_decimal = String::with_capacity(significant_figures); - for _ in 0..leading_zeroes { - post_decimal.push('0'); - } - - significant_figures -= leading_zeroes; - let rounded = round_to_significance(&input[leading_zeroes..], significant_figures); - post_decimal.push_str(&rounded.to_string()); - post_decimal - } else { - input[..significant_figures].to_string() - }; - format.post_decimal = Some(rounded_str); - } - format -} - -fn truncate(mut format: FormatPrimitive) -> FormatPrimitive { - if let Some(ref post_dec) = format.post_decimal { - let trimmed = post_dec.trim_end_matches('0'); - - if trimmed.is_empty() { - format.post_decimal = Some("".into()); - if format.suffix == Some("e+00".into()) { - format.suffix = Some("".into()); - } - } else if trimmed.len() != post_dec.len() { - format.post_decimal = Some(trimmed.to_owned()); + // In the post_decimal, leading zeroes are significant. "01.0010" + // has one significant digit in pre_decimal, and 3 from post_decimal. + let mut post_decimal_str = String::with_capacity(significant_digits_remaining); + for _ in 0..leading_zeroes { + post_decimal_str.push('0'); } + + if leading_zeroes < significant_digits_remaining { + // After significant leading zeroes, round the remaining digits + // to any remaining significance. + let rounded = round_to_significance(digits, significant_digits_remaining); + post_decimal_str.push_str(&rounded.to_string()); + } else if leading_zeroes == significant_digits_remaining + && digits.chars().next().unwrap_or('0') >= '5' + { + // If necessary, round up the post_decimal ("1.000009" should + // round to 1.00001, instead of truncating after the last + // significant leading zero). + post_decimal_str.pop(); + post_decimal_str.push('1'); + } else { + // If the rounded post_decimal is entirely zeroes, discard + // it and use integer formatting instead. + post_decimal_str = "".into(); + } + + format.post_decimal = Some(post_decimal_str); } - format + truncate(format) } -fn is_float_magnitude(suffix: &Option) -> bool { +// Given an exponent used in scientific notation, return whether the +// number is small enough to be expressed as a decimal instead. "Small +// enough" is based only on the number's magnitude, not the length of +// any string representation. +fn should_represent_as_decimal(suffix: &Option) -> bool { match suffix { Some(exponent) => { if exponent.chars().nth(1) == Some('-') { @@ -121,7 +165,9 @@ impl Formatter for Decf { Some(*field.field_char == 'G'), ); - if is_float_magnitude(&f_dec.suffix) { + if should_represent_as_decimal(&f_dec.suffix) { + // Use decimal formatting instead of scientific notation + // if the input's magnitude is small. f_dec = get_primitive_dec( initial_prefix, &str_in[initial_prefix.offset..], @@ -131,8 +177,7 @@ impl Formatter for Decf { ); } - f_dec = truncate(round(f_dec)); - Some(f_dec) + Some(round(f_dec)) } fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { primitive_to_str_common(prim, &field) diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index cb7126b9b..d5be4cd17 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -514,3 +514,11 @@ fn sub_general_round_float_to_integer() { .succeeds() .stdout_only("123457"); } + +#[test] +fn sub_general_round_float_leading_zeroes() { + new_ucmd!() + .args(&["%g", "1.000009"]) + .succeeds() + .stdout_only("1.00001"); +}