From 81911f9f6a799356bfdbf248bf4204bc0c119c55 Mon Sep 17 00:00:00 2001 From: Dan Hipschman <48698358+dan-hipschman@users.noreply.github.com> Date: Mon, 7 Apr 2025 11:45:42 -0700 Subject: [PATCH] numfmt: add --zero-terminated option --- src/uu/numfmt/src/format.rs | 10 +++++++- src/uu/numfmt/src/numfmt.rs | 37 ++++++++++++++++++++++++++--- src/uu/numfmt/src/options.rs | 2 ++ tests/by-util/test_numfmt.rs | 45 ++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 4 deletions(-) diff --git a/src/uu/numfmt/src/format.rs b/src/uu/numfmt/src/format.rs index 7221440cb..20ffa7a7b 100644 --- a/src/uu/numfmt/src/format.rs +++ b/src/uu/numfmt/src/format.rs @@ -392,12 +392,20 @@ fn format_and_print_whitespace(s: &str, options: &NumfmtOptions) -> Result<()> { print!("{}", format_string(field, options, implicit_padding)?); } else { + // the -z option converts an initial \n into a space + let prefix = if options.zero_terminated && prefix.starts_with('\n') { + print!(" "); + &prefix[1..] 
+ } else { + prefix + }; // print unselected field without conversion print!("{prefix}{field}"); } } - println!(); + let eol = if options.zero_terminated { '\0' } else { '\n' }; + print!("{}", eol); Ok(()) } diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index b024e99b7..945b5b1ed 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -8,7 +8,8 @@ use crate::format::format_and_print; use crate::options::*; use crate::units::{Result, Unit}; use clap::{Arg, ArgAction, ArgMatches, Command, parser::ValueSource}; -use std::io::{BufRead, Write}; +use std::io::{BufRead, Error, Write}; +use std::result::Result as StdResult; use std::str::FromStr; use units::{IEC_BASES, SI_BASES}; @@ -38,10 +39,29 @@ fn handle_buffer<R>(input: R, options: &NumfmtOptions) -> UResult<()> where R: BufRead, { - for (idx, line_result) in input.lines().by_ref().enumerate() { + if options.zero_terminated { + handle_buffer_iterator( + input + .split(0) + // FIXME: This panics on UTF8 decoding, but this util in general doesn't handle + // invalid UTF8 + .map(|bytes| Ok(String::from_utf8(bytes?).unwrap())), + options, + ) + } else { + handle_buffer_iterator(input.lines(), options) + } +} + +fn handle_buffer_iterator( + iter: impl Iterator<Item = StdResult<String, Error>>, + options: &NumfmtOptions, +) -> UResult<()> { + let eol = if options.zero_terminated { '\0' } else { '\n' }; + for (idx, line_result) in iter.enumerate() { match line_result { Ok(line) if idx < options.header => { - println!("{line}"); + print!("{line}{eol}"); Ok(()) } Ok(line) => format_and_handle_validation(line.as_ref(), options), @@ -217,6 +237,8 @@ fn parse_options(args: &ArgMatches) -> Result<NumfmtOptions> { let invalid = InvalidModes::from_str(args.get_one::<String>(options::INVALID).unwrap()).unwrap(); + let zero_terminated = args.get_flag(options::ZERO_TERMINATED); + Ok(NumfmtOptions { transform, padding, @@ -227,6 +249,7 @@ fn parse_options(args: &ArgMatches) -> Result<NumfmtOptions> { suffix, format, invalid, + zero_terminated, }) } @@ -366,6 
+389,13 @@ pub fn uu_app() -> Command { .value_parser(["abort", "fail", "warn", "ignore"]) .value_name("INVALID"), ) + .arg( + Arg::new(options::ZERO_TERMINATED) + .long(options::ZERO_TERMINATED) + .short('z') + .help("line delimiter is NUL, not newline") + .action(ArgAction::SetTrue), + ) .arg( Arg::new(options::NUMBER) .hide(true) @@ -406,6 +436,7 @@ mod tests { suffix: None, format: FormatOptions::default(), invalid: InvalidModes::Abort, + zero_terminated: false, } } diff --git a/src/uu/numfmt/src/options.rs b/src/uu/numfmt/src/options.rs index c61be0b70..72cfe2269 100644 --- a/src/uu/numfmt/src/options.rs +++ b/src/uu/numfmt/src/options.rs @@ -26,6 +26,7 @@ pub const TO: &str = "to"; pub const TO_DEFAULT: &str = "none"; pub const TO_UNIT: &str = "to-unit"; pub const TO_UNIT_DEFAULT: &str = "1"; +pub const ZERO_TERMINATED: &str = "zero-terminated"; pub struct TransformOptions { pub from: Unit, @@ -52,6 +53,7 @@ pub struct NumfmtOptions { pub suffix: Option<String>, pub format: FormatOptions, pub invalid: InvalidModes, + pub zero_terminated: bool, } #[derive(Clone, Copy)] diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index 21b327043..806e29d9a 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -1073,3 +1073,48 @@ fn test_format_grouping_conflicts_with_to_option() { .fails_with_code(1) .stderr_contains("grouping cannot be combined with --to"); } + +#[test] +fn test_zero_terminated_command_line_args() { + new_ucmd!() + .args(&["--zero-terminated", "--to=si", "1000"]) + .succeeds() + .stdout_is("1.0k\x00"); + + new_ucmd!() + .args(&["-z", "--to=si", "1000"]) + .succeeds() + .stdout_is("1.0k\x00"); + + new_ucmd!() + .args(&["-z", "--to=si", "1000", "2000"]) + .succeeds() + .stdout_is("1.0k\x002.0k\x00"); +} + +#[test] +fn test_zero_terminated_input() { + let values = vec![ + ("1000", "1.0k\x00"), + ("1000\x00", "1.0k\x00"), + ("1000\x002000\x00", "1.0k\x002.0k\x00"), + ]; + + for (input, expected) in values { + 
new_ucmd!() + .args(&["-z", "--to=si"]) + .pipe_in(input) + .succeeds() + .stdout_is(expected); + } +} + +#[test] +fn test_zero_terminated_embedded_newline() { + new_ucmd!() + .args(&["-z", "--from=si", "--field=-"]) + .pipe_in("1K\n2K\x003K\n4K\x00") + .succeeds() + // Newlines get replaced by a single space + .stdout_is("1000 2000\x003000 4000\x00"); +}