diff --git a/src/uu/nl/src/helper.rs b/src/uu/nl/src/helper.rs
index fe550e6a0..ae14a6d59 100644
--- a/src/uu/nl/src/helper.rs
+++ b/src/uu/nl/src/helper.rs
@@ -13,6 +13,15 @@ pub fn parse_options(settings: &mut crate::Settings, opts: &clap::ArgMatches) ->
     // This vector holds error messages encountered.
     let mut errs: Vec<String> = vec![];
     settings.renumber = opts.get_flag(options::NO_RENUMBER);
+    if let Some(delimiter) = opts.get_one::<String>(options::SECTION_DELIMITER) {
+        // check whether the delimiter is a single ASCII char (1 byte)
+        // because GNU nl doesn't add a ':' to single non-ASCII chars
+        settings.section_delimiter = if delimiter.len() == 1 {
+            format!("{delimiter}:")
+        } else {
+            delimiter.to_owned()
+        };
+    }
     if let Some(val) = opts.get_one::<String>(options::NUMBER_SEPARATOR) {
         settings.number_separator = val.to_owned();
     }
diff --git a/src/uu/nl/src/nl.rs b/src/uu/nl/src/nl.rs
index 6e1cb6835..ea37e00dc 100644
--- a/src/uu/nl/src/nl.rs
+++ b/src/uu/nl/src/nl.rs
@@ -23,7 +23,7 @@ pub struct Settings {
     body_numbering: NumberingStyle,
     footer_numbering: NumberingStyle,
     // The variable corresponding to -d
-    section_delimiter: [char; 2],
+    section_delimiter: String,
     // The variables corresponding to the options -v, -i, -l, -w.
     starting_line_number: i64,
     line_increment: i64,
@@ -43,7 +43,7 @@ impl Default for Settings {
             header_numbering: NumberingStyle::None,
             body_numbering: NumberingStyle::NonEmpty,
             footer_numbering: NumberingStyle::None,
-            section_delimiter: ['\\', ':'],
+            section_delimiter: String::from("\\:"),
             starting_line_number: 1,
             line_increment: 1,
             join_blank_lines: 1,
@@ -134,6 +134,39 @@ impl NumberFormat {
     }
 }
 
+/// The kind of section that a section delimiter line introduces.
+enum SectionDelimiter {
+    Header,
+    Body,
+    Footer,
+}
+
+impl SectionDelimiter {
+    /// Classifies the line `s` as a section delimiter built from `pattern`.
+    ///
+    /// A valid section delimiter contains the pattern one to three times,
+    /// and nothing else: three repetitions start a header, two a body and
+    /// one a footer. Returns `None` for every other line, and whenever `s`
+    /// or `pattern` is empty.
+    fn parse(s: &str, pattern: &str) -> Option<Self> {
+        if s.is_empty() || pattern.is_empty() {
+            return None;
+        }
+
+        // `matches` yields disjoint occurrences, so the byte lengths only add
+        // up when `s` consists of nothing but repetitions of `pattern`.
+        let pattern_count = s.matches(pattern).count();
+        let is_length_ok = pattern_count * pattern.len() == s.len();
+
+        match (pattern_count, is_length_ok) {
+            (3, true) => Some(Self::Header),
+            (2, true) => Some(Self::Body),
+            (1, true) => Some(Self::Footer),
+            _ => None,
+        }
+    }
+}
+
 pub mod options {
     pub const HELP: &str = "help";
     pub const FILE: &str = "file";
@@ -307,14 +340,12 @@ fn nl(reader: &mut BufReader, stats: &mut Stats, settings: &Settings
             stats.consecutive_empty_lines = 0;
         };
 
-        // FIXME section delimiters are hardcoded and settings.section_delimiter is ignored
-        // because --section-delimiter is not correctly implemented yet
-        let _ = settings.section_delimiter; // XXX suppress "field never read" warning
-        let new_numbering_style = match line.as_str() {
-            "\\:\\:\\:" => Some(&settings.header_numbering),
-            "\\:\\:" => Some(&settings.body_numbering),
-            "\\:" => Some(&settings.footer_numbering),
-            _ => None,
+        let new_numbering_style = match SectionDelimiter::parse(&line, &settings.section_delimiter)
+        {
+            Some(SectionDelimiter::Header) => Some(&settings.header_numbering),
+            Some(SectionDelimiter::Body) => Some(&settings.body_numbering),
+            Some(SectionDelimiter::Footer) => Some(&settings.footer_numbering),
+            None => None,
         };
 
         if let Some(new_style) = new_numbering_style {
diff --git a/tests/by-util/test_nl.rs b/tests/by-util/test_nl.rs
index 118c4cf04..87f218166 100644
--- a/tests/by-util/test_nl.rs
+++ b/tests/by-util/test_nl.rs
@@ -2,7 +2,8 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid ninvalid vinvalid winvalid
+//
+// spell-checker:ignore binvalid finvalid hinvalid iinvalid linvalid nabcabc nabcabcabc ninvalid vinvalid winvalid
 use crate::common::util::TestScenario;
 
 #[test]
@@ -537,3 +538,85 @@ fn test_line_number_overflow() {
         .stdout_is(format!("{}\ta\n", i64::MIN))
         .stderr_is("nl: line number overflow\n");
 }
+
+#[test]
+fn test_section_delimiter() {
+    for arg in ["-dabc", "--section-delimiter=abc"] {
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\nabcabcabc\nb") // header section
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n");
+
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\nabcabc\nb") // body section
+            .succeeds()
+            .stdout_is("     1\ta\n\n     1\tb\n");
+
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\nabc\nb") // footer section
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n");
+    }
+}
+
+#[test]
+fn test_one_char_section_delimiter_expansion() {
+    for arg in ["-da", "--section-delimiter=a"] {
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\na:a:a:\nb") // header section
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n");
+
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\na:a:\nb") // body section
+            .succeeds()
+            .stdout_is("     1\ta\n\n     1\tb\n");
+
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\na:\nb") // footer section
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n");
+    }
+}
+
+#[test]
+fn test_non_ascii_one_char_section_delimiter() {
+    for arg in ["-dä", "--section-delimiter=ä"] {
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\näää\nb") // header section
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n");
+
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\nää\nb") // body section
+            .succeeds()
+            .stdout_is("     1\ta\n\n     1\tb\n");
+
+        new_ucmd!()
+            .arg(arg)
+            .pipe_in("a\nä\nb") // footer section
+            .succeeds()
+            .stdout_is("     1\ta\n\n       b\n");
+    }
+}
+
+#[test]
+fn test_empty_section_delimiter() {
+    // Arguments reach the program verbatim (no shell quote removal), so a
+    // literal '' would be a two-quote delimiter; pass a truly empty value.
+    for args in [&["-d", ""][..], &["--section-delimiter="][..]] {
+        new_ucmd!()
+            .args(args)
+            .pipe_in("a\n\nb")
+            .succeeds()
+            .stdout_is("     1\ta\n       \n     2\tb\n");
+    }
+}