From 4ef35d4a96260f7a45fa5cc4ba3a61d47136e610 Mon Sep 17 00:00:00 2001 From: jfinkels Date: Sun, 22 Aug 2021 15:01:17 -0400 Subject: [PATCH] tac: correct behavior of -b option (#2523) * tac: correct behavior of -b option Correct the behavior of `tac -b` to match that of GNU coreutils `tac`. Specifically, this changes `tac -b` to assume *leading* line separators instead of the default *trailing* line separators. Before this commit, the (incorrect) behavior was $ printf "/abc/def" | tac -b -s "/" def/abc/ After this commit, the behavior is $ printf "/abc/def" | tac -b -s "/" /def/abc Fixes #2262. * fixup! tac: correct behavior of -b option * fixup! tac: correct behavior of -b option Co-authored-by: Sylvestre Ledru --- src/uu/tac/src/tac.rs | 104 ++++++++---------- tests/by-util/test_tac.rs | 53 ++++++++- .../tac/delimited_primes_before.expected | 2 +- 3 files changed, 96 insertions(+), 63 deletions(-) diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 67b361a76..4e1b38687 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -5,13 +5,13 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDO) sbytes slen +// spell-checker:ignore (ToDO) sbytes slen dlen #[macro_use] extern crate uucore; use clap::{crate_version, App, Arg}; -use std::io::{stdin, stdout, BufReader, Read, Stdout, Write}; +use std::io::{stdin, stdout, BufReader, Read, Write}; use std::{fs::File, path::Path}; use uucore::InvalidEncodingHandling; @@ -80,12 +80,49 @@ pub fn uu_app() -> App<'static, 'static> { .arg(Arg::with_name(options::FILE).hidden(true).multiple(true)) } -fn tac(filenames: Vec, before: bool, _: bool, separator: &str) -> i32 { - let mut exit_code = 0; +fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> { let mut out = stdout(); + + // Convert the line separator to a byte sequence. let sbytes = separator.as_bytes(); let slen = sbytes.len(); + // If there are more characters in the separator than in the data, + // we can't possibly split the data on the separator. Write the + // entire buffer to stdout. + let dlen = data.len(); + if dlen < slen { + return out.write_all(data); + } + + // Iterate over each byte in the buffer in reverse. When we find a + // line separator, write the line to stdout. + // + // The `before` flag controls whether the line separator appears at + // the end of the line (as in "abc\ndef\n") or at the beginning of + // the line (as in "/abc/def"). + let mut following_line_start = data.len(); + for i in (0..dlen - slen + 1).rev() { + if &data[i..i + slen] == sbytes { + if before { + out.write_all(&data[i..following_line_start])?; + following_line_start = i; + } else { + out.write_all(&data[i + slen..following_line_start])?; + following_line_start = i + slen; + } + } + } + + // After the loop terminates, write whatever bytes are remaining at + // the beginning of the buffer. + out.write_all(&data[0..following_line_start])?; + Ok(()) +} + +fn tac(filenames: Vec, before: bool, _: bool, separator: &str) -> i32 { + let mut exit_code = 0; + for filename in &filenames { let mut file = BufReader::new(if filename == "-" { Box::new(stdin()) as Box @@ -120,63 +157,8 @@ fn tac(filenames: Vec, before: bool, _: bool, separator: &str) -> i32 { continue; }; - // find offsets in string of all separators - let mut offsets = Vec::new(); - let mut i = 0; - loop { - if i + slen > data.len() { - break; - } - - if &data[i..i + slen] == sbytes { - offsets.push(i); - i += slen; - } else { - i += 1; - } - } - // If the file contains no line separators, then simply write - // the contents of the file directly to stdout. - if offsets.is_empty() { - out.write_all(&data) - .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); - return exit_code; - } - - // if there isn't a separator at the end of the file, fake it - if *offsets.last().unwrap() < data.len() - slen { - offsets.push(data.len()); - } - - let mut prev = *offsets.last().unwrap(); - let mut start = true; - for off in offsets.iter().rev().skip(1) { - // correctly handle case of no final separator in file - if start && prev == data.len() { - show_line(&mut out, &[], &data[*off + slen..prev], before); - start = false; - } else { - show_line(&mut out, sbytes, &data[*off + slen..prev], before); - } - prev = *off; - } - show_line(&mut out, sbytes, &data[0..prev], before); + buffer_tac(&data, before, separator) + .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); } - exit_code } - -fn show_line(out: &mut Stdout, sep: &[u8], dat: &[u8], before: bool) { - if before { - out.write_all(sep) - .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); - } - - out.write_all(dat) - .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); - - if !before { - out.write_all(sep) - .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); - } -} diff --git a/tests/by-util/test_tac.rs b/tests/by-util/test_tac.rs index 599bc19c7..c6e32fef0 100644 --- a/tests/by-util/test_tac.rs +++ b/tests/by-util/test_tac.rs @@ -1,3 +1,4 @@ +// spell-checker:ignore axxbxx bxxaxx use crate::common::util::*; #[test] @@ -23,7 +24,7 @@ fn test_stdin_non_newline_separator_before() { .args(&["-b", "-s", ":"]) .pipe_in("100:200:300:400:500") .run() - .stdout_is("500:400:300:200:100"); + .stdout_is(":500:400:300:200100"); } #[test] @@ -74,6 +75,56 @@ fn test_no_line_separators() { new_ucmd!().pipe_in("a").succeeds().stdout_is("a"); } +#[test] +fn test_before_trailing_separator_no_leading_separator() { + new_ucmd!() + .arg("-b") + .pipe_in("a\nb\n") + .succeeds() + .stdout_is("\n\nba"); +} + +#[test] +fn test_before_trailing_separator_and_leading_separator() { + new_ucmd!() + .arg("-b") + .pipe_in("\na\nb\n") + .succeeds() + .stdout_is("\n\nb\na"); +} + +#[test] +fn test_before_leading_separator_no_trailing_separator() { + new_ucmd!() + .arg("-b") + .pipe_in("\na\nb") + .succeeds() + .stdout_is("\nb\na"); +} + +#[test] +fn test_before_no_separator() { + new_ucmd!() + .arg("-b") + .pipe_in("ab") + .succeeds() + .stdout_is("ab"); +} + +#[test] +fn test_before_empty_file() { + new_ucmd!().arg("-b").pipe_in("").succeeds().stdout_is(""); +} + +#[test] +fn test_multi_char_separator() { + new_ucmd!() + .args(&["-s", "xx"]) + .pipe_in("axxbxx") + .succeeds() + .stdout_is("bxxaxx"); +} + #[test] fn test_null_separator() { new_ucmd!() diff --git a/tests/fixtures/tac/delimited_primes_before.expected b/tests/fixtures/tac/delimited_primes_before.expected index 13cb1be06..1417a0150 100644 --- a/tests/fixtures/tac/delimited_primes_before.expected +++ b/tests/fixtures/tac/delimited_primes_before.expected @@ -1 +1 @@ -97:89:83:79:73:71:67:61:59:53:47:43:41:37:31:29:23:19:17:13:11:7:5:3:2 \ No newline at end of file +:97:89:83:79:73:71:67:61:59:53:47:43:41:37:31:29:23:19:17:13:11:7:5:32 \ No newline at end of file