1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

tac: support multi-char separator with overlap

Fix a bug in `tac` where multi-character line separators would cause
incorrect behavior when there was overlap between candidate matches in
the input string. This commit adds a dependency on `memchr` in order to
use the `memchr::memmem::rfind_iter()` function to scan for
non-overlapping instances of the specified line separator characters,
scanning from right to left.

Fixes #2580.
This commit is contained in:
Jeffrey Finkelstein 2021-08-22 16:21:08 -04:00
parent c77115ab51
commit 0e689e78aa
4 changed files with 107 additions and 22 deletions

View file

@ -1,4 +1,4 @@
// spell-checker:ignore axxbxx bxxaxx
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa
use crate::common::util::*;
#[test]
@ -125,6 +125,78 @@ fn test_multi_char_separator() {
.stdout_is("bxxaxx");
}
#[test]
fn test_multi_char_separator_overlap() {
    // Each entry is `(input, expected output)` for `tac -s xx`, where the
    // separator "xx" is treated as trailing the line it terminates.
    let cases = [
        // Only the right-most pair of "x" characters counts as a
        // separator, so "axxx" is a single line "ax" terminated by
        // "xx". Reversing one line leaves the input unchanged.
        ("axxx", "axxx"),
        // Separators are matched as non-overlapping pairs of "x"
        // characters, so "axxxx" holds two lines:
        //
        // * first line: "a" terminated by "xx",
        // * second line: the empty string "" terminated by "xx".
        //
        // Printed in reverse order this gives "xx" and then "axx".
        ("axxxx", "xxaxx"),
    ];

    for &(input, expected) in cases.iter() {
        new_ucmd!()
            .args(&["-s", "xx"])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}
#[test]
fn test_multi_char_separator_overlap_before() {
    // Each entry is `(input, expected output)` for `tac -b -s xx`. With
    // the "-b" option the separator "xx" is treated as sitting at the
    // beginning of the line it belongs to.
    let cases = [
        // "axxx" is interpreted as two lines:
        //
        // * first line: "ax", preceded by no separator because nothing
        //   comes before it in the input,
        // * second line: the empty string "" preceded by "xx".
        //
        // Printed in reverse order this gives "xx" and then "ax".
        ("axxx", "xxax"),
        // Separators are matched as non-overlapping pairs of "x"
        // characters, so "axxxx" is interpreted as three lines:
        //
        // * first line: "a", preceded by no separator because nothing
        //   comes before it in the input,
        // * second line: the empty string "" preceded by "xx" (the
        //   first two "x" characters of the input),
        // * third line: the empty string "" preceded by "xx" (the last
        //   two "x" characters of the input).
        //
        // Printed in reverse order this gives "xx", then "xx", then "a".
        ("axxxx", "xxxxa"),
    ];

    for &(input, expected) in cases.iter() {
        new_ucmd!()
            .args(&["-b", "-s", "xx"])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}
#[test]
fn test_null_separator() {
new_ucmd!()