1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

paste: permit the delimiter list to be empty (#6714)

* paste: permit the delimiter list to be empty

Also: refactored the delimiter processing logic

* Extract duplicated code into function

* Address PR comments. Improve code structure.

* Fix additional paste bugs

* Fix additional paste bugs

* Simplify backslash delimiter validation

* Fix Clippy violations
This commit is contained in:
Andrew Liebenow 2024-10-10 08:36:30 -05:00 committed by GitHub
parent 7c3a9380f1
commit c41c601b45
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 426 additions and 101 deletions

View file

@ -2,6 +2,9 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore bsdutils toybox
use crate::common::util::TestScenario;
struct TestData<'b> {
@ -11,7 +14,7 @@ struct TestData<'b> {
out: &'b str,
}
static EXAMPLE_DATA: &[TestData] = &[
const EXAMPLE_DATA: &[TestData] = &[
// Ensure that paste properly handles files lacking a final newline.
TestData {
name: "no-nl-1",
@ -172,7 +175,7 @@ fn test_delimiter_list_ending_with_escaped_backslash() {
at.write(&file, one_in);
ins.push(file);
}
ucmd.args(&[d, "\\\\"])
ucmd.args(&[d, r"\\"])
.args(&ins)
.succeeds()
.stdout_is("a\\b\n");
@ -183,13 +186,174 @@ fn test_delimiter_list_ending_with_escaped_backslash() {
// Checks, for both spellings of the delimiter option, that a delimiter list whose
// final backslash is not itself escaped makes the command fail, and that stderr
// contains the "delimiter list ends with an unescaped backslash" message followed
// by the offending delimiter list.
// NOTE(review): several `.args` / `.stderr_contains` lines below appear twice, once with
// an escaped literal and once with a raw-string literal; this looks like removed and
// added diff lines shown together - confirm against the real file.
fn test_delimiter_list_ending_with_unescaped_backslash() {
for d in ["-d", "--delimiters"] {
new_ucmd!()
.args(&[d, "\\"])
.args(&[d, r"\"])
.fails()
.stderr_contains("delimiter list ends with an unescaped backslash: \\");
.stderr_contains(r"delimiter list ends with an unescaped backslash: \");
new_ucmd!()
.args(&[d, "_\\"])
.args(&[d, r"\\\"])
.fails()
.stderr_contains("delimiter list ends with an unescaped backslash: _\\");
.stderr_contains(r"delimiter list ends with an unescaped backslash: \\\");
new_ucmd!()
.args(&[d, r"_\"])
.fails()
.stderr_contains(r"delimiter list ends with an unescaped backslash: _\");
}
}
// An empty delimiter list must be accepted: with `-s`, the expected output is the
// input lines concatenated with nothing between them.
#[test]
fn test_delimiter_list_empty() {
    let stdin_text = "\
A ALPHA 1 _
B BRAVO 2 _
C CHARLIE 3 _
";
    let joined = "\
A ALPHA 1 _B BRAVO 2 _C CHARLIE 3 _
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        let argv = [flag, "", "-s"];
        new_ucmd!()
            .args(&argv)
            .pipe_in(stdin_text)
            .succeeds()
            .stdout_only(joined);
    }
}
// Regression test: this invocation used to panic because a usize subtraction would
// have produced a negative number. The panic was not observable in release builds,
// since integer overflow checking is not enabled there.
#[test]
fn test_delimiter_truncation() {
    let stdin_text = "\
FIRST
SECOND
THIRD
FOURTH
ABCDEFG
";
    let joined = "\
FIRST!SECOND@THIRD#FOURTH!ABCDEFG
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        let argv = [flag, "!@#", "-s", "-", "-", "-"];
        new_ucmd!()
            .args(&argv)
            .pipe_in(stdin_text)
            .succeeds()
            .stdout_only(joined);
    }
}
// Input that is not valid UTF-8 is expected to come back on stdout byte-for-byte.
#[test]
fn test_non_utf8_input() {
    // 0xC0 is not valid UTF-8
    let raw_bytes: &[u8] = b"Non-UTF-8 test: \xC0\x00\xC0.\n";

    new_ucmd!()
        .pipe_in(raw_bytes)
        .succeeds()
        .stdout_only_bytes(raw_bytes);
}
// A delimiter list made of three backslashes must be rejected: nothing on stdout,
// and stderr must end with the "unescaped backslash" message naming the list.
#[test]
fn test_three_trailing_backslashes_delimiter() {
    // Raw string literal: a single literal backslash, repeated three times.
    let three_backslashes_string = r"\".repeat(3);

    // The expected stderr suffix is the same for both option spellings.
    let expected_tail = format!(
        ": delimiter list ends with an unescaped backslash: {three_backslashes_string}\n"
    );

    for flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[flag, three_backslashes_string.as_str()])
            .fails()
            .no_stdout()
            .stderr_str_check(|stderr_text| stderr_text.ends_with(expected_tail.as_str()));
    }
}
// POSIX leaves this case open:
// "If any other characters follow the <backslash>, the results are unspecified."
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
// Other implementations remove the backslash, and that is the behavior expected here.
#[test]
fn test_posix_unspecified_delimiter() {
    let stdin_text = "\
1
2
3
4
";
    let joined = "\
1z2z3z4
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        let argv = [flag, r"\z", "-s"];
        new_ucmd!()
            .args(&argv)
            .pipe_in(stdin_text)
            .succeeds()
            .stdout_only(joined);
    }
}
// POSIX describes the `\0` delimiter as:
// "Empty string (not a null character)"
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
// With the list `\0z\0`, the expected output has nothing, `z`, nothing between lines.
#[test]
fn test_backslash_zero_delimiter() {
    let stdin_text = "\
1
2
3
4
5
6
";
    let joined = "\
12z345z6
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        let argv = [flag, r"\0z\0", "-s"];
        new_ucmd!()
            .args(&argv)
            .pipe_in(stdin_text)
            .succeeds()
            .stdout_only(joined);
    }
}
// A multibyte character in the delimiter list is expected to act as one delimiter.
// As of 2024-10-09, only bsdutils (https://github.com/dcantrell/bsdutils, derived from
// FreeBSD) and toybox handle multibyte delimiter characters in the way a user would
// likely expect. BusyBox and GNU Core Utilities do not.
#[test]
fn test_multi_byte_delimiter() {
    let stdin_text = "\
1
2
3
4
5
6
";
    let joined = "\
1!2ß3@4!5ß6
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        let argv = [flag, "!ß@", "-s"];
        new_ucmd!()
            .args(&argv)
            .pipe_in(stdin_text)
            .succeeds()
            .stdout_only(joined);
    }
}