1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

join: add support for multibyte separators (#6736)

* join: add test for multibyte separators

* join: implement support for multibyte separators

* join: use a trait instead of an enum for separator

* join: test whitespace merging
This commit is contained in:
Justin Tracey 2024-10-06 05:48:08 -04:00 committed by GitHub
parent d8eb4e2214
commit a51a731704
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 251 additions and 89 deletions

View file

@ -58,6 +58,25 @@ fn default_arguments() {
.stdout_only_fixture("default.expected");
}
// Runs `join` twice over the fixture `contiguous_separators.txt` and piped-in
// input, once with the default separator and once with `-t ,`, and checks the
// exact stdout of each run.
// NOTE(review): the fixture's contents are not visible here; going by the test
// name, the intent is that only runs of whitespace are merged into a single
// separator, while repeated explicit separators are kept — confirm against the
// fixture.
#[test]
fn only_whitespace_separators_merge() {
// First run: no `-t`, so the default field separator is in effect.
new_ucmd!()
.arg("contiguous_separators.txt")
// `-` is the second input; presumably stdin, which `pipe_in` supplies below.
.arg("-")
.pipe_in(" a ,c ")
.succeeds()
.stdout_only("a ,,,b ,c \n");
// Second run: same inputs, but with `,` given explicitly as the separator.
new_ucmd!()
.arg("contiguous_separators.txt")
.arg("-t")
.arg(",")
.arg("-")
.pipe_in(" a ,c ")
.succeeds()
// The expected output keeps the leading space and the `,,,` run verbatim.
.stdout_only(" a ,,,b,c \n");
}
#[test]
fn different_fields() {
new_ucmd!()
@ -208,9 +227,9 @@ fn tab_multi_character() {
.arg("semicolon_fields_1.txt")
.arg("semicolon_fields_2.txt")
.arg("-t")
.arg("э")
.arg("ab")
.fails()
.stderr_is("join: multi-character tab э\n");
.stderr_is("join: multi-character tab ab\n");
}
#[test]
@ -437,14 +456,22 @@ fn non_unicode() {
#[cfg(unix)]
{
let invalid_utf8: u8 = 167;
let non_utf8_byte: u8 = 167;
new_ucmd!()
.arg("-t")
.arg(OsStr::from_bytes(&[invalid_utf8]))
.arg(OsStr::from_bytes(&[non_utf8_byte]))
.arg("non-unicode_1.bin")
.arg("non-unicode_2.bin")
.succeeds()
.stdout_only_fixture("non-unicode_sep.expected");
new_ucmd!()
.arg("-t")
.arg(OsStr::from_bytes(&[non_utf8_byte, non_utf8_byte]))
.arg("non-unicode_1.bin")
.arg("non-unicode_2.bin")
.fails()
.stderr_is("join: non-UTF-8 multi-byte tab\n");
}
#[cfg(windows)]
@ -462,6 +489,16 @@ fn non_unicode() {
}
}
// Checks that `join` accepts a field separator that is a single character but
// more than one byte long: `§` is encoded as two bytes in UTF-8.
// The run must succeed and its stdout must match the `multibyte_sep.expected`
// fixture.
#[test]
fn multibyte_sep() {
new_ucmd!()
// The separator is attached directly to the flag, as one argument.
.arg("-t§")
.arg("multibyte_sep_1.txt")
.arg("multibyte_sep_2.txt")
.succeeds()
.stdout_only_fixture("multibyte_sep.expected");
}
#[test]
fn null_field_separators() {
new_ucmd!()