1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

join: add support for non-unicode field separators

This allows `-t` to take values that are not valid Unicode (but are still a
single byte) on Unix-like platforms. Other platforms, which as of the time of
this commit do not support `OsStr::as_bytes()`, could possibly be supported in
the future, but that would require design decisions about what a non-Unicode
field separator means on those platforms.
This commit is contained in:
Justin Tracey 2022-01-21 14:22:11 -05:00
parent 7b3cfcf708
commit 58d65fb953
3 changed files with 47 additions and 3 deletions

View file

@@ -1,6 +1,10 @@
// spell-checker:ignore (words) autoformat
use crate::common::util::*;
#[cfg(unix)]
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
#[cfg(windows)]
use std::{ffi::OsString, os::windows::ffi::OsStringExt};
#[test]
fn empty_files() {
@@ -364,6 +368,32 @@ fn non_unicode() {
.arg("non-unicode_2.bin")
.succeeds()
.stdout_only_fixture("non-unicode.expected");
#[cfg(unix)]
{
let invalid_utf8: u8 = 167;
new_ucmd!()
.arg("-t")
.arg(OsStr::from_bytes(&[invalid_utf8]))
.arg("non-unicode_1.bin")
.arg("non-unicode_2.bin")
.succeeds()
.stdout_only_fixture("non-unicode_sep.expected");
}
#[cfg(windows)]
{
let invalid_utf16: OsString = OsStringExt::from_wide(&[0xD800]);
new_ucmd!()
.arg("-t")
.arg(&invalid_utf16)
.arg("non-unicode_1.bin")
.arg("non-unicode_2.bin")
.fails()
.stderr_is(
"join: unprintable field separators are only supported on unix-like platforms",
);
}
}
#[test]