mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
join: add support for non-unicode field separators
This allows `-t` to accept a value that is not valid Unicode (but is still a single byte) on Unix-like platforms. Other platforms, which at the time of this commit do not support `OsStr::as_bytes()`, could be supported in the future, but doing so would require design decisions about what a non-Unicode separator should mean on those platforms.
This commit is contained in:
parent
7b3cfcf708
commit
58d65fb953
3 changed files with 47 additions and 3 deletions
|
@ -14,6 +14,8 @@ use clap::{crate_version, App, AppSettings, Arg};
|
|||
use std::cmp::Ordering;
|
||||
use std::fs::File;
|
||||
use std::io::{stdin, stdout, BufRead, BufReader, Split, Stdin, Write};
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use uucore::display::Quotable;
|
||||
use uucore::error::{set_exit_code, UResult, USimpleError};
|
||||
|
||||
|
@ -532,8 +534,19 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
settings.key1 = get_field_number(keys, key1)?;
|
||||
settings.key2 = get_field_number(keys, key2)?;
|
||||
|
||||
if let Some(value_str) = matches.value_of("t") {
|
||||
let value = value_str.as_bytes();
|
||||
if let Some(value_os) = matches.value_of_os("t") {
|
||||
#[cfg(unix)]
|
||||
let value = value_os.as_bytes();
|
||||
#[cfg(not(unix))]
|
||||
let value = match value_os.to_str() {
|
||||
Some(value) => value.as_bytes(),
|
||||
None => {
|
||||
return Err(USimpleError::new(
|
||||
1,
|
||||
"unprintable field separators are only supported on unix-like platforms",
|
||||
))
|
||||
}
|
||||
};
|
||||
settings.separator = match value.len() {
|
||||
0 => Sep::Line,
|
||||
1 => Sep::Char(value[0]),
|
||||
|
@ -541,7 +554,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
_ => {
|
||||
return Err(USimpleError::new(
|
||||
1,
|
||||
format!("multi-character tab {}", value_str),
|
||||
format!("multi-character tab {}", value_os.to_string_lossy()),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
@ -655,6 +668,7 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2",
|
|||
.short('t')
|
||||
.takes_value(true)
|
||||
.value_name("CHAR")
|
||||
.allow_invalid_utf8(true)
|
||||
.help("use CHAR as input and output field separator"),
|
||||
)
|
||||
.arg(
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
// spell-checker:ignore (words) autoformat
|
||||
|
||||
use crate::common::util::*;
|
||||
#[cfg(unix)]
|
||||
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
|
||||
#[cfg(windows)]
|
||||
use std::{ffi::OsString, os::windows::ffi::OsStringExt};
|
||||
|
||||
#[test]
|
||||
fn empty_files() {
|
||||
|
@ -364,6 +368,32 @@ fn non_unicode() {
|
|||
.arg("non-unicode_2.bin")
|
||||
.succeeds()
|
||||
.stdout_only_fixture("non-unicode.expected");
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
let invalid_utf8: u8 = 167;
|
||||
new_ucmd!()
|
||||
.arg("-t")
|
||||
.arg(OsStr::from_bytes(&[invalid_utf8]))
|
||||
.arg("non-unicode_1.bin")
|
||||
.arg("non-unicode_2.bin")
|
||||
.succeeds()
|
||||
.stdout_only_fixture("non-unicode_sep.expected");
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
{
|
||||
let invalid_utf16: OsString = OsStringExt::from_wide(&[0xD800]);
|
||||
new_ucmd!()
|
||||
.arg("-t")
|
||||
.arg(&invalid_utf16)
|
||||
.arg("non-unicode_1.bin")
|
||||
.arg("non-unicode_2.bin")
|
||||
.fails()
|
||||
.stderr_is(
|
||||
"join: unprintable field separators are only supported on unix-like platforms",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
BIN
tests/fixtures/join/non-unicode_sep.expected
vendored
Normal file
BIN
tests/fixtures/join/non-unicode_sep.expected
vendored
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue