mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
paste: permit the delimiter list to be empty (#6714)
* paste: permit the delimiter list to be empty. Also: refactored the delimiter processing logic
* Extract duplicated code into function
* Address PR comments. Improve code structure.
* Fix additional paste bugs
* Fix additional paste bugs
* Simplify backslash delimiter validation
* Fix Clippy violations
This commit is contained in:
parent
7c3a9380f1
commit
c41c601b45
2 changed files with 426 additions and 101 deletions
|
@ -3,13 +3,14 @@
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) delim
|
|
||||||
|
|
||||||
use clap::{crate_version, Arg, ArgAction, Command};
|
use clap::{crate_version, Arg, ArgAction, Command};
|
||||||
|
use std::cell::{OnceCell, RefCell};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
|
use std::io::{stdin, stdout, BufRead, BufReader, Stdin, Write};
|
||||||
use std::path::Path;
|
use std::iter::Cycle;
|
||||||
use uucore::error::{FromIo, UResult, USimpleError};
|
use std::rc::Rc;
|
||||||
|
use std::slice::Iter;
|
||||||
|
use uucore::error::{UResult, USimpleError};
|
||||||
use uucore::line_ending::LineEnding;
|
use uucore::line_ending::LineEnding;
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{format_usage, help_about, help_usage};
|
||||||
|
|
||||||
|
@ -23,18 +24,6 @@ mod options {
|
||||||
pub const ZERO_TERMINATED: &str = "zero-terminated";
|
pub const ZERO_TERMINATED: &str = "zero-terminated";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wraps BufReader and stdin
|
|
||||||
fn read_until<R: Read>(
|
|
||||||
reader: Option<&mut BufReader<R>>,
|
|
||||||
byte: u8,
|
|
||||||
buf: &mut Vec<u8>,
|
|
||||||
) -> std::io::Result<usize> {
|
|
||||||
match reader {
|
|
||||||
Some(reader) => reader.read_until(byte, buf),
|
|
||||||
None => stdin().lock().read_until(byte, buf),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[uucore::main]
|
#[uucore::main]
|
||||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
let matches = uu_app().try_get_matches_from(args)?;
|
let matches = uu_app().try_get_matches_from(args)?;
|
||||||
|
@ -96,120 +85,292 @@ fn paste(
|
||||||
delimiters: &str,
|
delimiters: &str,
|
||||||
line_ending: LineEnding,
|
line_ending: LineEnding,
|
||||||
) -> UResult<()> {
|
) -> UResult<()> {
|
||||||
let mut files = Vec::with_capacity(filenames.len());
|
let unescaped_and_encoded_delimiters = parse_delimiters(delimiters)?;
|
||||||
for name in filenames {
|
|
||||||
let file = if name == "-" {
|
let stdin_once_cell = OnceCell::<Rc<RefCell<Stdin>>>::new();
|
||||||
None
|
|
||||||
} else {
|
let mut input_source_vec = Vec::with_capacity(filenames.len());
|
||||||
let path = Path::new(&name);
|
|
||||||
let r = File::open(path).map_err_context(String::new)?;
|
for filename in filenames {
|
||||||
Some(BufReader::new(r))
|
let input_source = match filename.as_str() {
|
||||||
|
"-" => InputSource::StandardInput(
|
||||||
|
stdin_once_cell
|
||||||
|
.get_or_init(|| Rc::new(RefCell::new(stdin())))
|
||||||
|
.clone(),
|
||||||
|
),
|
||||||
|
st => {
|
||||||
|
let file = File::open(st)?;
|
||||||
|
|
||||||
|
InputSource::File(BufReader::new(file))
|
||||||
|
}
|
||||||
};
|
};
|
||||||
files.push(file);
|
|
||||||
|
input_source_vec.push(input_source);
|
||||||
}
|
}
|
||||||
|
|
||||||
if delimiters.ends_with('\\') && !delimiters.ends_with("\\\\") {
|
let mut stdout = stdout().lock();
|
||||||
return Err(USimpleError::new(
|
|
||||||
1,
|
|
||||||
format!("delimiter list ends with an unescaped backslash: {delimiters}"),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let delimiters: Vec<char> = unescape(delimiters).chars().collect();
|
let line_ending_byte = u8::from(line_ending);
|
||||||
let mut delim_count = 0;
|
let line_ending_byte_array_ref = &[line_ending_byte];
|
||||||
let mut delim_length = 1;
|
|
||||||
let stdout = stdout();
|
let input_source_vec_len = input_source_vec.len();
|
||||||
let mut stdout = stdout.lock();
|
|
||||||
|
let mut delimiter_state = DelimiterState::new(&unescaped_and_encoded_delimiters);
|
||||||
|
|
||||||
let mut output = Vec::new();
|
let mut output = Vec::new();
|
||||||
|
|
||||||
if serial {
|
if serial {
|
||||||
for file in &mut files {
|
for input_source in &mut input_source_vec {
|
||||||
output.clear();
|
output.clear();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match read_until(file.as_mut(), line_ending as u8, &mut output) {
|
match input_source.read_until(line_ending_byte, &mut output)? {
|
||||||
Ok(0) => break,
|
0 => break,
|
||||||
Ok(_) => {
|
_ => {
|
||||||
if output.ends_with(&[line_ending as u8]) {
|
remove_trailing_line_ending_byte(line_ending_byte, &mut output);
|
||||||
output.pop();
|
|
||||||
}
|
|
||||||
// a buffer of length four is large enough to encode any char
|
|
||||||
let mut buffer = [0; 4];
|
|
||||||
let ch =
|
|
||||||
delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
|
|
||||||
delim_length = ch.len();
|
|
||||||
|
|
||||||
for byte in buffer.iter().take(delim_length) {
|
delimiter_state.write_delimiter(&mut output);
|
||||||
output.push(*byte);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(e) => return Err(e.map_err_context(String::new)),
|
|
||||||
}
|
}
|
||||||
delim_count += 1;
|
|
||||||
}
|
}
|
||||||
// remove final delimiter
|
|
||||||
output.truncate(output.len() - delim_length);
|
|
||||||
|
|
||||||
write!(
|
delimiter_state.remove_trailing_delimiter(&mut output);
|
||||||
stdout,
|
|
||||||
"{}{}",
|
stdout.write_all(&output)?;
|
||||||
String::from_utf8_lossy(&output),
|
stdout.write_all(line_ending_byte_array_ref)?;
|
||||||
line_ending
|
|
||||||
)?;
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut eof = vec![false; files.len()];
|
let mut eof = vec![false; input_source_vec_len];
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
output.clear();
|
output.clear();
|
||||||
|
|
||||||
let mut eof_count = 0;
|
let mut eof_count = 0;
|
||||||
for (i, file) in files.iter_mut().enumerate() {
|
|
||||||
|
for (i, input_source) in input_source_vec.iter_mut().enumerate() {
|
||||||
if eof[i] {
|
if eof[i] {
|
||||||
eof_count += 1;
|
eof_count += 1;
|
||||||
} else {
|
} else {
|
||||||
match read_until(file.as_mut(), line_ending as u8, &mut output) {
|
match input_source.read_until(line_ending_byte, &mut output)? {
|
||||||
Ok(0) => {
|
0 => {
|
||||||
eof[i] = true;
|
eof[i] = true;
|
||||||
eof_count += 1;
|
eof_count += 1;
|
||||||
}
|
}
|
||||||
Ok(_) => {
|
_ => {
|
||||||
if output.ends_with(&[line_ending as u8]) {
|
remove_trailing_line_ending_byte(line_ending_byte, &mut output);
|
||||||
output.pop();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(e) => return Err(e.map_err_context(String::new)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// a buffer of length four is large enough to encode any char
|
|
||||||
let mut buffer = [0; 4];
|
|
||||||
let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
|
|
||||||
delim_length = ch.len();
|
|
||||||
|
|
||||||
for byte in buffer.iter().take(delim_length) {
|
delimiter_state.write_delimiter(&mut output);
|
||||||
output.push(*byte);
|
|
||||||
}
|
|
||||||
|
|
||||||
delim_count += 1;
|
|
||||||
}
|
}
|
||||||
if files.len() == eof_count {
|
|
||||||
|
if eof_count == input_source_vec_len {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Remove final delimiter
|
|
||||||
output.truncate(output.len() - delim_length);
|
|
||||||
|
|
||||||
write!(
|
delimiter_state.remove_trailing_delimiter(&mut output);
|
||||||
stdout,
|
|
||||||
"{}{}",
|
stdout.write_all(&output)?;
|
||||||
String::from_utf8_lossy(&output),
|
stdout.write_all(line_ending_byte_array_ref)?;
|
||||||
line_ending
|
|
||||||
)?;
|
// Quote:
|
||||||
delim_count = 0;
|
// When the -s option is not specified:
|
||||||
|
// [...]
|
||||||
|
// The delimiter shall be reset to the first element of list each time a line is processed from each file.
|
||||||
|
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
|
||||||
|
delimiter_state.reset_to_first_delimiter();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unescape all special characters
|
fn parse_delimiters(delimiters: &str) -> UResult<Box<[Box<[u8]>]>> {
|
||||||
fn unescape(s: &str) -> String {
|
/// A single backslash char
|
||||||
s.replace("\\n", "\n")
|
const BACKSLASH: char = '\\';
|
||||||
.replace("\\t", "\t")
|
|
||||||
.replace("\\\\", "\\")
|
fn add_one_byte_single_char_delimiter(vec: &mut Vec<Box<[u8]>>, byte: u8) {
|
||||||
|
vec.push(Box::new([byte]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// a buffer of length four is large enough to encode any char
|
||||||
|
let mut buffer = [0; 4];
|
||||||
|
|
||||||
|
let mut add_single_char_delimiter = |vec: &mut Vec<Box<[u8]>>, ch: char| {
|
||||||
|
let delimiter_encoded = ch.encode_utf8(&mut buffer);
|
||||||
|
|
||||||
|
vec.push(Box::from(delimiter_encoded.as_bytes()));
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters.len());
|
||||||
|
|
||||||
|
let mut chars = delimiters.chars();
|
||||||
|
|
||||||
|
// Unescape all special characters
|
||||||
|
while let Some(char) = chars.next() {
|
||||||
|
match char {
|
||||||
|
BACKSLASH => match chars.next() {
|
||||||
|
// "Empty string (not a null character)"
|
||||||
|
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
|
||||||
|
Some('0') => {
|
||||||
|
vec.push(Box::<[u8; 0]>::new([]));
|
||||||
|
}
|
||||||
|
// "\\" to "\" (U+005C)
|
||||||
|
Some(BACKSLASH) => {
|
||||||
|
add_one_byte_single_char_delimiter(&mut vec, b'\\');
|
||||||
|
}
|
||||||
|
// "\n" to U+000A
|
||||||
|
Some('n') => {
|
||||||
|
add_one_byte_single_char_delimiter(&mut vec, b'\n');
|
||||||
|
}
|
||||||
|
// "\t" to U+0009
|
||||||
|
Some('t') => {
|
||||||
|
add_one_byte_single_char_delimiter(&mut vec, b'\t');
|
||||||
|
}
|
||||||
|
Some(other_char) => {
|
||||||
|
// "If any other characters follow the <backslash>, the results are unspecified."
|
||||||
|
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
|
||||||
|
// However, other implementations remove the backslash
|
||||||
|
// See "test_posix_unspecified_delimiter"
|
||||||
|
add_single_char_delimiter(&mut vec, other_char);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
return Err(USimpleError::new(
|
||||||
|
1,
|
||||||
|
format!("delimiter list ends with an unescaped backslash: {delimiters}"),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
non_backslash_char => {
|
||||||
|
add_single_char_delimiter(&mut vec, non_backslash_char);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(vec.into_boxed_slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Drops the final byte of `output` when that byte equals `line_ending_byte`.
///
/// At most one byte is removed; `output` is left untouched when it is empty or ends with any
/// other byte.
fn remove_trailing_line_ending_byte(line_ending_byte: u8, output: &mut Vec<u8>) {
    // The mutation is kept out of any assertion macro, so the removal is an ordinary statement
    // and there is no panic path.
    if output.last() == Some(&line_ending_byte) {
        output.pop();
    }
}
|
||||||
|
|
||||||
|
/// Keeps track of which delimiter has to be written next while lines are being joined.
///
/// The delimiters are borrowed as already unescaped, UTF-8 encoded byte strings.
enum DelimiterState<'a> {
    /// Nothing is ever written: the list is empty or consists of a single empty delimiter.
    NoDelimiters,
    /// The list consists of exactly one non-empty delimiter, which is written every time.
    OneDelimiter(&'a [u8]),
    /// The list has two or more entries, which are used circularly.
    MultipleDelimiters {
        /// The delimiter written most recently (the first delimiter until one has been written).
        current_delimiter: &'a [u8],
        /// The full delimiter list; needed to restart the cycle.
        delimiters: &'a [Box<[u8]>],
        /// Endless iterator that yields the delimiters in order, wrapping around at the end.
        delimiters_iterator: Cycle<Iter<'a, Box<[u8]>>>,
    },
}

impl<'a> DelimiterState<'a> {
    /// Chooses the state variant that matches the number of delimiters in the list.
    fn new(unescaped_and_encoded_delimiters: &'a [Box<[u8]>]) -> DelimiterState<'a> {
        match unescaped_and_encoded_delimiters {
            [] => Self::NoDelimiters,
            // -d '\0' is equivalent to -d ''
            [only_delimiter] if only_delimiter.is_empty() => Self::NoDelimiters,
            [only_delimiter] => Self::OneDelimiter(only_delimiter),
            [first_delimiter, ..] => Self::MultipleDelimiters {
                current_delimiter: first_delimiter,
                delimiters: unescaped_and_encoded_delimiters,
                delimiters_iterator: unescaped_and_encoded_delimiters.iter().cycle(),
            },
        }
    }

    /// Restarts the delimiter cycle so that the next written delimiter is the first of the list.
    ///
    /// This should only be used to return to the start of the delimiter list when the "serial"
    /// option is disabled.
    /// This is a no-op unless there are multiple delimiters.
    fn reset_to_first_delimiter(&mut self) {
        match self {
            Self::MultipleDelimiters {
                delimiters_iterator,
                delimiters,
                ..
            } => {
                *delimiters_iterator = delimiters.iter().cycle();
            }
            Self::NoDelimiters | Self::OneDelimiter(_) => {}
        }
    }

    /// Remove the trailing delimiter.
    ///
    /// The delimiter in question is the one written most recently. Bytes are only removed when
    /// `output` really ends with that delimiter, so an `output` to which no delimiter has been
    /// appended (for instance an empty one) is left as it is.
    /// If there are no delimiters, or the most recent delimiter is the empty "\0" delimiter, this
    /// is a no-op.
    fn remove_trailing_delimiter(&mut self, output: &mut Vec<u8>) {
        let last_written: &[u8] = match self {
            Self::NoDelimiters => return,
            Self::OneDelimiter(only_delimiter) => *only_delimiter,
            Self::MultipleDelimiters {
                current_delimiter, ..
            } => *current_delimiter,
        };

        // Checking the actual suffix (instead of subtracting lengths) cannot underflow and never
        // cuts off bytes that are not a delimiter.
        if !last_written.is_empty() && output.ends_with(last_written) {
            output.truncate(output.len() - last_written.len());
        }
    }

    /// Append the next delimiter to `output` and remember it as the current one.
    ///
    /// If there are no delimiters, this is a no-op.
    fn write_delimiter(&mut self, output: &mut Vec<u8>) {
        match self {
            Self::NoDelimiters => {}
            Self::OneDelimiter(only_delimiter) => {
                output.extend_from_slice(only_delimiter);
            }
            Self::MultipleDelimiters {
                current_delimiter,
                delimiters_iterator,
                ..
            } => {
                let next_delimiter = delimiters_iterator
                    .next()
                    .expect("a cycle iterator created from a non-empty slice never ends");

                output.extend_from_slice(next_delimiter);

                *current_delimiter = next_delimiter;
            }
        }
    }
}
|
||||||
|
|
||||||
|
enum InputSource {
|
||||||
|
File(BufReader<File>),
|
||||||
|
StandardInput(Rc<RefCell<Stdin>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InputSource {
|
||||||
|
fn read_until(&mut self, byte: u8, buf: &mut Vec<u8>) -> UResult<usize> {
|
||||||
|
let us = match self {
|
||||||
|
Self::File(bu) => bu.read_until(byte, buf)?,
|
||||||
|
Self::StandardInput(rc) => rc
|
||||||
|
.try_borrow()
|
||||||
|
.map_err(|bo| USimpleError::new(1, format!("{bo}")))?
|
||||||
|
.lock()
|
||||||
|
.read_until(byte, buf)?,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(us)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,9 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
|
// spell-checker:ignore bsdutils toybox
|
||||||
|
|
||||||
use crate::common::util::TestScenario;
|
use crate::common::util::TestScenario;
|
||||||
|
|
||||||
struct TestData<'b> {
|
struct TestData<'b> {
|
||||||
|
@ -11,7 +14,7 @@ struct TestData<'b> {
|
||||||
out: &'b str,
|
out: &'b str,
|
||||||
}
|
}
|
||||||
|
|
||||||
static EXAMPLE_DATA: &[TestData] = &[
|
const EXAMPLE_DATA: &[TestData] = &[
|
||||||
// Ensure that paste properly handles files lacking a final newline.
|
// Ensure that paste properly handles files lacking a final newline.
|
||||||
TestData {
|
TestData {
|
||||||
name: "no-nl-1",
|
name: "no-nl-1",
|
||||||
|
@ -172,7 +175,7 @@ fn test_delimiter_list_ending_with_escaped_backslash() {
|
||||||
at.write(&file, one_in);
|
at.write(&file, one_in);
|
||||||
ins.push(file);
|
ins.push(file);
|
||||||
}
|
}
|
||||||
ucmd.args(&[d, "\\\\"])
|
ucmd.args(&[d, r"\\"])
|
||||||
.args(&ins)
|
.args(&ins)
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is("a\\b\n");
|
.stdout_is("a\\b\n");
|
||||||
|
@ -183,13 +186,174 @@ fn test_delimiter_list_ending_with_escaped_backslash() {
|
||||||
fn test_delimiter_list_ending_with_unescaped_backslash() {
|
fn test_delimiter_list_ending_with_unescaped_backslash() {
|
||||||
for d in ["-d", "--delimiters"] {
|
for d in ["-d", "--delimiters"] {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&[d, "\\"])
|
.args(&[d, r"\"])
|
||||||
.fails()
|
.fails()
|
||||||
.stderr_contains("delimiter list ends with an unescaped backslash: \\");
|
.stderr_contains(r"delimiter list ends with an unescaped backslash: \");
|
||||||
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&[d, "_\\"])
|
.args(&[d, r"\\\"])
|
||||||
.fails()
|
.fails()
|
||||||
.stderr_contains("delimiter list ends with an unescaped backslash: _\\");
|
.stderr_contains(r"delimiter list ends with an unescaped backslash: \\\");
|
||||||
|
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&[d, r"_\"])
|
||||||
|
.fails()
|
||||||
|
.stderr_contains(r"delimiter list ends with an unescaped backslash: _\");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_delimiter_list_empty() {
    // With an empty delimiter list, serial mode joins the lines with nothing in between
    let piped_input = "A ALPHA 1 _\nB BRAVO 2 _\nC CHARLIE 3 _\n";
    let expected_output = "A ALPHA 1 _B BRAVO 2 _C CHARLIE 3 _\n";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, "", "-s"])
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_output);
    }
}
|
||||||
|
|
||||||
|
// Was panicking (usize subtraction that would have resulted in a negative number)
|
||||||
|
// Not observable in release builds, since integer overflow checking is not enabled
|
||||||
|
#[test]
fn test_delimiter_truncation() {
    // Standard input is named three times, so three output lines are expected
    let piped_input = "FIRST\nSECOND\nTHIRD\nFOURTH\nABCDEFG\n";
    let expected_output = "FIRST!SECOND@THIRD#FOURTH!ABCDEFG\n\n\n";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, "!@#", "-s", "-", "-", "-"])
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_output);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn test_non_utf8_input() {
    // The byte 0xC0 can never occur in valid UTF-8; the input must pass through unchanged
    const RAW_BYTES: &[u8] = b"Non-UTF-8 test: \xC0\x00\xC0.\n";

    new_ucmd!()
        .pipe_in(RAW_BYTES)
        .succeeds()
        .stdout_only_bytes(RAW_BYTES);
}
|
||||||
|
|
||||||
|
#[test]
fn test_three_trailing_backslashes_delimiter() {
    // Three backslashes: one escaped backslash followed by a dangling, unescaped one
    let three_backslashes_string = r"\".repeat(3);

    // The message does not depend on the option spelling, so build it once
    let expected_suffix = format!(
        ": delimiter list ends with an unescaped backslash: {three_backslashes_string}\n"
    );

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, three_backslashes_string.as_str()])
            .fails()
            .no_stdout()
            .stderr_str_check(|stderr| stderr.ends_with(&expected_suffix));
    }
}
|
||||||
|
|
||||||
|
// "If any other characters follow the <backslash>, the results are unspecified."
|
||||||
|
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
|
||||||
|
// However, other implementations remove the backslash
|
||||||
|
#[test]
fn test_posix_unspecified_delimiter() {
    // A backslash before an ordinary character is dropped, leaving "z" as the delimiter
    let piped_input = "1\n2\n3\n4\n";
    let expected_output = "1z2z3z4\n";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, r"\z", "-s"])
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_output);
    }
}
|
||||||
|
|
||||||
|
// "Empty string (not a null character)"
|
||||||
|
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
|
||||||
|
#[test]
fn test_backslash_zero_delimiter() {
    // The list is: empty delimiter, "z", empty delimiter (used circularly)
    let piped_input = "1\n2\n3\n4\n5\n6\n";
    let expected_output = "12z345z6\n";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, r"\0z\0", "-s"])
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_output);
    }
}
|
||||||
|
|
||||||
|
// As of 2024-10-09, only bsdutils (https://github.com/dcantrell/bsdutils, derived from FreeBSD) and toybox handle
|
||||||
|
// multibyte delimiter characters in the way a user would likely expect. BusyBox and GNU Core Utilities do not.
|
||||||
|
#[test]
fn test_multi_byte_delimiter() {
    // "ß" takes more than one byte in UTF-8 and must be emitted as a single delimiter
    let piped_input = "1\n2\n3\n4\n5\n6\n";
    let expected_output = "1!2ß3@4!5ß6\n";

    for delimiter_flag in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[delimiter_flag, "!ß@", "-s"])
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_output);
    }
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue