1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

paste: implement "-z" flag

Fixes #3637
This commit is contained in:
Daniel Hofstetter 2022-06-19 12:37:54 +02:00 committed by Sylvestre Ledru
parent 75edeea5e4
commit d0f608c69f
2 changed files with 131 additions and 17 deletions

View file

@ -8,6 +8,7 @@
// spell-checker:ignore (ToDO) delim
use clap::{crate_version, Arg, Command};
use std::fmt::Display;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
use std::path::Path;
@ -20,16 +21,34 @@ mod options {
pub const DELIMITER: &str = "delimiters";
pub const SERIAL: &str = "serial";
pub const FILE: &str = "file";
pub const ZERO_TERMINATED: &str = "zero-terminated";
}
/// Byte that terminates each record, both when reading input and when
/// writing output.
///
/// Each variant's discriminant is the terminator byte itself, so
/// `line_ending as u8` gives the value to search for or to emit.
#[derive(Clone, Copy)]
#[repr(u8)]
enum LineEnding {
    /// ASCII line feed (`\n`).
    Newline = b'\n',
    /// NUL byte (`\0`).
    Nul = b'\0',
}

impl Display for LineEnding {
    /// Writes the terminator as a one-character string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let terminator = match *self {
            Self::Newline => "\n",
            Self::Nul => "\0",
        };
        f.write_str(terminator)
    }
}
// Reads one delimited chunk of input into `buf`: from `reader` when it is
// `Some`, otherwise from standard input.
// NOTE(review): this span is a rendered diff with old and new lines side by
// side; the `read_line` / `String` lines appear to be the removed version and
// the `read_until` / `Vec<u8>` lines the added one - confirm against the repo.
// The result of the underlying read call is returned unchanged; callers treat
// `Ok(0)` as end of input and strip a trailing terminator byte themselves.
fn read_line<R: Read>(
fn read_until<R: Read>(
reader: Option<&mut BufReader<R>>,
buf: &mut String,
byte: u8,
buf: &mut Vec<u8>,
) -> std::io::Result<usize> {
match reader {
Some(reader) => reader.read_line(buf),
None => stdin().read_line(buf),
Some(reader) => reader.read_until(byte, buf),
None => stdin().lock().read_until(byte, buf),
}
}
@ -44,7 +63,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.unwrap()
.map(|s| s.to_owned())
.collect();
paste(files, serial, delimiters)
let line_ending = if matches.is_present(options::ZERO_TERMINATED) {
LineEnding::Nul
} else {
LineEnding::Newline
};
paste(files, serial, delimiters, line_ending)
}
pub fn uu_app<'a>() -> Command<'a> {
@ -74,9 +99,20 @@ pub fn uu_app<'a>() -> Command<'a> {
.default_value("-")
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::ZERO_TERMINATED)
.long(options::ZERO_TERMINATED)
.short('z')
.help("line delimiter is NUL, not newline"),
)
}
fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> {
fn paste(
filenames: Vec<String>,
serial: bool,
delimiters: &str,
line_ending: LineEnding,
) -> UResult<()> {
let mut files = Vec::with_capacity(filenames.len());
for name in filenames {
let file = if name == "-" {
@ -91,28 +127,44 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>
let delimiters: Vec<char> = unescape(delimiters).chars().collect();
let mut delim_count = 0;
let mut delim_length = 1;
let stdout = stdout();
let mut stdout = stdout.lock();
let mut output = String::new();
let mut output = Vec::new();
if serial {
for file in &mut files {
output.clear();
loop {
match read_line(file.as_mut(), &mut output) {
match read_until(file.as_mut(), line_ending as u8, &mut output) {
Ok(0) => break,
Ok(_) => {
if output.ends_with('\n') {
if output.ends_with(&[line_ending as u8]) {
output.pop();
}
output.push(delimiters[delim_count % delimiters.len()]);
// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];
let ch =
delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
delim_length = ch.len();
for byte in buffer.iter().take(delim_length) {
output.push(*byte);
}
}
Err(e) => return Err(e.map_err_context(String::new)),
}
delim_count += 1;
}
output.pop();
writeln!(stdout, "{}", output)?;
// remove final delimiter
output.truncate(output.len() - delim_length);
write!(
stdout,
"{}{}",
String::from_utf8_lossy(&output),
line_ending
)?;
}
} else {
let mut eof = vec![false; files.len()];
@ -123,28 +175,42 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>
if eof[i] {
eof_count += 1;
} else {
match read_line(file.as_mut(), &mut output) {
match read_until(file.as_mut(), line_ending as u8, &mut output) {
Ok(0) => {
eof[i] = true;
eof_count += 1;
}
Ok(_) => {
if output.ends_with('\n') {
if output.ends_with(&[line_ending as u8]) {
output.pop();
}
}
Err(e) => return Err(e.map_err_context(String::new)),
}
}
output.push(delimiters[delim_count % delimiters.len()]);
// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];
let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
delim_length = ch.len();
for byte in buffer.iter().take(delim_length) {
output.push(*byte);
}
delim_count += 1;
}
if files.len() == eof_count {
break;
}
// Remove final delimiter
output.pop();
writeln!(stdout, "{}", output)?;
output.truncate(output.len() - delim_length);
write!(
stdout,
"{}{}",
String::from_utf8_lossy(&output),
line_ending
)?;
delim_count = 0;
}
}

View file

@ -33,6 +33,30 @@ static EXAMPLE_DATA: &[TestData] = &[
ins: &["a\n", "b\n"],
out: "a\tb\n",
},
TestData {
name: "zno-nl-1",
args: &["-z"],
ins: &["a", "b"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-2",
args: &["-z"],
ins: &["a\0", "b"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-3",
args: &["-z"],
ins: &["a", "b\0"],
out: "a\tb\0",
},
TestData {
name: "zno-nl-4",
args: &["-z"],
ins: &["a\0", "b\0"],
out: "a\tb\0",
},
// Same as above, but with two lines in each input file and the
// addition of the -d option to make SPACE be the output
// delimiter.
@ -60,6 +84,30 @@ static EXAMPLE_DATA: &[TestData] = &[
ins: &["1\na\n", "2\nb\n"],
out: "1 2\na b\n",
},
TestData {
name: "zno-nla1",
args: &["-zd", " "],
ins: &["1\0a", "2\0b"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla2",
args: &["-zd", " "],
ins: &["1\0a\0", "2\0b"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla3",
args: &["-zd", " "],
ins: &["1\0a", "2\0b\0"],
out: "1 2\0a b\0",
},
TestData {
name: "zno-nla4",
args: &["-zd", " "],
ins: &["1\0a\0", "2\0b\0"],
out: "1 2\0a b\0",
},
TestData {
name: "multibyte-delim",
args: &["-d", "💣"],