mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
parent
75edeea5e4
commit
d0f608c69f
2 changed files with 131 additions and 17 deletions
|
@ -8,6 +8,7 @@
|
||||||
// spell-checker:ignore (ToDO) delim
|
// spell-checker:ignore (ToDO) delim
|
||||||
|
|
||||||
use clap::{crate_version, Arg, Command};
|
use clap::{crate_version, Arg, Command};
|
||||||
|
use std::fmt::Display;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
|
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
@ -20,16 +21,34 @@ mod options {
|
||||||
pub const DELIMITER: &str = "delimiters";
|
pub const DELIMITER: &str = "delimiters";
|
||||||
pub const SERIAL: &str = "serial";
|
pub const SERIAL: &str = "serial";
|
||||||
pub const FILE: &str = "file";
|
pub const FILE: &str = "file";
|
||||||
|
pub const ZERO_TERMINATED: &str = "zero-terminated";
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(u8)]
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
enum LineEnding {
|
||||||
|
Newline = b'\n',
|
||||||
|
Nul = 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for LineEnding {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Newline => writeln!(f),
|
||||||
|
Self::Nul => write!(f, "\0"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wraps BufReader and stdin
|
// Wraps BufReader and stdin
|
||||||
fn read_line<R: Read>(
|
fn read_until<R: Read>(
|
||||||
reader: Option<&mut BufReader<R>>,
|
reader: Option<&mut BufReader<R>>,
|
||||||
buf: &mut String,
|
byte: u8,
|
||||||
|
buf: &mut Vec<u8>,
|
||||||
) -> std::io::Result<usize> {
|
) -> std::io::Result<usize> {
|
||||||
match reader {
|
match reader {
|
||||||
Some(reader) => reader.read_line(buf),
|
Some(reader) => reader.read_until(byte, buf),
|
||||||
None => stdin().read_line(buf),
|
None => stdin().lock().read_until(byte, buf),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,7 +63,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.map(|s| s.to_owned())
|
.map(|s| s.to_owned())
|
||||||
.collect();
|
.collect();
|
||||||
paste(files, serial, delimiters)
|
let line_ending = if matches.is_present(options::ZERO_TERMINATED) {
|
||||||
|
LineEnding::Nul
|
||||||
|
} else {
|
||||||
|
LineEnding::Newline
|
||||||
|
};
|
||||||
|
|
||||||
|
paste(files, serial, delimiters, line_ending)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn uu_app<'a>() -> Command<'a> {
|
pub fn uu_app<'a>() -> Command<'a> {
|
||||||
|
@ -74,9 +99,20 @@ pub fn uu_app<'a>() -> Command<'a> {
|
||||||
.default_value("-")
|
.default_value("-")
|
||||||
.value_hint(clap::ValueHint::FilePath),
|
.value_hint(clap::ValueHint::FilePath),
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new(options::ZERO_TERMINATED)
|
||||||
|
.long(options::ZERO_TERMINATED)
|
||||||
|
.short('z')
|
||||||
|
.help("line delimiter is NUL, not newline"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> {
|
fn paste(
|
||||||
|
filenames: Vec<String>,
|
||||||
|
serial: bool,
|
||||||
|
delimiters: &str,
|
||||||
|
line_ending: LineEnding,
|
||||||
|
) -> UResult<()> {
|
||||||
let mut files = Vec::with_capacity(filenames.len());
|
let mut files = Vec::with_capacity(filenames.len());
|
||||||
for name in filenames {
|
for name in filenames {
|
||||||
let file = if name == "-" {
|
let file = if name == "-" {
|
||||||
|
@ -91,28 +127,44 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>
|
||||||
|
|
||||||
let delimiters: Vec<char> = unescape(delimiters).chars().collect();
|
let delimiters: Vec<char> = unescape(delimiters).chars().collect();
|
||||||
let mut delim_count = 0;
|
let mut delim_count = 0;
|
||||||
|
let mut delim_length = 1;
|
||||||
let stdout = stdout();
|
let stdout = stdout();
|
||||||
let mut stdout = stdout.lock();
|
let mut stdout = stdout.lock();
|
||||||
|
|
||||||
let mut output = String::new();
|
let mut output = Vec::new();
|
||||||
if serial {
|
if serial {
|
||||||
for file in &mut files {
|
for file in &mut files {
|
||||||
output.clear();
|
output.clear();
|
||||||
loop {
|
loop {
|
||||||
match read_line(file.as_mut(), &mut output) {
|
match read_until(file.as_mut(), line_ending as u8, &mut output) {
|
||||||
Ok(0) => break,
|
Ok(0) => break,
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
if output.ends_with('\n') {
|
if output.ends_with(&[line_ending as u8]) {
|
||||||
output.pop();
|
output.pop();
|
||||||
}
|
}
|
||||||
output.push(delimiters[delim_count % delimiters.len()]);
|
// a buffer of length four is large enough to encode any char
|
||||||
|
let mut buffer = [0; 4];
|
||||||
|
let ch =
|
||||||
|
delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
|
||||||
|
delim_length = ch.len();
|
||||||
|
|
||||||
|
for byte in buffer.iter().take(delim_length) {
|
||||||
|
output.push(*byte);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => return Err(e.map_err_context(String::new)),
|
Err(e) => return Err(e.map_err_context(String::new)),
|
||||||
}
|
}
|
||||||
delim_count += 1;
|
delim_count += 1;
|
||||||
}
|
}
|
||||||
output.pop();
|
// remove final delimiter
|
||||||
writeln!(stdout, "{}", output)?;
|
output.truncate(output.len() - delim_length);
|
||||||
|
|
||||||
|
write!(
|
||||||
|
stdout,
|
||||||
|
"{}{}",
|
||||||
|
String::from_utf8_lossy(&output),
|
||||||
|
line_ending
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut eof = vec![false; files.len()];
|
let mut eof = vec![false; files.len()];
|
||||||
|
@ -123,28 +175,42 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()>
|
||||||
if eof[i] {
|
if eof[i] {
|
||||||
eof_count += 1;
|
eof_count += 1;
|
||||||
} else {
|
} else {
|
||||||
match read_line(file.as_mut(), &mut output) {
|
match read_until(file.as_mut(), line_ending as u8, &mut output) {
|
||||||
Ok(0) => {
|
Ok(0) => {
|
||||||
eof[i] = true;
|
eof[i] = true;
|
||||||
eof_count += 1;
|
eof_count += 1;
|
||||||
}
|
}
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
if output.ends_with('\n') {
|
if output.ends_with(&[line_ending as u8]) {
|
||||||
output.pop();
|
output.pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => return Err(e.map_err_context(String::new)),
|
Err(e) => return Err(e.map_err_context(String::new)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output.push(delimiters[delim_count % delimiters.len()]);
|
// a buffer of length four is large enough to encode any char
|
||||||
|
let mut buffer = [0; 4];
|
||||||
|
let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
|
||||||
|
delim_length = ch.len();
|
||||||
|
|
||||||
|
for byte in buffer.iter().take(delim_length) {
|
||||||
|
output.push(*byte);
|
||||||
|
}
|
||||||
|
|
||||||
delim_count += 1;
|
delim_count += 1;
|
||||||
}
|
}
|
||||||
if files.len() == eof_count {
|
if files.len() == eof_count {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Remove final delimiter
|
// Remove final delimiter
|
||||||
output.pop();
|
output.truncate(output.len() - delim_length);
|
||||||
writeln!(stdout, "{}", output)?;
|
|
||||||
|
write!(
|
||||||
|
stdout,
|
||||||
|
"{}{}",
|
||||||
|
String::from_utf8_lossy(&output),
|
||||||
|
line_ending
|
||||||
|
)?;
|
||||||
delim_count = 0;
|
delim_count = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,30 @@ static EXAMPLE_DATA: &[TestData] = &[
|
||||||
ins: &["a\n", "b\n"],
|
ins: &["a\n", "b\n"],
|
||||||
out: "a\tb\n",
|
out: "a\tb\n",
|
||||||
},
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nl-1",
|
||||||
|
args: &["-z"],
|
||||||
|
ins: &["a", "b"],
|
||||||
|
out: "a\tb\0",
|
||||||
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nl-2",
|
||||||
|
args: &["-z"],
|
||||||
|
ins: &["a\0", "b"],
|
||||||
|
out: "a\tb\0",
|
||||||
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nl-3",
|
||||||
|
args: &["-z"],
|
||||||
|
ins: &["a", "b\0"],
|
||||||
|
out: "a\tb\0",
|
||||||
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nl-4",
|
||||||
|
args: &["-z"],
|
||||||
|
ins: &["a\0", "b\0"],
|
||||||
|
out: "a\tb\0",
|
||||||
|
},
|
||||||
// Same as above, but with a two lines in each input file and the
|
// Same as above, but with a two lines in each input file and the
|
||||||
// addition of the -d option to make SPACE be the output
|
// addition of the -d option to make SPACE be the output
|
||||||
// delimiter.
|
// delimiter.
|
||||||
|
@ -60,6 +84,30 @@ static EXAMPLE_DATA: &[TestData] = &[
|
||||||
ins: &["1\na\n", "2\nb\n"],
|
ins: &["1\na\n", "2\nb\n"],
|
||||||
out: "1 2\na b\n",
|
out: "1 2\na b\n",
|
||||||
},
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nla1",
|
||||||
|
args: &["-zd", " "],
|
||||||
|
ins: &["1\0a", "2\0b"],
|
||||||
|
out: "1 2\0a b\0",
|
||||||
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nla2",
|
||||||
|
args: &["-zd", " "],
|
||||||
|
ins: &["1\0a\0", "2\0b"],
|
||||||
|
out: "1 2\0a b\0",
|
||||||
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nla3",
|
||||||
|
args: &["-zd", " "],
|
||||||
|
ins: &["1\0a", "2\0b\0"],
|
||||||
|
out: "1 2\0a b\0",
|
||||||
|
},
|
||||||
|
TestData {
|
||||||
|
name: "zno-nla4",
|
||||||
|
args: &["-zd", " "],
|
||||||
|
ins: &["1\0a\0", "2\0b\0"],
|
||||||
|
out: "1 2\0a b\0",
|
||||||
|
},
|
||||||
TestData {
|
TestData {
|
||||||
name: "multibyte-delim",
|
name: "multibyte-delim",
|
||||||
args: &["-d", "💣"],
|
args: &["-d", "💣"],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue