mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
Merge pull request #2851 from jtracey/join-strless
join: operate on bytes instead of Strings
This commit is contained in:
commit
00c11b184f
8 changed files with 105 additions and 60 deletions
|
@ -13,7 +13,7 @@ extern crate uucore;
|
||||||
use clap::{crate_version, App, Arg};
|
use clap::{crate_version, App, Arg};
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, BufRead, BufReader, Lines, Stdin};
|
use std::io::{stdin, stdout, BufRead, BufReader, Split, Stdin, Write};
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{set_exit_code, UResult, USimpleError};
|
use uucore::error::{set_exit_code, UResult, USimpleError};
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ enum FileNum {
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
enum Sep {
|
enum Sep {
|
||||||
Char(char),
|
Char(u8),
|
||||||
Line,
|
Line,
|
||||||
Whitespaces,
|
Whitespaces,
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,7 @@ struct Settings {
|
||||||
separator: Sep,
|
separator: Sep,
|
||||||
autoformat: bool,
|
autoformat: bool,
|
||||||
format: Vec<Spec>,
|
format: Vec<Spec>,
|
||||||
empty: String,
|
empty: Vec<u8>,
|
||||||
check_order: CheckOrder,
|
check_order: CheckOrder,
|
||||||
headers: bool,
|
headers: bool,
|
||||||
}
|
}
|
||||||
|
@ -66,7 +66,7 @@ impl Default for Settings {
|
||||||
separator: Sep::Whitespaces,
|
separator: Sep::Whitespaces,
|
||||||
autoformat: false,
|
autoformat: false,
|
||||||
format: vec![],
|
format: vec![],
|
||||||
empty: String::new(),
|
empty: vec![],
|
||||||
check_order: CheckOrder::Default,
|
check_order: CheckOrder::Default,
|
||||||
headers: false,
|
headers: false,
|
||||||
}
|
}
|
||||||
|
@ -75,13 +75,13 @@ impl Default for Settings {
|
||||||
|
|
||||||
/// Output representation.
|
/// Output representation.
|
||||||
struct Repr<'a> {
|
struct Repr<'a> {
|
||||||
separator: char,
|
separator: u8,
|
||||||
format: &'a [Spec],
|
format: &'a [Spec],
|
||||||
empty: &'a str,
|
empty: &'a [u8],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Repr<'a> {
|
impl<'a> Repr<'a> {
|
||||||
fn new(separator: char, format: &'a [Spec], empty: &'a str) -> Repr<'a> {
|
fn new(separator: u8, format: &'a [Spec], empty: &'a [u8]) -> Repr<'a> {
|
||||||
Repr {
|
Repr {
|
||||||
separator,
|
separator,
|
||||||
format,
|
format,
|
||||||
|
@ -94,32 +94,34 @@ impl<'a> Repr<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Print the field or empty filler if the field is not set.
|
/// Print the field or empty filler if the field is not set.
|
||||||
fn print_field(&self, field: Option<&str>) {
|
fn print_field(&self, field: Option<&Vec<u8>>) -> Result<(), std::io::Error> {
|
||||||
let value = match field {
|
let value = match field {
|
||||||
Some(field) => field,
|
Some(field) => field,
|
||||||
None => self.empty,
|
None => self.empty,
|
||||||
};
|
};
|
||||||
|
|
||||||
print!("{}", value);
|
stdout().write_all(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Print each field except the one at the index.
|
/// Print each field except the one at the index.
|
||||||
fn print_fields(&self, line: &Line, index: usize) {
|
fn print_fields(&self, line: &Line, index: usize) -> Result<(), std::io::Error> {
|
||||||
for i in 0..line.fields.len() {
|
for i in 0..line.fields.len() {
|
||||||
if i != index {
|
if i != index {
|
||||||
print!("{}{}", self.separator, line.fields[i]);
|
stdout().write_all(&[self.separator])?;
|
||||||
|
stdout().write_all(&line.fields[i])?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Print each field or the empty filler if the field is not set.
|
/// Print each field or the empty filler if the field is not set.
|
||||||
fn print_format<F>(&self, f: F)
|
fn print_format<F>(&self, f: F) -> Result<(), std::io::Error>
|
||||||
where
|
where
|
||||||
F: Fn(&Spec) -> Option<&'a str>,
|
F: Fn(&Spec) -> Option<&'a Vec<u8>>,
|
||||||
{
|
{
|
||||||
for i in 0..self.format.len() {
|
for i in 0..self.format.len() {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
print!("{}", self.separator);
|
stdout().write_all(&[self.separator])?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let field = match f(&self.format[i]) {
|
let field = match f(&self.format[i]) {
|
||||||
|
@ -127,8 +129,9 @@ impl<'a> Repr<'a> {
|
||||||
None => self.empty,
|
None => self.empty,
|
||||||
};
|
};
|
||||||
|
|
||||||
print!("{}", field);
|
stdout().write_all(field)?;
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,10 +151,12 @@ impl Input {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compare(&self, field1: Option<&str>, field2: Option<&str>) -> Ordering {
|
fn compare(&self, field1: Option<&Vec<u8>>, field2: Option<&Vec<u8>>) -> Ordering {
|
||||||
if let (Some(field1), Some(field2)) = (field1, field2) {
|
if let (Some(field1), Some(field2)) = (field1, field2) {
|
||||||
if self.ignore_case {
|
if self.ignore_case {
|
||||||
field1.to_lowercase().cmp(&field2.to_lowercase())
|
field1
|
||||||
|
.to_ascii_lowercase()
|
||||||
|
.cmp(&field2.to_ascii_lowercase())
|
||||||
} else {
|
} else {
|
||||||
field1.cmp(field2)
|
field1.cmp(field2)
|
||||||
}
|
}
|
||||||
|
@ -209,14 +214,19 @@ impl Spec {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Line {
|
struct Line {
|
||||||
fields: Vec<String>,
|
fields: Vec<Vec<u8>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Line {
|
impl Line {
|
||||||
fn new(string: String, separator: Sep) -> Line {
|
fn new(string: Vec<u8>, separator: Sep) -> Line {
|
||||||
let fields = match separator {
|
let fields = match separator {
|
||||||
Sep::Whitespaces => string.split_whitespace().map(String::from).collect(),
|
Sep::Whitespaces => string
|
||||||
Sep::Char(sep) => string.split(sep).map(String::from).collect(),
|
// GNU join uses Bourne shell field splitters by default
|
||||||
|
.split(|c| matches!(*c, b' ' | b'\t' | b'\n'))
|
||||||
|
.filter(|f| !f.is_empty())
|
||||||
|
.map(Vec::from)
|
||||||
|
.collect(),
|
||||||
|
Sep::Char(sep) => string.split(|c| *c == sep).map(Vec::from).collect(),
|
||||||
Sep::Line => vec![string],
|
Sep::Line => vec![string],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -224,7 +234,7 @@ impl Line {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get field at index.
|
/// Get field at index.
|
||||||
fn get_field(&self, index: usize) -> Option<&str> {
|
fn get_field(&self, index: usize) -> Option<&Vec<u8>> {
|
||||||
if index < self.fields.len() {
|
if index < self.fields.len() {
|
||||||
Some(&self.fields[index])
|
Some(&self.fields[index])
|
||||||
} else {
|
} else {
|
||||||
|
@ -238,7 +248,7 @@ struct State<'a> {
|
||||||
file_name: &'a str,
|
file_name: &'a str,
|
||||||
file_num: FileNum,
|
file_num: FileNum,
|
||||||
print_unpaired: bool,
|
print_unpaired: bool,
|
||||||
lines: Lines<Box<dyn BufRead + 'a>>,
|
lines: Split<Box<dyn BufRead + 'a>>,
|
||||||
seq: Vec<Line>,
|
seq: Vec<Line>,
|
||||||
line_num: usize,
|
line_num: usize,
|
||||||
has_failed: bool,
|
has_failed: bool,
|
||||||
|
@ -266,7 +276,7 @@ impl<'a> State<'a> {
|
||||||
file_name: name,
|
file_name: name,
|
||||||
file_num,
|
file_num,
|
||||||
print_unpaired,
|
print_unpaired,
|
||||||
lines: f.lines(),
|
lines: f.split(b'\n'),
|
||||||
seq: Vec::new(),
|
seq: Vec::new(),
|
||||||
line_num: 0,
|
line_num: 0,
|
||||||
has_failed: false,
|
has_failed: false,
|
||||||
|
@ -274,12 +284,13 @@ impl<'a> State<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Skip the current unpaired line.
|
/// Skip the current unpaired line.
|
||||||
fn skip_line(&mut self, input: &Input, repr: &Repr) {
|
fn skip_line(&mut self, input: &Input, repr: &Repr) -> Result<(), std::io::Error> {
|
||||||
if self.print_unpaired {
|
if self.print_unpaired {
|
||||||
self.print_first_line(repr);
|
self.print_first_line(repr)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.reset_next_line(input);
|
self.reset_next_line(input);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Keep reading line sequence until the key does not change, return
|
/// Keep reading line sequence until the key does not change, return
|
||||||
|
@ -299,20 +310,22 @@ impl<'a> State<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Print lines in the buffers as headers.
|
/// Print lines in the buffers as headers.
|
||||||
fn print_headers(&self, other: &State, repr: &Repr) {
|
fn print_headers(&self, other: &State, repr: &Repr) -> Result<(), std::io::Error> {
|
||||||
if self.has_line() {
|
if self.has_line() {
|
||||||
if other.has_line() {
|
if other.has_line() {
|
||||||
self.combine(other, repr);
|
self.combine(other, repr)?;
|
||||||
} else {
|
} else {
|
||||||
self.print_first_line(repr);
|
self.print_first_line(repr)?;
|
||||||
}
|
}
|
||||||
} else if other.has_line() {
|
} else if other.has_line() {
|
||||||
other.print_first_line(repr);
|
other.print_first_line(repr)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Combine two line sequences.
|
/// Combine two line sequences.
|
||||||
fn combine(&self, other: &State, repr: &Repr) {
|
fn combine(&self, other: &State, repr: &Repr) -> Result<(), std::io::Error> {
|
||||||
let key = self.get_current_key();
|
let key = self.get_current_key();
|
||||||
|
|
||||||
for line1 in &self.seq {
|
for line1 in &self.seq {
|
||||||
|
@ -331,16 +344,18 @@ impl<'a> State<'a> {
|
||||||
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
});
|
})?;
|
||||||
} else {
|
} else {
|
||||||
repr.print_field(key);
|
repr.print_field(key)?;
|
||||||
repr.print_fields(line1, self.key);
|
repr.print_fields(line1, self.key)?;
|
||||||
repr.print_fields(line2, other.key);
|
repr.print_fields(line2, other.key)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
println!();
|
stdout().write_all(&[b'\n'])?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reset with the next line.
|
/// Reset with the next line.
|
||||||
|
@ -377,14 +392,16 @@ impl<'a> State<'a> {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finalize(&mut self, input: &Input, repr: &Repr) {
|
fn finalize(&mut self, input: &Input, repr: &Repr) -> Result<(), std::io::Error> {
|
||||||
if self.has_line() && self.print_unpaired {
|
if self.has_line() && self.print_unpaired {
|
||||||
self.print_first_line(repr);
|
self.print_first_line(repr)?;
|
||||||
|
|
||||||
while let Some(line) = self.next_line(input) {
|
while let Some(line) = self.next_line(input) {
|
||||||
self.print_line(&line, repr);
|
self.print_line(&line, repr)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the next line without the order check.
|
/// Get the next line without the order check.
|
||||||
|
@ -423,11 +440,11 @@ impl<'a> State<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets the key value of the lines stored in seq.
|
/// Gets the key value of the lines stored in seq.
|
||||||
fn get_current_key(&self) -> Option<&str> {
|
fn get_current_key(&self) -> Option<&Vec<u8>> {
|
||||||
self.seq[0].get_field(self.key)
|
self.seq[0].get_field(self.key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_line(&self, line: &Line, repr: &Repr) {
|
fn print_line(&self, line: &Line, repr: &Repr) -> Result<(), std::io::Error> {
|
||||||
if repr.uses_format() {
|
if repr.uses_format() {
|
||||||
repr.print_format(|spec| match *spec {
|
repr.print_format(|spec| match *spec {
|
||||||
Spec::Key => line.get_field(self.key),
|
Spec::Key => line.get_field(self.key),
|
||||||
|
@ -438,17 +455,17 @@ impl<'a> State<'a> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
})?;
|
||||||
} else {
|
} else {
|
||||||
repr.print_field(line.get_field(self.key));
|
repr.print_field(line.get_field(self.key))?;
|
||||||
repr.print_fields(line, self.key);
|
repr.print_fields(line, self.key)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
println!();
|
stdout().write_all(&[b'\n'])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_first_line(&self, repr: &Repr) {
|
fn print_first_line(&self, repr: &Repr) -> Result<(), std::io::Error> {
|
||||||
self.print_line(&self.seq[0], repr);
|
self.print_line(&self.seq[0], repr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -481,14 +498,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
settings.key1 = get_field_number(keys, key1)?;
|
settings.key1 = get_field_number(keys, key1)?;
|
||||||
settings.key2 = get_field_number(keys, key2)?;
|
settings.key2 = get_field_number(keys, key2)?;
|
||||||
|
|
||||||
if let Some(value) = matches.value_of("t") {
|
if let Some(value_str) = matches.value_of("t") {
|
||||||
|
let value = value_str.as_bytes();
|
||||||
settings.separator = match value.len() {
|
settings.separator = match value.len() {
|
||||||
0 => Sep::Line,
|
0 => Sep::Line,
|
||||||
1 => Sep::Char(value.chars().next().unwrap()),
|
1 => Sep::Char(value[0]),
|
||||||
_ => {
|
_ => {
|
||||||
return Err(USimpleError::new(
|
return Err(USimpleError::new(
|
||||||
1,
|
1,
|
||||||
format!("multi-character tab {}", value),
|
format!("multi-character tab {}", value_str),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -507,7 +525,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(empty) = matches.value_of("e") {
|
if let Some(empty) = matches.value_of("e") {
|
||||||
settings.empty = empty.to_string();
|
settings.empty = empty.as_bytes().to_vec();
|
||||||
}
|
}
|
||||||
|
|
||||||
if matches.is_present("nocheck-order") {
|
if matches.is_present("nocheck-order") {
|
||||||
|
@ -529,7 +547,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
return Err(USimpleError::new(1, "both files cannot be standard input"));
|
return Err(USimpleError::new(1, "both files cannot be standard input"));
|
||||||
}
|
}
|
||||||
|
|
||||||
exec(file1, file2, settings)
|
match exec(file1, file2, settings) {
|
||||||
|
Ok(_) => Ok(()),
|
||||||
|
Err(e) => Err(USimpleError::new(1, format!("{}", e))),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn uu_app() -> App<'static, 'static> {
|
pub fn uu_app() -> App<'static, 'static> {
|
||||||
|
@ -639,7 +660,7 @@ FILENUM is 1 or 2, corresponding to FILE1 or FILE2",
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn exec(file1: &str, file2: &str, settings: Settings) -> UResult<()> {
|
fn exec(file1: &str, file2: &str, settings: Settings) -> Result<(), std::io::Error> {
|
||||||
let stdin = stdin();
|
let stdin = stdin();
|
||||||
|
|
||||||
let mut state1 = State::new(
|
let mut state1 = State::new(
|
||||||
|
@ -686,14 +707,14 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> UResult<()> {
|
||||||
let repr = Repr::new(
|
let repr = Repr::new(
|
||||||
match settings.separator {
|
match settings.separator {
|
||||||
Sep::Char(sep) => sep,
|
Sep::Char(sep) => sep,
|
||||||
_ => ' ',
|
_ => b' ',
|
||||||
},
|
},
|
||||||
&format,
|
&format,
|
||||||
&settings.empty,
|
&settings.empty,
|
||||||
);
|
);
|
||||||
|
|
||||||
if settings.headers {
|
if settings.headers {
|
||||||
state1.print_headers(&state2, &repr);
|
state1.print_headers(&state2, &repr)?;
|
||||||
state1.reset_read_line(&input);
|
state1.reset_read_line(&input);
|
||||||
state2.reset_read_line(&input);
|
state2.reset_read_line(&input);
|
||||||
}
|
}
|
||||||
|
@ -703,17 +724,17 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> UResult<()> {
|
||||||
|
|
||||||
match diff {
|
match diff {
|
||||||
Ordering::Less => {
|
Ordering::Less => {
|
||||||
state1.skip_line(&input, &repr);
|
state1.skip_line(&input, &repr)?;
|
||||||
}
|
}
|
||||||
Ordering::Greater => {
|
Ordering::Greater => {
|
||||||
state2.skip_line(&input, &repr);
|
state2.skip_line(&input, &repr)?;
|
||||||
}
|
}
|
||||||
Ordering::Equal => {
|
Ordering::Equal => {
|
||||||
let next_line1 = state1.extend(&input);
|
let next_line1 = state1.extend(&input);
|
||||||
let next_line2 = state2.extend(&input);
|
let next_line2 = state2.extend(&input);
|
||||||
|
|
||||||
if settings.print_joined {
|
if settings.print_joined {
|
||||||
state1.combine(&state2, &repr);
|
state1.combine(&state2, &repr)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
state1.reset(next_line1);
|
state1.reset(next_line1);
|
||||||
|
@ -722,8 +743,8 @@ fn exec(file1: &str, file2: &str, settings: Settings) -> UResult<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
state1.finalize(&input, &repr);
|
state1.finalize(&input, &repr)?;
|
||||||
state2.finalize(&input, &repr);
|
state2.finalize(&input, &repr)?;
|
||||||
|
|
||||||
if state1.has_failed || state2.has_failed {
|
if state1.has_failed || state2.has_failed {
|
||||||
set_exit_code(1);
|
set_exit_code(1);
|
||||||
|
|
|
@ -328,3 +328,21 @@ fn single_file_with_header() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_is("A 1\n");
|
.stdout_is("A 1\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn non_line_feeds() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("non-line_feeds_1.txt")
|
||||||
|
.arg("non-line_feeds_2.txt")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_fixture("non-line_feeds.expected");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn non_unicode() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("non-unicode_1.bin")
|
||||||
|
.arg("non-unicode_2.bin")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only_fixture("non-unicode.expected");
|
||||||
|
}
|
||||||
|
|
2
tests/fixtures/join/non-line_feeds.expected
vendored
Normal file
2
tests/fixtures/join/non-line_feeds.expected
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
b
d
|
||||||
|
a c f
|
2
tests/fixtures/join/non-line_feeds_1.txt
vendored
Normal file
2
tests/fixtures/join/non-line_feeds_1.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
b
|
||||||
|
a c
|
2
tests/fixtures/join/non-line_feeds_2.txt
vendored
Normal file
2
tests/fixtures/join/non-line_feeds_2.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
d
|
||||||
|
a f
|
BIN
tests/fixtures/join/non-unicode.expected
vendored
Normal file
BIN
tests/fixtures/join/non-unicode.expected
vendored
Normal file
Binary file not shown.
BIN
tests/fixtures/join/non-unicode_1.bin
vendored
Normal file
BIN
tests/fixtures/join/non-unicode_1.bin
vendored
Normal file
Binary file not shown.
BIN
tests/fixtures/join/non-unicode_2.bin
vendored
Normal file
BIN
tests/fixtures/join/non-unicode_2.bin
vendored
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue