1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-31 13:07:46 +00:00

enum Delimiter; misc changes

This commit is contained in:
TechHara 2022-12-20 20:37:31 -05:00
parent c29839f4f9
commit f2a7175144
2 changed files with 94 additions and 88 deletions

View file

@ -118,11 +118,15 @@ struct Options {
zero_terminated: bool, zero_terminated: bool,
} }
/// Input field delimiter selected for field mode; `cut_fields` matches on it
/// to pick the splitting strategy.
enum Delimiter {
/// Whitespace-delimited input: `cut_fields` forwards the whole job to
/// `cut_fields_whitespace` for this variant.
Whitespace,
/// A literal delimiter, searched for in each record via its bytes.
/// One char long; kept as a `String` because of its UTF-8 representation
/// (the code uses its byte length, not a char count, when skipping past it).
String(String),
}
struct FieldOptions { struct FieldOptions {
delimiter: String, // one char long, String because of UTF8 representation delimiter: Delimiter,
out_delimiter: Option<String>, out_delimiter: Option<String>,
only_delimited: bool, only_delimited: bool,
whitespace_delimited: bool,
zero_terminated: bool, zero_terminated: bool,
} }
@ -352,95 +356,98 @@ fn cut_fields_whitespace<R: Read>(
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
fn cut_fields<R: Read>(reader: R, ranges: &[Range], opts: &FieldOptions) -> UResult<()> { fn cut_fields<R: Read>(reader: R, ranges: &[Range], opts: &FieldOptions) -> UResult<()> {
let newline_char = if opts.zero_terminated { b'\0' } else { b'\n' }; let newline_char = if opts.zero_terminated { b'\0' } else { b'\n' };
match opts.delimiter {
if opts.whitespace_delimited { Delimiter::Whitespace => {
return cut_fields_whitespace( return cut_fields_whitespace(
reader, reader,
ranges, ranges,
opts.only_delimited, opts.only_delimited,
newline_char, newline_char,
match opts.out_delimiter { match opts.out_delimiter {
Some(ref delim) => delim, Some(ref delim) => delim,
_ => "\t", _ => "\t",
}, },
); )
}
if let Some(ref o_delim) = opts.out_delimiter {
return cut_fields_delimiter(
reader,
ranges,
&opts.delimiter,
opts.only_delimited,
newline_char,
o_delim,
);
}
let mut buf_in = BufReader::new(reader);
let mut out = stdout_writer();
let delim_len = opts.delimiter.len();
let result = buf_in.for_byte_record_with_terminator(newline_char, |line| {
let mut fields_pos = 1;
let mut low_idx = 0;
let mut delim_search = Searcher::new(line, opts.delimiter.as_bytes()).peekable();
let mut print_delim = false;
if delim_search.peek().is_none() {
if !opts.only_delimited {
out.write_all(line)?;
if line[line.len() - 1] != newline_char {
out.write_all(&[newline_char])?;
}
}
return Ok(true);
} }
Delimiter::String(ref delimiter) => {
for &Range { low, high } in ranges { if let Some(ref o_delim) = opts.out_delimiter {
if low - fields_pos > 0 { return cut_fields_delimiter(
if let Some(delim_pos) = delim_search.nth(low - fields_pos - 1) { reader,
low_idx = if print_delim { ranges,
delim_pos &delimiter,
} else { opts.only_delimited,
delim_pos + delim_len newline_char,
} o_delim,
} else { );
break;
}
} }
match delim_search.nth(high - low) { let mut buf_in = BufReader::new(reader);
Some(high_idx) => { let mut out = stdout_writer();
let segment = &line[low_idx..high_idx]; let delim_len = delimiter.len();
out.write_all(segment)?; let result = buf_in.for_byte_record_with_terminator(newline_char, |line| {
let mut fields_pos = 1;
let mut low_idx = 0;
let mut delim_search = Searcher::new(line, delimiter.as_bytes()).peekable();
let mut print_delim = false;
print_delim = true; if delim_search.peek().is_none() {
low_idx = high_idx; if !opts.only_delimited {
fields_pos = high + 1; out.write_all(line)?;
} if line[line.len() - 1] != newline_char {
None => { out.write_all(&[newline_char])?;
let segment = &line[low_idx..line.len()]; }
out.write_all(segment)?;
if line[line.len() - 1] == newline_char {
return Ok(true);
} }
break;
return Ok(true);
} }
for &Range { low, high } in ranges {
if low - fields_pos > 0 {
if let Some(delim_pos) = delim_search.nth(low - fields_pos - 1) {
low_idx = if print_delim {
delim_pos
} else {
delim_pos + delim_len
}
} else {
break;
}
}
match delim_search.nth(high - low) {
Some(high_idx) => {
let segment = &line[low_idx..high_idx];
out.write_all(segment)?;
print_delim = true;
low_idx = high_idx;
fields_pos = high + 1;
}
None => {
let segment = &line[low_idx..line.len()];
out.write_all(segment)?;
if line[line.len() - 1] == newline_char {
return Ok(true);
}
break;
}
}
}
out.write_all(&[newline_char])?;
Ok(true)
});
if let Err(e) = result {
return Err(USimpleError::new(1, e.to_string()));
} }
Ok(())
} }
out.write_all(&[newline_char])?;
Ok(true)
});
if let Err(e) = result {
return Err(USimpleError::new(1, e.to_string()));
} }
Ok(())
} }
fn cut_files(mut filenames: Vec<String>, mode: &Mode) { fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
@ -585,10 +592,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
Ok(Mode::Fields( Ok(Mode::Fields(
ranges, ranges,
FieldOptions { FieldOptions {
delimiter: delim, delimiter: Delimiter::String(delim),
out_delimiter: out_delim, out_delimiter: out_delim,
only_delimited, only_delimited,
whitespace_delimited,
zero_terminated, zero_terminated,
}, },
)) ))
@ -597,10 +603,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
None => Ok(Mode::Fields( None => Ok(Mode::Fields(
ranges, ranges,
FieldOptions { FieldOptions {
delimiter: "\t".to_owned(), delimiter: match whitespace_delimited {
true => Delimiter::Whitespace,
false => Delimiter::String("\t".to_owned()),
},
out_delimiter: out_delim, out_delimiter: out_delim,
only_delimited, only_delimited,
whitespace_delimited,
zero_terminated, zero_terminated,
}, },
)), )),

View file

@ -1,11 +1,9 @@
// This file is part of the uutils coreutils package. // This file is part of the uutils coreutils package.
// //
// (c) Rolf Morel <rolfmorel@gmail.com>
//
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// cSpell:ignore multispace // spell-checker:ignore multispace
use memchr::memchr2; use memchr::memchr2;