1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 03:57:44 +00:00

Merge pull request #2059 from cbjadwani/master

uniq: avoid building list of duplicate lines
This commit is contained in:
Sylvestre Ledru 2021-04-25 09:48:48 +02:00 committed by GitHub
commit 441763b73d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -61,34 +61,43 @@ impl Uniq {
reader: &mut BufReader<R>, reader: &mut BufReader<R>,
writer: &mut BufWriter<W>, writer: &mut BufWriter<W>,
) { ) {
let mut lines: Vec<String> = vec![];
let mut first_line_printed = false; let mut first_line_printed = false;
let delimiters = self.delimiters; let mut group_count = 1;
let line_terminator = self.get_line_terminator(); let line_terminator = self.get_line_terminator();
// Don't print any delimiting lines before, after or between groups if delimiting method is 'none' let mut lines = reader.split(line_terminator).map(get_line_string);
let no_delimiters = delimiters == Delimiters::None; let mut line = match lines.next() {
// The 'prepend' and 'both' delimit methods will cause output to start with delimiter line Some(l) => l,
let prepend_delimiter = delimiters == Delimiters::Prepend || delimiters == Delimiters::Both; None => return,
// The 'append' and 'both' delimit methods will cause output to end with delimiter line };
let append_delimiter = delimiters == Delimiters::Append || delimiters == Delimiters::Both;
for line in reader.split(line_terminator).map(get_line_string) { // compare current `line` with consecutive lines (`next_line`) of the input
if !lines.is_empty() && self.cmp_keys(&lines[0], &line) { // and if needed, print `line` based on the command line options provided
// Print delimiter if delimit method is not 'none' and any line has been output for next_line in lines {
// before or if we need to start output with delimiter if self.cmp_keys(&line, &next_line) {
let print_delimiter = !no_delimiters && (prepend_delimiter || first_line_printed); if (group_count == 1 && !self.repeats_only)
first_line_printed |= self.print_lines(writer, &lines, print_delimiter); || (group_count > 1 && !self.uniques_only)
lines.truncate(0); {
self.print_line(writer, &line, group_count, first_line_printed);
first_line_printed = true;
}
line = next_line;
group_count = 1;
} else {
if self.all_repeated {
self.print_line(writer, &line, group_count, first_line_printed);
first_line_printed = true;
line = next_line;
}
group_count += 1;
} }
lines.push(line);
} }
if !lines.is_empty() { if (group_count == 1 && !self.repeats_only) || (group_count > 1 && !self.uniques_only) {
// Print delimiter if delimit method is not 'none' and any line has been output self.print_line(writer, &line, group_count, first_line_printed);
// before or if we need to start output with delimiter first_line_printed = true;
let print_delimiter = !no_delimiters && (prepend_delimiter || first_line_printed);
first_line_printed |= self.print_lines(writer, &lines, print_delimiter);
} }
if append_delimiter && first_line_printed { if (self.delimiters == Delimiters::Append || self.delimiters == Delimiters::Both)
&& first_line_printed
{
crash_if_err!(1, writer.write_all(&[line_terminator])); crash_if_err!(1, writer.write_all(&[line_terminator]));
} }
} }
@ -163,27 +172,17 @@ impl Uniq {
} }
} }
fn print_lines<W: Write>( fn should_print_delimiter(&self, group_count: usize, first_line_printed: bool) -> bool {
&self, // if no delimiter option is selected then no other checks needed
writer: &mut BufWriter<W>, self.delimiters != Delimiters::None
lines: &[String], // print delimiter only before the first line of a group, not between lines of a group
print_delimiter: bool, && group_count == 1
) -> bool { // if at least one line has been output before current group then print delimiter
let mut first_line_printed = false; && (first_line_printed
let mut count = if self.all_repeated { 1 } else { lines.len() }; // or if we need to prepend delimiter then print it even at the start of the output
if lines.len() == 1 && !self.repeats_only || lines.len() > 1 && !self.uniques_only { || self.delimiters == Delimiters::Prepend
self.print_line(writer, &lines[0], count, print_delimiter); // the 'both' delimit mode should prepend and append delimiters
first_line_printed = true; || self.delimiters == Delimiters::Both)
count += 1;
}
if self.all_repeated {
for line in lines[1..].iter() {
self.print_line(writer, line, count, print_delimiter && !first_line_printed);
first_line_printed = true;
count += 1;
}
}
first_line_printed
} }
fn print_line<W: Write>( fn print_line<W: Write>(
@ -191,11 +190,11 @@ impl Uniq {
writer: &mut BufWriter<W>, writer: &mut BufWriter<W>,
line: &str, line: &str,
count: usize, count: usize,
print_delimiter: bool, first_line_printed: bool,
) { ) {
let line_terminator = self.get_line_terminator(); let line_terminator = self.get_line_terminator();
if print_delimiter { if self.should_print_delimiter(count, first_line_printed) {
crash_if_err!(1, writer.write_all(&[line_terminator])); crash_if_err!(1, writer.write_all(&[line_terminator]));
} }