From d624dbcfa0e3a017d454c636042ab64b5f4f2f6a Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 12 Jul 2018 00:00:22 +0200 Subject: [PATCH 1/5] uniq: do not allocate a new string in skip_fields Instead, reuse the existing line and just view into that. --- src/uniq/uniq.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uniq/uniq.rs b/src/uniq/uniq.rs index 5b27e9549..78e066dc2 100644 --- a/src/uniq/uniq.rs +++ b/src/uniq/uniq.rs @@ -72,7 +72,7 @@ impl Uniq { } } - fn skip_fields(&self, line: &str) -> String { + fn skip_fields<'a>(&self, line: &'a str) -> &'a str { if let Some(skip_fields) = self.skip_fields { if line.split_whitespace().count() > skip_fields { let mut field = 0; @@ -86,12 +86,12 @@ impl Uniq { } field = field + 1; } - line[i..].to_owned() + &line[i..] } else { - "".to_owned() + "" } } else { - line[..].to_owned() + line } } @@ -104,7 +104,7 @@ impl Uniq { } fn cmp_key(&self, line: &str) -> String { - let fields_to_check = &self.skip_fields(line); + let fields_to_check = self.skip_fields(line); let len = fields_to_check.len(); if len > 0 { fields_to_check From f233d8e5d756cbe526f6ad1921561ea088324f1c Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 12 Jul 2018 11:03:11 +0200 Subject: [PATCH 2/5] uniq: avoid copying Strings all the time --- src/uniq/uniq.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/uniq/uniq.rs b/src/uniq/uniq.rs index 78e066dc2..42f013d60 100644 --- a/src/uniq/uniq.rs +++ b/src/uniq/uniq.rs @@ -44,6 +44,10 @@ struct Uniq { zero_terminated: bool, } +fn iter_ne<'a>(lhs: Box + 'a>, rhs: Box + 'a>) -> bool { + lhs.ne(rhs) +} + impl Uniq { pub fn print_uniq( &self, @@ -57,7 +61,7 @@ impl Uniq { for io_line in reader.split(line_terminator) { let line = String::from_utf8(crash_if_err!(1, io_line)).unwrap(); - if !lines.is_empty() && self.cmp_key(&lines[0]) != self.cmp_key(&line) { + if !lines.is_empty() && self.cmp_keys(&lines[0], &line) { let print_delimiter = delimiters == &Delimiters::Prepend || (delimiters == &Delimiters::Separate && first_line_printed); first_line_printed |= self.print_lines(writer, &lines, print_delimiter); @@ -103,21 +107,22 @@ impl Uniq { } } - fn cmp_key(&self, line: &str) -> String { + fn cmp_key<'a>(&'a self, line: &'a str) -> Box + 'a> { let fields_to_check = self.skip_fields(line); let len = fields_to_check.len(); if len > 0 { - fields_to_check - .chars() - .skip(self.slice_start.unwrap_or(0)) - .take(self.slice_stop.unwrap_or(len)) - .map(|c| match c { - 'a'...'z' if self.ignore_case => ((c as u8) - 32) as char, - _ => c, - }) - .collect() + Box::new( + fields_to_check + .chars() + .skip(self.slice_start.unwrap_or(0)) + .take(self.slice_stop.unwrap_or(len)) + .map(move |c| match c { + 'a'...'z' if self.ignore_case => ((c as u8) - 32) as char, + _ => c, + }), + ) } else { - fields_to_check.to_owned() + Box::new(fields_to_check.chars()) } } From 3288fa2cd2c659322b4c1b220476b0893db54d44 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 12 Jul 2018 11:21:28 +0200 Subject: [PATCH 3/5] uniq: avoid a upper-case map if we don't need to --- src/uniq/uniq.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/uniq/uniq.rs b/src/uniq/uniq.rs index 42f013d60..580d27e96 100644 --- a/src/uniq/uniq.rs +++ b/src/uniq/uniq.rs @@ -110,12 +110,19 @@ impl Uniq { fn cmp_key<'a>(&'a self, line: &'a str) -> Box + 'a> { let fields_to_check = self.skip_fields(line); let len = fields_to_check.len(); + let slice_start = self.slice_start.unwrap_or(0); + let slice_stop = self.slice_stop.unwrap_or(len); if len > 0 { + // fast path: we can avoid mapping chars to upper-case, if we don't want to ignore the case + if !self.ignore_case { + return Box::new(fields_to_check.chars().skip(slice_start).take(slice_stop)); + } + Box::new( fields_to_check .chars() - .skip(self.slice_start.unwrap_or(0)) - .take(self.slice_stop.unwrap_or(len)) + .skip(slice_start) + .take(slice_stop) .map(move |c| match c { 'a'...'z' if self.ignore_case => ((c as u8) - 32) as char, _ => c, From 781c9236e1f5e7624510445782c172147f3deba4 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Thu, 12 Jul 2018 17:00:06 +0200 Subject: [PATCH 4/5] uniq: add more fast-paths to avoid work --- src/uniq/uniq.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/uniq/uniq.rs b/src/uniq/uniq.rs index 580d27e96..2bda28f4a 100644 --- a/src/uniq/uniq.rs +++ b/src/uniq/uniq.rs @@ -113,6 +113,19 @@ impl Uniq { let slice_start = self.slice_start.unwrap_or(0); let slice_stop = self.slice_stop.unwrap_or(len); if len > 0 { + // fast path: avoid doing any work if there is no need to skip or map to lower-case + if !self.ignore_case && slice_start == 0 && slice_stop == len { + return Box::new(fields_to_check.chars()); + } + + // fast path: avoid skipping + if self.ignore_case && slice_start == 0 && slice_stop == len { + return Box::new(fields_to_check.chars().map(|c| match c { + 'a'...'z' => ((c as u8) - 32) as char, + _ => c, + })); + } + // fast path: we can avoid mapping chars to upper-case, if we don't want to ignore the case if !self.ignore_case { return Box::new(fields_to_check.chars().skip(slice_start).take(slice_stop)); @@ -123,8 +136,8 @@ impl Uniq { .chars() .skip(slice_start) .take(slice_stop) - .map(move |c| match c { - 'a'...'z' if self.ignore_case => ((c as u8) - 32) as char, + .map(|c| match c { + 'a'...'z' => ((c as u8) - 32) as char, _ => c, }), ) From e30237a714c0fff0deab219e2db7b7351a3f2df4 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sat, 21 Jul 2018 15:26:38 +0200 Subject: [PATCH 5/5] uniq: avoid allocations for the iterator --- src/uniq/uniq.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/uniq/uniq.rs b/src/uniq/uniq.rs index 2bda28f4a..fb6918449 100644 --- a/src/uniq/uniq.rs +++ b/src/uniq/uniq.rs @@ -44,10 +44,6 @@ struct Uniq { zero_terminated: bool, } -fn iter_ne<'a>(lhs: Box + 'a>, rhs: Box + 'a>) -> bool { - lhs.ne(rhs) -} - impl Uniq { pub fn print_uniq( &self, @@ -107,7 +103,16 @@ impl Uniq { } } - fn cmp_key<'a>(&'a self, line: &'a str) -> Box + 'a> { + fn cmp_keys(&self, first: &str, second: &str) -> bool { + self.cmp_key(first, |first_iter| { + self.cmp_key(second, |second_iter| first_iter.ne(second_iter)) + }) + } + + fn cmp_key(&self, line: &str, mut closure: F) -> bool + where + F: FnMut(&mut Iterator) -> bool, + { let fields_to_check = self.skip_fields(line); let len = fields_to_check.len(); let slice_start = self.slice_start.unwrap_or(0); @@ -115,12 +120,12 @@ impl Uniq { if len > 0 { // fast path: avoid doing any work if there is no need to skip or map to lower-case if !self.ignore_case && slice_start == 0 && slice_stop == len { - return Box::new(fields_to_check.chars()); + return closure(&mut fields_to_check.chars()); } // fast path: avoid skipping if self.ignore_case && slice_start == 0 && slice_stop == len { - return Box::new(fields_to_check.chars().map(|c| match c { + return closure(&mut fields_to_check.chars().map(|c| match c { 'a'...'z' => ((c as u8) - 32) as char, _ => c, })); @@ -128,11 +133,11 @@ impl Uniq { // fast path: we can avoid mapping chars to upper-case, if we don't want to ignore the case if !self.ignore_case { - return Box::new(fields_to_check.chars().skip(slice_start).take(slice_stop)); + return closure(&mut fields_to_check.chars().skip(slice_start).take(slice_stop)); } - Box::new( - fields_to_check + closure( + &mut fields_to_check .chars() .skip(slice_start) .take(slice_stop) @@ -142,7 +147,7 @@ impl Uniq { }), ) } else { - Box::new(fields_to_check.chars()) + closure(&mut fields_to_check.chars()) } }