mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
join: avoid extra allocations when using -i
This commit is contained in:
parent
4f79c0b69f
commit
7c3a9380f1
2 changed files with 38 additions and 4 deletions
|
@ -55,7 +55,7 @@ The following options can have a non-trivial impact on performance:
|
|||
|
||||
- `-a`/`-v` if one of the two files has significantly more lines than the other
|
||||
- `-j`/`-1`/`-2` cause work to be done to grab the appropriate field
|
||||
- `-i` adds a call to `to_ascii_lowercase()` that adds some time for allocating and dropping memory for the lowercase key
|
||||
- `-i` uses a custom, allocation-free comparison that ignores ASCII case
|
||||
- `--nocheck-order` causes some calls of `Input::compare` to be skipped
|
||||
|
||||
The content of the files being joined has a very significant impact on the performance.
|
||||
|
|
|
@ -288,6 +288,40 @@ impl<'a, Sep: Separator> Repr<'a, Sep> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Borrowed byte string whose ordering and equality ignore ASCII letter case.
///
/// Comparisons work directly on the borrowed bytes; no lowercased copy of
/// either operand is built.
#[derive(Eq)]
struct CaseInsensitiveSlice<'a> {
    /// The wrapped bytes, compared as if every ASCII letter were lowercase.
    v: &'a [u8],
}

impl Ord for CaseInsensitiveSlice<'_> {
    /// Lexicographic comparison of the two slices after folding each byte to
    /// ASCII lowercase. When one slice is a (case-insensitive) prefix of the
    /// other, the shorter one sorts first.
    fn cmp(&self, other: &Self) -> Ordering {
        for (lhs, rhs) in self.v.iter().zip(other.v.iter()) {
            let lhs = lhs.to_ascii_lowercase();
            let rhs = rhs.to_ascii_lowercase();
            if lhs != rhs {
                // First position where the folded bytes disagree decides the order.
                return lhs.cmp(&rhs);
            }
        }
        // Every shared position matched, so only the lengths can differ.
        self.v.len().cmp(&other.v.len())
    }
}

impl PartialOrd for CaseInsensitiveSlice<'_> {
    /// Always `Some`: delegates to the total order defined by [`Ord::cmp`].
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl PartialEq for CaseInsensitiveSlice<'_> {
    /// Two slices are equal when they have the same length and every pair of
    /// bytes matches ignoring ASCII case.
    fn eq(&self, other: &Self) -> bool {
        self.v.len() == other.v.len()
            && self
                .v
                .iter()
                .zip(other.v.iter())
                .all(|(lhs, rhs)| lhs.eq_ignore_ascii_case(rhs))
    }
}
|
||||
|
||||
/// Input processing parameters.
|
||||
struct Input<Sep: Separator> {
|
||||
separator: Sep,
|
||||
|
@ -307,9 +341,9 @@ impl<Sep: Separator> Input<Sep> {
|
|||
fn compare(&self, field1: Option<&[u8]>, field2: Option<&[u8]>) -> Ordering {
|
||||
if let (Some(field1), Some(field2)) = (field1, field2) {
|
||||
if self.ignore_case {
|
||||
field1
|
||||
.to_ascii_lowercase()
|
||||
.cmp(&field2.to_ascii_lowercase())
|
||||
let field1 = CaseInsensitiveSlice { v: field1 };
|
||||
let field2 = CaseInsensitiveSlice { v: field2 };
|
||||
field1.cmp(&field2)
|
||||
} else {
|
||||
field1.cmp(field2)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue