mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
uniq: Fix skip fields
The current implementation of the skip-fields logic does not handle multibyte code points correctly: it assumes each code point (`char`) is one byte. If the skipped part of the input line contains any multibyte code points, fields may not be skipped correctly (the field's start index is calculated to be before the field actually starts).
This commit is contained in:
parent
ce4342d12e
commit
116e253cc0
3 changed files with 13 additions and 16 deletions
|
@ -79,22 +79,19 @@ impl Uniq {
|
||||||
|
|
||||||
fn skip_fields<'a>(&self, line: &'a str) -> &'a str {
|
fn skip_fields<'a>(&self, line: &'a str) -> &'a str {
|
||||||
if let Some(skip_fields) = self.skip_fields {
|
if let Some(skip_fields) = self.skip_fields {
|
||||||
if line.split_whitespace().count() > skip_fields {
|
|
||||||
let mut field = 0;
|
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while field < skip_fields && i < line.len() {
|
let mut char_indices = line.char_indices();
|
||||||
while i < line.len() && line.chars().nth(i).unwrap().is_whitespace() {
|
for _ in 0..skip_fields {
|
||||||
i += 1;
|
if char_indices.find(|(_, c)| !c.is_whitespace()) == None {
|
||||||
|
return "";
|
||||||
}
|
}
|
||||||
while i < line.len() && !line.chars().nth(i).unwrap().is_whitespace() {
|
match char_indices.find(|(_, c)| c.is_whitespace()) {
|
||||||
i += 1;
|
None => return "",
|
||||||
|
|
||||||
|
Some((next_field_i, _)) => i = next_field_i,
|
||||||
}
|
}
|
||||||
field += 1;
|
|
||||||
}
|
}
|
||||||
&line[i..]
|
&line[i..]
|
||||||
} else {
|
|
||||||
""
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
line
|
line
|
||||||
}
|
}
|
||||||
|
|
2
tests/fixtures/uniq/skip-2-fields.expected
vendored
2
tests/fixtures/uniq/skip-2-fields.expected
vendored
|
@ -1,2 +1,2 @@
|
||||||
aaa aa a
|
aaa ⟪⟫ a
|
||||||
aa a
|
aa a
|
||||||
|
|
2
tests/fixtures/uniq/skip-fields.txt
vendored
2
tests/fixtures/uniq/skip-fields.txt
vendored
|
@ -1,4 +1,4 @@
|
||||||
aaa aa a
|
aaa ⟪⟫ a
|
||||||
ZZZ aa a
|
ZZZ aa a
|
||||||
ZZZ aa a
|
ZZZ aa a
|
||||||
ZZZ bb a
|
ZZZ bb a
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue