mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
tr: correctly detect matched [:upper:]
tr now detects whether a class in set2 is matched by a class at the corresponding position in set1, where the position is computed after expanding everything that precedes the class in both sets. For example, tr 'abcd[:upper:]' 'a-d[:lower:]' should not fail.
This commit is contained in:
parent
cb5111c1b8
commit
866366a874
2 changed files with 55 additions and 81 deletions
|
@ -103,22 +103,18 @@ pub enum Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sequence {
|
impl Sequence {
|
||||||
pub fn flatten_non_lower_upper(&self) -> Box<dyn Iterator<Item = Self>> {
|
pub fn flatten(&self) -> Box<dyn Iterator<Item = u8>> {
|
||||||
match self {
|
match self {
|
||||||
Self::Char(c) => Box::new(std::iter::once(*c).map(Self::Char)),
|
Self::Char(c) => Box::new(std::iter::once(*c)),
|
||||||
Self::CharRange(l, r) => Box::new((*l..=*r).map(Self::Char)),
|
Self::CharRange(l, r) => Box::new(*l..=*r),
|
||||||
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n).map(Self::Char)),
|
Self::CharStar(c) => Box::new(std::iter::repeat(*c)),
|
||||||
|
Self::CharRepeat(c, n) => Box::new(std::iter::repeat(*c).take(*n)),
|
||||||
Self::Class(class) => match class {
|
Self::Class(class) => match class {
|
||||||
Class::Alnum => Box::new(
|
Class::Alnum => Box::new((b'0'..=b'9').chain(b'A'..=b'Z').chain(b'a'..=b'z')),
|
||||||
(b'0'..=b'9')
|
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z')),
|
||||||
.chain(b'A'..=b'Z')
|
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned()),
|
||||||
.chain(b'a'..=b'z')
|
Class::Control => Box::new((0..=31).chain(std::iter::once(127))),
|
||||||
.map(Self::Char),
|
Class::Digit => Box::new(b'0'..=b'9'),
|
||||||
),
|
|
||||||
Class::Alpha => Box::new((b'A'..=b'Z').chain(b'a'..=b'z').map(Self::Char)),
|
|
||||||
Class::Blank => Box::new(unicode_table::BLANK.iter().cloned().map(Self::Char)),
|
|
||||||
Class::Control => Box::new((0..=31).chain(std::iter::once(127)).map(Self::Char)),
|
|
||||||
Class::Digit => Box::new((b'0'..=b'9').map(Self::Char)),
|
|
||||||
Class::Graph => Box::new(
|
Class::Graph => Box::new(
|
||||||
(48..=57) // digit
|
(48..=57) // digit
|
||||||
.chain(65..=90) // uppercase
|
.chain(65..=90) // uppercase
|
||||||
|
@ -128,8 +124,7 @@ impl Sequence {
|
||||||
.chain(58..=64)
|
.chain(58..=64)
|
||||||
.chain(91..=96)
|
.chain(91..=96)
|
||||||
.chain(123..=126)
|
.chain(123..=126)
|
||||||
.chain(std::iter::once(32))
|
.chain(std::iter::once(32)), // space
|
||||||
.map(Self::Char), // space
|
|
||||||
),
|
),
|
||||||
Class::Print => Box::new(
|
Class::Print => Box::new(
|
||||||
(48..=57) // digit
|
(48..=57) // digit
|
||||||
|
@ -139,37 +134,14 @@ impl Sequence {
|
||||||
.chain(33..=47)
|
.chain(33..=47)
|
||||||
.chain(58..=64)
|
.chain(58..=64)
|
||||||
.chain(91..=96)
|
.chain(91..=96)
|
||||||
.chain(123..=126)
|
.chain(123..=126),
|
||||||
.map(Self::Char),
|
|
||||||
),
|
),
|
||||||
Class::Punct => Box::new(
|
Class::Punct => Box::new((33..=47).chain(58..=64).chain(91..=96).chain(123..=126)),
|
||||||
(33..=47)
|
Class::Space => Box::new(unicode_table::SPACES.iter().cloned()),
|
||||||
.chain(58..=64)
|
Class::Xdigit => Box::new((b'0'..=b'9').chain(b'A'..=b'F').chain(b'a'..=b'f')),
|
||||||
.chain(91..=96)
|
Class::Lower => Box::new(b'a'..=b'z'),
|
||||||
.chain(123..=126)
|
Class::Upper => Box::new(b'A'..=b'Z'),
|
||||||
.map(Self::Char),
|
|
||||||
),
|
|
||||||
Class::Space => Box::new(unicode_table::SPACES.iter().cloned().map(Self::Char)),
|
|
||||||
Class::Xdigit => Box::new(
|
|
||||||
(b'0'..=b'9')
|
|
||||||
.chain(b'A'..=b'F')
|
|
||||||
.chain(b'a'..=b'f')
|
|
||||||
.map(Self::Char),
|
|
||||||
),
|
|
||||||
s => Box::new(std::iter::once(Self::Class(*s))),
|
|
||||||
},
|
},
|
||||||
s => Box::new(std::iter::once(*s)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn flatten_all(&self) -> Box<dyn Iterator<Item = Self>> {
|
|
||||||
match self {
|
|
||||||
Self::Class(class) => match class {
|
|
||||||
Class::Lower => Box::new((b'a'..=b'z').map(Self::Char)),
|
|
||||||
Class::Upper => Box::new((b'A'..=b'Z').map(Self::Char)),
|
|
||||||
s => Self::Class(*s).flatten_non_lower_upper(),
|
|
||||||
},
|
|
||||||
s => s.flatten_non_lower_upper(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,12 +154,6 @@ impl Sequence {
|
||||||
translating: bool,
|
translating: bool,
|
||||||
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
||||||
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
|
let is_char_star = |s: &&Self| -> bool { matches!(s, Self::CharStar(_)) };
|
||||||
let to_u8 = |s: Self| -> Option<u8> {
|
|
||||||
match s {
|
|
||||||
Self::Char(c) => Some(c),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let set1 = Self::from_str(set1_str)?;
|
let set1 = Self::from_str(set1_str)?;
|
||||||
if set1.iter().filter(is_char_star).count() != 0 {
|
if set1.iter().filter(is_char_star).count() != 0 {
|
||||||
|
@ -208,11 +174,7 @@ impl Sequence {
|
||||||
return Err(BadSequence::ClassExceptLowerUpperInSet2);
|
return Err(BadSequence::ClassExceptLowerUpperInSet2);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut set1_solved: Vec<u8> = set1
|
let mut set1_solved: Vec<u8> = set1.iter().flat_map(Self::flatten).collect();
|
||||||
.iter()
|
|
||||||
.flat_map(Self::flatten_all)
|
|
||||||
.filter_map(to_u8)
|
|
||||||
.collect();
|
|
||||||
if complement_flag {
|
if complement_flag {
|
||||||
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
|
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
|
||||||
}
|
}
|
||||||
|
@ -224,7 +186,7 @@ impl Sequence {
|
||||||
Self::CharStar(_) => None,
|
Self::CharStar(_) => None,
|
||||||
r => Some(r),
|
r => Some(r),
|
||||||
})
|
})
|
||||||
.flat_map(Self::flatten_all)
|
.flat_map(Self::flatten)
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
let star_compensate_len = set1_len.saturating_sub(set2_len);
|
let star_compensate_len = set1_len.saturating_sub(set2_len);
|
||||||
|
@ -238,33 +200,38 @@ impl Sequence {
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
//Flatten everything but upper/lower into Char
|
// For every upper/lower in set2, there must be an upper/lower in set1 at the same position. The position is calculated by expanding everything before the upper/lower in both sets
|
||||||
let set1_flattened: Vec<_> = set1
|
for (set2_pos, set2_item) in set2.iter().enumerate() {
|
||||||
.iter()
|
if matches!(set2_item, Self::Class(_)) {
|
||||||
.flat_map(Self::flatten_non_lower_upper)
|
let mut set2_part_solved_len = 0;
|
||||||
.collect();
|
if set2_pos >= 1 {
|
||||||
set2 = set2
|
set2_part_solved_len =
|
||||||
.iter()
|
set2.iter().take(set2_pos).flat_map(Self::flatten).count();
|
||||||
.flat_map(Self::flatten_non_lower_upper)
|
}
|
||||||
.collect();
|
|
||||||
|
|
||||||
if set2
|
let mut class_matches = false;
|
||||||
.iter()
|
for (set1_pos, set1_item) in set1.iter().enumerate() {
|
||||||
.zip(
|
if matches!(set1_item, Self::Class(_)) {
|
||||||
set1_flattened
|
let mut set1_part_solved_len = 0;
|
||||||
.iter()
|
if set1_pos >= 1 {
|
||||||
.chain(std::iter::repeat(&Self::Char(0))),
|
set1_part_solved_len =
|
||||||
)
|
set1.iter().take(set1_pos).flat_map(Self::flatten).count();
|
||||||
.any(|x| matches!(x.0, Self::Class(_)) && !matches!(x.1, Self::Class(_)))
|
}
|
||||||
{
|
|
||||||
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
|
if set1_part_solved_len == set2_part_solved_len {
|
||||||
|
class_matches = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !class_matches {
|
||||||
|
return Err(BadSequence::ClassInSet2NotMatchedBySet1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let set2_solved: Vec<_> = set2
|
let set2_solved: Vec<_> = set2.iter().flat_map(Self::flatten).collect();
|
||||||
.iter()
|
|
||||||
.flat_map(Self::flatten_all)
|
|
||||||
.filter_map(to_u8)
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Calculate the set of unique characters in set2
|
// Calculate the set of unique characters in set2
|
||||||
let mut set2_uniques = set2_solved.clone();
|
let mut set2_uniques = set2_solved.clone();
|
||||||
|
|
|
@ -1375,6 +1375,13 @@ fn check_class_in_set2_must_be_matched_in_set1() {
|
||||||
new_ucmd!().args(&["-t", "1[:upper:]", "[:upper:]"]).fails();
|
new_ucmd!().args(&["-t", "1[:upper:]", "[:upper:]"]).fails();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_class_in_set2_must_be_matched_in_set1_right_length_check() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-t", "a-z[:upper:]", "abcdefghijklmnopqrstuvwxyz[:upper:]"])
|
||||||
|
.succeeds();
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn check_set1_longer_set2_ends_in_class() {
|
fn check_set1_longer_set2_ends_in_class() {
|
||||||
new_ucmd!().args(&["[:lower:]a", "[:upper:]"]).fails();
|
new_ucmd!().args(&["[:lower:]a", "[:upper:]"]).fails();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue