1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

Merge pull request #7953 from frendsick/fix/expr-regex-anchors

expr: Fix parsing regex anchors '^' and '$'
This commit is contained in:
Dorian Péron 2025-05-23 15:16:28 +02:00 committed by GitHub
commit cd9ce77098
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 53 additions and 8 deletions

View file

@ -156,7 +156,7 @@ impl StringOp {
re_string.push('^'); re_string.push('^');
// Handle first character from the input pattern // Handle first character from the input pattern
let mut pattern_chars = right.chars(); let mut pattern_chars = right.chars().peekable();
let first = pattern_chars.next(); let first = pattern_chars.next();
match first { match first {
Some('^') => {} // Start of string anchor is already added Some('^') => {} // Start of string anchor is already added
@ -166,16 +166,42 @@ impl StringOp {
}; };
// Handle the rest of the input pattern. // Handle the rest of the input pattern.
// Escaped previous character should not affect the current.
let mut prev = first.unwrap_or_default(); let mut prev = first.unwrap_or_default();
let mut prev_is_escaped = false; let mut prev_is_escaped = false;
for curr in pattern_chars { while let Some(curr) = pattern_chars.next() {
match curr { match curr {
// Carets are interpreted literally, unless used as character class negation "[^a]" '^' => match (prev, prev_is_escaped) {
'^' if prev_is_escaped || !matches!(prev, '\\' | '[') => { // Start of a capturing group
re_string.push_str(r"\^"); ('(', true)
// Start of an alternative pattern
| ('|', true)
// Character class negation "[^a]"
| ('[', false)
// Explicitly escaped caret
| ('\\', false) => re_string.push(curr),
_ => re_string.push_str(r"\^"),
},
'$' => {
if let Some('\\') = pattern_chars.peek() {
// The next character was checked to be a backslash
let backslash = pattern_chars.next().unwrap_or_default();
match pattern_chars.peek() {
// End of a capturing group
Some(')') => re_string.push('$'),
// End of an alternative pattern
Some('|') => re_string.push('$'),
_ => re_string.push_str(r"\$"),
}
re_string.push(backslash);
} else if (prev_is_escaped || prev != '\\')
&& pattern_chars.peek().is_some()
{
re_string.push_str(r"\$");
} else {
re_string.push('$');
}
} }
char => re_string.push(char), _ => re_string.push(curr),
} }
prev_is_escaped = prev == '\\' && !prev_is_escaped; prev_is_escaped = prev == '\\' && !prev_is_escaped;

View file

@ -282,10 +282,26 @@ fn test_regex() {
.args(&["a^b", ":", "a\\^b"]) .args(&["a^b", ":", "a\\^b"])
.succeeds() .succeeds()
.stdout_only("3\n"); .stdout_only("3\n");
new_ucmd!()
.args(&["b", ":", "a\\|^b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "\\(^a\\)b"])
.succeeds()
.stdout_only("a\n");
new_ucmd!() new_ucmd!()
.args(&["a$b", ":", "a\\$b"]) .args(&["a$b", ":", "a\\$b"])
.succeeds() .succeeds()
.stdout_only("3\n"); .stdout_only("3\n");
new_ucmd!()
.args(&["a", ":", "a$\\|b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "a\\(b$\\)"])
.succeeds()
.stdout_only("b\n");
new_ucmd!() new_ucmd!()
.args(&["abc", ":", "^abc"]) .args(&["abc", ":", "^abc"])
.succeeds() .succeeds()
@ -298,6 +314,10 @@ fn test_regex() {
.args(&["b^$ic", ":", "b^\\$ic"]) .args(&["b^$ic", ":", "b^\\$ic"])
.succeeds() .succeeds()
.stdout_only("5\n"); .stdout_only("5\n");
new_ucmd!()
.args(&["a$c", ":", "a$\\c"])
.succeeds()
.stdout_only("3\n");
new_ucmd!() new_ucmd!()
.args(&["^^^^^^^^^", ":", "^^^"]) .args(&["^^^^^^^^^", ":", "^^^"])
.succeeds() .succeeds()
@ -766,7 +786,6 @@ mod gnu_expr {
.stdout_only("3\n"); .stdout_only("3\n");
} }
#[ignore]
#[test] #[test]
fn test_bre11() { fn test_bre11() {
new_ucmd!() new_ucmd!()