mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 11:07:44 +00:00
Merge pull request #7953 from frendsick/fix/expr-regex-anchors
expr: Fix parsing regex anchors '^' and '$'
This commit is contained in:
commit
cd9ce77098
2 changed files with 53 additions and 8 deletions
|
@@ -156,7 +156,7 @@ impl StringOp {
|
||||||
re_string.push('^');
|
re_string.push('^');
|
||||||
|
|
||||||
// Handle first character from the input pattern
|
// Handle first character from the input pattern
|
||||||
let mut pattern_chars = right.chars();
|
let mut pattern_chars = right.chars().peekable();
|
||||||
let first = pattern_chars.next();
|
let first = pattern_chars.next();
|
||||||
match first {
|
match first {
|
||||||
Some('^') => {} // Start of string anchor is already added
|
Some('^') => {} // Start of string anchor is already added
|
||||||
|
@@ -166,16 +166,42 @@ impl StringOp {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Handle the rest of the input pattern.
|
// Handle the rest of the input pattern.
|
||||||
// Escaped previous character should not affect the current.
|
|
||||||
let mut prev = first.unwrap_or_default();
|
let mut prev = first.unwrap_or_default();
|
||||||
let mut prev_is_escaped = false;
|
let mut prev_is_escaped = false;
|
||||||
for curr in pattern_chars {
|
while let Some(curr) = pattern_chars.next() {
|
||||||
match curr {
|
match curr {
|
||||||
// Carets are interpreted literally, unless used as character class negation "[^a]"
|
'^' => match (prev, prev_is_escaped) {
|
||||||
'^' if prev_is_escaped || !matches!(prev, '\\' | '[') => {
|
// Start of a capturing group
|
||||||
re_string.push_str(r"\^");
|
('(', true)
|
||||||
|
// Start of an alternative pattern
|
||||||
|
| ('|', true)
|
||||||
|
// Character class negation "[^a]"
|
||||||
|
| ('[', false)
|
||||||
|
// Explicitly escaped caret
|
||||||
|
| ('\\', false) => re_string.push(curr),
|
||||||
|
_ => re_string.push_str(r"\^"),
|
||||||
|
},
|
||||||
|
'$' => {
|
||||||
|
if let Some('\\') = pattern_chars.peek() {
|
||||||
|
// The next character was checked to be a backslash
|
||||||
|
let backslash = pattern_chars.next().unwrap_or_default();
|
||||||
|
match pattern_chars.peek() {
|
||||||
|
// End of a capturing group
|
||||||
|
Some(')') => re_string.push('$'),
|
||||||
|
// End of an alternative pattern
|
||||||
|
Some('|') => re_string.push('$'),
|
||||||
|
_ => re_string.push_str(r"\$"),
|
||||||
|
}
|
||||||
|
re_string.push(backslash);
|
||||||
|
} else if (prev_is_escaped || prev != '\\')
|
||||||
|
&& pattern_chars.peek().is_some()
|
||||||
|
{
|
||||||
|
re_string.push_str(r"\$");
|
||||||
|
} else {
|
||||||
|
re_string.push('$');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
char => re_string.push(char),
|
_ => re_string.push(curr),
|
||||||
}
|
}
|
||||||
|
|
||||||
prev_is_escaped = prev == '\\' && !prev_is_escaped;
|
prev_is_escaped = prev == '\\' && !prev_is_escaped;
|
||||||
|
|
|
@@ -282,10 +282,26 @@ fn test_regex() {
|
||||||
.args(&["a^b", ":", "a\\^b"])
|
.args(&["a^b", ":", "a\\^b"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("3\n");
|
.stdout_only("3\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["b", ":", "a\\|^b"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("1\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["ab", ":", "\\(^a\\)b"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("a\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["a$b", ":", "a\\$b"])
|
.args(&["a$b", ":", "a\\$b"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("3\n");
|
.stdout_only("3\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["a", ":", "a$\\|b"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("1\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["ab", ":", "a\\(b$\\)"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("b\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["abc", ":", "^abc"])
|
.args(&["abc", ":", "^abc"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@@ -298,6 +314,10 @@ fn test_regex() {
|
||||||
.args(&["b^$ic", ":", "b^\\$ic"])
|
.args(&["b^$ic", ":", "b^\\$ic"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("5\n");
|
.stdout_only("5\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["a$c", ":", "a$\\c"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("3\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["^^^^^^^^^", ":", "^^^"])
|
.args(&["^^^^^^^^^", ":", "^^^"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@@ -766,7 +786,6 @@ mod gnu_expr {
|
||||||
.stdout_only("3\n");
|
.stdout_only("3\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bre11() {
|
fn test_bre11() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue