mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
expr: Fix parsing negated character classes "[^a]" (#7884)
* expr: Fix regex escape logic

  We have to track whether the previous character was already escaped to determine whether the '\' character should be interpreted as an escape character.

* expr: Fix parsing caret (^) as character class negation token
* expr: Add tests for parsing carets in regex
* expr: Add missing semicolon
* expr: Simplify boolean assignment

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>

---------

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>
This commit is contained in:
parent
69d5cf40b1
commit
4ee53acad0
2 changed files with 32 additions and 2 deletions
|
@ -166,13 +166,19 @@ impl StringOp {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Handle the rest of the input pattern.
|
// Handle the rest of the input pattern.
|
||||||
// Escape characters that should be handled literally within the pattern.
|
// Escaped previous character should not affect the current.
|
||||||
let mut prev = first.unwrap_or_default();
|
let mut prev = first.unwrap_or_default();
|
||||||
|
let mut prev_is_escaped = false;
|
||||||
for curr in pattern_chars {
|
for curr in pattern_chars {
|
||||||
match curr {
|
match curr {
|
||||||
'^' if prev != '\\' => re_string.push_str(r"\^"),
|
// Carets are interpreted literally, unless used as character class negation "[^a]"
|
||||||
|
'^' if prev_is_escaped || !matches!(prev, '\\' | '[') => {
|
||||||
|
re_string.push_str(r"\^");
|
||||||
|
}
|
||||||
char => re_string.push(char),
|
char => re_string.push(char),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prev_is_escaped = prev == '\\' && !prev_is_escaped;
|
||||||
prev = curr;
|
prev = curr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -302,6 +302,26 @@ fn test_regex() {
|
||||||
.args(&["^^^^^^^^^", ":", "^^^"])
|
.args(&["^^^^^^^^^", ":", "^^^"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("2\n");
|
.stdout_only("2\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["ab[^c]", ":", "ab[^c]"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("3\n"); // Matches "ab["
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["ab[^c]", ":", "ab\\[^c]"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("6\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["[^a]", ":", "\\[^a]"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("4\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["\\a", ":", "\\\\[^^]"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("2\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["^a", ":", "^^[^^]"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("2\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
|
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@ -319,6 +339,10 @@ fn test_regex() {
|
||||||
.args(&["^abc", ":", "^abc"])
|
.args(&["^abc", ":", "^abc"])
|
||||||
.fails()
|
.fails()
|
||||||
.stdout_only("0\n");
|
.stdout_only("0\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["abc", ":", "ab[^c]"])
|
||||||
|
.fails()
|
||||||
|
.stdout_only("0\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue