From 4ee53acad0864cda11df54108ecbbc34693e8e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= <44954973+frendsick@users.noreply.github.com> Date: Sun, 4 May 2025 18:35:18 +0300 Subject: [PATCH] expr: Fix parsing negated character classes "[^a]" (#7884) * expr: Fix regex escape logic We have to track if the previous character was already escaped to determine if the '\' character should be interpreted as an escape character. * expr: Fix parsing caret (^) as character class negation token * expr: Add tests for parsing carets in regex * expr: Add missing semicolon * expr: Simplify boolean assignment Co-authored-by: Daniel Hofstetter --------- Co-authored-by: Daniel Hofstetter --- src/uu/expr/src/syntax_tree.rs | 10 ++++++++-- tests/by-util/test_expr.rs | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index c555e6ccc..3026d5d41 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -166,13 +166,19 @@ impl StringOp { }; // Handle the rest of the input pattern. - // Escape characters that should be handled literally within the pattern. + // Escaped previous character should not affect the current. let mut prev = first.unwrap_or_default(); + let mut prev_is_escaped = false; for curr in pattern_chars { match curr { - '^' if prev != '\\' => re_string.push_str(r"\^"), + // Carets are interpreted literally, unless used as character class negation "[^a]" + '^' if prev_is_escaped || !matches!(prev, '\\' | '[') => { + re_string.push_str(r"\^"); + } char => re_string.push(char), } + + prev_is_escaped = prev == '\\' && !prev_is_escaped; prev = curr; } diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 5ac5c262d..193737d10 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -302,6 +302,26 @@ fn test_regex() { .args(&["^^^^^^^^^", ":", "^^^"]) .succeeds() .stdout_only("2\n"); + new_ucmd!() + .args(&["ab[^c]", ":", "ab[^c]"]) + .succeeds() + .stdout_only("3\n"); // Matches "ab[" + new_ucmd!() + .args(&["ab[^c]", ":", "ab\\[^c]"]) + .succeeds() + .stdout_only("6\n"); + new_ucmd!() + .args(&["[^a]", ":", "\\[^a]"]) + .succeeds() + .stdout_only("4\n"); + new_ucmd!() + .args(&["\\a", ":", "\\\\[^^]"]) + .succeeds() + .stdout_only("2\n"); + new_ucmd!() + .args(&["^a", ":", "^^[^^]"]) + .succeeds() + .stdout_only("2\n"); new_ucmd!() .args(&["-5", ":", "-\\{0,1\\}[0-9]*$"]) .succeeds() @@ -319,6 +339,10 @@ fn test_regex() { .args(&["^abc", ":", "^abc"]) .fails() .stdout_only("0\n"); + new_ucmd!() + .args(&["abc", ":", "ab[^c]"]) + .fails() + .stdout_only("0\n"); } #[test]