From f664578a4b781bf941231eea684673a881e1f177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Fri, 23 May 2025 15:22:18 +0300 Subject: [PATCH 1/2] expr: Handle special cases for `^` in regex --- src/uu/expr/src/syntax_tree.rs | 15 +++++++++++---- tests/by-util/test_expr.rs | 8 ++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 3026d5d41..8eb8ede92 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -171,11 +171,18 @@ impl StringOp { let mut prev_is_escaped = false; for curr in pattern_chars { match curr { - // Carets are interpreted literally, unless used as character class negation "[^a]" - '^' if prev_is_escaped || !matches!(prev, '\\' | '[') => { - re_string.push_str(r"\^"); + '^' => match (prev, prev_is_escaped) { + // Start of a capturing group + ('(', true) + // Start of an alternative pattern + | ('|', true) + // Character class negation "[^a]" + | ('[', false) + // Explicitly escaped caret + | ('\\', false) => re_string.push(curr), + _ => re_string.push_str(r"\^"), } - char => re_string.push(char), + _ => re_string.push(curr), } prev_is_escaped = prev == '\\' && !prev_is_escaped; diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 193737d10..4c0640e30 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -282,6 +282,14 @@ fn test_regex() { .args(&["a^b", ":", "a\\^b"]) .succeeds() .stdout_only("3\n"); + new_ucmd!() + .args(&["b", ":", "a\\|^b"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["ab", ":", "\\(^a\\)b"]) + .succeeds() + .stdout_only("a\n"); new_ucmd!() .args(&["a$b", ":", "a\\$b"]) .succeeds() From 29332865a939759b2265230fb2500d91b6d7842e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Fri, 23 May 2025 15:24:17 +0300 Subject: [PATCH 2/2] expr: Handle special cases for `$` in regex Enable fixed test `test_bre11` --- src/uu/expr/src/syntax_tree.rs | 25 ++++++++++++++++++++++--- tests/by-util/test_expr.rs | 13 ++++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 8eb8ede92..106b4bd68 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -156,7 +156,7 @@ impl StringOp { re_string.push('^'); // Handle first character from the input pattern - let mut pattern_chars = right.chars(); + let mut pattern_chars = right.chars().peekable(); let first = pattern_chars.next(); match first { Some('^') => {} // Start of string anchor is already added @@ -166,10 +166,9 @@ impl StringOp { }; // Handle the rest of the input pattern. - // Escaped previous character should not affect the current. let mut prev = first.unwrap_or_default(); let mut prev_is_escaped = false; - for curr in pattern_chars { + while let Some(curr) = pattern_chars.next() { match curr { '^' => match (prev, prev_is_escaped) { // Start of a capturing group @@ -181,6 +180,26 @@ impl StringOp { // Explicitly escaped caret | ('\\', false) => re_string.push(curr), _ => re_string.push_str(r"\^"), + }, + '$' => { + if let Some('\\') = pattern_chars.peek() { + // The next character was checked to be a backslash + let backslash = pattern_chars.next().unwrap_or_default(); + match pattern_chars.peek() { + // End of a capturing group + Some(')') => re_string.push('$'), + // End of an alternative pattern + Some('|') => re_string.push('$'), + _ => re_string.push_str(r"\$"), + } + re_string.push(backslash); + } else if (prev_is_escaped || prev != '\\') + && pattern_chars.peek().is_some() + { + re_string.push_str(r"\$"); + } else { + re_string.push('$'); + } } _ => re_string.push(curr), } diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 4c0640e30..c5fb96c3d 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -294,6 +294,14 @@ fn test_regex() { .args(&["a$b", ":", "a\\$b"]) .succeeds() .stdout_only("3\n"); + new_ucmd!() + .args(&["a", ":", "a$\\|b"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["ab", ":", "a\\(b$\\)"]) + .succeeds() + .stdout_only("b\n"); new_ucmd!() .args(&["abc", ":", "^abc"]) .succeeds() @@ -306,6 +314,10 @@ fn test_regex() { .args(&["b^$ic", ":", "b^\\$ic"]) .succeeds() .stdout_only("5\n"); + new_ucmd!() + .args(&["a$c", ":", "a$\\c"]) + .succeeds() + .stdout_only("3\n"); new_ucmd!() .args(&["^^^^^^^^^", ":", "^^^"]) .succeeds() @@ -774,7 +786,6 @@ mod gnu_expr { .stdout_only("3\n"); } - #[ignore] #[test] fn test_bre11() { new_ucmd!()