diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index 073bf501a..fa165f9f3 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -50,6 +50,8 @@ pub enum ExprError { UnmatchedClosingBrace, #[error("Invalid content of \\{{\\}}")] InvalidBracketContent, + #[error("Trailing backslash")] + TrailingBackslash, } impl UError for ExprError { diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 106b4bd68..b0326f7b6 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -161,6 +161,8 @@ impl StringOp { match first { Some('^') => {} // Start of string anchor is already added Some('*') => re_string.push_str(r"\*"), + Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"), + Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash), Some(char) => re_string.push(char), None => return Ok(0.into()), }; @@ -169,6 +171,8 @@ impl StringOp { let mut prev = first.unwrap_or_default(); let mut prev_is_escaped = false; while let Some(curr) = pattern_chars.next() { + let curr_is_escaped = prev == '\\' && !prev_is_escaped; + match curr { '^' => match (prev, prev_is_escaped) { // Start of a capturing group @@ -181,25 +185,11 @@ impl StringOp { | ('\\', false) => re_string.push(curr), _ => re_string.push_str(r"\^"), }, - '$' => { - if let Some('\\') = pattern_chars.peek() { - // The next character was checked to be a backslash - let backslash = pattern_chars.next().unwrap_or_default(); - match pattern_chars.peek() { - // End of a capturing group - Some(')') => re_string.push('$'), - // End of an alternative pattern - Some('|') => re_string.push('$'), - _ => re_string.push_str(r"\$"), - } - re_string.push(backslash); - } else if (prev_is_escaped || prev != '\\') - && pattern_chars.peek().is_some() - { - re_string.push_str(r"\$"); - } else { - re_string.push('$'); - } + '$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => { + re_string.push_str(r"\$"); + } + '\\' if !curr_is_escaped && pattern_chars.peek().is_none() => { + return Err(ExprError::TrailingBackslash); } _ => re_string.push(curr), } @@ -241,6 +231,19 @@ impl StringOp { } } +/// Check if regex pattern character iterator is at the end of a regex expression or subexpression +fn is_end_of_expression(pattern_chars: &I) -> bool +where + I: Iterator + Clone, +{ + let mut pattern_chars_clone = pattern_chars.clone(); + match pattern_chars_clone.next() { + Some('\\') => matches!(pattern_chars_clone.next(), Some(')' | '|')), + None => true, // No characters left + _ => false, + } +} + /// Check for errors in a supplied regular expression /// /// GNU coreutils shows messages for invalid regular expressions diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index c5fb96c3d..2c0eafe32 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -273,7 +273,36 @@ fn test_length_mb() { } #[test] -fn test_regex() { +fn test_regex_empty() { + new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n"); + new_ucmd!() + .args(&["abc", ":", ""]) + .fails() + .stdout_only("0\n"); +} + +#[test] +fn test_regex_trailing_backslash() { + new_ucmd!() + .args(&["\\", ":", "\\\\"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["\\", ":", "\\"]) + .fails() + .stderr_only("expr: Trailing backslash\n"); + new_ucmd!() + .args(&["abc\\", ":", "abc\\\\"]) + .succeeds() + .stdout_only("4\n"); + new_ucmd!() + .args(&["abc\\", ":", "abc\\"]) + .fails() + .stderr_only("expr: Trailing backslash\n"); +} + +#[test] +fn test_regex_caret() { new_ucmd!() .args(&["a^b", ":", "a^b"]) .succeeds() @@ -282,26 +311,6 @@ fn test_regex() { .args(&["a^b", ":", "a\\^b"]) .succeeds() .stdout_only("3\n"); - new_ucmd!() - .args(&["b", ":", "a\\|^b"]) - .succeeds() - .stdout_only("1\n"); - new_ucmd!() - .args(&["ab", ":", "\\(^a\\)b"]) - .succeeds() - .stdout_only("a\n"); - new_ucmd!() - .args(&["a$b", ":", "a\\$b"]) - .succeeds() - .stdout_only("3\n"); - new_ucmd!() - .args(&["a", ":", "a$\\|b"]) - .succeeds() - .stdout_only("1\n"); - new_ucmd!() - .args(&["ab", ":", "a\\(b$\\)"]) - .succeeds() - .stdout_only("b\n"); new_ucmd!() .args(&["abc", ":", "^abc"]) .succeeds() @@ -311,13 +320,17 @@ fn test_regex() { .succeeds() .stdout_only("4\n"); new_ucmd!() - .args(&["b^$ic", ":", "b^\\$ic"]) + .args(&["b", ":", "a\\|^b"]) .succeeds() - .stdout_only("5\n"); + .stdout_only("1\n"); new_ucmd!() - .args(&["a$c", ":", "a$\\c"]) + .args(&["ab", ":", "\\(^a\\)b"]) .succeeds() - .stdout_only("3\n"); + .stdout_only("a\n"); + new_ucmd!() + .args(&["^abc", ":", "^abc"]) + .fails() + .stdout_only("0\n"); new_ucmd!() .args(&["^^^^^^^^^", ":", "^^^"]) .succeeds() @@ -338,29 +351,57 @@ fn test_regex() { .args(&["\\a", ":", "\\\\[^^]"]) .succeeds() .stdout_only("2\n"); - new_ucmd!() - .args(&["^a", ":", "^^[^^]"]) - .succeeds() - .stdout_only("2\n"); - new_ucmd!() - .args(&["-5", ":", "-\\{0,1\\}[0-9]*$"]) - .succeeds() - .stdout_only("2\n"); - new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n"); - new_ucmd!() - .args(&["abc", ":", ""]) - .fails() - .stdout_only("0\n"); + // Patterns are anchored to the beginning of the pattern "^bc" new_ucmd!() .args(&["abc", ":", "bc"]) .fails() .stdout_only("0\n"); new_ucmd!() - .args(&["^abc", ":", "^abc"]) + .args(&["^a", ":", "^^[^^]"]) + .succeeds() + .stdout_only("2\n"); + new_ucmd!() + .args(&["abc", ":", "ab[^c]"]) + .fails() + .stdout_only("0\n"); +} + +#[test] +fn test_regex_dollar() { + new_ucmd!() + .args(&["a$b", ":", "a\\$b"]) + .succeeds() + .stdout_only("3\n"); + new_ucmd!() + .args(&["a", ":", "a$\\|b"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["ab", ":", "a\\(b$\\)"]) + .succeeds() + .stdout_only("b\n"); + new_ucmd!() + .args(&["a$c", ":", "a$\\c"]) + .succeeds() + .stdout_only("3\n"); + new_ucmd!() + .args(&["$a", ":", "$a"]) + .succeeds() + .stdout_only("2\n"); + new_ucmd!() + .args(&["a", ":", "a$\\|b"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["-5", ":", "-\\{0,1\\}[0-9]*$"]) + .succeeds() + .stdout_only("2\n"); + new_ucmd!() + .args(&["$", ":", "$"]) .fails() .stdout_only("0\n"); new_ucmd!() - .args(&["abc", ":", "ab[^c]"]) + .args(&["a$", ":", "a$\\|b"]) .fails() .stdout_only("0\n"); }