1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

Merge pull request #7974 from frendsick/fix/expr-regex-special-cases

expr: Handle more special cases for regex pattern
This commit is contained in:
Daniel Hofstetter 2025-05-25 13:39:44 +02:00 committed by GitHub
commit 3f9514d115
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 105 additions and 59 deletions

View file

@ -50,6 +50,8 @@ pub enum ExprError {
UnmatchedClosingBrace,
#[error("Invalid content of \\{{\\}}")]
InvalidBracketContent,
#[error("Trailing backslash")]
TrailingBackslash,
}
impl UError for ExprError {

View file

@ -161,6 +161,8 @@ impl StringOp {
match first {
Some('^') => {} // Start of string anchor is already added
Some('*') => re_string.push_str(r"\*"),
Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"),
Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash),
Some(char) => re_string.push(char),
None => return Ok(0.into()),
};
@ -169,6 +171,8 @@ impl StringOp {
let mut prev = first.unwrap_or_default();
let mut prev_is_escaped = false;
while let Some(curr) = pattern_chars.next() {
let curr_is_escaped = prev == '\\' && !prev_is_escaped;
match curr {
'^' => match (prev, prev_is_escaped) {
// Start of a capturing group
@ -181,25 +185,11 @@ impl StringOp {
| ('\\', false) => re_string.push(curr),
_ => re_string.push_str(r"\^"),
},
'$' => {
if let Some('\\') = pattern_chars.peek() {
// The next character was checked to be a backslash
let backslash = pattern_chars.next().unwrap_or_default();
match pattern_chars.peek() {
// End of a capturing group
Some(')') => re_string.push('$'),
// End of an alternative pattern
Some('|') => re_string.push('$'),
_ => re_string.push_str(r"\$"),
}
re_string.push(backslash);
} else if (prev_is_escaped || prev != '\\')
&& pattern_chars.peek().is_some()
{
re_string.push_str(r"\$");
} else {
re_string.push('$');
}
'$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => {
re_string.push_str(r"\$");
}
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
return Err(ExprError::TrailingBackslash);
}
_ => re_string.push(curr),
}
@ -241,6 +231,19 @@ impl StringOp {
}
}
/// Report whether the remaining pattern characters mark the end of a regex
/// expression or subexpression.
///
/// The answer is `true` when nothing is left in `pattern_chars`, or when the
/// next two characters are `\)` or `\|`. Any other continuation yields `false`,
/// including a lone trailing backslash.
///
/// The iterator is only inspected through a clone, so the caller's position
/// in the pattern is never advanced.
fn is_end_of_expression<I>(pattern_chars: &I) -> bool
where
    I: Iterator<Item = char> + Clone,
{
    // Look ahead on a private copy; the second character is only pulled when
    // the first one is a backslash.
    let mut lookahead = pattern_chars.clone();
    if let Some(head) = lookahead.next() {
        head == '\\' && matches!(lookahead.next(), Some(')') | Some('|'))
    } else {
        // No characters left
        true
    }
}
/// Check for errors in a supplied regular expression
///
/// GNU coreutils shows messages for invalid regular expressions

View file

@ -273,7 +273,36 @@ fn test_length_mb() {
}
#[test]
fn test_regex() {
fn test_regex_empty() {
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
new_ucmd!()
.args(&["abc", ":", ""])
.fails()
.stdout_only("0\n");
}
#[test]
fn test_regex_trailing_backslash() {
    // Each case is (string, pattern, outcome):
    //   Ok(stdout)  -> the command must succeed and print exactly `stdout`
    //   Err(stderr) -> the command must fail and print exactly `stderr`
    // An escaped backslash (`\\`) in the pattern is a literal backslash, while
    // a lone backslash at the end of the pattern is rejected.
    let cases: [(&str, &str, Result<&str, &str>); 4] = [
        ("\\", "\\\\", Ok("1\n")),
        ("\\", "\\", Err("expr: Trailing backslash\n")),
        ("abc\\", "abc\\\\", Ok("4\n")),
        ("abc\\", "abc\\", Err("expr: Trailing backslash\n")),
    ];

    for (string, pattern, outcome) in cases {
        match outcome {
            Ok(expected_stdout) => {
                new_ucmd!()
                    .args(&[string, ":", pattern])
                    .succeeds()
                    .stdout_only(expected_stdout);
            }
            Err(expected_stderr) => {
                new_ucmd!()
                    .args(&[string, ":", pattern])
                    .fails()
                    .stderr_only(expected_stderr);
            }
        }
    }
}
#[test]
fn test_regex_caret() {
new_ucmd!()
.args(&["a^b", ":", "a^b"])
.succeeds()
@ -282,26 +311,6 @@ fn test_regex() {
.args(&["a^b", ":", "a\\^b"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["b", ":", "a\\|^b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "\\(^a\\)b"])
.succeeds()
.stdout_only("a\n");
new_ucmd!()
.args(&["a$b", ":", "a\\$b"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["a", ":", "a$\\|b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "a\\(b$\\)"])
.succeeds()
.stdout_only("b\n");
new_ucmd!()
.args(&["abc", ":", "^abc"])
.succeeds()
@ -311,13 +320,17 @@ fn test_regex() {
.succeeds()
.stdout_only("4\n");
new_ucmd!()
.args(&["b^$ic", ":", "b^\\$ic"])
.args(&["b", ":", "a\\|^b"])
.succeeds()
.stdout_only("5\n");
.stdout_only("1\n");
new_ucmd!()
.args(&["a$c", ":", "a$\\c"])
.args(&["ab", ":", "\\(^a\\)b"])
.succeeds()
.stdout_only("3\n");
.stdout_only("a\n");
new_ucmd!()
.args(&["^abc", ":", "^abc"])
.fails()
.stdout_only("0\n");
new_ucmd!()
.args(&["^^^^^^^^^", ":", "^^^"])
.succeeds()
@ -338,29 +351,57 @@ fn test_regex() {
.args(&["\\a", ":", "\\\\[^^]"])
.succeeds()
.stdout_only("2\n");
new_ucmd!()
.args(&["^a", ":", "^^[^^]"])
.succeeds()
.stdout_only("2\n");
new_ucmd!()
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
.succeeds()
.stdout_only("2\n");
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
new_ucmd!()
.args(&["abc", ":", ""])
.fails()
.stdout_only("0\n");
// The match is implicitly anchored at the start of the string, so "bc" behaves like "^bc"
new_ucmd!()
.args(&["abc", ":", "bc"])
.fails()
.stdout_only("0\n");
new_ucmd!()
.args(&["^abc", ":", "^abc"])
.args(&["^a", ":", "^^[^^]"])
.succeeds()
.stdout_only("2\n");
new_ucmd!()
.args(&["abc", ":", "ab[^c]"])
.fails()
.stdout_only("0\n");
}
#[test]
fn test_regex_dollar() {
new_ucmd!()
.args(&["a$b", ":", "a\\$b"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["a", ":", "a$\\|b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "a\\(b$\\)"])
.succeeds()
.stdout_only("b\n");
new_ucmd!()
.args(&["a$c", ":", "a$\\c"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["$a", ":", "$a"])
.succeeds()
.stdout_only("2\n");
new_ucmd!()
.args(&["a", ":", "a$\\|b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
.succeeds()
.stdout_only("2\n");
new_ucmd!()
.args(&["$", ":", "$"])
.fails()
.stdout_only("0\n");
new_ucmd!()
.args(&["abc", ":", "ab[^c]"])
.args(&["a$", ":", "a$\\|b"])
.fails()
.stdout_only("0\n");
}