mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 11:07:44 +00:00
Merge pull request #7974 from frendsick/fix/expr-regex-special-cases
expr: Handle more special cases for regex pattern
This commit is contained in:
commit
3f9514d115
3 changed files with 105 additions and 59 deletions
|
@ -50,6 +50,8 @@ pub enum ExprError {
|
||||||
UnmatchedClosingBrace,
|
UnmatchedClosingBrace,
|
||||||
#[error("Invalid content of \\{{\\}}")]
|
#[error("Invalid content of \\{{\\}}")]
|
||||||
InvalidBracketContent,
|
InvalidBracketContent,
|
||||||
|
#[error("Trailing backslash")]
|
||||||
|
TrailingBackslash,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UError for ExprError {
|
impl UError for ExprError {
|
||||||
|
|
|
@ -161,6 +161,8 @@ impl StringOp {
|
||||||
match first {
|
match first {
|
||||||
Some('^') => {} // Start of string anchor is already added
|
Some('^') => {} // Start of string anchor is already added
|
||||||
Some('*') => re_string.push_str(r"\*"),
|
Some('*') => re_string.push_str(r"\*"),
|
||||||
|
Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"),
|
||||||
|
Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash),
|
||||||
Some(char) => re_string.push(char),
|
Some(char) => re_string.push(char),
|
||||||
None => return Ok(0.into()),
|
None => return Ok(0.into()),
|
||||||
};
|
};
|
||||||
|
@ -169,6 +171,8 @@ impl StringOp {
|
||||||
let mut prev = first.unwrap_or_default();
|
let mut prev = first.unwrap_or_default();
|
||||||
let mut prev_is_escaped = false;
|
let mut prev_is_escaped = false;
|
||||||
while let Some(curr) = pattern_chars.next() {
|
while let Some(curr) = pattern_chars.next() {
|
||||||
|
let curr_is_escaped = prev == '\\' && !prev_is_escaped;
|
||||||
|
|
||||||
match curr {
|
match curr {
|
||||||
'^' => match (prev, prev_is_escaped) {
|
'^' => match (prev, prev_is_escaped) {
|
||||||
// Start of a capturing group
|
// Start of a capturing group
|
||||||
|
@ -181,25 +185,11 @@ impl StringOp {
|
||||||
| ('\\', false) => re_string.push(curr),
|
| ('\\', false) => re_string.push(curr),
|
||||||
_ => re_string.push_str(r"\^"),
|
_ => re_string.push_str(r"\^"),
|
||||||
},
|
},
|
||||||
'$' => {
|
'$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => {
|
||||||
if let Some('\\') = pattern_chars.peek() {
|
|
||||||
// The next character was checked to be a backslash
|
|
||||||
let backslash = pattern_chars.next().unwrap_or_default();
|
|
||||||
match pattern_chars.peek() {
|
|
||||||
// End of a capturing group
|
|
||||||
Some(')') => re_string.push('$'),
|
|
||||||
// End of an alternative pattern
|
|
||||||
Some('|') => re_string.push('$'),
|
|
||||||
_ => re_string.push_str(r"\$"),
|
|
||||||
}
|
|
||||||
re_string.push(backslash);
|
|
||||||
} else if (prev_is_escaped || prev != '\\')
|
|
||||||
&& pattern_chars.peek().is_some()
|
|
||||||
{
|
|
||||||
re_string.push_str(r"\$");
|
re_string.push_str(r"\$");
|
||||||
} else {
|
|
||||||
re_string.push('$');
|
|
||||||
}
|
}
|
||||||
|
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
|
||||||
|
return Err(ExprError::TrailingBackslash);
|
||||||
}
|
}
|
||||||
_ => re_string.push(curr),
|
_ => re_string.push(curr),
|
||||||
}
|
}
|
||||||
|
@ -241,6 +231,19 @@ impl StringOp {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if regex pattern character iterator is at the end of a regex expression or subexpression
|
||||||
|
fn is_end_of_expression<I>(pattern_chars: &I) -> bool
|
||||||
|
where
|
||||||
|
I: Iterator<Item = char> + Clone,
|
||||||
|
{
|
||||||
|
let mut pattern_chars_clone = pattern_chars.clone();
|
||||||
|
match pattern_chars_clone.next() {
|
||||||
|
Some('\\') => matches!(pattern_chars_clone.next(), Some(')' | '|')),
|
||||||
|
None => true, // No characters left
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Check for errors in a supplied regular expression
|
/// Check for errors in a supplied regular expression
|
||||||
///
|
///
|
||||||
/// GNU coreutils shows messages for invalid regular expressions
|
/// GNU coreutils shows messages for invalid regular expressions
|
||||||
|
|
|
@ -273,7 +273,36 @@ fn test_length_mb() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_regex() {
|
fn test_regex_empty() {
|
||||||
|
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["abc", ":", ""])
|
||||||
|
.fails()
|
||||||
|
.stdout_only("0\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_regex_trailing_backslash() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["\\", ":", "\\\\"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("1\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["\\", ":", "\\"])
|
||||||
|
.fails()
|
||||||
|
.stderr_only("expr: Trailing backslash\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["abc\\", ":", "abc\\\\"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("4\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["abc\\", ":", "abc\\"])
|
||||||
|
.fails()
|
||||||
|
.stderr_only("expr: Trailing backslash\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_regex_caret() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["a^b", ":", "a^b"])
|
.args(&["a^b", ":", "a^b"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@ -282,26 +311,6 @@ fn test_regex() {
|
||||||
.args(&["a^b", ":", "a\\^b"])
|
.args(&["a^b", ":", "a\\^b"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("3\n");
|
.stdout_only("3\n");
|
||||||
new_ucmd!()
|
|
||||||
.args(&["b", ":", "a\\|^b"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("1\n");
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["ab", ":", "\\(^a\\)b"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("a\n");
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["a$b", ":", "a\\$b"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("3\n");
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["a", ":", "a$\\|b"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("1\n");
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["ab", ":", "a\\(b$\\)"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("b\n");
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["abc", ":", "^abc"])
|
.args(&["abc", ":", "^abc"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@ -311,13 +320,17 @@ fn test_regex() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("4\n");
|
.stdout_only("4\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["b^$ic", ":", "b^\\$ic"])
|
.args(&["b", ":", "a\\|^b"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("5\n");
|
.stdout_only("1\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["a$c", ":", "a$\\c"])
|
.args(&["ab", ":", "\\(^a\\)b"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("3\n");
|
.stdout_only("a\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["^abc", ":", "^abc"])
|
||||||
|
.fails()
|
||||||
|
.stdout_only("0\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["^^^^^^^^^", ":", "^^^"])
|
.args(&["^^^^^^^^^", ":", "^^^"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
|
@ -338,29 +351,57 @@ fn test_regex() {
|
||||||
.args(&["\\a", ":", "\\\\[^^]"])
|
.args(&["\\a", ":", "\\\\[^^]"])
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("2\n");
|
.stdout_only("2\n");
|
||||||
new_ucmd!()
|
// Patterns are anchored to the beginning of the pattern "^bc"
|
||||||
.args(&["^a", ":", "^^[^^]"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("2\n");
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
|
|
||||||
.succeeds()
|
|
||||||
.stdout_only("2\n");
|
|
||||||
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
|
|
||||||
new_ucmd!()
|
|
||||||
.args(&["abc", ":", ""])
|
|
||||||
.fails()
|
|
||||||
.stdout_only("0\n");
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["abc", ":", "bc"])
|
.args(&["abc", ":", "bc"])
|
||||||
.fails()
|
.fails()
|
||||||
.stdout_only("0\n");
|
.stdout_only("0\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["^abc", ":", "^abc"])
|
.args(&["^a", ":", "^^[^^]"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("2\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["abc", ":", "ab[^c]"])
|
||||||
|
.fails()
|
||||||
|
.stdout_only("0\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_regex_dollar() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["a$b", ":", "a\\$b"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("3\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["a", ":", "a$\\|b"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("1\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["ab", ":", "a\\(b$\\)"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("b\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["a$c", ":", "a$\\c"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("3\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["$a", ":", "$a"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("2\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["a", ":", "a$\\|b"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("1\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("2\n");
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["$", ":", "$"])
|
||||||
.fails()
|
.fails()
|
||||||
.stdout_only("0\n");
|
.stdout_only("0\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["abc", ":", "ab[^c]"])
|
.args(&["a$", ":", "a$\\|b"])
|
||||||
.fails()
|
.fails()
|
||||||
.stdout_only("0\n");
|
.stdout_only("0\n");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue