1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge pull request #7997 from frendsick/fix/expr-regex-range-quantifiers

expr: Fix parsing range quantifiers in regex
This commit is contained in:
Daniel Hofstetter 2025-05-26 17:01:32 +02:00 committed by GitHub
commit 8dbc37e403
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 68 additions and 11 deletions

View file

@ -160,7 +160,6 @@ impl StringOp {
let first = pattern_chars.next();
match first {
Some('^') => {} // Start of string anchor is already added
Some('*') => re_string.push_str(r"\*"),
Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"),
Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash),
Some(char) => re_string.push(char),
@ -191,10 +190,17 @@ impl StringOp {
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
return Err(ExprError::TrailingBackslash);
}
'{' if curr_is_escaped && is_valid_range_quantifier(&pattern_chars) => {
re_string.push(curr);
// Set the lower bound of range quantifier to 0 if it is missing
if pattern_chars.peek() == Some(&',') {
re_string.push('0');
}
}
_ => re_string.push(curr),
}
prev_is_escaped = prev == '\\' && !prev_is_escaped;
prev_is_escaped = curr_is_escaped;
prev = curr;
}
@ -244,6 +250,46 @@ where
}
}
/// Check whether a regex pattern character iterator is positioned at the start of a
/// valid range quantifier.
///
/// The iterator is expected to be positioned just after the opening brace (`\{`).
/// The quantifier ends at the first unescaped closing brace (`\}`); anything after
/// it is ignored. The caller's iterator is not advanced: a clone of it is scanned.
///
/// The text between the braces must be, in its entirety, either a non-empty run of
/// ASCII digits, or two (possibly empty) runs of ASCII digits separated by exactly
/// one comma.
///
/// Returns `false` when the closing brace is missing or when the text between the
/// braces does not have one of the shapes above.
///
/// # Examples of valid range quantifiers
///
/// - `r"\{3\}"`
/// - `r"\{3,\}"`
/// - `r"\{,6\}"`
/// - `r"\{3,6\}"`
/// - `r"\{,\}"`
fn is_valid_range_quantifier<I>(pattern_chars: &I) -> bool
where
    I: Iterator<Item = char> + Clone,
{
    // Collect the text between the braces. The scan runs one character behind the
    // cursor (`prev`), so the backslash of the closing `\}` is never collected.
    let mut quantifier = String::new();
    let mut pattern_chars_clone = pattern_chars.clone().peekable();
    let Some(mut prev) = pattern_chars_clone.next() else {
        return false;
    };
    let mut prev_is_escaped = false;
    while let Some(curr) = pattern_chars_clone.next() {
        // An unescaped backslash followed by `}` closes the quantifier.
        if prev == '\\' && curr == '}' && !prev_is_escaped {
            break;
        }
        // Input ran out before a closing brace was seen.
        if pattern_chars_clone.peek().is_none() {
            return false;
        }

        quantifier.push(prev);
        prev_is_escaped = prev == '\\' && !prev_is_escaped;
        prev = curr;
    }

    // Validate the whole collected text explicitly instead of with an unanchored
    // regex: the outcome no longer depends on the regex engine's matching mode or
    // alternation order, no regex is compiled per call, and only ASCII digits are
    // accepted. A single-character input leaves `quantifier` empty and is rejected
    // by the `None` arm below.
    let is_digits = |s: &str| s.bytes().all(|b| b.is_ascii_digit());
    match quantifier.split_once(',') {
        // `lower,upper` where either bound may be omitted.
        Some((lower, upper)) => is_digits(lower) && is_digits(upper),
        // Exact repetition count: at least one digit is required.
        None => !quantifier.is_empty() && is_digits(&quantifier),
    }
}
/// Check for errors in a supplied regular expression
///
/// GNU coreutils shows messages for invalid regular expressions
@ -287,10 +333,7 @@ fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
.expect("splitn always returns at least one string"),
repetition.next(),
) {
("", None) => {
// Empty repeating pattern
invalid_content_error = true;
}
("", Some("")) => {}
(x, None | Some("")) => {
if x.parse::<i16>().is_err() {
invalid_content_error = true;
@ -750,6 +793,7 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
mod test {
use crate::ExprError;
use crate::ExprError::InvalidBracketContent;
use crate::syntax_tree::is_valid_range_quantifier;
use super::{
AstNode, AstNodeInner, BinOp, NumericOp, RelationOp, StringOp, check_posix_regex_errors,
@ -998,4 +1042,22 @@ mod test {
Err(InvalidBracketContent)
);
}
/// Checks `is_valid_range_quantifier` against inputs that begin just after the
/// opening `\{` of a range quantifier.
#[test]
fn test_is_valid_range_quantifier() {
    // Shorthand: run the check on the characters of `input`.
    let accepts = |input: &str| is_valid_range_quantifier(&input.chars());

    // Closed quantifiers with every combination of present/missing bounds.
    assert!(accepts(r"3\}"));
    assert!(accepts(r"3,\}"));
    assert!(accepts(r",6\}"));
    assert!(accepts(r"3,6\}"));
    assert!(accepts(r",\}"));
    // Text after the closing brace is not part of the quantifier.
    assert!(accepts(r"3,6\}anything"));

    // The input must not include the opening brace itself.
    assert!(!accepts(r"\{3,6\}"));
    // Nothing between the braces, or nothing at all.
    assert!(!accepts(r"\}"));
    assert!(!accepts(""));
    // Missing closing brace.
    assert!(!accepts("3"));
    assert!(!accepts("3,"));
    assert!(!accepts(",6"));
    assert!(!accepts("3,6"));
    assert!(!accepts(","));
}
}

View file

@ -875,7 +875,6 @@ mod gnu_expr {
.stdout_only("\n");
}
#[ignore]
#[test]
fn test_bre17() {
new_ucmd!()
@ -884,7 +883,6 @@ mod gnu_expr {
.stdout_only("{1}a\n");
}
#[ignore]
#[test]
fn test_bre18() {
new_ucmd!()
@ -893,7 +891,6 @@ mod gnu_expr {
.stdout_only("1\n");
}
#[ignore]
#[test]
fn test_bre19() {
new_ucmd!()
@ -1105,7 +1102,6 @@ mod gnu_expr {
.stderr_contains("Invalid content of \\{\\}");
}
#[ignore]
#[test]
fn test_bre45() {
new_ucmd!()
@ -1114,7 +1110,6 @@ mod gnu_expr {
.stdout_only("1\n");
}
#[ignore]
#[test]
fn test_bre46() {
new_ucmd!()