1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

expr: handle \{ literally at the start of an expression

Normally, `\{` begins a range quantifier like `{n,m}`, but at the start of
an expression, there is no preceding item to apply the quantifier to.
This commit is contained in:
Teemu Pätsi 2025-05-27 01:51:56 +03:00
parent 685df65cc5
commit 7789ef46a4
No known key found for this signature in database

View file

@ -169,37 +169,46 @@ impl StringOp {
// Handle the rest of the input pattern. // Handle the rest of the input pattern.
let mut prev = first.unwrap_or_default(); let mut prev = first.unwrap_or_default();
let mut prev_is_escaped = false; let mut prev_is_escaped = false;
let mut is_start_of_expression = first == Some('\\');
while let Some(curr) = pattern_chars.next() { while let Some(curr) = pattern_chars.next() {
let curr_is_escaped = prev == '\\' && !prev_is_escaped; let curr_is_escaped = prev == '\\' && !prev_is_escaped;
match curr { match curr {
'^' => match (prev, prev_is_escaped) { // Character class negation "[^a]"
// Start of a capturing group // Explicitly escaped caret "\^"
('(', true) '^' if !is_start_of_expression && !matches!(prev, '[' | '\\') => {
// Start of an alternative pattern re_string.push_str(r"\^");
| ('|', true) }
// Character class negation "[^a]"
| ('[', false)
// Explicitly escaped caret
| ('\\', false) => re_string.push(curr),
_ => re_string.push_str(r"\^"),
},
'$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => { '$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => {
re_string.push_str(r"\$"); re_string.push_str(r"\$");
} }
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => { '\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
return Err(ExprError::TrailingBackslash); return Err(ExprError::TrailingBackslash);
} }
'{' if curr_is_escaped && is_valid_range_quantifier(&pattern_chars) => { '{' if curr_is_escaped => {
re_string.push(curr); // Handle '{' literally at the start of an expression
// Set the lower bound of range quantifier to 0 if it is missing if is_start_of_expression {
if pattern_chars.peek() == Some(&',') { if re_string.ends_with('\\') {
re_string.push('0'); let _ = re_string.pop();
}
re_string.push(curr);
} else if is_valid_range_quantifier(&pattern_chars) {
re_string.push(curr);
// Set the lower bound of range quantifier to 0 if it is missing
if pattern_chars.peek() == Some(&',') {
re_string.push('0');
}
} else {
return Err(ExprError::InvalidBracketContent);
} }
} }
_ => re_string.push(curr), _ => re_string.push(curr),
} }
// Capturing group "\(abc\)"
// Alternative pattern "a\|b"
is_start_of_expression = curr_is_escaped && matches!(curr, '(' | '|')
|| curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|');
prev_is_escaped = curr_is_escaped; prev_is_escaped = curr_is_escaped;
prev = curr; prev = curr;
} }
@ -209,7 +218,14 @@ impl StringOp {
RegexOptions::REGEX_OPTION_SINGLELINE, RegexOptions::REGEX_OPTION_SINGLELINE,
Syntax::grep(), Syntax::grep(),
) )
.map_err(|_| ExprError::InvalidRegexExpression)?; .map_err(|error| match error.code() {
// "invalid repeat range {lower,upper}"
-123 => ExprError::InvalidBracketContent,
// "too big number for repeat range"
-201 => ExprError::InvalidBracketContent,
_ => ExprError::InvalidRegexExpression,
})?;
Ok(if re.captures_len() > 0 { Ok(if re.captures_len() > 0 {
re.captures(&left) re.captures(&left)
.and_then(|captures| captures.at(1)) .and_then(|captures| captures.at(1))
@ -286,8 +302,28 @@ where
} }
// Check if parsed quantifier is valid // Check if parsed quantifier is valid
let re = Regex::new(r"(\d+|\d*,\d*)").expect("valid regular expression"); let re = Regex::new(r"(\d*,\d*|\d+)").expect("valid regular expression");
re.is_match(&quantifier) match re.captures(&quantifier) {
None => false,
Some(captures) => {
let matched = captures.at(0).unwrap_or_default();
let mut repetition = matched.splitn(2, ',');
match (
repetition
.next()
.expect("splitn always returns at least one string"),
repetition.next(),
) {
("", Some("")) => true,
(x, None | Some("")) => x.parse::<i32>().map_or(true, |x| x <= i16::MAX as i32),
("", Some(x)) => x.parse::<i32>().map_or(true, |x| x <= i16::MAX as i32),
(f, Some(l)) => match (f.parse::<i32>(), l.parse::<i32>()) {
(Ok(f), Ok(l)) => f <= l && f <= i16::MAX as i32 && l <= i16::MAX as i32,
_ => false,
},
}
}
}
} }
/// Check for errors in a supplied regular expression /// Check for errors in a supplied regular expression
@ -306,77 +342,48 @@ where
fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> { fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
let mut escaped_parens: u64 = 0; let mut escaped_parens: u64 = 0;
let mut escaped_braces: u64 = 0; let mut escaped_braces: u64 = 0;
let mut escaped = false; let mut prev = '\0';
let mut prev_is_escaped = false;
let mut is_brace_ignored = false;
let mut is_start_of_expression = true;
let mut repeating_pattern_text = String::new(); for curr in pattern.chars() {
let mut invalid_content_error = false; let curr_is_escaped = prev == '\\' && !prev_is_escaped;
for c in pattern.chars() { match (curr_is_escaped, curr) {
match (escaped, c) { (true, '(') => escaped_parens += 1,
(true, ')') => { (true, ')') => {
escaped_parens = escaped_parens escaped_parens = escaped_parens
.checked_sub(1) .checked_sub(1)
.ok_or(ExprError::UnmatchedClosingParenthesis)?; .ok_or(ExprError::UnmatchedClosingParenthesis)?;
} }
(true, '(') => { (true, '{') => {
escaped_parens += 1; is_brace_ignored = is_start_of_expression;
if !is_brace_ignored {
escaped_braces += 1;
}
} }
(true, '}') => { (true, '}') => {
escaped_braces = escaped_braces if !is_brace_ignored {
.checked_sub(1) escaped_braces = escaped_braces
.ok_or(ExprError::UnmatchedClosingBrace)?; .checked_sub(1)
let mut repetition = .ok_or(ExprError::UnmatchedClosingBrace)?;
repeating_pattern_text[..repeating_pattern_text.len() - 1].splitn(2, ',');
match (
repetition
.next()
.expect("splitn always returns at least one string"),
repetition.next(),
) {
("", Some("")) => {}
(x, None | Some("")) => {
if x.parse::<i16>().is_err() {
invalid_content_error = true;
}
}
("", Some(x)) => {
if x.parse::<i16>().is_err() {
invalid_content_error = true;
}
}
(f, Some(l)) => {
if let (Ok(f), Ok(l)) = (f.parse::<i16>(), l.parse::<i16>()) {
invalid_content_error = invalid_content_error || f > l;
} else {
invalid_content_error = true;
}
}
}
repeating_pattern_text.clear();
}
(true, '{') => {
escaped_braces += 1;
}
_ => {
if escaped_braces > 0 && repeating_pattern_text.len() <= 13 {
repeating_pattern_text.push(c);
}
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
invalid_content_error = true;
} }
} }
_ => {}
} }
escaped = !escaped && c == '\\';
is_start_of_expression = prev == '\0'
|| curr_is_escaped && matches!(curr, '(' | '|')
|| curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|');
prev_is_escaped = curr_is_escaped;
prev = curr;
} }
match (
escaped_parens.is_zero(), match (escaped_parens.is_zero(), escaped_braces.is_zero()) {
escaped_braces.is_zero(), (true, true) => Ok(()),
invalid_content_error, (_, false) => Err(ExprError::UnmatchedOpeningBrace),
) { (false, _) => Err(ExprError::UnmatchedOpeningParenthesis),
(true, true, false) => Ok(()),
(_, false, _) => Err(ExprError::UnmatchedOpeningBrace),
(false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis),
(true, true, true) => Err(ExprError::InvalidBracketContent),
} }
} }