mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
Merge pull request #7997 from frendsick/fix/expr-regex-range-quantifiers
expr: Fix parsing range quantifiers in regex
This commit is contained in:
commit
8dbc37e403
2 changed files with 68 additions and 11 deletions
|
@ -160,7 +160,6 @@ impl StringOp {
|
||||||
let first = pattern_chars.next();
|
let first = pattern_chars.next();
|
||||||
match first {
|
match first {
|
||||||
Some('^') => {} // Start of string anchor is already added
|
Some('^') => {} // Start of string anchor is already added
|
||||||
Some('*') => re_string.push_str(r"\*"),
|
|
||||||
Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"),
|
Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"),
|
||||||
Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash),
|
Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash),
|
||||||
Some(char) => re_string.push(char),
|
Some(char) => re_string.push(char),
|
||||||
|
@ -191,10 +190,17 @@ impl StringOp {
|
||||||
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
|
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
|
||||||
return Err(ExprError::TrailingBackslash);
|
return Err(ExprError::TrailingBackslash);
|
||||||
}
|
}
|
||||||
|
'{' if curr_is_escaped && is_valid_range_quantifier(&pattern_chars) => {
|
||||||
|
re_string.push(curr);
|
||||||
|
// Set the lower bound of range quantifier to 0 if it is missing
|
||||||
|
if pattern_chars.peek() == Some(&',') {
|
||||||
|
re_string.push('0');
|
||||||
|
}
|
||||||
|
}
|
||||||
_ => re_string.push(curr),
|
_ => re_string.push(curr),
|
||||||
}
|
}
|
||||||
|
|
||||||
prev_is_escaped = prev == '\\' && !prev_is_escaped;
|
prev_is_escaped = curr_is_escaped;
|
||||||
prev = curr;
|
prev = curr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,6 +250,46 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if regex pattern character iterator is at the start of a valid range quantifier.
///
/// The iterator is expected to be positioned just after the opening brace (`\{`).
/// It is only inspected through a clone, so the caller's position is left untouched.
/// The range quantifier ends at the first escaped closing brace (`\}`); anything
/// after that is ignored.
///
/// The text between the braces must be either one or more ASCII digits, or an
/// optional lower bound, a single comma and an optional upper bound.
///
/// Returns `false` when the closing `\}` is missing, when the quantifier is
/// empty, or when it contains any other character.
///
/// # Examples of valid range quantifiers
///
/// - `r"\{3\}"`
/// - `r"\{3,\}"`
/// - `r"\{,6\}"`
/// - `r"\{3,6\}"`
/// - `r"\{,\}"`
fn is_valid_range_quantifier<I>(pattern_chars: &I) -> bool
where
    I: Iterator<Item = char> + Clone,
{
    let mut remaining = pattern_chars.clone();
    let mut seen_digit = false;
    let mut seen_comma = false;

    loop {
        match remaining.next() {
            Some(c) if c.is_ascii_digit() => seen_digit = true,
            // Only a single comma may separate the lower and upper bound
            Some(',') if !seen_comma => seen_comma = true,
            // A backslash is only allowed as the start of the closing `\}`,
            // and the quantifier in front of it must not be empty
            Some('\\') => {
                return (seen_digit || seen_comma) && remaining.next() == Some('}');
            }
            // Any other character, a second comma, or running out of input
            // before the closing `\}` makes the quantifier invalid
            _ => return false,
        }
    }
}
|
||||||
|
|
||||||
/// Check for errors in a supplied regular expression
|
/// Check for errors in a supplied regular expression
|
||||||
///
|
///
|
||||||
/// GNU coreutils shows messages for invalid regular expressions
|
/// GNU coreutils shows messages for invalid regular expressions
|
||||||
|
@ -287,10 +333,7 @@ fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
|
||||||
.expect("splitn always returns at least one string"),
|
.expect("splitn always returns at least one string"),
|
||||||
repetition.next(),
|
repetition.next(),
|
||||||
) {
|
) {
|
||||||
("", None) => {
|
("", Some("")) => {}
|
||||||
// Empty repeating pattern
|
|
||||||
invalid_content_error = true;
|
|
||||||
}
|
|
||||||
(x, None | Some("")) => {
|
(x, None | Some("")) => {
|
||||||
if x.parse::<i16>().is_err() {
|
if x.parse::<i16>().is_err() {
|
||||||
invalid_content_error = true;
|
invalid_content_error = true;
|
||||||
|
@ -750,6 +793,7 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
|
||||||
mod test {
|
mod test {
|
||||||
use crate::ExprError;
|
use crate::ExprError;
|
||||||
use crate::ExprError::InvalidBracketContent;
|
use crate::ExprError::InvalidBracketContent;
|
||||||
|
use crate::syntax_tree::is_valid_range_quantifier;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
AstNode, AstNodeInner, BinOp, NumericOp, RelationOp, StringOp, check_posix_regex_errors,
|
AstNode, AstNodeInner, BinOp, NumericOp, RelationOp, StringOp, check_posix_regex_errors,
|
||||||
|
@ -998,4 +1042,22 @@ mod test {
|
||||||
Err(InvalidBracketContent)
|
Err(InvalidBracketContent)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_is_valid_range_quantifier() {
    // Every input starts right after the opening `\{` of the quantifier
    let accepts = |input: &str| is_valid_range_quantifier(&input.chars());

    // Well-formed bounds terminated by `\}`
    assert!(accepts("3\\}"));
    assert!(accepts("3,\\}"));
    assert!(accepts(",6\\}"));
    assert!(accepts("3,6\\}"));
    assert!(accepts(",\\}"));

    // Whatever follows the closing brace is not inspected
    assert!(accepts("3,6\\}anything"));

    // Opening brace still present, or nothing between the braces
    assert!(!accepts("\\{3,6\\}"));
    assert!(!accepts("\\}"));
    assert!(!accepts(""));

    // Closing `\}` is missing
    assert!(!accepts("3"));
    assert!(!accepts("3,"));
    assert!(!accepts(",6"));
    assert!(!accepts("3,6"));
    assert!(!accepts(","));
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -875,7 +875,6 @@ mod gnu_expr {
|
||||||
.stdout_only("\n");
|
.stdout_only("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bre17() {
|
fn test_bre17() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
@ -884,7 +883,6 @@ mod gnu_expr {
|
||||||
.stdout_only("{1}a\n");
|
.stdout_only("{1}a\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bre18() {
|
fn test_bre18() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
@ -893,7 +891,6 @@ mod gnu_expr {
|
||||||
.stdout_only("1\n");
|
.stdout_only("1\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bre19() {
|
fn test_bre19() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
@ -1105,7 +1102,6 @@ mod gnu_expr {
|
||||||
.stderr_contains("Invalid content of \\{\\}");
|
.stderr_contains("Invalid content of \\{\\}");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bre45() {
|
fn test_bre45() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
@ -1114,7 +1110,6 @@ mod gnu_expr {
|
||||||
.stdout_only("1\n");
|
.stdout_only("1\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[ignore]
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bre46() {
|
fn test_bre46() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue