mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
expr: handle `\{` literally at the start of an expression
Normally, `\{` begins a range quantifier like `\{n,m\}`, but at the start of an expression there is no preceding item to apply the quantifier to, so the brace is matched literally instead.
This commit is contained in:
parent
685df65cc5
commit
7789ef46a4
1 changed file with 84 additions and 77 deletions
|
@ -169,37 +169,46 @@ impl StringOp {
|
||||||
// Handle the rest of the input pattern.
|
// Handle the rest of the input pattern.
|
||||||
let mut prev = first.unwrap_or_default();
|
let mut prev = first.unwrap_or_default();
|
||||||
let mut prev_is_escaped = false;
|
let mut prev_is_escaped = false;
|
||||||
|
let mut is_start_of_expression = first == Some('\\');
|
||||||
while let Some(curr) = pattern_chars.next() {
|
while let Some(curr) = pattern_chars.next() {
|
||||||
let curr_is_escaped = prev == '\\' && !prev_is_escaped;
|
let curr_is_escaped = prev == '\\' && !prev_is_escaped;
|
||||||
|
|
||||||
match curr {
|
match curr {
|
||||||
'^' => match (prev, prev_is_escaped) {
|
// Character class negation "[^a]"
|
||||||
// Start of a capturing group
|
// Explicitly escaped caret "\^"
|
||||||
('(', true)
|
'^' if !is_start_of_expression && !matches!(prev, '[' | '\\') => {
|
||||||
// Start of an alternative pattern
|
re_string.push_str(r"\^");
|
||||||
| ('|', true)
|
}
|
||||||
// Character class negation "[^a]"
|
|
||||||
| ('[', false)
|
|
||||||
// Explicitly escaped caret
|
|
||||||
| ('\\', false) => re_string.push(curr),
|
|
||||||
_ => re_string.push_str(r"\^"),
|
|
||||||
},
|
|
||||||
'$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => {
|
'$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => {
|
||||||
re_string.push_str(r"\$");
|
re_string.push_str(r"\$");
|
||||||
}
|
}
|
||||||
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
|
'\\' if !curr_is_escaped && pattern_chars.peek().is_none() => {
|
||||||
return Err(ExprError::TrailingBackslash);
|
return Err(ExprError::TrailingBackslash);
|
||||||
}
|
}
|
||||||
'{' if curr_is_escaped && is_valid_range_quantifier(&pattern_chars) => {
|
'{' if curr_is_escaped => {
|
||||||
re_string.push(curr);
|
// Handle '{' literally at the start of an expression
|
||||||
// Set the lower bound of range quantifier to 0 if it is missing
|
if is_start_of_expression {
|
||||||
if pattern_chars.peek() == Some(&',') {
|
if re_string.ends_with('\\') {
|
||||||
re_string.push('0');
|
let _ = re_string.pop();
|
||||||
|
}
|
||||||
|
re_string.push(curr);
|
||||||
|
} else if is_valid_range_quantifier(&pattern_chars) {
|
||||||
|
re_string.push(curr);
|
||||||
|
// Set the lower bound of range quantifier to 0 if it is missing
|
||||||
|
if pattern_chars.peek() == Some(&',') {
|
||||||
|
re_string.push('0');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(ExprError::InvalidBracketContent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => re_string.push(curr),
|
_ => re_string.push(curr),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Capturing group "\(abc\)"
|
||||||
|
// Alternative pattern "a\|b"
|
||||||
|
is_start_of_expression = curr_is_escaped && matches!(curr, '(' | '|')
|
||||||
|
|| curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|');
|
||||||
prev_is_escaped = curr_is_escaped;
|
prev_is_escaped = curr_is_escaped;
|
||||||
prev = curr;
|
prev = curr;
|
||||||
}
|
}
|
||||||
|
@ -209,7 +218,14 @@ impl StringOp {
|
||||||
RegexOptions::REGEX_OPTION_SINGLELINE,
|
RegexOptions::REGEX_OPTION_SINGLELINE,
|
||||||
Syntax::grep(),
|
Syntax::grep(),
|
||||||
)
|
)
|
||||||
.map_err(|_| ExprError::InvalidRegexExpression)?;
|
.map_err(|error| match error.code() {
|
||||||
|
// "invalid repeat range {lower,upper}"
|
||||||
|
-123 => ExprError::InvalidBracketContent,
|
||||||
|
// "too big number for repeat range"
|
||||||
|
-201 => ExprError::InvalidBracketContent,
|
||||||
|
_ => ExprError::InvalidRegexExpression,
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok(if re.captures_len() > 0 {
|
Ok(if re.captures_len() > 0 {
|
||||||
re.captures(&left)
|
re.captures(&left)
|
||||||
.and_then(|captures| captures.at(1))
|
.and_then(|captures| captures.at(1))
|
||||||
|
@ -286,8 +302,28 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if parsed quantifier is valid
|
// Check if parsed quantifier is valid
|
||||||
let re = Regex::new(r"(\d+|\d*,\d*)").expect("valid regular expression");
|
let re = Regex::new(r"(\d*,\d*|\d+)").expect("valid regular expression");
|
||||||
re.is_match(&quantifier)
|
match re.captures(&quantifier) {
|
||||||
|
None => false,
|
||||||
|
Some(captures) => {
|
||||||
|
let matched = captures.at(0).unwrap_or_default();
|
||||||
|
let mut repetition = matched.splitn(2, ',');
|
||||||
|
match (
|
||||||
|
repetition
|
||||||
|
.next()
|
||||||
|
.expect("splitn always returns at least one string"),
|
||||||
|
repetition.next(),
|
||||||
|
) {
|
||||||
|
("", Some("")) => true,
|
||||||
|
(x, None | Some("")) => x.parse::<i32>().map_or(true, |x| x <= i16::MAX as i32),
|
||||||
|
("", Some(x)) => x.parse::<i32>().map_or(true, |x| x <= i16::MAX as i32),
|
||||||
|
(f, Some(l)) => match (f.parse::<i32>(), l.parse::<i32>()) {
|
||||||
|
(Ok(f), Ok(l)) => f <= l && f <= i16::MAX as i32 && l <= i16::MAX as i32,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check for errors in a supplied regular expression
|
/// Check for errors in a supplied regular expression
|
||||||
|
@ -306,77 +342,48 @@ where
|
||||||
fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
|
fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> {
|
||||||
let mut escaped_parens: u64 = 0;
|
let mut escaped_parens: u64 = 0;
|
||||||
let mut escaped_braces: u64 = 0;
|
let mut escaped_braces: u64 = 0;
|
||||||
let mut escaped = false;
|
let mut prev = '\0';
|
||||||
|
let mut prev_is_escaped = false;
|
||||||
|
let mut is_brace_ignored = false;
|
||||||
|
let mut is_start_of_expression = true;
|
||||||
|
|
||||||
let mut repeating_pattern_text = String::new();
|
for curr in pattern.chars() {
|
||||||
let mut invalid_content_error = false;
|
let curr_is_escaped = prev == '\\' && !prev_is_escaped;
|
||||||
|
|
||||||
for c in pattern.chars() {
|
match (curr_is_escaped, curr) {
|
||||||
match (escaped, c) {
|
(true, '(') => escaped_parens += 1,
|
||||||
(true, ')') => {
|
(true, ')') => {
|
||||||
escaped_parens = escaped_parens
|
escaped_parens = escaped_parens
|
||||||
.checked_sub(1)
|
.checked_sub(1)
|
||||||
.ok_or(ExprError::UnmatchedClosingParenthesis)?;
|
.ok_or(ExprError::UnmatchedClosingParenthesis)?;
|
||||||
}
|
}
|
||||||
(true, '(') => {
|
(true, '{') => {
|
||||||
escaped_parens += 1;
|
is_brace_ignored = is_start_of_expression;
|
||||||
|
if !is_brace_ignored {
|
||||||
|
escaped_braces += 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
(true, '}') => {
|
(true, '}') => {
|
||||||
escaped_braces = escaped_braces
|
if !is_brace_ignored {
|
||||||
.checked_sub(1)
|
escaped_braces = escaped_braces
|
||||||
.ok_or(ExprError::UnmatchedClosingBrace)?;
|
.checked_sub(1)
|
||||||
let mut repetition =
|
.ok_or(ExprError::UnmatchedClosingBrace)?;
|
||||||
repeating_pattern_text[..repeating_pattern_text.len() - 1].splitn(2, ',');
|
|
||||||
match (
|
|
||||||
repetition
|
|
||||||
.next()
|
|
||||||
.expect("splitn always returns at least one string"),
|
|
||||||
repetition.next(),
|
|
||||||
) {
|
|
||||||
("", Some("")) => {}
|
|
||||||
(x, None | Some("")) => {
|
|
||||||
if x.parse::<i16>().is_err() {
|
|
||||||
invalid_content_error = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
("", Some(x)) => {
|
|
||||||
if x.parse::<i16>().is_err() {
|
|
||||||
invalid_content_error = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(f, Some(l)) => {
|
|
||||||
if let (Ok(f), Ok(l)) = (f.parse::<i16>(), l.parse::<i16>()) {
|
|
||||||
invalid_content_error = invalid_content_error || f > l;
|
|
||||||
} else {
|
|
||||||
invalid_content_error = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
repeating_pattern_text.clear();
|
|
||||||
}
|
|
||||||
(true, '{') => {
|
|
||||||
escaped_braces += 1;
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
if escaped_braces > 0 && repeating_pattern_text.len() <= 13 {
|
|
||||||
repeating_pattern_text.push(c);
|
|
||||||
}
|
|
||||||
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
|
|
||||||
invalid_content_error = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
escaped = !escaped && c == '\\';
|
|
||||||
|
is_start_of_expression = prev == '\0'
|
||||||
|
|| curr_is_escaped && matches!(curr, '(' | '|')
|
||||||
|
|| curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|');
|
||||||
|
prev_is_escaped = curr_is_escaped;
|
||||||
|
prev = curr;
|
||||||
}
|
}
|
||||||
match (
|
|
||||||
escaped_parens.is_zero(),
|
match (escaped_parens.is_zero(), escaped_braces.is_zero()) {
|
||||||
escaped_braces.is_zero(),
|
(true, true) => Ok(()),
|
||||||
invalid_content_error,
|
(_, false) => Err(ExprError::UnmatchedOpeningBrace),
|
||||||
) {
|
(false, _) => Err(ExprError::UnmatchedOpeningParenthesis),
|
||||||
(true, true, false) => Ok(()),
|
|
||||||
(_, false, _) => Err(ExprError::UnmatchedOpeningBrace),
|
|
||||||
(false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis),
|
|
||||||
(true, true, true) => Err(ExprError::InvalidBracketContent),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue