mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-01 21:47:46 +00:00
expr: Add specific errors for invalid regular expressions
This commit is contained in:
parent
0ba9a301b0
commit
6a7df7d6c1
2 changed files with 122 additions and 2 deletions
|
@ -35,6 +35,11 @@ pub enum ExprError {
|
|||
InvalidRegexExpression,
|
||||
ExpectedClosingBraceAfter(String),
|
||||
ExpectedClosingBraceInsteadOf(String),
|
||||
UnmatchedOpeningParenthesis,
|
||||
UnmatchedClosingParenthesis,
|
||||
UnmatchedOpeningBrace,
|
||||
UnmatchedClosingBrace,
|
||||
InvalidContent(String),
|
||||
}
|
||||
|
||||
impl Display for ExprError {
|
||||
|
@ -56,6 +61,21 @@ impl Display for ExprError {
|
|||
Self::ExpectedClosingBraceInsteadOf(s) => {
|
||||
write!(f, "syntax error: expecting ')' instead of {}", s.quote())
|
||||
}
|
||||
Self::UnmatchedOpeningParenthesis => {
|
||||
write!(f, "Unmatched ( or \\(")
|
||||
}
|
||||
Self::UnmatchedClosingParenthesis => {
|
||||
write!(f, "Unmatched ) or \\)")
|
||||
}
|
||||
Self::UnmatchedOpeningBrace => {
|
||||
write!(f, "Unmatched \\{{")
|
||||
}
|
||||
Self::UnmatchedClosingBrace => {
|
||||
write!(f, "Unmatched ) or \\}}")
|
||||
}
|
||||
Self::InvalidContent(s) => {
|
||||
write!(f, "Invalid content of {}", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
// spell-checker:ignore (ToDO) ints paren prec multibytes
|
||||
|
||||
use num_bigint::{BigInt, ParseBigIntError};
|
||||
use num_traits::ToPrimitive;
|
||||
use num_traits::{ToPrimitive, Zero};
|
||||
use onig::{Regex, RegexOptions, Syntax};
|
||||
|
||||
use crate::{ExprError, ExprResult};
|
||||
|
@ -139,6 +139,7 @@ impl StringOp {
|
|||
Self::Match => {
|
||||
let left = left.eval()?.eval_as_string();
|
||||
let right = right.eval()?.eval_as_string();
|
||||
validate_regex(&right)?;
|
||||
let re_string = format!("^{right}");
|
||||
let re = Regex::with_options(
|
||||
&re_string,
|
||||
|
@ -173,6 +174,65 @@ impl StringOp {
|
|||
}
|
||||
}
|
||||
|
||||
/// Check errors with a supplied regular expression
|
||||
///
|
||||
/// GNU coreutils shows messages for invalid regular expressions
|
||||
/// differently from the oniguruma library used by the regex crate.
|
||||
/// This method attempts to do these checks manually in one linear pass
|
||||
/// through the regular expression.
|
||||
fn validate_regex(pattern: &str) -> ExprResult<()> {
|
||||
let mut escaped_parens: u64 = 0;
|
||||
let mut escaped_braces: u64 = 0;
|
||||
let mut escaped = false;
|
||||
|
||||
let mut comma_in_braces = false;
|
||||
let mut invalid_content_error = false;
|
||||
|
||||
for c in pattern.chars() {
|
||||
match (escaped, c) {
|
||||
(true, ')') => {
|
||||
escaped_parens = escaped_parens
|
||||
.checked_sub(1)
|
||||
.ok_or(ExprError::UnmatchedClosingParenthesis)?;
|
||||
}
|
||||
(true, '(') => {
|
||||
escaped_parens += 1;
|
||||
}
|
||||
(true, '}') => {
|
||||
escaped_braces = escaped_braces
|
||||
.checked_sub(1)
|
||||
.ok_or(ExprError::UnmatchedClosingBrace)?;
|
||||
|
||||
if !comma_in_braces {
|
||||
// Empty repeating patterns are not valid
|
||||
return Err(ExprError::InvalidContent(r"\{\}".to_string()));
|
||||
}
|
||||
}
|
||||
(true, '{') => {
|
||||
comma_in_braces = false;
|
||||
escaped_braces += 1;
|
||||
}
|
||||
_ => {
|
||||
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
|
||||
invalid_content_error = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
escaped = !escaped && c == '\\';
|
||||
comma_in_braces = escaped_braces > 0 && (comma_in_braces || c == ',')
|
||||
}
|
||||
match (
|
||||
escaped_parens.is_zero(),
|
||||
escaped_braces.is_zero(),
|
||||
invalid_content_error,
|
||||
) {
|
||||
(true, true, false) => Ok(()),
|
||||
(_, false, _) => Err(ExprError::UnmatchedOpeningBrace),
|
||||
(false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis),
|
||||
(true, true, true) => Err(ExprError::InvalidContent(r"\{\}".to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Precedence for infix binary operators
|
||||
const PRECEDENCE: &[&[(&str, BinOp)]] = &[
|
||||
&[("|", BinOp::String(StringOp::Or))],
|
||||
|
@ -493,8 +553,9 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::ExprError;
|
||||
use crate::ExprError::InvalidContent;
|
||||
|
||||
use super::{AstNode, BinOp, NumericOp, RelationOp, StringOp};
|
||||
use super::{validate_regex, AstNode, BinOp, NumericOp, RelationOp, StringOp};
|
||||
|
||||
impl From<&str> for AstNode {
|
||||
fn from(value: &str) -> Self {
|
||||
|
@ -619,4 +680,43 @@ mod test {
|
|||
.unwrap();
|
||||
assert_eq!(result.eval_as_string(), "");
|
||||
}
|
||||
|
||||
#[test]
fn validate_regex_valid() {
    // The pattern mixes unescaped parentheses with a balanced escaped
    // `\(` ... `\)` pair; the validator is expected to accept it.
    let outcome = validate_regex(r"(a+b) \(a* b\)");
    assert!(outcome.is_ok());
}
|
||||
|
||||
#[test]
fn validate_regex_missing_closing() {
    // An escaped group that is opened but never closed.
    let unclosed_group = validate_regex(r"\(abc");
    assert_eq!(unclosed_group, Err(ExprError::UnmatchedOpeningParenthesis));

    // An escaped interval that is opened but never closed.
    let unclosed_interval = validate_regex(r"\{1,2");
    assert_eq!(unclosed_interval, Err(ExprError::UnmatchedOpeningBrace));
}
|
||||
|
||||
#[test]
fn validate_regex_missing_opening() {
    // A closing escaped parenthesis with nothing open before it.
    let stray_paren = validate_regex(r"abc\)");
    assert_eq!(stray_paren, Err(ExprError::UnmatchedClosingParenthesis));

    // A closing escaped brace with nothing open before it.
    let stray_brace = validate_regex(r"abc\}");
    assert_eq!(stray_brace, Err(ExprError::UnmatchedClosingBrace));
}
|
||||
|
||||
#[test]
fn validate_regex_empty_repeating_pattern() {
    // `\{\}` carries no repetition count, so it must be rejected.
    let expected = Err(InvalidContent(r"\{\}".to_string()));
    let actual = validate_regex("ab\\{\\}");
    assert_eq!(actual, expected);
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue