mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-02 05:57:46 +00:00
expr: Add specific errors for invalid regular expressions
This commit is contained in:
parent
0ba9a301b0
commit
6a7df7d6c1
2 changed files with 122 additions and 2 deletions
|
@ -35,6 +35,11 @@ pub enum ExprError {
|
||||||
InvalidRegexExpression,
|
InvalidRegexExpression,
|
||||||
ExpectedClosingBraceAfter(String),
|
ExpectedClosingBraceAfter(String),
|
||||||
ExpectedClosingBraceInsteadOf(String),
|
ExpectedClosingBraceInsteadOf(String),
|
||||||
|
UnmatchedOpeningParenthesis,
|
||||||
|
UnmatchedClosingParenthesis,
|
||||||
|
UnmatchedOpeningBrace,
|
||||||
|
UnmatchedClosingBrace,
|
||||||
|
InvalidContent(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for ExprError {
|
impl Display for ExprError {
|
||||||
|
@ -56,6 +61,21 @@ impl Display for ExprError {
|
||||||
Self::ExpectedClosingBraceInsteadOf(s) => {
|
Self::ExpectedClosingBraceInsteadOf(s) => {
|
||||||
write!(f, "syntax error: expecting ')' instead of {}", s.quote())
|
write!(f, "syntax error: expecting ')' instead of {}", s.quote())
|
||||||
}
|
}
|
||||||
|
Self::UnmatchedOpeningParenthesis => {
|
||||||
|
write!(f, "Unmatched ( or \\(")
|
||||||
|
}
|
||||||
|
Self::UnmatchedClosingParenthesis => {
|
||||||
|
write!(f, "Unmatched ) or \\)")
|
||||||
|
}
|
||||||
|
Self::UnmatchedOpeningBrace => {
|
||||||
|
write!(f, "Unmatched \\{{")
|
||||||
|
}
|
||||||
|
Self::UnmatchedClosingBrace => {
|
||||||
|
write!(f, "Unmatched ) or \\}}")
|
||||||
|
}
|
||||||
|
Self::InvalidContent(s) => {
|
||||||
|
write!(f, "Invalid content of {}", s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
// spell-checker:ignore (ToDO) ints paren prec multibytes
|
// spell-checker:ignore (ToDO) ints paren prec multibytes
|
||||||
|
|
||||||
use num_bigint::{BigInt, ParseBigIntError};
|
use num_bigint::{BigInt, ParseBigIntError};
|
||||||
use num_traits::ToPrimitive;
|
use num_traits::{ToPrimitive, Zero};
|
||||||
use onig::{Regex, RegexOptions, Syntax};
|
use onig::{Regex, RegexOptions, Syntax};
|
||||||
|
|
||||||
use crate::{ExprError, ExprResult};
|
use crate::{ExprError, ExprResult};
|
||||||
|
@ -139,6 +139,7 @@ impl StringOp {
|
||||||
Self::Match => {
|
Self::Match => {
|
||||||
let left = left.eval()?.eval_as_string();
|
let left = left.eval()?.eval_as_string();
|
||||||
let right = right.eval()?.eval_as_string();
|
let right = right.eval()?.eval_as_string();
|
||||||
|
validate_regex(&right)?;
|
||||||
let re_string = format!("^{right}");
|
let re_string = format!("^{right}");
|
||||||
let re = Regex::with_options(
|
let re = Regex::with_options(
|
||||||
&re_string,
|
&re_string,
|
||||||
|
@ -173,6 +174,65 @@ impl StringOp {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check errors with a supplied regular expression
|
||||||
|
///
|
||||||
|
/// GNU coreutils shows messages for invalid regular expressions
|
||||||
|
/// differently from the oniguruma library used by the regex crate.
|
||||||
|
/// This method attempts to do these checks manually in one linear pass
|
||||||
|
/// through the regular expression.
|
||||||
|
fn validate_regex(pattern: &str) -> ExprResult<()> {
|
||||||
|
let mut escaped_parens: u64 = 0;
|
||||||
|
let mut escaped_braces: u64 = 0;
|
||||||
|
let mut escaped = false;
|
||||||
|
|
||||||
|
let mut comma_in_braces = false;
|
||||||
|
let mut invalid_content_error = false;
|
||||||
|
|
||||||
|
for c in pattern.chars() {
|
||||||
|
match (escaped, c) {
|
||||||
|
(true, ')') => {
|
||||||
|
escaped_parens = escaped_parens
|
||||||
|
.checked_sub(1)
|
||||||
|
.ok_or(ExprError::UnmatchedClosingParenthesis)?;
|
||||||
|
}
|
||||||
|
(true, '(') => {
|
||||||
|
escaped_parens += 1;
|
||||||
|
}
|
||||||
|
(true, '}') => {
|
||||||
|
escaped_braces = escaped_braces
|
||||||
|
.checked_sub(1)
|
||||||
|
.ok_or(ExprError::UnmatchedClosingBrace)?;
|
||||||
|
|
||||||
|
if !comma_in_braces {
|
||||||
|
// Empty repeating patterns are not valid
|
||||||
|
return Err(ExprError::InvalidContent(r"\{\}".to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(true, '{') => {
|
||||||
|
comma_in_braces = false;
|
||||||
|
escaped_braces += 1;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
|
||||||
|
invalid_content_error = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
escaped = !escaped && c == '\\';
|
||||||
|
comma_in_braces = escaped_braces > 0 && (comma_in_braces || c == ',')
|
||||||
|
}
|
||||||
|
match (
|
||||||
|
escaped_parens.is_zero(),
|
||||||
|
escaped_braces.is_zero(),
|
||||||
|
invalid_content_error,
|
||||||
|
) {
|
||||||
|
(true, true, false) => Ok(()),
|
||||||
|
(_, false, _) => Err(ExprError::UnmatchedOpeningBrace),
|
||||||
|
(false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis),
|
||||||
|
(true, true, true) => Err(ExprError::InvalidContent(r"\{\}".to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Precedence for infix binary operators
|
/// Precedence for infix binary operators
|
||||||
const PRECEDENCE: &[&[(&str, BinOp)]] = &[
|
const PRECEDENCE: &[&[(&str, BinOp)]] = &[
|
||||||
&[("|", BinOp::String(StringOp::Or))],
|
&[("|", BinOp::String(StringOp::Or))],
|
||||||
|
@ -493,8 +553,9 @@ pub fn is_truthy(s: &NumOrStr) -> bool {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use crate::ExprError;
|
use crate::ExprError;
|
||||||
|
use crate::ExprError::InvalidContent;
|
||||||
|
|
||||||
use super::{AstNode, BinOp, NumericOp, RelationOp, StringOp};
|
use super::{validate_regex, AstNode, BinOp, NumericOp, RelationOp, StringOp};
|
||||||
|
|
||||||
impl From<&str> for AstNode {
|
impl From<&str> for AstNode {
|
||||||
fn from(value: &str) -> Self {
|
fn from(value: &str) -> Self {
|
||||||
|
@ -619,4 +680,43 @@ mod test {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(result.eval_as_string(), "");
|
assert_eq!(result.eval_as_string(), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A pattern mixing plain parentheses with a balanced `\(` ... `\)` pair
/// passes validation.
#[test]
fn validate_regex_valid() {
    let outcome = validate_regex(r"(a+b) \(a* b\)");
    assert!(outcome.is_ok());
}
|
||||||
|
|
||||||
|
/// An escaped opening token that is never closed yields the matching
/// "unmatched opening" error.
#[test]
fn validate_regex_missing_closing() {
    let cases = [
        (r"\(abc", ExprError::UnmatchedOpeningParenthesis),
        (r"\{1,2", ExprError::UnmatchedOpeningBrace),
    ];

    for (pattern, expected) in cases {
        assert_eq!(validate_regex(pattern), Err(expected));
    }
}
|
||||||
|
|
||||||
|
/// An escaped closing token without a preceding opening one yields the
/// matching "unmatched closing" error.
#[test]
fn validate_regex_missing_opening() {
    let cases = [
        (r"abc\)", ExprError::UnmatchedClosingParenthesis),
        (r"abc\}", ExprError::UnmatchedClosingBrace),
    ];

    for (pattern, expected) in cases {
        assert_eq!(validate_regex(pattern), Err(expected));
    }
}
|
||||||
|
|
||||||
|
/// An interval with nothing between `\{` and `\}` is reported as invalid
/// content.
#[test]
fn validate_regex_empty_repeating_pattern() {
    let expected = Err(InvalidContent(String::from(r"\{\}")));
    assert_eq!(validate_regex(r"ab\{\}"), expected);
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue