mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-31 13:07:46 +00:00
expr: Reject invalid intervals in regular expressions
This commit is contained in:
parent
fa0bd722b7
commit
bff827d9ed
1 changed files with 65 additions and 8 deletions
|
@ -179,14 +179,14 @@ impl StringOp {
|
||||||
///
|
///
|
||||||
/// GNU coreutils shows messages for invalid regular expressions
|
/// GNU coreutils shows messages for invalid regular expressions
|
||||||
/// differently from the oniguruma library used by the regex crate.
|
/// differently from the oniguruma library used by the regex crate.
|
||||||
/// This method attempts to do these checks manually in one linear pass
|
/// This method attempts to do these checks manually in one pass
|
||||||
/// through the regular expression.
|
/// through the regular expression.
|
||||||
fn validate_regex(pattern: &str) -> ExprResult<()> {
|
fn validate_regex(pattern: &str) -> ExprResult<()> {
|
||||||
let mut escaped_parens: u64 = 0;
|
let mut escaped_parens: u64 = 0;
|
||||||
let mut escaped_braces: u64 = 0;
|
let mut escaped_braces: u64 = 0;
|
||||||
let mut escaped = false;
|
let mut escaped = false;
|
||||||
|
|
||||||
let mut comma_in_braces = false;
|
let mut repeating_pattern_text = String::with_capacity(13);
|
||||||
let mut invalid_content_error = false;
|
let mut invalid_content_error = false;
|
||||||
|
|
||||||
for c in pattern.chars() {
|
for c in pattern.chars() {
|
||||||
|
@ -203,24 +203,46 @@ fn validate_regex(pattern: &str) -> ExprResult<()> {
|
||||||
escaped_braces = escaped_braces
|
escaped_braces = escaped_braces
|
||||||
.checked_sub(1)
|
.checked_sub(1)
|
||||||
.ok_or(ExprError::UnmatchedClosingBrace)?;
|
.ok_or(ExprError::UnmatchedClosingBrace)?;
|
||||||
|
let mut repetition = repeating_pattern_text[..repeating_pattern_text.len() - 1]
|
||||||
if !comma_in_braces {
|
.splitn(2, |x| x == ',');
|
||||||
// Empty repeating patterns are not valid
|
match (repetition.next(), repetition.next()) {
|
||||||
return Err(ExprError::InvalidContent(r"\{\}".to_string()));
|
(None, None) => {
|
||||||
|
// Empty repeating pattern
|
||||||
|
invalid_content_error = true;
|
||||||
|
}
|
||||||
|
(Some(x), None) | (Some(x), Some("")) => {
|
||||||
|
if !x.parse::<i16>().is_ok() {
|
||||||
|
invalid_content_error = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(None, Some(x)) | (Some(""), Some(x)) => {
|
||||||
|
if !x.parse::<i16>().is_ok() {
|
||||||
|
invalid_content_error = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(f), Some(l)) => {
|
||||||
|
if let (Ok(f), Ok(l)) = (f.parse::<i16>(), l.parse::<i16>()) {
|
||||||
|
invalid_content_error = invalid_content_error || f > l;
|
||||||
|
} else {
|
||||||
|
invalid_content_error = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
repeating_pattern_text.clear();
|
||||||
}
|
}
|
||||||
(true, '{') => {
|
(true, '{') => {
|
||||||
comma_in_braces = false;
|
|
||||||
escaped_braces += 1;
|
escaped_braces += 1;
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
|
if escaped_braces > 0 && repeating_pattern_text.len() <= 13 {
|
||||||
|
repeating_pattern_text.push(c);
|
||||||
|
}
|
||||||
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
|
if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') {
|
||||||
invalid_content_error = true;
|
invalid_content_error = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
escaped = !escaped && c == '\\';
|
escaped = !escaped && c == '\\';
|
||||||
comma_in_braces = escaped_braces > 0 && (comma_in_braces || c == ',')
|
|
||||||
}
|
}
|
||||||
match (
|
match (
|
||||||
escaped_parens.is_zero(),
|
escaped_parens.is_zero(),
|
||||||
|
@ -697,11 +719,25 @@ mod test {
|
||||||
assert_eq!(result.eval_as_string(), "5");
|
assert_eq!(result.eval_as_string(), "5");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_match_in_beginning() {
|
||||||
|
let result = AstNode::parse(&["cowsay", ":", r"ow"])
|
||||||
|
.unwrap()
|
||||||
|
.eval()
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(result.eval_as_string(), "0");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn validate_regex_valid() {
|
fn validate_regex_valid() {
|
||||||
assert!(validate_regex(r"(a+b) \(a* b\)").is_ok());
|
assert!(validate_regex(r"(a+b) \(a* b\)").is_ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn validate_regex_simple_repeating_pattern() {
|
||||||
|
assert!(validate_regex(r"(a+b){4}").is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn validate_regex_missing_closing() {
|
fn validate_regex_missing_closing() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -735,4 +771,25 @@ mod test {
|
||||||
Err(InvalidContent(r"\{\}".to_string()))
|
Err(InvalidContent(r"\{\}".to_string()))
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn validate_regex_intervals_two_numbers() {
|
||||||
|
assert_eq!(
|
||||||
|
// out of order
|
||||||
|
validate_regex("ab\\{1,0\\}"),
|
||||||
|
Err(InvalidContent(r"\{\}".to_string()))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
validate_regex("ab\\{1,a\\}"),
|
||||||
|
Err(InvalidContent(r"\{\}".to_string()))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
validate_regex("ab\\{a,3\\}"),
|
||||||
|
Err(InvalidContent(r"\{\}".to_string()))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
validate_regex("ab\\{a,b\\}"),
|
||||||
|
Err(InvalidContent(r"\{\}".to_string()))
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue