From aae62072d8743d7c4edb02c8ae6547296c9f3d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Mon, 26 May 2025 09:40:42 +0300 Subject: [PATCH] expr: Fix parsing range quantifiers in regex --- src/uu/expr/src/syntax_tree.rs | 54 ++++++++++++++++++++++++++++++---- tests/by-util/test_expr.rs | 5 ---- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index b0326f7b6..419526963 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -191,10 +191,17 @@ impl StringOp { '\\' if !curr_is_escaped && pattern_chars.peek().is_none() => { return Err(ExprError::TrailingBackslash); } + '{' if curr_is_escaped && is_valid_range_quantifier(&pattern_chars) => { + re_string.push(curr); + // Set the lower bound of range quantifier to 0 if it is missing + if pattern_chars.peek() == Some(&',') { + re_string.push('0'); + } + } _ => re_string.push(curr), } - prev_is_escaped = prev == '\\' && !prev_is_escaped; + prev_is_escaped = curr_is_escaped; prev = curr; } @@ -244,6 +251,46 @@ where } } +/// Check if regex pattern character iterator is at the start of a valid range quantifier. +/// The iterator's start position is expected to be after the opening brace. +/// The range quantifier ends at the closing brace (`\}`). 
+/// +/// # Examples of valid range quantifiers +/// +/// - `r"\{3\}"` +/// - `r"\{3,\}"` +/// - `r"\{,6\}"` +/// - `r"\{3,6\}"` +/// - `r"\{,\}"` +fn is_valid_range_quantifier<I>(pattern_chars: &I) -> bool +where + I: Iterator<Item = char> + Clone, +{ + // Parse the string between braces + let mut quantifier = String::new(); + let mut pattern_chars_clone = pattern_chars.clone().peekable(); + let Some(mut prev) = pattern_chars_clone.next() else { + return false; + }; + let mut prev_is_escaped = false; + while let Some(curr) = pattern_chars_clone.next() { + if prev == '\\' && curr == '}' && !prev_is_escaped { + break; + } + if pattern_chars_clone.peek().is_none() { + return false; + } + + quantifier.push(prev); + prev_is_escaped = prev == '\\' && !prev_is_escaped; + prev = curr; + } + + // The whole quantifier must be `N` or `[N],[M]`, so anchor both ends + let re = Regex::new(r"^(\d+|\d*,\d*)$").expect("valid regular expression"); + re.is_match(&quantifier) +} + /// Check for errors in a supplied regular expression /// /// GNU coreutils shows messages for invalid regular expressions @@ -287,10 +334,7 @@ fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> { .expect("splitn always returns at least one string"), repetition.next(), ) { - ("", None) => { - // Empty repeating pattern - invalid_content_error = true; - } + ("", Some("")) => {} (x, None | Some("")) => { if x.parse::().is_err() { invalid_content_error = true; diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 2c0eafe32..b573ea098 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -875,7 +875,6 @@ mod gnu_expr { .stdout_only("\n"); } - #[ignore] #[test] fn test_bre17() { new_ucmd!() @@ -884,7 +883,6 @@ mod gnu_expr { .stdout_only("{1}a\n"); } - #[ignore] #[test] fn test_bre18() { new_ucmd!() @@ -893,7 +891,6 @@ mod gnu_expr { .stdout_only("1\n"); } - #[ignore] #[test] fn test_bre19() { new_ucmd!() @@ -1105,7 +1102,6 @@ mod gnu_expr { .stderr_contains("Invalid content of \\{\\}"); } - #[ignore] 
#[test] fn test_bre45() { new_ucmd!() @@ -1114,7 +1110,6 @@ mod gnu_expr { .stdout_only("1\n"); } - #[ignore] #[test] fn test_bre46() { new_ucmd!()