From 7789ef46a4d16e8d579c99fa22688c3e5a00f4ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 01:51:56 +0300 Subject: [PATCH 01/16] expr: handle `\{` literally at the start of an expression Normally, `\{` begins a range quantifier like `{n,m}`, but at the start of an expression, there is no preceding item to apply the quantifier to. --- src/uu/expr/src/syntax_tree.rs | 161 +++++++++++++++++---------------- 1 file changed, 84 insertions(+), 77 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index a966fca49..fa9d71a21 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -169,37 +169,46 @@ impl StringOp { // Handle the rest of the input pattern. let mut prev = first.unwrap_or_default(); let mut prev_is_escaped = false; + let mut is_start_of_expression = first == Some('\\'); while let Some(curr) = pattern_chars.next() { let curr_is_escaped = prev == '\\' && !prev_is_escaped; match curr { - '^' => match (prev, prev_is_escaped) { - // Start of a capturing group - ('(', true) - // Start of an alternative pattern - | ('|', true) - // Character class negation "[^a]" - | ('[', false) - // Explicitly escaped caret - | ('\\', false) => re_string.push(curr), - _ => re_string.push_str(r"\^"), - }, + // Character class negation "[^a]" + // Explicitly escaped caret "\^" + '^' if !is_start_of_expression && !matches!(prev, '[' | '\\') => { + re_string.push_str(r"\^"); + } '$' if !curr_is_escaped && !is_end_of_expression(&pattern_chars) => { re_string.push_str(r"\$"); } '\\' if !curr_is_escaped && pattern_chars.peek().is_none() => { return Err(ExprError::TrailingBackslash); } - '{' if curr_is_escaped && is_valid_range_quantifier(&pattern_chars) => { - re_string.push(curr); - // Set the lower bound of range quantifier to 0 if it is missing - if pattern_chars.peek() == Some(&',') { - re_string.push('0'); + '{' if curr_is_escaped => { + // Handle '{' literally at the start of an expression + if is_start_of_expression { + if re_string.ends_with('\\') { + let _ = re_string.pop(); + } + re_string.push(curr); + } else if is_valid_range_quantifier(&pattern_chars) { + re_string.push(curr); + // Set the lower bound of range quantifier to 0 if it is missing + if pattern_chars.peek() == Some(&',') { + re_string.push('0'); + } + } else { + return Err(ExprError::InvalidBracketContent); } } _ => re_string.push(curr), } + // Capturing group "\(abc\)" + // Alternative pattern "a\|b" + is_start_of_expression = curr_is_escaped && matches!(curr, '(' | '|') + || curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|'); prev_is_escaped = curr_is_escaped; prev = curr; } @@ -209,7 +218,14 @@ impl StringOp { RegexOptions::REGEX_OPTION_SINGLELINE, Syntax::grep(), ) - .map_err(|_| ExprError::InvalidRegexExpression)?; + .map_err(|error| match error.code() { + // "invalid repeat range {lower,upper}" + -123 => ExprError::InvalidBracketContent, + // "too big number for repeat range" + -201 => ExprError::InvalidBracketContent, + _ => ExprError::InvalidRegexExpression, + })?; + Ok(if re.captures_len() > 0 { re.captures(&left) .and_then(|captures| captures.at(1)) @@ -286,8 +302,28 @@ where } // Check if parsed quantifier is valid - let re = Regex::new(r"(\d+|\d*,\d*)").expect("valid regular expression"); - re.is_match(&quantifier) + let re = Regex::new(r"(\d*,\d*|\d+)").expect("valid regular expression"); + match re.captures(&quantifier) { + None => false, + Some(captures) => { + let matched = captures.at(0).unwrap_or_default(); + let mut repetition = matched.splitn(2, ','); + match ( + repetition + .next() + .expect("splitn always returns at least one string"), + repetition.next(), + ) { + ("", Some("")) => true, + (x, None | Some("")) => x.parse::().map_or(true, |x| x <= i16::MAX as i32), + ("", Some(x)) => x.parse::().map_or(true, |x| x <= i16::MAX as i32), + (f, Some(l)) => match (f.parse::(), l.parse::()) { + (Ok(f), Ok(l)) => f <= l && f <= i16::MAX as i32 && l <= i16::MAX as i32, + _ => false, + }, + } + } + } } /// Check for errors in a supplied regular expression @@ -306,77 +342,48 @@ where fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> { let mut escaped_parens: u64 = 0; let mut escaped_braces: u64 = 0; - let mut escaped = false; + let mut prev = '\0'; + let mut prev_is_escaped = false; + let mut is_brace_ignored = false; + let mut is_start_of_expression = true; - let mut repeating_pattern_text = String::new(); - let mut invalid_content_error = false; + for curr in pattern.chars() { + let curr_is_escaped = prev == '\\' && !prev_is_escaped; - for c in pattern.chars() { - match (escaped, c) { + match (curr_is_escaped, curr) { + (true, '(') => escaped_parens += 1, (true, ')') => { escaped_parens = escaped_parens .checked_sub(1) .ok_or(ExprError::UnmatchedClosingParenthesis)?; } - (true, '(') => { - escaped_parens += 1; + (true, '{') => { + is_brace_ignored = is_start_of_expression; + if !is_brace_ignored { + escaped_braces += 1; + } } (true, '}') => { - escaped_braces = escaped_braces - .checked_sub(1) - .ok_or(ExprError::UnmatchedClosingBrace)?; - let mut repetition = - repeating_pattern_text[..repeating_pattern_text.len() - 1].splitn(2, ','); - match ( - repetition - .next() - .expect("splitn always returns at least one string"), - repetition.next(), - ) { - ("", Some("")) => {} - (x, None | Some("")) => { - if x.parse::().is_err() { - invalid_content_error = true; - } - } - ("", Some(x)) => { - if x.parse::().is_err() { - invalid_content_error = true; - } - } - (f, Some(l)) => { - if let (Ok(f), Ok(l)) = (f.parse::(), l.parse::()) { - invalid_content_error = invalid_content_error || f > l; - } else { - invalid_content_error = true; - } - } - } - repeating_pattern_text.clear(); - } - (true, '{') => { - escaped_braces += 1; - } - _ => { - if escaped_braces > 0 && repeating_pattern_text.len() <= 13 { - repeating_pattern_text.push(c); - } - if escaped_braces > 0 && !(c.is_ascii_digit() || c == '\\' || c == ',') { - invalid_content_error = true; + if !is_brace_ignored { + escaped_braces = escaped_braces + .saturating_sub(1) + .ok_or(ExprError::UnmatchedClosingBrace)?; } } + _ => {} } - escaped = !escaped && c == '\\'; + + is_start_of_expression = prev == '\0' + || curr_is_escaped && matches!(curr, '(' | '|') + || curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|'); + prev_is_escaped = curr_is_escaped; + prev = curr; } - match ( - escaped_parens.is_zero(), - escaped_braces.is_zero(), - invalid_content_error, - ) { - (true, true, false) => Ok(()), - (_, false, _) => Err(ExprError::UnmatchedOpeningBrace), - (false, _, _) => Err(ExprError::UnmatchedOpeningParenthesis), - (true, true, true) => Err(ExprError::InvalidBracketContent), + + match (escaped_parens.is_zero(), escaped_braces.is_zero()) { + (true, true) => Ok(()), + (_, false) => Err(ExprError::UnmatchedOpeningBrace), + (false, _) => Err(ExprError::UnmatchedOpeningParenthesis), } } From bbc912eb75bd0fb724e447ca66ed0f315cbdde7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 04:00:09 +0300 Subject: [PATCH 02/16] expr: Add tests for regex range quantifiers --- tests/by-util/test_expr.rs | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index b573ea098..38028a54b 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -406,6 +406,74 @@ fn test_regex_dollar() { .stdout_only("0\n"); } +#[test] +fn test_regex_range_quantifier() { + new_ucmd!() + .args(&["a", ":", "a\\{1\\}"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["aaaaaaaaaa", ":", "a\\{1,\\}"]) + .succeeds() + .stdout_only("10\n"); + new_ucmd!() + .args(&["aaa", ":", "a\\{,3\\}"]) + .succeeds() + .stdout_only("3\n"); + new_ucmd!() + .args(&["aa", ":", "a\\{1,3\\}"]) + .succeeds() + .stdout_only("2\n"); + new_ucmd!() + .args(&["aaaa", ":", "a\\{,\\}"]) + .succeeds() + .stdout_only("4\n"); + new_ucmd!() + .args(&["a", ":", "ab\\{,3\\}"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["abbb", ":", "ab\\{,3\\}"]) + .succeeds() + .stdout_only("4\n"); + new_ucmd!() + .args(&["abcabc", ":", "\\(abc\\)\\{,\\}"]) + .succeeds() + .stdout_only("abc\n"); + new_ucmd!() + .args(&["a", ":", "a\\{,6\\}"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["{abc}", ":", "\\{abc\\}"]) + .succeeds() + .stdout_only("5\n"); + new_ucmd!() + .args(&["a{bc}", ":", "a\\(\\{bc\\}\\)"]) + .succeeds() + .stdout_only("{bc}\n"); + new_ucmd!() + .args(&["{b}", ":", "a\\|\\{b\\}"]) + .succeeds() + .stdout_only("3\n"); + new_ucmd!() + .args(&["{", ":", "a\\|\\{"]) + .succeeds() + .stdout_only("1\n"); + new_ucmd!() + .args(&["{}}}", ":", "\\{\\}\\}\\}"]) + .succeeds() + .stdout_only("4\n"); + new_ucmd!() + .args(&["a{}}}", ":", "a\\{\\}\\}\\}"]) + .fails() + .stderr_only("expr: Invalid content of \\{\\}\n"); + new_ucmd!() + .args(&["ab", ":", "ab\\{\\}"]) + .fails() + .stderr_only("expr: Invalid content of \\{\\}\n"); +} + #[test] fn test_substr() { new_ucmd!() From 874a9304cf22255420415d0b08b2786328b173d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 04:02:01 +0300 Subject: [PATCH 03/16] expr: Remove nonexistent error `UnmatchedClosingBrace` The closing brace without related opening brace is handled literally --- src/uu/expr/src/expr.rs | 2 -- src/uu/expr/src/syntax_tree.rs | 10 +--------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index fa165f9f3..70f4e62d2 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -46,8 +46,6 @@ pub enum ExprError { UnmatchedClosingParenthesis, #[error("Unmatched \\{{")] UnmatchedOpeningBrace, - #[error("Unmatched ) or \\}}")] - UnmatchedClosingBrace, #[error("Invalid content of \\{{\\}}")] InvalidBracketContent, #[error("Trailing backslash")] diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index fa9d71a21..e315535f6 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -365,9 +365,7 @@ fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> { } (true, '}') => { if !is_brace_ignored { - escaped_braces = escaped_braces - .saturating_sub(1) - .ok_or(ExprError::UnmatchedClosingBrace)?; + escaped_braces = escaped_braces.saturating_sub(1); } } _ => {} @@ -1007,12 +1005,6 @@ mod test { Err(ExprError::UnmatchedClosingParenthesis) ); - assert_eq!( - check_posix_regex_errors(r"abc\}"), - Err(ExprError::UnmatchedClosingBrace) - ); - } - #[test] fn check_regex_empty_repeating_pattern() { assert_eq!( From 30654824405d682cd177216ef651fb19d7322420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 04:19:57 +0300 Subject: [PATCH 04/16] expr: Anchor regex for detecting range quantifier --- src/uu/expr/src/syntax_tree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index e315535f6..83240e7f8 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -302,7 +302,7 @@ where } // Check if parsed quantifier is valid - let re = Regex::new(r"(\d*,\d*|\d+)").expect("valid regular expression"); + let re = Regex::new(r"^(\d*,\d*|\d+)").expect("valid regular expression"); match re.captures(&quantifier) { None => false, Some(captures) => { From 6b49b26af721c9e08cfb8f9d81b5a54fb0707f78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 04:25:25 +0300 Subject: [PATCH 05/16] expr: Remove redundant tests that should not work anymore --- src/uu/expr/src/syntax_tree.rs | 37 ---------------------------------- 1 file changed, 37 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 83240e7f8..e53c0d1a7 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -797,7 +797,6 @@ pub fn is_truthy(s: &NumOrStr) -> bool { #[cfg(test)] mod test { use crate::ExprError; - use crate::ExprError::InvalidBracketContent; use crate::syntax_tree::is_valid_range_quantifier; use super::{ @@ -1004,42 +1003,6 @@ mod test { check_posix_regex_errors(r"abc\)"), Err(ExprError::UnmatchedClosingParenthesis) ); - - #[test] - fn check_regex_empty_repeating_pattern() { - assert_eq!( - check_posix_regex_errors("ab\\{\\}"), - Err(InvalidBracketContent) - ); - } - - #[test] - fn check_regex_intervals_two_numbers() { - assert_eq!( - // out of order - check_posix_regex_errors("ab\\{1,0\\}"), - Err(InvalidBracketContent) - ); - assert_eq!( - check_posix_regex_errors("ab\\{1,a\\}"), - Err(InvalidBracketContent) - ); - assert_eq!( - check_posix_regex_errors("ab\\{a,3\\}"), - Err(InvalidBracketContent) - ); - assert_eq!( - check_posix_regex_errors("ab\\{a,b\\}"), - Err(InvalidBracketContent) - ); - assert_eq!( - check_posix_regex_errors("ab\\{a,\\}"), - Err(InvalidBracketContent) - ); - assert_eq!( - check_posix_regex_errors("ab\\{,b\\}"), - Err(InvalidBracketContent) - ); } #[test] From 639310c697fe78b988106ab7af5f331958321228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 04:25:54 +0300 Subject: [PATCH 06/16] expr: Fix testing `UnmatchedOpeningBrace` --- src/uu/expr/src/syntax_tree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index e53c0d1a7..4f5a4c241 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -992,7 +992,7 @@ mod test { ); assert_eq!( - check_posix_regex_errors(r"\{1,2"), + check_posix_regex_errors(r"a\{1,2"), Err(ExprError::UnmatchedOpeningBrace) ); } From b1a91351bc50e5c7b754afbfa8216c1ab1ebdea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 04:34:38 +0300 Subject: [PATCH 07/16] expr: Ignore test cases from spell checker --- tests/by-util/test_expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 38028a54b..458ba9e84 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -3,8 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // spell-checker:ignore αbcdef ; (people) kkos -// spell-checker:ignore aabcccd aabcd aabd abbbd abbcabc abbcac abbcbbbd abbcbd -// spell-checker:ignore abbccd abcac acabc andand bigcmp bignum emptysub +// spell-checker:ignore aabcccd aabcd aabd abbb abbbd abbcabc abbcac abbcbbbd abbcbd +// spell-checker:ignore abbccd abcabc abcac acabc andand bigcmp bignum emptysub // spell-checker:ignore orempty oror use uutests::new_ucmd; From 6aeae43f3ccfa8111cb2fa63e4145207db2a5e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 13:08:22 +0300 Subject: [PATCH 08/16] expr: Simplify verifying indexes within regex range quantifier --- src/uu/expr/src/syntax_tree.rs | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 4f5a4c241..c1abfb9a1 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -302,27 +302,19 @@ where } // Check if parsed quantifier is valid + let is_valid_range_index = |s: &str| s.parse::().map_or(true, |x| x <= i16::MAX as i32); let re = Regex::new(r"^(\d*,\d*|\d+)").expect("valid regular expression"); - match re.captures(&quantifier) { - None => false, - Some(captures) => { - let matched = captures.at(0).unwrap_or_default(); - let mut repetition = matched.splitn(2, ','); - match ( - repetition - .next() - .expect("splitn always returns at least one string"), - repetition.next(), - ) { - ("", Some("")) => true, - (x, None | Some("")) => x.parse::().map_or(true, |x| x <= i16::MAX as i32), - ("", Some(x)) => x.parse::().map_or(true, |x| x <= i16::MAX as i32), - (f, Some(l)) => match (f.parse::(), l.parse::()) { - (Ok(f), Ok(l)) => f <= l && f <= i16::MAX as i32 && l <= i16::MAX as i32, - _ => false, - }, - } + if let Some(captures) = re.captures(&quantifier) { + let matched = captures.at(0).unwrap_or_default(); + match matched.split_once(',') { + None => is_valid_range_index(matched), + Some(("", "")) => true, + Some((x, "")) => is_valid_range_index(x), + Some(("", x)) => is_valid_range_index(x), + Some((f, l)) => f <= l && is_valid_range_index(f) && is_valid_range_index(l), } + } else { + false } } From 07caa4867bedbb3fa91fef3335e16ae11b60a53d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 14:02:24 +0300 Subject: [PATCH 09/16] expr: Fix error message for too big range quantifier index --- src/uu/expr/src/expr.rs | 2 + src/uu/expr/src/syntax_tree.rs | 104 ++++++++++++++++++++++++--------- tests/by-util/test_expr.rs | 2 +- 3 files changed, 78 insertions(+), 30 deletions(-) diff --git a/src/uu/expr/src/expr.rs b/src/uu/expr/src/expr.rs index 70f4e62d2..6a4f9487b 100644 --- a/src/uu/expr/src/expr.rs +++ b/src/uu/expr/src/expr.rs @@ -50,6 +50,8 @@ pub enum ExprError { InvalidBracketContent, #[error("Trailing backslash")] TrailingBackslash, + #[error("Regular expression too big")] + TooBigRangeQuantifierIndex, } impl UError for ExprError { diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index c1abfb9a1..64386f920 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -192,14 +192,15 @@ impl StringOp { let _ = re_string.pop(); } re_string.push(curr); - } else if is_valid_range_quantifier(&pattern_chars) { + } else { + // Check if the following section is a valid range quantifier + verify_range_quantifier(&pattern_chars)?; + re_string.push(curr); // Set the lower bound of range quantifier to 0 if it is missing if pattern_chars.peek() == Some(&',') { re_string.push('0'); } - } else { - return Err(ExprError::InvalidBracketContent); } } _ => re_string.push(curr), @@ -222,7 +223,7 @@ impl StringOp { // "invalid repeat range {lower,upper}" -123 => ExprError::InvalidBracketContent, // "too big number for repeat range" - -201 => ExprError::InvalidBracketContent, + -201 => ExprError::TooBigRangeQuantifierIndex, _ => ExprError::InvalidRegexExpression, })?; @@ -277,7 +278,7 @@ where /// - `r"\{,6\}"` /// - `r"\{3,6\}"` /// - `r"\{,\}"` -fn is_valid_range_quantifier(pattern_chars: &I) -> bool +fn verify_range_quantifier(pattern_chars: &I) -> Result<(), ExprError> where I: Iterator + Clone, { @@ -285,15 +286,19 @@ where let mut quantifier = String::new(); let mut pattern_chars_clone = pattern_chars.clone().peekable(); let Some(mut prev) = pattern_chars_clone.next() else { - return false; + return Err(ExprError::UnmatchedOpeningBrace); }; + if pattern_chars_clone.peek().is_none() { + return Err(ExprError::UnmatchedOpeningBrace); + } + let mut prev_is_escaped = false; while let Some(curr) = pattern_chars_clone.next() { if prev == '\\' && curr == '}' && !prev_is_escaped { break; } if pattern_chars_clone.peek().is_none() { - return false; + return Err(ExprError::UnmatchedOpeningBrace); } quantifier.push(prev); @@ -302,19 +307,32 @@ where } // Check if parsed quantifier is valid - let is_valid_range_index = |s: &str| s.parse::().map_or(true, |x| x <= i16::MAX as i32); let re = Regex::new(r"^(\d*,\d*|\d+)").expect("valid regular expression"); if let Some(captures) = re.captures(&quantifier) { let matched = captures.at(0).unwrap_or_default(); match matched.split_once(',') { - None => is_valid_range_index(matched), - Some(("", "")) => true, - Some((x, "")) => is_valid_range_index(x), - Some(("", x)) => is_valid_range_index(x), - Some((f, l)) => f <= l && is_valid_range_index(f) && is_valid_range_index(l), + Some(("", "")) => Ok(()), + Some((x, "")) | Some(("", x)) => match x.parse::() { + Ok(x) if x <= i16::MAX.into() => Ok(()), + Ok(_) => Err(ExprError::TooBigRangeQuantifierIndex), + Err(_) => Err(ExprError::InvalidBracketContent), + }, + Some((f, l)) => match (f.parse::(), l.parse::()) { + (Ok(f), Ok(l)) if f > l => Err(ExprError::InvalidBracketContent), + (Ok(f), Ok(l)) if f > i16::MAX.into() || l > i16::MAX.into() => { + Err(ExprError::TooBigRangeQuantifierIndex) + } + (Ok(_), Ok(_)) => Ok(()), + _ => Err(ExprError::InvalidBracketContent), + }, + None => match matched.parse::() { + Ok(x) if x <= i16::MAX.into() => Ok(()), + Ok(_) => Err(ExprError::TooBigRangeQuantifierIndex), + Err(_) => Err(ExprError::InvalidBracketContent), + }, } } else { - false + Err(ExprError::InvalidBracketContent) } } @@ -789,7 +807,7 @@ pub fn is_truthy(s: &NumOrStr) -> bool { #[cfg(test)] mod test { use crate::ExprError; - use crate::syntax_tree::is_valid_range_quantifier; + use crate::syntax_tree::verify_range_quantifier; use super::{ AstNode, AstNodeInner, BinOp, NumericOp, RelationOp, StringOp, check_posix_regex_errors, @@ -999,19 +1017,47 @@ mod test { #[test] fn test_is_valid_range_quantifier() { - assert!(is_valid_range_quantifier(&"3\\}".chars())); - assert!(is_valid_range_quantifier(&"3,\\}".chars())); - assert!(is_valid_range_quantifier(&",6\\}".chars())); - assert!(is_valid_range_quantifier(&"3,6\\}".chars())); - assert!(is_valid_range_quantifier(&",\\}".chars())); - assert!(is_valid_range_quantifier(&"3,6\\}anything".chars())); - assert!(!is_valid_range_quantifier(&"\\{3,6\\}".chars())); - assert!(!is_valid_range_quantifier(&"\\}".chars())); - assert!(!is_valid_range_quantifier(&"".chars())); - assert!(!is_valid_range_quantifier(&"3".chars())); - assert!(!is_valid_range_quantifier(&"3,".chars())); - assert!(!is_valid_range_quantifier(&",6".chars())); - assert!(!is_valid_range_quantifier(&"3,6".chars())); - assert!(!is_valid_range_quantifier(&",".chars())); + assert!(verify_range_quantifier(&"3\\}".chars()).is_ok()); + assert!(verify_range_quantifier(&"3,\\}".chars()).is_ok()); + assert!(verify_range_quantifier(&",6\\}".chars()).is_ok()); + assert!(verify_range_quantifier(&"3,6\\}".chars()).is_ok()); + assert!(verify_range_quantifier(&",\\}".chars()).is_ok()); + assert!(verify_range_quantifier(&"32767\\}anything".chars()).is_ok()); + assert_eq!( + verify_range_quantifier(&"\\{3,6\\}".chars()), + Err(ExprError::InvalidBracketContent) + ); + assert_eq!( + verify_range_quantifier(&"\\}".chars()), + Err(ExprError::InvalidBracketContent) + ); + assert_eq!( + verify_range_quantifier(&"".chars()), + Err(ExprError::UnmatchedOpeningBrace) + ); + assert_eq!( + verify_range_quantifier(&"3".chars()), + Err(ExprError::UnmatchedOpeningBrace) + ); + assert_eq!( + verify_range_quantifier(&"3,".chars()), + Err(ExprError::UnmatchedOpeningBrace) + ); + assert_eq!( + verify_range_quantifier(&",6".chars()), + Err(ExprError::UnmatchedOpeningBrace) + ); + assert_eq!( + verify_range_quantifier(&"3,6".chars()), + Err(ExprError::UnmatchedOpeningBrace) + ); + assert_eq!( + verify_range_quantifier(&",".chars()), + Err(ExprError::UnmatchedOpeningBrace) + ); + assert_eq!( + verify_range_quantifier(&"32768\\}".chars()), + Err(ExprError::TooBigRangeQuantifierIndex) + ); } } diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 458ba9e84..b8952bdb8 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -1210,7 +1210,7 @@ mod gnu_expr { .args(&["_", ":", "a\\{32768\\}"]) .fails_with_code(2) .no_stdout() - .stderr_contains("Invalid content of \\{\\}"); + .stderr_contains("Regular expression too big\n"); } #[test] From ce0c2320ea2f4b400a15c663329942beea1ba1c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 14:35:13 +0300 Subject: [PATCH 10/16] expr: Remove redundant checks for `UnmatchedOpeningBrace` It is handled in `verify_range_quantifier` function. --- src/uu/expr/src/syntax_tree.rs | 38 ++++++---------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 64386f920..23a79fb98 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -8,7 +8,7 @@ use std::{cell::Cell, collections::BTreeMap}; use num_bigint::{BigInt, ParseBigIntError}; -use num_traits::{ToPrimitive, Zero}; +use num_traits::ToPrimitive; use onig::{Regex, RegexOptions, Syntax}; use crate::{ExprError, ExprResult}; @@ -351,15 +351,11 @@ where /// has specific error messages. fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> { let mut escaped_parens: u64 = 0; - let mut escaped_braces: u64 = 0; let mut prev = '\0'; - let mut prev_is_escaped = false; - let mut is_brace_ignored = false; - let mut is_start_of_expression = true; + let mut curr_is_escaped = false; for curr in pattern.chars() { - let curr_is_escaped = prev == '\\' && !prev_is_escaped; - + curr_is_escaped = prev == '\\' && !curr_is_escaped; match (curr_is_escaped, curr) { (true, '(') => escaped_parens += 1, (true, ')') => { @@ -367,31 +363,14 @@ fn check_posix_regex_errors(pattern: &str) -> ExprResult<()> { .checked_sub(1) .ok_or(ExprError::UnmatchedClosingParenthesis)?; } - (true, '{') => { - is_brace_ignored = is_start_of_expression; - if !is_brace_ignored { - escaped_braces += 1; - } - } - (true, '}') => { - if !is_brace_ignored { - escaped_braces = escaped_braces.saturating_sub(1); - } - } _ => {} } - - is_start_of_expression = prev == '\0' - || curr_is_escaped && matches!(curr, '(' | '|') - || curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|'); - prev_is_escaped = curr_is_escaped; prev = curr; } - match (escaped_parens.is_zero(), escaped_braces.is_zero()) { - (true, true) => Ok(()), - (_, false) => Err(ExprError::UnmatchedOpeningBrace), - (false, _) => Err(ExprError::UnmatchedOpeningParenthesis), + match escaped_parens { + 0 => Ok(()), + _ => Err(ExprError::UnmatchedOpeningParenthesis), } } @@ -1000,11 +979,6 @@ mod test { check_posix_regex_errors(r"\(abc"), Err(ExprError::UnmatchedOpeningParenthesis) ); - - assert_eq!( - check_posix_regex_errors(r"a\{1,2"), - Err(ExprError::UnmatchedOpeningBrace) - ); } #[test] From 210f4f7154a7e2e40646ea5fde8ff0fb61bf213c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 14:53:05 +0300 Subject: [PATCH 11/16] expr: Simplify parsing a range quantifier --- src/uu/expr/src/syntax_tree.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 23a79fb98..1d396a2cc 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -282,27 +282,25 @@ fn verify_range_quantifier(pattern_chars: &I) -> Result<(), ExprError> where I: Iterator + Clone, { - // Parse the string between braces - let mut quantifier = String::new(); let mut pattern_chars_clone = pattern_chars.clone().peekable(); - let Some(mut prev) = pattern_chars_clone.next() else { - return Err(ExprError::UnmatchedOpeningBrace); - }; if pattern_chars_clone.peek().is_none() { return Err(ExprError::UnmatchedOpeningBrace); } - let mut prev_is_escaped = false; + // Parse the string between braces + let mut quantifier = String::new(); + let mut prev = '\0'; + let mut curr_is_escaped = false; while let Some(curr) = pattern_chars_clone.next() { - if prev == '\\' && curr == '}' && !prev_is_escaped { + curr_is_escaped = prev == '\\' && !curr_is_escaped; + if curr_is_escaped && curr == '}' { break; } if pattern_chars_clone.peek().is_none() { return Err(ExprError::UnmatchedOpeningBrace); } - quantifier.push(prev); - prev_is_escaped = prev == '\\' && !prev_is_escaped; + quantifier.push(curr); prev = curr; } From ca6a10ea9a10ec1522cf567b437c8616221df7ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 15:26:41 +0300 Subject: [PATCH 12/16] expr: Only '^' requires special treatment at the start of regex --- src/uu/expr/src/syntax_tree.rs | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 1d396a2cc..e660b096d 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -151,27 +151,21 @@ impl StringOp { let right = right?.eval_as_string(); check_posix_regex_errors(&right)?; - // All patterns are anchored so they begin with a caret (^) + // Transpile the input pattern from BRE syntax to `onig` crate's `Syntax::grep` let mut re_string = String::with_capacity(right.len() + 1); - re_string.push('^'); - - // Handle first character from the input pattern let mut pattern_chars = right.chars().peekable(); - let first = pattern_chars.next(); - match first { - Some('^') => {} // Start of string anchor is already added - Some('$') if !is_end_of_expression(&pattern_chars) => re_string.push_str(r"\$"), - Some('\\') if right.len() == 1 => return Err(ExprError::TrailingBackslash), - Some(char) => re_string.push(char), - None => return Ok(0.into()), - }; - - // Handle the rest of the input pattern. - let mut prev = first.unwrap_or_default(); + let mut prev = '\0'; let mut prev_is_escaped = false; - let mut is_start_of_expression = first == Some('\\'); + let mut is_start_of_expression = true; + + // All patterns are anchored so they begin with a caret (^) + if pattern_chars.peek() != Some(&'^') { + re_string.push('^'); + } + while let Some(curr) = pattern_chars.next() { let curr_is_escaped = prev == '\\' && !prev_is_escaped; + let is_first_character = prev == '\0'; match curr { // Character class negation "[^a]" @@ -208,8 +202,10 @@ impl StringOp { // Capturing group "\(abc\)" // Alternative pattern "a\|b" - is_start_of_expression = curr_is_escaped && matches!(curr, '(' | '|') + is_start_of_expression = curr == '\\' && is_first_character + || curr_is_escaped && matches!(curr, '(' | '|') || curr == '\\' && prev_is_escaped && matches!(prev, '(' | '|'); + prev_is_escaped = curr_is_escaped; prev = curr; } From 2b565612eeb5b3cc9699299cfa010124337b124b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 16:43:43 +0300 Subject: [PATCH 13/16] expr: Fix parsing regex range quantifier --- src/uu/expr/src/syntax_tree.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index e660b096d..3570dd6a4 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -295,8 +295,9 @@ where if pattern_chars_clone.peek().is_none() { return Err(ExprError::UnmatchedOpeningBrace); } - - quantifier.push(curr); + if prev != '\0' { + quantifier.push(prev); + } prev = curr; } From 74ad163da98644cc3218dc1ab580cdbf8efdc393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 16:44:27 +0300 Subject: [PATCH 14/16] expr: Fix regex for validating range quantifier --- src/uu/expr/src/syntax_tree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 3570dd6a4..9f817d395 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -302,7 +302,7 @@ where } // Check if parsed quantifier is valid - let re = Regex::new(r"^(\d*,\d*|\d+)").expect("valid regular expression"); + let re = Regex::new(r"^(\d*,\d*|\d+)$").expect("valid regular expression"); if let Some(captures) = re.captures(&quantifier) { let matched = captures.at(0).unwrap_or_default(); match matched.split_once(',') { From 4946922c0f151b5c5ad10d89fdf1a00b4be53c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 16:45:54 +0300 Subject: [PATCH 15/16] expr: Fix error message for large numbers as range index --- src/uu/expr/src/syntax_tree.rs | 21 ++++++--------------- tests/by-util/test_expr.rs | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 9f817d395..9a8fb05f2 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -307,24 +307,15 @@ where let matched = captures.at(0).unwrap_or_default(); match matched.split_once(',') { Some(("", "")) => Ok(()), - Some((x, "")) | Some(("", x)) => match x.parse::() { - Ok(x) if x <= i16::MAX.into() => Ok(()), - Ok(_) => Err(ExprError::TooBigRangeQuantifierIndex), - Err(_) => Err(ExprError::InvalidBracketContent), - }, - Some((f, l)) => match (f.parse::(), l.parse::()) { + Some((x, "") | ("", x)) if x.parse::().is_ok() => Ok(()), + Some((_, "") | ("", _)) => Err(ExprError::TooBigRangeQuantifierIndex), + Some((f, l)) => match (f.parse::(), l.parse::()) { (Ok(f), Ok(l)) if f > l => Err(ExprError::InvalidBracketContent), - (Ok(f), Ok(l)) if f > i16::MAX.into() || l > i16::MAX.into() => { - Err(ExprError::TooBigRangeQuantifierIndex) - } (Ok(_), Ok(_)) => Ok(()), - _ => Err(ExprError::InvalidBracketContent), - }, - None => match matched.parse::() { - Ok(x) if x <= i16::MAX.into() => Ok(()), - Ok(_) => Err(ExprError::TooBigRangeQuantifierIndex), - Err(_) => Err(ExprError::InvalidBracketContent), + _ => Err(ExprError::TooBigRangeQuantifierIndex), }, + None if matched.parse::().is_ok() => Ok(()), + None => Err(ExprError::TooBigRangeQuantifierIndex), } } else { Err(ExprError::InvalidBracketContent) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index b8952bdb8..0a16d4ea9 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -472,6 +472,26 @@ fn test_regex_range_quantifier() { .args(&["ab", ":", "ab\\{\\}"]) .fails() .stderr_only("expr: Invalid content of \\{\\}\n"); + new_ucmd!() + .args(&["_", ":", "a\\{12345678901234567890\\}"]) + .fails() + .stderr_only("expr: Regular expression too big\n"); + new_ucmd!() + .args(&["_", ":", "a\\{12345678901234567890,\\}"]) + .fails() + .stderr_only("expr: Regular expression too big\n"); + new_ucmd!() + .args(&["_", ":", "a\\{,12345678901234567890\\}"]) + .fails() + .stderr_only("expr: Regular expression too big\n"); + new_ucmd!() + .args(&["_", ":", "a\\{1,12345678901234567890\\}"]) + .fails() + .stderr_only("expr: Regular expression too big\n"); + new_ucmd!() + .args(&["_", ":", "a\\{1,1234567890abcdef\\}"]) + .fails() + .stderr_only("expr: Invalid content of \\{\\}\n"); } #[test] From eb7bc2f251f724b3a5e0878a1f0f986a1c527406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Tue, 27 May 2025 17:50:44 +0300 Subject: [PATCH 16/16] expr: Allow only ASCII digits for regex range quantifiers --- src/uu/expr/src/syntax_tree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 9a8fb05f2..0950020c9 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -302,7 +302,7 @@ where } // Check if parsed quantifier is valid - let re = Regex::new(r"^(\d*,\d*|\d+)$").expect("valid regular expression"); + let re = Regex::new(r"^([0-9]*,[0-9]*|[0-9]+)$").expect("valid regular expression"); if let Some(captures) = re.captures(&quantifier) { let matched = captures.at(0).unwrap_or_default(); match matched.split_once(',') {