1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

expr: Escape anchor characters within pattern (#7842)

* expr: Escape anchor characters within the core pattern

The anchor characters `^` and `$` are treated as special characters by `expr`
only when they appear where an anchor is expected, i.e. at the start or end of the pattern; anywhere else they are matched literally.
This commit is contained in:
Teemu Pätsi 2025-04-28 00:52:35 +03:00 committed by GitHub
parent ddf48facbe
commit 07501be4ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 56 additions and 9 deletions

View file

@ -150,8 +150,32 @@ impl StringOp {
let left = left?.eval_as_string(); let left = left?.eval_as_string();
let right = right?.eval_as_string(); let right = right?.eval_as_string();
check_posix_regex_errors(&right)?; check_posix_regex_errors(&right)?;
let prefix = if right.starts_with('*') { r"^\" } else { "^" };
let re_string = format!("{prefix}{right}"); // All patterns are anchored so they begin with a caret (^)
let mut re_string = String::with_capacity(right.len() + 1);
re_string.push('^');
// Handle first character from the input pattern
let mut pattern_chars = right.chars();
let first = pattern_chars.next();
match first {
Some('^') => {} // Start of string anchor is already added
Some('*') => re_string.push_str(r"\*"),
Some(char) => re_string.push(char),
None => return Ok(0.into()),
};
// Handle the rest of the input pattern.
// Escape characters that should be handled literally within the pattern.
let mut prev = first.unwrap_or_default();
for curr in pattern_chars {
match curr {
'^' if prev != '\\' => re_string.push_str(r"\^"),
char => re_string.push(char),
}
prev = curr;
}
let re = Regex::with_options( let re = Regex::with_options(
&re_string, &re_string,
RegexOptions::REGEX_OPTION_NONE, RegexOptions::REGEX_OPTION_NONE,

View file

@ -274,11 +274,10 @@ fn test_length_mb() {
#[test] #[test]
fn test_regex() { fn test_regex() {
// FixME: [2022-12-19; rivy] test disabled as it currently fails due to 'oniguruma' bug (see GH:kkos/oniguruma/issues/279) new_ucmd!()
// new_ucmd!() .args(&["a^b", ":", "a^b"])
// .args(&["a^b", ":", "a^b"]) .succeeds()
// .succeeds() .stdout_only("3\n");
// .stdout_only("3\n");
new_ucmd!() new_ucmd!()
.args(&["a^b", ":", "a\\^b"]) .args(&["a^b", ":", "a\\^b"])
.succeeds() .succeeds()
@ -288,13 +287,38 @@ fn test_regex() {
.succeeds() .succeeds()
.stdout_only("3\n"); .stdout_only("3\n");
new_ucmd!() new_ucmd!()
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"]) .args(&["abc", ":", "^abc"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["^abc", ":", "^^abc"])
.succeeds()
.stdout_only("4\n");
new_ucmd!()
.args(&["b^$ic", ":", "b^\\$ic"])
.succeeds()
.stdout_only("5\n");
new_ucmd!()
.args(&["^^^^^^^^^", ":", "^^^"])
.succeeds() .succeeds()
.stdout_only("2\n"); .stdout_only("2\n");
new_ucmd!()
.args(&["-5", ":", "-\\{0,1\\}[0-9]*$"])
.succeeds()
.stdout_only("2\n");
new_ucmd!().args(&["", ":", ""]).fails().stdout_only("0\n");
new_ucmd!()
.args(&["abc", ":", ""])
.fails()
.stdout_only("0\n");
new_ucmd!() new_ucmd!()
.args(&["abc", ":", "bc"]) .args(&["abc", ":", "bc"])
.fails() .fails()
.stdout_only("0\n"); .stdout_only("0\n");
new_ucmd!()
.args(&["^abc", ":", "^abc"])
.fails()
.stdout_only("0\n");
} }
#[test] #[test]
@ -711,7 +735,6 @@ mod gnu_expr {
.stdout_only("\n"); .stdout_only("\n");
} }
#[ignore = "rust-onig bug, see https://github.com/rust-onig/rust-onig/issues/188"]
#[test] #[test]
fn test_bre10() { fn test_bre10() {
new_ucmd!() new_ucmd!()