diff --git a/Cargo.lock b/Cargo.lock index 13f37e989..ab580991d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3102,6 +3102,7 @@ dependencies = [ "data-encoding-macro", "dns-lookup", "dunce", + "glob", "itertools", "libc", "nix", diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 55a8133a4..45c750739 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -23,6 +23,7 @@ clap = "3.2" dns-lookup = { version="1.0.5", optional=true } dunce = "1.0.0" wild = "2.0" +glob = "0.3.0" # * optional itertools = { version="0.10.0", optional=true } thiserror = { version="1.0", optional=true } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 1c405ce98..d8860cfda 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -29,6 +29,7 @@ pub use crate::mods::ranges; pub use crate::mods::version_cmp; // * string parsing modules +pub use crate::parser::parse_glob; pub use crate::parser::parse_size; pub use crate::parser::parse_time; diff --git a/src/uucore/src/lib/parser.rs b/src/uucore/src/lib/parser.rs index d09777e10..8eae16bbf 100644 --- a/src/uucore/src/lib/parser.rs +++ b/src/uucore/src/lib/parser.rs @@ -1,2 +1,3 @@ +pub mod parse_glob; pub mod parse_size; pub mod parse_time; diff --git a/src/uucore/src/lib/parser/parse_glob.rs b/src/uucore/src/lib/parser/parse_glob.rs new file mode 100644 index 000000000..8605f7450 --- /dev/null +++ b/src/uucore/src/lib/parser/parse_glob.rs @@ -0,0 +1,109 @@ +//! Parsing a glob Pattern from a string. +//! +//! Use the [`from_str`] function to parse a [`Pattern`] from a string. + +// cSpell:words fnmatch + +use glob::{Pattern, PatternError}; + +fn fix_negation(glob: &str) -> String { + let mut chars = glob.chars().collect::>(); + + let mut i = 0; + while i < chars.len() { + if chars[i] == '[' && i + 4 <= glob.len() && chars[i + 1] == '^' { + match chars[i + 3..].iter().position(|x| *x == ']') { + None => (), + Some(j) => { + chars[i + 1] = '!'; + i += j + 4; + continue; + } + } + } + + i += 1; + } + + chars.into_iter().collect::() +} + +/// Parse a glob Pattern from a string. +/// +/// This function amends the input string to replace any caret or circumflex +/// character (^) used to negate a set of characters with an exclamation mark +/// (!), which adapts rust's glob matching to function the way the GNU utils' +/// fnmatch does. +/// +/// # Examples +/// +/// ```rust +/// use std::time::Duration; +/// use uucore::parse_glob::from_str; +/// assert!(!from_str("[^abc]").unwrap().matches("a")); +/// assert!(from_str("[^abc]").unwrap().matches("x")); +/// ``` +pub fn from_str(glob: &str) -> Result { + Pattern::new(&fix_negation(glob)) +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_from_str() { + assert_eq!(from_str("[^abc]").unwrap(), Pattern::new("[!abc]").unwrap()); + } + + #[test] + fn test_fix_negation() { + // Happy/Simple case + assert_eq!(fix_negation("[^abc]"), "[!abc]"); + + // Should fix negations in a long regex + assert_eq!(fix_negation("foo[abc] bar[^def]"), "foo[abc] bar[!def]"); + + // Should fix multiple negations in a regex + assert_eq!(fix_negation("foo[^abc]bar[^def]"), "foo[!abc]bar[!def]"); + + // Should fix negation of the single character ] + assert_eq!(fix_negation("[^]]"), "[!]]"); + + // Should fix negation of the single character ^ + assert_eq!(fix_negation("[^^]"), "[!^]"); + + // Should fix negation of the space character + assert_eq!(fix_negation("[^ ]"), "[! ]"); + + // Complicated patterns + assert_eq!(fix_negation("[^][]"), "[!][]"); + assert_eq!(fix_negation("[^[]]"), "[![]]"); + + // More complex patterns that should be replaced + assert_eq!(fix_negation("[[]] [^a]"), "[[]] [!a]"); + assert_eq!(fix_negation("[[] [^a]"), "[[] [!a]"); + assert_eq!(fix_negation("[]] [^a]"), "[]] [!a]"); + } + + #[test] + fn test_fix_negation_should_not_amend() { + assert_eq!(fix_negation("abc"), "abc"); + + // Regex specifically matches either [ or ^ + assert_eq!(fix_negation("[[^]"), "[[^]"); + + // Regex that specifically matches either space or ^ + assert_eq!(fix_negation("[ ^]"), "[ ^]"); + + // Regex that specifically matches either [, space or ^ + assert_eq!(fix_negation("[[ ^]"), "[[ ^]"); + assert_eq!(fix_negation("[ [^]"), "[ [^]"); + + // Invalid globs (according to rust's glob implementation) will remain unamended + assert_eq!(fix_negation("[^]"), "[^]"); + assert_eq!(fix_negation("[^"), "[^"); + assert_eq!(fix_negation("[][^]"), "[][^]"); + } +}