Merge pull request #3754 from ackerleytng/main

Add `parse_glob` module and update `du` to use `parse_glob`
2025-09-16 03:36:18 +00:00 · 2022-08-10 19:28:40 +02:00 · 2022-08-10 19:28:40 +02:00 · 8692301ec7
commit 8692301ec7
parent e304758f61 c2bb9596d9
7 changed files with 224 additions and 104 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3102,6 +3102,7 @@ dependencies = [
 "data-encoding-macro",
 "dns-lookup",
 "dunce",
 "glob",
 "itertools",
 "libc",
 "nix",
--- a/src/uu/du/src/du.rs
+++ b/src/uu/du/src/du.rs
@ -37,6 +37,7 @@ use uucore::display::{print_verbatim, Quotable};
 use uucore::error::FromIo;
 use uucore::error::{UError, UResult};
 use uucore::format_usage;
 use uucore::parse_glob;
 use uucore::parse_size::{parse_size, ParseSizeError};
 use uucore::InvalidEncodingHandling;
 #[cfg(windows)]
@ -488,55 +489,28 @@ fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {
 // Given the --exclude-from and/or --exclude arguments, returns the list of
 // glob patterns matching the files to ignore
-fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
+fn build_exclude_patterns(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
-    let mut excludes_from = if matches.contains_id(options::EXCLUDE_FROM) {
+    let exclude_from_iterator = matches
-        match matches.values_of(options::EXCLUDE_FROM) {
+        .values_of(options::EXCLUDE_FROM)
-            Some(all_files) => {
+        .unwrap_or_default()
-                let mut exclusion = Vec::<String>::new();
+        .flat_map(|f| file_as_vec(&f));
                // Read the exclude lists from all the files
                // and add them into a vector of string
                let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
                for f in files {
                    exclusion.extend(file_as_vec(&f));
                }
                exclusion
            }
            None => Vec::<String>::new(),
        }
    } else {
        Vec::<String>::new()
    };
-    let mut excludes = if matches.contains_id(options::EXCLUDE) {
+    let excludes_iterator = matches
-        match matches.values_of(options::EXCLUDE) {
+        .values_of(options::EXCLUDE)
-            Some(v) => {
+        .unwrap_or_default()
-                // Read the various arguments
+        .map(|v| v.to_owned());
                v.clone().map(|v| v.to_owned()).collect()
            }
            None => Vec::<String>::new(),
        }
    } else {
        Vec::<String>::new()
    };
-    // Merge the two lines
+    let mut exclude_patterns = Vec::new();
-    excludes.append(&mut excludes_from);
+    for f in excludes_iterator.chain(exclude_from_iterator) {
-    if !&excludes.is_empty() {
+        if matches.is_present(options::VERBOSE) {
-        let mut builder = Vec::new();
+            println!("adding {:?} to the exclude list ", &f);
-        // Create the `Vec` of excludes
+        }
-        for f in excludes {
+        match parse_glob::from_str(&f) {
-            if matches.contains_id(options::VERBOSE) {
+            Ok(glob) => exclude_patterns.push(glob),
-                println!("adding {:?} to the exclude list ", &f);
+            Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
            }
            match Pattern::new(&f) {
                Ok(glob) => builder.push(glob),
                Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
            };
        }
        Ok(builder)
    } else {
        Ok(Vec::new())
    }
    Ok(exclude_patterns)
 }
 #[uucore::main]
@ -615,85 +589,84 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
        "\n"
    };
-    let excludes = get_glob_ignore(&matches)?;
+    let excludes = build_exclude_patterns(&matches)?;
    let mut grand_total = 0;
    'loop_file: for path_string in files {
        // Skip if we don't want to ignore anything
        if !&excludes.is_empty() {
            for pattern in &excludes {
-                {
+                if pattern.matches(path_string) {
-                    if pattern.matches(path_string) {
+                    // if the directory is ignored, leave early
-                        // if the directory is ignored, leave early
+                    if options.verbose {
-                        if options.verbose {
+                        println!("{} ignored", path_string.quote());
                            println!("{} ignored", path_string.quote());
                        }
                        continue 'loop_file;
                    }
                    continue 'loop_file;
                }
            }
        }
        let path = PathBuf::from(&path_string);
-        match Stat::new(path, &options) {
+        // Check existence of path provided in argument
-            Ok(stat) => {
+        if let Ok(stat) = Stat::new(path, &options) {
-                let mut inodes: HashSet<FileInfo> = HashSet::new();
+            // Kick off the computation of disk usage from the initial path
-                if let Some(inode) = stat.inode {
+            let mut inodes: HashSet<FileInfo> = HashSet::new();
-                    inodes.insert(inode);
+            if let Some(inode) = stat.inode {
                inodes.insert(inode);
            }
            let iter = du(stat, &options, 0, &mut inodes, &excludes);
            // Sum up all the returned `Stat`s and display results
            let (_, len) = iter.size_hint();
            let len = len.unwrap();
            for (index, stat) in iter.enumerate() {
                let size = choose_size(&matches, &stat);
                if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
                    continue;
                }
                let iter = du(stat, &options, 0, &mut inodes, &excludes);
                let (_, len) = iter.size_hint();
                let len = len.unwrap();
                for (index, stat) in iter.enumerate() {
                    let size = choose_size(&matches, &stat);
-                    if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
+                if matches.is_present(options::TIME) {
-                        continue;
+                    let tm = {
-                    }
+                        let secs = {
-
+                            match matches.value_of(options::TIME) {
-                    if matches.contains_id(options::TIME) {
+                                Some(s) => match s {
-                        let tm = {
+                                    "ctime" | "status" => stat.modified,
-                            let secs = {
+                                    "access" | "atime" | "use" => stat.accessed,
-                                match matches.value_of(options::TIME) {
+                                    "birth" | "creation" => stat
-                                    Some(s) => match s {
+                                        .created
-                                        "ctime" | "status" => stat.modified,
+                                        .ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
-                                        "access" | "atime" | "use" => stat.accessed,
+                                    // below should never happen as clap already restricts the values.
-                                        "birth" | "creation" => stat
+                                    _ => unreachable!("Invalid field for --time"),
-                                            .created
+                                },
-                                            .ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
+                                None => stat.modified,
-                                        // below should never happen as clap already restricts the values.
+                            }
                                        _ => unreachable!("Invalid field for --time"),
                                    },
                                    None => stat.modified,
                                }
                            };
                            DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
                        };
-                        if !summarize || index == len - 1 {
+                        DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
-                            let time_str = tm.format(time_format_str).to_string();
+                    };
-                            print!("{}\t{}\t", convert_size(size), time_str);
+                    if !summarize || index == len - 1 {
-                            print_verbatim(stat.path).unwrap();
+                        let time_str = tm.format(time_format_str).to_string();
-                            print!("{}", line_separator);
+                        print!("{}\t{}\t", convert_size(size), time_str);
                        }
                    } else if !summarize || index == len - 1 {
                        print!("{}\t", convert_size(size));
                        print_verbatim(stat.path).unwrap();
                        print!("{}", line_separator);
                    }
-                    if options.total && index == (len - 1) {
+                } else if !summarize || index == len - 1 {
-                        // The last element will be the total size of the the path under
+                    print!("{}\t", convert_size(size));
-                        // path_string.  We add it to the grand total.
+                    print_verbatim(stat.path).unwrap();
-                        grand_total += size;
+                    print!("{}", line_separator);
-                    }
+                }
                if options.total && index == (len - 1) {
                    // The last element will be the total size of the path under
                    // path_string.  We add it to the grand total.
                    grand_total += size;
                }
            }
-            Err(_) => {
+        } else {
-                show_error!(
+            show_error!(
-                    "{}: {}",
+                "{}: {}",
-                    path_string.maybe_quote(),
+                path_string.maybe_quote(),
-                    "No such file or directory"
+                "No such file or directory"
-                );
+            );
            }
        }
    }
--- a/src/uucore/Cargo.toml
+++ b/src/uucore/Cargo.toml
@ -23,6 +23,7 @@ clap = "3.2"
 dns-lookup = { version="1.0.5", optional=true }
 dunce = "1.0.0"
 wild = "2.0"
 glob = "0.3.0"
 # * optional
 itertools = { version="0.10.0", optional=true }
 thiserror = { version="1.0", optional=true }
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@ -29,6 +29,7 @@ pub use crate::mods::ranges;
 pub use crate::mods::version_cmp;
 // * string parsing modules
 pub use crate::parser::parse_glob;
 pub use crate::parser::parse_size;
 pub use crate::parser::parse_time;
--- a/src/uucore/src/lib/parser.rs
+++ b/src/uucore/src/lib/parser.rs
@ -1,2 +1,3 @@
 pub mod parse_glob;
 pub mod parse_size;
 pub mod parse_time;
--- a/src/uucore/src/lib/parser/parse_glob.rs
+++ b/src/uucore/src/lib/parser/parse_glob.rs
@ -0,0 +1,109 @@
 //! Parsing a glob Pattern from a string.
 //!
 //! Use the [`from_str`] function to parse a [`Pattern`] from a string.
 // cSpell:words fnmatch
 use glob::{Pattern, PatternError};
 fn fix_negation(glob: &str) -> String {
    let mut chars = glob.chars().collect::<Vec<_>>();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '[' && i + 4 <= glob.len() && chars[i + 1] == '^' {
            match chars[i + 3..].iter().position(|x| *x == ']') {
                None => (),
                Some(j) => {
                    chars[i + 1] = '!';
                    i += j + 4;
                    continue;
                }
            }
        }
        i += 1;
    }
    chars.into_iter().collect::<String>()
 }
 /// Parse a glob Pattern from a string.
 ///
 /// This function amends the input string to replace any caret or circumflex
 /// character (^) used to negate a set of characters with an exclamation mark
 /// (!), which adapts rust's glob matching to function the way the GNU utils'
 /// fnmatch does.
 ///
 /// # Examples
 ///
 /// ```rust
 /// use std::time::Duration;
 /// use uucore::parse_glob::from_str;
 /// assert!(!from_str("[^abc]").unwrap().matches("a"));
 /// assert!(from_str("[^abc]").unwrap().matches("x"));
 /// ```
 pub fn from_str(glob: &str) -> Result<Pattern, PatternError> {
    Pattern::new(&fix_negation(glob))
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_from_str() {
        assert_eq!(from_str("[^abc]").unwrap(), Pattern::new("[!abc]").unwrap());
    }
    #[test]
    fn test_fix_negation() {
        // Happy/Simple case
        assert_eq!(fix_negation("[^abc]"), "[!abc]");
        // Should fix negations in a long regex
        assert_eq!(fix_negation("foo[abc]  bar[^def]"), "foo[abc]  bar[!def]");
        // Should fix multiple negations in a regex
        assert_eq!(fix_negation("foo[^abc]bar[^def]"), "foo[!abc]bar[!def]");
        // Should fix negation of the single character ]
        assert_eq!(fix_negation("[^]]"), "[!]]");
        // Should fix negation of the single character ^
        assert_eq!(fix_negation("[^^]"), "[!^]");
        // Should fix negation of the space character
        assert_eq!(fix_negation("[^ ]"), "[! ]");
        // Complicated patterns
        assert_eq!(fix_negation("[^][]"), "[!][]");
        assert_eq!(fix_negation("[^[]]"), "[![]]");
        // More complex patterns that should be replaced
        assert_eq!(fix_negation("[[]] [^a]"), "[[]] [!a]");
        assert_eq!(fix_negation("[[] [^a]"), "[[] [!a]");
        assert_eq!(fix_negation("[]] [^a]"), "[]] [!a]");
    }
    #[test]
    fn test_fix_negation_should_not_amend() {
        assert_eq!(fix_negation("abc"), "abc");
        // Regex specifically matches either [ or ^
        assert_eq!(fix_negation("[[^]"), "[[^]");
        // Regex that specifically matches either space or ^
        assert_eq!(fix_negation("[ ^]"), "[ ^]");
        // Regex that specifically matches either [, space or ^
        assert_eq!(fix_negation("[[ ^]"), "[[ ^]");
        assert_eq!(fix_negation("[ [^]"), "[ [^]");
        // Invalid globs (according to rust's glob implementation) will remain unamended
        assert_eq!(fix_negation("[^]"), "[^]");
        assert_eq!(fix_negation("[^"), "[^");
        assert_eq!(fix_negation("[][^]"), "[][^]");
    }
 }
--- a/tests/by-util/test_du.rs
+++ b/tests/by-util/test_du.rs
@ -747,6 +747,40 @@ fn test_du_exclude_mix() {
    assert!(result.stdout_str().contains("xcwww"));
 }
 #[test]
 // Disable on Windows because we are looking for /
 // And the tests would be more complex if we have to support \ too
 #[cfg(not(target_os = "windows"))]
 fn test_du_complex_exclude_patterns() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;
    at.mkdir_all("azerty/xcwww/azeaze");
    at.mkdir_all("azerty/xcwww/qzerty");
    at.mkdir_all("azerty/xcwww/amazing");
    // Negation in glob should work with both ^ and !
    let result = ts
        .ucmd()
        .arg("--exclude=azerty/*/[^q]*")
        .arg("azerty")
        .succeeds();
    assert!(!result.stdout_str().contains("amazing"));
    assert!(result.stdout_str().contains("qzerty"));
    assert!(!result.stdout_str().contains("azeaze"));
    assert!(result.stdout_str().contains("xcwww"));
    let result = ts
        .ucmd()
        .arg("--exclude=azerty/*/[!q]*")
        .arg("azerty")
        .succeeds();
    assert!(!result.stdout_str().contains("amazing"));
    assert!(result.stdout_str().contains("qzerty"));
    assert!(!result.stdout_str().contains("azeaze"));
    assert!(result.stdout_str().contains("xcwww"));
 }
 #[test]
 fn test_du_exclude_several_components() {
    let ts = TestScenario::new(util_name!());