From 8b719a859112cf7ae9adbb38cd5826a0d955c035 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 11 Apr 2022 22:50:01 +0200 Subject: [PATCH] du: add support for --exclude and --exclude-from (#3381) * du: add support for --exclude and --exclude-from And add an option --verbose (doesn't exist in GNU) --- Cargo.lock | 1 + src/uu/du/Cargo.toml | 2 + src/uu/du/src/du.rs | 230 ++++++++++++++++++++++++++++++--------- tests/by-util/test_du.rs | 174 ++++++++++++++++++++++++++++- 4 files changed, 354 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e34fd2f01..5b78da651 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2402,6 +2402,7 @@ version = "0.0.13" dependencies = [ "chrono", "clap 3.1.6", + "glob", "uucore", "winapi 0.3.9", ] diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index 9da4be090..1760731e3 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -16,6 +16,8 @@ path = "src/du.rs" [dependencies] chrono = "^0.4.11" +# For the --exclude & --exclude-from options +glob = "0.3.0" clap = { version = "3.1", features = ["wrap_help", "cargo"] } uucore = { version=">=0.0.11", package="uucore", path="../../uucore" } diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 2c3dfceae..ff7a5a5b7 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -11,11 +11,15 @@ extern crate uucore; use chrono::prelude::DateTime; use chrono::Local; use clap::{crate_version, Arg, ArgMatches, Command}; +use glob::Pattern; use std::collections::HashSet; use std::env; use std::fs; +use std::fs::File; #[cfg(not(windows))] use std::fs::Metadata; +use std::io::BufRead; +use std::io::BufReader; use std::io::{ErrorKind, Result}; use std::iter; #[cfg(not(windows))] @@ -24,7 +28,6 @@ use std::os::unix::fs::MetadataExt; use std::os::windows::fs::MetadataExt; #[cfg(windows)] use std::os::windows::io::AsRawHandle; -#[cfg(windows)] use std::path::Path; use std::path::PathBuf; use std::str::FromStr; @@ -68,6 +71,9 @@ mod options { pub const ONE_FILE_SYSTEM: &str = "one-file-system"; pub const DEREFERENCE: &str = "dereference"; pub const INODES: &str = "inodes"; + pub const EXCLUDE: &str = "exclude"; + pub const EXCLUDE_FROM: &str = "exclude-from"; + pub const VERBOSE: &str = "verbose"; pub const FILE: &str = "FILE"; } @@ -80,6 +86,12 @@ Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set). SIZE is an integer and optional unit (example: 10M is 10*1024*1024). Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers of 1000). + +PATTERN allows some advanced exclusions. For example, the following syntaxes +are supported: +? will match only one character +* will match zero or more characters +{a,b} will match a or b "; const USAGE: &str = "\ {} [OPTION]... [FILE]... @@ -97,6 +109,7 @@ struct Options { one_file_system: bool, dereference: bool, inodes: bool, + verbose: bool, } #[derive(PartialEq, Eq, Hash, Clone, Copy)] @@ -287,6 +300,7 @@ fn du( options: &Options, depth: usize, inodes: &mut HashSet, + exclude: &[Pattern], ) -> Box> { let mut stats = vec![]; let mut futures = vec![]; @@ -306,49 +320,68 @@ fn du( } }; - for f in read { + 'file_loop: for f in read { match f { - Ok(entry) => match Stat::new(entry.path(), options) { - Ok(this_stat) => { - if let Some(inode) = this_stat.inode { - if inodes.contains(&inode) { - continue; - } - inodes.insert(inode); - } - if this_stat.is_dir { - if options.one_file_system { - if let (Some(this_inode), Some(my_inode)) = - (this_stat.inode, my_stat.inode) + Ok(entry) => { + match Stat::new(entry.path(), options) { + Ok(this_stat) => { + // We have an exclude list + for pattern in exclude { + // Look at all patterns with both short and long paths + // if we have 'du foo' but search to exclude 'foo/bar' + // we need the full path + if pattern.matches(&this_stat.path.to_string_lossy()) + || pattern.matches(&entry.file_name().into_string().unwrap()) { - if this_inode.dev_id != my_inode.dev_id { - continue; + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", &this_stat.path.quote()); } + // Go to the next file + continue 'file_loop; } } - futures.push(du(this_stat, options, depth + 1, inodes)); - } else { - my_stat.size += this_stat.size; - my_stat.blocks += this_stat.blocks; - my_stat.inodes += 1; - if options.all { - stats.push(this_stat); + + if let Some(inode) = this_stat.inode { + if inodes.contains(&inode) { + continue; + } + inodes.insert(inode); + } + if this_stat.is_dir { + if options.one_file_system { + if let (Some(this_inode), Some(my_inode)) = + (this_stat.inode, my_stat.inode) + { + if this_inode.dev_id != my_inode.dev_id { + continue; + } + } + } + futures.push(du(this_stat, options, depth + 1, inodes, exclude)); + } else { + my_stat.size += this_stat.size; + my_stat.blocks += this_stat.blocks; + my_stat.inodes += 1; + if options.all { + stats.push(this_stat); + } } } + Err(error) => match error.kind() { + ErrorKind::PermissionDenied => { + let description = format!("cannot access {}", entry.path().quote()); + let error_message = "Permission denied"; + show_error_custom_description!(description, "{}", error_message); + set_exit_code(1); + } + _ => { + set_exit_code(1); + show_error!("cannot access {}: {}", entry.path().quote(), error); + } + }, } - Err(error) => match error.kind() { - ErrorKind::PermissionDenied => { - let description = format!("cannot access {}", entry.path().quote()); - let error_message = "Permission denied"; - show_error_custom_description!(description, "{}", error_message); - set_exit_code(1); - } - _ => { - set_exit_code(1); - show_error!("cannot access {}: {}", entry.path().quote(), error); - } - }, - }, + } Err(error) => show_error!("{}", error), } } @@ -406,6 +439,7 @@ enum DuError { SummarizeDepthConflict(String), InvalidTimeStyleArg(String), InvalidTimeArg(String), + InvalidGlob(String), } impl Display for DuError { @@ -436,6 +470,7 @@ Try '{} --help' for more information.", 'birth' and 'creation' arguments are not supported on this platform.", s.quote() ), + DuError::InvalidGlob(s) => write!(f, "Invalid exclude syntax: {}", s), } } } @@ -448,11 +483,75 @@ impl UError for DuError { Self::InvalidMaxDepthArg(_) | Self::SummarizeDepthConflict(_) | Self::InvalidTimeStyleArg(_) - | Self::InvalidTimeArg(_) => 1, + | Self::InvalidTimeArg(_) + | Self::InvalidGlob(_) => 1, } } } +// Read a file and return each line in a vector of String +fn file_as_vec(filename: impl AsRef) -> Vec { + let file = File::open(filename).expect("no such file"); + let buf = BufReader::new(file); + + buf.lines() + .map(|l| l.expect("Could not parse line")) + .collect() +} + +// Given the --exclude-from and/or --exclude arguments, returns the globset lists +// to ignore the files +fn get_glob_ignore(matches: &ArgMatches) -> UResult> { + let mut excludes_from = if matches.is_present(options::EXCLUDE_FROM) { + match matches.values_of(options::EXCLUDE_FROM) { + Some(all_files) => { + let mut exclusion = Vec::::new(); + // Read the exclude lists from all the files + // and add them into a vector of string + let files: Vec = all_files.clone().map(|v| v.to_owned()).collect(); + for f in files { + exclusion.extend(file_as_vec(&f)); + } + exclusion + } + None => Vec::::new(), + } + } else { + Vec::::new() + }; + + let mut excludes = if matches.is_present(options::EXCLUDE) { + match matches.values_of(options::EXCLUDE) { + Some(v) => { + // Read the various arguments + v.clone().map(|v| v.to_owned()).collect() + } + None => Vec::::new(), + } + } else { + Vec::::new() + }; + + // Merge the two lines + excludes.append(&mut excludes_from); + if !&excludes.is_empty() { + let mut builder = Vec::new(); + // Create the `Vec` of excludes + for f in excludes { + if matches.is_present(options::VERBOSE) { + println!("adding {:?} to the exclude list ", &f); + } + match Pattern::new(&f) { + Ok(glob) => builder.push(glob), + Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()), + }; + } + Ok(builder) + } else { + Ok(Vec::new()) + } +} + #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { @@ -475,6 +574,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { one_file_system: matches.is_present(options::ONE_FILE_SYSTEM), dereference: matches.is_present(options::DEREFERENCE), inodes: matches.is_present(options::INODES), + verbose: matches.is_present(options::VERBOSE), }; let files = match matches.value_of(options::FILE) { @@ -529,8 +629,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { "\n" }; + let excludes = get_glob_ignore(&matches)?; + let mut grand_total = 0; - for path_string in files { + 'loop_file: for path_string in files { + // Skip if we don't want to ignore anything + if !&excludes.is_empty() { + for pattern in &excludes { + { + if pattern.matches(path_string) { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", path_string.quote()); + } + continue 'loop_file; + } + } + } + } + let path = PathBuf::from(&path_string); match Stat::new(path, &options) { Ok(stat) => { @@ -538,7 +655,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if let Some(inode) = stat.inode { inodes.insert(inode); } - let iter = du(stat, &options, 0, &mut inodes); + let iter = du(stat, &options, 0, &mut inodes, &excludes); let (_, len) = iter.size_hint(); let len = len.unwrap(); for (index, stat) in iter.enumerate() { @@ -763,19 +880,28 @@ pub fn uu_app<'a>() -> Command<'a> { .help("exclude entries smaller than SIZE if positive, \ or entries greater than SIZE if negative") ) - // .arg( - // Arg::new("") - // .short('x') - // .long("exclude-from") - // .value_name("FILE") - // .help("exclude files that match any pattern in FILE") - // ) - // .arg( - // Arg::new("exclude") - // .long("exclude") - // .value_name("PATTERN") - // .help("exclude files that match PATTERN") - // ) + .arg( + Arg::new(options::VERBOSE) + .short('v') + .long("verbose") + .help("verbose mode (option not present in GNU/Coreutils)") + ) + .arg( + Arg::new(options::EXCLUDE) + .long(options::EXCLUDE) + .value_name("PATTERN") + .help("exclude files that match PATTERN") + .multiple_occurrences(true) + ) + .arg( + Arg::new(options::EXCLUDE_FROM) + .short('X') + .long("exclude-from") + .value_name("FILE") + .help("exclude files that match any pattern in FILE") + .multiple_occurrences(true) + + ) .arg( Arg::new(options::TIME) .long(options::TIME) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 415a64ac7..1deddb77f 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -3,7 +3,11 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (paths) sublink subwords +// spell-checker:ignore (paths) sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty +#[cfg(not(windows))] +use regex::Regex; +#[cfg(not(windows))] +use std::io::Write; use crate::common::util::*; @@ -602,3 +606,171 @@ fn test_du_bytes() { ))] result.stdout_contains("21529\t./subdir\n"); } + +#[test] +fn test_du_exclude() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.symlink_dir(SUB_DEEPER_DIR, SUB_DIR_LINKS_DEEPER_SYM_DIR); + at.mkdir_all(SUB_DIR_LINKS); + + ts.ucmd() + .arg("--exclude=subdir") + .arg(SUB_DEEPER_DIR) + .succeeds() + .stdout_contains("subdir/deeper/deeper_dir"); + ts.ucmd() + .arg("--exclude=subdir") + .arg("subdir") + .succeeds() + .stdout_is(""); + ts.ucmd() + .arg("--exclude=subdir") + .arg("--verbose") + .arg("subdir") + .succeeds() + .stdout_contains("'subdir' ignored"); +} + +#[test] +// Disable on Windows because we are looking for / +// And the tests would be more complex if we have to support \ too +#[cfg(not(target_os = "windows"))] +fn test_du_exclude_2() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("azerty/xcwww/azeaze"); + + let result = ts.ucmd().arg("azerty").succeeds(); + + let path_regexp = r"(.*)azerty/xcwww/azeaze(.*)azerty/xcwww(.*)azerty"; + let re = Regex::new(path_regexp).unwrap(); + assert!(re.is_match(result.stdout_str().replace('\n', "").trim())); + + // Exact match + ts.ucmd() + .arg("--exclude=azeaze") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); + // Partial match and NOT a glob + ts.ucmd() + .arg("--exclude=azeaz") + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + // Partial match and a various glob + ts.ucmd() + .arg("--exclude=azea?") + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azea{z,b}") + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azea*") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azeaz?") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); +} + +#[test] +// Disable on Windows because we are looking for / +// And the tests would be more complex if we have to support \ too +#[cfg(not(target_os = "windows"))] +fn test_du_exclude_mix() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + let mut file1 = at.make_file("file-ignore1"); + file1.write_all(b"azeaze").unwrap(); + let mut file2 = at.make_file("file-ignore2"); + file2.write_all(b"amaz?ng").unwrap(); + + at.mkdir_all("azerty/xcwww/azeaze"); + at.mkdir_all("azerty/xcwww/qzerty"); + at.mkdir_all("azerty/xcwww/amazing"); + + ts.ucmd() + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azeaze") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); + + // Just exclude one file name + let result = ts.ucmd().arg("--exclude=qzerty").arg("azerty").succeeds(); + assert!(!result.stdout_str().contains("qzerty")); + assert!(result.stdout_str().contains("azerty")); + assert!(result.stdout_str().contains("xcwww")); + + // Exclude from file + let result = ts + .ucmd() + .arg("--exclude-from=file-ignore1") + .arg("azerty") + .succeeds(); + assert!(!result.stdout_str().contains("azeaze")); + assert!(result.stdout_str().contains("qzerty")); + assert!(result.stdout_str().contains("xcwww")); + + // Mix two files and string + let result = ts + .ucmd() + .arg("--exclude=qzerty") + .arg("--exclude-from=file-ignore1") + .arg("--exclude-from=file-ignore2") + .arg("azerty") + .succeeds(); + assert!(!result.stdout_str().contains("amazing")); + assert!(!result.stdout_str().contains("qzerty")); + assert!(!result.stdout_str().contains("azeaze")); + assert!(result.stdout_str().contains("xcwww")); +} + +#[test] +fn test_du_exclude_several_components() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("a/b/c"); + at.mkdir_all("a/x/y"); + at.mkdir_all("a/u/y"); + + // Exact match + let result = ts + .ucmd() + .arg("--exclude=a/u") + .arg("--exclude=a/b") + .arg("a") + .succeeds(); + assert!(!result.stdout_str().contains("a/u")); + assert!(!result.stdout_str().contains("a/b")); +} + +#[test] +fn test_du_exclude_invalid_syntax() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("azerty/xcwww/azeaze"); + + ts.ucmd() + .arg("--exclude=a[ze") + .arg("azerty") + .fails() + .stderr_contains("du: Invalid exclude syntax"); +}