From 1606968bf206daf40438d97ebe39f849d5c49636 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 16 Feb 2025 16:50:06 -0500 Subject: [PATCH] rm: recursive implementation of -r option Change the implementation of `rm -r` so that it is explicitly recursive so that (1) there is one code path regardless of whether `--verbose` is given and (2) it is easier to be compatible with GNU `rm`. This change eliminates a dependency on the `walkdir` crate. Fixes #7033, fixes #7305, fixes #7307. --- Cargo.lock | 1 - src/uu/rm/Cargo.toml | 1 - src/uu/rm/src/rm.rs | 187 +++++++++++++++++++++++++-------------- tests/by-util/test_rm.rs | 72 +++++++++++++++ util/build-gnu.sh | 8 -- 5 files changed, 191 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 765114901..42f4a0641 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3164,7 +3164,6 @@ dependencies = [ "clap", "libc", "uucore", - "walkdir", "windows-sys 0.59.0", ] diff --git a/src/uu/rm/Cargo.toml b/src/uu/rm/Cargo.toml index c45dfe33d..05ed02775 100644 --- a/src/uu/rm/Cargo.toml +++ b/src/uu/rm/Cargo.toml @@ -18,7 +18,6 @@ path = "src/rm.rs" [dependencies] clap = { workspace = true } -walkdir = { workspace = true } uucore = { workspace = true, features = ["fs"] } [target.'cfg(unix)'.dependencies] diff --git a/src/uu/rm/src/rm.rs b/src/uu/rm/src/rm.rs index ac30478c1..ba003e85d 100644 --- a/src/uu/rm/src/rm.rs +++ b/src/uu/rm/src/rm.rs @@ -6,7 +6,6 @@ // spell-checker:ignore (path) eacces inacc rm-r4 use clap::{builder::ValueParser, crate_version, parser::ValueSource, Arg, ArgAction, Command}; -use std::collections::VecDeque; use std::ffi::{OsStr, OsString}; use std::fs::{self, Metadata}; use std::ops::BitOr; @@ -21,7 +20,6 @@ use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::{ format_usage, help_about, help_section, help_usage, os_str_as_bytes, prompt_yes, show_error, }; -use walkdir::{DirEntry, WalkDir}; #[derive(Eq, PartialEq, Clone, Copy)] /// Enum, determining when the `rm` will prompt the user about the file deletion @@ -341,6 +339,24 @@ fn is_dir_empty(path: &Path) -> bool { } } +/// Whether the given file or directory is readable. +#[cfg(unix)] +fn is_readable(path: &Path) -> bool { + match std::fs::metadata(path) { + Err(_) => false, + Ok(metadata) => { + let mode = metadata.permissions().mode(); + (mode & 0o400) > 0 + } + } +} + +/// Whether the given file or directory is readable. +#[cfg(not(unix))] +fn is_readable(_path: &Path) -> bool { + true +} + /// Whether the given file or directory is writable. #[cfg(unix)] fn is_writable(path: &Path) -> bool { @@ -360,7 +376,106 @@ fn is_writable(_path: &Path) -> bool { true } -#[allow(clippy::cognitive_complexity)] +/// Recursively remove the directory tree rooted at the given path. +/// +/// If `path` is a file or a symbolic link, just remove it. If it is a +/// directory, remove all of its entries recursively and then remove the +/// directory itself. In case of an error, print the error message to +/// `stderr` and return `true`. If there were no errors, return `false`. +fn remove_dir_recursive(path: &Path, options: &Options) -> bool { + // Special case: if we cannot access the metadata because the + // filename is too long, fall back to try + // `std::fs::remove_dir_all()`. + // + // TODO This is a temporary bandage; we shouldn't need to do this + // at all. Instead of using the full path like "x/y/z", which + // causes a `InvalidFilename` error when trying to access the file + // metadata, we should be able to use just the last part of the + // path, "z", and know that it is relative to the parent, "x/y". + if let Some(s) = path.to_str() { + if s.len() > 1000 { + match std::fs::remove_dir_all(path) { + Ok(_) => return false, + Err(e) => { + let e = e.map_err_context(|| format!("cannot remove {}", path.quote())); + show_error!("{e}"); + return true; + } + } + } + } + + // Base case 1: this is a file or a symbolic link. + // + // The symbolic link case is important because it could be a link to + // a directory and we don't want to recurse. In particular, this + // avoids an infinite recursion in the case of a link to the current + // directory, like `ln -s . link`. + if !path.is_dir() || path.is_symlink() { + return remove_file(path, options); + } + + // Base case 2: this is a non-empty directory, but the user + // doesn't want to descend into it. + if options.interactive == InteractiveMode::Always + && !is_dir_empty(path) + && !prompt_descend(path) + { + return false; + } + + // Recursive case: this is a directory. + let mut error = false; + match std::fs::read_dir(path) { + Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { + // This is not considered an error. + } + Err(_) => error = true, + Ok(iter) => { + for entry in iter { + match entry { + Err(_) => error = true, + Ok(entry) => { + let child_error = remove_dir_recursive(&entry.path(), options); + error = error || child_error; + } + } + } + } + } + + // Ask the user whether to remove the current directory. + if options.interactive == InteractiveMode::Always && !prompt_dir(path, options) { + return false; + } + + // Try removing the directory itself. + match std::fs::remove_dir(path) { + Err(_) if !error && !is_readable(path) => { + // For compatibility with GNU test case + // `tests/rm/unread2.sh`, show "Permission denied" in this + // case instead of "Directory not empty". + show_error!("cannot remove {}: Permission denied", path.quote()); + error = true; + } + Err(e) if !error => { + let e = e.map_err_context(|| format!("cannot remove {}", path.quote())); + show_error!("{e}"); + error = true; + } + Err(_) => { + // If there has already been at least one error when + // trying to remove the children, then there is no need to + // show another error message as we return from each level + // of the recursion. + } + Ok(_) if options.verbose => println!("removed directory {}", normalize(path).quote()), + Ok(_) => {} + } + + error +} + fn handle_dir(path: &Path, options: &Options) -> bool { let mut had_err = false; @@ -375,71 +490,7 @@ fn handle_dir(path: &Path, options: &Options) -> bool { let is_root = path.has_root() && path.parent().is_none(); if options.recursive && (!is_root || !options.preserve_root) { - if options.interactive != InteractiveMode::Always && !options.verbose { - if let Err(e) = fs::remove_dir_all(path) { - // GNU compatibility (rm/empty-inacc.sh) - // remove_dir_all failed. maybe it is because of the permissions - // but if the directory is empty, remove_dir might work. - // So, let's try that before failing for real - if fs::remove_dir(path).is_err() { - had_err = true; - if e.kind() == std::io::ErrorKind::PermissionDenied { - // GNU compatibility (rm/fail-eacces.sh) - // here, GNU doesn't use some kind of remove_dir_all - // It will show directory+file - show_error!("cannot remove {}: {}", path.quote(), "Permission denied"); - } else { - show_error!("cannot remove {}: {}", path.quote(), e); - } - } - } - } else { - let mut dirs: VecDeque = VecDeque::new(); - // The Paths to not descend into. We need to this because WalkDir doesn't have a way, afaik, to not descend into a directory - // So we have to just ignore paths as they come up if they start with a path we aren't descending into - let mut not_descended: Vec = Vec::new(); - - 'outer: for entry in WalkDir::new(path) { - match entry { - Ok(entry) => { - if options.interactive == InteractiveMode::Always { - for not_descend in ¬_descended { - if entry.path().starts_with(not_descend) { - // We don't need to continue the rest of code in this loop if we are in a directory we don't want to descend into - continue 'outer; - } - } - } - let file_type = entry.file_type(); - if file_type.is_dir() { - // If we are in Interactive Mode Always and the directory isn't empty we ask if we should descend else we push this directory onto dirs vector - if options.interactive == InteractiveMode::Always - && !is_dir_empty(entry.path()) - { - // If we don't descend we push this directory onto our not_descended vector else we push this directory onto dirs vector - if prompt_descend(entry.path()) { - dirs.push_back(entry); - } else { - not_descended.push(entry.path().to_path_buf()); - } - } else { - dirs.push_back(entry); - } - } else { - had_err = remove_file(entry.path(), options).bitor(had_err); - } - } - Err(e) => { - had_err = true; - show_error!("recursing in {}: {}", path.quote(), e); - } - } - } - - for dir in dirs.iter().rev() { - had_err = remove_dir(dir.path(), options).bitor(had_err); - } - } + had_err = remove_dir_recursive(path, options) } else if options.dir && (!is_root || !options.preserve_root) { had_err = remove_dir(path, options).bitor(had_err); } else if options.recursive { diff --git a/tests/by-util/test_rm.rs b/tests/by-util/test_rm.rs index ecb3be5d5..230ea22cd 100644 --- a/tests/by-util/test_rm.rs +++ b/tests/by-util/test_rm.rs @@ -813,3 +813,75 @@ fn test_recursive_symlink_loop() { assert!(!at.symlink_exists("d/link")); assert!(!at.dir_exists("d")); } + +#[cfg(not(windows))] +#[test] +fn test_only_first_error_recursive() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("a"); + at.mkdir("a/b"); + at.touch("a/b/file"); + // Make the inner directory not writable. + at.set_mode("a/b", 0o0555); + + // To match the behavior of GNU `rm`, print an error message for + // the file in the non-writable directory, but don't print the + // error messages that would have appeared when trying to remove + // the directories containing the file. + ucmd.args(&["-r", "-f", "a"]) + .fails() + .stderr_only("rm: cannot remove 'a/b/file': Permission denied\n"); + assert!(at.file_exists("a/b/file")); + assert!(at.dir_exists("a/b")); + assert!(at.dir_exists("a")); +} + +#[cfg(not(windows))] +#[test] +fn test_unreadable_and_nonempty_dir() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir_all("a/b"); + at.set_mode("a", 0o0333); + + ucmd.args(&["-r", "-f", "a"]) + .fails() + .stderr_only("rm: cannot remove 'a': Permission denied\n"); + assert!(at.dir_exists("a/b")); + assert!(at.dir_exists("a")); +} + +#[cfg(not(windows))] +#[test] +fn test_inaccessible_dir() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("dir"); + at.set_mode("dir", 0o0333); + ucmd.args(&["-d", "dir"]).succeeds().no_output(); + assert!(!at.dir_exists("dir")); +} + +#[cfg(not(windows))] +#[test] +fn test_inaccessible_dir_nonempty() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("dir"); + at.touch("dir/f"); + at.set_mode("dir", 0o0333); + ucmd.args(&["-d", "dir"]) + .fails() + .stderr_only("rm: cannot remove 'dir': Directory not empty\n"); + assert!(at.file_exists("dir/f")); + assert!(at.dir_exists("dir")); +} + +#[cfg(not(windows))] +#[test] +fn test_inaccessible_dir_recursive() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("a"); + at.mkdir("a/unreadable"); + at.set_mode("a/unreadable", 0o0333); + ucmd.args(&["-r", "-f", "a"]).succeeds().no_output(); + assert!(!at.dir_exists("a/unreadable")); + assert!(!at.dir_exists("a")); +} diff --git a/util/build-gnu.sh b/util/build-gnu.sh index c47e99889..10f26d4c5 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -199,14 +199,6 @@ grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs -r sed -Ei # we should not regress our project just to match what GNU is going. # So, do some changes on the fly -sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh - -sed -i -e "s|rm: cannot remove 'a/b/file'|rm: cannot remove 'a'|g" tests/rm/cycle.sh - -sed -i -e "s|rm: cannot remove directory 'b/a/p'|rm: cannot remove 'b'|g" tests/rm/rm1.sh - -sed -i -e "s|rm: cannot remove 'a/1'|rm: cannot remove 'a'|g" tests/rm/rm2.sh - sed -i -e "s|removed directory 'a/'|removed directory 'a'|g" tests/rm/v-slash.sh # 'rel' doesn't exist. Our implementation is giving a better message.