From bd0fb817a7ae32f0dc6fafe6aa2ade7e81d3d05b Mon Sep 17 00:00:00 2001 From: tommady Date: Sun, 24 Sep 2023 16:53:27 +0800 Subject: [PATCH] cp: fix the result of inodes are not the same when preserve links is flagged (#5064) Should fix: ``` rm -rf a b c touch a ln -s a b mkdir c ./target/debug/coreutils cp --preserve=links -R -H a b c a_inode=$(ls -i c/a|sed 's,c/.*,,') b_inode=$(ls -i c/b|sed 's,c/.*,,') echo "$a_inode" = "$b_inode" ``` --- src/uu/cp/src/copydir.rs | 57 ++++++----- src/uu/cp/src/cp.rs | 158 ++++++++++++++----------------- tests/by-util/test_cp.rs | 199 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 296 insertions(+), 118 deletions(-) diff --git a/src/uu/cp/src/copydir.rs b/src/uu/cp/src/copydir.rs index 430548d0c..ac44ce687 100644 --- a/src/uu/cp/src/copydir.rs +++ b/src/uu/cp/src/copydir.rs @@ -8,7 +8,7 @@ //! See the [`copy_directory`] function for more information. #[cfg(windows)] use std::borrow::Cow; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::env; use std::fs; use std::io; @@ -24,8 +24,8 @@ use uucore::uio_error; use walkdir::{DirEntry, WalkDir}; use crate::{ - aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, preserve_hardlinks, - CopyResult, Error, Options, + aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, CopyResult, Error, + Options, }; /// Ensure a Windows path starts with a `\\?`. @@ -200,7 +200,7 @@ fn copy_direntry( options: &Options, symlinked_files: &mut HashSet, preserve_hard_links: bool, - hard_links: &mut Vec<(String, u64)>, + copied_files: &mut HashMap, ) -> CopyResult<()> { let Entry { source_absolute, @@ -240,30 +240,27 @@ fn copy_direntry( // If the source is not a directory, then we need to copy the file. if !source_absolute.is_dir() { if preserve_hard_links { - let dest = local_to_target.as_path().to_path_buf(); - let found_hard_link = preserve_hardlinks(hard_links, &source_absolute, &dest)?; - if !found_hard_link { - match copy_file( - progress_bar, - &source_absolute, - local_to_target.as_path(), - options, - symlinked_files, - false, - ) { - Ok(_) => Ok(()), - Err(err) => { - if source_absolute.is_symlink() { - // silent the error with a symlink - // In case we do --archive, we might copy the symlink - // before the file itself - Ok(()) - } else { - Err(err) - } + match copy_file( + progress_bar, + &source_absolute, + local_to_target.as_path(), + options, + symlinked_files, + copied_files, + false, + ) { + Ok(_) => Ok(()), + Err(err) => { + if source_absolute.is_symlink() { + // silent the error with a symlink + // In case we do --archive, we might copy the symlink + // before the file itself + Ok(()) + } else { + Err(err) } - }?; - } + } + }?; } else { // At this point, `path` is just a plain old file. // Terminate this function immediately if there is any @@ -277,6 +274,7 @@ fn copy_direntry( local_to_target.as_path(), options, symlinked_files, + copied_files, false, ) { Ok(_) => {} @@ -310,6 +308,7 @@ pub(crate) fn copy_directory( target: &Path, options: &Options, symlinked_files: &mut HashSet, + copied_files: &mut HashMap, source_in_command_line: bool, ) -> CopyResult<()> { if !options.recursive { @@ -324,6 +323,7 @@ pub(crate) fn copy_directory( target, options, symlinked_files, + copied_files, source_in_command_line, ); } @@ -372,7 +372,6 @@ pub(crate) fn copy_directory( }; let target = tmp.as_path(); - let mut hard_links: Vec<(String, u64)> = vec![]; let preserve_hard_links = options.preserve_hard_links(); // Collect some paths here that are invariant during the traversal @@ -397,7 +396,7 @@ pub(crate) fn copy_directory( options, symlinked_files, preserve_hard_links, - &mut hard_links, + copied_files, )?; } // Print an error message, but continue traversing the directory. diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 516f20fda..05261c22f 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -9,7 +9,7 @@ use quick_error::quick_error; use std::borrow::Cow; use std::cmp::Ordering; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::env; #[cfg(not(windows))] use std::ffi::CString; @@ -1106,67 +1106,6 @@ fn parse_path_args( Ok((paths, target)) } -/// Get the inode information for a file. -fn get_inode(file_info: &FileInformation) -> u64 { - #[cfg(unix)] - let result = file_info.inode(); - #[cfg(windows)] - let result = file_info.file_index(); - result -} - -#[cfg(target_os = "redox")] -fn preserve_hardlinks( - hard_links: &mut Vec<(String, u64)>, - source: &std::path::Path, - dest: &std::path::Path, - found_hard_link: &mut bool, -) -> CopyResult<()> { - // Redox does not currently support hard links - Ok(()) -} - -/// Hard link a pair of files if needed _and_ record if this pair is a new hard link. -#[cfg(not(target_os = "redox"))] -fn preserve_hardlinks( - hard_links: &mut Vec<(String, u64)>, - source: &std::path::Path, - dest: &std::path::Path, -) -> CopyResult { - let info = FileInformation::from_path(source, false) - .context(format!("cannot stat {}", source.quote()))?; - let inode = get_inode(&info); - let nlinks = info.number_of_links(); - let mut found_hard_link = false; - #[allow(clippy::explicit_iter_loop)] - for (link, link_inode) in hard_links.iter() { - if *link_inode == inode { - // Consider the following files: - // - // * `src/f` - a regular file - // * `src/link` - a hard link to `src/f` - // * `dest/src/f` - a different regular file - // - // In this scenario, if we do `cp -a src/ dest/`, it is - // possible that the order of traversal causes `src/link` - // to get copied first (to `dest/src/link`). In that case, - // in order to make sure `dest/src/link` is a hard link to - // `dest/src/f` and `dest/src/f` has the contents of - // `src/f`, we delete the existing file to allow the hard - // linking. - if file_or_link_exists(dest) && file_or_link_exists(Path::new(link)) { - std::fs::remove_file(dest)?; - } - std::fs::hard_link(link, dest).unwrap(); - found_hard_link = true; - } - } - if !found_hard_link && nlinks > 1 { - hard_links.push((dest.to_str().unwrap().to_string(), inode)); - } - Ok(found_hard_link) -} - /// When handling errors, we don't always want to show them to the user. This function handles that. fn show_error_if_needed(error: &Error) { match error { @@ -1195,14 +1134,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult let target_type = TargetType::determine(sources, target); verify_target_type(target, &target_type)?; - let preserve_hard_links = options.preserve_hard_links(); - - let mut hard_links: Vec<(String, u64)> = vec![]; - let mut non_fatal_errors = false; let mut seen_sources = HashSet::with_capacity(sources.len()); let mut symlinked_files = HashSet::new(); + // to remember the copied files for further usage. + // the FileInformation implemented the Hash trait by using + // 1. inode number + // 2. device number + // the combination of a file's inode number and device number is unique throughout all the file systems. + // + // key is the source file's information and the value is the destination filepath. + let mut copied_files: HashMap = HashMap::with_capacity(sources.len()); + let progress_bar = if options.progress_bar { let pb = ProgressBar::new(disk_usage(sources, options.recursive)?) .with_style( @@ -1222,28 +1166,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult if seen_sources.contains(source) { // FIXME: compare sources by the actual file they point to, not their path. (e.g. dir/file == dir/../dir/file in most cases) show_warning!("source {} specified more than once", source.quote()); - } else { - let found_hard_link = if preserve_hard_links && !source.is_dir() { - let dest = construct_dest_path(source, target, &target_type, options)?; - preserve_hardlinks(&mut hard_links, source, &dest)? - } else { - false - }; - if !found_hard_link { - if let Err(error) = copy_source( - &progress_bar, - source, - target, - &target_type, - options, - &mut symlinked_files, - ) { - show_error_if_needed(&error); - non_fatal_errors = true; - } - } - seen_sources.insert(source); + } else if let Err(error) = copy_source( + &progress_bar, + source, + target, + &target_type, + options, + &mut symlinked_files, + &mut copied_files, + ) { + show_error_if_needed(&error); + non_fatal_errors = true; } + seen_sources.insert(source); } if let Some(pb) = progress_bar { @@ -1295,11 +1230,20 @@ fn copy_source( target_type: &TargetType, options: &Options, symlinked_files: &mut HashSet, + copied_files: &mut HashMap, ) -> CopyResult<()> { let source_path = Path::new(&source); if source_path.is_dir() { // Copy as directory - copy_directory(progress_bar, source, target, options, symlinked_files, true) + copy_directory( + progress_bar, + source, + target, + options, + symlinked_files, + copied_files, + true, + ) } else { // Copy as file let dest = construct_dest_path(source_path, target, target_type, options)?; @@ -1309,6 +1253,7 @@ fn copy_source( dest.as_path(), options, symlinked_files, + copied_files, true, ); if options.parents { @@ -1570,6 +1515,24 @@ fn handle_existing_dest( OverwriteMode::Clobber(ClobberMode::RemoveDestination) => { fs::remove_file(dest)?; } + OverwriteMode::Clobber(ClobberMode::Standard) => { + // Consider the following files: + // + // * `src/f` - a regular file + // * `src/link` - a hard link to `src/f` + // * `dest/src/f` - a different regular file + // + // In this scenario, if we do `cp -a src/ dest/`, it is + // possible that the order of traversal causes `src/link` + // to get copied first (to `dest/src/link`). In that case, + // in order to make sure `dest/src/link` is a hard link to + // `dest/src/f` and `dest/src/f` has the contents of + // `src/f`, we delete the existing file to allow the hard + // linking. + if options.preserve_hard_links() { + fs::remove_file(dest)?; + } + } _ => (), }; @@ -1643,6 +1606,7 @@ fn copy_file( dest: &Path, options: &Options, symlinked_files: &mut HashSet, + copied_files: &mut HashMap, source_in_command_line: bool, ) -> CopyResult<()> { if (options.update == UpdateMode::ReplaceIfOlder || options.update == UpdateMode::ReplaceNone) @@ -1686,6 +1650,19 @@ fn copy_file( handle_existing_dest(source, dest, options, source_in_command_line)?; } + if options.preserve_hard_links() { + // if we encounter a matching device/inode pair in the source tree + // we can arrange to create a hard link between the corresponding names + // in the destination tree. + if let Some(new_source) = copied_files.get( + &FileInformation::from_path(source, options.dereference(source_in_command_line)) + .context(format!("cannot stat {}", source.quote()))?, + ) { + std::fs::hard_link(new_source, dest)?; + return Ok(()); + }; + } + if options.verbose { if let Some(pb) = progress_bar { // Suspend (hide) the progress bar so the println won't overlap with the progress bar. @@ -1873,6 +1850,11 @@ fn copy_file( copy_attributes(source, dest, &options.attributes)?; + copied_files.insert( + FileInformation::from_path(source, options.dereference(source_in_command_line))?, + dest.to_path_buf(), + ); + if let Some(progress_bar) = progress_bar { progress_bar.inc(fs::metadata(source)?.len()); } diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index c77be3d4e..afac4cd3a 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -11,7 +11,7 @@ use std::fs::set_permissions; #[cfg(not(windows))] use std::os::unix::fs; -#[cfg(all(unix, not(target_os = "freebsd")))] +#[cfg(unix)] use std::os::unix::fs::MetadataExt; #[cfg(all(unix, not(target_os = "freebsd")))] use std::os::unix::fs::PermissionsExt; @@ -1353,6 +1353,203 @@ fn test_cp_preserve_xattr_fails_on_android() { .fails(); } +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_1() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.touch("a"); + at.hard_link("a", "b"); + at.mkdir("c"); + + ucmd.arg("-d").arg("a").arg("b").arg("c").succeeds(); + + assert!(at.dir_exists("c")); + assert!(at.plus("c").join("a").exists()); + assert!(at.plus("c").join("b").exists()); + + #[cfg(unix)] + { + let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap(); + let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap(); + + assert_eq!(metadata_a.ino(), metadata_b.ino()); + } +} + +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_2() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.touch("a"); + at.symlink_file("a", "b"); + at.mkdir("c"); + + ucmd.arg("--preserve=links") + .arg("-R") + .arg("-H") + .arg("a") + .arg("b") + .arg("c") + .succeeds(); + + assert!(at.dir_exists("c")); + assert!(at.plus("c").join("a").exists()); + assert!(at.plus("c").join("b").exists()); + + #[cfg(unix)] + { + let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap(); + let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap(); + + assert_eq!(metadata_a.ino(), metadata_b.ino()); + } +} + +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_3() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("d"); + at.touch("d/a"); + at.symlink_file("d/a", "d/b"); + + ucmd.arg("--preserve=links") + .arg("-R") + .arg("-L") + .arg("d") + .arg("c") + .succeeds(); + + assert!(at.dir_exists("c")); + assert!(at.plus("c").join("a").exists()); + assert!(at.plus("c").join("b").exists()); + + #[cfg(unix)] + { + let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap(); + let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap(); + + assert_eq!(metadata_a.ino(), metadata_b.ino()); + } +} + +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_4() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("d"); + at.touch("d/a"); + at.hard_link("d/a", "d/b"); + + ucmd.arg("--preserve=links") + .arg("-R") + .arg("-L") + .arg("d") + .arg("c") + .succeeds(); + + assert!(at.dir_exists("c")); + assert!(at.plus("c").join("a").exists()); + assert!(at.plus("c").join("b").exists()); + + #[cfg(unix)] + { + let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap(); + let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap(); + + assert_eq!(metadata_a.ino(), metadata_b.ino()); + } +} + +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_5() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("d"); + at.touch("d/a"); + at.hard_link("d/a", "d/b"); + + ucmd.arg("-dR") + .arg("--no-preserve=links") + .arg("d") + .arg("c") + .succeeds(); + + assert!(at.dir_exists("c")); + assert!(at.plus("c").join("a").exists()); + assert!(at.plus("c").join("b").exists()); + + #[cfg(unix)] + { + let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap(); + let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap(); + + assert_eq!(metadata_a.ino(), metadata_b.ino()); + } +} + +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_6() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.touch("a"); + at.hard_link("a", "b"); + at.mkdir("c"); + + ucmd.arg("-d").arg("a").arg("b").arg("c").succeeds(); + + assert!(at.dir_exists("c")); + assert!(at.plus("c").join("a").exists()); + assert!(at.plus("c").join("b").exists()); + + #[cfg(unix)] + { + let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap(); + let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap(); + + assert_eq!(metadata_a.ino(), metadata_b.ino()); + } +} + +#[test] +// android platform will causing stderr = cp: Permission denied (os error 13) +#[cfg(not(target_os = "android"))] +fn test_cp_preserve_links_case_7() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("src"); + at.touch("src/f"); + at.hard_link("src/f", "src/g"); + + at.mkdir("dest"); + at.touch("dest/g"); + + ucmd.arg("-n") + .arg("--preserve=links") + .arg("src/f") + .arg("src/g") + .arg("dest") + .fails() + .stderr_contains("not replacing"); + (); + + assert!(at.dir_exists("dest")); + assert!(at.plus("dest").join("f").exists()); + assert!(at.plus("dest").join("g").exists()); +} + #[test] // For now, disable the test on Windows. Symlinks aren't well support on Windows. // It works on Unix for now and it works locally when run from a powershell