From e9b24b4bc2fd1fde21f870ed97fa64103d6666d6 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 3 Jul 2025 16:28:09 +0200 Subject: [PATCH] mv: improve the hardlink support (#8296) * mv: implement hardlink support Should fix GNU tests/mv/part-hardlink.sh tests/mv/hard-link-1.sh Co-authored-by: Daniel Hofstetter * mv: fix the GNU test - tests/mv/part-fail * make it pass on windows --------- Co-authored-by: Daniel Hofstetter --- src/uu/mv/src/hardlink.rs | 346 ++++++++++++++++++++++ src/uu/mv/src/mv.rs | 356 +++++++++++++++++++---- tests/by-util/test_mv.rs | 598 +++++++++++++++++++++++++++++++++++++- 3 files changed, 1238 insertions(+), 62 deletions(-) create mode 100644 src/uu/mv/src/hardlink.rs diff --git a/src/uu/mv/src/hardlink.rs b/src/uu/mv/src/hardlink.rs new file mode 100644 index 000000000..831b74574 --- /dev/null +++ b/src/uu/mv/src/hardlink.rs @@ -0,0 +1,346 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore hardlinked + +//! Hardlink preservation utilities for mv operations +//! +//! This module provides functionality to preserve hardlink relationships +//! when moving files across different filesystems/partitions. + +use std::collections::HashMap; +use std::io; +use std::path::{Path, PathBuf}; + +/// Tracks hardlinks during cross-partition moves to preserve them +#[derive(Debug, Default)] +pub struct HardlinkTracker { + /// Maps (device, inode) -> destination path for the first occurrence + inode_map: HashMap<(u64, u64), PathBuf>, +} + +/// Pre-scans files to identify hardlink groups with optimized memory usage +#[derive(Debug, Default)] +pub struct HardlinkGroupScanner { + /// Maps (device, inode) -> list of source paths that are hardlinked together + hardlink_groups: HashMap<(u64, u64), Vec>, + /// List of source files/directories being moved (for destination mapping) + source_files: Vec, + /// Whether scanning has been performed + scanned: bool, +} + +/// Configuration options for hardlink preservation +#[derive(Debug, Clone, Default)] +pub struct HardlinkOptions { + /// Whether to show verbose output about hardlink operations + pub verbose: bool, +} + +/// Result type for hardlink operations +pub type HardlinkResult = Result; + +/// Errors that can occur during hardlink operations +#[derive(Debug)] +pub enum HardlinkError { + Io(io::Error), + Scan(String), + Preservation { source: PathBuf, target: PathBuf }, + Metadata { path: PathBuf, error: io::Error }, +} + +impl std::fmt::Display for HardlinkError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HardlinkError::Io(e) => write!(f, "I/O error during hardlink operation: {}", e), + HardlinkError::Scan(msg) => { + write!(f, "Failed to scan files for hardlinks: {}", msg) + } + HardlinkError::Preservation { source, target } => { + write!( + f, + "Failed to preserve hardlink: {} -> {}", + source.display(), + target.display() + ) + } + HardlinkError::Metadata { path, error } => { + write!(f, "Metadata access error for {}: {}", path.display(), error) + } + } + } +} + +impl std::error::Error for HardlinkError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + HardlinkError::Io(e) => Some(e), + HardlinkError::Metadata { error, .. } => Some(error), + _ => None, + } + } +} + +impl From for HardlinkError { + fn from(error: io::Error) -> Self { + HardlinkError::Io(error) + } +} + +impl From for io::Error { + fn from(error: HardlinkError) -> Self { + match error { + HardlinkError::Io(e) => e, + HardlinkError::Scan(msg) => io::Error::other(msg), + HardlinkError::Preservation { source, target } => io::Error::other(format!( + "Failed to preserve hardlink: {} -> {}", + source.display(), + target.display() + )), + + HardlinkError::Metadata { path, error } => io::Error::other(format!( + "Metadata access error for {}: {}", + path.display(), + error + )), + } + } +} + +impl HardlinkTracker { + pub fn new() -> Self { + Self::default() + } + + /// Check if a file is a hardlink we've seen before, and return the target path if so + pub fn check_hardlink( + &mut self, + source: &Path, + dest: &Path, + scanner: &HardlinkGroupScanner, + options: &HardlinkOptions, + ) -> HardlinkResult> { + use std::os::unix::fs::MetadataExt; + + let metadata = match source.metadata() { + Ok(meta) => meta, + Err(e) => { + // Gracefully handle metadata errors by logging and continuing without hardlink tracking + if options.verbose { + eprintln!( + "warning: cannot get metadata for {}: {}", + source.display(), + e + ); + } + return Ok(None); + } + }; + + let key = (metadata.dev(), metadata.ino()); + + // Check if we've already processed a file with this inode + if let Some(existing_path) = self.inode_map.get(&key) { + // Check if this file is part of a hardlink group from the scanner + let has_hardlinks = scanner + .hardlink_groups + .get(&key) + .map(|group| group.len() > 1) + .unwrap_or(false); + + if has_hardlinks { + if options.verbose { + eprintln!( + "preserving hardlink {} -> {} (hardlinked)", + source.display(), + existing_path.display() + ); + } + return Ok(Some(existing_path.clone())); + } + } + + // This is the first time we see this file, record its destination + self.inode_map.insert(key, dest.to_path_buf()); + + Ok(None) + } +} + +impl HardlinkGroupScanner { + pub fn new() -> Self { + Self::default() + } + + /// Scan files and group them by hardlinks, including recursive directory scanning + pub fn scan_files( + &mut self, + files: &[PathBuf], + options: &HardlinkOptions, + ) -> HardlinkResult<()> { + if self.scanned { + return Ok(()); + } + + // Store the source files for destination mapping + self.source_files = files.to_vec(); + + for file in files { + if let Err(e) = self.scan_single_path(file) { + if options.verbose { + // Only show warnings for verbose mode + eprintln!("warning: failed to scan {}: {}", file.display(), e); + } + // For non-verbose mode, silently continue for missing files + // This provides graceful degradation - we'll lose hardlink info for this file + // but can still preserve hardlinks for other files + continue; + } + } + + self.scanned = true; + + if options.verbose { + let stats = self.stats(); + if stats.total_groups > 0 { + eprintln!( + "found {} hardlink groups with {} total files", + stats.total_groups, stats.total_files + ); + } + } + + Ok(()) + } + + /// Scan a single path (file or directory) + fn scan_single_path(&mut self, path: &Path) -> io::Result<()> { + use std::os::unix::fs::MetadataExt; + + if path.is_dir() { + // Recursively scan directory contents + self.scan_directory_recursive(path)?; + } else { + let metadata = path.metadata()?; + if metadata.nlink() > 1 { + let key = (metadata.dev(), metadata.ino()); + self.hardlink_groups + .entry(key) + .or_default() + .push(path.to_path_buf()); + } + } + Ok(()) + } + + /// Recursively scan a directory for hardlinked files + fn scan_directory_recursive(&mut self, dir: &Path) -> io::Result<()> { + use std::os::unix::fs::MetadataExt; + + let entries = std::fs::read_dir(dir)?; + for entry in entries { + let entry = entry?; + let path = entry.path(); + + if path.is_dir() { + self.scan_directory_recursive(&path)?; + } else { + let metadata = path.metadata()?; + if metadata.nlink() > 1 { + let key = (metadata.dev(), metadata.ino()); + self.hardlink_groups.entry(key).or_default().push(path); + } + } + } + Ok(()) + } + + #[cfg(not(unix))] + pub fn scan_files( + &mut self, + files: &[PathBuf], + _options: &HardlinkOptions, + ) -> HardlinkResult<()> { + self.source_files = files.to_vec(); + self.scanned = true; + Ok(()) + } + + #[cfg(not(unix))] + pub fn stats(&self) -> ScannerStats { + ScannerStats { + total_groups: 0, + total_files: 0, + } + } + + /// Get statistics about scanned hardlinks + #[cfg(unix)] + pub fn stats(&self) -> ScannerStats { + let total_groups = self.hardlink_groups.len(); + let total_files = self.hardlink_groups.values().map(|group| group.len()).sum(); + + ScannerStats { + total_groups, + total_files, + } + } +} + +/// Statistics about hardlink scanning +#[derive(Debug, Clone)] +pub struct ScannerStats { + pub total_groups: usize, + pub total_files: usize, +} + +/// Create a new hardlink tracker and scanner pair +pub fn create_hardlink_context() -> (HardlinkTracker, HardlinkGroupScanner) { + (HardlinkTracker::new(), HardlinkGroupScanner::new()) +} + +/// Convenient function to execute operations with proper hardlink context handling +pub fn with_optional_hardlink_context( + tracker: Option<&mut HardlinkTracker>, + scanner: Option<&HardlinkGroupScanner>, + operation: F, +) -> R +where + F: FnOnce(&mut HardlinkTracker, &HardlinkGroupScanner) -> R, +{ + match (tracker, scanner) { + (Some(tracker), Some(scanner)) => operation(tracker, scanner), + _ => { + let (mut dummy_tracker, dummy_scanner) = create_hardlink_context(); + operation(&mut dummy_tracker, &dummy_scanner) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hardlink_tracker_creation() { + let _tracker = HardlinkTracker::new(); + // Just test that creation works + } + + #[test] + fn test_scanner_creation() { + let scanner = HardlinkGroupScanner::new(); + let stats = scanner.stats(); + assert_eq!(stats.total_groups, 0); + assert_eq!(stats.total_files, 0); + } + + #[test] + fn test_create_hardlink_context() { + let (_tracker, scanner) = create_hardlink_context(); + let stats = scanner.stats(); + assert_eq!(stats.total_groups, 0); + assert_eq!(stats.total_files, 0); + } +} diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 38ad150c7..da019139d 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -6,6 +6,8 @@ // spell-checker:ignore (ToDO) sourcepath targetpath nushell canonicalized mod error; +#[cfg(unix)] +mod hardlink; use clap::builder::ValueParser; use clap::{Arg, ArgAction, ArgMatches, Command, error::ErrorKind}; @@ -24,6 +26,11 @@ use std::os::unix::fs::FileTypeExt; use std::os::windows; use std::path::{Path, PathBuf, absolute}; +#[cfg(unix)] +use crate::hardlink::{ + HardlinkGroupScanner, HardlinkOptions, HardlinkTracker, create_hardlink_context, + with_optional_hardlink_context, +}; use uucore::backup_control::{self, source_is_target_backup}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError, set_exit_code}; @@ -42,10 +49,7 @@ use uucore::update_control; pub use uucore::{backup_control::BackupMode, update_control::UpdateMode}; use uucore::{format_usage, prompt_yes, show}; -use fs_extra::dir::{ - CopyOptions as DirCopyOptions, TransitProcess, TransitProcessResult, get_size as dir_get_size, - move_dir, move_dir_with_progress, -}; +use fs_extra::dir::get_size as dir_get_size; use crate::error::MvError; use uucore::locale::{get_message, get_message_with_args}; @@ -357,7 +361,22 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> if target_is_dir { if opts.no_target_dir { if source.is_dir() { - rename(source, target, opts, None).map_err_context(|| { + #[cfg(unix)] + let (mut hardlink_tracker, hardlink_scanner) = create_hardlink_context(); + #[cfg(unix)] + let hardlink_params = (Some(&mut hardlink_tracker), Some(&hardlink_scanner)); + #[cfg(not(unix))] + let hardlink_params = (None, None); + + rename( + source, + target, + opts, + None, + hardlink_params.0, + hardlink_params.1, + ) + .map_err_context(|| { get_message_with_args( "mv-error-cannot-move", HashMap::from([ @@ -394,7 +413,22 @@ fn handle_two_paths(source: &Path, target: &Path, opts: &Options) -> UResult<()> ) .into()) } else { - rename(source, target, opts, None).map_err(|e| USimpleError::new(1, format!("{e}"))) + #[cfg(unix)] + let (mut hardlink_tracker, hardlink_scanner) = create_hardlink_context(); + #[cfg(unix)] + let hardlink_params = (Some(&mut hardlink_tracker), Some(&hardlink_scanner)); + #[cfg(not(unix))] + let hardlink_params = (None, None); + + rename( + source, + target, + opts, + None, + hardlink_params.0, + hardlink_params.1, + ) + .map_err(|e| USimpleError::new(1, format!("{e}"))) } } @@ -516,6 +550,33 @@ pub fn mv(files: &[OsString], opts: &Options) -> UResult<()> { fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options) -> UResult<()> { // remember the moved destinations for further usage let mut moved_destinations: HashSet = HashSet::with_capacity(files.len()); + // Create hardlink tracking context + #[cfg(unix)] + let (mut hardlink_tracker, hardlink_scanner) = { + let (tracker, mut scanner) = create_hardlink_context(); + + // Use hardlink options + let hardlink_options = HardlinkOptions { + verbose: options.verbose || options.debug, + }; + + // Pre-scan files if needed + if let Err(e) = scanner.scan_files(files, &hardlink_options) { + if hardlink_options.verbose { + eprintln!("mv: warning: failed to scan files for hardlinks: {}", e); + eprintln!("mv: continuing without hardlink preservation"); + } else { + // Show warning in non-verbose mode for serious errors + eprintln!( + "mv: warning: hardlink scanning failed, continuing without hardlink preservation" + ); + } + // Continue without hardlink tracking on scan failure + // This provides graceful degradation rather than failing completely + } + + (tracker, scanner) + }; if !target_dir.is_dir() { return Err(MvError::NotADirectory(target_dir.quote().to_string()).into()); @@ -550,7 +611,8 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options) } if let Some(ref pb) = count_progress { - pb.set_message(sourcepath.to_string_lossy().to_string()); + let msg = format!("{} (scanning hardlinks)", sourcepath.to_string_lossy()); + pb.set_message(msg); } let targetpath = match sourcepath.file_name() { @@ -583,7 +645,19 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, options: &Options) continue; } - match rename(sourcepath, &targetpath, options, multi_progress.as_ref()) { + #[cfg(unix)] + let hardlink_params = (Some(&mut hardlink_tracker), Some(&hardlink_scanner)); + #[cfg(not(unix))] + let hardlink_params = (None, None); + + match rename( + sourcepath, + &targetpath, + options, + multi_progress.as_ref(), + hardlink_params.0, + hardlink_params.1, + ) { Err(e) if e.to_string().is_empty() => set_exit_code(1), Err(e) => { let e = e.map_err_context(|| { @@ -615,6 +689,10 @@ fn rename( to: &Path, opts: &Options, multi_progress: Option<&MultiProgress>, + #[cfg(unix)] hardlink_tracker: Option<&mut HardlinkTracker>, + #[cfg(unix)] hardlink_scanner: Option<&HardlinkGroupScanner>, + #[cfg(not(unix))] _hardlink_tracker: Option<()>, + #[cfg(not(unix))] _hardlink_scanner: Option<()>, ) -> io::Result<()> { let mut backup_path = None; @@ -675,7 +753,8 @@ fn rename( backup_path = backup_control::get_backup_path(opts.backup, to, &opts.suffix); if let Some(ref backup_path) = backup_path { - rename_with_fallback(to, backup_path, multi_progress)?; + // For backup renames, we don't need to track hardlinks as we're just moving the existing file + rename_with_fallback(to, backup_path, multi_progress, None, None)?; } } @@ -693,7 +772,14 @@ fn rename( } } - rename_with_fallback(from, to, multi_progress)?; + #[cfg(unix)] + { + rename_with_fallback(from, to, multi_progress, hardlink_tracker, hardlink_scanner)?; + } + #[cfg(not(unix))] + { + rename_with_fallback(from, to, multi_progress, None, None)?; + } if opts.verbose { let message = match backup_path { @@ -740,6 +826,10 @@ fn rename_with_fallback( from: &Path, to: &Path, multi_progress: Option<&MultiProgress>, + #[cfg(unix)] hardlink_tracker: Option<&mut HardlinkTracker>, + #[cfg(unix)] hardlink_scanner: Option<&HardlinkGroupScanner>, + #[cfg(not(unix))] _hardlink_tracker: Option<()>, + #[cfg(not(unix))] _hardlink_scanner: Option<()>, ) -> io::Result<()> { fs::rename(from, to).or_else(|err| { #[cfg(windows)] @@ -762,11 +852,35 @@ fn rename_with_fallback( if file_type.is_symlink() { rename_symlink_fallback(from, to) } else if file_type.is_dir() { - rename_dir_fallback(from, to, multi_progress) + #[cfg(unix)] + { + with_optional_hardlink_context( + hardlink_tracker, + hardlink_scanner, + |tracker, scanner| { + rename_dir_fallback(from, to, multi_progress, Some(tracker), Some(scanner)) + }, + ) + } + #[cfg(not(unix))] + { + rename_dir_fallback(from, to, multi_progress) + } } else if is_fifo(file_type) { rename_fifo_fallback(from, to) } else { - rename_file_fallback(from, to) + #[cfg(unix)] + { + with_optional_hardlink_context( + hardlink_tracker, + hardlink_scanner, + |tracker, scanner| rename_file_fallback(from, to, Some(tracker), Some(scanner)), + ) + } + #[cfg(not(unix))] + { + rename_file_fallback(from, to) + } } }) } @@ -824,6 +938,8 @@ fn rename_dir_fallback( from: &Path, to: &Path, multi_progress: Option<&MultiProgress>, + #[cfg(unix)] hardlink_tracker: Option<&mut HardlinkTracker>, + #[cfg(unix)] hardlink_scanner: Option<&HardlinkGroupScanner>, ) -> io::Result<()> { // We remove the destination directory if it exists to match the // behavior of `fs::rename`. As far as I can tell, `fs_extra`'s @@ -831,13 +947,6 @@ fn rename_dir_fallback( if to.exists() { fs::remove_dir_all(to)?; } - let options = DirCopyOptions { - // From the `fs_extra` documentation: - // "Recursively copy a directory with a new name or place it - // inside the destination. (same behaviors like cp -r in Unix)" - copy_inside: true, - ..DirCopyOptions::new() - }; // Calculate total size of directory // Silently degrades: @@ -859,55 +968,194 @@ fn rename_dir_fallback( #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] let xattrs = fsxattr::retrieve_xattrs(from).unwrap_or_else(|_| HashMap::new()); - let result = if let Some(ref pb) = progress_bar { - move_dir_with_progress(from, to, &options, |process_info: TransitProcess| { - pb.set_position(process_info.copied_bytes); - pb.set_message(process_info.file_name); - TransitProcessResult::ContinueOrAbort - }) - } else { - move_dir(from, to, &options) - }; + // Use directory copying (with or without hardlink support) + let result = copy_dir_contents( + from, + to, + #[cfg(unix)] + hardlink_tracker, + #[cfg(unix)] + hardlink_scanner, + progress_bar.as_ref(), + ); #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] fsxattr::apply_xattrs(to, xattrs)?; - match result { - Err(err) => match err.kind { - fs_extra::error::ErrorKind::PermissionDenied => Err(io::Error::new( - io::ErrorKind::PermissionDenied, - get_message("mv-error-permission-denied"), - )), - _ => Err(io::Error::other(format!("{err:?}"))), - }, - _ => Ok(()), - } + result?; + + // Remove the source directory after successful copy + fs::remove_dir_all(from)?; + + Ok(()) } -fn rename_file_fallback(from: &Path, to: &Path) -> io::Result<()> { +/// Copy directory recursively, optionally preserving hardlinks +fn copy_dir_contents( + from: &Path, + to: &Path, + #[cfg(unix)] hardlink_tracker: Option<&mut HardlinkTracker>, + #[cfg(unix)] hardlink_scanner: Option<&HardlinkGroupScanner>, + progress_bar: Option<&ProgressBar>, +) -> io::Result<()> { + // Create the destination directory + fs::create_dir_all(to)?; + + // Recursively copy contents + #[cfg(unix)] + { + if let (Some(tracker), Some(scanner)) = (hardlink_tracker, hardlink_scanner) { + copy_dir_contents_recursive(from, to, tracker, scanner, progress_bar)?; + } + } + #[cfg(not(unix))] + { + copy_dir_contents_recursive(from, to, progress_bar)?; + } + + Ok(()) +} + +fn copy_dir_contents_recursive( + from_dir: &Path, + to_dir: &Path, + #[cfg(unix)] hardlink_tracker: &mut HardlinkTracker, + #[cfg(unix)] hardlink_scanner: &HardlinkGroupScanner, + progress_bar: Option<&ProgressBar>, +) -> io::Result<()> { + let entries = fs::read_dir(from_dir)?; + + for entry in entries { + let entry = entry?; + let from_path = entry.path(); + let file_name = from_path.file_name().unwrap(); + let to_path = to_dir.join(file_name); + + if let Some(pb) = progress_bar { + pb.set_message(from_path.to_string_lossy().to_string()); + } + + if from_path.is_dir() { + // Recursively copy subdirectory + fs::create_dir_all(&to_path)?; + copy_dir_contents_recursive( + &from_path, + &to_path, + #[cfg(unix)] + hardlink_tracker, + #[cfg(unix)] + hardlink_scanner, + progress_bar, + )?; + } else { + // Copy file with or without hardlink support based on platform + #[cfg(unix)] + { + copy_file_with_hardlinks_helper( + &from_path, + &to_path, + hardlink_tracker, + hardlink_scanner, + )?; + } + #[cfg(not(unix))] + { + fs::copy(&from_path, &to_path)?; + } + } + + if let Some(pb) = progress_bar { + if let Ok(metadata) = from_path.metadata() { + pb.inc(metadata.len()); + } + } + } + + Ok(()) +} + +#[cfg(unix)] +fn copy_file_with_hardlinks_helper( + from: &Path, + to: &Path, + hardlink_tracker: &mut HardlinkTracker, + hardlink_scanner: &HardlinkGroupScanner, +) -> io::Result<()> { + // Check if this file should be a hardlink to an already-copied file + use crate::hardlink::HardlinkOptions; + let hardlink_options = HardlinkOptions::default(); + // Create a hardlink instead of copying + if let Some(existing_target) = + hardlink_tracker.check_hardlink(from, to, hardlink_scanner, &hardlink_options)? + { + fs::hard_link(&existing_target, to)?; + return Ok(()); + } + + // Regular file copy + #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] + { + fs::copy(from, to).and_then(|_| fsxattr::copy_xattrs(&from, &to))?; + } + #[cfg(any(target_os = "macos", target_os = "redox"))] + { + fs::copy(from, to)?; + } + + Ok(()) +} + +fn rename_file_fallback( + from: &Path, + to: &Path, + #[cfg(unix)] hardlink_tracker: Option<&mut HardlinkTracker>, + #[cfg(unix)] hardlink_scanner: Option<&HardlinkGroupScanner>, +) -> io::Result<()> { + // Remove existing target file if it exists if to.is_symlink() { fs::remove_file(to).map_err(|err| { - let to = to.to_string_lossy(); - let from = from.to_string_lossy(); - io::Error::new( - err.kind(), - get_message_with_args( - "mv-error-inter-device-move-failed", - HashMap::from([ - ("from".to_string(), from.to_string()), - ("to".to_string(), to.to_string()), - ("err".to_string(), err.to_string()), - ]), - ), - ) + let inter_device_msg = get_message_with_args( + "mv-error-inter-device-move-failed", + HashMap::from([ + ("from".to_string(), from.display().to_string()), + ("to".to_string(), to.display().to_string()), + ("err".to_string(), err.to_string()), + ]), + ); + io::Error::new(err.kind(), inter_device_msg) })?; + } else if to.exists() { + // For non-symlinks, just remove the file without special error handling + fs::remove_file(to)?; } + + // Check if this file is part of a hardlink group and if so, create a hardlink instead of copying + #[cfg(unix)] + { + if let (Some(tracker), Some(scanner)) = (hardlink_tracker, hardlink_scanner) { + use crate::hardlink::HardlinkOptions; + let hardlink_options = HardlinkOptions::default(); + if let Some(existing_target) = + tracker.check_hardlink(from, to, scanner, &hardlink_options)? + { + // Create a hardlink to the first moved file instead of copying + fs::hard_link(&existing_target, to)?; + fs::remove_file(from)?; + return Ok(()); + } + } + } + + // Regular file copy #[cfg(all(unix, not(any(target_os = "macos", target_os = "redox"))))] fs::copy(from, to) .and_then(|_| fsxattr::copy_xattrs(&from, &to)) - .and_then(|_| fs::remove_file(from))?; + .and_then(|_| fs::remove_file(from)) + .map_err(|err| io::Error::new(err.kind(), get_message("mv-error-permission-denied")))?; #[cfg(any(target_os = "macos", target_os = "redox", not(unix)))] - fs::copy(from, to).and_then(|_| fs::remove_file(from))?; + fs::copy(from, to) + .and_then(|_| fs::remove_file(from)) + .map_err(|err| io::Error::new(err.kind(), get_message("mv-error-permission-denied")))?; Ok(()) } diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index b20c4b2b6..7b984cfde 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -3,7 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // -// spell-checker:ignore mydir +// spell-checker:ignore mydir hardlinked tmpfs + use filetime::FileTime; use rstest::rstest; use std::io::Write; @@ -1845,24 +1846,17 @@ mod inter_partition_copying { let scene = TestScenario::new(util_name!()); let at = &scene.fixtures; - // create a file in the current partition. at.write("src", "src contents"); - // create a folder in another partition. let other_fs_tempdir = TempDir::new_in("/dev/shm/").expect("Unable to create temp directory"); - - // create a file inside that folder. let other_fs_file_path = other_fs_tempdir.path().join("other_fs_file"); write(&other_fs_file_path, "other fs file contents") .expect("Unable to write to other_fs_file"); - // create a symlink to the file inside the same directory. let symlink_path = other_fs_tempdir.path().join("symlink_to_file"); symlink(&other_fs_file_path, &symlink_path).expect("Unable to create symlink_to_file"); - // disable write for the target folder so that when mv tries to remove the - // the destination symlink inside the target directory it would fail. set_permissions(other_fs_tempdir.path(), PermissionsExt::from_mode(0o555)) .expect("Unable to set permissions for temp directory"); @@ -1875,6 +1869,459 @@ mod inter_partition_copying { .stderr_contains("inter-device move failed:") .stderr_contains("Permission denied"); } + + // Test that hardlinks are preserved when moving files across partitions + #[test] + #[cfg(unix)] + pub(crate) fn test_mv_preserves_hardlinks_across_partitions() { + use std::fs::metadata; + use std::os::unix::fs::MetadataExt; + use tempfile::TempDir; + use uutests::util::TestScenario; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("file1", "test content"); + at.hard_link("file1", "file2"); + + let metadata1 = metadata(at.plus("file1")).expect("Failed to get metadata for file1"); + let metadata2 = metadata(at.plus("file2")).expect("Failed to get metadata for file2"); + assert_eq!( + metadata1.ino(), + metadata2.ino(), + "Files should have same inode before move" + ); + assert_eq!( + metadata1.nlink(), + 2, + "Files should have nlink=2 before move" + ); + + // Create a target directory in another partition (using /dev/shm which is typically tmpfs) + let other_fs_tempdir = TempDir::new_in("/dev/shm/") + .expect("Unable to create temp directory in /dev/shm - test requires tmpfs"); + + scene + .ucmd() + .arg("file1") + .arg("file2") + .arg(other_fs_tempdir.path().to_str().unwrap()) + .succeeds(); + + assert!(!at.file_exists("file1"), "file1 should not exist in source"); + assert!(!at.file_exists("file2"), "file2 should not exist in source"); + + let moved_file1 = other_fs_tempdir.path().join("file1"); + let moved_file2 = other_fs_tempdir.path().join("file2"); + assert!(moved_file1.exists(), "file1 should exist in destination"); + assert!(moved_file2.exists(), "file2 should exist in destination"); + + let moved_metadata1 = + metadata(&moved_file1).expect("Failed to get metadata for moved file1"); + let moved_metadata2 = + metadata(&moved_file2).expect("Failed to get metadata for moved file2"); + + assert_eq!( + moved_metadata1.ino(), + moved_metadata2.ino(), + "Files should have same inode after cross-partition move (hardlinks preserved)" + ); + assert_eq!( + moved_metadata1.nlink(), + 2, + "Files should have nlink=2 after cross-partition move" + ); + + // Verify content is preserved + assert_eq!( + std::fs::read_to_string(&moved_file1).expect("Failed to read moved file1"), + "test content" + ); + assert_eq!( + std::fs::read_to_string(&moved_file2).expect("Failed to read moved file2"), + "test content" + ); + } + + // Test that hardlinks are preserved even with multiple sets of hardlinked files + #[test] + #[cfg(unix)] + #[allow(clippy::too_many_lines)] + #[allow(clippy::similar_names)] + pub(crate) fn test_mv_preserves_multiple_hardlink_groups_across_partitions() { + use std::fs::metadata; + use std::os::unix::fs::MetadataExt; + use tempfile::TempDir; + use uutests::util::TestScenario; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("group1_file1", "content group 1"); + at.hard_link("group1_file1", "group1_file2"); + + at.write("group2_file1", "content group 2"); + at.hard_link("group2_file1", "group2_file2"); + + at.write("single_file", "single file content"); + + let g1f1_meta = metadata(at.plus("group1_file1")).unwrap(); + let g1f2_meta = metadata(at.plus("group1_file2")).unwrap(); + let g2f1_meta = metadata(at.plus("group2_file1")).unwrap(); + let g2f2_meta = metadata(at.plus("group2_file2")).unwrap(); + let single_meta = metadata(at.plus("single_file")).unwrap(); + + assert_eq!( + g1f1_meta.ino(), + g1f2_meta.ino(), + "Group 1 files should have same inode" + ); + assert_eq!( + g2f1_meta.ino(), + g2f2_meta.ino(), + "Group 2 files should have same inode" + ); + assert_ne!( + g1f1_meta.ino(), + g2f1_meta.ino(), + "Different groups should have different inodes" + ); + assert_eq!(single_meta.nlink(), 1, "Single file should have nlink=1"); + + let other_fs_tempdir = + TempDir::new_in("/dev/shm/").expect("Unable to create temp directory in /dev/shm"); + + scene + .ucmd() + .arg("group1_file1") + .arg("group1_file2") + .arg("group2_file1") + .arg("group2_file2") + .arg("single_file") + .arg(other_fs_tempdir.path().to_str().unwrap()) + .succeeds(); + + // Verify hardlinks are preserved for both groups + let moved_g1f1 = other_fs_tempdir.path().join("group1_file1"); + let moved_g1f2 = other_fs_tempdir.path().join("group1_file2"); + let moved_g2f1 = other_fs_tempdir.path().join("group2_file1"); + let moved_g2f2 = other_fs_tempdir.path().join("group2_file2"); + let moved_single = other_fs_tempdir.path().join("single_file"); + + let moved_g1f1_meta = metadata(&moved_g1f1).unwrap(); + let moved_g1f2_meta = metadata(&moved_g1f2).unwrap(); + let moved_g2f1_meta = metadata(&moved_g2f1).unwrap(); + let moved_g2f2_meta = metadata(&moved_g2f2).unwrap(); + let moved_single_meta = metadata(&moved_single).unwrap(); + + assert_eq!( + moved_g1f1_meta.ino(), + moved_g1f2_meta.ino(), + "Group 1 files should still be hardlinked after move" + ); + assert_eq!( + moved_g1f1_meta.nlink(), + 2, + "Group 1 files should have nlink=2" + ); + + assert_eq!( + moved_g2f1_meta.ino(), + moved_g2f2_meta.ino(), + "Group 2 files should still be hardlinked after move" + ); + assert_eq!( + moved_g2f1_meta.nlink(), + 2, + "Group 2 files should have nlink=2" + ); + + assert_ne!( + moved_g1f1_meta.ino(), + moved_g2f1_meta.ino(), + "Different groups should still have different inodes" + ); + + assert_eq!( + moved_single_meta.nlink(), + 1, + "Single file should still have nlink=1" + ); + + assert_eq!( + std::fs::read_to_string(&moved_g1f1).unwrap(), + "content group 1" + ); + assert_eq!( + std::fs::read_to_string(&moved_g1f2).unwrap(), + "content group 1" + ); + assert_eq!( + std::fs::read_to_string(&moved_g2f1).unwrap(), + "content group 2" + ); + assert_eq!( + std::fs::read_to_string(&moved_g2f2).unwrap(), + "content group 2" + ); + assert_eq!( + std::fs::read_to_string(&moved_single).unwrap(), + "single file content" + ); + } + + // Test the exact GNU test scenario: hardlinks within directories being moved + #[test] + #[cfg(unix)] + pub(crate) fn test_mv_preserves_hardlinks_in_directories_across_partitions() { + use std::fs::metadata; + use std::os::unix::fs::MetadataExt; + use tempfile::TempDir; + use uutests::util::TestScenario; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("f", "file content"); + at.hard_link("f", "g"); + + at.mkdir("a"); + at.mkdir("b"); + at.write("a/1", "directory file content"); + at.hard_link("a/1", "b/1"); + + let f_meta = metadata(at.plus("f")).unwrap(); + let g_meta = metadata(at.plus("g")).unwrap(); + let a1_meta = metadata(at.plus("a/1")).unwrap(); + let b1_meta = metadata(at.plus("b/1")).unwrap(); + + assert_eq!( + f_meta.ino(), + g_meta.ino(), + "f and g should have same inode before move" + ); + assert_eq!(f_meta.nlink(), 2, "f should have nlink=2 before move"); + assert_eq!( + a1_meta.ino(), + b1_meta.ino(), + "a/1 and b/1 should have same inode before move" + ); + assert_eq!(a1_meta.nlink(), 2, "a/1 should have nlink=2 before move"); + + let other_fs_tempdir = + TempDir::new_in("/dev/shm/").expect("Unable to create temp directory in /dev/shm"); + + scene + .ucmd() + .arg("f") + .arg("g") + .arg(other_fs_tempdir.path().to_str().unwrap()) + .succeeds(); + + scene + .ucmd() + .arg("a") + .arg("b") + .arg(other_fs_tempdir.path().to_str().unwrap()) + .succeeds(); + + let moved_f = other_fs_tempdir.path().join("f"); + let moved_g = other_fs_tempdir.path().join("g"); + let moved_f_metadata = metadata(&moved_f).unwrap(); + let moved_second_file_metadata = metadata(&moved_g).unwrap(); + + assert_eq!( + moved_f_metadata.ino(), + moved_second_file_metadata.ino(), + "f and g should have same inode after cross-partition move" + ); + assert_eq!( + moved_f_metadata.nlink(), + 2, + "f should have nlink=2 after move" + ); + + // Verify directory files' hardlinks are preserved (the main test) + let moved_dir_a_file = other_fs_tempdir.path().join("a/1"); + let moved_dir_second_file = other_fs_tempdir.path().join("b/1"); + let moved_dir_a_file_metadata = metadata(&moved_dir_a_file).unwrap(); + let moved_dir_second_file_metadata = metadata(&moved_dir_second_file).unwrap(); + + assert_eq!( + moved_dir_a_file_metadata.ino(), + moved_dir_second_file_metadata.ino(), + "a/1 and b/1 should have same inode after cross-partition directory move (hardlinks preserved)" + ); + assert_eq!( + moved_dir_a_file_metadata.nlink(), + 2, + "a/1 should have nlink=2 after move" + ); + + assert_eq!(std::fs::read_to_string(&moved_f).unwrap(), "file content"); + assert_eq!(std::fs::read_to_string(&moved_g).unwrap(), "file content"); + assert_eq!( + std::fs::read_to_string(&moved_dir_a_file).unwrap(), + "directory file content" + ); + assert_eq!( + std::fs::read_to_string(&moved_dir_second_file).unwrap(), + "directory file content" + ); + } + + // Test complex scenario with multiple hardlink groups across nested directories + #[test] + #[cfg(unix)] + #[allow(clippy::too_many_lines)] + #[allow(clippy::similar_names)] + pub(crate) fn test_mv_preserves_complex_hardlinks_across_nested_directories() { + use std::fs::metadata; + use std::os::unix::fs::MetadataExt; + use tempfile::TempDir; + use uutests::util::TestScenario; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.mkdir("dir1"); + at.mkdir("dir1/subdir1"); + at.mkdir("dir1/subdir2"); + at.mkdir("dir2"); + at.mkdir("dir2/subdir1"); + + at.write("dir1/subdir1/file_a", "content A"); + at.hard_link("dir1/subdir1/file_a", "dir1/subdir2/file_a_link1"); + at.hard_link("dir1/subdir1/file_a", "dir2/subdir1/file_a_link2"); + + at.write("dir1/file_b", "content B"); + at.hard_link("dir1/file_b", "dir2/file_b_link"); + + at.write("dir1/subdir1/nested_file", "nested content"); + at.hard_link("dir1/subdir1/nested_file", "dir1/subdir2/nested_file_link"); + + let orig_file_a_metadata = metadata(at.plus("dir1/subdir1/file_a")).unwrap(); + let orig_file_a_link1_metadata = metadata(at.plus("dir1/subdir2/file_a_link1")).unwrap(); + let orig_file_a_link2_metadata = metadata(at.plus("dir2/subdir1/file_a_link2")).unwrap(); + + assert_eq!(orig_file_a_metadata.ino(), orig_file_a_link1_metadata.ino()); + assert_eq!(orig_file_a_metadata.ino(), orig_file_a_link2_metadata.ino()); + assert_eq!( + orig_file_a_metadata.nlink(), + 3, + "file_a group should have nlink=3" + ); + + let orig_file_b_metadata = metadata(at.plus("dir1/file_b")).unwrap(); + let orig_file_b_link_metadata = metadata(at.plus("dir2/file_b_link")).unwrap(); + assert_eq!(orig_file_b_metadata.ino(), orig_file_b_link_metadata.ino()); + assert_eq!( + orig_file_b_metadata.nlink(), + 2, + "file_b group should have nlink=2" + ); + + let nested_meta = metadata(at.plus("dir1/subdir1/nested_file")).unwrap(); + let nested_link_meta = metadata(at.plus("dir1/subdir2/nested_file_link")).unwrap(); + assert_eq!(nested_meta.ino(), nested_link_meta.ino()); + assert_eq!( + nested_meta.nlink(), + 2, + "nested file group should have nlink=2" + ); + + let other_fs_tempdir = + TempDir::new_in("/dev/shm/").expect("Unable to create temp directory in /dev/shm"); + + scene + .ucmd() + .arg("dir1") + .arg("dir2") + .arg(other_fs_tempdir.path().to_str().unwrap()) + .succeeds(); + + let moved_file_a = other_fs_tempdir.path().join("dir1/subdir1/file_a"); + let moved_file_a_link1 = other_fs_tempdir.path().join("dir1/subdir2/file_a_link1"); + let moved_file_a_link2 = other_fs_tempdir.path().join("dir2/subdir1/file_a_link2"); + + let final_file_a_metadata = metadata(&moved_file_a).unwrap(); + let final_file_a_link1_metadata = metadata(&moved_file_a_link1).unwrap(); + let final_file_a_link2_metadata = metadata(&moved_file_a_link2).unwrap(); + + assert_eq!( + final_file_a_metadata.ino(), + final_file_a_link1_metadata.ino(), + "file_a hardlinks should be preserved" + ); + assert_eq!( + final_file_a_metadata.ino(), + final_file_a_link2_metadata.ino(), + "file_a hardlinks should be preserved across directories" + ); + assert_eq!( + final_file_a_metadata.nlink(), + 3, + "file_a group should still have nlink=3" + ); + + let moved_file_b = other_fs_tempdir.path().join("dir1/file_b"); + let moved_file_b_hardlink = other_fs_tempdir.path().join("dir2/file_b_link"); + let final_file_b_metadata = metadata(&moved_file_b).unwrap(); + let final_file_b_hardlink_metadata = metadata(&moved_file_b_hardlink).unwrap(); + + assert_eq!( + final_file_b_metadata.ino(), + final_file_b_hardlink_metadata.ino(), + "file_b hardlinks should be preserved" + ); + assert_eq!( + final_file_b_metadata.nlink(), + 2, + "file_b group should still have nlink=2" + ); + + let moved_nested = other_fs_tempdir.path().join("dir1/subdir1/nested_file"); + let moved_nested_link = other_fs_tempdir + .path() + .join("dir1/subdir2/nested_file_link"); + let moved_nested_meta = metadata(&moved_nested).unwrap(); + let moved_nested_link_meta = metadata(&moved_nested_link).unwrap(); + + assert_eq!( + moved_nested_meta.ino(), + moved_nested_link_meta.ino(), + "nested file hardlinks should be preserved" + ); + assert_eq!( + moved_nested_meta.nlink(), + 2, + "nested file group should still have nlink=2" + ); + + assert_eq!(std::fs::read_to_string(&moved_file_a).unwrap(), "content A"); + assert_eq!( + std::fs::read_to_string(&moved_file_a_link1).unwrap(), + "content A" + ); + assert_eq!( + std::fs::read_to_string(&moved_file_a_link2).unwrap(), + "content A" + ); + assert_eq!(std::fs::read_to_string(&moved_file_b).unwrap(), "content B"); + assert_eq!( + std::fs::read_to_string(&moved_file_b_hardlink).unwrap(), + "content B" + ); + assert_eq!( + std::fs::read_to_string(&moved_nested).unwrap(), + "nested content" + ); + assert_eq!( + std::fs::read_to_string(&moved_nested_link).unwrap(), + "nested content" + ); + } } #[test] @@ -1892,6 +2339,97 @@ fn test_mv_error_msg_with_multiple_sources_that_does_not_exist() { .stderr_contains("mv: cannot stat 'b/': No such file or directory"); } +// Tests for hardlink preservation (now always enabled) +#[test] +#[cfg(all(unix, not(target_os = "android")))] +fn test_mv_hardlink_preservation() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.write("file1", "test content"); + at.hard_link("file1", "file2"); + at.mkdir("target"); + + ucmd.arg("file1") + .arg("file2") + .arg("target") + .succeeds() + .no_stderr(); + + assert!(at.file_exists("target/file1")); + assert!(at.file_exists("target/file2")); +} + +#[test] +#[cfg(all(unix, not(target_os = "android")))] +fn test_mv_hardlink_progress_indication() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.write("file1", "content1"); + at.write("file2", "content2"); + at.hard_link("file1", "file1_link"); + + at.mkdir("target"); + + // Test with progress bar and verbose mode + ucmd.arg("--progress") + .arg("--verbose") + .arg("file1") + .arg("file1_link") + .arg("file2") + .arg("target") + .succeeds(); + + // Verify all files were moved + assert!(at.file_exists("target/file1")); + assert!(at.file_exists("target/file1_link")); + assert!(at.file_exists("target/file2")); +} + +#[test] +#[cfg(all(unix, not(target_os = "android")))] +fn test_mv_mixed_hardlinks_and_regular_files() { + use std::fs::metadata; + use std::os::unix::fs::MetadataExt; + + let (at, mut ucmd) = at_and_ucmd!(); + + // Create a mix of hardlinked and regular files + at.write("hardlink1", "hardlink content"); + at.hard_link("hardlink1", "hardlink2"); + at.write("regular1", "regular content"); + at.write("regular2", "regular content 2"); + + at.mkdir("target"); + + // Move all files (hardlinks automatically preserved) + ucmd.arg("hardlink1") + .arg("hardlink2") + .arg("regular1") + .arg("regular2") + .arg("target") + .succeeds(); + + // Verify all files moved + assert!(at.file_exists("target/hardlink1")); + assert!(at.file_exists("target/hardlink2")); + assert!(at.file_exists("target/regular1")); + assert!(at.file_exists("target/regular2")); + + // Verify hardlinks are preserved (on same filesystem) + let h1_meta = metadata(at.plus("target/hardlink1")).unwrap(); + let h2_meta = metadata(at.plus("target/hardlink2")).unwrap(); + let r1_meta = metadata(at.plus("target/regular1")).unwrap(); + let r2_meta = metadata(at.plus("target/regular2")).unwrap(); + + // Hardlinked files should have same inode if on same filesystem + if h1_meta.dev() == h2_meta.dev() { + assert_eq!(h1_meta.ino(), h2_meta.ino()); + } + + // Regular files should have different inodes + assert_ne!(r1_meta.ino(), r2_meta.ino()); +} + #[cfg(not(windows))] #[ignore = "requires access to a different filesystem"] #[test] @@ -1906,3 +2444,47 @@ fn test_special_file_different_filesystem() { assert!(Path::new("/dev/shm/tmp/f").exists()); std::fs::remove_dir_all("/dev/shm/tmp").unwrap(); } + +/// Test cross-device move with permission denied error +/// This test mimics the scenario from the GNU part-fail test where +/// a cross-device move fails due to permission errors when removing the target file +#[test] +#[cfg(target_os = "linux")] +fn test_mv_cross_device_permission_denied() { + use std::fs::{set_permissions, write}; + use std::os::unix::fs::PermissionsExt; + use tempfile::TempDir; + use uutests::util::TestScenario; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.write("k", "source content"); + + let other_fs_tempdir = + TempDir::new_in("/dev/shm/").expect("Unable to create temp directory in /dev/shm"); + + let target_file_path = other_fs_tempdir.path().join("k"); + write(&target_file_path, "target content").expect("Unable to write target file"); + + // Remove write permissions from the directory to cause permission denied + set_permissions(other_fs_tempdir.path(), PermissionsExt::from_mode(0o555)) + .expect("Unable to set directory permissions"); + + // Attempt to move file to the other filesystem + // This should fail with a permission denied error + let result = scene + .ucmd() + .arg("-f") + .arg("k") + .arg(target_file_path.to_str().unwrap()) + .fails(); + + // Check that it contains permission denied and references the file + // The exact format may vary but should contain these key elements + let stderr = result.stderr_str(); + assert!(stderr.contains("Permission denied") || stderr.contains("permission denied")); + + set_permissions(other_fs_tempdir.path(), PermissionsExt::from_mode(0o755)) + .expect("Unable to restore directory permissions"); +}