diff --git a/Cargo.lock b/Cargo.lock
index 15c880338..2140867b9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2613,8 +2613,11 @@ dependencies = [
  "clap",
  "memchr",
  "nix",
+ "tempfile",
  "thiserror 2.0.12",
  "uucore",
+ "winapi-util",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
diff --git a/src/uu/cat/Cargo.toml b/src/uu/cat/Cargo.toml
index f5ac6a64e..387ecbe23 100644
--- a/src/uu/cat/Cargo.toml
+++ b/src/uu/cat/Cargo.toml
@@ -26,6 +26,13 @@ uucore = { workspace = true, features = ["fast-inc", "fs", "pipes"] }
 [target.'cfg(unix)'.dependencies]
 nix = { workspace = true }
 
+[target.'cfg(windows)'.dependencies]
+winapi-util = { workspace = true }
+windows-sys = { workspace = true, features = ["Win32_Storage_FileSystem"] }
+
+[dev-dependencies]
+tempfile = { workspace = true }
+
 [[bin]]
 name = "cat"
 path = "src/main.rs"
diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs
index cd89c3bc3..be47e5c28 100644
--- a/src/uu/cat/src/cat.rs
+++ b/src/uu/cat/src/cat.rs
@@ -4,6 +4,10 @@
 // file that was distributed with this source code.
 
 // spell-checker:ignore (ToDO) nonprint nonblank nonprinting ELOOP
+
+mod platform;
+
+use crate::platform::is_unsafe_overwrite;
 use std::fs::{File, metadata};
 use std::io::{self, BufWriter, IsTerminal, Read, Write};
 /// Unix domain socket support
@@ -18,12 +22,9 @@ use std::os::unix::net::UnixStream;
 
 use clap::{Arg, ArgAction, Command};
 use memchr::memchr2;
-#[cfg(unix)]
-use nix::fcntl::{FcntlArg, fcntl};
 use thiserror::Error;
 use uucore::display::Quotable;
 use uucore::error::UResult;
-use uucore::fs::FileInformation;
 use uucore::locale::get_message;
 use uucore::{fast_inc::fast_inc_one, format_usage};
 
@@ -366,42 +367,17 @@ fn cat_handle(
     }
 }
 
-/// Whether this process is appending to stdout.
-#[cfg(unix)]
-fn is_appending() -> bool {
-    let stdout = io::stdout();
-    let Ok(flags) = fcntl(stdout.as_fd(), FcntlArg::F_GETFL) else {
-        return false;
-    };
-    // TODO Replace `1 << 10` with `nix::fcntl::Oflag::O_APPEND`.
-    let o_append = 1 << 10;
-    (flags & o_append) > 0
-}
-
-#[cfg(not(unix))]
-fn is_appending() -> bool {
-    false
-}
-
-fn cat_path(
-    path: &str,
-    options: &OutputOptions,
-    state: &mut OutputState,
-    out_info: Option<&FileInformation>,
-) -> CatResult<()> {
+fn cat_path(path: &str, options: &OutputOptions, state: &mut OutputState) -> CatResult<()> {
     match get_input_type(path)? {
         InputType::StdIn => {
             let stdin = io::stdin();
-            let in_info = FileInformation::from_file(&stdin)?;
+            if is_unsafe_overwrite(&stdin, &io::stdout()) {
+                return Err(CatError::OutputIsInput);
+            }
             let mut handle = InputHandle {
                 reader: stdin,
                 is_interactive: io::stdin().is_terminal(),
             };
-            if let Some(out_info) = out_info {
-                if in_info == *out_info && is_appending() {
-                    return Err(CatError::OutputIsInput);
-                }
-            }
             cat_handle(&mut handle, options, state)
         }
         InputType::Directory => Err(CatError::IsDirectory),
@@ -417,15 +393,9 @@ fn cat_path(
         }
         _ => {
             let file = File::open(path)?;
-
-            if let Some(out_info) = out_info {
-                if out_info.file_size() != 0
-                    && FileInformation::from_file(&file).ok().as_ref() == Some(out_info)
-                {
-                    return Err(CatError::OutputIsInput);
-                }
+            if is_unsafe_overwrite(&file, &io::stdout()) {
+                return Err(CatError::OutputIsInput);
             }
-
             let mut handle = InputHandle {
                 reader: file,
                 is_interactive: false,
@@ -436,8 +406,6 @@ fn cat_path(
 }
 
 fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
-    let out_info = FileInformation::from_file(&io::stdout()).ok();
-
     let mut state = OutputState {
         line_number: LineNumber::new(),
         at_line_start: true,
@@ -447,7 +415,7 @@ fn cat_files(files: &[String], options: &OutputOptions) -> UResult<()> {
     let mut error_messages: Vec<String> = Vec::new();
 
     for path in files {
-        if let Err(err) = cat_path(path, options, &mut state, out_info.as_ref()) {
+        if let Err(err) = cat_path(path, options, &mut state) {
             error_messages.push(format!("{}: {err}", path.maybe_quote()));
         }
     }
diff --git a/src/uu/cat/src/platform/mod.rs
b/src/uu/cat/src/platform/mod.rs
new file mode 100644
index 000000000..3fa27a276
--- /dev/null
+++ b/src/uu/cat/src/platform/mod.rs
@@ -0,0 +1,16 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+#[cfg(unix)]
+pub use self::unix::is_unsafe_overwrite;
+
+#[cfg(windows)]
+pub use self::windows::is_unsafe_overwrite;
+
+#[cfg(unix)]
+mod unix;
+
+#[cfg(windows)]
+mod windows;
diff --git a/src/uu/cat/src/platform/unix.rs b/src/uu/cat/src/platform/unix.rs
new file mode 100644
index 000000000..8c55c9a42
--- /dev/null
+++ b/src/uu/cat/src/platform/unix.rs
@@ -0,0 +1,108 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+// spell-checker:ignore lseek seekable
+
+use nix::fcntl::{FcntlArg, OFlag, fcntl};
+use nix::unistd::{Whence, lseek};
+use std::os::fd::AsFd;
+use uucore::fs::FileInformation;
+
+/// An unsafe overwrite occurs when the same nonempty file is used as both stdin and stdout,
+/// and the file offset of stdin is positioned earlier than that of stdout.
+/// In this scenario, bytes read from stdin are written to a later part of the file
+/// via stdout, which can then be read again by stdin and written again by stdout,
+/// causing an infinite loop and potential file corruption.
+pub fn is_unsafe_overwrite<I: AsFd, O: AsFd>(input: &I, output: &O) -> bool {
+    // `FileInformation::from_file` returns an error if the file descriptor is closed, invalid,
+    // or refers to a non-regular file (e.g., socket, pipe, or special device).
+    let Ok(input_info) = FileInformation::from_file(input) else {
+        return false;
+    };
+    let Ok(output_info) = FileInformation::from_file(output) else {
+        return false;
+    };
+    if input_info != output_info || output_info.file_size() == 0 {
+        return false;
+    }
+    if is_appending(output) {
+        return true;
+    }
+    // `lseek` returns an error if the file descriptor is closed or it refers to
+    // a non-seekable resource (e.g., pipe, socket, or some devices).
+    let Ok(input_pos) = lseek(input.as_fd(), 0, Whence::SeekCur) else {
+        return false;
+    };
+    let Ok(output_pos) = lseek(output.as_fd(), 0, Whence::SeekCur) else {
+        return false;
+    };
+    input_pos < output_pos
+}
+
+/// Whether the file is opened with the `O_APPEND` flag
+fn is_appending<F: AsFd>(file: &F) -> bool {
+    let flags_raw = fcntl(file.as_fd(), FcntlArg::F_GETFL).unwrap_or_default();
+    let flags = OFlag::from_bits_truncate(flags_raw);
+    flags.contains(OFlag::O_APPEND)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::platform::unix::{is_appending, is_unsafe_overwrite};
+    use std::fs::OpenOptions;
+    use std::io::{Seek, SeekFrom, Write};
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn test_is_appending() {
+        let temp_file = NamedTempFile::new().unwrap();
+        assert!(!is_appending(&temp_file));
+
+        let read_file = OpenOptions::new().read(true).open(&temp_file).unwrap();
+        assert!(!is_appending(&read_file));
+
+        let write_file = OpenOptions::new().write(true).open(&temp_file).unwrap();
+        assert!(!is_appending(&write_file));
+
+        let append_file = OpenOptions::new().append(true).open(&temp_file).unwrap();
+        assert!(is_appending(&append_file));
+    }
+
+    #[test]
+    fn test_is_unsafe_overwrite() {
+        // Create two temp files one of which is empty
+        let empty = NamedTempFile::new().unwrap();
+        let mut nonempty = NamedTempFile::new().unwrap();
+        nonempty.write_all(b"anything").unwrap();
+        nonempty.seek(SeekFrom::Start(0)).unwrap();
+
+        // Using a different file as input and output does not result in an overwrite
+        assert!(!is_unsafe_overwrite(&empty, &nonempty));
+
+        // Overwriting an empty file is always safe
+        assert!(!is_unsafe_overwrite(&empty, &empty));
+
+        // Overwriting a nonempty file with itself is safe
+        assert!(!is_unsafe_overwrite(&nonempty, &nonempty));
+
+        // Overwriting an empty file opened in append mode is safe
+        let empty_append = OpenOptions::new().append(true).open(&empty).unwrap();
+        assert!(!is_unsafe_overwrite(&empty, &empty_append));
+
+        // Overwriting a nonempty file opened in append mode is unsafe
+        let nonempty_append = OpenOptions::new().append(true).open(&nonempty).unwrap();
+        assert!(is_unsafe_overwrite(&nonempty, &nonempty_append));
+
+        // Overwriting a file opened in write mode is safe
+        let mut nonempty_write = OpenOptions::new().write(true).open(&nonempty).unwrap();
+        assert!(!is_unsafe_overwrite(&nonempty, &nonempty_write));
+
+        // Overwriting a file when the input and output file descriptors are pointing to
+        // different offsets is safe if the input offset is further than the output offset
+        nonempty_write.seek(SeekFrom::Start(1)).unwrap();
+        assert!(!is_unsafe_overwrite(&nonempty_write, &nonempty));
+        assert!(is_unsafe_overwrite(&nonempty, &nonempty_write));
+    }
+}
diff --git a/src/uu/cat/src/platform/windows.rs b/src/uu/cat/src/platform/windows.rs
new file mode 100644
index 000000000..ebf375b32
--- /dev/null
+++ b/src/uu/cat/src/platform/windows.rs
@@ -0,0 +1,56 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use std::ffi::OsString;
+use std::os::windows::ffi::OsStringExt;
+use std::path::PathBuf;
+use uucore::fs::FileInformation;
+use winapi_util::AsHandleRef;
+use windows_sys::Win32::Storage::FileSystem::{
+    FILE_NAME_NORMALIZED, GetFinalPathNameByHandleW, VOLUME_NAME_NT,
+};
+
+/// An unsafe overwrite occurs when the same file is used as both stdin and stdout
+/// and the stdout file is not empty.
+pub fn is_unsafe_overwrite<I: AsHandleRef, O: AsHandleRef>(input: &I, output: &O) -> bool {
+    if !is_same_file_by_path(input, output) {
+        return false;
+    }
+
+    // Overwriting is only unsafe when the output file is nonempty
+    FileInformation::from_file(output)
+        .map(|info| info.file_size() > 0)
+        .unwrap_or(false)
+}
+
+/// Get the normalized NT path of a file handle (`None` on failure or if the path is too long)
+fn get_file_path_from_handle<F: AsHandleRef>(file: &F) -> Option<PathBuf> {
+    let handle = file.as_raw();
+    let mut path_buf = vec![0u16; 4096];
+
+    // SAFETY: `handle` is borrowed from `file`, which outlives this call, and the
+    // pointer/length pair passed below describes the live `path_buf` allocation exactly.
+    let len = unsafe {
+        GetFinalPathNameByHandleW(
+            handle,
+            path_buf.as_mut_ptr(),
+            path_buf.len() as u32,
+            FILE_NAME_NORMALIZED | VOLUME_NAME_NT,
+        )
+    };
+    // `len` is 0 on failure and is the required size (larger than the buffer) when the
+    // path did not fit; neither yields a usable path, so never slice past the buffer.
+    let len = usize::try_from(len).ok()?;
+    let wide = path_buf.get(..len).filter(|units| !units.is_empty())?;
+    Some(PathBuf::from(OsString::from_wide(wide)))
+}
+
+/// Whether two file handles resolve to the same file path
+fn is_same_file_by_path<A: AsHandleRef, B: AsHandleRef>(a: &A, b: &B) -> bool {
+    match (get_file_path_from_handle(a), get_file_path_from_handle(b)) {
+        (Some(path1), Some(path2)) => path1 == path2,
+        _ => false,
+    }
+}
diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs
index 926befe72..9fca79753 100644
--- a/tests/by-util/test_cat.rs
+++ b/tests/by-util/test_cat.rs
@@ -9,6 +9,7 @@ use rlimit::Resource;
 #[cfg(unix)]
 use std::fs::File;
 use std::fs::OpenOptions;
+use std::fs::read_to_string;
 use std::process::Stdio;
 use uutests::at_and_ucmd;
 use uutests::new_ucmd;
@@ -637,6 +638,57 @@ fn test_write_to_self() {
     );
 }
 
+/// Test derived from the following GNU test in `tests/cat/cat-self.sh`:
+///
+/// `cat fxy2 fy 1<>fxy2`
+// TODO: make this work on windows
+#[test]
+#[cfg(unix)]
+fn test_successful_write_to_read_write_self() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    at.write("fy", "y");
+    at.write("fxy2", "x");
+
+    // Open `fxy2` for reading and writing and use it as stdout (it is also an input file)
+    let fxy2_file_path = at.plus("fxy2");
+    let fxy2_file = OpenOptions::new()
+        .read(true)
+        .write(true)
+        .open(&fxy2_file_path)
+        .unwrap();
+    ucmd.args(&["fxy2", "fy"]).set_stdout(fxy2_file).succeeds();
+
+    // The contents of `fxy2` and `fy` files should be merged
+    let fxy2_contents = read_to_string(fxy2_file_path).unwrap();
+    assert_eq!(fxy2_contents, "xy");
+}
+
+/// Test derived from the following GNU test in `tests/cat/cat-self.sh`:
+///
+/// `cat fx fx3 1<>fx3`
+#[test]
+fn test_failed_write_to_read_write_self() {
+    let (at, mut ucmd) = at_and_ucmd!();
+    at.write("fx", "g");
+    at.write("fx3", "bold");
+
+    // Open `fx3` for reading and writing and use it as stdout (it is also an input file)
+    let fx3_file_path = at.plus("fx3");
+    let fx3_file = OpenOptions::new()
+        .read(true)
+        .write(true)
+        .open(&fx3_file_path)
+        .unwrap();
+    ucmd.args(&["fx", "fx3"])
+        .set_stdout(fx3_file)
+        .fails_with_code(1)
+        .stderr_only("cat: fx3: input file is output file\n");
+
+    // The contents of `fx` should have overwritten the beginning of `fx3`
+    let fx3_contents = read_to_string(fx3_file_path).unwrap();
+    assert_eq!(fx3_contents, "gold");
+}
+
 #[test]
 #[cfg(unix)]
 #[cfg(not(target_os = "openbsd"))]