From f2cfc15a702d32735a3243d1b85eedf9970249c7 Mon Sep 17 00:00:00 2001 From: Andrew Baptist Date: Fri, 15 Jul 2022 12:44:25 -0400 Subject: [PATCH] split: Don't overwrite files Check that a file exists by calling create_new and changing the interface of instantiate_current_writer to return a Result rather than calling unwrap. --- src/uu/split/src/platform/mod.rs | 4 ++ src/uu/split/src/platform/unix.rs | 42 ++++++++++++----- src/uu/split/src/platform/windows.rs | 24 ++++++++-- src/uu/split/src/split.rs | 69 +++++++++++++++++----------- src/uucore/src/lib/features/fs.rs | 16 ++++++- tests/by-util/test_split.rs | 28 +++++++++++ 6 files changed, 136 insertions(+), 47 deletions(-) diff --git a/src/uu/split/src/platform/mod.rs b/src/uu/split/src/platform/mod.rs index 020c01a4a..cf5d8d384 100644 --- a/src/uu/split/src/platform/mod.rs +++ b/src/uu/split/src/platform/mod.rs @@ -1,8 +1,12 @@ #[cfg(unix)] pub use self::unix::instantiate_current_writer; +#[cfg(unix)] +pub use self::unix::paths_refer_to_same_file; #[cfg(windows)] pub use self::windows::instantiate_current_writer; +#[cfg(windows)] +pub use self::windows::paths_refer_to_same_file; #[cfg(unix)] mod unix; diff --git a/src/uu/split/src/platform/unix.rs b/src/uu/split/src/platform/unix.rs index f6bac702b..09326244b 100644 --- a/src/uu/split/src/platform/unix.rs +++ b/src/uu/split/src/platform/unix.rs @@ -1,8 +1,11 @@ use std::env; use std::io::Write; -use std::io::{BufWriter, Result}; +use std::io::{BufWriter, Error, ErrorKind, Result}; +use std::path::Path; use std::process::{Child, Command, Stdio}; use uucore::crash; +use uucore::fs; +use uucore::fs::FileInformation; /// A writer that writes to a shell_process' stdin /// @@ -66,7 +69,7 @@ impl FilterWriter { /// /// * `command` - The shell command to execute /// * `filepath` - Path of the output file (forwarded to command as $FILE) - fn new(command: &str, filepath: &str) -> Self { + fn new(command: &str, filepath: &str) -> Result { // set $FILE, save previous value (if there was one) let _with_env_var_set = WithEnvVarSet::new("FILE", filepath); @@ -75,10 +78,9 @@ impl FilterWriter { .arg("-c") .arg(command) .stdin(Stdio::piped()) - .spawn() - .expect("Couldn't spawn filter command"); + .spawn()?; - Self { shell_process } + Ok(Self { shell_process }) } } @@ -107,19 +109,35 @@ impl Drop for FilterWriter { pub fn instantiate_current_writer( filter: &Option, filename: &str, -) -> BufWriter> { +) -> Result>> { match filter { - None => BufWriter::new(Box::new( + None => Ok(BufWriter::new(Box::new( // write to the next file std::fs::OpenOptions::new() .write(true) .create(true) + .truncate(true) .open(std::path::Path::new(&filename)) - .unwrap(), - ) as Box), - Some(ref filter_command) => BufWriter::new(Box::new( + .map_err(|_| { + Error::new( + ErrorKind::Other, + format!("unable to open '{}'; aborting", filename), + ) + })?, + ) as Box)), + Some(ref filter_command) => Ok(BufWriter::new(Box::new( // spawn a shell command and write to it - FilterWriter::new(filter_command, filename), - ) as Box), + FilterWriter::new(filter_command, filename)?, + ) as Box)), } } + +pub fn paths_refer_to_same_file(p1: &str, p2: &str) -> bool { + // We have to take symlinks and relative paths into account. + let p1 = if p1 == "-" { + FileInformation::from_file(&std::io::stdin()) + } else { + FileInformation::from_path(Path::new(&p1), true) + }; + fs::infos_refer_to_same_file(p1, FileInformation::from_path(Path::new(p2), true)) +} diff --git a/src/uu/split/src/platform/windows.rs b/src/uu/split/src/platform/windows.rs index e67518f2a..4b0dfbdfc 100644 --- a/src/uu/split/src/platform/windows.rs +++ b/src/uu/split/src/platform/windows.rs @@ -1,5 +1,8 @@ -use std::io::BufWriter; use std::io::Write; +use std::io::{BufWriter, Error, ErrorKind, Result}; +use std::path::Path; +use uucore::fs; + /// Get a file writer /// /// Unlike the unix version of this function, this _always_ returns @@ -7,13 +10,24 @@ use std::io::Write; pub fn instantiate_current_writer( _filter: &Option, filename: &str, -) -> BufWriter> { - BufWriter::new(Box::new( +) -> Result>> { + Ok(BufWriter::new(Box::new( // write to the next file std::fs::OpenOptions::new() .write(true) .create(true) + .truncate(true) .open(std::path::Path::new(&filename)) - .unwrap(), - ) as Box) + .map_err(|_| { + Error::new( + ErrorKind::Other, + format!("'{}' would overwrite input; aborting", filename), + ) + })?, + ) as Box)) +} + +pub fn paths_refer_to_same_file(p1: &str, p2: &str) -> bool { + // Windows doesn't support many of the unix ways of paths being equals + fs::paths_refer_to_same_file(Path::new(p1), Path::new(p2), true) } diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 88edc569c..79f0eaf10 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -530,6 +530,17 @@ impl Settings { Ok(result) } + + fn instantiate_current_writer(&self, filename: &str) -> io::Result>> { + if platform::paths_refer_to_same_file(&self.input, filename) { + return Err(io::Error::new( + ErrorKind::Other, + format!("'{}' would overwrite input; aborting", filename), + )); + } + + platform::instantiate_current_writer(&self.filter, filename) + } } /// Write a certain number of bytes to one file, then move on to another one. @@ -569,19 +580,21 @@ struct ByteChunkWriter<'a> { } impl<'a> ByteChunkWriter<'a> { - fn new(chunk_size: u64, settings: &'a Settings) -> Option> { + fn new(chunk_size: u64, settings: &'a Settings) -> UResult> { let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, settings.suffix_type, ); - let filename = filename_iterator.next()?; + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; if settings.verbose { println!("creating file {}", filename.quote()); } - let inner = platform::instantiate_current_writer(&settings.filter, &filename); - Some(ByteChunkWriter { + let inner = settings.instantiate_current_writer(&filename)?; + Ok(ByteChunkWriter { settings, chunk_size, num_bytes_remaining_in_current_chunk: chunk_size, @@ -652,8 +665,7 @@ impl<'a> Write for ByteChunkWriter<'a> { if self.settings.verbose { println!("creating file {}", filename.quote()); } - self.inner = - platform::instantiate_current_writer(&self.settings.filter, &filename); + self.inner = self.settings.instantiate_current_writer(&filename)?; } } } @@ -701,19 +713,21 @@ struct LineChunkWriter<'a> { } impl<'a> LineChunkWriter<'a> { - fn new(chunk_size: u64, settings: &'a Settings) -> Option> { + fn new(chunk_size: u64, settings: &'a Settings) -> UResult> { let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, settings.suffix_type, ); - let filename = filename_iterator.next()?; + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; if settings.verbose { println!("creating file {}", filename.quote()); } - let inner = platform::instantiate_current_writer(&settings.filter, &filename); - Some(LineChunkWriter { + let inner = settings.instantiate_current_writer(&filename)?; + Ok(LineChunkWriter { settings, chunk_size, num_lines_remaining_in_current_chunk: chunk_size, @@ -745,7 +759,7 @@ impl<'a> Write for LineChunkWriter<'a> { if self.settings.verbose { println!("creating file {}", filename.quote()); } - self.inner = platform::instantiate_current_writer(&self.settings.filter, &filename); + self.inner = self.settings.instantiate_current_writer(&filename)?; self.num_lines_remaining_in_current_chunk = self.chunk_size; } @@ -807,22 +821,24 @@ struct LineBytesChunkWriter<'a> { } impl<'a> LineBytesChunkWriter<'a> { - fn new(chunk_size: u64, settings: &'a Settings) -> Option> { + fn new(chunk_size: u64, settings: &'a Settings) -> UResult> { let mut filename_iterator = FilenameIterator::new( &settings.prefix, &settings.additional_suffix, settings.suffix_length, settings.suffix_type, ); - let filename = filename_iterator.next()?; + let filename = filename_iterator + .next() + .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; if settings.verbose { println!("creating file {}", filename.quote()); } - let inner = platform::instantiate_current_writer(&settings.filter, &filename); - Some(LineBytesChunkWriter { + let inner = settings.instantiate_current_writer(&filename)?; + Ok(LineBytesChunkWriter { settings, chunk_size, - num_bytes_remaining_in_current_chunk: chunk_size.try_into().unwrap(), + num_bytes_remaining_in_current_chunk: usize::try_from(chunk_size).unwrap(), num_chunks_written: 0, inner, filename_iterator, @@ -882,7 +898,7 @@ impl<'a> Write for LineBytesChunkWriter<'a> { if self.settings.verbose { println!("creating file {}", filename.quote()); } - self.inner = platform::instantiate_current_writer(&self.settings.filter, &filename); + self.inner = self.settings.instantiate_current_writer(&filename)?; self.num_bytes_remaining_in_current_chunk = self.chunk_size.try_into().unwrap(); } @@ -1017,7 +1033,7 @@ where let filename = filename_iterator .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); + let writer = settings.instantiate_current_writer(filename.as_str())?; writers.push(writer); } @@ -1093,7 +1109,7 @@ where let filename = filename_iterator .next() .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; - let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); + let writer = settings.instantiate_current_writer(filename.as_str())?; writers.push(writer); } @@ -1209,8 +1225,7 @@ fn split(settings: &Settings) -> UResult<()> { } Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")), Strategy::Lines(chunk_size) => { - let mut writer = LineChunkWriter::new(chunk_size, settings) - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let mut writer = LineChunkWriter::new(chunk_size, settings)?; match std::io::copy(&mut reader, &mut writer) { Ok(_) => Ok(()), Err(e) => match e.kind() { @@ -1222,15 +1237,14 @@ fn split(settings: &Settings) -> UResult<()> { // allowable filenames, we use `ErrorKind::Other` to // indicate that. A special error message needs to be // printed in that case. - ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")), + ErrorKind::Other => Err(USimpleError::new(1, format!("{}", e))), ErrorKind::BrokenPipe => Ok(()), _ => Err(uio_error!(e, "input/output error")), }, } } Strategy::Bytes(chunk_size) => { - let mut writer = ByteChunkWriter::new(chunk_size, settings) - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let mut writer = ByteChunkWriter::new(chunk_size, settings)?; match std::io::copy(&mut reader, &mut writer) { Ok(_) => Ok(()), Err(e) => match e.kind() { @@ -1242,15 +1256,14 @@ fn split(settings: &Settings) -> UResult<()> { // allowable filenames, we use `ErrorKind::Other` to // indicate that. A special error message needs to be // printed in that case. - ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")), + ErrorKind::Other => Err(USimpleError::new(1, format!("{}", e))), ErrorKind::BrokenPipe => Ok(()), _ => Err(uio_error!(e, "input/output error")), }, } } Strategy::LineBytes(chunk_size) => { - let mut writer = LineBytesChunkWriter::new(chunk_size, settings) - .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; + let mut writer = LineBytesChunkWriter::new(chunk_size, settings)?; match std::io::copy(&mut reader, &mut writer) { Ok(_) => Ok(()), Err(e) => match e.kind() { @@ -1262,7 +1275,7 @@ fn split(settings: &Settings) -> UResult<()> { // allowable filenames, we use `ErrorKind::Other` to // indicate that. A special error message needs to be // printed in that case. - ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")), + ErrorKind::Other => Err(USimpleError::new(1, format!("{}", e))), ErrorKind::BrokenPipe => Ok(()), _ => Err(uio_error!(e, "input/output error")), }, diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index bd5909674..46760600f 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -480,8 +480,20 @@ pub fn dir_strip_dot_for_creation(path: &Path) -> PathBuf { /// Checks if `p1` and `p2` are the same file. /// If error happens when trying to get files' metadata, returns false pub fn paths_refer_to_same_file>(p1: P, p2: P, dereference: bool) -> bool { - if let Ok(info1) = FileInformation::from_path(p1, dereference) { - if let Ok(info2) = FileInformation::from_path(p2, dereference) { + infos_refer_to_same_file( + FileInformation::from_path(p1, dereference), + FileInformation::from_path(p2, dereference), + ) +} + +/// Checks if `p1` and `p2` are the same file information. +/// If error happens when trying to get files' metadata, returns false +pub fn infos_refer_to_same_file( + info1: IOResult, + info2: IOResult, +) -> bool { + if let Ok(info1) = info1 { + if let Ok(info2) = info2 { return info1 == info2; } } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 87c37787e..f9cfcdbf9 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -655,3 +655,31 @@ fn test_line_bytes_no_empty_file() { assert_eq!(at.read("xaj"), "4"); assert!(!at.plus("xak").exists()); } + +#[test] +fn test_guard_input() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + ts.ucmd() + .args(&["-C", "6"]) + .pipe_in("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n") + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "1\n2\n3\n"); + + ts.ucmd() + .args(&["-C", "6"]) + .pipe_in("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n") + .succeeds() + .no_stdout() + .no_stderr(); + assert_eq!(at.read("xaa"), "1\n2\n3\n"); + + ts.ucmd() + .args(&["-C", "6", "xaa"]) + .fails() + .stderr_only("split: 'xaa' would overwrite input; aborting"); + assert_eq!(at.read("xaa"), "1\n2\n3\n"); +}