1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #3719 from andrewbaptist/main

split: Don't overwrite files
This commit is contained in:
Sylvestre Ledru 2022-07-23 22:54:57 +02:00 committed by GitHub
commit 62305e67d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 136 additions and 47 deletions

View file

@ -1,8 +1,12 @@
#[cfg(unix)] #[cfg(unix)]
pub use self::unix::instantiate_current_writer; pub use self::unix::instantiate_current_writer;
#[cfg(unix)]
pub use self::unix::paths_refer_to_same_file;
#[cfg(windows)] #[cfg(windows)]
pub use self::windows::instantiate_current_writer; pub use self::windows::instantiate_current_writer;
#[cfg(windows)]
pub use self::windows::paths_refer_to_same_file;
#[cfg(unix)] #[cfg(unix)]
mod unix; mod unix;

View file

@ -1,8 +1,11 @@
use std::env; use std::env;
use std::io::Write; use std::io::Write;
use std::io::{BufWriter, Result}; use std::io::{BufWriter, Error, ErrorKind, Result};
use std::path::Path;
use std::process::{Child, Command, Stdio}; use std::process::{Child, Command, Stdio};
use uucore::crash; use uucore::crash;
use uucore::fs;
use uucore::fs::FileInformation;
/// A writer that writes to a shell_process' stdin /// A writer that writes to a shell_process' stdin
/// ///
@ -66,7 +69,7 @@ impl FilterWriter {
/// ///
/// * `command` - The shell command to execute /// * `command` - The shell command to execute
/// * `filepath` - Path of the output file (forwarded to command as $FILE) /// * `filepath` - Path of the output file (forwarded to command as $FILE)
fn new(command: &str, filepath: &str) -> Self { fn new(command: &str, filepath: &str) -> Result<Self> {
// set $FILE, save previous value (if there was one) // set $FILE, save previous value (if there was one)
let _with_env_var_set = WithEnvVarSet::new("FILE", filepath); let _with_env_var_set = WithEnvVarSet::new("FILE", filepath);
@ -75,10 +78,9 @@ impl FilterWriter {
.arg("-c") .arg("-c")
.arg(command) .arg(command)
.stdin(Stdio::piped()) .stdin(Stdio::piped())
.spawn() .spawn()?;
.expect("Couldn't spawn filter command");
Self { shell_process } Ok(Self { shell_process })
} }
} }
@ -107,19 +109,35 @@ impl Drop for FilterWriter {
pub fn instantiate_current_writer( pub fn instantiate_current_writer(
filter: &Option<String>, filter: &Option<String>,
filename: &str, filename: &str,
) -> BufWriter<Box<dyn Write>> { ) -> Result<BufWriter<Box<dyn Write>>> {
match filter { match filter {
None => BufWriter::new(Box::new( None => Ok(BufWriter::new(Box::new(
// write to the next file // write to the next file
std::fs::OpenOptions::new() std::fs::OpenOptions::new()
.write(true) .write(true)
.create(true) .create(true)
.truncate(true)
.open(std::path::Path::new(&filename)) .open(std::path::Path::new(&filename))
.unwrap(), .map_err(|_| {
) as Box<dyn Write>), Error::new(
Some(ref filter_command) => BufWriter::new(Box::new( ErrorKind::Other,
format!("unable to open '{}'; aborting", filename),
)
})?,
) as Box<dyn Write>)),
Some(ref filter_command) => Ok(BufWriter::new(Box::new(
// spawn a shell command and write to it // spawn a shell command and write to it
FilterWriter::new(filter_command, filename), FilterWriter::new(filter_command, filename)?,
) as Box<dyn Write>), ) as Box<dyn Write>)),
} }
} }
pub fn paths_refer_to_same_file(p1: &str, p2: &str) -> bool {
// We have to take symlinks and relative paths into account.
let p1 = if p1 == "-" {
FileInformation::from_file(&std::io::stdin())
} else {
FileInformation::from_path(Path::new(&p1), true)
};
fs::infos_refer_to_same_file(p1, FileInformation::from_path(Path::new(p2), true))
}

View file

@ -1,5 +1,8 @@
use std::io::BufWriter;
use std::io::Write; use std::io::Write;
use std::io::{BufWriter, Error, ErrorKind, Result};
use std::path::Path;
use uucore::fs;
/// Get a file writer /// Get a file writer
/// ///
/// Unlike the unix version of this function, this _always_ returns /// Unlike the unix version of this function, this _always_ returns
@ -7,13 +10,24 @@ use std::io::Write;
pub fn instantiate_current_writer( pub fn instantiate_current_writer(
_filter: &Option<String>, _filter: &Option<String>,
filename: &str, filename: &str,
) -> BufWriter<Box<dyn Write>> { ) -> Result<BufWriter<Box<dyn Write>>> {
BufWriter::new(Box::new( Ok(BufWriter::new(Box::new(
// write to the next file // write to the next file
std::fs::OpenOptions::new() std::fs::OpenOptions::new()
.write(true) .write(true)
.create(true) .create(true)
.truncate(true)
.open(std::path::Path::new(&filename)) .open(std::path::Path::new(&filename))
.unwrap(), .map_err(|_| {
) as Box<dyn Write>) Error::new(
ErrorKind::Other,
format!("'{}' would overwrite input; aborting", filename),
)
})?,
) as Box<dyn Write>))
}
pub fn paths_refer_to_same_file(p1: &str, p2: &str) -> bool {
// Windows doesn't support many of the unix ways of paths being equals
fs::paths_refer_to_same_file(Path::new(p1), Path::new(p2), true)
} }

View file

@ -530,6 +530,17 @@ impl Settings {
Ok(result) Ok(result)
} }
fn instantiate_current_writer(&self, filename: &str) -> io::Result<BufWriter<Box<dyn Write>>> {
if platform::paths_refer_to_same_file(&self.input, filename) {
return Err(io::Error::new(
ErrorKind::Other,
format!("'{}' would overwrite input; aborting", filename),
));
}
platform::instantiate_current_writer(&self.filter, filename)
}
} }
/// Write a certain number of bytes to one file, then move on to another one. /// Write a certain number of bytes to one file, then move on to another one.
@ -569,19 +580,21 @@ struct ByteChunkWriter<'a> {
} }
impl<'a> ByteChunkWriter<'a> { impl<'a> ByteChunkWriter<'a> {
fn new(chunk_size: u64, settings: &'a Settings) -> Option<ByteChunkWriter<'a>> { fn new(chunk_size: u64, settings: &'a Settings) -> UResult<ByteChunkWriter<'a>> {
let mut filename_iterator = FilenameIterator::new( let mut filename_iterator = FilenameIterator::new(
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.suffix_type, settings.suffix_type,
); );
let filename = filename_iterator.next()?; let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
if settings.verbose { if settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
let inner = platform::instantiate_current_writer(&settings.filter, &filename); let inner = settings.instantiate_current_writer(&filename)?;
Some(ByteChunkWriter { Ok(ByteChunkWriter {
settings, settings,
chunk_size, chunk_size,
num_bytes_remaining_in_current_chunk: chunk_size, num_bytes_remaining_in_current_chunk: chunk_size,
@ -652,8 +665,7 @@ impl<'a> Write for ByteChunkWriter<'a> {
if self.settings.verbose { if self.settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
self.inner = self.inner = self.settings.instantiate_current_writer(&filename)?;
platform::instantiate_current_writer(&self.settings.filter, &filename);
} }
} }
} }
@ -701,19 +713,21 @@ struct LineChunkWriter<'a> {
} }
impl<'a> LineChunkWriter<'a> { impl<'a> LineChunkWriter<'a> {
fn new(chunk_size: u64, settings: &'a Settings) -> Option<LineChunkWriter<'a>> { fn new(chunk_size: u64, settings: &'a Settings) -> UResult<LineChunkWriter<'a>> {
let mut filename_iterator = FilenameIterator::new( let mut filename_iterator = FilenameIterator::new(
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.suffix_type, settings.suffix_type,
); );
let filename = filename_iterator.next()?; let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
if settings.verbose { if settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
let inner = platform::instantiate_current_writer(&settings.filter, &filename); let inner = settings.instantiate_current_writer(&filename)?;
Some(LineChunkWriter { Ok(LineChunkWriter {
settings, settings,
chunk_size, chunk_size,
num_lines_remaining_in_current_chunk: chunk_size, num_lines_remaining_in_current_chunk: chunk_size,
@ -745,7 +759,7 @@ impl<'a> Write for LineChunkWriter<'a> {
if self.settings.verbose { if self.settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
self.inner = platform::instantiate_current_writer(&self.settings.filter, &filename); self.inner = self.settings.instantiate_current_writer(&filename)?;
self.num_lines_remaining_in_current_chunk = self.chunk_size; self.num_lines_remaining_in_current_chunk = self.chunk_size;
} }
@ -807,22 +821,24 @@ struct LineBytesChunkWriter<'a> {
} }
impl<'a> LineBytesChunkWriter<'a> { impl<'a> LineBytesChunkWriter<'a> {
fn new(chunk_size: u64, settings: &'a Settings) -> Option<LineBytesChunkWriter<'a>> { fn new(chunk_size: u64, settings: &'a Settings) -> UResult<LineBytesChunkWriter<'a>> {
let mut filename_iterator = FilenameIterator::new( let mut filename_iterator = FilenameIterator::new(
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.suffix_type, settings.suffix_type,
); );
let filename = filename_iterator.next()?; let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
if settings.verbose { if settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
let inner = platform::instantiate_current_writer(&settings.filter, &filename); let inner = settings.instantiate_current_writer(&filename)?;
Some(LineBytesChunkWriter { Ok(LineBytesChunkWriter {
settings, settings,
chunk_size, chunk_size,
num_bytes_remaining_in_current_chunk: chunk_size.try_into().unwrap(), num_bytes_remaining_in_current_chunk: usize::try_from(chunk_size).unwrap(),
num_chunks_written: 0, num_chunks_written: 0,
inner, inner,
filename_iterator, filename_iterator,
@ -882,7 +898,7 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
if self.settings.verbose { if self.settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
self.inner = platform::instantiate_current_writer(&self.settings.filter, &filename); self.inner = self.settings.instantiate_current_writer(&filename)?;
self.num_bytes_remaining_in_current_chunk = self.chunk_size.try_into().unwrap(); self.num_bytes_remaining_in_current_chunk = self.chunk_size.try_into().unwrap();
} }
@ -1017,7 +1033,7 @@ where
let filename = filename_iterator let filename = filename_iterator
.next() .next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer); writers.push(writer);
} }
@ -1093,7 +1109,7 @@ where
let filename = filename_iterator let filename = filename_iterator
.next() .next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer); writers.push(writer);
} }
@ -1209,8 +1225,7 @@ fn split(settings: &Settings) -> UResult<()> {
} }
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")), Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
Strategy::Lines(chunk_size) => { Strategy::Lines(chunk_size) => {
let mut writer = LineChunkWriter::new(chunk_size, settings) let mut writer = LineChunkWriter::new(chunk_size, settings)?;
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
match std::io::copy(&mut reader, &mut writer) { match std::io::copy(&mut reader, &mut writer) {
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(e) => match e.kind() { Err(e) => match e.kind() {
@ -1222,15 +1237,14 @@ fn split(settings: &Settings) -> UResult<()> {
// allowable filenames, we use `ErrorKind::Other` to // allowable filenames, we use `ErrorKind::Other` to
// indicate that. A special error message needs to be // indicate that. A special error message needs to be
// printed in that case. // printed in that case.
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")), ErrorKind::Other => Err(USimpleError::new(1, format!("{}", e))),
ErrorKind::BrokenPipe => Ok(()), ErrorKind::BrokenPipe => Ok(()),
_ => Err(uio_error!(e, "input/output error")), _ => Err(uio_error!(e, "input/output error")),
}, },
} }
} }
Strategy::Bytes(chunk_size) => { Strategy::Bytes(chunk_size) => {
let mut writer = ByteChunkWriter::new(chunk_size, settings) let mut writer = ByteChunkWriter::new(chunk_size, settings)?;
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
match std::io::copy(&mut reader, &mut writer) { match std::io::copy(&mut reader, &mut writer) {
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(e) => match e.kind() { Err(e) => match e.kind() {
@ -1242,15 +1256,14 @@ fn split(settings: &Settings) -> UResult<()> {
// allowable filenames, we use `ErrorKind::Other` to // allowable filenames, we use `ErrorKind::Other` to
// indicate that. A special error message needs to be // indicate that. A special error message needs to be
// printed in that case. // printed in that case.
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")), ErrorKind::Other => Err(USimpleError::new(1, format!("{}", e))),
ErrorKind::BrokenPipe => Ok(()), ErrorKind::BrokenPipe => Ok(()),
_ => Err(uio_error!(e, "input/output error")), _ => Err(uio_error!(e, "input/output error")),
}, },
} }
} }
Strategy::LineBytes(chunk_size) => { Strategy::LineBytes(chunk_size) => {
let mut writer = LineBytesChunkWriter::new(chunk_size, settings) let mut writer = LineBytesChunkWriter::new(chunk_size, settings)?;
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
match std::io::copy(&mut reader, &mut writer) { match std::io::copy(&mut reader, &mut writer) {
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(e) => match e.kind() { Err(e) => match e.kind() {
@ -1262,7 +1275,7 @@ fn split(settings: &Settings) -> UResult<()> {
// allowable filenames, we use `ErrorKind::Other` to // allowable filenames, we use `ErrorKind::Other` to
// indicate that. A special error message needs to be // indicate that. A special error message needs to be
// printed in that case. // printed in that case.
ErrorKind::Other => Err(USimpleError::new(1, "output file suffixes exhausted")), ErrorKind::Other => Err(USimpleError::new(1, format!("{}", e))),
ErrorKind::BrokenPipe => Ok(()), ErrorKind::BrokenPipe => Ok(()),
_ => Err(uio_error!(e, "input/output error")), _ => Err(uio_error!(e, "input/output error")),
}, },

View file

@ -485,8 +485,20 @@ pub fn dir_strip_dot_for_creation(path: &Path) -> PathBuf {
/// Checks if `p1` and `p2` are the same file. /// Checks if `p1` and `p2` are the same file.
/// If error happens when trying to get files' metadata, returns false /// If error happens when trying to get files' metadata, returns false
pub fn paths_refer_to_same_file<P: AsRef<Path>>(p1: P, p2: P, dereference: bool) -> bool { pub fn paths_refer_to_same_file<P: AsRef<Path>>(p1: P, p2: P, dereference: bool) -> bool {
if let Ok(info1) = FileInformation::from_path(p1, dereference) { infos_refer_to_same_file(
if let Ok(info2) = FileInformation::from_path(p2, dereference) { FileInformation::from_path(p1, dereference),
FileInformation::from_path(p2, dereference),
)
}
/// Checks if `p1` and `p2` are the same file information.
/// If error happens when trying to get files' metadata, returns false
pub fn infos_refer_to_same_file(
info1: IOResult<FileInformation>,
info2: IOResult<FileInformation>,
) -> bool {
if let Ok(info1) = info1 {
if let Ok(info2) = info2 {
return info1 == info2; return info1 == info2;
} }
} }

View file

@ -655,3 +655,31 @@ fn test_line_bytes_no_empty_file() {
assert_eq!(at.read("xaj"), "4"); assert_eq!(at.read("xaj"), "4");
assert!(!at.plus("xak").exists()); assert!(!at.plus("xak").exists());
} }
#[test]
fn test_guard_input() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
ts.ucmd()
.args(&["-C", "6"])
.pipe_in("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n")
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("xaa"), "1\n2\n3\n");
ts.ucmd()
.args(&["-C", "6"])
.pipe_in("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n")
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("xaa"), "1\n2\n3\n");
ts.ucmd()
.args(&["-C", "6", "xaa"])
.fails()
.stderr_only("split: 'xaa' would overwrite input; aborting");
assert_eq!(at.read("xaa"), "1\n2\n3\n");
}