diff --git a/src/uu/sort/src/check.rs b/src/uu/sort/src/check.rs index de320ef77..350a98e23 100644 --- a/src/uu/sort/src/check.rs +++ b/src/uu/sort/src/check.rs @@ -14,6 +14,7 @@ use crate::{ use itertools::Itertools; use std::{ cmp::Ordering, + ffi::OsStr, io::Read, iter, sync::mpsc::{sync_channel, Receiver, SyncSender}, @@ -25,7 +26,7 @@ use std::{ /// # Returns /// /// The code we should exit with. -pub fn check(path: &str, settings: &GlobalSettings) -> i32 { +pub fn check(path: &OsStr, settings: &GlobalSettings) -> i32 { let max_allowed_cmp = if settings.unique { // If `unique` is enabled, the previous line must compare _less_ to the next one. Ordering::Less @@ -63,7 +64,12 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 { ) > max_allowed_cmp { if !settings.check_silent { - eprintln!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line); + eprintln!( + "sort: {}:{}: disorder: {}", + path.to_string_lossy(), + line_idx, + new_first.line + ); } return 1; } @@ -74,7 +80,12 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 { line_idx += 1; if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) > max_allowed_cmp { if !settings.check_silent { - eprintln!("sort: {}:{}: disorder: {}", path, line_idx, b.line); + eprintln!( + "sort: {}:{}: disorder: {}", + path.to_string_lossy(), + line_idx, + b.line + ); } return 1; } diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index b8d69fb14..4a45028f7 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -9,10 +9,11 @@ use std::{ cmp::Ordering, + ffi::OsString, fs::{self, File}, io::{BufWriter, Read, Write}, iter, - path::PathBuf, + path::{Path, PathBuf}, process::{Child, ChildStdin, ChildStdout, Command, Stdio}, rc::Rc, sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender}, @@ -25,28 +26,66 @@ use tempfile::TempDir; use crate::{ chunks::{self, Chunk, RecycledChunk}, - compare_by, GlobalSettings, Output, + compare_by, open, GlobalSettings, Output, }; +/// If the output file occurs in the input files as well, copy the contents of the output file +/// and replace its occurrences in the inputs with that copy. +fn replace_output_file_in_input_files( + files: &mut [OsString], + settings: &GlobalSettings, + output: Option<&str>, +) -> Option<(TempDir, usize)> { + let mut copy: Option<(TempDir, PathBuf)> = None; + if let Some(Ok(output_path)) = output.map(|path| Path::new(path).canonicalize()) { + for file in files { + if let Ok(file_path) = Path::new(file).canonicalize() { + if file_path == output_path { + if let Some((_dir, copy)) = © { + *file = copy.clone().into_os_string(); + } else { + let tmp_dir = tempfile::Builder::new() + .prefix("uutils_sort") + .tempdir_in(&settings.tmp_dir) + .unwrap(); + let copy_path = tmp_dir.path().join("0"); + std::fs::copy(file_path, ©_path).unwrap(); + *file = copy_path.clone().into_os_string(); + copy = Some((tmp_dir, copy_path)) + } + } + } + } + } + // if we created a TempDir its size must be one. + copy.map(|(dir, _copy)| (dir, 1)) +} + /// Merge pre-sorted `Box`s. /// /// If `settings.merge_batch_size` is greater than the length of `files`, intermediate files will be used. /// If `settings.compress_prog` is `Some`, intermediate files will be compressed with it. -pub fn merge>>( - files: Files, - settings: &GlobalSettings, -) -> FileMerger { +pub fn merge<'a>( + files: &mut [OsString], + settings: &'a GlobalSettings, + output: Option<&str>, +) -> FileMerger<'a> { + let tmp_dir = replace_output_file_in_input_files(files, settings, output); if settings.compress_prog.is_none() { merge_with_file_limit::<_, _, WriteablePlainTmpFile>( - files.map(|file| PlainMergeInput { inner: file }), + files + .iter() + .map(|file| PlainMergeInput { inner: open(file) }), settings, - None, + tmp_dir, ) } else { merge_with_file_limit::<_, _, WriteableCompressedTmpFile>( - files.map(|file| PlainMergeInput { inner: file }), + files + .iter() + .map(|file| PlainMergeInput { inner: open(file) }), settings, - None, + tmp_dir, ) } } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index aa4d483c0..1f674e549 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -33,8 +33,8 @@ use rand::{thread_rng, Rng}; use rayon::prelude::*; use std::cmp::Ordering; use std::env; -use std::ffi::OsStr; -use std::fs::File; +use std::ffi::{OsStr, OsString}; +use std::fs::{File, OpenOptions}; use std::hash::{Hash, Hasher}; use std::io::{stdin, stdout, BufRead, BufReader, Read, Write}; use std::ops::Range; @@ -146,26 +146,46 @@ impl SortMode { } pub struct Output { - file: Option, + file: Option<(String, File)>, } impl Output { fn new(name: Option<&str>) -> Self { Self { file: name.map(|name| { - File::create(name).unwrap_or_else(|e| { - crash!(2, "open failed: {}: {}", name, strip_errno(&e.to_string())) - }) + // This is different from `File::create()` because we don't truncate the output yet. + // This allows using the output file as an input file. + ( + name.to_owned(), + OpenOptions::new() + .write(true) + .create(true) + .open(name) + .unwrap_or_else(|e| { + crash!(2, "open failed: {}: {}", name, strip_errno(&e.to_string())) + }), + ) }), } } fn into_write(self) -> Box { match self.file { - Some(file) => Box::new(file), + Some((_name, file)) => { + // truncate the file + file.set_len(0).unwrap(); + Box::new(file) + } None => Box::new(stdout()), } } + + fn as_output_name(&self) -> Option<&str> { + match &self.file { + Some((name, _file)) => Some(name), + None => None, + } + } } #[derive(Clone)] @@ -956,29 +976,28 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.debug = matches.is_present(options::DEBUG); // check whether user specified a zero terminated list of files for input, otherwise read files from args - let mut files: Vec = if matches.is_present(options::FILES0_FROM) { - let files0_from: Vec = matches - .values_of(options::FILES0_FROM) - .map(|v| v.map(ToString::to_string).collect()) + let mut files: Vec = if matches.is_present(options::FILES0_FROM) { + let files0_from: Vec = matches + .values_of_os(options::FILES0_FROM) + .map(|v| v.map(ToOwned::to_owned).collect()) .unwrap_or_default(); let mut files = Vec::new(); for path in &files0_from { - let reader = open(path.as_str()); + let reader = open(&path); let buf_reader = BufReader::new(reader); for line in buf_reader.split(b'\0').flatten() { - files.push( + files.push(OsString::from( std::str::from_utf8(&line) - .expect("Could not parse string from zero terminated input.") - .to_string(), - ); + .expect("Could not parse string from zero terminated input."), + )); } } files } else { matches - .values_of(options::FILES) - .map(|v| v.map(ToString::to_string).collect()) + .values_of_os(options::FILES) + .map(|v| v.map(ToOwned::to_owned).collect()) .unwrap_or_default() }; @@ -1066,9 +1085,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if files.is_empty() { /* if no file, default to stdin */ - files.push("-".to_owned()); + files.push("-".to_string().into()); } else if settings.check && files.len() != 1 { - crash!(2, "extra operand `{}' not allowed with -c", files[1]) + crash!( + 2, + "extra operand `{}' not allowed with -c", + files[1].to_string_lossy() + ) } if let Some(arg) = matches.args.get(options::SEPARATOR) { @@ -1122,7 +1145,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.init_precomputed(); - exec(&files, &settings, output) + exec(&mut files, &settings, output) } pub fn uu_app() -> App<'static, 'static> { @@ -1345,9 +1368,9 @@ pub fn uu_app() -> App<'static, 'static> { ) } -fn exec(files: &[String], settings: &GlobalSettings, output: Output) -> i32 { +fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> i32 { if settings.merge { - let mut file_merger = merge::merge(files.iter().map(open), settings); + let mut file_merger = merge::merge(files, settings, output.as_output_name()); file_merger.write_all(settings, output); } else if settings.check { if files.len() > 1 { diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index b9cdb2b80..4c4a7a697 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1020,3 +1020,13 @@ fn test_separator_null() { .succeeds() .stdout_only("a\0z\0z\nz\0b\0a\nz\0a\0b\n"); } + +#[test] +fn test_output_is_input() { + let input = "a\nb\nc\n"; + let (at, mut cmd) = at_and_ucmd!(); + at.touch("file"); + at.append("file", input); + cmd.args(&["-m", "-o", "file", "file"]).succeeds(); + assert_eq!(at.read("file"), input); +}