From 88911be6e002e0f66bcfb6fbac5593cc3066dae9 Mon Sep 17 00:00:00 2001 From: Felipe Lema <1232306+FelipeLema@users.noreply.github.com> Date: Mon, 18 Jan 2021 10:42:44 -0300 Subject: [PATCH] `--filter` argument for `split` (#1681) --- src/uu/split/src/platform/mod.rs | 11 +++ src/uu/split/src/platform/unix.rs | 124 +++++++++++++++++++++++++++ src/uu/split/src/platform/windows.rs | 19 ++++ src/uu/split/src/split.rs | 35 +++++--- tests/by-util/test_split.rs | 74 +++++++++++++++- tests/common/util.rs | 4 + 6 files changed, 253 insertions(+), 14 deletions(-) create mode 100644 src/uu/split/src/platform/mod.rs create mode 100644 src/uu/split/src/platform/unix.rs create mode 100644 src/uu/split/src/platform/windows.rs diff --git a/src/uu/split/src/platform/mod.rs b/src/uu/split/src/platform/mod.rs new file mode 100644 index 000000000..020c01a4a --- /dev/null +++ b/src/uu/split/src/platform/mod.rs @@ -0,0 +1,11 @@ +#[cfg(unix)] +pub use self::unix::instantiate_current_writer; + +#[cfg(windows)] +pub use self::windows::instantiate_current_writer; + +#[cfg(unix)] +mod unix; + +#[cfg(windows)] +mod windows; diff --git a/src/uu/split/src/platform/unix.rs b/src/uu/split/src/platform/unix.rs new file mode 100644 index 000000000..45333ceb2 --- /dev/null +++ b/src/uu/split/src/platform/unix.rs @@ -0,0 +1,124 @@ +use std::env; +use std::io::Write; +use std::io::{BufWriter, Result}; +use std::process::{Child, Command, Stdio}; +/// A writer that writes to a shell_process' stdin +/// +/// We use a shell process (not directy calling a sub-process) so we can forward the name of the +/// corresponding output file (xaa, xab, xac… ). This is the way it was implemented in GNU split. +struct FilterWriter { + /// Running shell process + shell_process: Child, +} + +impl Write for FilterWriter { + fn write(&mut self, buf: &[u8]) -> Result { + self.shell_process + .stdin + .as_mut() + .expect("failed to get shell stdin") + .write(buf) + } + fn flush(&mut self) -> Result<()> { + self.shell_process + .stdin + .as_mut() + .expect("failed to get shell stdin") + .flush() + } +} + +/// Have an environment variable set at a value during this lifetime +struct WithEnvVarSet { + /// Env var key + _previous_var_key: String, + /// Previous value set to this key + _previous_var_value: std::result::Result, +} +impl WithEnvVarSet { + /// Save previous value assigned to key, set key=value + fn new(key: &str, value: &str) -> WithEnvVarSet { + let previous_env_value = env::var(key); + env::set_var(key, value); + WithEnvVarSet { + _previous_var_key: String::from(key), + _previous_var_value: previous_env_value, + } + } +} + +impl Drop for WithEnvVarSet { + /// Restore previous value now that this is being dropped by context + fn drop(&mut self) { + if let Ok(ref prev_value) = self._previous_var_value { + env::set_var(&self._previous_var_key, &prev_value); + } else { + env::remove_var(&self._previous_var_key) + } + } +} +impl FilterWriter { + /// Create a new filter running a command with $FILE pointing at the output name + /// + /// #Arguments + /// + /// * `command` - The shell command to execute + /// * `filepath` - Path of the output file (forwarded to command as $FILE) + fn new(command: &str, filepath: &str) -> FilterWriter { + // set $FILE, save previous value (if there was one) + let _with_env_var_set = WithEnvVarSet::new("FILE", &filepath); + + let shell_process = Command::new(env::var("SHELL").unwrap_or("/bin/sh".to_owned())) + .arg("-c") + .arg(command) + .stdin(Stdio::piped()) + .spawn() + .expect("Couldn't spawn filter command"); + + FilterWriter { + shell_process: shell_process, + } + } +} + +impl Drop for FilterWriter { + /// flush stdin, close it and wait on `shell_process` before dropping self + fn drop(&mut self) { + { + // close stdin by dropping it + let _stdin = self.shell_process.stdin.as_mut(); + } + let exit_status = self + .shell_process + .wait() + .expect("Couldn't wait for child process"); + if let Some(return_code) = exit_status.code() { + if return_code != 0 { + crash!(1, "Shell process returned {}", return_code); + } + } else { + crash!(1, "Shell process terminated by signal") + } + } +} + +/// Instantiate either a file writer or a "write to shell process's stdin" writer +pub fn instantiate_current_writer( + filter: &Option, + filename: &str, +) -> BufWriter> { + match filter { + None => BufWriter::new(Box::new( + // write to the next file + std::fs::OpenOptions::new() + .write(true) + .create(true) + .open(std::path::Path::new(&filename)) + .unwrap(), + ) as Box), + Some(ref filter_command) => BufWriter::new(Box::new( + // spawn a shell command and write to it + FilterWriter::new(&filter_command, &filename), + ) as Box), + } +} diff --git a/src/uu/split/src/platform/windows.rs b/src/uu/split/src/platform/windows.rs new file mode 100644 index 000000000..e67518f2a --- /dev/null +++ b/src/uu/split/src/platform/windows.rs @@ -0,0 +1,19 @@ +use std::io::BufWriter; +use std::io::Write; +/// Get a file writer +/// +/// Unlike the unix version of this function, this _always_ returns +/// a file writer +pub fn instantiate_current_writer( + _filter: &Option, + filename: &str, +) -> BufWriter> { + BufWriter::new(Box::new( + // write to the next file + std::fs::OpenOptions::new() + .write(true) + .create(true) + .open(std::path::Path::new(&filename)) + .unwrap(), + ) as Box) +} diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index c2109a562..3f51d9447 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -10,8 +10,11 @@ #[macro_use] extern crate uucore; +mod platform; + use std::char; -use std::fs::{File, OpenOptions}; +use std::env; +use std::fs::File; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::path::Path; @@ -47,6 +50,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { "additional suffix to append to output file names", "SUFFIX", ); + opts.optopt( + "", + "filter", + "write to shell COMMAND file name is $FILE (Currently not implemented for Windows)", + "COMMAND", + ); opts.optopt("l", "lines", "put NUMBER lines per output file", "NUMBER"); opts.optflag( "", @@ -92,6 +101,7 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is suffix_length: 0, additional_suffix: "".to_owned(), input: "".to_owned(), + filter: None, strategy: "".to_owned(), strategy_param: "".to_owned(), verbose: false, @@ -138,6 +148,14 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is settings.input = input; settings.prefix = prefix; + settings.filter = matches.opt_str("filter"); + + if settings.filter.is_some() && cfg!(windows) { + // see https://github.com/rust-lang/rust/issues/29494 + show_error!("--filter is currently not supported in this platform"); + exit!(-1); + } + split(&settings) } @@ -147,6 +165,8 @@ struct Settings { suffix_length: usize, additional_suffix: String, input: String, + /// When supplied, a shell command to output to instead of xaa, xab … + filter: Option, strategy: String, strategy_param: String, verbose: bool, @@ -323,7 +343,6 @@ fn split(settings: &Settings) -> i32 { _ => {} } } - if control.request_new_file { let mut filename = settings.prefix.clone(); filename.push_str( @@ -336,17 +355,9 @@ fn split(settings: &Settings) -> i32 { ); filename.push_str(settings.additional_suffix.as_ref()); - if fileno != 0 { - crash_if_err!(1, writer.flush()); - } + crash_if_err!(1, writer.flush()); fileno += 1; - writer = BufWriter::new(Box::new( - OpenOptions::new() - .write(true) - .create(true) - .open(Path::new(&filename)) - .unwrap(), - ) as Box); + writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); control.request_new_file = false; if settings.verbose { println!("creating file '{}'", filename); diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 5ed1a271d..88e97803c 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -4,6 +4,8 @@ extern crate regex; use self::rand::{thread_rng, Rng}; use self::regex::Regex; use crate::common::util::*; +#[cfg(not(windows))] +use std::env; use std::fs::{read_dir, File}; use std::io::Write; use std::path::Path; @@ -32,6 +34,7 @@ impl Glob { self.collect().len() } + /// Get all files in `self.directory` that match `self.regex` fn collect(&self) -> Vec { read_dir(Path::new(&self.directory.subdir)) .unwrap() @@ -49,6 +52,7 @@ impl Glob { .collect() } + /// Accumulate bytes of all files in `self.collect()` fn collate(&self) -> Vec { let mut files = self.collect(); files.sort(); @@ -60,11 +64,16 @@ impl Glob { } } +/// File handle that user can add random bytes (line-formatted or not) to struct RandomFile { inner: File, } impl RandomFile { + /// Size of each line that's being generated + const LINESIZE: usize = 32; + + /// `create()` file handle located at `at` / `name` fn new(at: &AtPath, name: &str) -> RandomFile { RandomFile { inner: File::create(&at.plus(name)).unwrap(), @@ -81,11 +90,11 @@ impl RandomFile { let _ = write!(self.inner, "{}", random_chars(n)); } + /// Add n lines each of size `RandomFile::LINESIZE` fn add_lines(&mut self, lines: usize) { - let line_size: usize = 32; let mut n = lines; while n > 0 { - let _ = writeln!(self.inner, "{}", random_chars(line_size)); + let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)); n -= 1; } } @@ -156,3 +165,64 @@ fn test_split_additional_suffix() { assert_eq!(glob.count(), 2); assert_eq!(glob.collate(), at.read(name).into_bytes()); } + +// note: the test_filter* tests below are unix-only +// windows support has been waived for now because of the difficulty of getting +// the `cmd` call right +// see https://github.com/rust-lang/rust/issues/29494 + +#[test] +#[cfg(unix)] +fn test_filter() { + // like `test_split_default()` but run a command before writing + let (at, mut ucmd) = at_and_ucmd!(); + let name = "filtered"; + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + let n_lines = 3; + RandomFile::new(&at, name).add_lines(n_lines); + + // change all characters to 'i' + ucmd.args(&["--filter=sed s/./i/g > $FILE", name]) + .succeeds(); + // assert all characters are 'i' / no character is not 'i' + // (assert that command succeded) + assert!( + glob.collate().iter().find(|&&c| { + // is not i + c != ('i' as u8) + // is not newline + && c != ('\n' as u8) + }) == None + ); +} + +#[test] +#[cfg(unix)] +fn test_filter_with_env_var_set() { + // This test will ensure that if $FILE env var was set before running --filter, it'll stay that + // way + // implemented like `test_split_default()` but run a command before writing + let (at, mut ucmd) = at_and_ucmd!(); + let name = "filtered"; + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + let n_lines = 3; + RandomFile::new(&at, name).add_lines(n_lines); + + let env_var_value = "somevalue"; + env::set_var("FILE", &env_var_value); + ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name]) + .succeeds(); + assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value); +} + +#[test] +#[cfg(unix)] +fn test_filter_command_fails() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "filter-will-fail"; + RandomFile::new(&at, name).add_lines(4); + + ucmd.args(&["--filter=/a/path/that/totally/does/not/exist", name]) + .fails(); +} diff --git a/tests/common/util.rs b/tests/common/util.rs index b581b8de9..b01f5014d 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -72,8 +72,12 @@ pub fn repeat_str(s: &str, n: u32) -> String { pub struct CmdResult { //tmpd is used for convenience functions for asserts against fixtures tmpd: Option>, + /// zero-exit from running the Command? + /// see [`success`] pub success: bool, + /// captured utf-8 standard output after running the Command pub stdout: String, + /// captured utf-8 standard error after running the Command pub stderr: String, }