1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

--filter argument for split (#1681)

This commit is contained in:
Felipe Lema 2021-01-18 10:42:44 -03:00 committed by GitHub
parent 0bb5adad3f
commit 88911be6e0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 253 additions and 14 deletions

View file

@ -0,0 +1,11 @@
#[cfg(unix)]
pub use self::unix::instantiate_current_writer;
#[cfg(windows)]
pub use self::windows::instantiate_current_writer;
#[cfg(unix)]
mod unix;
#[cfg(windows)]
mod windows;

View file

@ -0,0 +1,124 @@
use std::env;
use std::io::Write;
use std::io::{BufWriter, Result};
use std::process::{Child, Command, Stdio};
/// A writer that writes to a shell_process' stdin
///
/// We use a shell process (not directy calling a sub-process) so we can forward the name of the
/// corresponding output file (xaa, xab, xac… ). This is the way it was implemented in GNU split.
struct FilterWriter {
/// Running shell process
shell_process: Child,
}
impl Write for FilterWriter {
fn write(&mut self, buf: &[u8]) -> Result<usize> {
self.shell_process
.stdin
.as_mut()
.expect("failed to get shell stdin")
.write(buf)
}
fn flush(&mut self) -> Result<()> {
self.shell_process
.stdin
.as_mut()
.expect("failed to get shell stdin")
.flush()
}
}
/// Have an environment variable set at a value during this lifetime
struct WithEnvVarSet {
/// Env var key
_previous_var_key: String,
/// Previous value set to this key
_previous_var_value: std::result::Result<String, env::VarError>,
}
impl WithEnvVarSet {
/// Save previous value assigned to key, set key=value
fn new(key: &str, value: &str) -> WithEnvVarSet {
let previous_env_value = env::var(key);
env::set_var(key, value);
WithEnvVarSet {
_previous_var_key: String::from(key),
_previous_var_value: previous_env_value,
}
}
}
impl Drop for WithEnvVarSet {
/// Restore previous value now that this is being dropped by context
fn drop(&mut self) {
if let Ok(ref prev_value) = self._previous_var_value {
env::set_var(&self._previous_var_key, &prev_value);
} else {
env::remove_var(&self._previous_var_key)
}
}
}
impl FilterWriter {
/// Create a new filter running a command with $FILE pointing at the output name
///
/// #Arguments
///
/// * `command` - The shell command to execute
/// * `filepath` - Path of the output file (forwarded to command as $FILE)
fn new(command: &str, filepath: &str) -> FilterWriter {
// set $FILE, save previous value (if there was one)
let _with_env_var_set = WithEnvVarSet::new("FILE", &filepath);
let shell_process = Command::new(env::var("SHELL").unwrap_or("/bin/sh".to_owned()))
.arg("-c")
.arg(command)
.stdin(Stdio::piped())
.spawn()
.expect("Couldn't spawn filter command");
FilterWriter {
shell_process: shell_process,
}
}
}
impl Drop for FilterWriter {
/// flush stdin, close it and wait on `shell_process` before dropping self
fn drop(&mut self) {
{
// close stdin by dropping it
let _stdin = self.shell_process.stdin.as_mut();
}
let exit_status = self
.shell_process
.wait()
.expect("Couldn't wait for child process");
if let Some(return_code) = exit_status.code() {
if return_code != 0 {
crash!(1, "Shell process returned {}", return_code);
}
} else {
crash!(1, "Shell process terminated by signal")
}
}
}
/// Instantiate either a file writer or a "write to shell process's stdin" writer
pub fn instantiate_current_writer(
filter: &Option<String>,
filename: &str,
) -> BufWriter<Box<dyn Write>> {
match filter {
None => BufWriter::new(Box::new(
// write to the next file
std::fs::OpenOptions::new()
.write(true)
.create(true)
.open(std::path::Path::new(&filename))
.unwrap(),
) as Box<dyn Write>),
Some(ref filter_command) => BufWriter::new(Box::new(
// spawn a shell command and write to it
FilterWriter::new(&filter_command, &filename),
) as Box<dyn Write>),
}
}

View file

@ -0,0 +1,19 @@
use std::io::BufWriter;
use std::io::Write;
/// Get a file writer
///
/// Unlike the unix version of this function, this _always_ returns
/// a file writer
pub fn instantiate_current_writer(
_filter: &Option<String>,
filename: &str,
) -> BufWriter<Box<dyn Write>> {
BufWriter::new(Box::new(
// write to the next file
std::fs::OpenOptions::new()
.write(true)
.create(true)
.open(std::path::Path::new(&filename))
.unwrap(),
) as Box<dyn Write>)
}

View file

@ -10,8 +10,11 @@
#[macro_use]
extern crate uucore;
mod platform;
use std::char;
use std::fs::{File, OpenOptions};
use std::env;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;
@ -47,6 +50,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
"additional suffix to append to output file names",
"SUFFIX",
);
opts.optopt(
"",
"filter",
"write to shell COMMAND file name is $FILE (Currently not implemented for Windows)",
"COMMAND",
);
opts.optopt("l", "lines", "put NUMBER lines per output file", "NUMBER");
opts.optflag(
"",
@ -92,6 +101,7 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
suffix_length: 0,
additional_suffix: "".to_owned(),
input: "".to_owned(),
filter: None,
strategy: "".to_owned(),
strategy_param: "".to_owned(),
verbose: false,
@ -138,6 +148,14 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
settings.input = input;
settings.prefix = prefix;
settings.filter = matches.opt_str("filter");
if settings.filter.is_some() && cfg!(windows) {
// see https://github.com/rust-lang/rust/issues/29494
show_error!("--filter is currently not supported in this platform");
exit!(-1);
}
split(&settings)
}
@ -147,6 +165,8 @@ struct Settings {
suffix_length: usize,
additional_suffix: String,
input: String,
/// When supplied, a shell command to output to instead of xaa, xab …
filter: Option<String>,
strategy: String,
strategy_param: String,
verbose: bool,
@ -323,7 +343,6 @@ fn split(settings: &Settings) -> i32 {
_ => {}
}
}
if control.request_new_file {
let mut filename = settings.prefix.clone();
filename.push_str(
@ -336,17 +355,9 @@ fn split(settings: &Settings) -> i32 {
);
filename.push_str(settings.additional_suffix.as_ref());
if fileno != 0 {
crash_if_err!(1, writer.flush());
}
fileno += 1;
writer = BufWriter::new(Box::new(
OpenOptions::new()
.write(true)
.create(true)
.open(Path::new(&filename))
.unwrap(),
) as Box<dyn Write>);
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
control.request_new_file = false;
if settings.verbose {
println!("creating file '{}'", filename);

View file

@ -4,6 +4,8 @@ extern crate regex;
use self::rand::{thread_rng, Rng};
use self::regex::Regex;
use crate::common::util::*;
#[cfg(not(windows))]
use std::env;
use std::fs::{read_dir, File};
use std::io::Write;
use std::path::Path;
@ -32,6 +34,7 @@ impl Glob {
self.collect().len()
}
/// Get all files in `self.directory` that match `self.regex`
fn collect(&self) -> Vec<String> {
read_dir(Path::new(&self.directory.subdir))
.unwrap()
@ -49,6 +52,7 @@ impl Glob {
.collect()
}
/// Accumulate bytes of all files in `self.collect()`
fn collate(&self) -> Vec<u8> {
let mut files = self.collect();
files.sort();
@ -60,11 +64,16 @@ impl Glob {
}
}
/// File handle that user can add random bytes (line-formatted or not) to
struct RandomFile {
inner: File,
}
impl RandomFile {
/// Size of each line that's being generated
const LINESIZE: usize = 32;
/// `create()` file handle located at `at` / `name`
fn new(at: &AtPath, name: &str) -> RandomFile {
RandomFile {
inner: File::create(&at.plus(name)).unwrap(),
@ -81,11 +90,11 @@ impl RandomFile {
let _ = write!(self.inner, "{}", random_chars(n));
}
/// Add n lines each of size `RandomFile::LINESIZE`
fn add_lines(&mut self, lines: usize) {
let line_size: usize = 32;
let mut n = lines;
while n > 0 {
let _ = writeln!(self.inner, "{}", random_chars(line_size));
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
n -= 1;
}
}
@ -156,3 +165,64 @@ fn test_split_additional_suffix() {
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
}
// note: the test_filter* tests below are unix-only
// windows support has been waived for now because of the difficulty of getting
// the `cmd` call right
// see https://github.com/rust-lang/rust/issues/29494
#[test]
#[cfg(unix)]
fn test_filter() {
// like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
// change all characters to 'i'
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
.succeeds();
// assert all characters are 'i' / no character is not 'i'
// (assert that command succeded)
assert!(
glob.collate().iter().find(|&&c| {
// is not i
c != ('i' as u8)
// is not newline
&& c != ('\n' as u8)
}) == None
);
}
#[test]
#[cfg(unix)]
fn test_filter_with_env_var_set() {
// This test will ensure that if $FILE env var was set before running --filter, it'll stay that
// way
// implemented like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
let env_var_value = "somevalue";
env::set_var("FILE", &env_var_value);
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
.succeeds();
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
}
#[test]
#[cfg(unix)]
fn test_filter_command_fails() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "filter-will-fail";
RandomFile::new(&at, name).add_lines(4);
ucmd.args(&["--filter=/a/path/that/totally/does/not/exist", name])
.fails();
}

View file

@ -72,8 +72,12 @@ pub fn repeat_str(s: &str, n: u32) -> String {
pub struct CmdResult {
//tmpd is used for convenience functions for asserts against fixtures
tmpd: Option<Rc<TempDir>>,
/// zero-exit from running the Command?
/// see [`success`]
pub success: bool,
/// captured utf-8 standard output after running the Command
pub stdout: String,
/// captured utf-8 standard error after running the Command
pub stderr: String,
}