mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
--filter
argument for split
(#1681)
This commit is contained in:
parent
0bb5adad3f
commit
88911be6e0
6 changed files with 253 additions and 14 deletions
11
src/uu/split/src/platform/mod.rs
Normal file
11
src/uu/split/src/platform/mod.rs
Normal file
|
@ -0,0 +1,11 @@
|
|||
#[cfg(unix)]
|
||||
pub use self::unix::instantiate_current_writer;
|
||||
|
||||
#[cfg(windows)]
|
||||
pub use self::windows::instantiate_current_writer;
|
||||
|
||||
#[cfg(unix)]
|
||||
mod unix;
|
||||
|
||||
#[cfg(windows)]
|
||||
mod windows;
|
124
src/uu/split/src/platform/unix.rs
Normal file
124
src/uu/split/src/platform/unix.rs
Normal file
|
@ -0,0 +1,124 @@
|
|||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::io::{BufWriter, Result};
|
||||
use std::process::{Child, Command, Stdio};
|
||||
/// A writer that writes to a shell process' stdin
///
/// We use a shell process (not directly calling a sub-process) so we can forward the name of the
/// corresponding output file (xaa, xab, xac… ). This is the way it was implemented in GNU split.
struct FilterWriter {
    /// Running shell process; its stdin is the destination of everything written here
    shell_process: Child,
}
|
||||
|
||||
impl Write for FilterWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> Result<usize> {
|
||||
self.shell_process
|
||||
.stdin
|
||||
.as_mut()
|
||||
.expect("failed to get shell stdin")
|
||||
.write(buf)
|
||||
}
|
||||
fn flush(&mut self) -> Result<()> {
|
||||
self.shell_process
|
||||
.stdin
|
||||
.as_mut()
|
||||
.expect("failed to get shell stdin")
|
||||
.flush()
|
||||
}
|
||||
}
|
||||
|
||||
/// Have an environment variable set at a value during this lifetime
///
/// The previous state of the variable (its value, or the fact that it was unset) is captured
/// on construction and put back when the guard is dropped.
struct WithEnvVarSet {
    /// Env var key
    _previous_var_key: String,
    /// Previous value set to this key, `None` if the key was not set at all.
    ///
    /// Stored as an `OsString` so that values which are not valid Unicode are restored
    /// faithfully instead of being mistaken for "unset".
    _previous_var_value: Option<std::ffi::OsString>,
}

impl WithEnvVarSet {
    /// Save previous value assigned to key, set key=value
    fn new(key: &str, value: &str) -> WithEnvVarSet {
        // `var_os` (unlike `var`) does not fail on non-Unicode values
        let previous_env_value = env::var_os(key);
        env::set_var(key, value);
        WithEnvVarSet {
            _previous_var_key: String::from(key),
            _previous_var_value: previous_env_value,
        }
    }
}

impl Drop for WithEnvVarSet {
    /// Restore previous value now that this is being dropped by context
    fn drop(&mut self) {
        match self._previous_var_value {
            // the key had a value before: put it back
            Some(ref prev_value) => env::set_var(&self._previous_var_key, prev_value),
            // the key did not exist before: make it disappear again
            None => env::remove_var(&self._previous_var_key),
        }
    }
}
|
||||
impl FilterWriter {
|
||||
/// Create a new filter running a command with $FILE pointing at the output name
|
||||
///
|
||||
/// #Arguments
|
||||
///
|
||||
/// * `command` - The shell command to execute
|
||||
/// * `filepath` - Path of the output file (forwarded to command as $FILE)
|
||||
fn new(command: &str, filepath: &str) -> FilterWriter {
|
||||
// set $FILE, save previous value (if there was one)
|
||||
let _with_env_var_set = WithEnvVarSet::new("FILE", &filepath);
|
||||
|
||||
let shell_process = Command::new(env::var("SHELL").unwrap_or("/bin/sh".to_owned()))
|
||||
.arg("-c")
|
||||
.arg(command)
|
||||
.stdin(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("Couldn't spawn filter command");
|
||||
|
||||
FilterWriter {
|
||||
shell_process: shell_process,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for FilterWriter {
|
||||
/// flush stdin, close it and wait on `shell_process` before dropping self
|
||||
fn drop(&mut self) {
|
||||
{
|
||||
// close stdin by dropping it
|
||||
let _stdin = self.shell_process.stdin.as_mut();
|
||||
}
|
||||
let exit_status = self
|
||||
.shell_process
|
||||
.wait()
|
||||
.expect("Couldn't wait for child process");
|
||||
if let Some(return_code) = exit_status.code() {
|
||||
if return_code != 0 {
|
||||
crash!(1, "Shell process returned {}", return_code);
|
||||
}
|
||||
} else {
|
||||
crash!(1, "Shell process terminated by signal")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Instantiate either a file writer or a "write to shell process's stdin" writer
|
||||
pub fn instantiate_current_writer(
|
||||
filter: &Option<String>,
|
||||
filename: &str,
|
||||
) -> BufWriter<Box<dyn Write>> {
|
||||
match filter {
|
||||
None => BufWriter::new(Box::new(
|
||||
// write to the next file
|
||||
std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.open(std::path::Path::new(&filename))
|
||||
.unwrap(),
|
||||
) as Box<dyn Write>),
|
||||
Some(ref filter_command) => BufWriter::new(Box::new(
|
||||
// spawn a shell command and write to it
|
||||
FilterWriter::new(&filter_command, &filename),
|
||||
) as Box<dyn Write>),
|
||||
}
|
||||
}
|
19
src/uu/split/src/platform/windows.rs
Normal file
19
src/uu/split/src/platform/windows.rs
Normal file
|
@ -0,0 +1,19 @@
|
|||
use std::io::BufWriter;
|
||||
use std::io::Write;
|
||||
/// Get a file writer
///
/// Unlike the unix version of this function, this _always_ returns
/// a file writer; `_filter` is ignored.
///
/// Any previous content of `filename` is discarded.
///
/// # Panics
///
/// Panics if the output file cannot be opened.
pub fn instantiate_current_writer(
    _filter: &Option<String>,
    filename: &str,
) -> BufWriter<Box<dyn Write>> {
    // write to the next file
    let file = std::fs::OpenOptions::new()
        .write(true)
        .create(true)
        // discard old content so a shorter chunk does not keep stale trailing bytes
        .truncate(true)
        .open(filename)
        .unwrap();
    BufWriter::new(Box::new(file) as Box<dyn Write>)
}
|
|
@ -10,8 +10,11 @@
|
|||
#[macro_use]
|
||||
extern crate uucore;
|
||||
|
||||
mod platform;
|
||||
|
||||
use std::char;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
||||
use std::path::Path;
|
||||
|
||||
|
@ -47,6 +50,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
|||
"additional suffix to append to output file names",
|
||||
"SUFFIX",
|
||||
);
|
||||
opts.optopt(
|
||||
"",
|
||||
"filter",
|
||||
"write to shell COMMAND file name is $FILE (Currently not implemented for Windows)",
|
||||
"COMMAND",
|
||||
);
|
||||
opts.optopt("l", "lines", "put NUMBER lines per output file", "NUMBER");
|
||||
opts.optflag(
|
||||
"",
|
||||
|
@ -92,6 +101,7 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
|
|||
suffix_length: 0,
|
||||
additional_suffix: "".to_owned(),
|
||||
input: "".to_owned(),
|
||||
filter: None,
|
||||
strategy: "".to_owned(),
|
||||
strategy_param: "".to_owned(),
|
||||
verbose: false,
|
||||
|
@ -138,6 +148,14 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
|
|||
settings.input = input;
|
||||
settings.prefix = prefix;
|
||||
|
||||
settings.filter = matches.opt_str("filter");
|
||||
|
||||
if settings.filter.is_some() && cfg!(windows) {
|
||||
// see https://github.com/rust-lang/rust/issues/29494
|
||||
show_error!("--filter is currently not supported in this platform");
|
||||
exit!(-1);
|
||||
}
|
||||
|
||||
split(&settings)
|
||||
}
|
||||
|
||||
|
@ -147,6 +165,8 @@ struct Settings {
|
|||
suffix_length: usize,
|
||||
additional_suffix: String,
|
||||
input: String,
|
||||
/// When supplied, a shell command to output to instead of xaa, xab …
|
||||
filter: Option<String>,
|
||||
strategy: String,
|
||||
strategy_param: String,
|
||||
verbose: bool,
|
||||
|
@ -323,7 +343,6 @@ fn split(settings: &Settings) -> i32 {
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if control.request_new_file {
|
||||
let mut filename = settings.prefix.clone();
|
||||
filename.push_str(
|
||||
|
@ -336,17 +355,9 @@ fn split(settings: &Settings) -> i32 {
|
|||
);
|
||||
filename.push_str(settings.additional_suffix.as_ref());
|
||||
|
||||
if fileno != 0 {
|
||||
crash_if_err!(1, writer.flush());
|
||||
}
|
||||
crash_if_err!(1, writer.flush());
|
||||
fileno += 1;
|
||||
writer = BufWriter::new(Box::new(
|
||||
OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.open(Path::new(&filename))
|
||||
.unwrap(),
|
||||
) as Box<dyn Write>);
|
||||
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||
control.request_new_file = false;
|
||||
if settings.verbose {
|
||||
println!("creating file '{}'", filename);
|
||||
|
|
|
@ -4,6 +4,8 @@ extern crate regex;
|
|||
use self::rand::{thread_rng, Rng};
|
||||
use self::regex::Regex;
|
||||
use crate::common::util::*;
|
||||
#[cfg(not(windows))]
|
||||
use std::env;
|
||||
use std::fs::{read_dir, File};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
@ -32,6 +34,7 @@ impl Glob {
|
|||
self.collect().len()
|
||||
}
|
||||
|
||||
/// Get all files in `self.directory` that match `self.regex`
|
||||
fn collect(&self) -> Vec<String> {
|
||||
read_dir(Path::new(&self.directory.subdir))
|
||||
.unwrap()
|
||||
|
@ -49,6 +52,7 @@ impl Glob {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Accumulate bytes of all files in `self.collect()`
|
||||
fn collate(&self) -> Vec<u8> {
|
||||
let mut files = self.collect();
|
||||
files.sort();
|
||||
|
@ -60,11 +64,16 @@ impl Glob {
|
|||
}
|
||||
}
|
||||
|
||||
/// File handle to which the user can append random bytes (line-formatted or not)
struct RandomFile {
    // file that the generated content is written into
    inner: File,
}
|
||||
|
||||
impl RandomFile {
|
||||
/// Size of each line that's being generated
|
||||
const LINESIZE: usize = 32;
|
||||
|
||||
/// `create()` file handle located at `at` / `name`
|
||||
fn new(at: &AtPath, name: &str) -> RandomFile {
|
||||
RandomFile {
|
||||
inner: File::create(&at.plus(name)).unwrap(),
|
||||
|
@ -81,11 +90,11 @@ impl RandomFile {
|
|||
let _ = write!(self.inner, "{}", random_chars(n));
|
||||
}
|
||||
|
||||
/// Add n lines each of size `RandomFile::LINESIZE`
|
||||
fn add_lines(&mut self, lines: usize) {
|
||||
let line_size: usize = 32;
|
||||
let mut n = lines;
|
||||
while n > 0 {
|
||||
let _ = writeln!(self.inner, "{}", random_chars(line_size));
|
||||
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
|
||||
n -= 1;
|
||||
}
|
||||
}
|
||||
|
@ -156,3 +165,64 @@ fn test_split_additional_suffix() {
|
|||
assert_eq!(glob.count(), 2);
|
||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
||||
}
|
||||
|
||||
// note: the test_filter* tests below are unix-only
|
||||
// windows support has been waived for now because of the difficulty of getting
|
||||
// the `cmd` call right
|
||||
// see https://github.com/rust-lang/rust/issues/29494
|
||||
|
||||
#[test]
#[cfg(unix)]
fn test_filter() {
    // same setup as `test_split_default()`, except that every chunk goes through a command
    let (at, mut ucmd) = at_and_ucmd!();
    let name = "filtered";
    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    let n_lines = 3;
    RandomFile::new(&at, name).add_lines(n_lines);

    // the filter rewrites every character to 'i' before storing the chunk in $FILE
    ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
        .succeeds();

    // the command succeeded if the chunks hold nothing but 'i' and newlines
    let produced = glob.collate();
    assert!(produced.iter().all(|&c| c == b'i' || c == b'\n'));
}
|
||||
|
||||
#[test]
#[cfg(unix)]
fn test_filter_with_env_var_set() {
    // Checks that a $FILE variable which already exists before `--filter` runs keeps its
    // value afterwards.
    // Setup mirrors `test_split_default()`, with the chunks piped through a command.
    let (at, mut ucmd) = at_and_ucmd!();
    let name = "filtered";
    let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    let n_lines = 3;
    RandomFile::new(&at, name).add_lines(n_lines);

    let env_var_value = "somevalue";
    env::set_var("FILE", env_var_value);

    let filter_arg = format!("--filter={}", "cat > $FILE");
    ucmd.args(&[filter_arg.as_str(), name]).succeeds();

    // `cat` copies each chunk unchanged, so the chunks concatenate back to the input
    assert_eq!(glob.collate(), at.read(name).into_bytes());

    let file_var_after = env::var("FILE").unwrap_or_else(|_| "var was unset".to_owned());
    assert!(file_var_after == env_var_value);
}
|
||||
|
||||
#[test]
#[cfg(unix)]
fn test_filter_command_fails() {
    // split is expected to fail when the filter command itself fails
    let (at, mut ucmd) = at_and_ucmd!();
    let input_name = "filter-will-fail";
    let mut input = RandomFile::new(&at, input_name);
    input.add_lines(4);

    let args = ["--filter=/a/path/that/totally/does/not/exist", input_name];
    ucmd.args(&args).fails();
}
|
||||
|
|
|
@ -72,8 +72,12 @@ pub fn repeat_str(s: &str, n: u32) -> String {
|
|||
pub struct CmdResult {
    // tmpd is used by convenience functions for asserts against fixtures
    tmpd: Option<Rc<TempDir>>,
    /// whether running the Command ended with a zero exit status
    /// see [`success`]
    pub success: bool,
    /// captured utf-8 standard output after running the Command
    pub stdout: String,
    /// captured utf-8 standard error after running the Command
    pub stderr: String,
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue