mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
--filter
argument for split
(#1681)
This commit is contained in:
parent
0bb5adad3f
commit
88911be6e0
6 changed files with 253 additions and 14 deletions
11
src/uu/split/src/platform/mod.rs
Normal file
11
src/uu/split/src/platform/mod.rs
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub use self::unix::instantiate_current_writer;
|
||||||
|
|
||||||
|
#[cfg(windows)]
|
||||||
|
pub use self::windows::instantiate_current_writer;
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
mod unix;
|
||||||
|
|
||||||
|
#[cfg(windows)]
|
||||||
|
mod windows;
|
124
src/uu/split/src/platform/unix.rs
Normal file
124
src/uu/split/src/platform/unix.rs
Normal file
|
@ -0,0 +1,124 @@
|
||||||
|
use std::env;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::io::{BufWriter, Result};
|
||||||
|
use std::process::{Child, Command, Stdio};
|
||||||
|
/// A writer that feeds everything written to it into the stdin of a shell process.
///
/// We use a shell process (rather than directly calling a sub-process) so we can forward the
/// name of the corresponding output file (xaa, xab, xac… ). This is the way it was implemented
/// in GNU split.
struct FilterWriter {
    /// Running shell process
    shell_process: Child,
}

impl FilterWriter {
    /// Borrow the stdin pipe of the shell process.
    ///
    /// # Panics
    ///
    /// Panics when the child has no stdin handle.
    fn shell_stdin(&mut self) -> &mut std::process::ChildStdin {
        self.shell_process
            .stdin
            .as_mut()
            .expect("failed to get shell stdin")
    }
}

impl Write for FilterWriter {
    /// Forward `buf` to the stdin of the shell process.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        self.shell_stdin().write(buf)
    }

    /// Flush the stdin handle of the shell process.
    fn flush(&mut self) -> Result<()> {
        self.shell_stdin().flush()
    }
}
|
||||||
|
|
||||||
|
/// Have an environment variable set at a value during this lifetime
///
/// The value that was assigned to the key before construction (or its absence) is restored
/// when the guard is dropped.
#[must_use]
struct WithEnvVarSet {
    /// Env var key
    key: String,
    /// Value previously assigned to `key`, or `None` when the variable was not set
    previous_value: Option<std::ffi::OsString>,
}

impl WithEnvVarSet {
    /// Save previous value assigned to key, set key=value
    ///
    /// # Arguments
    ///
    /// * `key` - Name of the environment variable to override
    /// * `value` - Value the variable holds while the returned guard is alive
    fn new(key: &str, value: &str) -> WithEnvVarSet {
        // `var_os` also captures values that are not valid Unicode; `var` would report those as
        // an error and the original value could not be restored afterwards.
        let previous_value = env::var_os(key);
        env::set_var(key, value);
        WithEnvVarSet {
            key: key.to_owned(),
            previous_value,
        }
    }
}

impl Drop for WithEnvVarSet {
    /// Restore previous value now that this is being dropped by context
    fn drop(&mut self) {
        match self.previous_value.take() {
            Some(previous) => env::set_var(&self.key, previous),
            // the variable did not exist before, so remove it again
            None => env::remove_var(&self.key),
        }
    }
}
|
||||||
|
impl FilterWriter {
|
||||||
|
/// Create a new filter running a command with $FILE pointing at the output name
|
||||||
|
///
|
||||||
|
/// #Arguments
|
||||||
|
///
|
||||||
|
/// * `command` - The shell command to execute
|
||||||
|
/// * `filepath` - Path of the output file (forwarded to command as $FILE)
|
||||||
|
fn new(command: &str, filepath: &str) -> FilterWriter {
|
||||||
|
// set $FILE, save previous value (if there was one)
|
||||||
|
let _with_env_var_set = WithEnvVarSet::new("FILE", &filepath);
|
||||||
|
|
||||||
|
let shell_process = Command::new(env::var("SHELL").unwrap_or("/bin/sh".to_owned()))
|
||||||
|
.arg("-c")
|
||||||
|
.arg(command)
|
||||||
|
.stdin(Stdio::piped())
|
||||||
|
.spawn()
|
||||||
|
.expect("Couldn't spawn filter command");
|
||||||
|
|
||||||
|
FilterWriter {
|
||||||
|
shell_process: shell_process,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for FilterWriter {
|
||||||
|
/// flush stdin, close it and wait on `shell_process` before dropping self
|
||||||
|
fn drop(&mut self) {
|
||||||
|
{
|
||||||
|
// close stdin by dropping it
|
||||||
|
let _stdin = self.shell_process.stdin.as_mut();
|
||||||
|
}
|
||||||
|
let exit_status = self
|
||||||
|
.shell_process
|
||||||
|
.wait()
|
||||||
|
.expect("Couldn't wait for child process");
|
||||||
|
if let Some(return_code) = exit_status.code() {
|
||||||
|
if return_code != 0 {
|
||||||
|
crash!(1, "Shell process returned {}", return_code);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
crash!(1, "Shell process terminated by signal")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Instantiate either a file writer or a "write to shell process's stdin" writer
|
||||||
|
pub fn instantiate_current_writer(
|
||||||
|
filter: &Option<String>,
|
||||||
|
filename: &str,
|
||||||
|
) -> BufWriter<Box<dyn Write>> {
|
||||||
|
match filter {
|
||||||
|
None => BufWriter::new(Box::new(
|
||||||
|
// write to the next file
|
||||||
|
std::fs::OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.create(true)
|
||||||
|
.open(std::path::Path::new(&filename))
|
||||||
|
.unwrap(),
|
||||||
|
) as Box<dyn Write>),
|
||||||
|
Some(ref filter_command) => BufWriter::new(Box::new(
|
||||||
|
// spawn a shell command and write to it
|
||||||
|
FilterWriter::new(&filter_command, &filename),
|
||||||
|
) as Box<dyn Write>),
|
||||||
|
}
|
||||||
|
}
|
19
src/uu/split/src/platform/windows.rs
Normal file
19
src/uu/split/src/platform/windows.rs
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
use std::io::BufWriter;
|
||||||
|
use std::io::Write;
|
||||||
|
/// Get a file writer
///
/// Unlike the unix version of this function, this _always_ returns
/// a file writer
///
/// # Arguments
///
/// * `_filter` - Ignored by this implementation
/// * `filename` - Path of the output file to create
///
/// # Panics
///
/// Panics when the output file cannot be opened for writing.
pub fn instantiate_current_writer(
    _filter: &Option<String>,
    filename: &str,
) -> BufWriter<Box<dyn Write>> {
    // write to the next file
    let file = std::fs::OpenOptions::new()
        .write(true)
        .create(true)
        // discard the contents of a pre-existing file, otherwise bytes past the end of the new
        // chunk would survive from the old file
        .truncate(true)
        .open(filename)
        .unwrap_or_else(|err| panic!("cannot open '{}' for writing: {}", filename, err));
    BufWriter::new(Box::new(file) as Box<dyn Write>)
}
|
|
@ -10,8 +10,11 @@
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
|
|
||||||
|
mod platform;
|
||||||
|
|
||||||
use std::char;
|
use std::char;
|
||||||
use std::fs::{File, OpenOptions};
|
use std::env;
|
||||||
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
|
@ -47,6 +50,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
"additional suffix to append to output file names",
|
"additional suffix to append to output file names",
|
||||||
"SUFFIX",
|
"SUFFIX",
|
||||||
);
|
);
|
||||||
|
opts.optopt(
|
||||||
|
"",
|
||||||
|
"filter",
|
||||||
|
"write to shell COMMAND file name is $FILE (Currently not implemented for Windows)",
|
||||||
|
"COMMAND",
|
||||||
|
);
|
||||||
opts.optopt("l", "lines", "put NUMBER lines per output file", "NUMBER");
|
opts.optopt("l", "lines", "put NUMBER lines per output file", "NUMBER");
|
||||||
opts.optflag(
|
opts.optflag(
|
||||||
"",
|
"",
|
||||||
|
@ -92,6 +101,7 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
|
||||||
suffix_length: 0,
|
suffix_length: 0,
|
||||||
additional_suffix: "".to_owned(),
|
additional_suffix: "".to_owned(),
|
||||||
input: "".to_owned(),
|
input: "".to_owned(),
|
||||||
|
filter: None,
|
||||||
strategy: "".to_owned(),
|
strategy: "".to_owned(),
|
||||||
strategy_param: "".to_owned(),
|
strategy_param: "".to_owned(),
|
||||||
verbose: false,
|
verbose: false,
|
||||||
|
@ -138,6 +148,14 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
|
||||||
settings.input = input;
|
settings.input = input;
|
||||||
settings.prefix = prefix;
|
settings.prefix = prefix;
|
||||||
|
|
||||||
|
settings.filter = matches.opt_str("filter");
|
||||||
|
|
||||||
|
if settings.filter.is_some() && cfg!(windows) {
|
||||||
|
// see https://github.com/rust-lang/rust/issues/29494
|
||||||
|
show_error!("--filter is currently not supported in this platform");
|
||||||
|
exit!(-1);
|
||||||
|
}
|
||||||
|
|
||||||
split(&settings)
|
split(&settings)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,6 +165,8 @@ struct Settings {
|
||||||
suffix_length: usize,
|
suffix_length: usize,
|
||||||
additional_suffix: String,
|
additional_suffix: String,
|
||||||
input: String,
|
input: String,
|
||||||
|
/// When supplied, a shell command to output to instead of xaa, xab …
|
||||||
|
filter: Option<String>,
|
||||||
strategy: String,
|
strategy: String,
|
||||||
strategy_param: String,
|
strategy_param: String,
|
||||||
verbose: bool,
|
verbose: bool,
|
||||||
|
@ -323,7 +343,6 @@ fn split(settings: &Settings) -> i32 {
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if control.request_new_file {
|
if control.request_new_file {
|
||||||
let mut filename = settings.prefix.clone();
|
let mut filename = settings.prefix.clone();
|
||||||
filename.push_str(
|
filename.push_str(
|
||||||
|
@ -336,17 +355,9 @@ fn split(settings: &Settings) -> i32 {
|
||||||
);
|
);
|
||||||
filename.push_str(settings.additional_suffix.as_ref());
|
filename.push_str(settings.additional_suffix.as_ref());
|
||||||
|
|
||||||
if fileno != 0 {
|
crash_if_err!(1, writer.flush());
|
||||||
crash_if_err!(1, writer.flush());
|
|
||||||
}
|
|
||||||
fileno += 1;
|
fileno += 1;
|
||||||
writer = BufWriter::new(Box::new(
|
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||||
OpenOptions::new()
|
|
||||||
.write(true)
|
|
||||||
.create(true)
|
|
||||||
.open(Path::new(&filename))
|
|
||||||
.unwrap(),
|
|
||||||
) as Box<dyn Write>);
|
|
||||||
control.request_new_file = false;
|
control.request_new_file = false;
|
||||||
if settings.verbose {
|
if settings.verbose {
|
||||||
println!("creating file '{}'", filename);
|
println!("creating file '{}'", filename);
|
||||||
|
|
|
@ -4,6 +4,8 @@ extern crate regex;
|
||||||
use self::rand::{thread_rng, Rng};
|
use self::rand::{thread_rng, Rng};
|
||||||
use self::regex::Regex;
|
use self::regex::Regex;
|
||||||
use crate::common::util::*;
|
use crate::common::util::*;
|
||||||
|
#[cfg(not(windows))]
|
||||||
|
use std::env;
|
||||||
use std::fs::{read_dir, File};
|
use std::fs::{read_dir, File};
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
@ -32,6 +34,7 @@ impl Glob {
|
||||||
self.collect().len()
|
self.collect().len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get all files in `self.directory` that match `self.regex`
|
||||||
fn collect(&self) -> Vec<String> {
|
fn collect(&self) -> Vec<String> {
|
||||||
read_dir(Path::new(&self.directory.subdir))
|
read_dir(Path::new(&self.directory.subdir))
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -49,6 +52,7 @@ impl Glob {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Accumulate bytes of all files in `self.collect()`
|
||||||
fn collate(&self) -> Vec<u8> {
|
fn collate(&self) -> Vec<u8> {
|
||||||
let mut files = self.collect();
|
let mut files = self.collect();
|
||||||
files.sort();
|
files.sort();
|
||||||
|
@ -60,11 +64,16 @@ impl Glob {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// File handle that user can add random bytes (line-formatted or not) to
|
||||||
struct RandomFile {
|
struct RandomFile {
|
||||||
inner: File,
|
inner: File,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RandomFile {
|
impl RandomFile {
|
||||||
|
/// Size of each line that's being generated
|
||||||
|
const LINESIZE: usize = 32;
|
||||||
|
|
||||||
|
/// `create()` file handle located at `at` / `name`
|
||||||
fn new(at: &AtPath, name: &str) -> RandomFile {
|
fn new(at: &AtPath, name: &str) -> RandomFile {
|
||||||
RandomFile {
|
RandomFile {
|
||||||
inner: File::create(&at.plus(name)).unwrap(),
|
inner: File::create(&at.plus(name)).unwrap(),
|
||||||
|
@ -81,11 +90,11 @@ impl RandomFile {
|
||||||
let _ = write!(self.inner, "{}", random_chars(n));
|
let _ = write!(self.inner, "{}", random_chars(n));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add n lines each of size `RandomFile::LINESIZE`
|
||||||
fn add_lines(&mut self, lines: usize) {
|
fn add_lines(&mut self, lines: usize) {
|
||||||
let line_size: usize = 32;
|
|
||||||
let mut n = lines;
|
let mut n = lines;
|
||||||
while n > 0 {
|
while n > 0 {
|
||||||
let _ = writeln!(self.inner, "{}", random_chars(line_size));
|
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
|
||||||
n -= 1;
|
n -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -156,3 +165,64 @@ fn test_split_additional_suffix() {
|
||||||
assert_eq!(glob.count(), 2);
|
assert_eq!(glob.count(), 2);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// note: the test_filter* tests below are unix-only
|
||||||
|
// windows support has been waived for now because of the difficulty of getting
|
||||||
|
// the `cmd` call right
|
||||||
|
// see https://github.com/rust-lang/rust/issues/29494
|
||||||
|
|
||||||
|
#[test]
#[cfg(unix)]
fn test_filter() {
    // same scenario as `test_split_default()`, but every chunk is piped through a command
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "filtered";
    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    RandomFile::new(&at, input).add_lines(3);

    // the filter changes all characters to 'i'
    ucmd.args(&["--filter=sed s/./i/g > $FILE", input])
        .succeeds();

    // assert that the command succeeded: the collated output may only contain 'i' bytes and
    // newlines
    let only_i_or_newline = chunks
        .collate()
        .iter()
        .all(|&byte| byte == b'i' || byte == b'\n');
    assert!(only_i_or_newline);
}
|
||||||
|
|
||||||
|
#[test]
#[cfg(unix)]
fn test_filter_with_env_var_set() {
    // Checks that a $FILE value that was set before running --filter is still in place
    // afterwards.
    // Same scenario as `test_split_default()`, with every chunk piped through `cat`.
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "filtered";
    let chunks = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
    RandomFile::new(&at, input).add_lines(3);

    let expected_value = "somevalue";
    env::set_var("FILE", expected_value);
    ucmd.args(&["--filter=cat > $FILE", input]).succeeds();

    // `cat` leaves the data untouched, so the collated chunks equal the input
    assert_eq!(chunks.collate(), at.read(input).into_bytes());
    let file_var = env::var("FILE").unwrap_or_else(|_| "var was unset".to_owned());
    assert!(file_var == expected_value);
}
|
||||||
|
|
||||||
|
#[test]
#[cfg(unix)]
fn test_filter_command_fails() {
    // a filter command pointing at a nonexistent path is expected to make the run fail
    let (at, mut ucmd) = at_and_ucmd!();
    let input = "filter-will-fail";
    RandomFile::new(&at, input).add_lines(4);

    let args = ["--filter=/a/path/that/totally/does/not/exist", input];
    ucmd.args(&args).fails();
}
|
||||||
|
|
|
@ -72,8 +72,12 @@ pub fn repeat_str(s: &str, n: u32) -> String {
|
||||||
pub struct CmdResult {
|
pub struct CmdResult {
|
||||||
//tmpd is used for convenience functions for asserts against fixtures
|
//tmpd is used for convenience functions for asserts against fixtures
|
||||||
tmpd: Option<Rc<TempDir>>,
|
tmpd: Option<Rc<TempDir>>,
|
||||||
|
/// zero-exit from running the Command?
|
||||||
|
/// see [`success`]
|
||||||
pub success: bool,
|
pub success: bool,
|
||||||
|
/// captured utf-8 standard output after running the Command
|
||||||
pub stdout: String,
|
pub stdout: String,
|
||||||
|
/// captured utf-8 standard error after running the Command
|
||||||
pub stderr: String,
|
pub stderr: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue