1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

--filter argument for split (#1681)

This commit is contained in:
Felipe Lema 2021-01-18 10:42:44 -03:00 committed by GitHub
parent 0bb5adad3f
commit 88911be6e0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 253 additions and 14 deletions

View file

@ -4,6 +4,8 @@ extern crate regex;
use self::rand::{thread_rng, Rng};
use self::regex::Regex;
use crate::common::util::*;
#[cfg(not(windows))]
use std::env;
use std::fs::{read_dir, File};
use std::io::Write;
use std::path::Path;
@ -32,6 +34,7 @@ impl Glob {
self.collect().len()
}
/// Get all files in `self.directory` that match `self.regex`
fn collect(&self) -> Vec<String> {
read_dir(Path::new(&self.directory.subdir))
.unwrap()
@ -49,6 +52,7 @@ impl Glob {
.collect()
}
/// Accumulate bytes of all files in `self.collect()`
fn collate(&self) -> Vec<u8> {
let mut files = self.collect();
files.sort();
@ -60,11 +64,16 @@ impl Glob {
}
}
/// File handle that user can add random bytes (line-formatted or not) to
struct RandomFile {
inner: File,
}
impl RandomFile {
/// Size of each line that's being generated
const LINESIZE: usize = 32;
/// `create()` file handle located at `at` / `name`
fn new(at: &AtPath, name: &str) -> RandomFile {
RandomFile {
inner: File::create(&at.plus(name)).unwrap(),
@ -81,11 +90,11 @@ impl RandomFile {
let _ = write!(self.inner, "{}", random_chars(n));
}
/// Add n lines each of size `RandomFile::LINESIZE`
fn add_lines(&mut self, lines: usize) {
let line_size: usize = 32;
let mut n = lines;
while n > 0 {
let _ = writeln!(self.inner, "{}", random_chars(line_size));
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
n -= 1;
}
}
@ -156,3 +165,64 @@ fn test_split_additional_suffix() {
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
}
// note: the test_filter* tests below are unix-only
// windows support has been waived for now because of the difficulty of getting
// the `cmd` call right
// see https://github.com/rust-lang/rust/issues/29494
#[test]
#[cfg(unix)]
fn test_filter() {
// like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
// change all characters to 'i'
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
.succeeds();
// assert all characters are 'i' / no character is not 'i'
// (assert that command succeded)
assert!(
glob.collate().iter().find(|&&c| {
// is not i
c != ('i' as u8)
// is not newline
&& c != ('\n' as u8)
}) == None
);
}
#[test]
#[cfg(unix)]
fn test_filter_with_env_var_set() {
// This test will ensure that if $FILE env var was set before running --filter, it'll stay that
// way
// implemented like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
let env_var_value = "somevalue";
env::set_var("FILE", &env_var_value);
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
.succeeds();
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
}
#[test]
#[cfg(unix)]
fn test_filter_command_fails() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "filter-will-fail";
RandomFile::new(&at, name).add_lines(4);
ucmd.args(&["--filter=/a/path/that/totally/does/not/exist", name])
.fails();
}