mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Fix split's handling of non-UTF-8 files
This commit is contained in:
parent
a9ac7af9e1
commit
7c1395366e
2 changed files with 176 additions and 113 deletions
|
@ -13,11 +13,11 @@ extern crate uucore;
|
||||||
mod platform;
|
mod platform;
|
||||||
|
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
use std::char;
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::{char, fs::remove_file};
|
||||||
|
|
||||||
static NAME: &str = "split";
|
static NAME: &str = "split";
|
||||||
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||||
|
@ -213,50 +213,65 @@ struct Settings {
|
||||||
verbose: bool,
|
verbose: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct SplitControl {
|
|
||||||
current_line: String, // Don't touch
|
|
||||||
request_new_file: bool, // Splitter implementation requests new file
|
|
||||||
}
|
|
||||||
|
|
||||||
trait Splitter {
|
trait Splitter {
|
||||||
// Consume the current_line and return the consumed string
|
// Consume as much as possible from `reader` so as to saturate `writer`.
|
||||||
fn consume(&mut self, _: &mut SplitControl) -> String;
|
// Equivalent to finishing one of the part files. Returns the number of
|
||||||
|
// bytes that have been moved.
|
||||||
|
fn consume(
|
||||||
|
&mut self,
|
||||||
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
|
) -> usize;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LineSplitter {
|
struct LineSplitter {
|
||||||
saved_lines_to_write: usize,
|
lines_per_split: usize,
|
||||||
lines_to_write: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LineSplitter {
|
impl LineSplitter {
|
||||||
fn new(settings: &Settings) -> LineSplitter {
|
fn new(settings: &Settings) -> LineSplitter {
|
||||||
let n = match settings.strategy_param.parse() {
|
|
||||||
Ok(a) => a,
|
|
||||||
Err(e) => crash!(1, "invalid number of lines: {}", e),
|
|
||||||
};
|
|
||||||
LineSplitter {
|
LineSplitter {
|
||||||
saved_lines_to_write: n,
|
lines_per_split: settings
|
||||||
lines_to_write: n,
|
.strategy_param
|
||||||
|
.parse()
|
||||||
|
.unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Splitter for LineSplitter {
|
impl Splitter for LineSplitter {
|
||||||
fn consume(&mut self, control: &mut SplitControl) -> String {
|
fn consume(
|
||||||
self.lines_to_write -= 1;
|
&mut self,
|
||||||
if self.lines_to_write == 0 {
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
self.lines_to_write = self.saved_lines_to_write;
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
control.request_new_file = true;
|
) -> usize {
|
||||||
|
let mut bytes_consumed = 0usize;
|
||||||
|
let mut buffer = String::with_capacity(1024);
|
||||||
|
for _ in 0..self.lines_per_split {
|
||||||
|
let bytes_read = reader
|
||||||
|
.read_line(&mut buffer)
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
|
||||||
|
// If we ever read 0 bytes then we know we've hit EOF.
|
||||||
|
if bytes_read == 0 {
|
||||||
|
return bytes_consumed;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer
|
||||||
|
.write_all(buffer.as_bytes())
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
|
||||||
|
// Empty out the String buffer since `read_line` appends instead of
|
||||||
|
// replaces.
|
||||||
|
buffer.clear();
|
||||||
|
|
||||||
|
bytes_consumed += bytes_read;
|
||||||
}
|
}
|
||||||
control.current_line.clone()
|
|
||||||
|
bytes_consumed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ByteSplitter {
|
struct ByteSplitter {
|
||||||
saved_bytes_to_write: usize,
|
bytes_per_split: usize,
|
||||||
bytes_to_write: usize,
|
|
||||||
break_on_line_end: bool,
|
|
||||||
require_whole_line: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ByteSplitter {
|
impl ByteSplitter {
|
||||||
|
@ -294,36 +309,44 @@ impl ByteSplitter {
|
||||||
// Try to parse the actual numeral.
|
// Try to parse the actual numeral.
|
||||||
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
|
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
|
||||||
.parse::<usize>()
|
.parse::<usize>()
|
||||||
.unwrap_or_else(|_| crash!(1, "invalid number of bytes"));
|
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
|
||||||
|
|
||||||
ByteSplitter {
|
ByteSplitter {
|
||||||
saved_bytes_to_write: n * multiplier,
|
bytes_per_split: n * multiplier,
|
||||||
bytes_to_write: n * multiplier,
|
|
||||||
break_on_line_end: settings.strategy == "b",
|
|
||||||
require_whole_line: false,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Splitter for ByteSplitter {
|
impl Splitter for ByteSplitter {
|
||||||
fn consume(&mut self, control: &mut SplitControl) -> String {
|
fn consume(
|
||||||
let line = control.current_line.clone();
|
&mut self,
|
||||||
let n = std::cmp::min(line.chars().count(), self.bytes_to_write);
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
if self.require_whole_line && n < line.chars().count() {
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
self.bytes_to_write = self.saved_bytes_to_write;
|
) -> usize {
|
||||||
control.request_new_file = true;
|
// We buffer reads and writes. We proceed until `bytes_consumed` is
|
||||||
self.require_whole_line = false;
|
// equal to `self.bytes_per_split` or we reach EOF.
|
||||||
return "".to_owned();
|
let mut bytes_consumed = 0usize;
|
||||||
|
const BUFFER_SIZE: usize = 1024;
|
||||||
|
let mut buffer = [0u8; BUFFER_SIZE];
|
||||||
|
while bytes_consumed < self.bytes_per_split {
|
||||||
|
// Don't overshoot `self.bytes_per_split`!
|
||||||
|
let bytes_desired = std::cmp::min(BUFFER_SIZE, self.bytes_per_split - bytes_consumed);
|
||||||
|
let bytes_read = reader
|
||||||
|
.read(&mut buffer[0..bytes_desired])
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
|
||||||
|
// If we ever read 0 bytes then we know we've hit EOF.
|
||||||
|
if bytes_read == 0 {
|
||||||
|
return bytes_consumed;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer
|
||||||
|
.write_all(&buffer[0..bytes_read])
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
|
||||||
|
|
||||||
|
bytes_consumed += bytes_read;
|
||||||
}
|
}
|
||||||
self.bytes_to_write -= n;
|
|
||||||
if n == 0 {
|
bytes_consumed
|
||||||
self.bytes_to_write = self.saved_bytes_to_write;
|
|
||||||
control.request_new_file = true;
|
|
||||||
}
|
|
||||||
if self.break_on_line_end && n == line.chars().count() {
|
|
||||||
self.require_whole_line = self.break_on_line_end;
|
|
||||||
}
|
|
||||||
line[..n].to_owned()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -363,14 +386,13 @@ fn split(settings: &Settings) -> i32 {
|
||||||
let mut reader = BufReader::new(if settings.input == "-" {
|
let mut reader = BufReader::new(if settings.input == "-" {
|
||||||
Box::new(stdin()) as Box<dyn Read>
|
Box::new(stdin()) as Box<dyn Read>
|
||||||
} else {
|
} else {
|
||||||
let r = match File::open(Path::new(&settings.input)) {
|
let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| {
|
||||||
Ok(a) => a,
|
crash!(
|
||||||
Err(_) => crash!(
|
|
||||||
1,
|
1,
|
||||||
"cannot open '{}' for reading: No such file or directory",
|
"cannot open '{}' for reading: No such file or directory",
|
||||||
settings.input
|
settings.input
|
||||||
),
|
)
|
||||||
};
|
});
|
||||||
Box::new(r) as Box<dyn Read>
|
Box::new(r) as Box<dyn Read>
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -380,48 +402,39 @@ fn split(settings: &Settings) -> i32 {
|
||||||
a => crash!(1, "strategy {} not supported", a),
|
a => crash!(1, "strategy {} not supported", a),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut control = SplitControl {
|
|
||||||
current_line: "".to_owned(), // Request new line
|
|
||||||
request_new_file: true, // Request new file
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut writer = BufWriter::new(Box::new(stdout()) as Box<dyn Write>);
|
|
||||||
let mut fileno = 0;
|
let mut fileno = 0;
|
||||||
loop {
|
loop {
|
||||||
if control.current_line.chars().count() == 0 {
|
// Get a new part file set up, and construct `writer` for it.
|
||||||
match reader.read_line(&mut control.current_line) {
|
let mut filename = settings.prefix.clone();
|
||||||
Ok(0) | Err(_) => break,
|
filename.push_str(
|
||||||
_ => {}
|
if settings.numeric_suffix {
|
||||||
|
num_prefix(fileno, settings.suffix_length)
|
||||||
|
} else {
|
||||||
|
str_prefix(fileno, settings.suffix_length)
|
||||||
}
|
}
|
||||||
}
|
.as_ref(),
|
||||||
if control.request_new_file {
|
);
|
||||||
let mut filename = settings.prefix.clone();
|
filename.push_str(settings.additional_suffix.as_ref());
|
||||||
filename.push_str(
|
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||||
if settings.numeric_suffix {
|
|
||||||
num_prefix(fileno, settings.suffix_length)
|
|
||||||
} else {
|
|
||||||
str_prefix(fileno, settings.suffix_length)
|
|
||||||
}
|
|
||||||
.as_ref(),
|
|
||||||
);
|
|
||||||
filename.push_str(settings.additional_suffix.as_ref());
|
|
||||||
|
|
||||||
crash_if_err!(1, writer.flush());
|
let bytes_consumed = splitter.consume(&mut reader, &mut writer);
|
||||||
fileno += 1;
|
writer
|
||||||
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
.flush()
|
||||||
control.request_new_file = false;
|
.unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e));
|
||||||
if settings.verbose {
|
|
||||||
println!("creating file '{}'", filename);
|
// If we didn't write anything we should clean up the empty file, and
|
||||||
|
// break from the loop.
|
||||||
|
if bytes_consumed == 0 {
|
||||||
|
// The output file is only ever created if filter's aren't used.
|
||||||
|
// Complicated, I know...
|
||||||
|
if settings.filter.is_none() {
|
||||||
|
remove_file(filename)
|
||||||
|
.unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e));
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
let consumed = splitter.consume(&mut control);
|
fileno += 1;
|
||||||
crash_if_err!(1, writer.write_all(consumed.as_bytes()));
|
|
||||||
|
|
||||||
let advance = consumed.chars().count();
|
|
||||||
let clone = control.current_line.clone();
|
|
||||||
let sl = clone;
|
|
||||||
control.current_line = sl[advance..sl.chars().count()].to_owned();
|
|
||||||
}
|
}
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,11 +4,15 @@ extern crate regex;
|
||||||
use self::rand::{thread_rng, Rng};
|
use self::rand::{thread_rng, Rng};
|
||||||
use self::regex::Regex;
|
use self::regex::Regex;
|
||||||
use crate::common::util::*;
|
use crate::common::util::*;
|
||||||
|
use rand::SeedableRng;
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs::{read_dir, File};
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::{
|
||||||
|
fs::{read_dir, File},
|
||||||
|
io::BufWriter,
|
||||||
|
};
|
||||||
|
|
||||||
fn random_chars(n: usize) -> String {
|
fn random_chars(n: usize) -> String {
|
||||||
thread_rng()
|
thread_rng()
|
||||||
|
@ -58,7 +62,7 @@ impl Glob {
|
||||||
files.sort();
|
files.sort();
|
||||||
let mut data: Vec<u8> = vec![];
|
let mut data: Vec<u8> = vec![];
|
||||||
for name in &files {
|
for name in &files {
|
||||||
data.extend(self.directory.read(name).into_bytes());
|
data.extend(self.directory.read_bytes(name));
|
||||||
}
|
}
|
||||||
data
|
data
|
||||||
}
|
}
|
||||||
|
@ -81,20 +85,30 @@ impl RandomFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_bytes(&mut self, bytes: usize) {
|
fn add_bytes(&mut self, bytes: usize) {
|
||||||
let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes };
|
// Note that just writing random characters isn't enough to cover all
|
||||||
let mut n = bytes;
|
// cases. We need truly random bytes.
|
||||||
while n > chunk_size {
|
let mut writer = BufWriter::new(&self.inner);
|
||||||
let _ = write!(self.inner, "{}", random_chars(chunk_size));
|
|
||||||
n -= chunk_size;
|
// Seed the rng so as to avoid spurious test failures.
|
||||||
|
let mut rng = rand::rngs::StdRng::seed_from_u64(123);
|
||||||
|
let mut buffer = [0; 1024];
|
||||||
|
let mut remaining_size = bytes;
|
||||||
|
|
||||||
|
while remaining_size > 0 {
|
||||||
|
let to_write = std::cmp::min(remaining_size, buffer.len());
|
||||||
|
let buf = &mut buffer[..to_write];
|
||||||
|
rng.fill(buf);
|
||||||
|
writer.write(buf).unwrap();
|
||||||
|
|
||||||
|
remaining_size -= to_write;
|
||||||
}
|
}
|
||||||
let _ = write!(self.inner, "{}", random_chars(n));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add n lines each of size `RandomFile::LINESIZE`
|
/// Add n lines each of size `RandomFile::LINESIZE`
|
||||||
fn add_lines(&mut self, lines: usize) {
|
fn add_lines(&mut self, lines: usize) {
|
||||||
let mut n = lines;
|
let mut n = lines;
|
||||||
while n > 0 {
|
while n > 0 {
|
||||||
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
|
writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap();
|
||||||
n -= 1;
|
n -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -104,18 +118,18 @@ impl RandomFile {
|
||||||
fn test_split_default() {
|
fn test_split_default() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_default";
|
let name = "split_default";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
|
||||||
RandomFile::new(&at, name).add_lines(2000);
|
RandomFile::new(&at, name).add_lines(2000);
|
||||||
ucmd.args(&[name]).succeeds();
|
ucmd.args(&[name]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
||||||
assert_eq!(glob.count(), 2);
|
assert_eq!(glob.count(), 2);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_numeric_prefixed_chunks_by_bytes() {
|
fn test_split_numeric_prefixed_chunks_by_bytes() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_num_prefixed_chunks_by_bytes";
|
let name = "split_num_prefixed_chunks_by_bytes";
|
||||||
let glob = Glob::new(&at, ".", r"a\d\d$");
|
|
||||||
RandomFile::new(&at, name).add_bytes(10000);
|
RandomFile::new(&at, name).add_bytes(10000);
|
||||||
ucmd.args(&[
|
ucmd.args(&[
|
||||||
"-d", // --numeric-suffixes
|
"-d", // --numeric-suffixes
|
||||||
|
@ -123,52 +137,86 @@ fn test_split_numeric_prefixed_chunks_by_bytes() {
|
||||||
"1000", name, "a",
|
"1000", name, "a",
|
||||||
])
|
])
|
||||||
.succeeds();
|
.succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"a\d\d$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
for filename in glob.collect() {
|
||||||
|
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
|
||||||
|
}
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_str_prefixed_chunks_by_bytes() {
|
fn test_split_str_prefixed_chunks_by_bytes() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_str_prefixed_chunks_by_bytes";
|
let name = "split_str_prefixed_chunks_by_bytes";
|
||||||
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
|
||||||
RandomFile::new(&at, name).add_bytes(10000);
|
RandomFile::new(&at, name).add_bytes(10000);
|
||||||
|
// Important that this is less than 1024 since that's our internal buffer
|
||||||
|
// size. Good to test that we don't overshoot.
|
||||||
ucmd.args(&["-b", "1000", name, "b"]).succeeds();
|
ucmd.args(&["-b", "1000", name, "b"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
for filename in glob.collect() {
|
||||||
|
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
|
||||||
|
}
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is designed to test what happens when the desired part size is not a
|
||||||
|
// multiple of the buffer size and we hopefully don't overshoot the desired part
|
||||||
|
// size.
|
||||||
|
#[test]
|
||||||
|
fn test_split_bytes_prime_part_size() {
|
||||||
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
|
let name = "test_split_bytes_prime_part_size";
|
||||||
|
RandomFile::new(&at, name).add_bytes(10000);
|
||||||
|
// 1753 is prime and greater than the buffer size, 1024.
|
||||||
|
ucmd.args(&["-b", "1753", name, "b"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
||||||
|
assert_eq!(glob.count(), 6);
|
||||||
|
for i in 0..5 {
|
||||||
|
assert_eq!(glob.directory.metadata(&glob.collect()[i]).len(), 1753);
|
||||||
|
}
|
||||||
|
assert_eq!(glob.directory.metadata(&glob.collect()[5]).len(), 1235);
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_num_prefixed_chunks_by_lines() {
|
fn test_split_num_prefixed_chunks_by_lines() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_num_prefixed_chunks_by_lines";
|
let name = "split_num_prefixed_chunks_by_lines";
|
||||||
let glob = Glob::new(&at, ".", r"c\d\d$");
|
|
||||||
RandomFile::new(&at, name).add_lines(10000);
|
RandomFile::new(&at, name).add_lines(10000);
|
||||||
ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds();
|
ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"c\d\d$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_str_prefixed_chunks_by_lines() {
|
fn test_split_str_prefixed_chunks_by_lines() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_str_prefixed_chunks_by_lines";
|
let name = "split_str_prefixed_chunks_by_lines";
|
||||||
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
|
|
||||||
RandomFile::new(&at, name).add_lines(10000);
|
RandomFile::new(&at, name).add_lines(10000);
|
||||||
ucmd.args(&["-l", "1000", name, "d"]).succeeds();
|
ucmd.args(&["-l", "1000", name, "d"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_additional_suffix() {
|
fn test_split_additional_suffix() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_additional_suffix";
|
let name = "split_additional_suffix";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
|
|
||||||
RandomFile::new(&at, name).add_lines(2000);
|
RandomFile::new(&at, name).add_lines(2000);
|
||||||
ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();
|
ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
|
||||||
assert_eq!(glob.count(), 2);
|
assert_eq!(glob.count(), 2);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
// note: the test_filter* tests below are unix-only
|
// note: the test_filter* tests below are unix-only
|
||||||
|
@ -182,15 +230,16 @@ fn test_filter() {
|
||||||
// like `test_split_default()` but run a command before writing
|
// like `test_split_default()` but run a command before writing
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "filtered";
|
let name = "filtered";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
|
||||||
let n_lines = 3;
|
let n_lines = 3;
|
||||||
RandomFile::new(&at, name).add_lines(n_lines);
|
RandomFile::new(&at, name).add_lines(n_lines);
|
||||||
|
|
||||||
// change all characters to 'i'
|
// change all characters to 'i'
|
||||||
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
|
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
|
||||||
.succeeds();
|
.succeeds();
|
||||||
|
|
||||||
// assert all characters are 'i' / no character is not 'i'
|
// assert all characters are 'i' / no character is not 'i'
|
||||||
// (assert that command succeded)
|
// (assert that command succeded)
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
||||||
assert!(
|
assert!(
|
||||||
glob.collate().iter().find(|&&c| {
|
glob.collate().iter().find(|&&c| {
|
||||||
// is not i
|
// is not i
|
||||||
|
@ -209,7 +258,6 @@ fn test_filter_with_env_var_set() {
|
||||||
// implemented like `test_split_default()` but run a command before writing
|
// implemented like `test_split_default()` but run a command before writing
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "filtered";
|
let name = "filtered";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
|
||||||
let n_lines = 3;
|
let n_lines = 3;
|
||||||
RandomFile::new(&at, name).add_lines(n_lines);
|
RandomFile::new(&at, name).add_lines(n_lines);
|
||||||
|
|
||||||
|
@ -217,7 +265,9 @@ fn test_filter_with_env_var_set() {
|
||||||
env::set_var("FILE", &env_var_value);
|
env::set_var("FILE", &env_var_value);
|
||||||
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
|
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
|
||||||
.succeeds();
|
.succeeds();
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
|
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue