mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
split: pass GNU test l-chunk
This commit is contained in:
parent
97d30bd486
commit
4dc46f10e9
3 changed files with 264 additions and 176 deletions
|
@ -1130,14 +1130,68 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Output file parameters
|
||||
struct OutFile {
|
||||
filename: String,
|
||||
maybe_writer: Option<BufWriter<Box<dyn Write>>>,
|
||||
}
|
||||
|
||||
impl OutFile {
|
||||
/// Get the writer for the output file
|
||||
/// Instantiate the writer if it has not been instantiated upfront
|
||||
fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter<Box<dyn Write>>> {
|
||||
if self.maybe_writer.is_some() {
|
||||
Ok(self.maybe_writer.as_mut().unwrap())
|
||||
} else {
|
||||
// Writer was not instantiated upfront
|
||||
// Instantiate it and record for future use
|
||||
self.maybe_writer = Some(settings.instantiate_current_writer(self.filename.as_str())?);
|
||||
Ok(self.maybe_writer.as_mut().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a set of Output Files
|
||||
/// This is a helper function to [`n_chunks_by_byte`], [`n_chunks_by_line`]
|
||||
/// and [`n_chunks_by_line_round_robin`].
|
||||
/// Each OutFile is generated with filename, while the writer for it could be
|
||||
/// optional, to be instantiated later by the calling function as needed.
|
||||
/// Optional writers could happen in [`n_chunks_by_line`]
|
||||
/// if `elide_empty_files` parameter is set to `true`.
|
||||
fn get_out_files(
|
||||
num_files: u64,
|
||||
settings: &Settings,
|
||||
is_writer_optional: bool,
|
||||
) -> UResult<Vec<OutFile>> {
|
||||
// This object is responsible for creating the filename for each chunk
|
||||
let mut filename_iterator: FilenameIterator<'_> =
|
||||
FilenameIterator::new(&settings.prefix, &settings.suffix)
|
||||
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
|
||||
let mut out_files: Vec<OutFile> = Vec::new();
|
||||
for _ in 0..num_files {
|
||||
let filename = filename_iterator
|
||||
.next()
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
let maybe_writer = if is_writer_optional {
|
||||
None
|
||||
} else {
|
||||
Some(settings.instantiate_current_writer(filename.as_str())?)
|
||||
};
|
||||
out_files.push(OutFile {
|
||||
filename,
|
||||
maybe_writer,
|
||||
});
|
||||
}
|
||||
Ok(out_files)
|
||||
}
|
||||
|
||||
/// Split a file or STDIN into a specific number of chunks by byte.
|
||||
/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
|
||||
///
|
||||
/// When file size cannot be evenly divided into the number of chunks of the same size,
|
||||
/// the first X chunks are 1 byte longer than the rest,
|
||||
/// where X is the remainder of (file size % number of chunks)
|
||||
///
|
||||
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
|
||||
/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk`
|
||||
///
|
||||
/// In N chunks mode - this function always creates one output file for each chunk, even
|
||||
/// if there is an error reading or writing one of the chunks or if
|
||||
|
@ -1207,7 +1261,7 @@ where
|
|||
// In Kth chunk of N mode - we will write to stdout instead of to a file.
|
||||
let mut stdout_writer = std::io::stdout().lock();
|
||||
// In N chunks mode - we will write to `num_chunks` files
|
||||
let mut writers = vec![];
|
||||
let mut out_files: Vec<OutFile> = Vec::new();
|
||||
|
||||
// Calculate chunk size base and modulo remainder
|
||||
// to be used in calculating chunk_size later on
|
||||
|
@ -1219,16 +1273,7 @@ where
|
|||
// This will create each of the underlying files
|
||||
// or stdin pipes to child shell/command processes if in `--filter` mode
|
||||
if kth_chunk.is_none() {
|
||||
// This object is responsible for creating the filename for each chunk.
|
||||
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
|
||||
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
|
||||
for _ in 0..num_chunks {
|
||||
let filename = filename_iterator
|
||||
.next()
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
let writer = settings.instantiate_current_writer(filename.as_str())?;
|
||||
writers.push(writer);
|
||||
}
|
||||
out_files = get_out_files(num_chunks, settings, false)?;
|
||||
}
|
||||
|
||||
for i in 1_u64..=num_chunks {
|
||||
|
@ -1272,7 +1317,7 @@ where
|
|||
}
|
||||
None => {
|
||||
let idx = (i - 1) as usize;
|
||||
let writer = writers.get_mut(idx).unwrap();
|
||||
let writer = out_files[idx].get_writer(settings)?;
|
||||
writer.write_all(buf)?;
|
||||
}
|
||||
}
|
||||
|
@ -1284,9 +1329,14 @@ where
|
|||
}
|
||||
|
||||
/// Split a file or STDIN into a specific number of chunks by line.
|
||||
/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
|
||||
///
|
||||
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
|
||||
/// It is most likely that input cannot be evenly divided into the number of chunks
|
||||
/// of the same size in bytes or number of lines, since we cannot break lines.
|
||||
/// It is also likely that there could be empty files (when `elide_empty_files` is disabled)
|
||||
/// when a long line overlaps one or more chunks.
|
||||
///
|
||||
/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk`
|
||||
/// Note: the `elide_empty_files` flag is ignored in this mode
|
||||
///
|
||||
/// In N chunks mode - this function always creates one output file for each chunk, even
|
||||
/// if there is an error reading or writing one of the chunks or if
|
||||
|
@ -1322,76 +1372,97 @@ where
|
|||
let initial_buf = &mut Vec::new();
|
||||
let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
|
||||
let reader = initial_buf.chain(reader);
|
||||
let chunk_size = (num_bytes / num_chunks) as usize;
|
||||
|
||||
// If input file is empty and we would not have determined the Kth chunk
|
||||
// in the Kth chunk of N chunk mode, then terminate immediately.
|
||||
// This happens on `split -n l/3/10 /dev/null`, for example.
|
||||
if kth_chunk.is_some() && num_bytes == 0 {
|
||||
// Similarly, if input file is empty and `elide_empty_files` parameter is enabled,
|
||||
// then we would have written zero chunks of output,
|
||||
// so terminate immediately as well.
|
||||
// This happens on `split -e -n l/3 /dev/null`, for example.
|
||||
if num_bytes == 0 && (kth_chunk.is_some() || settings.elide_empty_files) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// In Kth chunk of N mode - we will write to stdout instead of to a file.
|
||||
let mut stdout_writer = std::io::stdout().lock();
|
||||
// In N chunks mode - we will write to `num_chunks` files
|
||||
let mut writers = vec![];
|
||||
let mut out_files: Vec<OutFile> = Vec::new();
|
||||
|
||||
// Calculate chunk size base and modulo remainder
|
||||
// to be used in calculating `num_bytes_should_be_written` later on
|
||||
let chunk_size_base = num_bytes / num_chunks;
|
||||
let chunk_size_reminder = num_bytes % num_chunks;
|
||||
|
||||
// If in N chunks mode
|
||||
// Create one writer for each chunk.
|
||||
// This will create each of the underlying files
|
||||
// or stdin pipes to child shell/command processes if in `--filter` mode
|
||||
// Generate filenames for each file and
|
||||
// if `elide_empty_files` parameter is NOT enabled - instantiate the writer
|
||||
// which will create each of the underlying files or stdin pipes
|
||||
// to child shell/command processes if in `--filter` mode.
|
||||
// Otherwise keep writer optional, to be instantiated later if there is data
|
||||
// to write for the associated chunk.
|
||||
if kth_chunk.is_none() {
|
||||
// This object is responsible for creating the filename for each chunk.
|
||||
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
|
||||
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
|
||||
for _ in 0..num_chunks {
|
||||
let filename = filename_iterator
|
||||
.next()
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
let writer = settings.instantiate_current_writer(filename.as_str())?;
|
||||
writers.push(writer);
|
||||
}
|
||||
out_files = get_out_files(num_chunks, settings, settings.elide_empty_files)?;
|
||||
}
|
||||
|
||||
let mut num_bytes_remaining_in_current_chunk = chunk_size;
|
||||
let mut i = 1;
|
||||
let mut chunk_number = 1;
|
||||
let sep = settings.separator;
|
||||
let mut num_bytes_should_be_written = chunk_size_base + (chunk_size_reminder > 0) as u64;
|
||||
let mut num_bytes_written = 0;
|
||||
|
||||
for line_result in reader.split(sep) {
|
||||
// add separator back in at the end of the line
|
||||
let mut line = line_result?;
|
||||
line.push(sep);
|
||||
// add separator back in at the end of the line,
|
||||
// since `reader.split(sep)` removes it,
|
||||
// except if the last line did not end with separator character
|
||||
if (num_bytes_written + line.len() as u64) < num_bytes {
|
||||
line.push(sep);
|
||||
}
|
||||
let bytes = line.as_slice();
|
||||
|
||||
match kth_chunk {
|
||||
Some(chunk_number) => {
|
||||
if i == chunk_number {
|
||||
Some(kth) => {
|
||||
if chunk_number == kth {
|
||||
stdout_writer.write_all(bytes)?;
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let idx = (i - 1) as usize;
|
||||
let maybe_writer = writers.get_mut(idx);
|
||||
let writer = maybe_writer.unwrap();
|
||||
// Should write into a file
|
||||
let idx = (chunk_number - 1) as usize;
|
||||
let writer = out_files[idx].get_writer(settings)?;
|
||||
custom_write_all(bytes, writer, settings)?;
|
||||
}
|
||||
}
|
||||
|
||||
let num_bytes = bytes.len();
|
||||
if num_bytes >= num_bytes_remaining_in_current_chunk {
|
||||
num_bytes_remaining_in_current_chunk = chunk_size;
|
||||
i += 1;
|
||||
} else {
|
||||
num_bytes_remaining_in_current_chunk -= num_bytes;
|
||||
// Advance to the next chunk if the current one is filled.
|
||||
// There could be a situation when a long line, which started in current chunk,
|
||||
// would overlap the next chunk (or even several next chunks),
|
||||
// and since we cannot break lines for this split strategy, we could end up with
|
||||
// empty files in place(s) of skipped chunk(s)
|
||||
let num_line_bytes = bytes.len() as u64;
|
||||
num_bytes_written += num_line_bytes;
|
||||
let mut skipped = -1;
|
||||
while num_bytes_should_be_written <= num_bytes_written {
|
||||
num_bytes_should_be_written +=
|
||||
chunk_size_base + (chunk_size_reminder > chunk_number) as u64;
|
||||
chunk_number += 1;
|
||||
skipped += 1;
|
||||
}
|
||||
|
||||
if let Some(chunk_number) = kth_chunk {
|
||||
if i > chunk_number {
|
||||
// If a chunk was skipped and `elide_empty_files` flag is set,
|
||||
// roll chunk_number back to preserve sequential continuity
|
||||
// of file names for files written to,
|
||||
// except for Kth chunk of N mode
|
||||
if settings.elide_empty_files && skipped > 0 && kth_chunk.is_none() {
|
||||
chunk_number -= skipped as u64;
|
||||
}
|
||||
|
||||
if let Some(kth) = kth_chunk {
|
||||
if chunk_number > kth {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1432,23 +1503,14 @@ where
|
|||
// In Kth chunk of N mode - we will write to stdout instead of to a file.
|
||||
let mut stdout_writer = std::io::stdout().lock();
|
||||
// In N chunks mode - we will write to `num_chunks` files
|
||||
let mut writers = vec![];
|
||||
let mut out_files: Vec<OutFile> = Vec::new();
|
||||
|
||||
// If in N chunks mode
|
||||
// Create one writer for each chunk.
|
||||
// This will create each of the underlying files
|
||||
// or stdin pipes to child shell/command processes if in `--filter` mode
|
||||
if kth_chunk.is_none() {
|
||||
// This object is responsible for creating the filename for each chunk.
|
||||
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
|
||||
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
|
||||
for _ in 0..num_chunks {
|
||||
let filename = filename_iterator
|
||||
.next()
|
||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||
let writer = settings.instantiate_current_writer(filename.as_str())?;
|
||||
writers.push(writer);
|
||||
}
|
||||
out_files = get_out_files(num_chunks, settings, false)?;
|
||||
}
|
||||
|
||||
let num_chunks: usize = num_chunks.try_into().unwrap();
|
||||
|
@ -1470,9 +1532,7 @@ where
|
|||
}
|
||||
}
|
||||
None => {
|
||||
let maybe_writer = writers.get_mut(i % num_chunks);
|
||||
let writer = maybe_writer.unwrap();
|
||||
|
||||
let writer = out_files[i % num_chunks].get_writer(settings)?;
|
||||
let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
|
||||
if !writer_stdin_open {
|
||||
closed_writers += 1;
|
||||
|
|
|
@ -8,7 +8,10 @@
|
|||
use crate::{OPT_BYTES, OPT_LINES, OPT_LINE_BYTES, OPT_NUMBER};
|
||||
use clap::{parser::ValueSource, ArgMatches};
|
||||
use std::fmt;
|
||||
use uucore::parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError};
|
||||
use uucore::{
|
||||
display::Quotable,
|
||||
parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError},
|
||||
};
|
||||
|
||||
/// Sub-strategy of the [`Strategy::Number`]
|
||||
/// Splitting a file into a specific number of chunks.
|
||||
|
@ -208,10 +211,10 @@ impl fmt::Display for StrategyError {
|
|||
Self::Lines(e) => write!(f, "invalid number of lines: {e}"),
|
||||
Self::Bytes(e) => write!(f, "invalid number of bytes: {e}"),
|
||||
Self::NumberType(NumberTypeError::NumberOfChunks(s)) => {
|
||||
write!(f, "invalid number of chunks: {s}")
|
||||
write!(f, "invalid number of chunks: {}", s.quote())
|
||||
}
|
||||
Self::NumberType(NumberTypeError::ChunkNumber(s)) => {
|
||||
write!(f, "invalid chunk number: {s}")
|
||||
write!(f, "invalid chunk number: {}", s.quote())
|
||||
}
|
||||
Self::MultipleWays => write!(f, "cannot split in more than one way"),
|
||||
}
|
||||
|
|
|
@ -606,13 +606,13 @@ fn test_split_obs_lines_as_other_option_value() {
|
|||
.args(&["-n", "-200", "file"])
|
||||
.fails()
|
||||
.code_is(1)
|
||||
.stderr_contains("split: invalid number of chunks: -200\n");
|
||||
.stderr_contains("split: invalid number of chunks: '-200'\n");
|
||||
scene
|
||||
.ucmd()
|
||||
.args(&["--number", "-e200", "file"])
|
||||
.fails()
|
||||
.code_is(1)
|
||||
.stderr_contains("split: invalid number of chunks: -e200\n");
|
||||
.stderr_contains("split: invalid number of chunks: '-e200'\n");
|
||||
}
|
||||
|
||||
/// Test for using more than one obsolete lines option (standalone)
|
||||
|
@ -708,7 +708,7 @@ fn test_split_overflow_bytes_size() {
|
|||
fn test_split_stdin_num_chunks() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["--number=1"]).pipe_in("").succeeds();
|
||||
assert_eq!(file_read(&at, "xaa"), "");
|
||||
assert_eq!(at.read("xaa"), "");
|
||||
assert!(!at.plus("xab").exists());
|
||||
}
|
||||
|
||||
|
@ -727,8 +727,8 @@ fn test_split_stdin_num_line_chunks() {
|
|||
ucmd.args(&["--number=l/2"])
|
||||
.pipe_in("1\n2\n3\n4\n5\n")
|
||||
.succeeds();
|
||||
assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n");
|
||||
assert_eq!(file_read(&at, "xab"), "4\n5\n");
|
||||
assert_eq!(at.read("xaa"), "1\n2\n3\n");
|
||||
assert_eq!(at.read("xab"), "4\n5\n");
|
||||
assert!(!at.plus("xac").exists());
|
||||
}
|
||||
|
||||
|
@ -741,12 +741,6 @@ fn test_split_stdin_num_kth_line_chunk() {
|
|||
.stdout_only("2\n");
|
||||
}
|
||||
|
||||
fn file_read(at: &AtPath, filename: &str) -> String {
|
||||
let mut s = String::new();
|
||||
at.open(filename).read_to_string(&mut s).unwrap();
|
||||
s
|
||||
}
|
||||
|
||||
/// Test for the default suffix length behavior: dynamically increasing size.
|
||||
#[test]
|
||||
fn test_alphabetic_dynamic_suffix_length() {
|
||||
|
@ -766,11 +760,11 @@ fn test_alphabetic_dynamic_suffix_length() {
|
|||
for i in b'a'..=b'y' {
|
||||
for j in b'a'..=b'z' {
|
||||
let filename = format!("x{}{}", i as char, j as char);
|
||||
let contents = file_read(&at, &filename);
|
||||
let contents = at.read(&filename);
|
||||
assert_eq!(contents, "a");
|
||||
}
|
||||
}
|
||||
assert_eq!(file_read(&at, "xzaaa"), "a");
|
||||
assert_eq!(at.read("xzaaa"), "a");
|
||||
}
|
||||
|
||||
/// Test for the default suffix length behavior: dynamically increasing size.
|
||||
|
@ -790,10 +784,10 @@ fn test_numeric_dynamic_suffix_length() {
|
|||
.succeeds();
|
||||
for i in 0..90 {
|
||||
let filename = format!("x{i:02}");
|
||||
let contents = file_read(&at, &filename);
|
||||
let contents = at.read(&filename);
|
||||
assert_eq!(contents, "a");
|
||||
}
|
||||
assert_eq!(file_read(&at, "x9000"), "a");
|
||||
assert_eq!(at.read("x9000"), "a");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -812,10 +806,10 @@ fn test_hex_dynamic_suffix_length() {
|
|||
.succeeds();
|
||||
for i in 0..240 {
|
||||
let filename = format!("x{i:02x}");
|
||||
let contents = file_read(&at, &filename);
|
||||
let contents = at.read(&filename);
|
||||
assert_eq!(contents, "a");
|
||||
}
|
||||
assert_eq!(file_read(&at, "xf000"), "a");
|
||||
assert_eq!(at.read("xf000"), "a");
|
||||
}
|
||||
|
||||
/// Test for dynamic suffix length (auto-widening) disabled when suffix start number is specified
|
||||
|
@ -833,7 +827,7 @@ fn test_dynamic_suffix_length_on_with_suffix_start_no_value() {
|
|||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["-b", "1", "--numeric-suffixes", "ninetyonebytes.txt"])
|
||||
.succeeds();
|
||||
assert_eq!(file_read(&at, "x9000"), "a");
|
||||
assert_eq!(at.read("x9000"), "a");
|
||||
}
|
||||
|
||||
/// Test for suffix auto-width with --number strategy and suffix start number
|
||||
|
@ -845,8 +839,8 @@ fn test_suffix_auto_width_with_number() {
|
|||
let glob = Glob::new(&at, ".", r"x\d\d\d$");
|
||||
assert_eq!(glob.count(), 100);
|
||||
assert_eq!(glob.collate(), at.read_bytes("fivelines.txt"));
|
||||
assert_eq!(file_read(&at, "x001"), "1\n");
|
||||
assert_eq!(file_read(&at, "x100"), "");
|
||||
assert_eq!(at.read("x001"), "1\n");
|
||||
assert_eq!(at.read("x100"), "");
|
||||
|
||||
new_ucmd!()
|
||||
.args(&["--numeric-suffixes=100", "--number=r/100", "fivelines.txt"])
|
||||
|
@ -926,17 +920,12 @@ creating file 'xaf'
|
|||
#[test]
|
||||
fn test_number_n() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
let file_read = |f| {
|
||||
let mut s = String::new();
|
||||
at.open(f).read_to_string(&mut s).unwrap();
|
||||
s
|
||||
};
|
||||
ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();
|
||||
assert_eq!(file_read("xaa"), "abcdef");
|
||||
assert_eq!(file_read("xab"), "ghijkl");
|
||||
assert_eq!(file_read("xac"), "mnopq");
|
||||
assert_eq!(file_read("xad"), "rstuv");
|
||||
assert_eq!(file_read("xae"), "wxyz\n");
|
||||
assert_eq!(at.read("xaa"), "abcdef");
|
||||
assert_eq!(at.read("xab"), "ghijkl");
|
||||
assert_eq!(at.read("xac"), "mnopq");
|
||||
assert_eq!(at.read("xad"), "rstuv");
|
||||
assert_eq!(at.read("xae"), "wxyz\n");
|
||||
#[cfg(unix)]
|
||||
new_ucmd!()
|
||||
.args(&["--number=100", "/dev/null"])
|
||||
|
@ -974,11 +963,11 @@ fn test_number_kth_of_n() {
|
|||
new_ucmd!()
|
||||
.args(&["--number=0/5", "asciilowercase.txt"])
|
||||
.fails()
|
||||
.stderr_contains("split: invalid chunk number: 0");
|
||||
.stderr_contains("split: invalid chunk number: '0'");
|
||||
new_ucmd!()
|
||||
.args(&["--number=10/5", "asciilowercase.txt"])
|
||||
.fails()
|
||||
.stderr_contains("split: invalid chunk number: 10");
|
||||
.stderr_contains("split: invalid chunk number: '10'");
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
new_ucmd!()
|
||||
.args(&[
|
||||
|
@ -986,7 +975,7 @@ fn test_number_kth_of_n() {
|
|||
"asciilowercase.txt",
|
||||
])
|
||||
.fails()
|
||||
.stderr_contains("split: invalid number of chunks: 18446744073709551616");
|
||||
.stderr_contains("split: invalid number of chunks: '18446744073709551616'");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1020,32 +1009,27 @@ fn test_number_kth_of_n_round_robin() {
|
|||
"fivelines.txt",
|
||||
])
|
||||
.fails()
|
||||
.stderr_contains("split: invalid number of chunks: 18446744073709551616");
|
||||
.stderr_contains("split: invalid number of chunks: '18446744073709551616'");
|
||||
new_ucmd!()
|
||||
.args(&["--number", "r/0/3", "fivelines.txt"])
|
||||
.fails()
|
||||
.stderr_contains("split: invalid chunk number: 0");
|
||||
.stderr_contains("split: invalid chunk number: '0'");
|
||||
new_ucmd!()
|
||||
.args(&["--number", "r/10/3", "fivelines.txt"])
|
||||
.fails()
|
||||
.stderr_contains("split: invalid chunk number: 10");
|
||||
.stderr_contains("split: invalid chunk number: '10'");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_number_with_io_blksize() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
let file_read = |f| {
|
||||
let mut s = String::new();
|
||||
at.open(f).read_to_string(&mut s).unwrap();
|
||||
s
|
||||
};
|
||||
ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"])
|
||||
.succeeds();
|
||||
assert_eq!(file_read("xaa"), "abcdef");
|
||||
assert_eq!(file_read("xab"), "ghijkl");
|
||||
assert_eq!(file_read("xac"), "mnopq");
|
||||
assert_eq!(file_read("xad"), "rstuv");
|
||||
assert_eq!(file_read("xae"), "wxyz\n");
|
||||
assert_eq!(at.read("xaa"), "abcdef");
|
||||
assert_eq!(at.read("xab"), "ghijkl");
|
||||
assert_eq!(at.read("xac"), "mnopq");
|
||||
assert_eq!(at.read("xad"), "rstuv");
|
||||
assert_eq!(at.read("xae"), "wxyz\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1153,7 +1137,7 @@ fn test_allow_empty_files() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_elide_empty_files() {
|
||||
fn test_elide_empty_files_n_chunks() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["-e", "-n", "4", "threebytes.txt"])
|
||||
.succeeds()
|
||||
|
@ -1167,7 +1151,7 @@ fn test_elide_empty_files() {
|
|||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn test_elide_dev_null() {
|
||||
fn test_elide_dev_null_n_chunks() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["-e", "-n", "3", "/dev/null"])
|
||||
.succeeds()
|
||||
|
@ -1191,24 +1175,58 @@ fn test_dev_zero() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_lines() {
|
||||
fn test_elide_empty_files_l_chunks() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
|
||||
let file_read = |f| {
|
||||
let mut s = String::new();
|
||||
at.open(f).read_to_string(&mut s).unwrap();
|
||||
s
|
||||
};
|
||||
|
||||
// Split into two files without splitting up lines.
|
||||
ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds();
|
||||
|
||||
assert_eq!(file_read("xaa"), "1\n2\n3\n");
|
||||
assert_eq!(file_read("xab"), "4\n5\n");
|
||||
ucmd.args(&["-e", "-n", "l/7", "fivelines.txt"])
|
||||
.succeeds()
|
||||
.no_stdout()
|
||||
.no_stderr();
|
||||
assert_eq!(at.read("xaa"), "1\n");
|
||||
assert_eq!(at.read("xab"), "2\n");
|
||||
assert_eq!(at.read("xac"), "3\n");
|
||||
assert_eq!(at.read("xad"), "4\n");
|
||||
assert_eq!(at.read("xae"), "5\n");
|
||||
assert!(!at.plus("xaf").exists());
|
||||
assert!(!at.plus("xag").exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lines_kth() {
|
||||
#[cfg(unix)]
|
||||
fn test_elide_dev_null_l_chunks() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["-e", "-n", "l/3", "/dev/null"])
|
||||
.succeeds()
|
||||
.no_stdout()
|
||||
.no_stderr();
|
||||
assert!(!at.plus("xaa").exists());
|
||||
assert!(!at.plus("xab").exists());
|
||||
assert!(!at.plus("xac").exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn test_number_by_bytes_dev_zero() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
ucmd.args(&["-n", "3", "/dev/zero"])
|
||||
.fails()
|
||||
.stderr_only("split: /dev/zero: cannot determine file size\n");
|
||||
assert!(!at.plus("xaa").exists());
|
||||
assert!(!at.plus("xab").exists());
|
||||
assert!(!at.plus("xac").exists());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_number_by_lines() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
// Split into two files without splitting up lines.
|
||||
ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds();
|
||||
|
||||
assert_eq!(at.read("xaa"), "1\n2\n3\n");
|
||||
assert_eq!(at.read("xab"), "4\n5\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_number_by_lines_kth() {
|
||||
new_ucmd!()
|
||||
.args(&["-n", "l/3/10", "onehundredlines.txt"])
|
||||
.succeeds()
|
||||
|
@ -1217,13 +1235,27 @@ fn test_lines_kth() {
|
|||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn test_lines_kth_dev_null() {
|
||||
fn test_number_by_lines_kth_dev_null() {
|
||||
new_ucmd!()
|
||||
.args(&["-n", "l/3/10", "/dev/null"])
|
||||
.succeeds()
|
||||
.stdout_only("");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_number_by_lines_kth_no_end_sep() {
|
||||
new_ucmd!()
|
||||
.args(&["-n", "l/3/10"])
|
||||
.pipe_in("1\n2222\n3\n4")
|
||||
.succeeds()
|
||||
.stdout_only("2222\n");
|
||||
new_ucmd!()
|
||||
.args(&["-e", "-n", "l/8/10"])
|
||||
.pipe_in("1\n2222\n3\n4")
|
||||
.succeeds()
|
||||
.stdout_only("3\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_line_bytes() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
|
@ -1588,17 +1620,10 @@ fn test_effective_suffix_hex_last() {
|
|||
#[test]
|
||||
fn test_round_robin() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
|
||||
let file_read = |f| {
|
||||
let mut s = String::new();
|
||||
at.open(f).read_to_string(&mut s).unwrap();
|
||||
s
|
||||
};
|
||||
|
||||
ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();
|
||||
|
||||
assert_eq!(file_read("xaa"), "1\n3\n5\n");
|
||||
assert_eq!(file_read("xab"), "2\n4\n");
|
||||
assert_eq!(at.read("xaa"), "1\n3\n5\n");
|
||||
assert_eq!(at.read("xab"), "2\n4\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1631,7 +1656,7 @@ fn test_split_invalid_input() {
|
|||
.args(&["-n", "0", "file"])
|
||||
.fails()
|
||||
.no_stdout()
|
||||
.stderr_contains("split: invalid number of chunks: 0");
|
||||
.stderr_contains("split: invalid number of chunks: '0'");
|
||||
}
|
||||
|
||||
/// Test if there are invalid (non UTF-8) in the arguments - unix
|
||||
|
@ -1690,9 +1715,9 @@ fn test_split_separator_nl_lines() {
|
|||
.pipe_in("1\n2\n3\n4\n5\n")
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\n2\n");
|
||||
assert_eq!(file_read(&at, "xab"), "3\n4\n");
|
||||
assert_eq!(file_read(&at, "xac"), "5\n");
|
||||
assert_eq!(at.read("xaa"), "1\n2\n");
|
||||
assert_eq!(at.read("xab"), "3\n4\n");
|
||||
assert_eq!(at.read("xac"), "5\n");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1703,9 +1728,9 @@ fn test_split_separator_nl_line_bytes() {
|
|||
.pipe_in("1\n2\n3\n4\n5\n")
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\n2\n");
|
||||
assert_eq!(file_read(&at, "xab"), "3\n4\n");
|
||||
assert_eq!(file_read(&at, "xac"), "5\n");
|
||||
assert_eq!(at.read("xaa"), "1\n2\n");
|
||||
assert_eq!(at.read("xab"), "3\n4\n");
|
||||
assert_eq!(at.read("xac"), "5\n");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1715,9 +1740,9 @@ fn test_split_separator_nl_number_l() {
|
|||
ucmd.args(&["--number=l/3", "--separator=\n", "fivelines.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\n2\n");
|
||||
assert_eq!(file_read(&at, "xab"), "3\n4\n");
|
||||
assert_eq!(file_read(&at, "xac"), "5\n");
|
||||
assert_eq!(at.read("xaa"), "1\n2\n");
|
||||
assert_eq!(at.read("xab"), "3\n4\n");
|
||||
assert_eq!(at.read("xac"), "5\n");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1727,9 +1752,9 @@ fn test_split_separator_nl_number_r() {
|
|||
ucmd.args(&["--number=r/3", "--separator", "\n", "fivelines.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\n4\n");
|
||||
assert_eq!(file_read(&at, "xab"), "2\n5\n");
|
||||
assert_eq!(file_read(&at, "xac"), "3\n");
|
||||
assert_eq!(at.read("xaa"), "1\n4\n");
|
||||
assert_eq!(at.read("xab"), "2\n5\n");
|
||||
assert_eq!(at.read("xac"), "3\n");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1739,9 +1764,9 @@ fn test_split_separator_nul_lines() {
|
|||
ucmd.args(&["--lines=2", "-t", "\\0", "separator_nul.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\x002\0");
|
||||
assert_eq!(file_read(&at, "xab"), "3\x004\0");
|
||||
assert_eq!(file_read(&at, "xac"), "5\0");
|
||||
assert_eq!(at.read("xaa"), "1\x002\0");
|
||||
assert_eq!(at.read("xab"), "3\x004\0");
|
||||
assert_eq!(at.read("xac"), "5\0");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1751,9 +1776,9 @@ fn test_split_separator_nul_line_bytes() {
|
|||
ucmd.args(&["--line-bytes=4", "-t", "\\0", "separator_nul.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\x002\0");
|
||||
assert_eq!(file_read(&at, "xab"), "3\x004\0");
|
||||
assert_eq!(file_read(&at, "xac"), "5\0");
|
||||
assert_eq!(at.read("xaa"), "1\x002\0");
|
||||
assert_eq!(at.read("xab"), "3\x004\0");
|
||||
assert_eq!(at.read("xac"), "5\0");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1763,9 +1788,9 @@ fn test_split_separator_nul_number_l() {
|
|||
ucmd.args(&["--number=l/3", "--separator=\\0", "separator_nul.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\x002\0");
|
||||
assert_eq!(file_read(&at, "xab"), "3\x004\0");
|
||||
assert_eq!(file_read(&at, "xac"), "5\0");
|
||||
assert_eq!(at.read("xaa"), "1\x002\0");
|
||||
assert_eq!(at.read("xab"), "3\x004\0");
|
||||
assert_eq!(at.read("xac"), "5\0");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1775,9 +1800,9 @@ fn test_split_separator_nul_number_r() {
|
|||
ucmd.args(&["--number=r/3", "--separator=\\0", "separator_nul.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1\x004\0");
|
||||
assert_eq!(file_read(&at, "xab"), "2\x005\0");
|
||||
assert_eq!(file_read(&at, "xac"), "3\0");
|
||||
assert_eq!(at.read("xaa"), "1\x004\0");
|
||||
assert_eq!(at.read("xab"), "2\x005\0");
|
||||
assert_eq!(at.read("xac"), "3\0");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1787,9 +1812,9 @@ fn test_split_separator_semicolon_lines() {
|
|||
ucmd.args(&["--lines=2", "-t", ";", "separator_semicolon.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1;2;");
|
||||
assert_eq!(file_read(&at, "xab"), "3;4;");
|
||||
assert_eq!(file_read(&at, "xac"), "5;");
|
||||
assert_eq!(at.read("xaa"), "1;2;");
|
||||
assert_eq!(at.read("xab"), "3;4;");
|
||||
assert_eq!(at.read("xac"), "5;");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1799,9 +1824,9 @@ fn test_split_separator_semicolon_line_bytes() {
|
|||
ucmd.args(&["--line-bytes=4", "-t", ";", "separator_semicolon.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1;2;");
|
||||
assert_eq!(file_read(&at, "xab"), "3;4;");
|
||||
assert_eq!(file_read(&at, "xac"), "5;");
|
||||
assert_eq!(at.read("xaa"), "1;2;");
|
||||
assert_eq!(at.read("xab"), "3;4;");
|
||||
assert_eq!(at.read("xac"), "5;");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1811,9 +1836,9 @@ fn test_split_separator_semicolon_number_l() {
|
|||
ucmd.args(&["--number=l/3", "--separator=;", "separator_semicolon.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1;2;");
|
||||
assert_eq!(file_read(&at, "xab"), "3;4;");
|
||||
assert_eq!(file_read(&at, "xac"), "5;");
|
||||
assert_eq!(at.read("xaa"), "1;2;");
|
||||
assert_eq!(at.read("xab"), "3;4;");
|
||||
assert_eq!(at.read("xac"), "5;");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
@ -1823,9 +1848,9 @@ fn test_split_separator_semicolon_number_r() {
|
|||
ucmd.args(&["--number=r/3", "--separator=;", "separator_semicolon.txt"])
|
||||
.succeeds();
|
||||
|
||||
assert_eq!(file_read(&at, "xaa"), "1;4;");
|
||||
assert_eq!(file_read(&at, "xab"), "2;5;");
|
||||
assert_eq!(file_read(&at, "xac"), "3;");
|
||||
assert_eq!(at.read("xaa"), "1;4;");
|
||||
assert_eq!(at.read("xab"), "2;5;");
|
||||
assert_eq!(at.read("xac"), "3;");
|
||||
assert!(!at.plus("xad").exists());
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue