1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

split: pass GNU test l-chunk

This commit is contained in:
zhitkoff 2023-11-21 11:04:38 -05:00 committed by Daniel Hofstetter
parent 97d30bd486
commit 4dc46f10e9
3 changed files with 264 additions and 176 deletions

View file

@ -1130,14 +1130,68 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
} }
} }
/// Output file parameters
///
/// Pairs the name of an output file with its writer. The writer is `None`
/// until it has been instantiated (see `OutFile::get_writer`).
struct OutFile {
    /// Name of the output file; handed to `instantiate_current_writer`
    /// when the writer is created.
    filename: String,
    /// Buffered writer for this output, or `None` if not instantiated yet.
    maybe_writer: Option<BufWriter<Box<dyn Write>>>,
}
impl OutFile {
    /// Return the writer for this output file.
    ///
    /// If no writer exists yet, one is created through
    /// `settings.instantiate_current_writer` for `self.filename` and stored,
    /// so subsequent calls reuse it.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while instantiating the writer.
    fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter<Box<dyn Write>>> {
        // Lazily create the writer on first use and remember it.
        if self.maybe_writer.is_none() {
            let writer = settings.instantiate_current_writer(self.filename.as_str())?;
            self.maybe_writer = Some(writer);
        }
        // Populated either upfront or by the branch above.
        Ok(self.maybe_writer.as_mut().unwrap())
    }
}
/// Build the list of output files for a split into `num_files` chunks.
///
/// Shared helper for [`n_chunks_by_byte`], [`n_chunks_by_line`]
/// and [`n_chunks_by_line_round_robin`].
///
/// File names are taken, in order, from a [`FilenameIterator`] built from
/// `settings.prefix` and `settings.suffix`. When `is_writer_optional` is
/// `false`, the writer of every [`OutFile`] is instantiated right away.
/// When it is `true`, each [`OutFile`] starts without a writer and the caller
/// instantiates it later as needed; this could happen in
/// [`n_chunks_by_line`] if the `elide_empty_files` parameter is set to `true`.
///
/// # Errors
///
/// Fails if the filename iterator cannot be constructed, if it runs out of
/// names before `num_files` were produced, or if a writer cannot be
/// instantiated.
fn get_out_files(
    num_files: u64,
    settings: &Settings,
    is_writer_optional: bool,
) -> UResult<Vec<OutFile>> {
    // Source of the file name for each chunk
    let mut names: FilenameIterator<'_> =
        FilenameIterator::new(&settings.prefix, &settings.suffix)
            .map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;

    // Produce the files one by one; the first failure aborts the collection
    // and is returned to the caller.
    (0..num_files)
        .map(|_| -> UResult<OutFile> {
            let filename = names
                .next()
                .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
            // Only instantiate the writer now when it is not optional.
            let maybe_writer = (!is_writer_optional)
                .then(|| settings.instantiate_current_writer(filename.as_str()))
                .transpose()?;
            Ok(OutFile {
                filename,
                maybe_writer,
            })
        })
        .collect()
}
/// Split a file or STDIN into a specific number of chunks by byte. /// Split a file or STDIN into a specific number of chunks by byte.
/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
/// ///
/// When file size cannot be evenly divided into the number of chunks of the same size, /// When file size cannot be evenly divided into the number of chunks of the same size,
/// the first X chunks are 1 byte longer than the rest, /// the first X chunks are 1 byte longer than the rest,
/// where X is a modulus reminder of (file size % number of chunks) /// where X is a modulus reminder of (file size % number of chunks)
/// ///
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` /// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk`
/// ///
/// In N chunks mode - this function always creates one output file for each chunk, even /// In N chunks mode - this function always creates one output file for each chunk, even
/// if there is an error reading or writing one of the chunks or if /// if there is an error reading or writing one of the chunks or if
@ -1207,7 +1261,7 @@ where
// In Kth chunk of N mode - we will write to stdout instead of to a file. // In Kth chunk of N mode - we will write to stdout instead of to a file.
let mut stdout_writer = std::io::stdout().lock(); let mut stdout_writer = std::io::stdout().lock();
// In N chunks mode - we will write to `num_chunks` files // In N chunks mode - we will write to `num_chunks` files
let mut writers = vec![]; let mut out_files: Vec<OutFile> = Vec::new();
// Calculate chunk size base and modulo reminder // Calculate chunk size base and modulo reminder
// to be used in calculating chunk_size later on // to be used in calculating chunk_size later on
@ -1219,16 +1273,7 @@ where
// This will create each of the underlying files // This will create each of the underlying files
// or stdin pipes to child shell/command processes if in `--filter` mode // or stdin pipes to child shell/command processes if in `--filter` mode
if kth_chunk.is_none() { if kth_chunk.is_none() {
// This object is responsible for creating the filename for each chunk. out_files = get_out_files(num_chunks, settings, false)?;
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
}
} }
for i in 1_u64..=num_chunks { for i in 1_u64..=num_chunks {
@ -1272,7 +1317,7 @@ where
} }
None => { None => {
let idx = (i - 1) as usize; let idx = (i - 1) as usize;
let writer = writers.get_mut(idx).unwrap(); let writer = out_files[idx].get_writer(settings)?;
writer.write_all(buf)?; writer.write_all(buf)?;
} }
} }
@ -1284,9 +1329,14 @@ where
} }
/// Split a file or STDIN into a specific number of chunks by line. /// Split a file or STDIN into a specific number of chunks by line.
/// If in Kth chunk of N mode - print the k-th chunk to STDOUT.
/// ///
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk` /// It is most likely that input cannot be evenly divided into the number of chunks
/// of the same size in bytes or number of lines, since we cannot break lines.
/// It is also likely that there will be empty files (when `elide_empty_files` is disabled)
/// when a long line overlaps one or more chunks.
///
/// In Kth chunk of N mode - writes to STDOUT the contents of the chunk identified by `kth_chunk`
/// Note: the `elide_empty_files` flag is ignored in this mode
/// ///
/// In N chunks mode - this function always creates one output file for each chunk, even /// In N chunks mode - this function always creates one output file for each chunk, even
/// if there is an error reading or writing one of the chunks or if /// if there is an error reading or writing one of the chunks or if
@ -1322,76 +1372,97 @@ where
let initial_buf = &mut Vec::new(); let initial_buf = &mut Vec::new();
let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?; let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
let reader = initial_buf.chain(reader); let reader = initial_buf.chain(reader);
let chunk_size = (num_bytes / num_chunks) as usize;
// If input file is empty and we would not have determined the Kth chunk // If input file is empty and we would not have determined the Kth chunk
// in the Kth chunk of N chunk mode, then terminate immediately. // in the Kth chunk of N chunk mode, then terminate immediately.
// This happens on `split -n l/3/10 /dev/null`, for example. // This happens on `split -n l/3/10 /dev/null`, for example.
if kth_chunk.is_some() && num_bytes == 0 { // Similarly, if input file is empty and `elide_empty_files` parameter is enabled,
// then we would have written zero chunks of output,
// so terminate immediately as well.
// This happens on `split -e -n l/3 /dev/null`, for example.
if num_bytes == 0 && (kth_chunk.is_some() || settings.elide_empty_files) {
return Ok(()); return Ok(());
} }
// In Kth chunk of N mode - we will write to stdout instead of to a file. // In Kth chunk of N mode - we will write to stdout instead of to a file.
let mut stdout_writer = std::io::stdout().lock(); let mut stdout_writer = std::io::stdout().lock();
// In N chunks mode - we will write to `num_chunks` files // In N chunks mode - we will write to `num_chunks` files
let mut writers = vec![]; let mut out_files: Vec<OutFile> = Vec::new();
// Calculate chunk size base and modulo reminder
// to be used in calculating `num_bytes_should_be_written` later on
let chunk_size_base = num_bytes / num_chunks;
let chunk_size_reminder = num_bytes % num_chunks;
// If in N chunks mode // If in N chunks mode
// Create one writer for each chunk. // Generate filenames for each file and
// This will create each of the underlying files // if `elide_empty_files` parameter is NOT enabled - instantiate the writer
// or stdin pipes to child shell/command processes if in `--filter` mode // which will create each of the underlying files or stdin pipes
// to child shell/command processes if in `--filter` mode.
// Otherwise keep writer optional, to be instantiated later if there is data
// to write for the associated chunk.
if kth_chunk.is_none() { if kth_chunk.is_none() {
// This object is responsible for creating the filename for each chunk. out_files = get_out_files(num_chunks, settings, settings.elide_empty_files)?;
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
}
} }
let mut num_bytes_remaining_in_current_chunk = chunk_size; let mut chunk_number = 1;
let mut i = 1;
let sep = settings.separator; let sep = settings.separator;
let mut num_bytes_should_be_written = chunk_size_base + (chunk_size_reminder > 0) as u64;
let mut num_bytes_written = 0;
for line_result in reader.split(sep) { for line_result in reader.split(sep) {
// add separator back in at the end of the line
let mut line = line_result?; let mut line = line_result?;
line.push(sep); // add separator back in at the end of the line,
// since `reader.split(sep)` removes it,
// except if the last line did not end with separator character
if (num_bytes_written + line.len() as u64) < num_bytes {
line.push(sep);
}
let bytes = line.as_slice(); let bytes = line.as_slice();
match kth_chunk { match kth_chunk {
Some(chunk_number) => { Some(kth) => {
if i == chunk_number { if chunk_number == kth {
stdout_writer.write_all(bytes)?; stdout_writer.write_all(bytes)?;
} }
} }
None => { None => {
let idx = (i - 1) as usize; // Should write into a file
let maybe_writer = writers.get_mut(idx); let idx = (chunk_number - 1) as usize;
let writer = maybe_writer.unwrap(); let writer = out_files[idx].get_writer(settings)?;
custom_write_all(bytes, writer, settings)?; custom_write_all(bytes, writer, settings)?;
} }
} }
let num_bytes = bytes.len(); // Advance to the next chunk if the current one is filled.
if num_bytes >= num_bytes_remaining_in_current_chunk { // There could be a situation when a long line, which started in current chunk,
num_bytes_remaining_in_current_chunk = chunk_size; // would overlap the next chunk (or even several next chunks),
i += 1; // and since we cannot break lines for this split strategy, we could end up with
} else { // empty files in place(s) of skipped chunk(s)
num_bytes_remaining_in_current_chunk -= num_bytes; let num_line_bytes = bytes.len() as u64;
num_bytes_written += num_line_bytes;
let mut skipped = -1;
while num_bytes_should_be_written <= num_bytes_written {
num_bytes_should_be_written +=
chunk_size_base + (chunk_size_reminder > chunk_number) as u64;
chunk_number += 1;
skipped += 1;
} }
if let Some(chunk_number) = kth_chunk { // If a chunk was skipped and `elide_empty_files` flag is set,
if i > chunk_number { // roll chunk_number back to preserve sequential continuity
// of file names for files written to,
// except for Kth chunk of N mode
if settings.elide_empty_files && skipped > 0 && kth_chunk.is_none() {
chunk_number -= skipped as u64;
}
if let Some(kth) = kth_chunk {
if chunk_number > kth {
break; break;
} }
} }
} }
Ok(()) Ok(())
} }
@ -1432,23 +1503,14 @@ where
// In Kth chunk of N mode - we will write to stdout instead of to a file. // In Kth chunk of N mode - we will write to stdout instead of to a file.
let mut stdout_writer = std::io::stdout().lock(); let mut stdout_writer = std::io::stdout().lock();
// In N chunks mode - we will write to `num_chunks` files // In N chunks mode - we will write to `num_chunks` files
let mut writers = vec![]; let mut out_files: Vec<OutFile> = Vec::new();
// If in N chunks mode // If in N chunks mode
// Create one writer for each chunk. // Create one writer for each chunk.
// This will create each of the underlying files // This will create each of the underlying files
// or stdin pipes to child shell/command processes if in `--filter` mode // or stdin pipes to child shell/command processes if in `--filter` mode
if kth_chunk.is_none() { if kth_chunk.is_none() {
// This object is responsible for creating the filename for each chunk. out_files = get_out_files(num_chunks, settings, false)?;
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)
.map_err(|e| io::Error::new(ErrorKind::Other, format!("{e}")))?;
for _ in 0..num_chunks {
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = settings.instantiate_current_writer(filename.as_str())?;
writers.push(writer);
}
} }
let num_chunks: usize = num_chunks.try_into().unwrap(); let num_chunks: usize = num_chunks.try_into().unwrap();
@ -1470,9 +1532,7 @@ where
} }
} }
None => { None => {
let maybe_writer = writers.get_mut(i % num_chunks); let writer = out_files[i % num_chunks].get_writer(settings)?;
let writer = maybe_writer.unwrap();
let writer_stdin_open = custom_write_all(bytes, writer, settings)?; let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
if !writer_stdin_open { if !writer_stdin_open {
closed_writers += 1; closed_writers += 1;

View file

@ -8,7 +8,10 @@
use crate::{OPT_BYTES, OPT_LINES, OPT_LINE_BYTES, OPT_NUMBER}; use crate::{OPT_BYTES, OPT_LINES, OPT_LINE_BYTES, OPT_NUMBER};
use clap::{parser::ValueSource, ArgMatches}; use clap::{parser::ValueSource, ArgMatches};
use std::fmt; use std::fmt;
use uucore::parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError}; use uucore::{
display::Quotable,
parse_size::{parse_size_u64, parse_size_u64_max, ParseSizeError},
};
/// Sub-strategy of the [`Strategy::Number`] /// Sub-strategy of the [`Strategy::Number`]
/// Splitting a file into a specific number of chunks. /// Splitting a file into a specific number of chunks.
@ -208,10 +211,10 @@ impl fmt::Display for StrategyError {
Self::Lines(e) => write!(f, "invalid number of lines: {e}"), Self::Lines(e) => write!(f, "invalid number of lines: {e}"),
Self::Bytes(e) => write!(f, "invalid number of bytes: {e}"), Self::Bytes(e) => write!(f, "invalid number of bytes: {e}"),
Self::NumberType(NumberTypeError::NumberOfChunks(s)) => { Self::NumberType(NumberTypeError::NumberOfChunks(s)) => {
write!(f, "invalid number of chunks: {s}") write!(f, "invalid number of chunks: {}", s.quote())
} }
Self::NumberType(NumberTypeError::ChunkNumber(s)) => { Self::NumberType(NumberTypeError::ChunkNumber(s)) => {
write!(f, "invalid chunk number: {s}") write!(f, "invalid chunk number: {}", s.quote())
} }
Self::MultipleWays => write!(f, "cannot split in more than one way"), Self::MultipleWays => write!(f, "cannot split in more than one way"),
} }

View file

@ -606,13 +606,13 @@ fn test_split_obs_lines_as_other_option_value() {
.args(&["-n", "-200", "file"]) .args(&["-n", "-200", "file"])
.fails() .fails()
.code_is(1) .code_is(1)
.stderr_contains("split: invalid number of chunks: -200\n"); .stderr_contains("split: invalid number of chunks: '-200'\n");
scene scene
.ucmd() .ucmd()
.args(&["--number", "-e200", "file"]) .args(&["--number", "-e200", "file"])
.fails() .fails()
.code_is(1) .code_is(1)
.stderr_contains("split: invalid number of chunks: -e200\n"); .stderr_contains("split: invalid number of chunks: '-e200'\n");
} }
/// Test for using more than one obsolete lines option (standalone) /// Test for using more than one obsolete lines option (standalone)
@ -708,7 +708,7 @@ fn test_split_overflow_bytes_size() {
fn test_split_stdin_num_chunks() { fn test_split_stdin_num_chunks() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["--number=1"]).pipe_in("").succeeds(); ucmd.args(&["--number=1"]).pipe_in("").succeeds();
assert_eq!(file_read(&at, "xaa"), ""); assert_eq!(at.read("xaa"), "");
assert!(!at.plus("xab").exists()); assert!(!at.plus("xab").exists());
} }
@ -727,8 +727,8 @@ fn test_split_stdin_num_line_chunks() {
ucmd.args(&["--number=l/2"]) ucmd.args(&["--number=l/2"])
.pipe_in("1\n2\n3\n4\n5\n") .pipe_in("1\n2\n3\n4\n5\n")
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\n2\n3\n"); assert_eq!(at.read("xaa"), "1\n2\n3\n");
assert_eq!(file_read(&at, "xab"), "4\n5\n"); assert_eq!(at.read("xab"), "4\n5\n");
assert!(!at.plus("xac").exists()); assert!(!at.plus("xac").exists());
} }
@ -741,12 +741,6 @@ fn test_split_stdin_num_kth_line_chunk() {
.stdout_only("2\n"); .stdout_only("2\n");
} }
fn file_read(at: &AtPath, filename: &str) -> String {
let mut s = String::new();
at.open(filename).read_to_string(&mut s).unwrap();
s
}
/// Test for the default suffix length behavior: dynamically increasing size. /// Test for the default suffix length behavior: dynamically increasing size.
#[test] #[test]
fn test_alphabetic_dynamic_suffix_length() { fn test_alphabetic_dynamic_suffix_length() {
@ -766,11 +760,11 @@ fn test_alphabetic_dynamic_suffix_length() {
for i in b'a'..=b'y' { for i in b'a'..=b'y' {
for j in b'a'..=b'z' { for j in b'a'..=b'z' {
let filename = format!("x{}{}", i as char, j as char); let filename = format!("x{}{}", i as char, j as char);
let contents = file_read(&at, &filename); let contents = at.read(&filename);
assert_eq!(contents, "a"); assert_eq!(contents, "a");
} }
} }
assert_eq!(file_read(&at, "xzaaa"), "a"); assert_eq!(at.read("xzaaa"), "a");
} }
/// Test for the default suffix length behavior: dynamically increasing size. /// Test for the default suffix length behavior: dynamically increasing size.
@ -790,10 +784,10 @@ fn test_numeric_dynamic_suffix_length() {
.succeeds(); .succeeds();
for i in 0..90 { for i in 0..90 {
let filename = format!("x{i:02}"); let filename = format!("x{i:02}");
let contents = file_read(&at, &filename); let contents = at.read(&filename);
assert_eq!(contents, "a"); assert_eq!(contents, "a");
} }
assert_eq!(file_read(&at, "x9000"), "a"); assert_eq!(at.read("x9000"), "a");
} }
#[test] #[test]
@ -812,10 +806,10 @@ fn test_hex_dynamic_suffix_length() {
.succeeds(); .succeeds();
for i in 0..240 { for i in 0..240 {
let filename = format!("x{i:02x}"); let filename = format!("x{i:02x}");
let contents = file_read(&at, &filename); let contents = at.read(&filename);
assert_eq!(contents, "a"); assert_eq!(contents, "a");
} }
assert_eq!(file_read(&at, "xf000"), "a"); assert_eq!(at.read("xf000"), "a");
} }
/// Test for dynamic suffix length (auto-widening) disabled when suffix start number is specified /// Test for dynamic suffix length (auto-widening) disabled when suffix start number is specified
@ -833,7 +827,7 @@ fn test_dynamic_suffix_length_on_with_suffix_start_no_value() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-b", "1", "--numeric-suffixes", "ninetyonebytes.txt"]) ucmd.args(&["-b", "1", "--numeric-suffixes", "ninetyonebytes.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "x9000"), "a"); assert_eq!(at.read("x9000"), "a");
} }
/// Test for suffix auto-width with --number strategy and suffix start number /// Test for suffix auto-width with --number strategy and suffix start number
@ -845,8 +839,8 @@ fn test_suffix_auto_width_with_number() {
let glob = Glob::new(&at, ".", r"x\d\d\d$"); let glob = Glob::new(&at, ".", r"x\d\d\d$");
assert_eq!(glob.count(), 100); assert_eq!(glob.count(), 100);
assert_eq!(glob.collate(), at.read_bytes("fivelines.txt")); assert_eq!(glob.collate(), at.read_bytes("fivelines.txt"));
assert_eq!(file_read(&at, "x001"), "1\n"); assert_eq!(at.read("x001"), "1\n");
assert_eq!(file_read(&at, "x100"), ""); assert_eq!(at.read("x100"), "");
new_ucmd!() new_ucmd!()
.args(&["--numeric-suffixes=100", "--number=r/100", "fivelines.txt"]) .args(&["--numeric-suffixes=100", "--number=r/100", "fivelines.txt"])
@ -926,17 +920,12 @@ creating file 'xaf'
#[test] #[test]
fn test_number_n() { fn test_number_n() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
let file_read = |f| {
let mut s = String::new();
at.open(f).read_to_string(&mut s).unwrap();
s
};
ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds(); ucmd.args(&["-n", "5", "asciilowercase.txt"]).succeeds();
assert_eq!(file_read("xaa"), "abcdef"); assert_eq!(at.read("xaa"), "abcdef");
assert_eq!(file_read("xab"), "ghijkl"); assert_eq!(at.read("xab"), "ghijkl");
assert_eq!(file_read("xac"), "mnopq"); assert_eq!(at.read("xac"), "mnopq");
assert_eq!(file_read("xad"), "rstuv"); assert_eq!(at.read("xad"), "rstuv");
assert_eq!(file_read("xae"), "wxyz\n"); assert_eq!(at.read("xae"), "wxyz\n");
#[cfg(unix)] #[cfg(unix)]
new_ucmd!() new_ucmd!()
.args(&["--number=100", "/dev/null"]) .args(&["--number=100", "/dev/null"])
@ -974,11 +963,11 @@ fn test_number_kth_of_n() {
new_ucmd!() new_ucmd!()
.args(&["--number=0/5", "asciilowercase.txt"]) .args(&["--number=0/5", "asciilowercase.txt"])
.fails() .fails()
.stderr_contains("split: invalid chunk number: 0"); .stderr_contains("split: invalid chunk number: '0'");
new_ucmd!() new_ucmd!()
.args(&["--number=10/5", "asciilowercase.txt"]) .args(&["--number=10/5", "asciilowercase.txt"])
.fails() .fails()
.stderr_contains("split: invalid chunk number: 10"); .stderr_contains("split: invalid chunk number: '10'");
#[cfg(target_pointer_width = "64")] #[cfg(target_pointer_width = "64")]
new_ucmd!() new_ucmd!()
.args(&[ .args(&[
@ -986,7 +975,7 @@ fn test_number_kth_of_n() {
"asciilowercase.txt", "asciilowercase.txt",
]) ])
.fails() .fails()
.stderr_contains("split: invalid number of chunks: 18446744073709551616"); .stderr_contains("split: invalid number of chunks: '18446744073709551616'");
} }
#[test] #[test]
@ -1020,32 +1009,27 @@ fn test_number_kth_of_n_round_robin() {
"fivelines.txt", "fivelines.txt",
]) ])
.fails() .fails()
.stderr_contains("split: invalid number of chunks: 18446744073709551616"); .stderr_contains("split: invalid number of chunks: '18446744073709551616'");
new_ucmd!() new_ucmd!()
.args(&["--number", "r/0/3", "fivelines.txt"]) .args(&["--number", "r/0/3", "fivelines.txt"])
.fails() .fails()
.stderr_contains("split: invalid chunk number: 0"); .stderr_contains("split: invalid chunk number: '0'");
new_ucmd!() new_ucmd!()
.args(&["--number", "r/10/3", "fivelines.txt"]) .args(&["--number", "r/10/3", "fivelines.txt"])
.fails() .fails()
.stderr_contains("split: invalid chunk number: 10"); .stderr_contains("split: invalid chunk number: '10'");
} }
#[test] #[test]
fn test_split_number_with_io_blksize() { fn test_split_number_with_io_blksize() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
let file_read = |f| {
let mut s = String::new();
at.open(f).read_to_string(&mut s).unwrap();
s
};
ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"]) ucmd.args(&["-n", "5", "asciilowercase.txt", "---io-blksize", "1024"])
.succeeds(); .succeeds();
assert_eq!(file_read("xaa"), "abcdef"); assert_eq!(at.read("xaa"), "abcdef");
assert_eq!(file_read("xab"), "ghijkl"); assert_eq!(at.read("xab"), "ghijkl");
assert_eq!(file_read("xac"), "mnopq"); assert_eq!(at.read("xac"), "mnopq");
assert_eq!(file_read("xad"), "rstuv"); assert_eq!(at.read("xad"), "rstuv");
assert_eq!(file_read("xae"), "wxyz\n"); assert_eq!(at.read("xae"), "wxyz\n");
} }
#[test] #[test]
@ -1153,7 +1137,7 @@ fn test_allow_empty_files() {
} }
#[test] #[test]
fn test_elide_empty_files() { fn test_elide_empty_files_n_chunks() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-e", "-n", "4", "threebytes.txt"]) ucmd.args(&["-e", "-n", "4", "threebytes.txt"])
.succeeds() .succeeds()
@ -1167,7 +1151,7 @@ fn test_elide_empty_files() {
#[test] #[test]
#[cfg(unix)] #[cfg(unix)]
fn test_elide_dev_null() { fn test_elide_dev_null_n_chunks() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-e", "-n", "3", "/dev/null"]) ucmd.args(&["-e", "-n", "3", "/dev/null"])
.succeeds() .succeeds()
@ -1191,24 +1175,58 @@ fn test_dev_zero() {
} }
#[test] #[test]
fn test_lines() { fn test_elide_empty_files_l_chunks() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-e", "-n", "l/7", "fivelines.txt"])
let file_read = |f| { .succeeds()
let mut s = String::new(); .no_stdout()
at.open(f).read_to_string(&mut s).unwrap(); .no_stderr();
s assert_eq!(at.read("xaa"), "1\n");
}; assert_eq!(at.read("xab"), "2\n");
assert_eq!(at.read("xac"), "3\n");
// Split into two files without splitting up lines. assert_eq!(at.read("xad"), "4\n");
ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds(); assert_eq!(at.read("xae"), "5\n");
assert!(!at.plus("xaf").exists());
assert_eq!(file_read("xaa"), "1\n2\n3\n"); assert!(!at.plus("xag").exists());
assert_eq!(file_read("xab"), "4\n5\n");
} }
#[test] #[test]
fn test_lines_kth() { #[cfg(unix)]
/// `-e -n l/3` on `/dev/null` must succeed silently and create no files.
fn test_elide_dev_null_l_chunks() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-e", "-n", "l/3", "/dev/null"])
        .succeeds()
        .no_stdout()
        .no_stderr();
    // None of the three would-be chunk files may exist.
    for name in ["xaa", "xab", "xac"] {
        assert!(!at.plus(name).exists());
    }
}
/// `-n 3` on `/dev/zero` must fail with the "cannot determine file size"
/// message and create no output files.
#[test]
#[cfg(unix)]
fn test_number_by_bytes_dev_zero() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-n", "3", "/dev/zero"])
        .fails()
        .stderr_only("split: /dev/zero: cannot determine file size\n");
    // The failure must not leave any chunk file behind.
    for name in ["xaa", "xab", "xac"] {
        assert!(!at.plus(name).exists());
    }
}
/// `-n l/2` splits the input into two files without splitting up lines.
#[test]
fn test_number_by_lines() {
    let (at, mut ucmd) = at_and_ucmd!();
    ucmd.args(&["-n", "l/2", "fivelines.txt"]).succeeds();

    // Expected content of each output file, in order.
    let expected_chunks = [("xaa", "1\n2\n3\n"), ("xab", "4\n5\n")];
    for (name, expected) in expected_chunks {
        assert_eq!(at.read(name), expected);
    }
}
#[test]
fn test_number_by_lines_kth() {
new_ucmd!() new_ucmd!()
.args(&["-n", "l/3/10", "onehundredlines.txt"]) .args(&["-n", "l/3/10", "onehundredlines.txt"])
.succeeds() .succeeds()
@ -1217,13 +1235,27 @@ fn test_lines_kth() {
#[test] #[test]
#[cfg(unix)] #[cfg(unix)]
fn test_lines_kth_dev_null() { fn test_number_by_lines_kth_dev_null() {
new_ucmd!() new_ucmd!()
.args(&["-n", "l/3/10", "/dev/null"]) .args(&["-n", "l/3/10", "/dev/null"])
.succeeds() .succeeds()
.stdout_only(""); .stdout_only("");
} }
/// Kth-chunk-of-N by lines, reading from STDIN an input whose last line
/// does not end with the separator.
#[test]
fn test_number_by_lines_kth_no_end_sep() {
    let input = "1\n2222\n3\n4";
    // (command-line arguments, expected STDOUT)
    let cases: [(&[&str], &str); 2] = [
        (&["-n", "l/3/10"], "2222\n"),
        (&["-e", "-n", "l/8/10"], "3\n"),
    ];
    for (args, expected) in cases {
        new_ucmd!()
            .args(args)
            .pipe_in(input)
            .succeeds()
            .stdout_only(expected);
    }
}
#[test] #[test]
fn test_line_bytes() { fn test_line_bytes() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
@ -1588,17 +1620,10 @@ fn test_effective_suffix_hex_last() {
#[test] #[test]
fn test_round_robin() { fn test_round_robin() {
let (at, mut ucmd) = at_and_ucmd!(); let (at, mut ucmd) = at_and_ucmd!();
let file_read = |f| {
let mut s = String::new();
at.open(f).read_to_string(&mut s).unwrap();
s
};
ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds(); ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();
assert_eq!(file_read("xaa"), "1\n3\n5\n"); assert_eq!(at.read("xaa"), "1\n3\n5\n");
assert_eq!(file_read("xab"), "2\n4\n"); assert_eq!(at.read("xab"), "2\n4\n");
} }
#[test] #[test]
@ -1631,7 +1656,7 @@ fn test_split_invalid_input() {
.args(&["-n", "0", "file"]) .args(&["-n", "0", "file"])
.fails() .fails()
.no_stdout() .no_stdout()
.stderr_contains("split: invalid number of chunks: 0"); .stderr_contains("split: invalid number of chunks: '0'");
} }
/// Test if there are invalid (non UTF-8) in the arguments - unix /// Test if there are invalid (non UTF-8) in the arguments - unix
@ -1690,9 +1715,9 @@ fn test_split_separator_nl_lines() {
.pipe_in("1\n2\n3\n4\n5\n") .pipe_in("1\n2\n3\n4\n5\n")
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\n2\n"); assert_eq!(at.read("xaa"), "1\n2\n");
assert_eq!(file_read(&at, "xab"), "3\n4\n"); assert_eq!(at.read("xab"), "3\n4\n");
assert_eq!(file_read(&at, "xac"), "5\n"); assert_eq!(at.read("xac"), "5\n");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1703,9 +1728,9 @@ fn test_split_separator_nl_line_bytes() {
.pipe_in("1\n2\n3\n4\n5\n") .pipe_in("1\n2\n3\n4\n5\n")
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\n2\n"); assert_eq!(at.read("xaa"), "1\n2\n");
assert_eq!(file_read(&at, "xab"), "3\n4\n"); assert_eq!(at.read("xab"), "3\n4\n");
assert_eq!(file_read(&at, "xac"), "5\n"); assert_eq!(at.read("xac"), "5\n");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1715,9 +1740,9 @@ fn test_split_separator_nl_number_l() {
ucmd.args(&["--number=l/3", "--separator=\n", "fivelines.txt"]) ucmd.args(&["--number=l/3", "--separator=\n", "fivelines.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\n2\n"); assert_eq!(at.read("xaa"), "1\n2\n");
assert_eq!(file_read(&at, "xab"), "3\n4\n"); assert_eq!(at.read("xab"), "3\n4\n");
assert_eq!(file_read(&at, "xac"), "5\n"); assert_eq!(at.read("xac"), "5\n");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1727,9 +1752,9 @@ fn test_split_separator_nl_number_r() {
ucmd.args(&["--number=r/3", "--separator", "\n", "fivelines.txt"]) ucmd.args(&["--number=r/3", "--separator", "\n", "fivelines.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\n4\n"); assert_eq!(at.read("xaa"), "1\n4\n");
assert_eq!(file_read(&at, "xab"), "2\n5\n"); assert_eq!(at.read("xab"), "2\n5\n");
assert_eq!(file_read(&at, "xac"), "3\n"); assert_eq!(at.read("xac"), "3\n");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1739,9 +1764,9 @@ fn test_split_separator_nul_lines() {
ucmd.args(&["--lines=2", "-t", "\\0", "separator_nul.txt"]) ucmd.args(&["--lines=2", "-t", "\\0", "separator_nul.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\x002\0"); assert_eq!(at.read("xaa"), "1\x002\0");
assert_eq!(file_read(&at, "xab"), "3\x004\0"); assert_eq!(at.read("xab"), "3\x004\0");
assert_eq!(file_read(&at, "xac"), "5\0"); assert_eq!(at.read("xac"), "5\0");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1751,9 +1776,9 @@ fn test_split_separator_nul_line_bytes() {
ucmd.args(&["--line-bytes=4", "-t", "\\0", "separator_nul.txt"]) ucmd.args(&["--line-bytes=4", "-t", "\\0", "separator_nul.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\x002\0"); assert_eq!(at.read("xaa"), "1\x002\0");
assert_eq!(file_read(&at, "xab"), "3\x004\0"); assert_eq!(at.read("xab"), "3\x004\0");
assert_eq!(file_read(&at, "xac"), "5\0"); assert_eq!(at.read("xac"), "5\0");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1763,9 +1788,9 @@ fn test_split_separator_nul_number_l() {
ucmd.args(&["--number=l/3", "--separator=\\0", "separator_nul.txt"]) ucmd.args(&["--number=l/3", "--separator=\\0", "separator_nul.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\x002\0"); assert_eq!(at.read("xaa"), "1\x002\0");
assert_eq!(file_read(&at, "xab"), "3\x004\0"); assert_eq!(at.read("xab"), "3\x004\0");
assert_eq!(file_read(&at, "xac"), "5\0"); assert_eq!(at.read("xac"), "5\0");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1775,9 +1800,9 @@ fn test_split_separator_nul_number_r() {
ucmd.args(&["--number=r/3", "--separator=\\0", "separator_nul.txt"]) ucmd.args(&["--number=r/3", "--separator=\\0", "separator_nul.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1\x004\0"); assert_eq!(at.read("xaa"), "1\x004\0");
assert_eq!(file_read(&at, "xab"), "2\x005\0"); assert_eq!(at.read("xab"), "2\x005\0");
assert_eq!(file_read(&at, "xac"), "3\0"); assert_eq!(at.read("xac"), "3\0");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1787,9 +1812,9 @@ fn test_split_separator_semicolon_lines() {
ucmd.args(&["--lines=2", "-t", ";", "separator_semicolon.txt"]) ucmd.args(&["--lines=2", "-t", ";", "separator_semicolon.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1;2;"); assert_eq!(at.read("xaa"), "1;2;");
assert_eq!(file_read(&at, "xab"), "3;4;"); assert_eq!(at.read("xab"), "3;4;");
assert_eq!(file_read(&at, "xac"), "5;"); assert_eq!(at.read("xac"), "5;");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1799,9 +1824,9 @@ fn test_split_separator_semicolon_line_bytes() {
ucmd.args(&["--line-bytes=4", "-t", ";", "separator_semicolon.txt"]) ucmd.args(&["--line-bytes=4", "-t", ";", "separator_semicolon.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1;2;"); assert_eq!(at.read("xaa"), "1;2;");
assert_eq!(file_read(&at, "xab"), "3;4;"); assert_eq!(at.read("xab"), "3;4;");
assert_eq!(file_read(&at, "xac"), "5;"); assert_eq!(at.read("xac"), "5;");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1811,9 +1836,9 @@ fn test_split_separator_semicolon_number_l() {
ucmd.args(&["--number=l/3", "--separator=;", "separator_semicolon.txt"]) ucmd.args(&["--number=l/3", "--separator=;", "separator_semicolon.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1;2;"); assert_eq!(at.read("xaa"), "1;2;");
assert_eq!(file_read(&at, "xab"), "3;4;"); assert_eq!(at.read("xab"), "3;4;");
assert_eq!(file_read(&at, "xac"), "5;"); assert_eq!(at.read("xac"), "5;");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }
@ -1823,9 +1848,9 @@ fn test_split_separator_semicolon_number_r() {
ucmd.args(&["--number=r/3", "--separator=;", "separator_semicolon.txt"]) ucmd.args(&["--number=r/3", "--separator=;", "separator_semicolon.txt"])
.succeeds(); .succeeds();
assert_eq!(file_read(&at, "xaa"), "1;4;"); assert_eq!(at.read("xaa"), "1;4;");
assert_eq!(file_read(&at, "xab"), "2;5;"); assert_eq!(at.read("xab"), "2;5;");
assert_eq!(file_read(&at, "xac"), "3;"); assert_eq!(at.read("xac"), "3;");
assert!(!at.plus("xad").exists()); assert!(!at.plus("xad").exists());
} }