mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
split: r-chunk tests and infinite input
This commit is contained in:
parent
440e7b1a59
commit
7b2a3e236e
2 changed files with 44 additions and 48 deletions
|
@ -622,7 +622,7 @@ fn custom_write_all<T: Write>(
|
||||||
|
|
||||||
/// Get the size of the input file in bytes
|
/// Get the size of the input file in bytes
|
||||||
/// Used only for subset of `--number=CHUNKS` strategy, as there is a need
|
/// Used only for subset of `--number=CHUNKS` strategy, as there is a need
|
||||||
/// to determine input file size upfront in order to know chunk size
|
/// to determine input file size upfront in order to estimate the chunk size
|
||||||
/// to be written into each of N files/chunks:
|
/// to be written into each of N files/chunks:
|
||||||
/// * N split into N files based on size of input
|
/// * N split into N files based on size of input
|
||||||
/// * K/N output Kth of N to stdout
|
/// * K/N output Kth of N to stdout
|
||||||
|
@ -1141,23 +1141,6 @@ struct OutFile {
|
||||||
is_new: bool,
|
is_new: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
// impl OutFile {
|
|
||||||
// /// Get the writer for the output file.
|
|
||||||
// /// Instantiate the writer if it has not been instantiated upfront
|
|
||||||
// /// or temporarily closed to free up system resources
|
|
||||||
// fn get_writer(&mut self, settings: &Settings) -> UResult<&mut BufWriter<Box<dyn Write>>> {
|
|
||||||
// if self.maybe_writer.is_some() {
|
|
||||||
// Ok(self.maybe_writer.as_mut().unwrap())
|
|
||||||
// } else {
|
|
||||||
// // Writer was not instantiated upfront or was temporarily closed due to system resources constraints.
|
|
||||||
// // Instantiate it and record for future use.
|
|
||||||
// self.maybe_writer =
|
|
||||||
// Some(settings.instantiate_current_writer(self.filename.as_str(), self.is_new)?);
|
|
||||||
// Ok(self.maybe_writer.as_mut().unwrap())
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
/// A set of output files
|
/// A set of output files
|
||||||
/// Used in [`n_chunks_by_byte`], [`n_chunks_by_line`]
|
/// Used in [`n_chunks_by_byte`], [`n_chunks_by_line`]
|
||||||
/// and [`n_chunks_by_line_round_robin`] functions.
|
/// and [`n_chunks_by_line_round_robin`] functions.
|
||||||
|
@ -1551,7 +1534,11 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Split a file or STDIN into a specific number of chunks by line, but
|
/// Split a file or STDIN into a specific number of chunks by line, but
|
||||||
/// assign lines via round-robin
|
/// assign lines via round-robin.
|
||||||
|
/// Note: There is no need to know the size of the input upfront for this method,
|
||||||
|
/// since the lines are assigned to chunks in round-robin order and the size of each chunk
|
||||||
|
/// does not need to be estimated. As a result, "infinite" inputs are supported
|
||||||
|
/// for this method, e.g. `yes | split -n r/10` or `yes | split -n r/3/11`.
|
||||||
///
|
///
|
||||||
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
|
/// In Kth chunk of N mode - writes to stdout the contents of the chunk identified by `kth_chunk`
|
||||||
///
|
///
|
||||||
|
@ -1584,12 +1571,6 @@ fn n_chunks_by_line_round_robin<R>(
|
||||||
where
|
where
|
||||||
R: BufRead,
|
R: BufRead,
|
||||||
{
|
{
|
||||||
// Get the size of the input in bytes and compute the number
|
|
||||||
// of bytes per chunk.
|
|
||||||
let initial_buf = &mut Vec::new();
|
|
||||||
let num_bytes = get_input_size(&settings.input, reader, initial_buf, &settings.io_blksize)?;
|
|
||||||
let reader = initial_buf.chain(reader);
|
|
||||||
|
|
||||||
// In Kth chunk of N mode - we will write to stdout instead of to a file.
|
// In Kth chunk of N mode - we will write to stdout instead of to a file.
|
||||||
let mut stdout_writer = std::io::stdout().lock();
|
let mut stdout_writer = std::io::stdout().lock();
|
||||||
// In N chunks mode - we will write to `num_chunks` files
|
// In N chunks mode - we will write to `num_chunks` files
|
||||||
|
@ -1606,23 +1587,20 @@ where
|
||||||
let num_chunks: usize = num_chunks.try_into().unwrap();
|
let num_chunks: usize = num_chunks.try_into().unwrap();
|
||||||
let sep = settings.separator;
|
let sep = settings.separator;
|
||||||
let mut closed_writers = 0;
|
let mut closed_writers = 0;
|
||||||
let mut num_bytes_written = 0;
|
|
||||||
|
|
||||||
for (i, line_result) in reader.split(sep).enumerate() {
|
let mut i = 0;
|
||||||
let mut line = line_result?;
|
loop {
|
||||||
// add separator back in at the end of the line,
|
let line = &mut Vec::new();
|
||||||
// since `reader.split(sep)` removes it,
|
let num_bytes_read = reader.by_ref().read_until(sep, line)?;
|
||||||
// except if the last line did not end with separator character
|
|
||||||
if (num_bytes_written + line.len() as u64) < num_bytes {
|
// if there is nothing else to read - exit the loop
|
||||||
line.push(sep);
|
if num_bytes_read == 0 {
|
||||||
}
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
let bytes = line.as_slice();
|
let bytes = line.as_slice();
|
||||||
|
|
||||||
match kth_chunk {
|
match kth_chunk {
|
||||||
Some(chunk_number) => {
|
Some(chunk_number) => {
|
||||||
// The `.enumerate()` method returns index `i` starting with 0,
|
|
||||||
// but chunk number is given as a 1-indexed number,
|
|
||||||
// so compare to `chunk_number - 1`
|
|
||||||
if (i % num_chunks) == (chunk_number - 1) as usize {
|
if (i % num_chunks) == (chunk_number - 1) as usize {
|
||||||
stdout_writer.write_all(bytes)?;
|
stdout_writer.write_all(bytes)?;
|
||||||
}
|
}
|
||||||
|
@ -1632,17 +1610,15 @@ where
|
||||||
let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
|
let writer_stdin_open = custom_write_all(bytes, writer, settings)?;
|
||||||
if !writer_stdin_open {
|
if !writer_stdin_open {
|
||||||
closed_writers += 1;
|
closed_writers += 1;
|
||||||
if closed_writers == num_chunks {
|
|
||||||
// all writers are closed - stop reading
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let num_line_bytes = bytes.len() as u64;
|
i += 1;
|
||||||
num_bytes_written += num_line_bytes;
|
if closed_writers == num_chunks {
|
||||||
|
// all writers are closed - stop reading
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,11 +2,13 @@
|
||||||
//
|
//
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc
|
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes threebytes asciilowercase ghijkl mnopq rstuv wxyz fivelines twohundredfortyonebytes onehundredlines nbbbb dxen ncccc rlimit NOFILE
|
||||||
|
|
||||||
use crate::common::util::{AtPath, TestScenario};
|
use crate::common::util::{AtPath, TestScenario};
|
||||||
use rand::{thread_rng, Rng, SeedableRng};
|
use rand::{thread_rng, Rng, SeedableRng};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||||
|
use rlimit::Resource;
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
@ -1250,10 +1252,19 @@ fn test_number_by_lines_kth_no_end_sep() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("2222\n");
|
.stdout_only("2222\n");
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-e", "-n", "l/8/10"])
|
.args(&["-e", "-n", "l/2/2"])
|
||||||
.pipe_in("1\n2222\n3\n4")
|
.pipe_in("1\n2222\n3\n4")
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("3\n");
|
.stdout_only("3\n4");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_number_by_lines_rr_kth_no_end_sep() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "r/2/3"])
|
||||||
|
.pipe_in("1\n2\n3\n4\n5")
|
||||||
|
.succeeds()
|
||||||
|
.stdout_only("2\n5");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1626,6 +1637,15 @@ fn test_round_robin() {
|
||||||
assert_eq!(at.read("xab"), "2\n4\n");
|
assert_eq!(at.read("xab"), "2\n4\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||||
|
fn test_round_robin_limited_file_descriptors() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-n", "r/40", "onehundredlines.txt"])
|
||||||
|
.limit(Resource::NOFILE, 9, 9)
|
||||||
|
.succeeds();
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_invalid_input() {
|
fn test_split_invalid_input() {
|
||||||
// Test if stdout/stderr for '--lines' option is correct
|
// Test if stdout/stderr for '--lines' option is correct
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue