From 18bfd1ac68e8c0b35a867bba05c3f6972bdf3432 Mon Sep 17 00:00:00 2001
From: Jeffrey Finkelstein
Date: Sun, 2 Jan 2022 22:38:27 -0500
Subject: [PATCH] split: implement round-robin arg to --number

Implement distributing lines of a file in a round-robin manner to a
specified number of chunks. For example,

    $ (seq 1 10 | split -n r/3) && head -v xa[abc]
    ==> xaa <==
    1
    4
    7
    10

    ==> xab <==
    2
    5
    8

    ==> xac <==
    3
    6
    9
---
 src/uu/split/src/split.rs   | 60 +++++++++++++++++++++++++++++++++++++
 tests/by-util/test_split.rs | 16 ++++++++++
 2 files changed, 76 insertions(+)

diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs
index 73abc966b..fb8c44dcb 100644
--- a/src/uu/split/src/split.rs
+++ b/src/uu/split/src/split.rs
@@ -1162,6 +1162,63 @@ where
     Ok(())
 }
 
+/// Split the lines of `reader` into `num_chunks` output files, round-robin.
+///
+/// Line `i` of the input (counting from zero) is written to chunk
+/// `i % num_chunks`; each output file is named by a `FilenameIterator`
+/// built from `settings`.
+///
+/// # Errors
+///
+/// Returns an error if `num_chunks` is zero, if the output file
+/// suffixes are exhausted, or if reading the input or writing to a
+/// chunk fails.
+fn split_into_n_chunks_by_line_round_robin<R>(
+    settings: &Settings,
+    reader: &mut R,
+    num_chunks: u64,
+) -> UResult<()>
+where
+    R: BufRead,
+{
+    // Without any chunk there is nowhere to send a line, and the
+    // `i % num_chunks` below would panic on a zero divisor.
+    if num_chunks == 0 {
+        return Err(USimpleError::new(1, "invalid number of chunks: 0"));
+    }
+
+    // This object is responsible for creating the filename for each chunk.
+    let mut filename_iterator = FilenameIterator::new(
+        &settings.prefix,
+        &settings.additional_suffix,
+        settings.suffix_length,
+        settings.suffix_type,
+    );
+
+    // Create one writer for each chunk. This will create each
+    // of the underlying files (if not in `--filter` mode).
+    let mut writers = vec![];
+    for _ in 0..num_chunks {
+        let filename = filename_iterator
+            .next()
+            .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
+        let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
+        writers.push(writer);
+    }
+
+    // Read raw bytes rather than `lines()` so that non-UTF-8 input and
+    // `\r` characters pass through unchanged. Every line is written with
+    // a trailing newline, including a final line that lacked one.
+    let num_chunks = writers.len();
+    for (i, line_result) in reader.split(b'\n').enumerate() {
+        let mut line = line_result?;
+        line.push(b'\n');
+        writers[i % num_chunks].write_all(&line)?;
+    }
+
+    Ok(())
+}
+
 fn split(settings: &Settings) -> UResult<()> {
     let mut reader = BufReader::new(if settings.input == "-" {
         Box::new(stdin()) as Box<dyn Read>
@@ -1188,6 +1245,9 @@ fn split(settings: &Settings) -> UResult<()> {
             let chunk_number = chunk_number - 1;
             kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
         }
+        Strategy::Number(NumberType::RoundRobin(num_chunks)) => {
+            split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks)
+        }
         Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
         Strategy::Lines(chunk_size) => {
             let mut writer = LineChunkWriter::new(chunk_size, settings)
diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs
index 642cb7c68..814074f3c 100644
--- a/tests/by-util/test_split.rs
+++ b/tests/by-util/test_split.rs
@@ -605,3 +605,19 @@ fn test_line_bytes() {
     assert_eq!(at.read("xac"), "cccc\ndd\n");
     assert_eq!(at.read("xad"), "ee\n");
 }
+
+#[test]
+fn test_round_robin() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    let file_read = |f| {
+        let mut s = String::new();
+        at.open(f).read_to_string(&mut s).unwrap();
+        s
+    };
+
+    ucmd.args(&["-n", "r/2", "fivelines.txt"]).succeeds();
+
+    assert_eq!(file_read("xaa"), "1\n3\n5\n");
+    assert_eq!(file_read("xab"), "2\n4\n");
+}