1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

split: implement outputting kth chunk of file

Implement `-n l/k/N` option, where the `k`th chunk of the input file
is written to stdout. For example,

    $ seq -w 0 99 > f; split -n l/3/10 f
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
This commit is contained in:
Jeffrey Finkelstein 2022-01-05 21:06:04 -05:00 committed by Sylvestre Ledru
parent bb379b5384
commit ee36dea1a9
3 changed files with 180 additions and 0 deletions

View file

@ -859,6 +859,11 @@ where
/// ///
/// This function returns an error if there is a problem reading from /// This function returns an error if there is a problem reading from
/// `reader` or writing to one of the output files. /// `reader` or writing to one of the output files.
///
/// # See also
///
/// * [`kth_chunk_by_line`], which splits its input in the same way,
/// but writes only one specified chunk to stdout.
fn split_into_n_chunks_by_line<R>( fn split_into_n_chunks_by_line<R>(
settings: &Settings, settings: &Settings,
reader: &mut R, reader: &mut R,
@ -915,6 +920,67 @@ where
Ok(()) Ok(())
} }
/// Print the k-th chunk of a file, splitting by line.
///
/// This function is like [`split_into_n_chunks_by_line`], but instead
/// of writing each chunk to its own file, it only writes to stdout
/// the contents of the chunk identified by `chunk_number`.
///
/// The nominal chunk size is the size in bytes of the file named by
/// `settings.input` divided by `num_chunks`. Lines are never broken:
/// a chunk ends with the line that uses up its byte budget.
///
/// # Arguments
///
/// * `settings` - only `settings.input` is read, to look up the size
///   of the input file.
/// * `reader` - the source of the lines to be split.
/// * `chunk_number` - zero-based index of the chunk to print.
/// * `num_chunks` - total number of chunks the input is divided into.
///   NOTE(review): a value of zero would panic on the division below;
///   presumably rejected during argument parsing, confirm against the
///   caller.
///
/// # Errors
///
/// This function returns an error if the metadata of the input file
/// cannot be read (for example, because the file does not exist), if
/// there is a problem reading from `reader`, or if there is a problem
/// writing to stdout.
///
/// # See also
///
/// * [`split_into_n_chunks_by_line`], which splits its input in the
///   same way, but writes each chunk to its own file.
fn kth_chunk_by_line<R>(
    settings: &Settings,
    reader: &mut R,
    chunk_number: u64,
    num_chunks: u64,
) -> UResult<()>
where
    R: BufRead,
{
    // Get the size of the input file in bytes and compute the number
    // of bytes per chunk. A failure here is an ordinary I/O error, so
    // propagate it to the caller instead of panicking.
    let num_bytes = metadata(&settings.input)?.len();
    // Keep the byte accounting in `u64` so that large files are not
    // truncated on targets where `usize` is narrower than 64 bits.
    let chunk_size = num_bytes / num_chunks;

    // Write to stdout instead of to a file.
    let stdout = std::io::stdout();
    let mut writer = stdout.lock();

    let mut num_bytes_remaining_in_current_chunk = chunk_size;
    // Zero-based index of the chunk that the current line belongs to.
    let mut i: u64 = 0;
    for line_result in reader.lines() {
        let line = line_result?;
        let bytes = line.as_bytes();
        if i == chunk_number {
            writer.write_all(bytes)?;
            writer.write_all(b"\n")?;
        }

        // Add one byte for the newline character. Widening `usize`
        // to `u64` is lossless on every supported target.
        let num_bytes = bytes.len() as u64 + 1;
        if num_bytes >= num_bytes_remaining_in_current_chunk {
            // This line exhausts the current chunk; the next line
            // starts a fresh chunk with a full byte budget.
            num_bytes_remaining_in_current_chunk = chunk_size;
            i += 1;
        } else {
            num_bytes_remaining_in_current_chunk -= num_bytes;
        }

        // Once the requested chunk has been passed there is nothing
        // left to print, so stop reading early.
        if i > chunk_number {
            break;
        }
    }
    Ok(())
}
fn split(settings: &Settings) -> UResult<()> { fn split(settings: &Settings) -> UResult<()> {
let mut reader = BufReader::new(if settings.input == "-" { let mut reader = BufReader::new(if settings.input == "-" {
Box::new(stdin()) as Box<dyn Read> Box::new(stdin()) as Box<dyn Read>
@ -935,6 +1001,12 @@ fn split(settings: &Settings) -> UResult<()> {
Strategy::Number(NumberType::Lines(num_chunks)) => { Strategy::Number(NumberType::Lines(num_chunks)) => {
split_into_n_chunks_by_line(settings, &mut reader, num_chunks) split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
} }
Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => {
// The chunk number is given as a 1-indexed number, but it
// is a little easier to deal with a 0-indexed number.
let chunk_number = chunk_number - 1;
kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
}
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")), Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
Strategy::Lines(chunk_size) => { Strategy::Lines(chunk_size) => {
let mut writer = LineChunkWriter::new(chunk_size, settings) let mut writer = LineChunkWriter::new(chunk_size, settings)

View file

@ -587,3 +587,11 @@ fn test_lines() {
assert_eq!(file_read("xaa"), "1\n2\n3\n"); assert_eq!(file_read("xaa"), "1\n2\n3\n");
assert_eq!(file_read("xab"), "4\n5\n"); assert_eq!(file_read("xab"), "4\n5\n");
} }
/// Requesting chunk 3 of 10 (`-n l/3/10`) of the one-hundred-line
/// fixture must print exactly lines `20` through `29` to stdout.
#[test]
fn test_lines_kth() {
    let expected = "20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n";
    new_ucmd!()
        .args(&["-n", "l/3/10", "onehundredlines.txt"])
        .succeeds()
        .stdout_only(expected);
}

100
tests/fixtures/split/onehundredlines.txt vendored Normal file
View file

@ -0,0 +1,100 @@
00
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99