mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
split: implement outputting kth chunk of file
Implement the `-n l/k/N` option, which writes only the `k`th of `N` line-based chunks of the input file to stdout. For example, `$ seq -w 0 99 > f; split -n l/3/10 f` prints the lines 20 21 22 23 24 25 26 27 28 29, one per line.
This commit is contained in:
parent
bb379b5384
commit
ee36dea1a9
3 changed files with 180 additions and 0 deletions
|
@ -859,6 +859,11 @@ where
|
|||
///
|
||||
/// This function returns an error if there is a problem reading from
|
||||
/// `reader` or writing to one of the output files.
|
||||
///
|
||||
/// # See also
|
||||
///
|
||||
/// * [`kth_chunk_by_line`], which splits its input in the same way,
|
||||
/// but writes only one specified chunk to stdout.
|
||||
fn split_into_n_chunks_by_line<R>(
|
||||
settings: &Settings,
|
||||
reader: &mut R,
|
||||
|
@ -915,6 +920,67 @@ where
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Print the k-th chunk of a file, splitting by line.
|
||||
///
|
||||
/// This function is like [`split_into_n_chunks_by_line`], but instead
|
||||
/// of writing each chunk to its own file, it only writes to stdout
|
||||
/// the contents of the chunk identified by `chunk_number`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// This function returns an error if there is a problem reading from
|
||||
/// `reader` or writing to one of the output files.
|
||||
///
|
||||
/// # See also
|
||||
///
|
||||
/// * [`split_into_n_chunks_by_line`], which splits its input in the
|
||||
/// same way, but writes each chunk to its own file.
|
||||
fn kth_chunk_by_line<R>(
|
||||
settings: &Settings,
|
||||
reader: &mut R,
|
||||
chunk_number: u64,
|
||||
num_chunks: u64,
|
||||
) -> UResult<()>
|
||||
where
|
||||
R: BufRead,
|
||||
{
|
||||
// Get the size of the input file in bytes and compute the number
|
||||
// of bytes per chunk.
|
||||
let metadata = metadata(&settings.input).unwrap();
|
||||
let num_bytes = metadata.len();
|
||||
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
|
||||
|
||||
// Write to stdout instead of to a file.
|
||||
let stdout = std::io::stdout();
|
||||
let mut writer = stdout.lock();
|
||||
|
||||
let mut num_bytes_remaining_in_current_chunk = chunk_size;
|
||||
let mut i = 0;
|
||||
for line_result in reader.lines() {
|
||||
let line = line_result?;
|
||||
let bytes = line.as_bytes();
|
||||
if i == chunk_number {
|
||||
writer.write_all(bytes)?;
|
||||
writer.write_all(b"\n")?;
|
||||
}
|
||||
|
||||
// Add one byte for the newline character.
|
||||
let num_bytes = bytes.len() + 1;
|
||||
if num_bytes >= num_bytes_remaining_in_current_chunk {
|
||||
num_bytes_remaining_in_current_chunk = chunk_size;
|
||||
i += 1;
|
||||
} else {
|
||||
num_bytes_remaining_in_current_chunk -= num_bytes;
|
||||
}
|
||||
|
||||
if i > chunk_number {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn split(settings: &Settings) -> UResult<()> {
|
||||
let mut reader = BufReader::new(if settings.input == "-" {
|
||||
Box::new(stdin()) as Box<dyn Read>
|
||||
|
@ -935,6 +1001,12 @@ fn split(settings: &Settings) -> UResult<()> {
|
|||
Strategy::Number(NumberType::Lines(num_chunks)) => {
|
||||
split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
|
||||
}
|
||||
Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => {
|
||||
// The chunk number is given as a 1-indexed number, but it
|
||||
// is a little easier to deal with a 0-indexed number.
|
||||
let chunk_number = chunk_number - 1;
|
||||
kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
|
||||
}
|
||||
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
|
||||
Strategy::Lines(chunk_size) => {
|
||||
let mut writer = LineChunkWriter::new(chunk_size, settings)
|
||||
|
|
|
@ -587,3 +587,11 @@ fn test_lines() {
|
|||
assert_eq!(file_read("xaa"), "1\n2\n3\n");
|
||||
assert_eq!(file_read("xab"), "4\n5\n");
|
||||
}
|
||||
|
||||
/// `-n l/3/10` on a file of one hundred two-digit lines must print
/// exactly the third of the ten chunks, and nothing else, on stdout.
#[test]
fn test_lines_kth() {
    let args = ["-n", "l/3/10", "onehundredlines.txt"];
    let expected = "20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n";
    new_ucmd!().args(&args).succeeds().stdout_only(expected);
}
|
||||
|
|
100
tests/fixtures/split/onehundredlines.txt
vendored
Normal file
100
tests/fixtures/split/onehundredlines.txt
vendored
Normal file
|
@ -0,0 +1,100 @@
|
|||
00
|
||||
01
|
||||
02
|
||||
03
|
||||
04
|
||||
05
|
||||
06
|
||||
07
|
||||
08
|
||||
09
|
||||
10
|
||||
11
|
||||
12
|
||||
13
|
||||
14
|
||||
15
|
||||
16
|
||||
17
|
||||
18
|
||||
19
|
||||
20
|
||||
21
|
||||
22
|
||||
23
|
||||
24
|
||||
25
|
||||
26
|
||||
27
|
||||
28
|
||||
29
|
||||
30
|
||||
31
|
||||
32
|
||||
33
|
||||
34
|
||||
35
|
||||
36
|
||||
37
|
||||
38
|
||||
39
|
||||
40
|
||||
41
|
||||
42
|
||||
43
|
||||
44
|
||||
45
|
||||
46
|
||||
47
|
||||
48
|
||||
49
|
||||
50
|
||||
51
|
||||
52
|
||||
53
|
||||
54
|
||||
55
|
||||
56
|
||||
57
|
||||
58
|
||||
59
|
||||
60
|
||||
61
|
||||
62
|
||||
63
|
||||
64
|
||||
65
|
||||
66
|
||||
67
|
||||
68
|
||||
69
|
||||
70
|
||||
71
|
||||
72
|
||||
73
|
||||
74
|
||||
75
|
||||
76
|
||||
77
|
||||
78
|
||||
79
|
||||
80
|
||||
81
|
||||
82
|
||||
83
|
||||
84
|
||||
85
|
||||
86
|
||||
87
|
||||
88
|
||||
89
|
||||
90
|
||||
91
|
||||
92
|
||||
93
|
||||
94
|
||||
95
|
||||
96
|
||||
97
|
||||
98
|
||||
99
|
Loading…
Add table
Add a link
Reference in a new issue