mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
split: implement outputting kth chunk of file
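Implement the `-n l/k/N` option, which splits the input file into `N` line-based chunks and writes only the `k`th chunk to stdout instead of creating output files. For example, after `seq -w 0 99 > f`, running `split -n l/3/10 f` prints the ten lines 20 21 22 23 24 25 26 27 28 29, one per line.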
This commit is contained in:
parent
bb379b5384
commit
ee36dea1a9
3 changed files with 180 additions and 0 deletions
|
@ -859,6 +859,11 @@ where
|
||||||
///
|
///
|
||||||
/// This function returns an error if there is a problem reading from
|
/// This function returns an error if there is a problem reading from
|
||||||
/// `reader` or writing to one of the output files.
|
/// `reader` or writing to one of the output files.
|
||||||
|
///
|
||||||
|
/// # See also
|
||||||
|
///
|
||||||
|
/// * [`kth_chunk_by_line`], which splits its input in the same way,
|
||||||
|
/// but writes only one specified chunk to stdout.
|
||||||
fn split_into_n_chunks_by_line<R>(
|
fn split_into_n_chunks_by_line<R>(
|
||||||
settings: &Settings,
|
settings: &Settings,
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
|
@ -915,6 +920,67 @@ where
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Print the k-th chunk of a file, splitting by line.
|
||||||
|
///
|
||||||
|
/// This function is like [`split_into_n_chunks_by_line`], but instead
|
||||||
|
/// of writing each chunk to its own file, it only writes to stdout
|
||||||
|
/// the contents of the chunk identified by `chunk_number`.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// This function returns an error if there is a problem reading from
|
||||||
|
/// `reader` or writing to one of the output files.
|
||||||
|
///
|
||||||
|
/// # See also
|
||||||
|
///
|
||||||
|
/// * [`split_into_n_chunks_by_line`], which splits its input in the
|
||||||
|
/// same way, but writes each chunk to its own file.
|
||||||
|
fn kth_chunk_by_line<R>(
|
||||||
|
settings: &Settings,
|
||||||
|
reader: &mut R,
|
||||||
|
chunk_number: u64,
|
||||||
|
num_chunks: u64,
|
||||||
|
) -> UResult<()>
|
||||||
|
where
|
||||||
|
R: BufRead,
|
||||||
|
{
|
||||||
|
// Get the size of the input file in bytes and compute the number
|
||||||
|
// of bytes per chunk.
|
||||||
|
let metadata = metadata(&settings.input).unwrap();
|
||||||
|
let num_bytes = metadata.len();
|
||||||
|
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
|
||||||
|
|
||||||
|
// Write to stdout instead of to a file.
|
||||||
|
let stdout = std::io::stdout();
|
||||||
|
let mut writer = stdout.lock();
|
||||||
|
|
||||||
|
let mut num_bytes_remaining_in_current_chunk = chunk_size;
|
||||||
|
let mut i = 0;
|
||||||
|
for line_result in reader.lines() {
|
||||||
|
let line = line_result?;
|
||||||
|
let bytes = line.as_bytes();
|
||||||
|
if i == chunk_number {
|
||||||
|
writer.write_all(bytes)?;
|
||||||
|
writer.write_all(b"\n")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add one byte for the newline character.
|
||||||
|
let num_bytes = bytes.len() + 1;
|
||||||
|
if num_bytes >= num_bytes_remaining_in_current_chunk {
|
||||||
|
num_bytes_remaining_in_current_chunk = chunk_size;
|
||||||
|
i += 1;
|
||||||
|
} else {
|
||||||
|
num_bytes_remaining_in_current_chunk -= num_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
if i > chunk_number {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn split(settings: &Settings) -> UResult<()> {
|
fn split(settings: &Settings) -> UResult<()> {
|
||||||
let mut reader = BufReader::new(if settings.input == "-" {
|
let mut reader = BufReader::new(if settings.input == "-" {
|
||||||
Box::new(stdin()) as Box<dyn Read>
|
Box::new(stdin()) as Box<dyn Read>
|
||||||
|
@ -935,6 +1001,12 @@ fn split(settings: &Settings) -> UResult<()> {
|
||||||
Strategy::Number(NumberType::Lines(num_chunks)) => {
|
Strategy::Number(NumberType::Lines(num_chunks)) => {
|
||||||
split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
|
split_into_n_chunks_by_line(settings, &mut reader, num_chunks)
|
||||||
}
|
}
|
||||||
|
Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => {
|
||||||
|
// The chunk number is given as a 1-indexed number, but it
|
||||||
|
// is a little easier to deal with a 0-indexed number.
|
||||||
|
let chunk_number = chunk_number - 1;
|
||||||
|
kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks)
|
||||||
|
}
|
||||||
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
|
Strategy::Number(_) => Err(USimpleError::new(1, "-n mode not yet fully implemented")),
|
||||||
Strategy::Lines(chunk_size) => {
|
Strategy::Lines(chunk_size) => {
|
||||||
let mut writer = LineChunkWriter::new(chunk_size, settings)
|
let mut writer = LineChunkWriter::new(chunk_size, settings)
|
||||||
|
|
|
@ -587,3 +587,11 @@ fn test_lines() {
|
||||||
assert_eq!(file_read("xaa"), "1\n2\n3\n");
|
assert_eq!(file_read("xaa"), "1\n2\n3\n");
|
||||||
assert_eq!(file_read("xab"), "4\n5\n");
|
assert_eq!(file_read("xab"), "4\n5\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_lines_kth() {
    // Ask for chunk 3 of 10 (line-based) of the 100-line fixture; only
    // that chunk must appear on stdout, and nothing on stderr.
    let args = ["-n", "l/3/10", "onehundredlines.txt"];
    let expected_stdout = "20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n";

    new_ucmd!()
        .args(&args)
        .succeeds()
        .stdout_only(expected_stdout);
}
|
||||||
|
|
100
tests/fixtures/split/onehundredlines.txt
vendored
Normal file
100
tests/fixtures/split/onehundredlines.txt
vendored
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
00
|
||||||
|
01
|
||||||
|
02
|
||||||
|
03
|
||||||
|
04
|
||||||
|
05
|
||||||
|
06
|
||||||
|
07
|
||||||
|
08
|
||||||
|
09
|
||||||
|
10
|
||||||
|
11
|
||||||
|
12
|
||||||
|
13
|
||||||
|
14
|
||||||
|
15
|
||||||
|
16
|
||||||
|
17
|
||||||
|
18
|
||||||
|
19
|
||||||
|
20
|
||||||
|
21
|
||||||
|
22
|
||||||
|
23
|
||||||
|
24
|
||||||
|
25
|
||||||
|
26
|
||||||
|
27
|
||||||
|
28
|
||||||
|
29
|
||||||
|
30
|
||||||
|
31
|
||||||
|
32
|
||||||
|
33
|
||||||
|
34
|
||||||
|
35
|
||||||
|
36
|
||||||
|
37
|
||||||
|
38
|
||||||
|
39
|
||||||
|
40
|
||||||
|
41
|
||||||
|
42
|
||||||
|
43
|
||||||
|
44
|
||||||
|
45
|
||||||
|
46
|
||||||
|
47
|
||||||
|
48
|
||||||
|
49
|
||||||
|
50
|
||||||
|
51
|
||||||
|
52
|
||||||
|
53
|
||||||
|
54
|
||||||
|
55
|
||||||
|
56
|
||||||
|
57
|
||||||
|
58
|
||||||
|
59
|
||||||
|
60
|
||||||
|
61
|
||||||
|
62
|
||||||
|
63
|
||||||
|
64
|
||||||
|
65
|
||||||
|
66
|
||||||
|
67
|
||||||
|
68
|
||||||
|
69
|
||||||
|
70
|
||||||
|
71
|
||||||
|
72
|
||||||
|
73
|
||||||
|
74
|
||||||
|
75
|
||||||
|
76
|
||||||
|
77
|
||||||
|
78
|
||||||
|
79
|
||||||
|
80
|
||||||
|
81
|
||||||
|
82
|
||||||
|
83
|
||||||
|
84
|
||||||
|
85
|
||||||
|
86
|
||||||
|
87
|
||||||
|
88
|
||||||
|
89
|
||||||
|
90
|
||||||
|
91
|
||||||
|
92
|
||||||
|
93
|
||||||
|
94
|
||||||
|
95
|
||||||
|
96
|
||||||
|
97
|
||||||
|
98
|
||||||
|
99
|
Loading…
Add table
Add a link
Reference in a new issue