mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
Fix a bug in split where chunking would be skipped when the chunk size happened to be an exact divisor of the buffer size (#3800)
* Fix a bug in split where chunking would be skipped when the chunk size happened to be an exact divisor of the buffer size used to read the input stream. The issue here was that the file was being split byte-wise into chunks of 1G, while the input stream was being read in chunks of 8KB, which evenly divides the chunk size. Because the check to allocate the next output chunk was previously done at the bottom of the loop, it would never occur, since the current input chunk was fully consumed at that point. By moving the check to the top of the loop (but still late enough that we know we have bytes to write), we resolve this issue. This scenario is unfortunately hard to write a test for, since we don't explicitly control the input chunk size. Fixes https://github.com/uutils/coreutils/issues/3790
This commit is contained in:
parent
5ecabb8467
commit
9fad6fde35
2 changed files with 31 additions and 21 deletions
|
@ -618,6 +618,21 @@ impl<'a> Write for ByteChunkWriter<'a> {
|
|||
return Ok(carryover_bytes_written);
|
||||
}
|
||||
|
||||
if self.num_bytes_remaining_in_current_chunk == 0 {
|
||||
// Increment the chunk number, reset the number of bytes remaining, and instantiate the new underlying writer.
|
||||
self.num_chunks_written += 1;
|
||||
self.num_bytes_remaining_in_current_chunk = self.chunk_size;
|
||||
|
||||
// Allocate the new file, since at this point we know there are bytes to be written to it.
|
||||
let filename = self.filename_iterator.next().ok_or_else(|| {
|
||||
std::io::Error::new(ErrorKind::Other, "output file suffixes exhausted")
|
||||
})?;
|
||||
if self.settings.verbose {
|
||||
println!("creating file {}", filename.quote());
|
||||
}
|
||||
self.inner = self.settings.instantiate_current_writer(&filename)?;
|
||||
}
|
||||
|
||||
// If the capacity of this chunk is greater than the number of
|
||||
// bytes in `buf`, then write all the bytes in `buf`. Otherwise,
|
||||
// write enough bytes to fill the current chunk, then increment
|
||||
|
@ -635,38 +650,18 @@ impl<'a> Write for ByteChunkWriter<'a> {
|
|||
// n, which is already usize.
|
||||
let i = self.num_bytes_remaining_in_current_chunk as usize;
|
||||
let num_bytes_written = self.inner.write(&buf[..i])?;
|
||||
self.num_bytes_remaining_in_current_chunk -= num_bytes_written as u64;
|
||||
|
||||
// It's possible that the underlying writer did not
|
||||
// write all the bytes.
|
||||
if num_bytes_written < i {
|
||||
self.num_bytes_remaining_in_current_chunk -= num_bytes_written as u64;
|
||||
return Ok(carryover_bytes_written + num_bytes_written);
|
||||
} else {
|
||||
// Move the window to look at only the remaining bytes.
|
||||
buf = &buf[i..];
|
||||
|
||||
// Increment the chunk number, reset the number of
|
||||
// bytes remaining, and instantiate the new
|
||||
// underlying writer.
|
||||
self.num_chunks_written += 1;
|
||||
self.num_bytes_remaining_in_current_chunk = self.chunk_size;
|
||||
|
||||
// Remember for the next iteration that we wrote these bytes.
|
||||
carryover_bytes_written += num_bytes_written;
|
||||
|
||||
// Only create the writer for the next chunk if
|
||||
// there are any remaining bytes to write. This
|
||||
// check prevents us from creating a new empty
|
||||
// file.
|
||||
if !buf.is_empty() {
|
||||
let filename = self.filename_iterator.next().ok_or_else(|| {
|
||||
std::io::Error::new(ErrorKind::Other, "output file suffixes exhausted")
|
||||
})?;
|
||||
if self.settings.verbose {
|
||||
println!("creating file {}", filename.quote());
|
||||
}
|
||||
self.inner = self.settings.instantiate_current_writer(&filename)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -683,3 +683,18 @@ fn test_guard_input() {
|
|||
.stderr_only("split: 'xaa' would overwrite input; aborting");
|
||||
assert_eq!(at.read("xaa"), "1\n2\n3\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_of_input_chunk() {
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
let name = "multiple_of_input_chunk";
|
||||
RandomFile::new(&at, name).add_bytes(16 * 1024);
|
||||
ucmd.args(&["-b", "8K", name, "b"]).succeeds();
|
||||
|
||||
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
||||
assert_eq!(glob.count(), 2);
|
||||
for filename in glob.collect() {
|
||||
assert_eq!(glob.directory.metadata(&filename).len(), 8 * 1024);
|
||||
}
|
||||
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue