1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

split: fix a racing condition that causes issue #7869

This commit is contained in:
yuankunzhang 2025-05-03 23:02:41 +08:00 committed by Yuankun Zhang
parent c8dbd185c0
commit d412f582cb
2 changed files with 59 additions and 18 deletions

View file

@ -834,13 +834,7 @@ struct LineChunkWriter<'a> {
impl<'a> LineChunkWriter<'a> {
fn new(chunk_size: u64, settings: &'a Settings) -> UResult<Self> {
let mut filename_iterator = FilenameIterator::new(&settings.prefix, &settings.suffix)?;
let filename = filename_iterator
.next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
if settings.verbose {
println!("creating file {}", filename.quote());
}
let inner = settings.instantiate_current_writer(&filename, true)?;
let inner = Self::start_new_chunk(settings, &mut filename_iterator)?;
Ok(LineChunkWriter {
settings,
chunk_size,
@ -850,6 +844,19 @@ impl<'a> LineChunkWriter<'a> {
filename_iterator,
})
}
fn start_new_chunk(
settings: &Settings,
filename_iterator: &mut FilenameIterator,
) -> io::Result<BufWriter<Box<dyn Write>>> {
let filename = filename_iterator
.next()
.ok_or_else(|| io::Error::other("output file suffixes exhausted"))?;
if settings.verbose {
println!("creating file {}", filename.quote());
}
settings.instantiate_current_writer(&filename, true)
}
}
impl Write for LineChunkWriter<'_> {
@ -869,14 +876,7 @@ impl Write for LineChunkWriter<'_> {
// corresponding writer.
if self.num_lines_remaining_in_current_chunk == 0 {
self.num_chunks_written += 1;
let filename = self
.filename_iterator
.next()
.ok_or_else(|| io::Error::other("output file suffixes exhausted"))?;
if self.settings.verbose {
println!("creating file {}", filename.quote());
}
self.inner = self.settings.instantiate_current_writer(&filename, true)?;
self.inner = Self::start_new_chunk(self.settings, &mut self.filename_iterator)?;
self.num_lines_remaining_in_current_chunk = self.chunk_size;
}
@ -889,9 +889,19 @@ impl Write for LineChunkWriter<'_> {
self.num_lines_remaining_in_current_chunk -= 1;
}
// There might be bytes remaining in the buffer, and we write
// them to the current chunk. But first, we may need to rotate
// the current chunk in case it has already reached its line
// limit.
if prev < buf.len() {
if self.num_lines_remaining_in_current_chunk == 0 {
self.inner = Self::start_new_chunk(self.settings, &mut self.filename_iterator)?;
self.num_lines_remaining_in_current_chunk = self.chunk_size;
}
let num_bytes_written =
custom_write(&buf[prev..buf.len()], &mut self.inner, self.settings)?;
total_bytes_written += num_bytes_written;
}
Ok(total_bytes_written)
}

View file

@ -120,6 +120,15 @@ impl RandomFile {
n -= 1;
}
}
/// Add n lines each of the given size.
fn add_lines_with_line_size(&mut self, lines: usize, line_size: usize) {
let mut n = lines;
while n > 0 {
writeln!(self.inner, "{}", random_chars(line_size)).unwrap();
n -= 1;
}
}
}
#[test]
@ -430,6 +439,28 @@ fn test_split_lines_number() {
.stderr_only("split: invalid number of lines: 'file'\n");
}
/// Test interference between split line size and IO buffer capacity.
/// See issue #7869.
#[test]
fn test_split_lines_interfere_with_io_buf_capacity() {
let buf_capacity = BufWriter::new(Vec::new()).capacity();
// We intentionally set the line size to be less than the IO write buffer
// capacity. This is to trigger the condition where after the first split
// file is written, there are still bytes left in the buffer. We then
// test that those bytes are written to the next split file.
let line_size = buf_capacity - 2;
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_lines_interfere_with_io_buf_capacity";
RandomFile::new(&at, name).add_lines_with_line_size(2, line_size);
ucmd.args(&["-l", "1", name]).succeeds();
// Note that `lines_size` doesn't take the trailing newline into account,
// we add 1 for adjustment.
assert_eq!(at.read("xaa").len(), line_size + 1);
assert_eq!(at.read("xab").len(), line_size + 1);
}
/// Test short lines option with value concatenated
#[test]
fn test_split_lines_short_concatenated_with_value() {