mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
head: fix bug reading back through files (#7248)
* head: fix bug reading back through files Fix issue #7247. Rework logic for reading/seeking backwards through files. Bug was seen when reading back through large files. Added test case to validate fix.
This commit is contained in:
parent
93d58b17b4
commit
f94ff78ea4
2 changed files with 104 additions and 22 deletions
|
@ -350,16 +350,10 @@ fn read_but_last_n_lines(
|
||||||
/// Return the index in `input` just after the `n`th line from the end.
|
/// Return the index in `input` just after the `n`th line from the end.
|
||||||
///
|
///
|
||||||
/// If `n` exceeds the number of lines in this file, then return 0.
|
/// If `n` exceeds the number of lines in this file, then return 0.
|
||||||
///
|
/// This function rewinds the cursor to the
|
||||||
/// The cursor must be at the start of the seekable input before
|
|
||||||
/// calling this function. This function rewinds the cursor to the
|
|
||||||
/// beginning of the input just before returning unless there is an
|
/// beginning of the input just before returning unless there is an
|
||||||
/// I/O error.
|
/// I/O error.
|
||||||
///
|
///
|
||||||
/// If `zeroed` is `false`, interpret the newline character `b'\n'` as
|
|
||||||
/// a line ending. If `zeroed` is `true`, interpret the null character
|
|
||||||
/// `b'\0'` as a line ending instead.
|
|
||||||
///
|
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
/// This function returns an error if there is a problem seeking
|
/// This function returns an error if there is a problem seeking
|
||||||
|
@ -390,20 +384,21 @@ fn find_nth_line_from_end<R>(input: &mut R, n: u64, separator: u8) -> std::io::R
|
||||||
where
|
where
|
||||||
R: Read + Seek,
|
R: Read + Seek,
|
||||||
{
|
{
|
||||||
let size = input.seek(SeekFrom::End(0))?;
|
let file_size = input.seek(SeekFrom::End(0))?;
|
||||||
|
|
||||||
let mut buffer = [0u8; BUF_SIZE];
|
let mut buffer = [0u8; BUF_SIZE];
|
||||||
let buf_size: usize = (BUF_SIZE as u64).min(size).try_into().unwrap();
|
|
||||||
let buffer = &mut buffer[..buf_size];
|
|
||||||
|
|
||||||
let mut i = 0u64;
|
let mut i = 0u64;
|
||||||
let mut lines = 0u64;
|
let mut lines = 0u64;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
// the casts here are ok, `buffer.len()` should never be above a few k
|
// the casts here are ok, `buffer.len()` should never be above a few k
|
||||||
input.seek(SeekFrom::Current(
|
let bytes_remaining_to_search = file_size - i;
|
||||||
-((buffer.len() as i64).min((size - i) as i64)),
|
let bytes_to_read_this_loop = bytes_remaining_to_search.min(BUF_SIZE.try_into().unwrap());
|
||||||
))?;
|
let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
|
||||||
|
let buffer = &mut buffer[..bytes_to_read_this_loop.try_into().unwrap()];
|
||||||
|
|
||||||
|
input.seek(SeekFrom::Start(read_start_offset))?;
|
||||||
input.read_exact(buffer)?;
|
input.read_exact(buffer)?;
|
||||||
for byte in buffer.iter().rev() {
|
for byte in buffer.iter().rev() {
|
||||||
if byte == &separator {
|
if byte == &separator {
|
||||||
|
@ -412,11 +407,11 @@ where
|
||||||
// if it were just `n`,
|
// if it were just `n`,
|
||||||
if lines == n + 1 {
|
if lines == n + 1 {
|
||||||
input.rewind()?;
|
input.rewind()?;
|
||||||
return Ok(size - i);
|
return Ok(file_size - i);
|
||||||
}
|
}
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
if size - i == 0 {
|
if file_size - i == 0 {
|
||||||
input.rewind()?;
|
input.rewind()?;
|
||||||
return Ok(0);
|
return Ok(0);
|
||||||
}
|
}
|
||||||
|
@ -700,12 +695,59 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_find_nth_line_from_end() {
|
fn test_find_nth_line_from_end() {
|
||||||
let mut input = Cursor::new("x\ny\nz\n");
|
// Make sure our input buffer is several multiples of BUF_SIZE in size
|
||||||
assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 6);
|
// such that we can be reasonably confident we've exercised all logic paths.
|
||||||
assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 4);
|
// Make the contents of the buffer look like...
|
||||||
assert_eq!(find_nth_line_from_end(&mut input, 2, b'\n').unwrap(), 2);
|
// aaaa\n
|
||||||
assert_eq!(find_nth_line_from_end(&mut input, 3, b'\n').unwrap(), 0);
|
// aaaa\n
|
||||||
assert_eq!(find_nth_line_from_end(&mut input, 4, b'\n').unwrap(), 0);
|
// aaaa\n
|
||||||
assert_eq!(find_nth_line_from_end(&mut input, 1000, b'\n').unwrap(), 0);
|
// aaaa\n
|
||||||
|
// aaaa\n
|
||||||
|
// ...
|
||||||
|
// This will make it easier to validate the results since each line will have
|
||||||
|
// 5 bytes in it.
|
||||||
|
|
||||||
|
let minimum_buffer_size = BUF_SIZE * 4;
|
||||||
|
let mut input_buffer = vec![];
|
||||||
|
let mut loop_iteration: u64 = 0;
|
||||||
|
while input_buffer.len() < minimum_buffer_size {
|
||||||
|
for _n in 0..4 {
|
||||||
|
input_buffer.push(b'a');
|
||||||
|
}
|
||||||
|
loop_iteration += 1;
|
||||||
|
input_buffer.push(b'\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
let lines_in_input_file = loop_iteration;
|
||||||
|
let input_length = lines_in_input_file * 5;
|
||||||
|
assert_eq!(input_length, input_buffer.len().try_into().unwrap());
|
||||||
|
let mut input = Cursor::new(input_buffer);
|
||||||
|
// We now have loop_iteration lines in the buffer Now walk backwards through the buffer
|
||||||
|
// to confirm everything parses correctly.
|
||||||
|
// Use a large step size to prevent the test from taking too long, but don't use a power
|
||||||
|
// of 2 in case we miss some corner case.
|
||||||
|
let step_size = 511;
|
||||||
|
for n in (0..lines_in_input_file).filter(|v| v % step_size == 0) {
|
||||||
|
// The 5*n comes from 5-bytes per row.
|
||||||
|
assert_eq!(
|
||||||
|
find_nth_line_from_end(&mut input, n, b'\n').unwrap(),
|
||||||
|
input_length - 5 * n
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now confirm that if we query with a value >= lines_in_input_file we get an offset
|
||||||
|
// of 0
|
||||||
|
assert_eq!(
|
||||||
|
find_nth_line_from_end(&mut input, lines_in_input_file, b'\n').unwrap(),
|
||||||
|
0
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
find_nth_line_from_end(&mut input, lines_in_input_file + 1, b'\n').unwrap(),
|
||||||
|
0
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
find_nth_line_from_end(&mut input, lines_in_input_file + 1000, b'\n').unwrap(),
|
||||||
|
0
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -392,6 +392,46 @@ fn test_presume_input_pipe_5_chars() {
|
||||||
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
|
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_read_backwards_lines_large_file() {
|
||||||
|
// Create our fixtures on the fly. We need the input file to be at least double
|
||||||
|
// the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger
|
||||||
|
// than that.
|
||||||
|
let scene = TestScenario::new(util_name!());
|
||||||
|
let fixtures = &scene.fixtures;
|
||||||
|
let seq_30000_file_name = "seq_30000";
|
||||||
|
let seq_1000_file_name = "seq_1000";
|
||||||
|
scene
|
||||||
|
.cmd("seq")
|
||||||
|
.arg("30000")
|
||||||
|
.set_stdout(fixtures.make_file(seq_30000_file_name))
|
||||||
|
.succeeds();
|
||||||
|
scene
|
||||||
|
.cmd("seq")
|
||||||
|
.arg("1000")
|
||||||
|
.set_stdout(fixtures.make_file(seq_1000_file_name))
|
||||||
|
.succeeds();
|
||||||
|
|
||||||
|
// Now run our tests.
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.args(&["-n", "-29000", "seq_30000"])
|
||||||
|
.succeeds()
|
||||||
|
.stdout_is_fixture("seq_1000");
|
||||||
|
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.args(&["-n", "-30000", "seq_30000"])
|
||||||
|
.run()
|
||||||
|
.stdout_is_fixture("emptyfile.txt");
|
||||||
|
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.args(&["-n", "-30001", "seq_30000"])
|
||||||
|
.run()
|
||||||
|
.stdout_is_fixture("emptyfile.txt");
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(all(
|
#[cfg(all(
|
||||||
not(target_os = "windows"),
|
not(target_os = "windows"),
|
||||||
not(target_os = "macos"),
|
not(target_os = "macos"),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue