mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #7294 from karlmcdowall/head_read_but_last_n_bug
head: Fix bug printing large non-seekable files
This commit is contained in:
commit
12bb82fac0
2 changed files with 59 additions and 45 deletions
|
@ -7,7 +7,7 @@
|
|||
|
||||
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
|
||||
use std::ffi::OsString;
|
||||
use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write};
|
||||
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
|
||||
use std::num::TryFromIntError;
|
||||
use thiserror::Error;
|
||||
use uucore::display::Quotable;
|
||||
|
@ -239,10 +239,7 @@ impl HeadOptions {
|
|||
}
|
||||
}
|
||||
|
||||
fn read_n_bytes<R>(input: R, n: u64) -> std::io::Result<()>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> {
|
||||
// Read the first `n` bytes from the `input` reader.
|
||||
let mut reader = input.take(n);
|
||||
|
||||
|
@ -287,48 +284,22 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
|
|||
}
|
||||
}
|
||||
|
||||
/// Print to stdout all but the last `n` bytes from the given reader.
|
||||
fn read_but_last_n_bytes(input: &mut impl std::io::BufRead, n: u64) -> std::io::Result<()> {
|
||||
if n == 0 {
|
||||
//prints everything
|
||||
return read_n_bytes(input, u64::MAX);
|
||||
}
|
||||
|
||||
fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<()> {
|
||||
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
|
||||
let stdout = std::io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
let mut ring_buffer = Vec::new();
|
||||
|
||||
let mut buffer = [0u8; BUF_SIZE];
|
||||
let mut total_read = 0;
|
||||
|
||||
loop {
|
||||
let read = match input.read(&mut buffer) {
|
||||
Ok(0) => break,
|
||||
Ok(read) => read,
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::Interrupted => continue,
|
||||
_ => return Err(e),
|
||||
},
|
||||
};
|
||||
|
||||
total_read += read;
|
||||
|
||||
if total_read <= n {
|
||||
// Fill the ring buffer without exceeding n bytes
|
||||
let overflow = n - total_read;
|
||||
ring_buffer.extend_from_slice(&buffer[..read - overflow]);
|
||||
} else {
|
||||
// Write the ring buffer and the part of the buffer that exceeds n
|
||||
stdout.write_all(&ring_buffer)?;
|
||||
stdout.write_all(&buffer[..read - n + ring_buffer.len()])?;
|
||||
ring_buffer.clear();
|
||||
ring_buffer.extend_from_slice(&buffer[read - n + ring_buffer.len()..read]);
|
||||
}
|
||||
let stdout = stdout.lock();
|
||||
// Even though stdout is buffered, it will flush on each newline in the
|
||||
// input stream. This can be costly, so add an extra layer of buffering
|
||||
// over the top. This gives a significant speedup (approx 4x).
|
||||
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
|
||||
for byte in take_all_but(input.bytes(), n) {
|
||||
writer.write_all(&[byte?])?;
|
||||
}
|
||||
// Make sure we finish writing everything to the target before
|
||||
// exiting. Otherwise, when Rust is implicitly flushing, any
|
||||
// error will be silently ignored.
|
||||
writer.flush()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -343,6 +314,10 @@ fn read_but_last_n_lines(
|
|||
for bytes in take_all_but(lines(input, separator), n) {
|
||||
stdout.write_all(&bytes?)?;
|
||||
}
|
||||
// Make sure we finish writing everything to the target before
|
||||
// exiting. Otherwise, when Rust is implicitly flushing, any
|
||||
// error will be silently ignored.
|
||||
stdout.flush()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -440,8 +415,7 @@ fn head_backwards_without_seek_file(
|
|||
input: &mut std::fs::File,
|
||||
options: &HeadOptions,
|
||||
) -> std::io::Result<()> {
|
||||
let reader = &mut std::io::BufReader::with_capacity(BUF_SIZE, &*input);
|
||||
|
||||
let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
|
||||
match options.mode {
|
||||
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n)?,
|
||||
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into())?,
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (words) bogusfile emptyfile abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstu
|
||||
// spell-checker:ignore (words) seekable
|
||||
|
||||
use crate::common::util::TestScenario;
|
||||
|
||||
|
@ -392,6 +393,45 @@ fn test_presume_input_pipe_5_chars() {
|
|||
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_all_but_last_bytes_large_file_piped() {
|
||||
// Validate print-all-but-last-n-bytes with a large piped-in (i.e. non-seekable) file.
|
||||
let scene = TestScenario::new(util_name!());
|
||||
let fixtures = &scene.fixtures;
|
||||
|
||||
// First, create all our fixtures.
|
||||
let seq_30000_file_name = "seq_30000";
|
||||
let seq_29000_file_name = "seq_29000";
|
||||
let seq_29001_30000_file_name = "seq_29001_30000";
|
||||
scene
|
||||
.cmd("seq")
|
||||
.arg("30000")
|
||||
.set_stdout(fixtures.make_file(seq_30000_file_name))
|
||||
.succeeds();
|
||||
scene
|
||||
.cmd("seq")
|
||||
.arg("29000")
|
||||
.set_stdout(fixtures.make_file(seq_29000_file_name))
|
||||
.succeeds();
|
||||
scene
|
||||
.cmd("seq")
|
||||
.args(&["29001", "30000"])
|
||||
.set_stdout(fixtures.make_file(seq_29001_30000_file_name))
|
||||
.succeeds();
|
||||
|
||||
let seq_29001_30000_file_length = fixtures
|
||||
.open(seq_29001_30000_file_name)
|
||||
.metadata()
|
||||
.unwrap()
|
||||
.len();
|
||||
scene
|
||||
.ucmd()
|
||||
.args(&["-c", &format!("-{}", seq_29001_30000_file_length)])
|
||||
.pipe_in_fixture(seq_30000_file_name)
|
||||
.succeeds()
|
||||
.stdout_only_fixture(seq_29000_file_name);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_read_backwards_lines_large_file() {
|
||||
// Create our fixtures on the fly. We need the input file to be at least double
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue