1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Head: ensure stdin input stream is correct on exit

Fix issue #7028
The head tool now leaves the offset of a seekable stdin positioned
immediately after the last byte that it output. This ensures that
any subsequent tools run on the same input stream start reading at
the correct point in the stream.
This commit is contained in:
Karl McDowall 2025-02-01 15:19:12 -07:00
parent 604cc404ff
commit ad20cb35a0
2 changed files with 325 additions and 60 deletions

View file

@ -7,8 +7,12 @@
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::ffi::OsString;
#[cfg(unix)]
use std::fs::File;
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::num::TryFromIntError;
#[cfg(unix)]
use std::os::fd::{AsRawFd, FromRawFd};
use thiserror::Error;
use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult};
@ -239,7 +243,7 @@ impl HeadOptions {
}
}
fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> {
fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<u64> {
// Read the first `n` bytes from the `input` reader.
let mut reader = input.take(n);
@ -247,31 +251,31 @@ fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();
io::copy(&mut reader, &mut stdout)?;
let bytes_written = io::copy(&mut reader, &mut stdout)?;
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush()?;
Ok(())
Ok(bytes_written)
}
fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result<()> {
fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result<u64> {
// Read the first `n` lines from the `input` reader.
let mut reader = take_lines(input, n, separator);
// Write those bytes to `stdout`.
let mut stdout = std::io::stdout();
io::copy(&mut reader, &mut stdout)?;
let bytes_written = io::copy(&mut reader, &mut stdout)?;
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush()?;
Ok(())
Ok(bytes_written)
}
fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> {
@ -284,7 +288,8 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
}
}
fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<()> {
fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<u64> {
let mut bytes_written = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout();
let stdout = stdout.lock();
@ -294,32 +299,36 @@ fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Resul
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
for byte in take_all_but(input.bytes(), n) {
writer.write_all(&[byte?])?;
bytes_written += 1;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
writer.flush()?;
}
Ok(())
Ok(bytes_written)
}
fn read_but_last_n_lines(
input: impl std::io::BufRead,
n: u64,
separator: u8,
) -> std::io::Result<()> {
) -> std::io::Result<u64> {
let mut bytes_written: u64 = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();
for bytes in take_all_but(lines(input, separator), n) {
stdout.write_all(&bytes?)?;
let bytes = bytes?;
bytes_written += u64::try_from(bytes.len()).unwrap();
stdout.write_all(&bytes)?;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush()?;
}
Ok(())
Ok(bytes_written)
}
/// Return the index in `input` just after the `n`th line from the end.
@ -400,45 +409,43 @@ fn is_seekable(input: &mut std::fs::File) -> bool {
&& input.seek(SeekFrom::Start(current_pos.unwrap())).is_ok()
}
fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
let st = input.metadata()?;
let seekable = is_seekable(input);
let blksize_limit = uucore::fs::sane_blksize::sane_blksize_from_metadata(&st);
if !seekable || st.len() <= blksize_limit {
return head_backwards_without_seek_file(input, options);
head_backwards_without_seek_file(input, options)
} else {
head_backwards_on_seekable_file(input, options)
}
head_backwards_on_seekable_file(input, options)
}
fn head_backwards_without_seek_file(
input: &mut std::fs::File,
options: &HeadOptions,
) -> std::io::Result<()> {
) -> std::io::Result<u64> {
let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
match options.mode {
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n)?,
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into())?,
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n),
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()),
_ => unreachable!(),
}
Ok(())
}
fn head_backwards_on_seekable_file(
input: &mut std::fs::File,
options: &HeadOptions,
) -> std::io::Result<()> {
) -> std::io::Result<u64> {
match options.mode {
Mode::AllButLastBytes(n) => {
let size = input.metadata()?.len();
if n >= size {
return Ok(());
Ok(0)
} else {
read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
size - n,
)?;
)
}
}
Mode::AllButLastLines(n) => {
@ -446,14 +453,13 @@ fn head_backwards_on_seekable_file(
read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
found,
)?;
)
}
_ => unreachable!(),
}
Ok(())
}
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> {
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
match options.mode {
Mode::FirstBytes(n) => {
read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n)
@ -480,16 +486,41 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
println!("==> standard input <==");
}
let stdin = std::io::stdin();
let mut stdin = stdin.lock();
match options.mode {
Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n),
Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n),
Mode::FirstLines(n) => read_n_lines(&mut stdin, n, options.line_ending.into()),
Mode::AllButLastLines(n) => {
read_but_last_n_lines(&mut stdin, n, options.line_ending.into())
#[cfg(unix)]
{
let stdin_raw_fd = stdin.as_raw_fd();
let mut stdin_file = unsafe { File::from_raw_fd(stdin_raw_fd) };
let current_pos = stdin_file.stream_position();
if let Ok(current_pos) = current_pos {
// We have a seekable file. Ensure we set the input stream to the
// last byte read so that any tools that parse the remainder of
// the stdin stream read from the correct place.
let bytes_read = head_file(&mut stdin_file, options)?;
stdin_file.seek(SeekFrom::Start(current_pos + bytes_read))?;
} else {
let _bytes_read = head_file(&mut stdin_file, options)?;
}
}
#[cfg(not(unix))]
{
let mut stdin = stdin.lock();
match options.mode {
Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n),
Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n),
Mode::FirstLines(n) => {
read_n_lines(&mut stdin, n, options.line_ending.into())
}
Mode::AllButLastLines(n) => {
read_but_last_n_lines(&mut stdin, n, options.line_ending.into())
}
}?;
}
Ok(())
}
(name, false) => {
let mut file = match std::fs::File::open(name) {
@ -508,7 +539,8 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
}
println!("==> {name} <==");
}
head_file(&mut file, options)
head_file(&mut file, options)?;
Ok(())
}
};
if let Err(e) = res {

View file

@ -7,6 +7,14 @@
// spell-checker:ignore (words) seekable
use crate::common::util::TestScenario;
#[cfg(all(
not(target_os = "windows"),
not(target_os = "macos"),
not(target_os = "android"),
not(target_os = "freebsd"),
not(target_os = "openbsd")
))]
use std::io::Read;
static INPUT: &str = "lorem_ipsum.txt";
@ -400,51 +408,51 @@ fn test_all_but_last_bytes_large_file_piped() {
let fixtures = &scene.fixtures;
// First, create all our fixtures.
let seq_30000_file_name = "seq_30000";
let seq_29000_file_name = "seq_29000";
let seq_29001_30000_file_name = "seq_29001_30000";
let seq_20000_file_name = "seq_20000";
let seq_19000_file_name = "seq_19000";
let seq_19001_20000_file_name = "seq_19001_20000";
scene
.cmd("seq")
.arg("30000")
.set_stdout(fixtures.make_file(seq_30000_file_name))
.arg("20000")
.set_stdout(fixtures.make_file(seq_20000_file_name))
.succeeds();
scene
.cmd("seq")
.arg("29000")
.set_stdout(fixtures.make_file(seq_29000_file_name))
.arg("19000")
.set_stdout(fixtures.make_file(seq_19000_file_name))
.succeeds();
scene
.cmd("seq")
.args(&["29001", "30000"])
.set_stdout(fixtures.make_file(seq_29001_30000_file_name))
.args(&["19001", "20000"])
.set_stdout(fixtures.make_file(seq_19001_20000_file_name))
.succeeds();
let seq_29001_30000_file_length = fixtures
.open(seq_29001_30000_file_name)
let seq_19001_20000_file_length = fixtures
.open(seq_19001_20000_file_name)
.metadata()
.unwrap()
.len();
scene
.ucmd()
.args(&["-c", &format!("-{}", seq_29001_30000_file_length)])
.pipe_in_fixture(seq_30000_file_name)
.args(&["-c", &format!("-{}", seq_19001_20000_file_length)])
.pipe_in_fixture(seq_20000_file_name)
.succeeds()
.stdout_only_fixture(seq_29000_file_name);
.stdout_only_fixture(seq_19000_file_name);
}
#[test]
fn test_read_backwards_lines_large_file() {
fn test_all_but_last_lines_large_file() {
// Create our fixtures on the fly. We need the input file to be at least double
// the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger
// than that.
let scene = TestScenario::new(util_name!());
let fixtures = &scene.fixtures;
let seq_30000_file_name = "seq_30000";
let seq_20000_file_name = "seq_20000";
let seq_1000_file_name = "seq_1000";
scene
.cmd("seq")
.arg("30000")
.set_stdout(fixtures.make_file(seq_30000_file_name))
.arg("20000")
.set_stdout(fixtures.make_file(seq_20000_file_name))
.succeeds();
scene
.cmd("seq")
@ -455,21 +463,246 @@ fn test_read_backwards_lines_large_file() {
// Now run our tests.
scene
.ucmd()
.args(&["-n", "-29000", "seq_30000"])
.args(&["-n", "-19000", seq_20000_file_name])
.succeeds()
.stdout_is_fixture("seq_1000");
.stdout_only_fixture("seq_1000");
scene
.ucmd()
.args(&["-n", "-30000", "seq_30000"])
.run()
.stdout_is_fixture("emptyfile.txt");
.args(&["-n", "-20000", seq_20000_file_name])
.succeeds()
.stdout_only_fixture("emptyfile.txt");
scene
.ucmd()
.args(&["-n", "-30001", "seq_30000"])
.run()
.stdout_is_fixture("emptyfile.txt");
.args(&["-n", "-20001", seq_20000_file_name])
.succeeds()
.stdout_only_fixture("emptyfile.txt");
}
#[cfg(all(
    not(target_os = "windows"),
    not(target_os = "macos"),
    not(target_os = "android"),
    not(target_os = "freebsd"),
    not(target_os = "openbsd")
))]
#[test]
fn test_validate_stdin_offset_lines() {
    // Line-mode checks of the state in which `head` leaves a seekable file
    // that was handed to it as stdin. For GNU compatibility, a process that
    // is later invoked on the same stdin file must start reading at the byte
    // following the last byte printed by `head`.
    // This lives in its own platform-gated test rather than as
    // conditionally-compiled segments spread over several other tests.
    //
    // Scenarios covered:
    //   1 - print the first n lines
    //   2 - print all but the last n lines
    //   3 - print all but the last n lines of a large file
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;

    // Scenario 1: print the first n lines.
    at.write("f1", "a\nb\nc\n");
    let stdin_handle = at.open("f1");
    // Read from this clone once `head` has finished to see what is left.
    let mut observer = stdin_handle.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-n", "1"])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only("a\n");
    let mut leftover = Vec::new();
    let leftover_len = observer.read_to_end(&mut leftover).unwrap();
    assert_eq!(leftover_len, 4);
    assert_eq!(String::from_utf8(leftover).unwrap(), "b\nc\n");

    // Scenario 2: print all but the last n lines.
    at.write("f2", "a\nb\nc\n");
    let stdin_handle = at.open("f2");
    let mut observer = stdin_handle.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-n", "-1"])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only("a\nb\n");
    let mut leftover = Vec::new();
    let leftover_len = observer.read_to_end(&mut leftover).unwrap();
    assert_eq!(leftover_len, 2);
    assert_eq!(String::from_utf8(leftover).unwrap(), "c\n");

    // Scenario 3: print all but the last n lines of a large input file.
    // Generate the input plus the two expected halves with `seq` first.
    let full_name = "seq_20000";
    let head_name = "seq_1000";
    let tail_name = "seq_1001_20000";
    let seq_fixtures: [(&str, &[&str]); 3] = [
        (full_name, &["20000"]),
        (head_name, &["1000"]),
        (tail_name, &["1001", "20000"]),
    ];
    for (fixture_name, seq_args) in seq_fixtures {
        scene
            .cmd("seq")
            .args(seq_args)
            .set_stdout(at.make_file(fixture_name))
            .succeeds();
    }

    let stdin_handle = at.open(full_name);
    let observer = stdin_handle.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-n", "-19000"])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only_fixture(head_name);
    // Whatever `cat` still finds on the shared handle must be exactly the
    // lines that `head` did not print.
    scene
        .cmd("cat")
        .set_stdin(observer)
        .succeeds()
        .stdout_only_fixture(tail_name);
}
#[cfg(all(
    not(target_os = "windows"),
    not(target_os = "macos"),
    not(target_os = "android"),
    not(target_os = "freebsd"),
    not(target_os = "openbsd")
))]
#[test]
fn test_validate_stdin_offset_bytes() {
    // Byte-mode checks of the state in which `head` leaves a seekable file
    // that was handed to it as stdin. For GNU compatibility, a process that
    // is later invoked on the same stdin file must start reading at the byte
    // following the last byte printed by `head`.
    // This lives in its own platform-gated test rather than as
    // conditionally-compiled segments spread over several other tests.
    //
    // Scenarios covered:
    //   1 - print the first n bytes
    //   2 - print all but the last n bytes
    //   3 - print all but the last n bytes with n=0 (i.e. print everything)
    //   4 - print all but the last n bytes of a large file
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;

    // Scenario 1: print the first n bytes.
    at.write("f1", "abc\ndef\n");
    let stdin_handle = at.open("f1");
    // Read from this clone once `head` has finished to see what is left.
    let mut observer = stdin_handle.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-c", "2"])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only("ab");
    let mut leftover = Vec::new();
    let leftover_len = observer.read_to_end(&mut leftover).unwrap();
    assert_eq!(leftover_len, 6);
    assert_eq!(String::from_utf8(leftover).unwrap(), "c\ndef\n");

    // Scenario 2: print all but the last n bytes.
    at.write("f2", "abc\ndef\n");
    let stdin_handle = at.open("f2");
    let mut observer = stdin_handle.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-c", "-3"])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only("abc\nd");
    let mut leftover = Vec::new();
    let leftover_len = observer.read_to_end(&mut leftover).unwrap();
    assert_eq!(leftover_len, 3);
    assert_eq!(String::from_utf8(leftover).unwrap(), "ef\n");

    // Scenario 3: print all but the last n bytes with n=0, i.e. everything.
    at.write("f3", "abc\ndef\n");
    let stdin_handle = at.open("f3");
    let mut observer = stdin_handle.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-c", "-0"])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only("abc\ndef\n");
    let mut leftover = Vec::new();
    let leftover_len = observer.read_to_end(&mut leftover).unwrap();
    assert_eq!(leftover_len, 0);
    assert_eq!(String::from_utf8(leftover).unwrap(), "");

    // Scenario 4: print all but the last n bytes of a large input file.
    // Generate the input plus the two expected halves with `seq` first.
    let full_name = "seq_20000";
    let head_name = "seq_19000";
    let tail_name = "seq_19001_20000";
    let seq_fixtures: [(&str, &[&str]); 3] = [
        (full_name, &["20000"]),
        (head_name, &["19000"]),
        (tail_name, &["19001", "20000"]),
    ];
    for (fixture_name, seq_args) in seq_fixtures {
        scene
            .cmd("seq")
            .args(seq_args)
            .set_stdout(at.make_file(fixture_name))
            .succeeds();
    }

    let stdin_handle = at.open(full_name);
    let observer = stdin_handle.try_clone().unwrap();
    // The number of bytes to hold back is the size of the expected tail.
    let tail_len = at.open(tail_name).metadata().unwrap().len();
    let byte_count_arg = format!("-{tail_len}");
    scene
        .ucmd()
        .args(&["-c", byte_count_arg.as_str()])
        .set_stdin(stdin_handle)
        .succeeds()
        .stdout_only_fixture(head_name);
    // Whatever `cat` still finds on the shared handle must be exactly the
    // bytes that `head` did not print.
    scene
        .cmd("cat")
        .set_stdin(observer)
        .succeeds()
        .stdout_only_fixture(tail_name);
}
#[cfg(all(