1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #7253 from karlmcdowall/head_stdio_file

Head: ensure stdin input stream is correct on exit
This commit is contained in:
Dorian Péron 2025-02-28 17:05:06 +01:00 committed by GitHub
commit 76ad6042b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 325 additions and 60 deletions

View file

@ -7,8 +7,12 @@
use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::ffi::OsString; use std::ffi::OsString;
#[cfg(unix)]
use std::fs::File;
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write}; use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::num::TryFromIntError; use std::num::TryFromIntError;
#[cfg(unix)]
use std::os::fd::{AsRawFd, FromRawFd};
use thiserror::Error; use thiserror::Error;
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult}; use uucore::error::{FromIo, UError, UResult};
@ -239,7 +243,7 @@ impl HeadOptions {
} }
} }
fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> { fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<u64> {
// Read the first `n` bytes from the `input` reader. // Read the first `n` bytes from the `input` reader.
let mut reader = input.take(n); let mut reader = input.take(n);
@ -247,31 +251,31 @@ fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> {
let stdout = std::io::stdout(); let stdout = std::io::stdout();
let mut stdout = stdout.lock(); let mut stdout = stdout.lock();
io::copy(&mut reader, &mut stdout)?; let bytes_written = io::copy(&mut reader, &mut stdout)?;
// Make sure we finish writing everything to the target before // Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any // exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored. // error will be silently ignored.
stdout.flush()?; stdout.flush()?;
Ok(()) Ok(bytes_written)
} }
fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result<()> { fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result<u64> {
// Read the first `n` lines from the `input` reader. // Read the first `n` lines from the `input` reader.
let mut reader = take_lines(input, n, separator); let mut reader = take_lines(input, n, separator);
// Write those bytes to `stdout`. // Write those bytes to `stdout`.
let mut stdout = std::io::stdout(); let mut stdout = std::io::stdout();
io::copy(&mut reader, &mut stdout)?; let bytes_written = io::copy(&mut reader, &mut stdout)?;
// Make sure we finish writing everything to the target before // Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any // exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored. // error will be silently ignored.
stdout.flush()?; stdout.flush()?;
Ok(()) Ok(bytes_written)
} }
fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> { fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize> {
@ -284,7 +288,8 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
} }
} }
fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<()> { fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<u64> {
let mut bytes_written = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout(); let stdout = std::io::stdout();
let stdout = stdout.lock(); let stdout = stdout.lock();
@ -294,32 +299,36 @@ fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Resul
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout); let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
for byte in take_all_but(input.bytes(), n) { for byte in take_all_but(input.bytes(), n) {
writer.write_all(&[byte?])?; writer.write_all(&[byte?])?;
bytes_written += 1;
} }
// Make sure we finish writing everything to the target before // Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any // exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored. // error will be silently ignored.
writer.flush()?; writer.flush()?;
} }
Ok(()) Ok(bytes_written)
} }
fn read_but_last_n_lines( fn read_but_last_n_lines(
input: impl std::io::BufRead, input: impl std::io::BufRead,
n: u64, n: u64,
separator: u8, separator: u8,
) -> std::io::Result<()> { ) -> std::io::Result<u64> {
let mut bytes_written: u64 = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout(); let stdout = std::io::stdout();
let mut stdout = stdout.lock(); let mut stdout = stdout.lock();
for bytes in take_all_but(lines(input, separator), n) { for bytes in take_all_but(lines(input, separator), n) {
stdout.write_all(&bytes?)?; let bytes = bytes?;
bytes_written += u64::try_from(bytes.len()).unwrap();
stdout.write_all(&bytes)?;
} }
// Make sure we finish writing everything to the target before // Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any // exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored. // error will be silently ignored.
stdout.flush()?; stdout.flush()?;
} }
Ok(()) Ok(bytes_written)
} }
/// Return the index in `input` just after the `n`th line from the end. /// Return the index in `input` just after the `n`th line from the end.
@ -400,45 +409,43 @@ fn is_seekable(input: &mut std::fs::File) -> bool {
&& input.seek(SeekFrom::Start(current_pos.unwrap())).is_ok() && input.seek(SeekFrom::Start(current_pos.unwrap())).is_ok()
} }
fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
let st = input.metadata()?; let st = input.metadata()?;
let seekable = is_seekable(input); let seekable = is_seekable(input);
let blksize_limit = uucore::fs::sane_blksize::sane_blksize_from_metadata(&st); let blksize_limit = uucore::fs::sane_blksize::sane_blksize_from_metadata(&st);
if !seekable || st.len() <= blksize_limit { if !seekable || st.len() <= blksize_limit {
return head_backwards_without_seek_file(input, options); head_backwards_without_seek_file(input, options)
} } else {
head_backwards_on_seekable_file(input, options) head_backwards_on_seekable_file(input, options)
} }
}
fn head_backwards_without_seek_file( fn head_backwards_without_seek_file(
input: &mut std::fs::File, input: &mut std::fs::File,
options: &HeadOptions, options: &HeadOptions,
) -> std::io::Result<()> { ) -> std::io::Result<u64> {
let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input); let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
match options.mode { match options.mode {
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n)?, Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n),
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into())?, Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()),
_ => unreachable!(), _ => unreachable!(),
} }
Ok(())
} }
fn head_backwards_on_seekable_file( fn head_backwards_on_seekable_file(
input: &mut std::fs::File, input: &mut std::fs::File,
options: &HeadOptions, options: &HeadOptions,
) -> std::io::Result<()> { ) -> std::io::Result<u64> {
match options.mode { match options.mode {
Mode::AllButLastBytes(n) => { Mode::AllButLastBytes(n) => {
let size = input.metadata()?.len(); let size = input.metadata()?.len();
if n >= size { if n >= size {
return Ok(()); Ok(0)
} else { } else {
read_n_bytes( read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input), &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
size - n, size - n,
)?; )
} }
} }
Mode::AllButLastLines(n) => { Mode::AllButLastLines(n) => {
@ -446,14 +453,13 @@ fn head_backwards_on_seekable_file(
read_n_bytes( read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input), &mut std::io::BufReader::with_capacity(BUF_SIZE, input),
found, found,
)?; )
} }
_ => unreachable!(), _ => unreachable!(),
} }
Ok(())
} }
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
match options.mode { match options.mode {
Mode::FirstBytes(n) => { Mode::FirstBytes(n) => {
read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n) read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n)
@ -480,16 +486,41 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
println!("==> standard input <=="); println!("==> standard input <==");
} }
let stdin = std::io::stdin(); let stdin = std::io::stdin();
#[cfg(unix)]
{
let stdin_raw_fd = stdin.as_raw_fd();
let mut stdin_file = unsafe { File::from_raw_fd(stdin_raw_fd) };
let current_pos = stdin_file.stream_position();
if let Ok(current_pos) = current_pos {
// We have a seekable file. Ensure we set the input stream to the
// last byte read so that any tools that parse the remainder of
// the stdin stream read from the correct place.
let bytes_read = head_file(&mut stdin_file, options)?;
stdin_file.seek(SeekFrom::Start(current_pos + bytes_read))?;
} else {
let _bytes_read = head_file(&mut stdin_file, options)?;
}
}
#[cfg(not(unix))]
{
let mut stdin = stdin.lock(); let mut stdin = stdin.lock();
match options.mode { match options.mode {
Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n), Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n),
Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n), Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n),
Mode::FirstLines(n) => read_n_lines(&mut stdin, n, options.line_ending.into()), Mode::FirstLines(n) => {
read_n_lines(&mut stdin, n, options.line_ending.into())
}
Mode::AllButLastLines(n) => { Mode::AllButLastLines(n) => {
read_but_last_n_lines(&mut stdin, n, options.line_ending.into()) read_but_last_n_lines(&mut stdin, n, options.line_ending.into())
} }
}?;
} }
Ok(())
} }
(name, false) => { (name, false) => {
let mut file = match std::fs::File::open(name) { let mut file = match std::fs::File::open(name) {
@ -508,7 +539,8 @@ fn uu_head(options: &HeadOptions) -> UResult<()> {
} }
println!("==> {name} <=="); println!("==> {name} <==");
} }
head_file(&mut file, options) head_file(&mut file, options)?;
Ok(())
} }
}; };
if let Err(e) = res { if let Err(e) = res {

View file

@ -7,6 +7,14 @@
// spell-checker:ignore (words) seekable // spell-checker:ignore (words) seekable
use crate::common::util::TestScenario; use crate::common::util::TestScenario;
#[cfg(all(
not(target_os = "windows"),
not(target_os = "macos"),
not(target_os = "android"),
not(target_os = "freebsd"),
not(target_os = "openbsd")
))]
use std::io::Read;
static INPUT: &str = "lorem_ipsum.txt"; static INPUT: &str = "lorem_ipsum.txt";
@ -400,51 +408,51 @@ fn test_all_but_last_bytes_large_file_piped() {
let fixtures = &scene.fixtures; let fixtures = &scene.fixtures;
// First, create all our fixtures. // First, create all our fixtures.
let seq_30000_file_name = "seq_30000"; let seq_20000_file_name = "seq_20000";
let seq_29000_file_name = "seq_29000"; let seq_19000_file_name = "seq_19000";
let seq_29001_30000_file_name = "seq_29001_30000"; let seq_19001_20000_file_name = "seq_19001_20000";
scene scene
.cmd("seq") .cmd("seq")
.arg("30000") .arg("20000")
.set_stdout(fixtures.make_file(seq_30000_file_name)) .set_stdout(fixtures.make_file(seq_20000_file_name))
.succeeds(); .succeeds();
scene scene
.cmd("seq") .cmd("seq")
.arg("29000") .arg("19000")
.set_stdout(fixtures.make_file(seq_29000_file_name)) .set_stdout(fixtures.make_file(seq_19000_file_name))
.succeeds(); .succeeds();
scene scene
.cmd("seq") .cmd("seq")
.args(&["29001", "30000"]) .args(&["19001", "20000"])
.set_stdout(fixtures.make_file(seq_29001_30000_file_name)) .set_stdout(fixtures.make_file(seq_19001_20000_file_name))
.succeeds(); .succeeds();
let seq_29001_30000_file_length = fixtures let seq_19001_20000_file_length = fixtures
.open(seq_29001_30000_file_name) .open(seq_19001_20000_file_name)
.metadata() .metadata()
.unwrap() .unwrap()
.len(); .len();
scene scene
.ucmd() .ucmd()
.args(&["-c", &format!("-{}", seq_29001_30000_file_length)]) .args(&["-c", &format!("-{}", seq_19001_20000_file_length)])
.pipe_in_fixture(seq_30000_file_name) .pipe_in_fixture(seq_20000_file_name)
.succeeds() .succeeds()
.stdout_only_fixture(seq_29000_file_name); .stdout_only_fixture(seq_19000_file_name);
} }
#[test] #[test]
fn test_read_backwards_lines_large_file() { fn test_all_but_last_lines_large_file() {
// Create our fixtures on the fly. We need the input file to be at least double // Create our fixtures on the fly. We need the input file to be at least double
// the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger // the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger
// than that. // than that.
let scene = TestScenario::new(util_name!()); let scene = TestScenario::new(util_name!());
let fixtures = &scene.fixtures; let fixtures = &scene.fixtures;
let seq_30000_file_name = "seq_30000"; let seq_20000_file_name = "seq_20000";
let seq_1000_file_name = "seq_1000"; let seq_1000_file_name = "seq_1000";
scene scene
.cmd("seq") .cmd("seq")
.arg("30000") .arg("20000")
.set_stdout(fixtures.make_file(seq_30000_file_name)) .set_stdout(fixtures.make_file(seq_20000_file_name))
.succeeds(); .succeeds();
scene scene
.cmd("seq") .cmd("seq")
@ -455,21 +463,246 @@ fn test_read_backwards_lines_large_file() {
// Now run our tests. // Now run our tests.
scene scene
.ucmd() .ucmd()
.args(&["-n", "-29000", "seq_30000"]) .args(&["-n", "-19000", seq_20000_file_name])
.succeeds() .succeeds()
.stdout_is_fixture("seq_1000"); .stdout_only_fixture("seq_1000");
scene scene
.ucmd() .ucmd()
.args(&["-n", "-30000", "seq_30000"]) .args(&["-n", "-20000", seq_20000_file_name])
.run() .succeeds()
.stdout_is_fixture("emptyfile.txt"); .stdout_only_fixture("emptyfile.txt");
scene scene
.ucmd() .ucmd()
.args(&["-n", "-30001", "seq_30000"]) .args(&["-n", "-20001", seq_20000_file_name])
.run() .succeeds()
.stdout_is_fixture("emptyfile.txt"); .stdout_only_fixture("emptyfile.txt");
}
#[cfg(not(any(
    target_os = "windows",
    target_os = "macos",
    target_os = "android",
    target_os = "freebsd",
    target_os = "openbsd"
)))]
#[test]
fn test_validate_stdin_offset_lines() {
    // Line-mode checks for the position head leaves a seekable stdin file in.
    // For GNU compatibility, a process that reads the same stdin after head has
    // finished must start at the byte right after the last byte head printed.
    // The requirement only applies on the platforms admitted by the cfg gate
    // above, so it lives in its own test instead of being sprinkled through the
    // other tests as conditionally-compiled fragments.
    //
    // Scenarios:
    //   1 - print the first n lines
    //   2 - print all but the last n lines
    //   3 - print all but the last n lines of a large file

    // Collects everything that can still be read from `source`.
    fn drain(source: &mut impl Read) -> Vec<u8> {
        let mut leftover = vec![];
        source.read_to_end(&mut leftover).unwrap();
        leftover
    }

    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // Scenario 1 - print the first n lines.
    {
        fixtures.write("f1", "a\nb\nc\n");
        let input = fixtures.open("f1");
        // The clone is read after head exits to observe where the input was left.
        let mut shadow = input.try_clone().unwrap();
        scene
            .ucmd()
            .args(&["-n", "1"])
            .set_stdin(input)
            .succeeds()
            .stdout_only("a\n");
        let leftover = drain(&mut shadow);
        assert_eq!(leftover.len(), 4);
        assert_eq!(String::from_utf8(leftover).unwrap(), "b\nc\n");
    }

    // Scenario 2 - print all but the last n lines.
    {
        fixtures.write("f2", "a\nb\nc\n");
        let input = fixtures.open("f2");
        let mut shadow = input.try_clone().unwrap();
        scene
            .ucmd()
            .args(&["-n", "-1"])
            .set_stdin(input)
            .succeeds()
            .stdout_only("a\nb\n");
        let leftover = drain(&mut shadow);
        assert_eq!(leftover.len(), 2);
        assert_eq!(String::from_utf8(leftover).unwrap(), "c\n");
    }

    // Scenario 3 - print all but the last n lines of a large input file.
    // The fixtures are generated on the fly with `seq`.
    let whole_name = "seq_20000";
    let printed_name = "seq_1000";
    let unread_name = "seq_1001_20000";
    scene
        .cmd("seq")
        .arg("20000")
        .set_stdout(fixtures.make_file(whole_name))
        .succeeds();
    scene
        .cmd("seq")
        .arg("1000")
        .set_stdout(fixtures.make_file(printed_name))
        .succeeds();
    scene
        .cmd("seq")
        .args(&["1001", "20000"])
        .set_stdout(fixtures.make_file(unread_name))
        .succeeds();

    let input = fixtures.open(whole_name);
    let shadow = input.try_clone().unwrap();
    scene
        .ucmd()
        .args(&["-n", "-19000"])
        .set_stdin(input)
        .succeeds()
        .stdout_only_fixture(printed_name);
    // Whatever a follow-up reader sees on the same stdin must be exactly the
    // part head did not print.
    scene
        .cmd("cat")
        .set_stdin(shadow)
        .succeeds()
        .stdout_only_fixture(unread_name);
}
#[cfg(not(any(
    target_os = "windows",
    target_os = "macos",
    target_os = "android",
    target_os = "freebsd",
    target_os = "openbsd"
)))]
#[test]
fn test_validate_stdin_offset_bytes() {
    // Byte-mode checks for the position head leaves a seekable stdin file in.
    // For GNU compatibility, a process that reads the same stdin after head has
    // finished must start at the byte right after the last byte head printed.
    // The requirement only applies on the platforms admitted by the cfg gate
    // above, so it lives in its own test instead of being sprinkled through the
    // other tests as conditionally-compiled fragments.
    //
    // Scenarios:
    //   1 - print the first n bytes
    //   2 - print all but the last n bytes
    //   3 - print all but the last n bytes with n=0 (i.e. print everything)
    //   4 - print all but the last n bytes of a large file

    // Collects everything that can still be read from `source`.
    fn drain(source: &mut impl Read) -> Vec<u8> {
        let mut leftover = vec![];
        source.read_to_end(&mut leftover).unwrap();
        leftover
    }

    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // Scenario 1 - print the first n bytes.
    {
        fixtures.write("f1", "abc\ndef\n");
        let input = fixtures.open("f1");
        // The clone is read after head exits to observe where the input was left.
        let mut shadow = input.try_clone().unwrap();
        scene
            .ucmd()
            .args(&["-c", "2"])
            .set_stdin(input)
            .succeeds()
            .stdout_only("ab");
        let leftover = drain(&mut shadow);
        assert_eq!(leftover.len(), 6);
        assert_eq!(String::from_utf8(leftover).unwrap(), "c\ndef\n");
    }

    // Scenario 2 - print all but the last n bytes.
    {
        fixtures.write("f2", "abc\ndef\n");
        let input = fixtures.open("f2");
        let mut shadow = input.try_clone().unwrap();
        scene
            .ucmd()
            .args(&["-c", "-3"])
            .set_stdin(input)
            .succeeds()
            .stdout_only("abc\nd");
        let leftover = drain(&mut shadow);
        assert_eq!(leftover.len(), 3);
        assert_eq!(String::from_utf8(leftover).unwrap(), "ef\n");
    }

    // Scenario 3 - print all but the last n bytes, n=0 (i.e. print everything).
    {
        fixtures.write("f3", "abc\ndef\n");
        let input = fixtures.open("f3");
        let mut shadow = input.try_clone().unwrap();
        scene
            .ucmd()
            .args(&["-c", "-0"])
            .set_stdin(input)
            .succeeds()
            .stdout_only("abc\ndef\n");
        let leftover = drain(&mut shadow);
        assert_eq!(leftover.len(), 0);
        assert_eq!(String::from_utf8(leftover).unwrap(), "");
    }

    // Scenario 4 - print all but the last n bytes of a large input file.
    // The fixtures are generated on the fly with `seq`.
    let whole_name = "seq_20000";
    let printed_name = "seq_19000";
    let unread_name = "seq_19001_20000";
    scene
        .cmd("seq")
        .arg("20000")
        .set_stdout(fixtures.make_file(whole_name))
        .succeeds();
    scene
        .cmd("seq")
        .arg("19000")
        .set_stdout(fixtures.make_file(printed_name))
        .succeeds();
    scene
        .cmd("seq")
        .args(&["19001", "20000"])
        .set_stdout(fixtures.make_file(unread_name))
        .succeeds();

    let input = fixtures.open(whole_name);
    let shadow = input.try_clone().unwrap();
    // The byte count handed to `-c -N` is the size of the part that must stay unread.
    let unread_len = fixtures.open(unread_name).metadata().unwrap().len();
    let byte_count_arg = format!("-{}", unread_len);
    scene
        .ucmd()
        .args(&["-c", byte_count_arg.as_str()])
        .set_stdin(input)
        .succeeds()
        .stdout_only_fixture(printed_name);
    // Whatever a follow-up reader sees on the same stdin must be exactly the
    // part head did not print.
    scene
        .cmd("cat")
        .set_stdin(shadow)
        .succeeds()
        .stdout_only_fixture(unread_name);
}
#[cfg(all( #[cfg(all(