diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index c7566ee0f..2b82a0107 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -7,8 +7,12 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::ffi::OsString; +#[cfg(unix)] +use std::fs::File; use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write}; use std::num::TryFromIntError; +#[cfg(unix)] +use std::os::fd::{AsRawFd, FromRawFd}; use thiserror::Error; use uucore::display::Quotable; use uucore::error::{FromIo, UError, UResult}; @@ -239,7 +243,7 @@ impl HeadOptions { } } -fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> { +fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result { // Read the first `n` bytes from the `input` reader. let mut reader = input.take(n); @@ -247,31 +251,31 @@ fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> { let stdout = std::io::stdout(); let mut stdout = stdout.lock(); - io::copy(&mut reader, &mut stdout)?; + let bytes_written = io::copy(&mut reader, &mut stdout)?; // Make sure we finish writing everything to the target before // exiting. Otherwise, when Rust is implicitly flushing, any // error will be silently ignored. stdout.flush()?; - Ok(()) + Ok(bytes_written) } -fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result<()> { +fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std::io::Result { // Read the first `n` lines from the `input` reader. let mut reader = take_lines(input, n, separator); // Write those bytes to `stdout`. let mut stdout = std::io::stdout(); - io::copy(&mut reader, &mut stdout)?; + let bytes_written = io::copy(&mut reader, &mut stdout)?; // Make sure we finish writing everything to the target before // exiting. Otherwise, when Rust is implicitly flushing, any // error will be silently ignored. stdout.flush()?; - Ok(()) + Ok(bytes_written) } fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option { @@ -284,7 +288,8 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option } } -fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<()> { +fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result { + let mut bytes_written = 0; if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { let stdout = std::io::stdout(); let stdout = stdout.lock(); @@ -294,32 +299,36 @@ fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Resul let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout); for byte in take_all_but(input.bytes(), n) { writer.write_all(&[byte?])?; + bytes_written += 1; } // Make sure we finish writing everything to the target before // exiting. Otherwise, when Rust is implicitly flushing, any // error will be silently ignored. writer.flush()?; } - Ok(()) + Ok(bytes_written) } fn read_but_last_n_lines( input: impl std::io::BufRead, n: u64, separator: u8, -) -> std::io::Result<()> { +) -> std::io::Result { + let mut bytes_written: u64 = 0; if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) { let stdout = std::io::stdout(); let mut stdout = stdout.lock(); for bytes in take_all_but(lines(input, separator), n) { - stdout.write_all(&bytes?)?; + let bytes = bytes?; + bytes_written += u64::try_from(bytes.len()).unwrap(); + stdout.write_all(&bytes)?; } // Make sure we finish writing everything to the target before // exiting. Otherwise, when Rust is implicitly flushing, any // error will be silently ignored. stdout.flush()?; } - Ok(()) + Ok(bytes_written) } /// Return the index in `input` just after the `n`th line from the end. @@ -400,45 +409,43 @@ fn is_seekable(input: &mut std::fs::File) -> bool { && input.seek(SeekFrom::Start(current_pos.unwrap())).is_ok() } -fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { +fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result { let st = input.metadata()?; let seekable = is_seekable(input); let blksize_limit = uucore::fs::sane_blksize::sane_blksize_from_metadata(&st); if !seekable || st.len() <= blksize_limit { - return head_backwards_without_seek_file(input, options); + head_backwards_without_seek_file(input, options) + } else { + head_backwards_on_seekable_file(input, options) } - - head_backwards_on_seekable_file(input, options) } fn head_backwards_without_seek_file( input: &mut std::fs::File, options: &HeadOptions, -) -> std::io::Result<()> { +) -> std::io::Result { let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input); match options.mode { - Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n)?, - Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into())?, + Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n), + Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()), _ => unreachable!(), } - - Ok(()) } fn head_backwards_on_seekable_file( input: &mut std::fs::File, options: &HeadOptions, -) -> std::io::Result<()> { +) -> std::io::Result { match options.mode { Mode::AllButLastBytes(n) => { let size = input.metadata()?.len(); if n >= size { - return Ok(()); + Ok(0) } else { read_n_bytes( &mut std::io::BufReader::with_capacity(BUF_SIZE, input), size - n, - )?; + ) } } Mode::AllButLastLines(n) => { @@ -446,14 +453,13 @@ fn head_backwards_on_seekable_file( read_n_bytes( &mut std::io::BufReader::with_capacity(BUF_SIZE, input), found, - )?; + ) } _ => unreachable!(), } - Ok(()) } -fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { +fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result { match options.mode { Mode::FirstBytes(n) => { read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n) @@ -480,16 +486,41 @@ fn uu_head(options: &HeadOptions) -> UResult<()> { println!("==> standard input <=="); } let stdin = std::io::stdin(); - let mut stdin = stdin.lock(); - match options.mode { - Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n), - Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n), - Mode::FirstLines(n) => read_n_lines(&mut stdin, n, options.line_ending.into()), - Mode::AllButLastLines(n) => { - read_but_last_n_lines(&mut stdin, n, options.line_ending.into()) + #[cfg(unix)] + { + let stdin_raw_fd = stdin.as_raw_fd(); + let mut stdin_file = unsafe { File::from_raw_fd(stdin_raw_fd) }; + let current_pos = stdin_file.stream_position(); + if let Ok(current_pos) = current_pos { + // We have a seekable file. Ensure we set the input stream to the + // last byte read so that any tools that parse the remainder of + // the stdin stream read from the correct place. + + let bytes_read = head_file(&mut stdin_file, options)?; + stdin_file.seek(SeekFrom::Start(current_pos + bytes_read))?; + } else { + let _bytes_read = head_file(&mut stdin_file, options)?; } } + + #[cfg(not(unix))] + { + let mut stdin = stdin.lock(); + + match options.mode { + Mode::FirstBytes(n) => read_n_bytes(&mut stdin, n), + Mode::AllButLastBytes(n) => read_but_last_n_bytes(&mut stdin, n), + Mode::FirstLines(n) => { + read_n_lines(&mut stdin, n, options.line_ending.into()) + } + Mode::AllButLastLines(n) => { + read_but_last_n_lines(&mut stdin, n, options.line_ending.into()) + } + }?; + } + + Ok(()) } (name, false) => { let mut file = match std::fs::File::open(name) { @@ -508,7 +539,8 @@ fn uu_head(options: &HeadOptions) -> UResult<()> { } println!("==> {name} <=="); } - head_file(&mut file, options) + head_file(&mut file, options)?; + Ok(()) } }; if let Err(e) = res { diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 4e5f14935..d747f9271 100644 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -7,6 +7,14 @@ // spell-checker:ignore (words) seekable use crate::common::util::TestScenario; +#[cfg(all( + not(target_os = "windows"), + not(target_os = "macos"), + not(target_os = "android"), + not(target_os = "freebsd"), + not(target_os = "openbsd") +))] +use std::io::Read; static INPUT: &str = "lorem_ipsum.txt"; @@ -400,51 +408,51 @@ fn test_all_but_last_bytes_large_file_piped() { let fixtures = &scene.fixtures; // First, create all our fixtures. - let seq_30000_file_name = "seq_30000"; - let seq_29000_file_name = "seq_29000"; - let seq_29001_30000_file_name = "seq_29001_30000"; + let seq_20000_file_name = "seq_20000"; + let seq_19000_file_name = "seq_19000"; + let seq_19001_20000_file_name = "seq_19001_20000"; scene .cmd("seq") - .arg("30000") - .set_stdout(fixtures.make_file(seq_30000_file_name)) + .arg("20000") + .set_stdout(fixtures.make_file(seq_20000_file_name)) .succeeds(); scene .cmd("seq") - .arg("29000") - .set_stdout(fixtures.make_file(seq_29000_file_name)) + .arg("19000") + .set_stdout(fixtures.make_file(seq_19000_file_name)) .succeeds(); scene .cmd("seq") - .args(&["29001", "30000"]) - .set_stdout(fixtures.make_file(seq_29001_30000_file_name)) + .args(&["19001", "20000"]) + .set_stdout(fixtures.make_file(seq_19001_20000_file_name)) .succeeds(); - let seq_29001_30000_file_length = fixtures - .open(seq_29001_30000_file_name) + let seq_19001_20000_file_length = fixtures + .open(seq_19001_20000_file_name) .metadata() .unwrap() .len(); scene .ucmd() - .args(&["-c", &format!("-{}", seq_29001_30000_file_length)]) - .pipe_in_fixture(seq_30000_file_name) + .args(&["-c", &format!("-{}", seq_19001_20000_file_length)]) + .pipe_in_fixture(seq_20000_file_name) .succeeds() - .stdout_only_fixture(seq_29000_file_name); + .stdout_only_fixture(seq_19000_file_name); } #[test] -fn test_read_backwards_lines_large_file() { +fn test_all_but_last_lines_large_file() { // Create our fixtures on the fly. We need the input file to be at least double // the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger // than that. let scene = TestScenario::new(util_name!()); let fixtures = &scene.fixtures; - let seq_30000_file_name = "seq_30000"; + let seq_20000_file_name = "seq_20000"; let seq_1000_file_name = "seq_1000"; scene .cmd("seq") - .arg("30000") - .set_stdout(fixtures.make_file(seq_30000_file_name)) + .arg("20000") + .set_stdout(fixtures.make_file(seq_20000_file_name)) .succeeds(); scene .cmd("seq") @@ -455,21 +463,246 @@ fn test_read_backwards_lines_large_file() { // Now run our tests. scene .ucmd() - .args(&["-n", "-29000", "seq_30000"]) + .args(&["-n", "-19000", seq_20000_file_name]) .succeeds() - .stdout_is_fixture("seq_1000"); + .stdout_only_fixture("seq_1000"); scene .ucmd() - .args(&["-n", "-30000", "seq_30000"]) - .run() - .stdout_is_fixture("emptyfile.txt"); + .args(&["-n", "-20000", seq_20000_file_name]) + .succeeds() + .stdout_only_fixture("emptyfile.txt"); scene .ucmd() - .args(&["-n", "-30001", "seq_30000"]) - .run() - .stdout_is_fixture("emptyfile.txt"); + .args(&["-n", "-20001", seq_20000_file_name]) + .succeeds() + .stdout_only_fixture("emptyfile.txt"); +} + +#[cfg(all( + not(target_os = "windows"), + not(target_os = "macos"), + not(target_os = "android"), + not(target_os = "freebsd"), + not(target_os = "openbsd") +))] +#[test] +fn test_validate_stdin_offset_lines() { + // A handful of unix-only tests to validate behavior when reading from stdin on a seekable + // file. GNU-compatibility requires that the stdin file be left such that if another + // process is invoked on the same stdin file after head has run, the subsequent file should + // start reading from the byte after the last byte printed by head. + // Since this is unix-only requirement, keep this as a separate test rather than adding a + // conditionally-compiled segment to multiple tests. + // + // Test scenarios... + // 1 - Print the first n lines + // 2 - Print all-but the last n lines + // 3 - Print all but the last n lines, large file. + let scene = TestScenario::new(util_name!()); + let fixtures = &scene.fixtures; + + // Test 1 - Print the first n lines + fixtures.write("f1", "a\nb\nc\n"); + let file = fixtures.open("f1"); + let mut file_shadow = file.try_clone().unwrap(); + scene + .ucmd() + .args(&["-n", "1"]) + .set_stdin(file) + .succeeds() + .stdout_only("a\n"); + let mut bytes_remaining_in_stdin = vec![]; + assert_eq!( + file_shadow + .read_to_end(&mut bytes_remaining_in_stdin) + .unwrap(), + 4 + ); + assert_eq!( + String::from_utf8(bytes_remaining_in_stdin).unwrap(), + "b\nc\n" + ); + + // Test 2 - Print all-but the last n lines + fixtures.write("f2", "a\nb\nc\n"); + let file = fixtures.open("f2"); + let mut file_shadow = file.try_clone().unwrap(); + scene + .ucmd() + .args(&["-n", "-1"]) + .set_stdin(file) + .succeeds() + .stdout_only("a\nb\n"); + let mut bytes_remaining_in_stdin = vec![]; + assert_eq!( + file_shadow + .read_to_end(&mut bytes_remaining_in_stdin) + .unwrap(), + 2 + ); + assert_eq!(String::from_utf8(bytes_remaining_in_stdin).unwrap(), "c\n"); + + // Test 3 - Print all but the last n lines, large input file. + // First, create all our fixtures. + let seq_20000_file_name = "seq_20000"; + let seq_1000_file_name = "seq_1000"; + let seq_1001_20000_file_name = "seq_1001_20000"; + scene + .cmd("seq") + .arg("20000") + .set_stdout(fixtures.make_file(seq_20000_file_name)) + .succeeds(); + scene + .cmd("seq") + .arg("1000") + .set_stdout(fixtures.make_file(seq_1000_file_name)) + .succeeds(); + scene + .cmd("seq") + .args(&["1001", "20000"]) + .set_stdout(fixtures.make_file(seq_1001_20000_file_name)) + .succeeds(); + + let file = fixtures.open(seq_20000_file_name); + let file_shadow = file.try_clone().unwrap(); + scene + .ucmd() + .args(&["-n", "-19000"]) + .set_stdin(file) + .succeeds() + .stdout_only_fixture(seq_1000_file_name); + scene + .cmd("cat") + .set_stdin(file_shadow) + .succeeds() + .stdout_only_fixture(seq_1001_20000_file_name); +} + +#[cfg(all( + not(target_os = "windows"), + not(target_os = "macos"), + not(target_os = "android"), + not(target_os = "freebsd"), + not(target_os = "openbsd") +))] +#[test] +fn test_validate_stdin_offset_bytes() { + // A handful of unix-only tests to validate behavior when reading from stdin on a seekable + // file. GNU-compatibility requires that the stdin file be left such that if another + // process is invoked on the same stdin file after head has run, the subsequent file should + // start reading from the byte after the last byte printed by head. + // Since this is unix-only requirement, keep this as a separate test rather than adding a + // conditionally-compiled segment to multiple tests. + // + // Test scenarios... + // 1 - Print the first n bytes + // 2 - Print all-but the last n bytes + // 3 - Print all-but the last n bytes, with n=0 (i.e. print everything) + // 4 - Print all but the last n bytes, large file. + let scene = TestScenario::new(util_name!()); + let fixtures = &scene.fixtures; + + // Test 1 - Print the first n bytes + fixtures.write("f1", "abc\ndef\n"); + let file = fixtures.open("f1"); + let mut file_shadow = file.try_clone().unwrap(); + scene + .ucmd() + .args(&["-c", "2"]) + .set_stdin(file) + .succeeds() + .stdout_only("ab"); + let mut bytes_remaining_in_stdin = vec![]; + assert_eq!( + file_shadow + .read_to_end(&mut bytes_remaining_in_stdin) + .unwrap(), + 6 + ); + assert_eq!( + String::from_utf8(bytes_remaining_in_stdin).unwrap(), + "c\ndef\n" + ); + + // Test 2 - Print all-but the last n bytes + fixtures.write("f2", "abc\ndef\n"); + let file = fixtures.open("f2"); + let mut file_shadow = file.try_clone().unwrap(); + scene + .ucmd() + .args(&["-c", "-3"]) + .set_stdin(file) + .succeeds() + .stdout_only("abc\nd"); + let mut bytes_remaining_in_stdin = vec![]; + assert_eq!( + file_shadow + .read_to_end(&mut bytes_remaining_in_stdin) + .unwrap(), + 3 + ); + assert_eq!(String::from_utf8(bytes_remaining_in_stdin).unwrap(), "ef\n"); + + // Test 3 - Print all-but the last n bytes, n=0 (i.e. print everything) + fixtures.write("f3", "abc\ndef\n"); + let file = fixtures.open("f3"); + let mut file_shadow = file.try_clone().unwrap(); + scene + .ucmd() + .args(&["-c", "-0"]) + .set_stdin(file) + .succeeds() + .stdout_only("abc\ndef\n"); + let mut bytes_remaining_in_stdin = vec![]; + assert_eq!( + file_shadow + .read_to_end(&mut bytes_remaining_in_stdin) + .unwrap(), + 0 + ); + assert_eq!(String::from_utf8(bytes_remaining_in_stdin).unwrap(), ""); + + // Test 4 - Print all but the last n bytes, large input file. + // First, create all our fixtures. + let seq_20000_file_name = "seq_20000"; + let seq_19000_file_name = "seq_19000"; + let seq_19001_20000_file_name = "seq_19001_20000"; + scene + .cmd("seq") + .arg("20000") + .set_stdout(fixtures.make_file(seq_20000_file_name)) + .succeeds(); + scene + .cmd("seq") + .arg("19000") + .set_stdout(fixtures.make_file(seq_19000_file_name)) + .succeeds(); + scene + .cmd("seq") + .args(&["19001", "20000"]) + .set_stdout(fixtures.make_file(seq_19001_20000_file_name)) + .succeeds(); + + let file = fixtures.open(seq_20000_file_name); + let file_shadow = file.try_clone().unwrap(); + let seq_19001_20000_file_length = fixtures + .open(seq_19001_20000_file_name) + .metadata() + .unwrap() + .len(); + scene + .ucmd() + .args(&["-c", &format!("-{}", seq_19001_20000_file_length)]) + .set_stdin(file) + .succeeds() + .stdout_only_fixture(seq_19000_file_name); + scene + .cmd("cat") + .set_stdin(file_shadow) + .succeeds() + .stdout_only_fixture(seq_19001_20000_file_name); } #[cfg(all(