od: Ensure stdin is left in the correct state

Fixes issue #7666 For `od` utility, if client specifies `-N` maximum bytes to be read then ensure stdin is left pointing to the next byte when `od` exits. To do this... - Bypass standard buffering on stdin. - Instantiate BufReader further up the stack to maintain performance.
2025-09-13 10:37:58 +00:00 · 2025-04-17 19:18:53 -06:00 · 2025-04-17 19:18:53 -06:00 · b692fad45d
commit b692fad45d
parent 349e56897c
3 changed files with 76 additions and 10 deletions
--- a/src/uu/od/src/multifilereader.rs
+++ b/src/uu/od/src/multifilereader.rs
@ -5,7 +5,9 @@
 // spell-checker:ignore (ToDO) multifile curr fnames fname xfrd fillloop mockstream
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
 #[cfg(unix)]
 use std::os::fd::{AsRawFd, FromRawFd};
 use uucore::display::Quotable;
 use uucore::show_error;
@ -48,13 +50,36 @@ impl MultifileReader<'_> {
            }
            match self.ni.remove(0) {
                InputSource::Stdin => {
-                    self.curr_file = Some(Box::new(BufReader::new(io::stdin())));
+                    // In order to pass GNU compatibility tests, when the client passes in the
                    // `-N` flag we must not read any bytes beyond that limit. As such, we need
                    // to disable the default buffering for stdin below.
                    // For performance reasons we do still do buffered reads from stdin, but
                    // the buffering is done elsewhere and in a way that is aware of the `-N`
                    // limit.
                    let stdin = io::stdin();
                    #[cfg(unix)]
                    {
                        let stdin_raw_fd = stdin.as_raw_fd();
                        let stdin_file = unsafe { File::from_raw_fd(stdin_raw_fd) };
                        self.curr_file = Some(Box::new(stdin_file));
                    }
                    // For non-unix platforms we don't have GNU compatibility requirements, so
                    // we don't need to prevent stdin buffering. This is sub-optimal (since
                    // there will still be additional buffering further up the stack), but
                    // doesn't seem worth worrying about at this time.
                    #[cfg(not(unix))]
                    {
                        self.curr_file = Some(Box::new(stdin));
                    }
                    break;
                }
                InputSource::FileName(fname) => {
                    match File::open(fname) {
                        Ok(f) => {
-                            self.curr_file = Some(Box::new(BufReader::new(f)));
+                            // No need to wrap `f` in a BufReader - buffered reading is taken care
                            // of elsewhere.
                            self.curr_file = Some(Box::new(f));
                            break;
                        }
                        Err(e) => {
--- a/src/uu/od/src/od.rs
+++ b/src/uu/od/src/od.rs
@ -26,6 +26,7 @@ mod prn_int;
 use std::cmp;
 use std::fmt::Write;
 use std::io::BufReader;
 use crate::byteorder_io::ByteOrder;
 use crate::formatteriteminfo::FormatWriter;
@ -603,7 +604,7 @@ fn open_input_peek_reader(
    input_strings: &[String],
    skip_bytes: u64,
    read_bytes: Option<u64>,
-) -> PeekReader<PartialReader<MultifileReader>> {
+) -> PeekReader<BufReader<PartialReader<MultifileReader>>> {
    // should return  "impl PeekRead + Read + HasError" when supported in (stable) rust
    let inputs = input_strings
        .iter()
@ -615,7 +616,18 @@ fn open_input_peek_reader(
    let mf = MultifileReader::new(inputs);
    let pr = PartialReader::new(mf, skip_bytes, read_bytes);
-    PeekReader::new(pr)
+    // Add a BufReader over the top of the PartialReader. This will have the
    // effect of generating buffered reads to files/stdin, but since these reads
    // go through MultifileReader (which limits the maximum number of bytes read)
    // we won't ever read more bytes than were specified with the `-N` flag.
    let buf_pr = BufReader::new(pr);
    PeekReader::new(buf_pr)
 }
 impl<R: HasError> HasError for BufReader<R> {
    fn has_error(&self) -> bool {
        self.get_ref().has_error()
    }
 }
 fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String {
--- a/tests/by-util/test_od.rs
+++ b/tests/by-util/test_od.rs
@ -5,6 +5,9 @@
 // spell-checker:ignore abcdefghijklmnopqrstuvwxyz Anone
 #[cfg(unix)]
 use std::io::Read;
 use unindent::unindent;
 use uutests::at_and_ucmd;
 use uutests::new_ucmd;
@ -660,14 +663,40 @@ fn test_skip_bytes_error() {
 #[test]
 fn test_read_bytes() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;
    let input = "abcdefghijklmnopqrstuvwxyz\n12345678";
-    new_ucmd!()
+
    fixtures.write("f1", input);
    let file = fixtures.open("f1");
    #[cfg(unix)]
    let mut file_shadow = file.try_clone().unwrap();
    scene
        .ucmd()
        .arg("--endian=little")
        .arg("--read-bytes=27")
-        .run_piped_stdin(input.as_bytes())
+        .set_stdin(file)
-        .no_stderr()
+        .succeeds()
-        .success()
+        .stdout_only(unindent(ALPHA_OUT));
-        .stdout_is(unindent(ALPHA_OUT));
+
    // On unix platforms, confirm that only 27 bytes and strictly no more were read from stdin.
    // Leaving stdin in the correct state is required for GNU compatibility.
    #[cfg(unix)]
    {
        // skip(27) to skip the 27 bytes that should have been consumed with the
        // --read-bytes flag.
        let expected_bytes_remaining_in_stdin: Vec<_> = input.bytes().skip(27).collect();
        let mut bytes_remaining_in_stdin = vec![];
        assert_eq!(
            file_shadow
                .read_to_end(&mut bytes_remaining_in_stdin)
                .unwrap(),
            expected_bytes_remaining_in_stdin.len()
        );
        assert_eq!(expected_bytes_remaining_in_stdin, bytes_remaining_in_stdin);
    }
 }
 #[test]