od: Ensure stdin is left in the correct state

Fixes issue #7666 For `od` utility, if client specifies `-N` maximum bytes to be read then ensure stdin is left pointing to the next byte when `od` exits. To do this... - Bypass standard buffering on stdin. - Instantiate BufReader further up the stack to maintain performance.
2025-09-13 02:27:56 +00:00 · 2025-04-17 19:18:53 -06:00 · 2025-04-17 19:18:53 -06:00 · b692fad45d
commit b692fad45d
parent 349e56897c
3 changed files with 76 additions and 10 deletions
--- a/src/uu/od/src/multifilereader.rs
+++ b/src/uu/od/src/multifilereader.rs
@ -5,7 +5,9 @@
 // spell-checker:ignore (ToDO) multifile curr fnames fname xfrd fillloop mockstream

 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
+#[cfg(unix)]
+use std::os::fd::{AsRawFd, FromRawFd};

 use uucore::display::Quotable;
 use uucore::show_error;
@ -48,13 +50,36 @@ impl MultifileReader<'_> {
            }
            match self.ni.remove(0) {
                InputSource::Stdin => {
-                    self.curr_file = Some(Box::new(BufReader::new(io::stdin())));
+                    // In order to pass GNU compatibility tests, when the client passes in the
+                    // `-N` flag we must not read any bytes beyond that limit. As such, we need
+                    // to disable the default buffering for stdin below.
+                    // For performance reasons we do still do buffered reads from stdin, but
+                    // the buffering is done elsewhere and in a way that is aware of the `-N`
+                    // limit.
+                    let stdin = io::stdin();
+                    #[cfg(unix)]
+                    {
+                        let stdin_raw_fd = stdin.as_raw_fd();
+                        let stdin_file = unsafe { File::from_raw_fd(stdin_raw_fd) };
+                        self.curr_file = Some(Box::new(stdin_file));
+                    }
+
+                    // For non-unix platforms we don't have GNU compatibility requirements, so
+                    // we don't need to prevent stdin buffering. This is sub-optimal (since
+                    // there will still be additional buffering further up the stack), but
+                    // doesn't seem worth worrying about at this time.
+                    #[cfg(not(unix))]
+                    {
+                        self.curr_file = Some(Box::new(stdin));
+                    }
                    break;
                }
                InputSource::FileName(fname) => {
                    match File::open(fname) {
                        Ok(f) => {
-                            self.curr_file = Some(Box::new(BufReader::new(f)));
+                            // No need to wrap `f` in a BufReader - buffered reading is taken care
+                            // of elsewhere.
+                            self.curr_file = Some(Box::new(f));
                            break;
                        }
                        Err(e) => {
--- a/src/uu/od/src/od.rs
+++ b/src/uu/od/src/od.rs
@ -26,6 +26,7 @@ mod prn_int;

 use std::cmp;
 use std::fmt::Write;
+use std::io::BufReader;

 use crate::byteorder_io::ByteOrder;
 use crate::formatteriteminfo::FormatWriter;
@ -603,7 +604,7 @@ fn open_input_peek_reader(
    input_strings: &[String],
    skip_bytes: u64,
    read_bytes: Option<u64>,
-) -> PeekReader<PartialReader<MultifileReader>> {
+) -> PeekReader<BufReader<PartialReader<MultifileReader>>> {
    // should return  "impl PeekRead + Read + HasError" when supported in (stable) rust
    let inputs = input_strings
        .iter()
@ -615,7 +616,18 @@ fn open_input_peek_reader(

    let mf = MultifileReader::new(inputs);
    let pr = PartialReader::new(mf, skip_bytes, read_bytes);
-    PeekReader::new(pr)
+    // Add a BufReader over the top of the PartialReader. This will have the
+    // effect of generating buffered reads to files/stdin, but since these reads
+    // go through MultifileReader (which limits the maximum number of bytes read)
+    // we won't ever read more bytes than were specified with the `-N` flag.
+    let buf_pr = BufReader::new(pr);
+    PeekReader::new(buf_pr)
+}
+
+impl<R: HasError> HasError for BufReader<R> {
+    fn has_error(&self) -> bool {
+        self.get_ref().has_error()
+    }
 }

 fn format_error_message(error: &ParseSizeError, s: &str, option: &str) -> String {
--- a/tests/by-util/test_od.rs
+++ b/tests/by-util/test_od.rs
@ -5,6 +5,9 @@

 // spell-checker:ignore abcdefghijklmnopqrstuvwxyz Anone

+#[cfg(unix)]
+use std::io::Read;
+
 use unindent::unindent;
 use uutests::at_and_ucmd;
 use uutests::new_ucmd;
@ -660,14 +663,40 @@ fn test_skip_bytes_error() {

 #[test]
 fn test_read_bytes() {
+    let scene = TestScenario::new(util_name!());
+    let fixtures = &scene.fixtures;
+
    let input = "abcdefghijklmnopqrstuvwxyz\n12345678";
-    new_ucmd!()
+
+    fixtures.write("f1", input);
+    let file = fixtures.open("f1");
+    #[cfg(unix)]
+    let mut file_shadow = file.try_clone().unwrap();
+
+    scene
+        .ucmd()
        .arg("--endian=little")
        .arg("--read-bytes=27")
-        .run_piped_stdin(input.as_bytes())
-        .no_stderr()
-        .success()
-        .stdout_is(unindent(ALPHA_OUT));
+        .set_stdin(file)
+        .succeeds()
+        .stdout_only(unindent(ALPHA_OUT));
+
+    // On unix platforms, confirm that only 27 bytes and strictly no more were read from stdin.
+    // Leaving stdin in the correct state is required for GNU compatibility.
+    #[cfg(unix)]
+    {
+        // skip(27) to skip the 27 bytes that should have been consumed with the
+        // --read-bytes flag.
+        let expected_bytes_remaining_in_stdin: Vec<_> = input.bytes().skip(27).collect();
+        let mut bytes_remaining_in_stdin = vec![];
+        assert_eq!(
+            file_shadow
+                .read_to_end(&mut bytes_remaining_in_stdin)
+                .unwrap(),
+            expected_bytes_remaining_in_stdin.len()
+        );
+        assert_eq!(expected_bytes_remaining_in_stdin, bytes_remaining_in_stdin);
+    }
 }

 #[test]