From 2658f8ae5badb3a5e55968ec396a29c891e0c795 Mon Sep 17 00:00:00 2001
From: Joining7943 <111500881+Joining7943@users.noreply.github.com>
Date: Fri, 9 Sep 2022 13:50:59 +0200
Subject: [PATCH] tail: improve performance of piped stdin

Rewrite handling of stdin when it is piped and read input in chunks.

Fixes https://github.com/uutils/coreutils/issues/3842
---
 Cargo.lock                 |   1 +
 src/uu/tail/Cargo.toml     |   1 +
 src/uu/tail/src/chunks.rs  | 618 ++++++++++++++++++++++++++++++-
 src/uu/tail/src/tail.rs    | 119 +++---
 tests/by-util/test_tail.rs | 733 ++++++++++++++++++++++++++++++++++++-
 tests/common/mod.rs        |   1 +
 tests/common/random.rs     | 314 ++++++++++++++++
 7 files changed, 1704 insertions(+), 83 deletions(-)
 create mode 100644 tests/common/random.rs

diff --git a/Cargo.lock b/Cargo.lock
index d0bf1c93c..25f8c6aa6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2871,6 +2871,7 @@ version = "0.0.15"
 dependencies = [
  "clap",
  "libc",
+ "memchr",
  "nix",
  "notify",
  "same-file",
diff --git a/src/uu/tail/Cargo.toml b/src/uu/tail/Cargo.toml
index 8e12beafa..dd5747cc4 100644
--- a/src/uu/tail/Cargo.toml
+++ b/src/uu/tail/Cargo.toml
@@ -18,6 +18,7 @@ path = "src/tail.rs"
 [dependencies]
 clap = { version = "3.2", features = ["wrap_help", "cargo"] }
 libc = "0.2.132"
+memchr = "2.5.0"
 notify = { version = "=5.0.0-pre.16", features=["macos_kqueue"]}
 uucore = { version=">=0.0.15", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
 same-file = "1.0.6"
diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs
index 0ba64540a..8fb53c769 100644
--- a/src/uu/tail/src/chunks.rs
+++ b/src/uu/tail/src/chunks.rs
@@ -1,14 +1,29 @@
-//! Iterating over a file by chunks, starting at the end of the file.
+//  * This file is part of the uutils coreutils package.
+//  *
+//  * For the full copyright and license information, please view the LICENSE
+//  * file that was distributed with this source code.
+
+//! Iterating over a file by chunks, either starting at the end of the file with [`ReverseChunks`]
+//! or at the end of piped stdin with [`LinesChunk`] or [`BytesChunk`].
 //!
-//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
-//! bytes from the file.
+//! Use [`ReverseChunks::new`] to create a new iterator over chunks of bytes from the file.
+// spell-checker:ignore (ToDO) filehandle BUFSIZ
+use std::collections::VecDeque;
 use std::fs::File;
-use std::io::{Read, Seek, SeekFrom};
+use std::io::{BufReader, Read, Seek, SeekFrom, Write};
+use uucore::error::UResult;
 
 /// When reading files in reverse in `bounded_tail`, this is the size of each
 /// block read at a time.
 pub const BLOCK_SIZE: u64 = 1 << 16;
 
+/// The size of the backing buffer of a LinesChunk or BytesChunk in bytes. The value of BUFFER_SIZE
+/// originates from the BUFSIZ constant in stdio.h and the libc crate to make stream IO efficient.
+/// In the latter the value is constantly set to 8192 on all platforms, whereas the value in stdio.h
+/// is determined on each platform differently. Since libc chose 8192 as a reasonable default the
+/// value here is set to this value, too.
+pub const BUFFER_SIZE: usize = 8192;
+
 /// An iterator over a file in non-overlapping chunks from the end of the file.
 ///
 /// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
@@ -86,3 +101,598 @@ impl<'a> Iterator for ReverseChunks<'a> {
         Some(buf[0..(block_size as usize)].to_vec())
     }
 }
+
+/// The type of the backing buffer of [`BytesChunk`] and [`LinesChunk`] which can hold
+/// [`BUFFER_SIZE`] elements at max.
+type ChunkBuffer = [u8; BUFFER_SIZE];
+
+/// A [`BytesChunk`] storing a fixed size number of bytes in a buffer.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct BytesChunk {
+    /// The [`ChunkBuffer`], an array storing the bytes, for example filled by
+    /// [`BytesChunk::fill`]
+    buffer: ChunkBuffer,
+
+    /// Stores the number of bytes this buffer holds. This is not equal to buffer.len(), since the
+    /// [`BytesChunk`] may store fewer bytes than the internal buffer can hold. In addition
+    /// [`BytesChunk`] may be reused, which makes it necessary to track the number of stored bytes.
+    /// The choice of usize is sufficient here, since the maximum number of bytes is
+    /// [`BUFFER_SIZE`], which is a usize.
+    bytes: usize,
+}
+
+impl BytesChunk {
+    #[allow(clippy::new_without_default)]
+    pub fn new() -> Self {
+        Self {
+            buffer: [0; BUFFER_SIZE],
+            bytes: 0,
+        }
+    }
+
+    /// Create a new chunk from an existing chunk. The new chunk's buffer will be copied from the
+    /// old chunk's buffer, copying the slice `[offset..old_chunk.bytes]` into the new chunk's
+    /// buffer but starting at 0 instead of offset. If the offset is larger than or equal to
+    /// `chunk.bytes` then a new empty `BytesChunk` is returned.
+    ///
+    /// # Arguments
+    ///
+    /// * `chunk`: The chunk to create a new `BytesChunk` chunk from
+    /// * `offset`: Start to copy the old chunk's buffer from this position. May not be larger
+    ///             than `chunk.bytes`.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = BytesChunk::new();
+    /// chunk.buffer[1] = 1;
+    /// chunk.bytes = 2;
+    /// let new_chunk = BytesChunk::from_chunk(&chunk, 0);
+    /// assert_eq!(2, new_chunk.get_buffer().len());
+    /// assert_eq!(&[0, 1], new_chunk.get_buffer());
+    ///
+    /// let new_chunk = BytesChunk::from_chunk(&chunk, 1);
+    /// assert_eq!(1, new_chunk.get_buffer().len());
+    /// assert_eq!(&[1], new_chunk.get_buffer());
+    /// ```
+    fn from_chunk(chunk: &Self, offset: usize) -> Self {
+        if offset >= chunk.bytes {
+            return Self::new();
+        }
+
+        let mut buffer: ChunkBuffer = [0; BUFFER_SIZE];
+        let slice = chunk.get_buffer_with(offset);
+        buffer[..slice.len()].copy_from_slice(slice);
+        Self {
+            buffer,
+            bytes: chunk.bytes - offset,
+        }
+    }
+
+    /// Returns the internal buffer safely, that is, as a slice which contains only the first
+    /// `self.bytes` bytes of the buffer.
+    ///
+    /// returns: a slice containing the bytes of the internal buffer from `[0..self.bytes]`
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = BytesChunk::new();
+    /// chunk.bytes = 1;
+    /// assert_eq!(&[0], chunk.get_buffer());
+    /// ```
+    pub fn get_buffer(&self) -> &[u8] {
+        &self.buffer[..self.bytes]
+    }
+
+    /// Like [`BytesChunk::get_buffer`], but returning a slice from `[offset..self.bytes]`.
+    ///
+    /// returns: a slice containing the bytes of the internal buffer from `[offset..self.bytes]`
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = BytesChunk::new();
+    /// chunk.bytes = 2;
+    /// assert_eq!(&[0], chunk.get_buffer_with(1));
+    /// ```
+    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
+        &self.buffer[offset..self.bytes]
+    }
+
+    pub fn has_data(&self) -> bool {
+        self.bytes > 0
+    }
+
+    /// Fills `self.buffer` with at most [`BUFFER_SIZE`] bytes, draining the reader by that number
+    /// of bytes. If EOF is reached (so 0 bytes are read), then `Ok(None)` is returned, otherwise
+    /// `Ok(Some(bytes))` where `bytes` is the number of bytes read from the source. Read errors
+    /// are propagated as `Err`.
+    pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
+        let num_bytes = filehandle.read(&mut self.buffer)?;
+        self.bytes = num_bytes;
+        if num_bytes == 0 {
+            return Ok(None);
+        }
+
+        Ok(Some(self.bytes))
+    }
+}
+
+/// An abstraction layer on top of [`BytesChunk`] mainly to simplify filling only the needed amount
+/// of chunks. See also [`Self::fill`].
+pub struct BytesChunkBuffer {
+    /// The number of bytes to print
+    num_print: u64,
+    /// The current number of bytes summed over all stored chunks in [`Self::chunks`]. Use u64 here
+    /// to support files > 4GB on 32-bit systems. Note, this differs from `BytesChunk::bytes` which
+    /// is a usize. The choice of u64 is based on `tail::FilterMode::Bytes`.
+    bytes: u64,
+    /// The buffer to store [`BytesChunk`] in
+    chunks: VecDeque<Box<BytesChunk>>,
+}
+
+impl BytesChunkBuffer {
+    /// Creates a new [`BytesChunkBuffer`].
+    ///
+    /// # Arguments
+    ///
+    /// * `num_print`: The number of bytes to print
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// use crate::chunks::BytesChunkBuffer;
+    ///
+    /// // Keep (at most) the last 10 bytes of whatever `fill` reads later on.
+    /// let num_print = 10;
+    /// let chunks = BytesChunkBuffer::new(num_print);
+    ///
+    /// // A freshly created buffer stores no chunks and no bytes.
+    /// assert_eq!(num_print, chunks.num_print);
+    /// assert_eq!(0, chunks.bytes);
+    /// assert!(chunks.chunks.is_empty());
+    /// ```
+    pub fn new(num_print: u64) -> Self {
+        Self {
+            bytes: 0,
+            num_print,
+            chunks: VecDeque::new(),
+        }
+    }
+
+    /// Fills this buffer with chunks and consumes the reader completely. Only the chunks needed to
+    /// hold the last `self.num_print` bytes are kept and the first of them is trimmed, so in sum
+    /// at most `self.num_print` bytes are stored in all chunks, however the reader splits its
+    /// input into reads. Use [`Self::print`] to write them out. If the piped stdin contained no
+    /// bytes, or `num_print = 0`, then the stored chunks hold no bytes at all.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// use crate::chunks::BytesChunkBuffer;
+    /// use std::io::{BufReader, Cursor};
+    ///
+    /// let mut reader = BufReader::new(Cursor::new(""));
+    /// let num_print = 0;
+    /// let mut chunks = BytesChunkBuffer::new(num_print);
+    /// chunks.fill(&mut reader).unwrap();
+    ///
+    /// let mut reader = BufReader::new(Cursor::new("a"));
+    /// let num_print = 1;
+    /// let mut chunks = BytesChunkBuffer::new(num_print);
+    /// chunks.fill(&mut reader).unwrap();
+    /// ```
+    pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
+        let mut chunk = Box::new(BytesChunk::new());
+
+        // fill chunks with all bytes from reader and reuse already instantiated chunks if possible
+        while (chunk.fill(reader)?).is_some() {
+            self.bytes += chunk.bytes as u64;
+            self.chunks.push_back(chunk);
+
+            let first = &self.chunks[0];
+            if self.bytes - first.bytes as u64 > self.num_print {
+                chunk = self.chunks.pop_front().unwrap();
+                self.bytes -= chunk.bytes as u64;
+            } else {
+                chunk = Box::new(BytesChunk::new());
+            }
+        }
+
+        // Short reads (usual for pipes) can leave several leading chunks which hold none of the
+        // bytes to print, because the loop above drops at most one chunk per read. Drop them here.
+        while self.chunks.len() > 1 && self.bytes - self.chunks[0].bytes as u64 > self.num_print {
+            self.bytes -= self.chunks.pop_front().map_or(0, |c| c.bytes as u64);
+        }
+
+        // Trim the first chunk so that exactly the last `self.num_print` bytes (or all bytes, if
+        // there are fewer) remain. The offset is at most BUFFER_SIZE and fits into a usize.
+        let offset = self.bytes.saturating_sub(self.num_print) as usize;
+        if let Some(first) = self.chunks.pop_front() {
+            self.chunks
+                .push_front(Box::new(BytesChunk::from_chunk(&first, offset)));
+        }
+
+        Ok(())
+    }
+
+    pub fn print(&self, mut writer: impl Write) -> UResult<()> {
+        for chunk in &self.chunks {
+            writer.write_all(chunk.get_buffer())?;
+        }
+        Ok(())
+    }
+}
+
+/// Works similarly to a [`BytesChunk`] but also stores the number of lines encountered in the
+/// current buffer. The size of the buffer is limited to a fixed number of bytes.
+#[derive(Debug)]
+pub struct LinesChunk {
+    /// Work on top of a [`BytesChunk`]
+    chunk: BytesChunk,
+    /// The number of lines delimited by `delimiter`. The choice of usize is sufficient here,
+    /// because lines max value is the number of bytes contained in this chunk's buffer, and the
+    /// number of bytes max value is [`BUFFER_SIZE`], which is a usize.
+    lines: usize,
+    /// The delimiter to use, to count the lines
+    delimiter: u8,
+}
+
+impl LinesChunk {
+    pub fn new(delimiter: u8) -> Self {
+        Self {
+            chunk: BytesChunk::new(),
+            lines: 0,
+            delimiter,
+        }
+    }
+
+    /// Count the number of lines delimited with [`Self::delimiter`] contained in the buffer.
+    /// Currently [`memchr`] is used because performance is better than using an iterator or for
+    /// loop.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = LinesChunk::new(b'\n');
+    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
+    /// chunk.chunk.bytes = 12;
+    /// assert_eq!(2, chunk.count_lines());
+    ///
+    /// chunk.chunk.buffer[0..14].copy_from_slice("hello\r\nworld\r\n".as_bytes());
+    /// chunk.chunk.bytes = 14;
+    /// assert_eq!(2, chunk.count_lines());
+    /// ```
+    fn count_lines(&self) -> usize {
+        memchr::memchr_iter(self.delimiter, self.get_buffer()).count()
+    }
+
+    /// Creates a new [`LinesChunk`] from an existing one with an offset in lines. The new chunk
+    /// contains exactly `chunk.lines - offset` lines. The offset in bytes is calculated and applied
+    /// to the new chunk, so the new chunk contains only the bytes encountered after the offset in
+    /// number of lines and the `delimiter`. If the offset is larger than `chunk.lines` then a new
+    /// empty `LinesChunk` is returned.
+    ///
+    /// # Arguments
+    ///
+    /// * `chunk`: The chunk to create the new chunk from
+    /// * `offset`: The offset in number of lines (not bytes)
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = LinesChunk::new(b'\n');
+    /// // manually filling the buffer and setting the correct values for bytes and lines
+    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
+    /// chunk.chunk.bytes = 12;
+    /// chunk.lines = 2;
+    ///
+    /// let offset = 1; // offset in number of lines
+    /// let new_chunk = LinesChunk::from_chunk(&chunk, offset);
+    /// assert_eq!("world\n".as_bytes(), new_chunk.get_buffer());
+    /// assert_eq!(6, new_chunk.chunk.bytes);
+    /// assert_eq!(1, new_chunk.lines);
+    /// ```
+    fn from_chunk(chunk: &Self, offset: usize) -> Self {
+        if offset > chunk.lines {
+            return Self::new(chunk.delimiter);
+        }
+
+        let bytes_offset = chunk.calculate_bytes_offset_from(offset);
+        let new_chunk = BytesChunk::from_chunk(&chunk.chunk, bytes_offset);
+
+        Self {
+            chunk: new_chunk,
+            lines: chunk.lines - offset,
+            delimiter: chunk.delimiter,
+        }
+    }
+
+    /// Returns true if this buffer has stored any bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = LinesChunk::new(b'\n');
+    /// assert!(!chunk.has_data());
+    ///
+    /// chunk.chunk.buffer[0] = 1;
+    /// assert!(!chunk.has_data());
+    ///
+    /// chunk.chunk.bytes = 1;
+    /// assert!(chunk.has_data());
+    /// ```
+    pub fn has_data(&self) -> bool {
+        self.chunk.has_data()
+    }
+
+    /// Returns this buffer safely. See [`BytesChunk::get_buffer`]
+    ///
+    /// returns: &[u8] with length `self.bytes`
+    pub fn get_buffer(&self) -> &[u8] {
+        self.chunk.get_buffer()
+    }
+
+    /// Returns this buffer safely with an offset applied. See [`BytesChunk::get_buffer_with`].
+    ///
+    /// returns: &[u8] with length `self.bytes - offset`
+    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
+        self.chunk.get_buffer_with(offset)
+    }
+
+    /// Return the number of lines the buffer contains. `self.lines` needs to be set before the call
+    /// to this function returns the correct value. If the calculation of lines is needed then
+    /// use `self.count_lines`.
+    pub fn get_lines(&self) -> usize {
+        self.lines
+    }
+
+    /// Fills the underlying [`BytesChunk`] with at most [`BUFFER_SIZE`] bytes, draining the reader
+    /// by that number of bytes. This function works like [`BytesChunk::fill`], except that it also
+    /// counts and stores the number of delimiters encountered in the bytes read from the
+    /// `filehandle`.
+    pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
+        match self.chunk.fill(filehandle)? {
+            None => {
+                self.lines = 0;
+                Ok(None)
+            }
+            Some(bytes) => {
+                self.lines = self.count_lines();
+                Ok(Some(bytes))
+            }
+        }
+    }
+
+    /// Calculates the offset in bytes within this buffer from the offset in number of lines. The
+    /// resulting offset is 0-based and points to the byte after the delimiter.
+    ///
+    /// # Arguments
+    ///
+    /// * `offset`: the offset in number of lines. If offset is 0 then 0 is returned, if larger than
+    ///             the contained lines then self.bytes is returned.
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// let mut chunk = LinesChunk::new(b'\n');
+    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
+    /// chunk.chunk.bytes = 12;
+    /// chunk.lines = 2; // note that if not setting lines the result might not be what is expected
+    /// let bytes_offset = chunk.calculate_bytes_offset_from(1);
+    /// assert_eq!(6, bytes_offset);
+    /// assert_eq!(
+    ///     "world\n",
+    ///     String::from_utf8_lossy(chunk.get_buffer_with(bytes_offset)));
+    /// ```
+    fn calculate_bytes_offset_from(&self, offset: usize) -> usize {
+        let mut lines_offset = offset;
+        let mut bytes_offset = 0;
+        for byte in self.get_buffer().iter() {
+            if lines_offset == 0 {
+                break;
+            }
+            if byte == &self.delimiter {
+                lines_offset -= 1;
+            }
+            bytes_offset += 1;
+        }
+        bytes_offset
+    }
+
+    /// Print the bytes contained in this buffer calculated with the given offset in number of
+    /// lines.
+    ///
+    /// # Arguments
+    ///
+    /// * `writer`: must implement [`Write`]
+    /// * `offset`: An offset in number of lines.
+    pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
+        self.print_bytes(writer, self.calculate_bytes_offset_from(offset))
+    }
+
+    /// Print the bytes contained in this buffer beginning from the given offset in number of bytes.
+    ///
+    /// # Arguments
+    ///
+    /// * `writer`: must implement [`Write`]
+    /// * `offset`: An offset in number of bytes.
+    pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
+        writer.write_all(self.get_buffer_with(offset))?;
+        Ok(())
+    }
+}
+
+/// An abstraction layer on top of [`LinesChunk`] mainly to simplify filling only the needed amount
+/// of chunks. See also [`Self::fill`]. Works similarly to [`BytesChunkBuffer`], but operates on
+/// lines delimited by `self.delimiter` instead of bytes.
+pub struct LinesChunkBuffer {
+    /// The delimiter to recognize a line. Any [`u8`] is allowed.
+    delimiter: u8,
+    /// The amount of lines occurring in all currently stored [`LinesChunk`]s. Use u64 here to
+    /// support files > 4GB on 32-bit systems. Note, this differs from [`LinesChunk::lines`] which
+    /// is a usize. The choice of u64 is based on `tail::FilterMode::Lines`.
+    lines: u64,
+    /// The amount of lines to print.
+    num_print: u64,
+    /// Stores the [`LinesChunk`]
+    chunks: VecDeque<Box<LinesChunk>>,
+}
+
+impl LinesChunkBuffer {
+    /// Create a new [`LinesChunkBuffer`]
+    pub fn new(delimiter: u8, num_print: u64) -> Self {
+        Self {
+            delimiter,
+            num_print,
+            lines: 0,
+            chunks: VecDeque::new(),
+        }
+    }
+
+    /// Fills this buffer with chunks and consumes the reader completely. This method ensures that
+    /// only as many chunks are kept as are needed to hold the last `self.num_print` lines, so in
+    /// sum at most `self.num_print` lines are stored in all chunks. Use [`Self::print`] to write
+    /// them out. If the piped stdin contained no bytes, or `num_print = 0`, then the stored chunks
+    /// hold no bytes at all.
+    pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
+        let mut chunk = Box::new(LinesChunk::new(self.delimiter));
+
+        while (chunk.fill(reader)?).is_some() {
+            self.lines += chunk.lines as u64;
+            self.chunks.push_back(chunk);
+
+            let first = &self.chunks[0];
+            if self.lines - first.lines as u64 > self.num_print {
+                chunk = self.chunks.pop_front().unwrap();
+
+                self.lines -= chunk.lines as u64;
+            } else {
+                chunk = Box::new(LinesChunk::new(self.delimiter));
+            }
+        }
+
+        if !&self.chunks.is_empty() {
+            let length = &self.chunks.len();
+            let last = &mut self.chunks[length - 1];
+            if !last.get_buffer().ends_with(&[self.delimiter]) {
+                last.lines += 1;
+                self.lines += 1;
+            }
+        } else {
+            // chunks is empty when a file is empty so quitting early here
+            return Ok(());
+        }
+
+        // skip unnecessary chunks and save the first chunk which may hold some lines we have to
+        // print
+        let chunk = loop {
+            // it's safe to call unwrap here because there is at least one chunk and sorting out
+            // more chunks than exist shouldn't be possible.
+            let chunk = self.chunks.pop_front().unwrap();
+
+            // skip is true as long as there are enough lines left in the other stored chunks.
+            let skip = self.lines - chunk.lines as u64 > self.num_print;
+            if skip {
+                self.lines -= chunk.lines as u64;
+            } else {
+                break chunk;
+            }
+        };
+
+        // Calculate the number of lines to skip in the current chunk. The calculated value must be
+        // in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without
+        // losses.
+        let skip_lines = self.lines.saturating_sub(self.num_print) as usize;
+        let chunk = LinesChunk::from_chunk(&chunk, skip_lines);
+        self.chunks.push_front(Box::new(chunk));
+
+        Ok(())
+    }
+
+    pub fn print(&self, mut writer: impl Write) -> UResult<()> {
+        for chunk in &self.chunks {
+            chunk.print_bytes(&mut writer, 0)?;
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::chunks::{BytesChunk, BUFFER_SIZE};
+
+    #[test]
+    fn test_bytes_chunk_from_when_offset_is_zero() {
+        let mut chunk = BytesChunk::new();
+        chunk.bytes = BUFFER_SIZE;
+        chunk.buffer[1] = 1;
+        let other = BytesChunk::from_chunk(&chunk, 0);
+        assert_eq!(other, chunk);
+
+        chunk.bytes = 2;
+        let other = BytesChunk::from_chunk(&chunk, 0);
+        assert_eq!(other, chunk);
+
+        chunk.bytes = 1;
+        let other = BytesChunk::from_chunk(&chunk, 0);
+        assert_eq!(other.buffer, [0; BUFFER_SIZE]);
+        assert_eq!(other.bytes, chunk.bytes);
+
+        chunk.bytes = BUFFER_SIZE;
+        let other = BytesChunk::from_chunk(&chunk, 2);
+        assert_eq!(other.buffer, [0; BUFFER_SIZE]);
+        assert_eq!(other.bytes, BUFFER_SIZE - 2);
+    }
+
+    #[test]
+    fn test_bytes_chunk_from_when_offset_is_not_zero() {
+        let mut chunk = BytesChunk::new();
+        chunk.bytes = BUFFER_SIZE;
+        chunk.buffer[1] = 1;
+
+        let other = BytesChunk::from_chunk(&chunk, 1);
+        let mut expected_buffer = [0; BUFFER_SIZE];
+        expected_buffer[0] = 1;
+        assert_eq!(other.buffer, expected_buffer);
+        assert_eq!(other.bytes, BUFFER_SIZE - 1);
+
+        let other = BytesChunk::from_chunk(&chunk, 2);
+        assert_eq!(other.buffer, [0; BUFFER_SIZE]);
+        assert_eq!(other.bytes, BUFFER_SIZE - 2);
+    }
+
+    #[test]
+    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_1() {
+        let mut chunk = BytesChunk::new();
+        chunk.bytes = BUFFER_SIZE;
+        let new_chunk = BytesChunk::from_chunk(&chunk, BUFFER_SIZE + 1);
+        assert_eq!(0, new_chunk.bytes);
+    }
+
+    #[test]
+    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_2() {
+        let mut chunk = BytesChunk::new();
+        chunk.bytes = 0;
+        let new_chunk = BytesChunk::from_chunk(&chunk, 1);
+        assert_eq!(0, new_chunk.bytes);
+    }
+
+    #[test]
+    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_3() {
+        let mut chunk = BytesChunk::new();
+        chunk.bytes = 1;
+        let new_chunk = BytesChunk::from_chunk(&chunk, 2);
+        assert_eq!(0, new_chunk.bytes);
+    }
+
+    #[test]
+    fn test_bytes_chunk_from_when_offset_is_equal_to_chunk_size() {
+        let mut chunk = BytesChunk::new();
+        chunk.buffer[0] = 1;
+        chunk.bytes = 1;
+        let new_chunk = BytesChunk::from_chunk(&chunk, 1);
+        assert_eq!(0, new_chunk.bytes);
+    }
+}
diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs
index 28a65093d..d8442f09b 100644
--- a/src/uu/tail/src/tail.rs
+++ b/src/uu/tail/src/tail.rs
@@ -7,7 +7,7 @@
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.
 
-// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized
+// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle
 // spell-checker:ignore (libs) kqueue
 // spell-checker:ignore (acronyms)
 // spell-checker:ignore (env/flags)
@@ -21,8 +21,9 @@ extern crate clap;
 
 #[macro_use]
 extern crate uucore;
+extern crate core;
 
-mod chunks;
+pub mod chunks;
 mod parse;
 mod platform;
 use crate::files::FileHandling;
@@ -30,11 +31,11 @@ use chunks::ReverseChunks;
 
 use clap::{Arg, Command, ValueSource};
 use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind};
+use std::cmp::Ordering;
 use std::collections::{HashMap, VecDeque};
 use std::ffi::OsString;
-use std::fmt;
 use std::fs::{File, Metadata};
-use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write};
+use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
 use std::path::{Path, PathBuf};
 use std::sync::mpsc::{self, channel, Receiver};
 use std::time::Duration;
@@ -43,9 +44,7 @@ use uucore::error::{
     get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError,
 };
 use uucore::format_usage;
-use uucore::lines::lines;
 use uucore::parse_size::{parse_size, ParseSizeError};
-use uucore::ringbuffer::RingBuffer;
 
 #[cfg(unix)]
 use std::os::unix::fs::MetadataExt;
@@ -1458,70 +1457,58 @@ fn bounded_tail(file: &mut File, settings: &Settings) {
     std::io::copy(file, &mut stdout).unwrap();
 }
 
-/// An alternative to [`Iterator::skip`] with u64 instead of usize. This is
-/// necessary because the usize limit doesn't make sense when iterating over
-/// something that's not in memory. For example, a very large file. This allows
-/// us to skip data larger than 4 GiB even on 32-bit platforms.
-fn skip_u64(iter: &mut impl Iterator, num: u64) {
-    for _ in 0..num {
-        if iter.next().is_none() {
-            break;
-        }
-    }
-}
-
-/// Collect the last elements of an iterator into a `VecDeque`.
-///
-/// This function returns a [`VecDeque`] containing either the last
-/// `count` elements of `iter`, an [`Iterator`] over [`Result`]
-/// instances, or all but the first `count` elements of `iter`. If
-/// `beginning` is `true`, then all but the first `count` elements are
-/// returned.
-///
-/// # Panics
-///
-/// If any element of `iter` is an [`Err`], then this function panics.
-fn unbounded_tail_collect<T, E>(
-    mut iter: impl Iterator<Item = Result<T, E>>,
-    count: u64,
-    beginning: bool,
-) -> UResult<VecDeque<T>>
-where
-    E: fmt::Debug,
-{
-    if beginning {
-        // GNU `tail` seems to index bytes and lines starting at 1, not
-        // at 0. It seems to treat `+0` and `+1` as the same thing.
-        let i = count.max(1) - 1;
-        skip_u64(&mut iter, i);
-        Ok(iter.map(|r| r.unwrap()).collect())
-    } else {
-        let count: usize = count
-            .try_into()
-            .map_err(|_| USimpleError::new(1, "Insufficient addressable memory"))?;
-        Ok(RingBuffer::from_iter(iter.map(|r| r.unwrap()), count).data)
-    }
-}
-
 fn unbounded_tail<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
-    // Read through each line/char and store them in a ringbuffer that always
-    // contains count lines/chars. When reaching the end of file, output the
-    // data in the ringbuf.
-    match settings.mode {
-        FilterMode::Lines(count, sep) => {
-            let mut stdout = stdout();
-            for line in unbounded_tail_collect(lines(reader, sep), count, settings.beginning)? {
-                stdout
-                    .write_all(&line)
-                    .map_err_context(|| String::from("IO error"))?;
-            }
+    let stdout = stdout();
+    let mut writer = BufWriter::new(stdout.lock());
+    match (&settings.mode, settings.beginning) {
+        (FilterMode::Lines(count, sep), false) => {
+            let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count);
+            chunks.fill(reader)?;
+            chunks.print(writer)?;
         }
-        FilterMode::Bytes(count) => {
-            for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning)? {
-                if let Err(err) = stdout().write(&[byte]) {
-                    return Err(USimpleError::new(1, err.to_string()));
+        (FilterMode::Lines(count, sep), true) => {
+            let mut num_skip = (*count).max(1) - 1;
+            let mut chunk = chunks::LinesChunk::new(*sep);
+            while chunk.fill(reader)?.is_some() {
+                let lines = chunk.get_lines() as u64;
+                if lines < num_skip {
+                    num_skip -= lines;
+                } else {
+                    break;
                 }
             }
+            if chunk.has_data() {
+                chunk.print_lines(&mut writer, num_skip as usize)?;
+                io::copy(reader, &mut writer)?;
+            }
+        }
+        (FilterMode::Bytes(count), false) => {
+            let mut chunks = chunks::BytesChunkBuffer::new(*count);
+            chunks.fill(reader)?;
+            chunks.print(writer)?;
+        }
+        (FilterMode::Bytes(count), true) => {
+            let mut num_skip = (*count).max(1) - 1;
+            let mut chunk = chunks::BytesChunk::new();
+            loop {
+                if let Some(bytes) = chunk.fill(reader)? {
+                    let bytes: u64 = bytes as u64;
+                    match bytes.cmp(&num_skip) {
+                        Ordering::Less => num_skip -= bytes,
+                        Ordering::Equal => {
+                            break;
+                        }
+                        Ordering::Greater => {
+                            writer.write_all(chunk.get_buffer_with(num_skip as usize))?;
+                            break;
+                        }
+                    }
+                } else {
+                    return Ok(());
+                }
+            }
+
+            io::copy(reader, &mut writer)?;
         }
     }
     Ok(())
diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs
index 1a48cebfe..442c07979 100644
--- a/tests/by-util/test_tail.rs
+++ b/tests/by-util/test_tail.rs
@@ -3,7 +3,7 @@
 //  * For the full copyright and license information, please view the LICENSE
 //  * file that was distributed with this source code.
 
-// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez
+// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez MULT
 // spell-checker:ignore (libs) kqueue
 // spell-checker:ignore (jargon) tailable untailable
 
@@ -1090,18 +1090,6 @@ fn test_invalid_num() {
         .fails()
         .stderr_str()
         .starts_with("tail: invalid number of lines: '1Y': Value too large for defined data type");
-    #[cfg(target_pointer_width = "32")]
-    {
-        let sizes = ["1000G", "10T"];
-        for size in &sizes {
-            new_ucmd!()
-                .args(&["-c", size])
-                .fails()
-                .code_is(1)
-                .stderr_str()
-                .starts_with("tail: Insufficient addressable memory");
-        }
-    }
     new_ucmd!()
         .args(&["-c", "-³"])
         .fails()
@@ -2484,6 +2472,725 @@ fn test_illegal_seek() {
     assert_eq!(p.wait().unwrap().code().unwrap(), 1);
 }
 
+#[cfg(all(not(target_os = "android"), not(target_os = "windows")))] // FIXME: See https://github.com/uutils/coreutils/issues/3881
+mod pipe_tests {
+    use super::*;
+    use crate::common::random::*;
+    use rand::distributions::Alphanumeric;
+    use tail::chunks::BUFFER_SIZE as CHUNK_BUFFER_SIZE;
+
+    #[test]
+    fn test_pipe_when_lines_option_value_is_higher_than_contained_lines() {
+        let test_string = "a\nb\n";
+        new_ucmd!()
+            .args(&["-n", "3"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-n", "4"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-n", "999"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-n", "+3"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-n", "+4"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-n", "+999"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+    }
+
+    #[test]
+    fn test_pipe_when_negative_lines_option_given_no_newline_at_eof() {
+        let test_string = "a\nb";
+
+        new_ucmd!()
+            .args(&["-n", "0"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-n", "1"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("b");
+
+        new_ucmd!()
+            .args(&["-n", "2"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a\nb");
+    }
+
+    #[test]
+    fn test_pipe_when_positive_lines_option_given_no_newline_at_eof() {
+        let test_string = "a\nb";
+
+        new_ucmd!()
+            .args(&["-n", "+0"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a\nb");
+
+        new_ucmd!()
+            .args(&["-n", "+1"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a\nb");
+
+        new_ucmd!()
+            .args(&["-n", "+2"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("b");
+    }
+
+    #[test]
+    fn test_pipe_when_lines_option_given_multibyte_utf8_characters() {
+        // from left to right, one per line: a 4-byte, 3-byte, 2-byte and 1-byte UTF-8 character
+        let test_string = "𝅘𝅥𝅮\n⏻\nƒ\na";
+
+        new_ucmd!()
+            .args(&["-n", "+0"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-n", "+2"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("⏻\nƒ\na");
+
+        new_ucmd!()
+            .args(&["-n", "+3"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("ƒ\na");
+
+        new_ucmd!()
+            .args(&["-n", "+4"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a");
+
+        new_ucmd!()
+            .args(&["-n", "+5"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-n", "-4"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-n", "-3"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("⏻\nƒ\na");
+
+        new_ucmd!()
+            .args(&["-n", "-2"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("ƒ\na");
+
+        new_ucmd!()
+            .args(&["-n", "-1"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a");
+
+        new_ucmd!()
+            .args(&["-n", "-0"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+    }
+
+    #[test]
+    fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size_no_newline_at_eof() {
+        let total_lines = 1;
+        let random_string = RandomString::generate_with_delimiter(
+            Alphanumeric,
+            b'\n',
+            total_lines,
+            false,
+            CHUNK_BUFFER_SIZE,
+        );
+        let random_string = random_string.as_str();
+        let lines = random_string.split_inclusive('\n');
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "+2"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-1"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+    }
+
+    #[test]
+    fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size() {
+        let total_lines = 100;
+        let random_string = RandomString::generate_with_delimiter(
+            Alphanumeric,
+            b'\n',
+            total_lines,
+            true,
+            CHUNK_BUFFER_SIZE,
+        );
+        let random_string = random_string.as_str();
+        let lines = random_string.split_inclusive('\n');
+
+        new_ucmd!()
+            .args(&["-n", "+0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "+2"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        new_ucmd!()
+            .args(&["-n", "-0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        let expected = lines.clone().skip(total_lines - 1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-1"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-99"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        new_ucmd!()
+            .args(&["-n", "-100"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+    }
+
+    #[test]
+    fn test_pipe_when_lines_option_given_input_size_is_one_byte_greater_than_buffer_size() {
+        let total_lines = 100;
+        let random_string = RandomString::generate_with_delimiter(
+            Alphanumeric,
+            b'\n',
+            total_lines,
+            true,
+            CHUNK_BUFFER_SIZE + 1,
+        );
+        let random_string = random_string.as_str();
+        let lines = random_string.split_inclusive('\n');
+
+        new_ucmd!()
+            .args(&["-n", "+0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+
+        let expected = lines.clone().skip(total_lines - 1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-1"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "+2"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-99"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+    }
+
+    #[test]
+    fn test_pipe_when_lines_option_given_input_size_has_multiple_size_of_buffer_size() {
+        let total_lines = 100;
+        let random_string = RandomString::generate_with_delimiter(
+            Alphanumeric,
+            b'\n',
+            total_lines,
+            true,
+            CHUNK_BUFFER_SIZE * 3 + 1,
+        );
+        let random_string = random_string.as_str();
+        let lines = random_string.split_inclusive('\n');
+
+        new_ucmd!()
+            .args(&["-n", "+0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "+2"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        new_ucmd!()
+            .args(&["-n", "-0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        let expected = lines.clone().skip(total_lines - 1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-1"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        let expected = lines.clone().skip(1).collect::<String>();
+        new_ucmd!()
+            .args(&["-n", "-99"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(expected);
+
+        new_ucmd!()
+            .args(&["-n", "-100"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+    }
+
+    #[test]
+    fn test_pipe_when_bytes_option_value_is_higher_than_contained_bytes() {
+        let test_string = "a\nb";
+        new_ucmd!()
+            .args(&["-c", "4"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-c", "5"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-c", "999"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-c", "+4"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-c", "+5"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-c", "+999"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+    }
+
+    #[test]
+    fn test_pipe_when_bytes_option_given_multibyte_utf8_characters() {
+        // from left to right: a 4-byte, 3-byte, 2-byte and 1-byte UTF-8 character
+        let test_string = "𝅘𝅥𝅮⏻ƒa";
+
+        new_ucmd!()
+            .args(&["-c", "+0"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+
+        new_ucmd!()
+            .args(&["-c", "+2"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(&test_string.as_bytes()[1..]);
+
+        new_ucmd!()
+            .args(&["-c", "+5"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("⏻ƒa");
+
+        new_ucmd!()
+            .args(&["-c", "+8"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("ƒa");
+
+        new_ucmd!()
+            .args(&["-c", "+10"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a");
+
+        new_ucmd!()
+            .args(&["-c", "+11"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        new_ucmd!()
+            .args(&["-c", "-1"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("a");
+
+        new_ucmd!()
+            .args(&["-c", "-2"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(&"ƒa".as_bytes()[1..]);
+
+        new_ucmd!()
+            .args(&["-c", "-3"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("ƒa");
+
+        new_ucmd!()
+            .args(&["-c", "-6"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only("⏻ƒa");
+
+        new_ucmd!()
+            .args(&["-c", "-10"])
+            .pipe_in(test_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(test_string);
+    }
+
+    #[test]
+    fn test_pipe_when_bytes_option_given_input_size_is_equal_to_buffer_size() {
+        let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE);
+        let random_string = random_string.as_str();
+
+        new_ucmd!()
+            .args(&["-c", "+0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+
+        let expected = &random_string.as_bytes()[1..];
+        new_ucmd!()
+            .args(&["-c", "+2"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        new_ucmd!()
+            .args(&["-c", "-0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        let expected = &random_string.as_bytes()[1..];
+        new_ucmd!()
+            .args(&["-c", "-8191"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        new_ucmd!()
+            .args(&["-c", "-8192"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(random_string);
+
+        new_ucmd!()
+            .args(&["-c", "-8193"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(random_string);
+
+        let expected = &random_string.as_bytes()[CHUNK_BUFFER_SIZE - 1..];
+        new_ucmd!()
+            .args(&["-c", "-1"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+    }
+
+    #[test]
+    fn test_pipe_when_bytes_option_given_input_size_is_one_byte_greater_than_buffer_size() {
+        let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE + 1);
+        let random_string = random_string.as_str();
+
+        new_ucmd!()
+            .args(&["-c", "+0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+
+        let expected = &random_string.as_bytes()[1..];
+        new_ucmd!()
+            .args(&["-c", "+2"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        new_ucmd!()
+            .args(&["-c", "-0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        let expected = &random_string.as_bytes()[CHUNK_BUFFER_SIZE..];
+        new_ucmd!()
+            .args(&["-c", "-1"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[1..];
+        new_ucmd!()
+            .args(&["-c", "-8192"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        new_ucmd!()
+            .args(&["-c", "-8193"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+    }
+
+    #[test]
+    fn test_pipe_when_bytes_option_given_input_size_has_multiple_size_of_buffer_size() {
+        let random_string = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE * 3);
+        let random_string = random_string.as_str();
+
+        new_ucmd!()
+            .args(&["-c", "+0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+
+        new_ucmd!()
+            .args(&["-c", "-0"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .no_stdout()
+            .no_stderr();
+
+        let expected = &random_string.as_bytes()[8192..];
+        new_ucmd!()
+            .args(&["-c", "+8193"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[8193..];
+        new_ucmd!()
+            .args(&["-c", "+8194"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[16384..];
+        new_ucmd!()
+            .args(&["-c", "+16385"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[16385..];
+        new_ucmd!()
+            .args(&["-c", "+16386"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[16384..];
+        new_ucmd!()
+            .args(&["-c", "-8192"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[16383..];
+        new_ucmd!()
+            .args(&["-c", "-8193"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[8192..];
+        new_ucmd!()
+            .args(&["-c", "-16384"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        let expected = &random_string.as_bytes()[8191..];
+        new_ucmd!()
+            .args(&["-c", "-16385"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only_bytes(expected);
+
+        new_ucmd!()
+            .args(&["-c", "-24576"])
+            .pipe_in(random_string)
+            .ignore_stdin_write_error()
+            .succeeds()
+            .stdout_only(random_string);
+    }
+}
+
 #[test]
 fn test_seek_bytes_backward_outside_file() {
     new_ucmd!()
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index 3fcd90441..f73cd42af 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -1,3 +1,4 @@
 #[macro_use]
 pub mod macros;
+pub mod random;
 pub mod util;
diff --git a/tests/common/random.rs b/tests/common/random.rs
new file mode 100644
index 000000000..338aeab50
--- /dev/null
+++ b/tests/common/random.rs
@@ -0,0 +1,314 @@
+//  * This file is part of the uutils coreutils package.
+//  *
+//  * For the full copyright and license information, please view the LICENSE
+//  * file that was distributed with this source code.
+
+use rand::distributions::{Distribution, Uniform};
+use rand::{thread_rng, Rng};
+
+/// Samples alphanumeric characters `[A-Za-z0-9]` including newline `\n`
+///
+/// # Examples
+///
+/// ```rust,ignore
+/// use rand::{Rng, thread_rng};
+///
+/// let vec = thread_rng()
+///     .sample_iter(AlphanumericNewline)
+///     .take(10)
+///     .collect::<Vec<u8>>();
+/// println!("Random chars: {}", String::from_utf8(vec).unwrap());
+/// ```
+#[derive(Clone, Copy, Debug)]
+pub struct AlphanumericNewline;
+
+impl AlphanumericNewline {
+    /// The charset to act upon
+    const CHARSET: &'static [u8] =
+        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n";
+
+    /// Generate a random byte from [`Self::CHARSET`] and return it as `u8`.
+    ///
+    /// # Arguments
+    ///
+    /// * `rng`: A [`rand::Rng`]
+    ///
+    /// returns: u8
+    fn random<R>(rng: &mut R) -> u8
+    where
+        R: Rng + ?Sized,
+    {
+        let idx = rng.gen_range(0..Self::CHARSET.len());
+        Self::CHARSET[idx]
+    }
+}
+
+impl Distribution<u8> for AlphanumericNewline {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 {
+        Self::random(rng)
+    }
+}
+
+/// Generate a random string from a [`Distribution`]
+///
+/// # Examples
+///
+/// ```rust,ignore
+/// use crate::common::random::{AlphanumericNewline, RandomString};
+/// use rand::distributions::Alphanumeric;
+///
+/// // generates a 100 byte string with characters from AlphanumericNewline
+/// let random_string = RandomString::generate(&AlphanumericNewline, 100);
+/// assert_eq!(100, random_string.len());
+///
+/// // generates a 100 byte string with 10 newline characters not ending with a newline
+/// let string = RandomString::generate_with_delimiter(&Alphanumeric, b'\n', 10, false, 100);
+/// assert_eq!(100, string.len());
+/// ```
+pub struct RandomString;
+
+impl RandomString {
+    /// Generate a random string from the given [`Distribution`] with the given `length` in bytes.
+    ///
+    /// # Arguments
+    ///
+    /// * `dist`: A u8 [`Distribution`]
+    /// * `length`: the length of the resulting string in bytes
+    ///
+    /// returns: String
+    pub fn generate<D>(dist: D, length: usize) -> String
+    where
+        D: Distribution<u8>,
+    {
+        thread_rng()
+            .sample_iter(dist)
+            .take(length)
+            .map(|b| b as char)
+            .collect()
+    }
+
+    /// Generate a random string from the [`Distribution`] with the given `length` in bytes. The
+    /// `delimiter` is placed at random positions in the string, such that exactly `num_delimiter`
+    /// occurrences of it appear. If `end_with_delimiter` is set, then the string ends with the
+    /// delimiter, else the string does not end with the delimiter.
+    ///
+    /// # Arguments
+    ///
+    /// * `dist`: A `u8` [`Distribution`]
+    /// * `delimiter`: A `u8` delimiter, which does not need to be included in the `Distribution`
+    /// * `num_delimiter`: The number of `delimiter`s contained in the resulting string
+    /// * `end_with_delimiter`: If the string shall end with the given delimiter
+    /// * `length`: the length of the resulting string in bytes
+    ///
+    /// returns: String
+    ///
+    /// # Examples
+    ///
+    /// ```rust,ignore
+    /// use crate::common::random::{AlphanumericNewline, RandomString};
+    ///
+    /// // generates a 100 byte string with 10 '\0' byte characters not ending with a '\0' byte
+    /// let string = RandomString::generate_with_delimiter(&AlphanumericNewline, 0, 10, false, 100);
+    /// assert_eq!(100, string.len());
+    /// assert_eq!(
+    ///     10,
+    ///     string.as_bytes().iter().filter(|p| **p == 0).count()
+    /// );
+    /// assert!(!string.as_bytes().ends_with(&[0]));
+    /// ```
+    pub fn generate_with_delimiter<D>(
+        dist: D,
+        delimiter: u8,
+        num_delimiter: usize,
+        end_with_delimiter: bool,
+        length: usize,
+    ) -> String
+    where
+        D: Distribution<u8>,
+    {
+        if length == 0 {
+            return String::from("");
+        } else if length == 1 {
+            return if num_delimiter > 0 {
+                String::from(delimiter as char)
+            } else {
+                String::from(thread_rng().sample(&dist) as char)
+            };
+        }
+
+        let samples = length - 1;
+        let mut result: Vec<u8> = thread_rng().sample_iter(&dist).take(samples).collect();
+
+        if num_delimiter == 0 {
+            result.push(thread_rng().sample(&dist));
+            return String::from_utf8(result).unwrap();
+        }
+
+        let num_delimiter = if end_with_delimiter {
+            num_delimiter - 1
+        } else {
+            num_delimiter
+        };
+
+        let between = Uniform::new(0, samples);
+        for _ in 0..num_delimiter {
+            let mut pos = between.sample(&mut thread_rng());
+            let turn = pos;
+            while result[pos] == delimiter {
+                pos += 1;
+                if pos >= samples {
+                    pos = 0;
+                }
+                if pos == turn {
+                    break;
+                }
+            }
+            result[pos] = delimiter;
+        }
+
+        if end_with_delimiter {
+            result.push(delimiter);
+        } else {
+            result.push(thread_rng().sample(&dist));
+        }
+
+        String::from_utf8(result).unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::distributions::Alphanumeric;
+
+    #[test]
+    fn test_random_string_generate() {
+        let random_string = RandomString::generate(&AlphanumericNewline, 0);
+        assert_eq!(0, random_string.len());
+
+        let random_string = RandomString::generate(&AlphanumericNewline, 1);
+        assert_eq!(1, random_string.len());
+
+        let random_string = RandomString::generate(&AlphanumericNewline, 100);
+        assert_eq!(100, random_string.len());
+    }
+
+    #[test]
+    fn test_random_string_generate_with_delimiter_when_length_is_zero() {
+        let random_string = RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, false, 0);
+        assert_eq!(0, random_string.len());
+    }
+
+    #[test]
+    fn test_random_string_generate_with_delimiter_when_num_delimiter_is_greater_than_length() {
+        let random_string = RandomString::generate_with_delimiter(&Alphanumeric, 0, 2, false, 1);
+        assert_eq!(1, random_string.len());
+        assert!(random_string.as_bytes().contains(&0));
+        assert!(random_string.as_bytes().ends_with(&[0]));
+    }
+
+    #[test]
+    fn test_random_string_generate_with_delimiter_should_end_with_delimiter() {
+        // Each case must produce a string of the requested length that holds the requested
+        // number of delimiter bytes (0) and whose last byte is a delimiter. Judging by the
+        // assertions, the arguments after the distribution are: delimiter, number of
+        // delimiters, whether the string must end with a delimiter, and the total length.
+
+        // A single byte which is the trailing delimiter.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 1, true, 1);
+        let bytes = generated.as_bytes();
+        assert_eq!(1, generated.len());
+        assert_eq!(1, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_eq!(Some(&0), bytes.last());
+
+        // No trailing delimiter requested, but the only byte has to be the delimiter.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 1, false, 1);
+        let bytes = generated.as_bytes();
+        assert_eq!(1, generated.len());
+        assert_eq!(1, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_eq!(Some(&0), bytes.last());
+
+        // Two bytes, one delimiter at the end.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 1, true, 2);
+        let bytes = generated.as_bytes();
+        assert_eq!(2, generated.len());
+        assert_eq!(1, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_eq!(Some(&0), bytes.last());
+
+        // Two bytes, both of them delimiters.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 2, true, 2);
+        let bytes = generated.as_bytes();
+        assert_eq!(2, generated.len());
+        assert_eq!(2, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_eq!(Some(&0), bytes.last());
+
+        // Three bytes, one delimiter at the end.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 1, true, 3);
+        let bytes = generated.as_bytes();
+        assert_eq!(3, generated.len());
+        assert_eq!(1, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_eq!(Some(&0), bytes.last());
+    }
+
+    #[test]
+    fn test_random_string_generate_with_delimiter_should_not_end_with_delimiter() {
+        // The first two cases request no delimiter (0) at all, so none may show up, whatever
+        // the trailing-delimiter flag says. The remaining cases request delimiters but no
+        // trailing one, so the last byte must differ from the delimiter. Length and number
+        // of delimiters must always match the request.
+
+        // No delimiter and no trailing delimiter requested.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, false, 1);
+        let bytes = generated.as_bytes();
+        assert_eq!(1, generated.len());
+        assert_eq!(0, bytes.iter().filter(|&&byte| byte == 0).count());
+
+        // A trailing delimiter is requested, but with zero delimiters none may appear.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, true, 1);
+        let bytes = generated.as_bytes();
+        assert_eq!(1, generated.len());
+        assert_eq!(0, bytes.iter().filter(|&&byte| byte == 0).count());
+
+        // Two bytes, one delimiter which is not the last byte.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 1, false, 2);
+        let bytes = generated.as_bytes();
+        assert_eq!(2, generated.len());
+        assert_eq!(1, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_ne!(Some(&0), bytes.last());
+
+        // Three bytes, one delimiter which is not the last byte.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 1, false, 3);
+        let bytes = generated.as_bytes();
+        assert_eq!(3, generated.len());
+        assert_eq!(1, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_ne!(Some(&0), bytes.last());
+
+        // Three bytes, two delimiters, the last byte is not one of them.
+        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 2, false, 3);
+        let bytes = generated.as_bytes();
+        assert_eq!(3, generated.len());
+        assert_eq!(2, bytes.iter().filter(|&&byte| byte == 0).count());
+        assert_ne!(Some(&0), bytes.last());
+    }
+
+    #[test]
+    fn test_generate_with_delimiter_with_greater_length() {
+        // A longer string (1000 bytes, 100 delimiters) is generated once without and once
+        // with a trailing delimiter. Length and delimiter count must match in both runs, and
+        // the last byte must be the delimiter (0) exactly when a trailing delimiter is
+        // requested.
+        for end_with_delimiter in [false, true] {
+            let generated = RandomString::generate_with_delimiter(
+                &Alphanumeric,
+                0,
+                100,
+                end_with_delimiter,
+                1000,
+            );
+            let bytes = generated.as_bytes();
+            assert_eq!(1000, generated.len());
+            assert_eq!(100, bytes.iter().filter(|&&byte| byte == 0).count());
+            assert_eq!(end_with_delimiter, bytes.ends_with(&[0]));
+        }
+    }
+}