1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

tail: improve performance of piped stdin

Rewrite handling of stdin when it is piped and read input in chunks.

Fixes https://github.com/uutils/coreutils/issues/3842
This commit is contained in:
Joining7943 2022-09-09 13:50:59 +02:00 committed by Sylvestre Ledru
parent b39f5239e7
commit 2658f8ae5b
7 changed files with 1704 additions and 83 deletions

1
Cargo.lock generated
View file

@ -2871,6 +2871,7 @@ version = "0.0.15"
dependencies = [
"clap",
"libc",
"memchr",
"nix",
"notify",
"same-file",

View file

@ -18,6 +18,7 @@ path = "src/tail.rs"
[dependencies]
clap = { version = "3.2", features = ["wrap_help", "cargo"] }
libc = "0.2.132"
memchr = "2.5.0"
notify = { version = "=5.0.0-pre.16", features=["macos_kqueue"]}
uucore = { version=">=0.0.15", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] }
same-file = "1.0.6"

View file

@ -1,14 +1,29 @@
//! Iterating over a file by chunks, starting at the end of the file.
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
//! Iterating over a file by chunks, either starting at the end of the file with [`ReverseChunks`]
//! or at the end of piped stdin with [`LinesChunk`] or [`BytesChunk`].
//!
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of
//! bytes from the file.
//! Use [`ReverseChunks::new`] to create a new iterator over chunks of bytes from the file.
// spell-checker:ignore (ToDO) filehandle BUFSIZ
use std::collections::VecDeque;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::io::{BufReader, Read, Seek, SeekFrom, Write};
use uucore::error::UResult;
/// When reading files in reverse in `bounded_tail`, this is the size of each
/// block read at a time (`1 << 16` bytes, i.e. 64 KiB).
pub const BLOCK_SIZE: u64 = 1 << 16;
/// The size in bytes of the backing buffer of a `LinesChunk` or `BytesChunk`.
///
/// The value follows the `BUFSIZ` constant known from `stdio.h` and from the libc crate, which
/// exists to make stream IO efficient. `stdio.h` determines `BUFSIZ` differently on each
/// platform, whereas the libc crate sets it to 8192 on all platforms. Since libc chose 8192 as a
/// reasonable default, the same value is used here.
pub const BUFFER_SIZE: usize = 8192;
/// An iterator over a file in non-overlapping chunks from the end of the file.
///
/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except
@ -86,3 +101,598 @@ impl<'a> Iterator for ReverseChunks<'a> {
Some(buf[0..(block_size as usize)].to_vec())
}
}
/// The type of the backing buffer of [`BytesChunk`] and [`LinesChunk`]: a fixed-size byte array
/// which can hold [`BUFFER_SIZE`] elements at max.
type ChunkBuffer = [u8; BUFFER_SIZE];
/// A [`BytesChunk`] storing a fixed size number of bytes in a buffer.
///
/// Only the first `bytes` elements of `buffer` are considered stored data. Note that the derived
/// `PartialEq` compares the whole backing array as well as `bytes`, so two chunks only compare
/// equal if the unused tail of their buffers is identical, too.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct BytesChunk {
/// The [`ChunkBuffer`], an array storing the bytes, for example filled by
/// [`BytesChunk::fill`]
buffer: ChunkBuffer,
/// Stores the number of bytes this buffer holds. This is not equal to `buffer.len()`, since the
/// [`BytesChunk`] may store fewer bytes than the internal buffer can hold. In addition, a
/// [`BytesChunk`] may be reused, which makes it necessary to track the number of stored bytes.
/// A usize is sufficient here, since the maximum number of stored bytes is [`BUFFER_SIZE`],
/// which is a usize.
bytes: usize,
}
impl BytesChunk {
    /// Creates an empty chunk: the backing buffer is zeroed and the number of stored bytes is 0.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {
            buffer: [0; BUFFER_SIZE],
            bytes: 0,
        }
    }

    /// Create a new chunk from an existing chunk. The new chunk's buffer will be copied from the
    /// old chunk's buffer, copying the slice `[offset..old_chunk.bytes]` into the new chunk's
    /// buffer but starting at 0 instead of offset. If the offset is larger or equal to
    /// `chunk.bytes` then a new empty `BytesChunk` is returned.
    ///
    /// # Arguments
    ///
    /// * `chunk`: The chunk to create a new `BytesChunk` chunk from
    /// * `offset`: Start to copy the old chunk's buffer from this position. Offsets larger than
    ///   or equal to `chunk.bytes` yield an empty chunk.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = BytesChunk::new();
    /// chunk.buffer[1] = 1;
    /// chunk.bytes = 2;
    /// let new_chunk = BytesChunk::from_chunk(&chunk, 0);
    /// assert_eq!(2, new_chunk.get_buffer().len());
    /// assert_eq!(&[0, 1], new_chunk.get_buffer());
    ///
    /// let new_chunk = BytesChunk::from_chunk(&chunk, 1);
    /// assert_eq!(1, new_chunk.get_buffer().len());
    /// assert_eq!(&[1], new_chunk.get_buffer());
    /// ```
    fn from_chunk(chunk: &Self, offset: usize) -> Self {
        if offset >= chunk.bytes {
            return Self::new();
        }

        // Only the valid bytes behind `offset` are copied; the rest of the new buffer stays zero.
        let slice = chunk.get_buffer_with(offset);
        let mut buffer: ChunkBuffer = [0; BUFFER_SIZE];
        buffer[..slice.len()].copy_from_slice(slice);

        Self {
            buffer,
            bytes: slice.len(),
        }
    }

    /// Receive the internal buffer safely, so it returns a slice only containing as many bytes as
    /// large the `self.bytes` value is.
    ///
    /// returns: a slice containing the bytes of the internal buffer from `[0..self.bytes]`
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = BytesChunk::new();
    /// chunk.bytes = 1;
    /// assert_eq!(&[0], chunk.get_buffer());
    /// ```
    pub fn get_buffer(&self) -> &[u8] {
        &self.buffer[..self.bytes]
    }

    /// Like [`BytesChunk::get_buffer`], but returning a slice from `[offset..self.bytes]`.
    ///
    /// returns: a slice containing the bytes of the internal buffer from `[offset..self.bytes]`
    ///
    /// # Panics
    ///
    /// Panics if `offset` is larger than `self.bytes`, because the slice range is then invalid.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = BytesChunk::new();
    /// chunk.bytes = 2;
    /// assert_eq!(&[0], chunk.get_buffer_with(1));
    /// ```
    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
        &self.buffer[offset..self.bytes]
    }

    /// Returns true if this chunk currently stores at least one byte.
    pub fn has_data(&self) -> bool {
        self.bytes > 0
    }

    /// Fills `self.buffer` with maximal [`BUFFER_SIZE`] number of bytes, draining the reader by
    /// that number of bytes. A single read is issued, so fewer than [`BUFFER_SIZE`] bytes may be
    /// stored even if the reader is not yet exhausted.
    ///
    /// returns: `Ok(None)` if EOF is reached (so 0 bytes are read), or else `Ok(Some(bytes))`
    /// where bytes is the number of bytes read from the source.
    ///
    /// # Errors
    ///
    /// Returns the IO error of the underlying reader. Reads failing with
    /// [`std::io::ErrorKind::Interrupted`] are retried instead of being reported, since this kind
    /// of error is non-fatal.
    pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
        let num_bytes = loop {
            match filehandle.read(&mut self.buffer) {
                Ok(num_bytes) => break num_bytes,
                // An interrupted read transferred no data and may simply be repeated.
                Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(error) => return Err(error.into()),
            }
        };

        self.bytes = num_bytes;
        if num_bytes == 0 {
            Ok(None)
        } else {
            Ok(Some(num_bytes))
        }
    }
}
/// An abstraction layer on top of [`BytesChunk`] mainly to simplify filling only the needed amount
/// of chunks. See also [`Self::fill`].
pub struct BytesChunkBuffer {
/// The number of bytes to print
num_print: u64,
/// The current number of bytes summed over all stored chunks in [`Self::chunks`]. Use u64 here
/// to support files > 4GB on 32-bit systems. Note, this differs from `BytesChunk::bytes` which
/// is a usize. The choice of u64 is based on `tail::FilterMode::Bytes`.
bytes: u64,
/// The queue storing the [`BytesChunk`]s in the order they were read. Every chunk is boxed, so
/// moving a chunk in or out of the queue moves the box and not the whole backing buffer.
chunks: VecDeque<Box<BytesChunk>>,
}
impl BytesChunkBuffer {
    /// Creates a new, empty [`BytesChunkBuffer`].
    ///
    /// # Arguments
    ///
    /// * `num_print`: The number of bytes to print
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use crate::chunks::BytesChunkBuffer;
    ///
    /// // a buffer which keeps the last 10 bytes of whatever it is filled with
    /// let chunks = BytesChunkBuffer::new(10);
    /// ```
    pub fn new(num_print: u64) -> Self {
        Self {
            bytes: 0,
            num_print,
            chunks: VecDeque::new(),
        }
    }

    /// Fills this buffer with chunks and consumes the reader completely. Afterwards the stored
    /// chunks hold exactly the last `self.num_print` bytes of the reader, or all bytes if the
    /// reader yields fewer. If the reader yields no bytes at all, for example because the piped
    /// stdin was empty, no chunk is stored.
    ///
    /// # Errors
    ///
    /// Returns the error of [`BytesChunk::fill`] if reading from `reader` fails.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use crate::chunks::BytesChunkBuffer;
    /// use std::io::{BufReader, Cursor};
    ///
    /// let mut reader = BufReader::new(Cursor::new(""));
    /// let num_print = 0;
    /// let mut chunks = BytesChunkBuffer::new(num_print);
    /// chunks.fill(&mut reader).unwrap();
    ///
    /// let mut reader = BufReader::new(Cursor::new("a"));
    /// let num_print = 1;
    /// let mut chunks = BytesChunkBuffer::new(num_print);
    /// chunks.fill(&mut reader).unwrap();
    /// ```
    pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
        let mut chunk = Box::new(BytesChunk::new());

        // fill chunks with all bytes from reader and reuse already instantiated chunks if possible
        while chunk.fill(reader)?.is_some() {
            self.bytes += chunk.bytes as u64;
            self.chunks.push_back(chunk);

            // Drop every leading chunk which is not needed anymore to provide `num_print` bytes.
            // A read may return any number of bytes, so a single large chunk can make several
            // small leading chunks obsolete at once. Dropping only one of them would leave the
            // offset calculated below pointing behind the end of the first chunk, and more than
            // `num_print` bytes would be printed.
            let mut recycled = None;
            while let Some(first_len) = self.chunks.front().map(|first| first.bytes as u64) {
                if self.bytes - first_len <= self.num_print {
                    break;
                }
                self.bytes -= first_len;
                recycled = self.chunks.pop_front();
            }

            chunk = recycled.unwrap_or_else(|| Box::new(BytesChunk::new()));
        }

        // quit early if there are no chunks for example in case the pipe was empty
        let first = match self.chunks.pop_front() {
            Some(first) => first,
            None => return Ok(()),
        };

        // Calculate the offset in the first chunk and put the trimmed chunk back as first element
        // of the self.chunks collection. The loop above guarantees that all chunks behind the
        // first one hold at most `num_print` bytes, so the offset is in the range 0 to
        // `first.bytes` and therefore safely convertible to a usize without losses.
        let offset = self.bytes.saturating_sub(self.num_print) as usize;
        self.chunks
            .push_front(Box::new(BytesChunk::from_chunk(&first, offset)));

        Ok(())
    }

    /// Writes the bytes of all stored chunks to `writer` in the order they were read and flushes
    /// the writer afterwards.
    ///
    /// The writer is taken by value, so this is the last chance to report errors of a buffering
    /// writer: the errors of the implicit flush when a `BufWriter` is dropped are ignored.
    ///
    /// # Errors
    ///
    /// Returns an error if writing to or flushing the `writer` fails.
    pub fn print(&self, mut writer: impl Write) -> UResult<()> {
        for chunk in &self.chunks {
            writer.write_all(chunk.get_buffer())?;
        }
        writer.flush()?;
        Ok(())
    }
}
/// Works similar to a [`BytesChunk`] but also stores the number of lines encountered in the current
/// buffer. The size of the buffer is limited to a fixed size number of bytes.
#[derive(Debug)]
pub struct LinesChunk {
/// The [`BytesChunk`] this chunk works on top of; it stores the actual bytes.
chunk: BytesChunk,
/// The number of lines delimited by `delimiter`. The choice of usize is sufficient here,
/// because lines max value is the number of bytes contained in this chunk's buffer, and the
/// number of bytes max value is [`BUFFER_SIZE`], which is a usize.
lines: usize,
/// The delimiter to use, to count the lines
delimiter: u8,
}
impl LinesChunk {
    /// Creates an empty chunk which recognizes lines by the given `delimiter`.
    pub fn new(delimiter: u8) -> Self {
        Self {
            delimiter,
            lines: 0,
            chunk: BytesChunk::new(),
        }
    }

    /// Counts the occurrences of [`Self::delimiter`] within the stored bytes. [`memchr`] is used
    /// for the search because it performs better than an iterator or a for loop.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
    /// chunk.chunk.bytes = 12;
    /// assert_eq!(2, chunk.count_lines());
    ///
    /// chunk.chunk.buffer[0..14].copy_from_slice("hello\r\nworld\r\n".as_bytes());
    /// chunk.chunk.bytes = 14;
    /// assert_eq!(2, chunk.count_lines());
    /// ```
    fn count_lines(&self) -> usize {
        let stored = self.get_buffer();
        memchr::memchr_iter(self.delimiter, stored).count()
    }

    /// Builds a new [`LinesChunk`] out of `chunk`, leaving out its first `offset` lines. The
    /// result holds `chunk.lines - offset` lines and only the bytes following the delimiter of
    /// the last left out line. An `offset` larger than `chunk.lines` results in an empty
    /// `LinesChunk` with the same delimiter.
    ///
    /// # Arguments
    ///
    /// * `chunk`: The chunk to create the new chunk from
    /// * `offset`: The offset in number of lines (not bytes)
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// // manually filling the buffer and setting the correct values for bytes and lines
    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
    /// chunk.chunk.bytes = 12;
    /// chunk.lines = 2;
    ///
    /// let offset = 1; // offset in number of lines
    /// let new_chunk = LinesChunk::from_chunk(&chunk, offset);
    /// assert_eq!("world\n".as_bytes(), new_chunk.get_buffer());
    /// assert_eq!(1, new_chunk.lines);
    /// ```
    fn from_chunk(chunk: &Self, offset: usize) -> Self {
        match chunk.lines.checked_sub(offset) {
            // more lines to leave out than this chunk contains
            None => Self::new(chunk.delimiter),
            Some(remaining_lines) => {
                let skipped_bytes = chunk.calculate_bytes_offset_from(offset);
                Self {
                    chunk: BytesChunk::from_chunk(&chunk.chunk, skipped_bytes),
                    lines: remaining_lines,
                    delimiter: chunk.delimiter,
                }
            }
        }
    }

    /// Tells whether at least one byte is stored in this chunk.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// assert!(!chunk.has_data());
    ///
    /// chunk.chunk.buffer[0] = 1;
    /// assert!(!chunk.has_data());
    ///
    /// chunk.chunk.bytes = 1;
    /// assert!(chunk.has_data());
    /// ```
    pub fn has_data(&self) -> bool {
        self.chunk.has_data()
    }

    /// The stored bytes of this chunk. See [`BytesChunk::get_buffer`].
    ///
    /// returns: &[u8] whose length is the number of stored bytes
    pub fn get_buffer(&self) -> &[u8] {
        self.chunk.get_buffer()
    }

    /// The stored bytes of this chunk starting at `offset`. See
    /// [`BytesChunk::get_buffer_with`].
    ///
    /// returns: &[u8] whose length is the number of stored bytes minus `offset`
    pub fn get_buffer_with(&self, offset: usize) -> &[u8] {
        self.chunk.get_buffer_with(offset)
    }

    /// The number of lines as stored in `self.lines`. The value is not recalculated here, so it
    /// is only correct if `self.lines` was set before, for example by [`Self::fill`]. Use
    /// `self.count_lines` if the lines have to be counted.
    pub fn get_lines(&self) -> usize {
        self.lines
    }

    /// Reads at most [`BUFFER_SIZE`] bytes from `filehandle` into this chunk, exactly like
    /// [`BytesChunk::fill`], and additionally stores the number of lines found in the bytes
    /// read. When the end of the `filehandle` is reached the number of lines is reset to 0.
    pub fn fill(&mut self, filehandle: &mut BufReader<impl Read>) -> UResult<Option<usize>> {
        let filled = self.chunk.fill(filehandle)?;
        self.lines = if filled.is_some() {
            self.count_lines()
        } else {
            0
        };
        Ok(filled)
    }

    /// Translates an offset in number of lines into an offset in bytes within this chunk. The
    /// result is 0-based and points to the byte following the delimiter of the last skipped line.
    ///
    /// # Arguments
    ///
    /// * `offset`: the offset in number of lines. An offset of 0 results in 0, an offset larger
    ///   than the number of delimiters in this chunk results in the number of stored bytes.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// let mut chunk = LinesChunk::new(b'\n');
    /// chunk.chunk.buffer[0..12].copy_from_slice("hello\nworld\n".as_bytes());
    /// chunk.chunk.bytes = 12;
    /// let bytes_offset = chunk.calculate_bytes_offset_from(1);
    /// assert_eq!(6, bytes_offset);
    /// assert_eq!(
    ///     "world\n",
    ///     String::from_utf8_lossy(chunk.get_buffer_with(bytes_offset)));
    /// ```
    fn calculate_bytes_offset_from(&self, offset: usize) -> usize {
        if offset == 0 {
            return 0;
        }

        let stored = self.get_buffer();
        stored
            .iter()
            .enumerate()
            .filter(|&(_, &byte)| byte == self.delimiter)
            // the delimiter terminating the last line to skip
            .nth(offset - 1)
            .map_or(stored.len(), |(index, _)| index + 1)
    }

    /// Writes the stored bytes to `writer`, leaving out the first `offset` lines.
    ///
    /// # Arguments
    ///
    /// * `writer`: must implement [`Write`]
    /// * `offset`: An offset in number of lines.
    pub fn print_lines(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
        let skipped_bytes = self.calculate_bytes_offset_from(offset);
        self.print_bytes(writer, skipped_bytes)
    }

    /// Writes the stored bytes to `writer`, leaving out the first `offset` bytes.
    ///
    /// # Arguments
    ///
    /// * `writer`: must implement [`Write`]
    /// * `offset`: An offset in number of bytes.
    pub fn print_bytes(&self, writer: &mut impl Write, offset: usize) -> UResult<()> {
        let remaining = self.get_buffer_with(offset);
        writer.write_all(remaining)?;
        Ok(())
    }
}
/// An abstraction layer on top of [`LinesChunk`] mainly to simplify filling only the needed amount
/// of chunks. See also [`Self::fill`]. Works similarly to [`BytesChunkBuffer`], but counts lines
/// delimited by `self.delimiter` instead of bytes.
pub struct LinesChunkBuffer {
/// The delimiter to recognize a line. Any [`u8`] is allowed.
delimiter: u8,
/// The amount of lines occurring in all currently stored [`LinesChunk`]s. Use u64 here to
/// support files > 4GB on 32-bit systems. Note, this differs from [`LinesChunk::lines`] which
/// is a usize. The choice of u64 is based on `tail::FilterMode::Lines`.
lines: u64,
/// The amount of lines to print.
num_print: u64,
/// The queue storing the [`LinesChunk`]s in the order they were read.
chunks: VecDeque<Box<LinesChunk>>,
}
impl LinesChunkBuffer {
    /// Create a new, empty [`LinesChunkBuffer`].
    ///
    /// # Arguments
    ///
    /// * `delimiter`: The byte terminating a line
    /// * `num_print`: The number of lines to print
    pub fn new(delimiter: u8, num_print: u64) -> Self {
        Self {
            delimiter,
            num_print,
            lines: 0,
            chunks: VecDeque::new(),
        }
    }

    /// Fills this buffer with chunks and consumes the reader completely. Afterwards the stored
    /// chunks hold exactly the last `self.num_print` lines of the reader, or all lines if the
    /// reader yields fewer. Trailing bytes which are not terminated by the delimiter count as a
    /// line. If the reader yields no bytes at all, for example because the piped stdin was empty,
    /// no chunk is stored.
    ///
    /// # Errors
    ///
    /// Returns the error of [`LinesChunk::fill`] if reading from `reader` fails.
    pub fn fill(&mut self, reader: &mut BufReader<impl Read>) -> UResult<()> {
        let mut chunk = Box::new(LinesChunk::new(self.delimiter));

        // fill chunks with all bytes from reader and reuse already instantiated chunks if possible
        while chunk.fill(reader)?.is_some() {
            self.lines += chunk.lines as u64;
            self.chunks.push_back(chunk);

            let first_lines = self.chunks[0].lines as u64;
            chunk = if self.lines - first_lines > self.num_print {
                // the first chunk isn't needed anymore, so its allocation is reused
                self.lines -= first_lines;
                self.chunks
                    .pop_front()
                    .expect("a chunk was pushed right before")
            } else {
                Box::new(LinesChunk::new(self.delimiter))
            };
        }

        match self.chunks.back_mut() {
            // chunks is empty when a file is empty so quitting early here
            None => return Ok(()),
            // bytes after the last delimiter form a line, too, although no delimiter was counted
            Some(last) => {
                if !last.get_buffer().ends_with(&[self.delimiter]) {
                    last.lines += 1;
                    self.lines += 1;
                }
            }
        }

        // Skip unnecessary chunks and keep the first chunk which may hold some lines we have to
        // print. There is at least one chunk, and the last remaining chunk is never skipped,
        // because then `self.lines` equals the lines of this chunk and the difference is 0.
        let mut first = self
            .chunks
            .pop_front()
            .expect("there is at least one chunk");
        while self.lines - first.lines as u64 > self.num_print {
            self.lines -= first.lines as u64;
            first = self
                .chunks
                .pop_front()
                .expect("the last remaining chunk is never skipped");
        }

        // Calculate the number of lines to skip in the current chunk. The calculated value must be
        // in the range 0 to BUFFER_SIZE and is therefore safely convertible to a usize without
        // losses.
        let skip_lines = self.lines.saturating_sub(self.num_print) as usize;
        self.chunks
            .push_front(Box::new(LinesChunk::from_chunk(&first, skip_lines)));

        Ok(())
    }

    /// Writes the bytes of all stored chunks to `writer` in the order they were read and flushes
    /// the writer afterwards.
    ///
    /// The writer is taken by value, so this is the last chance to report errors of a buffering
    /// writer: the errors of the implicit flush when a `BufWriter` is dropped are ignored.
    ///
    /// # Errors
    ///
    /// Returns an error if writing to or flushing the `writer` fails.
    pub fn print(&self, mut writer: impl Write) -> UResult<()> {
        for chunk in &self.chunks {
            chunk.print_bytes(&mut writer, 0)?;
        }
        writer.flush()?;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use crate::chunks::{BytesChunk, BUFFER_SIZE};

    /// With an offset of 0 the copy holds the same number of bytes as the source. Bytes of the
    /// source's buffer which lie behind `bytes` are not copied.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_zero() {
        let mut source = BytesChunk::new();
        source.buffer[1] = 1;

        // the marker byte at index 1 is part of the stored bytes and therefore copied
        for stored in [BUFFER_SIZE, 2] {
            source.bytes = stored;
            let copy = BytesChunk::from_chunk(&source, 0);
            assert_eq!(copy, source);
        }

        // the marker byte lies behind the stored bytes and is left out
        source.bytes = 1;
        let copy = BytesChunk::from_chunk(&source, 0);
        assert_eq!(copy.buffer, [0; BUFFER_SIZE]);
        assert_eq!(copy.bytes, source.bytes);

        // the marker byte lies in front of the offset and is left out
        source.bytes = BUFFER_SIZE;
        let copy = BytesChunk::from_chunk(&source, 2);
        assert_eq!(copy.buffer, [0; BUFFER_SIZE]);
        assert_eq!(copy.bytes, BUFFER_SIZE - 2);
    }

    /// With an offset larger than 0 the stored bytes are moved to the front of the copy.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_not_zero() {
        let mut source = BytesChunk::new();
        source.buffer[1] = 1;
        source.bytes = BUFFER_SIZE;

        let mut shifted_by_one = [0; BUFFER_SIZE];
        shifted_by_one[0] = 1;

        let copy = BytesChunk::from_chunk(&source, 1);
        assert_eq!(copy.buffer, shifted_by_one);
        assert_eq!(copy.bytes, BUFFER_SIZE - 1);

        let copy = BytesChunk::from_chunk(&source, 2);
        assert_eq!(copy.buffer, [0; BUFFER_SIZE]);
        assert_eq!(copy.bytes, BUFFER_SIZE - 2);
    }

    /// An offset behind the end of a completely filled chunk results in an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_1() {
        let mut source = BytesChunk::new();
        source.bytes = BUFFER_SIZE;

        let copy = BytesChunk::from_chunk(&source, BUFFER_SIZE + 1);
        assert_eq!(0, copy.bytes);
    }

    /// Any offset applied to an empty chunk results in an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_2() {
        let mut source = BytesChunk::new();
        source.bytes = 0;

        let copy = BytesChunk::from_chunk(&source, 1);
        assert_eq!(0, copy.bytes);
    }

    /// An offset behind the stored bytes of a partly filled chunk results in an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_larger_than_chunk_size_3() {
        let mut source = BytesChunk::new();
        source.bytes = 1;

        let copy = BytesChunk::from_chunk(&source, 2);
        assert_eq!(0, copy.bytes);
    }

    /// An offset equal to the number of stored bytes results in an empty chunk.
    #[test]
    fn test_bytes_chunk_from_when_offset_is_equal_to_chunk_size() {
        let mut source = BytesChunk::new();
        source.buffer[0] = 1;
        source.bytes = 1;

        let copy = BytesChunk::from_chunk(&source, 1);
        assert_eq!(0, copy.bytes);
    }
}

View file

@ -7,7 +7,7 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized
// spell-checker:ignore (ToDO) seekable seek'd tail'ing ringbuffer ringbuf unwatch Uncategorized filehandle
// spell-checker:ignore (libs) kqueue
// spell-checker:ignore (acronyms)
// spell-checker:ignore (env/flags)
@ -21,8 +21,9 @@ extern crate clap;
#[macro_use]
extern crate uucore;
extern crate core;
mod chunks;
pub mod chunks;
mod parse;
mod platform;
use crate::files::FileHandling;
@ -30,11 +31,11 @@ use chunks::ReverseChunks;
use clap::{Arg, Command, ValueSource};
use notify::{RecommendedWatcher, RecursiveMode, Watcher, WatcherKind};
use std::cmp::Ordering;
use std::collections::{HashMap, VecDeque};
use std::ffi::OsString;
use std::fmt;
use std::fs::{File, Metadata};
use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write};
use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::sync::mpsc::{self, channel, Receiver};
use std::time::Duration;
@ -43,9 +44,7 @@ use uucore::error::{
get_exit_code, set_exit_code, FromIo, UError, UResult, USimpleError, UUsageError,
};
use uucore::format_usage;
use uucore::lines::lines;
use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::ringbuffer::RingBuffer;
#[cfg(unix)]
use std::os::unix::fs::MetadataExt;
@ -1458,70 +1457,58 @@ fn bounded_tail(file: &mut File, settings: &Settings) {
std::io::copy(file, &mut stdout).unwrap();
}
/// An alternative to [`Iterator::skip`] with u64 instead of usize. This is
/// necessary because the usize limit doesn't make sense when iterating over
/// something that's not in memory. For example, a very large file. This allows
/// us to skip data larger than 4 GiB even on 32-bit platforms.
fn skip_u64(iter: &mut impl Iterator, num: u64) {
for _ in 0..num {
if iter.next().is_none() {
fn unbounded_tail<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
let stdout = stdout();
let mut writer = BufWriter::new(stdout.lock());
match (&settings.mode, settings.beginning) {
(FilterMode::Lines(count, sep), false) => {
let mut chunks = chunks::LinesChunkBuffer::new(*sep, *count);
chunks.fill(reader)?;
chunks.print(writer)?;
}
(FilterMode::Lines(count, sep), true) => {
let mut num_skip = (*count).max(1) - 1;
let mut chunk = chunks::LinesChunk::new(*sep);
while chunk.fill(reader)?.is_some() {
let lines = chunk.get_lines() as u64;
if lines < num_skip {
num_skip -= lines;
} else {
break;
}
}
if chunk.has_data() {
chunk.print_lines(&mut writer, num_skip as usize)?;
io::copy(reader, &mut writer)?;
}
}
(FilterMode::Bytes(count), false) => {
let mut chunks = chunks::BytesChunkBuffer::new(*count);
chunks.fill(reader)?;
chunks.print(writer)?;
}
(FilterMode::Bytes(count), true) => {
let mut num_skip = (*count).max(1) - 1;
let mut chunk = chunks::BytesChunk::new();
loop {
if let Some(bytes) = chunk.fill(reader)? {
let bytes: u64 = bytes as u64;
match bytes.cmp(&num_skip) {
Ordering::Less => num_skip -= bytes,
Ordering::Equal => {
break;
}
Ordering::Greater => {
writer.write_all(chunk.get_buffer_with(num_skip as usize))?;
break;
}
}
}
/// Collect the last elements of an iterator into a `VecDeque`.
///
/// This function returns a [`VecDeque`] containing either the last
/// `count` elements of `iter`, an [`Iterator`] over [`Result`]
/// instances, or all but the first `count` elements of `iter`. If
/// `beginning` is `true`, then all but the first `count` elements are
/// returned.
///
/// # Panics
///
/// If any element of `iter` is an [`Err`], then this function panics.
fn unbounded_tail_collect<T, E>(
mut iter: impl Iterator<Item = Result<T, E>>,
count: u64,
beginning: bool,
) -> UResult<VecDeque<T>>
where
E: fmt::Debug,
{
if beginning {
// GNU `tail` seems to index bytes and lines starting at 1, not
// at 0. It seems to treat `+0` and `+1` as the same thing.
let i = count.max(1) - 1;
skip_u64(&mut iter, i);
Ok(iter.map(|r| r.unwrap()).collect())
} else {
let count: usize = count
.try_into()
.map_err(|_| USimpleError::new(1, "Insufficient addressable memory"))?;
Ok(RingBuffer::from_iter(iter.map(|r| r.unwrap()), count).data)
return Ok(());
}
}
}
fn unbounded_tail<T: Read>(reader: &mut BufReader<T>, settings: &Settings) -> UResult<()> {
// Read through each line/char and store them in a ringbuffer that always
// contains count lines/chars. When reaching the end of file, output the
// data in the ringbuf.
match settings.mode {
FilterMode::Lines(count, sep) => {
let mut stdout = stdout();
for line in unbounded_tail_collect(lines(reader, sep), count, settings.beginning)? {
stdout
.write_all(&line)
.map_err_context(|| String::from("IO error"))?;
}
}
FilterMode::Bytes(count) => {
for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning)? {
if let Err(err) = stdout().write(&[byte]) {
return Err(USimpleError::new(1, err.to_string()));
}
}
io::copy(reader, &mut writer)?;
}
}
Ok(())

View file

@ -3,7 +3,7 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez
// spell-checker:ignore (ToDO) abcdefghijklmnopqrstuvwxyz efghijklmnopqrstuvwxyz vwxyz emptyfile file siette ocho nueve diez MULT
// spell-checker:ignore (libs) kqueue
// spell-checker:ignore (jargon) tailable untailable
@ -1090,18 +1090,6 @@ fn test_invalid_num() {
.fails()
.stderr_str()
.starts_with("tail: invalid number of lines: '1Y': Value too large for defined data type");
#[cfg(target_pointer_width = "32")]
{
let sizes = ["1000G", "10T"];
for size in &sizes {
new_ucmd!()
.args(&["-c", size])
.fails()
.code_is(1)
.stderr_str()
.starts_with("tail: Insufficient addressable memory");
}
}
new_ucmd!()
.args(&["-c", ""])
.fails()
@ -2484,6 +2472,725 @@ fn test_illegal_seek() {
assert_eq!(p.wait().unwrap().code().unwrap(), 1);
}
#[cfg(all(not(target_os = "android"), not(target_os = "windows")))] // FIXME: See https://github.com/uutils/coreutils/issues/3881
mod pipe_tests {
use super::*;
use crate::common::random::*;
use rand::distributions::Alphanumeric;
use tail::chunks::BUFFER_SIZE as CHUNK_BUFFER_SIZE;
/// Pipes the two lines "a\nb\n" into the command with `-n` values exceeding the number of
/// lines: `-n 3`, `-n 4` and `-n 999` are expected to print the whole input, whereas `-n +3`,
/// `-n +4` and `-n +999` are expected to print nothing at all.
#[test]
fn test_pipe_when_lines_option_value_is_higher_than_contained_lines() {
let test_string = "a\nb\n";
new_ucmd!()
.args(&["-n", "3"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(test_string);
new_ucmd!()
.args(&["-n", "4"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(test_string);
new_ucmd!()
.args(&["-n", "999"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(test_string);
new_ucmd!()
.args(&["-n", "+3"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
new_ucmd!()
.args(&["-n", "+4"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
new_ucmd!()
.args(&["-n", "+999"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
}
/// Pipes "a\nb", an input without a newline at the end, into the command and counts the lines
/// from the end: `-n 0` expects no output, `-n 1` expects the unterminated last line "b" and
/// `-n 2` expects the whole input.
#[test]
fn test_pipe_when_negative_lines_option_given_no_newline_at_eof() {
let test_string = "a\nb";
new_ucmd!()
.args(&["-n", "0"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
new_ucmd!()
.args(&["-n", "1"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("b");
new_ucmd!()
.args(&["-n", "2"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("a\nb");
}
/// Pipes "a\nb", an input without a newline at the end, into the command and counts the lines
/// from the beginning: `-n +0` and `-n +1` both expect the whole input, `-n +2` expects the
/// output to start with the second line "b".
#[test]
fn test_pipe_when_positive_lines_option_given_no_newline_at_eof() {
let test_string = "a\nb";
new_ucmd!()
.args(&["-n", "+0"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("a\nb");
new_ucmd!()
.args(&["-n", "+1"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("a\nb");
new_ucmd!()
.args(&["-n", "+2"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("b");
}
#[test]
fn test_pipe_when_lines_option_given_multibyte_utf8_characters() {
// the test string consists of from left to right a 4-byte,3-byte,2-byte,1-byte utf-8 character
let test_string = "𝅘𝅥𝅮\n\nƒ\na";
new_ucmd!()
.args(&["-n", "+0"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(test_string);
new_ucmd!()
.args(&["-n", "+2"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("\nƒ\na");
new_ucmd!()
.args(&["-n", "+3"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("ƒ\na");
new_ucmd!()
.args(&["-n", "+4"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("a");
new_ucmd!()
.args(&["-n", "+5"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
new_ucmd!()
.args(&["-n", "-4"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(test_string);
new_ucmd!()
.args(&["-n", "-3"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("\nƒ\na");
new_ucmd!()
.args(&["-n", "-2"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("ƒ\na");
new_ucmd!()
.args(&["-n", "-1"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only("a");
new_ucmd!()
.args(&["-n", "-0"])
.pipe_in(test_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
}
/// Pipes a random string into the command and expects both `-n +2` and `-n -1` to print
/// everything behind the first newline of the input.
///
/// NOTE(review): presumably `RandomString::generate_with_delimiter` produces `CHUNK_BUFFER_SIZE`
/// bytes containing `total_lines` newlines, none of them at the end of the string, so the input
/// fills a chunk buffer exactly and ends with an unterminated line — confirm against the helper.
#[test]
fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size_no_newline_at_eof() {
let total_lines = 1;
let random_string = RandomString::generate_with_delimiter(
Alphanumeric,
b'\n',
total_lines,
false,
CHUNK_BUFFER_SIZE,
);
let random_string = random_string.as_str();
let lines = random_string.split_inclusive('\n');
// everything but the first line
let expected = lines.clone().skip(1).collect::<String>();
new_ucmd!()
.args(&["-n", "+2"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(expected);
// everything but the first line, which is expected to be the last line of the input here
let expected = lines.clone().skip(1).collect::<String>();
new_ucmd!()
.args(&["-n", "-1"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(expected);
}
/// Pipes a random string into the command and checks several `-n` values: `+0` and `-100`
/// expect the whole input, `+2` and `-99` expect everything but the first line, `-1` expects
/// the last line only and `-0` expects no output.
///
/// NOTE(review): presumably `RandomString::generate_with_delimiter` produces `CHUNK_BUFFER_SIZE`
/// bytes containing `total_lines` newlines, the last one at the end of the string, so the input
/// fills a chunk buffer exactly — confirm against the helper.
#[test]
fn test_pipe_when_lines_option_given_input_size_is_equal_to_buffer_size() {
let total_lines = 100;
let random_string = RandomString::generate_with_delimiter(
Alphanumeric,
b'\n',
total_lines,
true,
CHUNK_BUFFER_SIZE,
);
let random_string = random_string.as_str();
let lines = random_string.split_inclusive('\n');
new_ucmd!()
.args(&["-n", "+0"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(random_string);
// everything but the first line
let expected = lines.clone().skip(1).collect::<String>();
new_ucmd!()
.args(&["-n", "+2"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(expected);
new_ucmd!()
.args(&["-n", "-0"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.no_stdout()
.no_stderr();
// the last line only
let expected = lines.clone().skip(total_lines - 1).collect::<String>();
new_ucmd!()
.args(&["-n", "-1"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(expected);
// everything but the first line
let expected = lines.clone().skip(1).collect::<String>();
new_ucmd!()
.args(&["-n", "-99"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(expected);
new_ucmd!()
.args(&["-n", "-100"])
.pipe_in(random_string)
.ignore_stdin_write_error()
.succeeds()
.stdout_only(random_string);
}
/// Pipes `CHUNK_BUFFER_SIZE + 1` bytes made of 100 newline-terminated lines and checks `-n`
/// with counts relative to the start (`+N`) and to the end (`-N`) of the input.
#[test]
fn test_pipe_when_lines_option_given_input_size_is_one_byte_greater_than_buffer_size() {
    let total_lines = 100;
    let input = RandomString::generate_with_delimiter(
        Alphanumeric,
        b'\n',
        total_lines,
        true,
        CHUNK_BUFFER_SIZE + 1,
    );
    // The input with its first `skipped` lines removed.
    let without_first = |skipped: usize| -> String {
        input.split_inclusive('\n').skip(skipped).collect()
    };

    let cases = [
        ("+0", input.clone()),
        ("-1", without_first(total_lines - 1)),
        ("+2", without_first(1)),
        ("-99", without_first(1)),
    ];
    for (count, expected) in cases {
        new_ucmd!()
            .args(&["-n", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(expected);
    }
}
/// Pipes `CHUNK_BUFFER_SIZE * 3 + 1` bytes made of 100 newline-terminated lines and checks
/// `-n` with counts relative to the start (`+N`) and to the end (`-N`) of the input.
#[test]
fn test_pipe_when_lines_option_given_input_size_has_multiple_size_of_buffer_size() {
    let total_lines = 100;
    let input = RandomString::generate_with_delimiter(
        Alphanumeric,
        b'\n',
        total_lines,
        true,
        CHUNK_BUFFER_SIZE * 3 + 1,
    );
    // The input with its first `skipped` lines removed.
    let without_first = |skipped: usize| -> String {
        input.split_inclusive('\n').skip(skipped).collect()
    };

    let cases = [
        ("+0", input.clone()),
        ("+2", without_first(1)),
        ("-1", without_first(total_lines - 1)),
        ("-99", without_first(1)),
        ("-100", input.clone()),
    ];
    for (count, expected) in cases {
        new_ucmd!()
            .args(&["-n", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(expected);
    }

    // Asking for the last zero lines prints nothing at all.
    new_ucmd!()
        .args(&["-n", "-0"])
        .pipe_in(input.as_str())
        .ignore_stdin_write_error()
        .succeeds()
        .no_stdout()
        .no_stderr();
}
/// Pipes a 3-byte input and checks `-c` with counts larger than the input: counting from the
/// end prints everything, counting from the start prints nothing.
#[test]
fn test_pipe_when_bytes_option_value_is_higher_than_contained_bytes() {
    let test_string = "a\nb";

    // More trailing bytes requested than available: the whole input is printed.
    for count in ["4", "5", "999"] {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(test_string);
    }

    // Start offset past the end of the input: neither stdout nor stderr gets any output.
    for count in ["+4", "+5", "+999"] {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .no_stdout()
            .no_stderr();
    }
}
/// Pipes a 10-byte string of multi-byte UTF-8 characters and checks that `-c` counts bytes,
/// including offsets that fall in the middle of a character.
#[test]
fn test_pipe_when_bytes_option_given_multibyte_utf8_characters() {
    // From left to right: a 4-byte, a 3-byte, a 2-byte and a 1-byte UTF-8 character.
    //
    // The 4-byte character (U+1D160, MUSICAL SYMBOL EIGHTH NOTE) is spelled as an escape on
    // purpose: it has a canonical decomposition into three code points, so any tool that
    // Unicode-normalizes this file would silently turn the literal glyph into 12 bytes and
    // invalidate every byte offset used below.
    let test_string = "\u{1D160}⏻ƒa";
    let bytes = test_string.as_bytes();
    // Guard the layout the offsets rely on: 4 + 3 + 2 + 1 bytes.
    assert_eq!(10, bytes.len());

    // Offsets on character boundaries: the output is valid UTF-8.
    let text_cases = [
        ("+0", test_string),
        ("+5", "⏻ƒa"),
        ("+8", "ƒa"),
        ("+10", "a"),
        ("-1", "a"),
        ("-3", "ƒa"),
        ("-6", "⏻ƒa"),
        ("-10", test_string),
    ];
    for (count, expected) in text_cases {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(expected);
    }

    // Offsets inside a character: the output starts with a continuation byte, so it has to
    // be compared as raw bytes.
    let byte_cases: [(&str, &[u8]); 2] = [
        ("+2", &bytes[1..]),
        ("-2", &bytes[bytes.len() - 2..]),
    ];
    for (count, expected) in byte_cases {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(test_string)
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(expected);
    }

    // Starting one byte past the end prints nothing.
    new_ucmd!()
        .args(&["-c", "+11"])
        .pipe_in(test_string)
        .ignore_stdin_write_error()
        .succeeds()
        .no_stdout()
        .no_stderr();
}
/// Pipes exactly `CHUNK_BUFFER_SIZE` random bytes and checks `-c` with counts around the
/// input size.
///
/// NOTE(review): the literal counts 8191/8192/8193 presumably mirror `CHUNK_BUFFER_SIZE`
/// (8192) -- confirm and update them if the constant ever changes.
#[test]
fn test_pipe_when_bytes_option_given_input_size_is_equal_to_buffer_size() {
    let input = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE);
    let bytes = input.as_bytes();

    new_ucmd!()
        .args(&["-c", "+0"])
        .pipe_in(input.as_str())
        .ignore_stdin_write_error()
        .succeeds()
        .stdout_only(input.as_str());

    new_ucmd!()
        .args(&["-c", "-0"])
        .pipe_in(input.as_str())
        .ignore_stdin_write_error()
        .succeeds()
        .no_stdout()
        .no_stderr();

    let byte_cases: [(&str, &[u8]); 5] = [
        ("+2", &bytes[1..]),
        ("-8191", &bytes[1..]),
        ("-8192", bytes),
        ("-8193", bytes),
        ("-1", &bytes[CHUNK_BUFFER_SIZE - 1..]),
    ];
    for (count, expected) in byte_cases {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(expected);
    }
}
/// Pipes `CHUNK_BUFFER_SIZE + 1` random bytes and checks `-c` with counts around the input
/// size.
///
/// NOTE(review): the literal counts 8192/8193 presumably mirror `CHUNK_BUFFER_SIZE` (8192)
/// -- confirm and update them if the constant ever changes.
#[test]
fn test_pipe_when_bytes_option_given_input_size_is_one_byte_greater_than_buffer_size() {
    let input = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE + 1);
    let bytes = input.as_bytes();

    // Both of these select the complete input.
    for count in ["+0", "-8193"] {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(input.as_str());
    }

    new_ucmd!()
        .args(&["-c", "-0"])
        .pipe_in(input.as_str())
        .ignore_stdin_write_error()
        .succeeds()
        .no_stdout()
        .no_stderr();

    let byte_cases: [(&str, &[u8]); 3] = [
        ("+2", &bytes[1..]),
        ("-1", &bytes[CHUNK_BUFFER_SIZE..]),
        ("-8192", &bytes[1..]),
    ];
    for (count, expected) in byte_cases {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(expected);
    }
}
/// Pipes `CHUNK_BUFFER_SIZE * 3` random bytes and checks `-c` with counts on and right next
/// to multiples of 8192.
///
/// NOTE(review): the literal counts and offsets presumably mirror multiples of
/// `CHUNK_BUFFER_SIZE` (8192) -- confirm and update them if the constant ever changes.
#[test]
fn test_pipe_when_bytes_option_given_input_size_has_multiple_size_of_buffer_size() {
    let input = RandomString::generate(AlphanumericNewline, CHUNK_BUFFER_SIZE * 3);
    let bytes = input.as_bytes();

    // Both of these select the complete input.
    for count in ["+0", "-24576"] {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only(input.as_str());
    }

    new_ucmd!()
        .args(&["-c", "-0"])
        .pipe_in(input.as_str())
        .ignore_stdin_write_error()
        .succeeds()
        .no_stdout()
        .no_stderr();

    // (`-c` argument, index of the first input byte expected in the output)
    let offset_cases = [
        ("+8193", 8192),
        ("+8194", 8193),
        ("+16385", 16384),
        ("+16386", 16385),
        ("-8192", 16384),
        ("-8193", 16383),
        ("-16384", 8192),
        ("-16385", 8191),
    ];
    for (count, first_byte) in offset_cases {
        new_ucmd!()
            .args(&["-c", count])
            .pipe_in(input.as_str())
            .ignore_stdin_write_error()
            .succeeds()
            .stdout_only_bytes(&bytes[first_byte..]);
    }
}
}
#[test]
fn test_seek_bytes_backward_outside_file() {
new_ucmd!()

View file

@ -1,3 +1,4 @@
#[macro_use]
pub mod macros;
pub mod random;
pub mod util;

314
tests/common/random.rs Normal file
View file

@ -0,0 +1,314 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
use rand::distributions::{Distribution, Uniform};
use rand::{thread_rng, Rng};
/// A byte distribution over the ASCII alphanumeric characters `[A-Za-z0-9]` plus the newline
/// character `\n`.
///
/// # Examples
///
/// ```rust,ignore
/// use rand::{Rng, thread_rng};
///
/// let vec = thread_rng()
///     .sample_iter(AlphanumericNewline)
///     .take(10)
///     .collect::<Vec<u8>>();
/// println!("Random chars: {}", String::from_utf8(vec).unwrap());
/// ```
#[derive(Clone, Copy, Debug)]
pub struct AlphanumericNewline;

impl AlphanumericNewline {
    /// Every byte that can be produced by this distribution.
    const CHARSET: &'static [u8] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n";

    /// Pick one byte of [`Self::CHARSET`] at a random index.
    ///
    /// # Arguments
    ///
    /// * `rng`: A [`rand::Rng`] used to draw the index
    ///
    /// returns: u8
    fn random<R>(rng: &mut R) -> u8
    where
        R: Rng + ?Sized,
    {
        Self::CHARSET[rng.gen_range(0..Self::CHARSET.len())]
    }
}
/// Makes [`AlphanumericNewline`] usable with `Rng::sample` and `Rng::sample_iter`.
impl Distribution<u8> for AlphanumericNewline {
    /// Draw a single byte; delegates to [`AlphanumericNewline::random`].
    fn sample<R>(&self, rng: &mut R) -> u8
    where
        R: Rng + ?Sized,
    {
        Self::random(rng)
    }
}
/// Generate a random string from a [`Distribution`]
///
/// # Examples
///
/// ```rust,ignore
/// use crate::common::random::{AlphanumericNewline, RandomString};
/// use rand::distributions::Alphanumeric;
///
/// // generates a 100 byte string with characters from AlphanumericNewline
/// let random_string = RandomString::generate(AlphanumericNewline, 100);
/// assert_eq!(100, random_string.len());
///
/// // generates a 100 byte string with 10 newline characters not ending with a newline
/// let random_string = RandomString::generate_with_delimiter(Alphanumeric, b'\n', 10, false, 100);
/// assert_eq!(100, random_string.len());
/// ```
pub struct RandomString;

impl RandomString {
    /// Generate a random string from the given [`Distribution`] with the given `length` in bytes.
    ///
    /// Every sampled byte is converted with `as char`, so the byte length only equals `length`
    /// when the distribution yields ASCII bytes; a byte above `0x7F` becomes a two-byte
    /// character.
    ///
    /// # Arguments
    ///
    /// * `dist`: A u8 [`Distribution`]
    /// * `length`: the length of the resulting string in bytes
    ///
    /// returns: String
    pub fn generate<D>(dist: D, length: usize) -> String
    where
        D: Distribution<u8>,
    {
        thread_rng()
            .sample_iter(dist)
            .take(length)
            .map(|byte| byte as char)
            .collect()
    }

    /// Generate a random string from the [`Distribution`] with the given `length` in bytes. The
    /// function takes a `delimiter`, which is randomly distributed in the string, such that exactly
    /// `num_delimiter` amount of `delimiter`s occur. If `end_with_delimiter` is set, then the
    /// string ends with the delimiter, else the string does not end with the delimiter.
    ///
    /// Bytes sampled from `dist` that happen to equal `delimiter` are discarded and sampled
    /// again, so the delimiter count and the `end_with_delimiter` guarantee also hold for
    /// distributions that can produce the delimiter themselves. As a consequence this function
    /// does not terminate for a distribution that produces nothing but the delimiter.
    ///
    /// Special cases:
    ///
    /// * `length == 0` returns the empty string.
    /// * `length == 1` returns the delimiter if `num_delimiter > 0` (regardless of
    ///   `end_with_delimiter`), else one sampled byte.
    /// * `num_delimiter == 0` never produces a delimiter, even if `end_with_delimiter` is set.
    /// * A `num_delimiter` larger than the number of available positions is capped: all
    ///   positions it may use are filled with the delimiter.
    ///
    /// # Arguments
    ///
    /// * `dist`: A `u8` [`Distribution`]
    /// * `delimiter`: A `u8` delimiter, which does not need to be included in the `Distribution`
    /// * `num_delimiter`: The number of `delimiter`s contained in the resulting string
    /// * `end_with_delimiter`: If the string shall end with the given delimiter
    /// * `length`: the length of the resulting string in bytes
    ///
    /// returns: String
    ///
    /// # Panics
    ///
    /// Panics if the generated bytes are not valid UTF-8, which can only happen if `dist` or
    /// `delimiter` supply non-ASCII bytes.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use crate::common::random::{AlphanumericNewline, RandomString};
    ///
    /// // generates a 100 byte string with 10 '\0' byte characters not ending with a '\0' byte
    /// let random_string =
    ///     RandomString::generate_with_delimiter(AlphanumericNewline, 0, 10, false, 100);
    /// assert_eq!(100, random_string.len());
    /// assert_eq!(
    ///     10,
    ///     random_string.as_bytes().iter().filter(|p| **p == 0).count()
    /// );
    /// assert!(!random_string.as_bytes().ends_with(&[0]));
    /// ```
    pub fn generate_with_delimiter<D>(
        dist: D,
        delimiter: u8,
        num_delimiter: usize,
        end_with_delimiter: bool,
        length: usize,
    ) -> String
    where
        D: Distribution<u8>,
    {
        if length == 0 {
            return String::new();
        }

        // Filler bytes must never be the delimiter, otherwise the number of delimiters in the
        // result could exceed `num_delimiter` and the last byte could be an unwanted delimiter.
        let mut fillers = thread_rng()
            .sample_iter(&dist)
            .filter(move |byte: &u8| *byte != delimiter);

        if length == 1 {
            let only = if num_delimiter > 0 {
                delimiter
            } else {
                fillers.next().expect("sample_iter is an infinite iterator")
            };
            return String::from(only as char);
        }

        // The last byte is decided separately below, everything before it starts as filler.
        let samples = length - 1;
        let mut result: Vec<u8> = fillers.by_ref().take(samples).collect();

        let ends_with_delimiter = end_with_delimiter && num_delimiter > 0;
        // Delimiters that have to be placed in front of the last byte.
        let interior = if ends_with_delimiter {
            num_delimiter - 1
        } else {
            num_delimiter
        };

        if interior > 0 {
            let between = Uniform::new(0, samples);
            let mut rng = thread_rng();
            for _ in 0..interior {
                let start = between.sample(&mut rng);
                let mut pos = start;
                // Walk forward (wrapping around) to the next position that is still free. If
                // the walk returns to `start`, every position already holds a delimiter.
                while result[pos] == delimiter {
                    pos += 1;
                    if pos >= samples {
                        pos = 0;
                    }
                    if pos == start {
                        break;
                    }
                }
                result[pos] = delimiter;
            }
        }

        let last = if ends_with_delimiter {
            delimiter
        } else {
            fillers.next().expect("sample_iter is an infinite iterator")
        };
        result.push(last);

        String::from_utf8(result).unwrap()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::distributions::Alphanumeric;

    /// Number of NUL bytes in `text`; NUL is the delimiter used by all tests below.
    fn nul_count(text: &str) -> usize {
        text.bytes().filter(|&byte| byte == 0).count()
    }

    /// Whether the last byte of `text` is a NUL byte.
    fn ends_with_nul(text: &str) -> bool {
        text.as_bytes().last() == Some(&0)
    }

    #[test]
    fn test_random_string_generate() {
        for length in [0, 1, 100] {
            let generated = RandomString::generate(&AlphanumericNewline, length);
            assert_eq!(length, generated.len());
        }
    }

    #[test]
    fn test_random_string_generate_with_delimiter_when_length_is_zero() {
        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, false, 0);
        assert_eq!(0, generated.len());
    }

    #[test]
    fn test_random_string_generate_with_delimiter_when_num_delimiter_is_greater_than_length() {
        let generated = RandomString::generate_with_delimiter(&Alphanumeric, 0, 2, false, 1);
        assert_eq!(1, generated.len());
        assert!(generated.as_bytes().contains(&0));
        assert!(ends_with_nul(&generated));
    }

    #[test]
    fn test_random_string_generate_with_delimiter_should_end_with_delimiter() {
        // (num_delimiter, end_with_delimiter, length)
        let cases = [
            (1, true, 1),
            (1, false, 1),
            (1, true, 2),
            (2, true, 2),
            (1, true, 3),
        ];
        for (num_delimiter, end_with_delimiter, length) in cases {
            let generated = RandomString::generate_with_delimiter(
                &Alphanumeric,
                0,
                num_delimiter,
                end_with_delimiter,
                length,
            );
            assert_eq!(length, generated.len());
            assert_eq!(num_delimiter, nul_count(&generated));
            assert!(ends_with_nul(&generated));
        }
    }

    #[test]
    fn test_random_string_generate_with_delimiter_should_not_end_with_delimiter() {
        // Without any delimiter requested, `end_with_delimiter` has no effect.
        for end_with_delimiter in [false, true] {
            let generated =
                RandomString::generate_with_delimiter(&Alphanumeric, 0, 0, end_with_delimiter, 1);
            assert_eq!(1, generated.len());
            assert_eq!(0, nul_count(&generated));
        }

        // (num_delimiter, length)
        for (num_delimiter, length) in [(1, 2), (1, 3), (2, 3)] {
            let generated =
                RandomString::generate_with_delimiter(&Alphanumeric, 0, num_delimiter, false, length);
            assert_eq!(length, generated.len());
            assert_eq!(num_delimiter, nul_count(&generated));
            assert!(!ends_with_nul(&generated));
        }
    }

    #[test]
    fn test_generate_with_delimiter_with_greater_length() {
        for end_with_delimiter in [false, true] {
            let generated = RandomString::generate_with_delimiter(
                &Alphanumeric,
                0,
                100,
                end_with_delimiter,
                1000,
            );
            assert_eq!(1000, generated.len());
            assert_eq!(100, nul_count(&generated));
            assert_eq!(end_with_delimiter, ends_with_nul(&generated));
        }
    }
}