1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-05 23:47:46 +00:00

head: rework handling of non-seekable files

Fix issue #7372
Rework logic for handling all-but-last-lines and all-but-last-bytes
for non-seekable files. Changes give large performance improvement.
This commit is contained in:
Karl McDowall 2025-02-21 10:53:56 -07:00
parent 62025d862f
commit 29875312a1
2 changed files with 589 additions and 115 deletions

View file

@ -17,7 +17,6 @@ use thiserror::Error;
use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult};
use uucore::line_ending::LineEnding;
use uucore::lines::lines;
use uucore::{format_usage, help_about, help_usage, show};
const BUF_SIZE: usize = 65536;
@ -37,7 +36,8 @@ mod options {
mod parse;
mod take;
use take::take_all_but;
use take::copy_all_but_n_bytes;
use take::copy_all_but_n_lines;
use take::take_lines;
#[derive(Error, Debug)]
@ -274,14 +274,16 @@ fn read_n_lines(input: &mut impl std::io::BufRead, n: u64, separator: u8) -> std
let mut reader = take_lines(input, n, separator);
// Write those bytes to `stdout`.
let mut stdout = std::io::stdout();
let stdout = std::io::stdout();
let stdout = stdout.lock();
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
let bytes_written = io::copy(&mut reader, &mut stdout).map_err(wrap_in_stdout_error)?;
let bytes_written = io::copy(&mut reader, &mut writer).map_err(wrap_in_stdout_error)?;
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush().map_err(wrap_in_stdout_error)?;
writer.flush().map_err(wrap_in_stdout_error)?;
Ok(bytes_written)
}
@ -296,43 +298,37 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
}
}
fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<u64> {
let mut bytes_written = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout();
let stdout = stdout.lock();
// Even though stdout is buffered, it will flush on each newline in the
// input stream. This can be costly, so add an extra layer of buffering
// over the top. This gives a significant speedup (approx 4x).
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
for byte in take_all_but(input.bytes(), n) {
writer.write_all(&[byte?]).map_err(wrap_in_stdout_error)?;
bytes_written += 1;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
writer.flush().map_err(wrap_in_stdout_error)?;
}
Ok(bytes_written)
}
fn read_but_last_n_lines(
input: impl std::io::BufRead,
n: u64,
separator: u8,
) -> std::io::Result<u64> {
fn read_but_last_n_bytes(mut input: impl Read, n: u64) -> std::io::Result<u64> {
let mut bytes_written: u64 = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();
for bytes in take_all_but(lines(input, separator), n) {
let bytes = bytes?;
bytes_written += u64::try_from(bytes.len()).unwrap();
bytes_written = copy_all_but_n_bytes(&mut input, &mut stdout, n)
.map_err(wrap_in_stdout_error)?
.try_into()
.unwrap();
stdout.write_all(&bytes).map_err(wrap_in_stdout_error)?;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush().map_err(wrap_in_stdout_error)?;
}
Ok(bytes_written)
}
fn read_but_last_n_lines(mut input: impl Read, n: u64, separator: u8) -> std::io::Result<u64> {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();
if n == 0 {
return io::copy(&mut input, &mut stdout).map_err(wrap_in_stdout_error);
}
let mut bytes_written: u64 = 0;
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
bytes_written = copy_all_but_n_lines(input, &mut stdout, n, separator)
.map_err(wrap_in_stdout_error)?
.try_into()
.unwrap();
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
@ -434,10 +430,9 @@ fn head_backwards_without_seek_file(
input: &mut std::fs::File,
options: &HeadOptions,
) -> std::io::Result<u64> {
let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
match options.mode {
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n),
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into()),
Mode::AllButLastBytes(n) => read_but_last_n_bytes(input, n),
Mode::AllButLastLines(n) => read_but_last_n_lines(input, n, options.line_ending.into()),
_ => unreachable!(),
}
}
@ -452,18 +447,12 @@ fn head_backwards_on_seekable_file(
if n >= size {
Ok(0)
} else {
read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
size - n,
)
read_n_bytes(input, size - n)
}
}
Mode::AllButLastLines(n) => {
let found = find_nth_line_from_end(input, n, options.line_ending.into())?;
read_n_bytes(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
found,
)
read_n_bytes(input, found)
}
_ => unreachable!(),
}
@ -471,9 +460,7 @@ fn head_backwards_on_seekable_file(
fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<u64> {
match options.mode {
Mode::FirstBytes(n) => {
read_n_bytes(&mut std::io::BufReader::with_capacity(BUF_SIZE, input), n)
}
Mode::FirstBytes(n) => read_n_bytes(input, n),
Mode::FirstLines(n) => read_n_lines(
&mut std::io::BufReader::with_capacity(BUF_SIZE, input),
n,

View file

@ -3,67 +3,308 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Take all but the last elements of an iterator.
use std::io::Read;
use memchr::memchr_iter;
use std::collections::VecDeque;
use std::io::{ErrorKind, Read, Write};
use uucore::ringbuffer::RingBuffer;
const BUF_SIZE: usize = 65536;
/// Create an iterator over all but the last `n` elements of `iter`.
///
/// # Examples
///
/// ```rust,ignore
/// let data = [1, 2, 3, 4, 5];
/// let n = 2;
/// let mut iter = take_all_but(data.iter(), n);
/// assert_eq!(Some(4), iter.next());
/// assert_eq!(Some(5), iter.next());
/// assert_eq!(None, iter.next());
/// ```
pub fn take_all_but<I: Iterator>(iter: I, n: usize) -> TakeAllBut<I> {
TakeAllBut::new(iter, n)
struct TakeAllBuffer {
buffer: Vec<u8>,
start_index: usize,
}
/// An iterator that only iterates over the last elements of another iterator.
pub struct TakeAllBut<I: Iterator> {
iter: I,
buf: RingBuffer<<I as Iterator>::Item>,
impl TakeAllBuffer {
fn new() -> Self {
TakeAllBuffer {
buffer: vec![],
start_index: 0,
}
}
fn fill_buffer(&mut self, reader: &mut impl Read) -> std::io::Result<usize> {
self.buffer.resize(BUF_SIZE, 0);
self.start_index = 0;
loop {
match reader.read(&mut self.buffer[..]) {
Ok(n) => {
self.buffer.truncate(n);
return Ok(n);
}
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
}
}
}
fn write_bytes_exact(&mut self, writer: &mut impl Write, bytes: usize) -> std::io::Result<()> {
let buffer_to_write = &self.remaining_buffer()[..bytes];
writer.write_all(buffer_to_write)?;
self.start_index += bytes;
assert!(self.start_index <= self.buffer.len());
Ok(())
}
fn write_all(&mut self, writer: &mut impl Write) -> std::io::Result<usize> {
let remaining_bytes = self.remaining_bytes();
self.write_bytes_exact(writer, remaining_bytes)?;
Ok(remaining_bytes)
}
fn write_bytes_limit(
&mut self,
writer: &mut impl Write,
max_bytes: usize,
) -> std::io::Result<usize> {
let bytes_to_write = self.remaining_bytes().min(max_bytes);
self.write_bytes_exact(writer, bytes_to_write)?;
Ok(bytes_to_write)
}
fn remaining_buffer(&self) -> &[u8] {
&self.buffer[self.start_index..]
}
fn remaining_bytes(&self) -> usize {
self.remaining_buffer().len()
}
fn is_empty(&self) -> bool {
assert!(self.start_index <= self.buffer.len());
self.start_index == self.buffer.len()
}
}
impl<I: Iterator> TakeAllBut<I> {
pub fn new(mut iter: I, n: usize) -> Self {
// Create a new ring buffer and fill it up.
//
// If there are fewer than `n` elements in `iter`, then we
// exhaust the iterator so that whenever `TakeAllBut::next()` is
// called, it will return `None`, as expected.
let mut buf = RingBuffer::new(n);
for _ in 0..n {
let value = match iter.next() {
None => {
/// Function to copy all but `n` bytes from the reader to the writer.
///
/// If `n` exceeds the number of bytes in the input file then nothing is copied.
/// If no errors are encountered then the function returns the number of bytes
/// copied.
///
/// Algorithm for this function is as follows...
/// 1 - Chunks of the input file are read into a queue of TakeAllBuffer instances.
/// Chunks are read until at least we have enough data to write out the entire contents of the
/// first TakeAllBuffer in the queue whilst still retaining at least `n` bytes in the queue.
/// If we hit EoF at any point, stop reading.
/// 2 - Asses whether we managed to queue up greater-than `n` bytes. If not, we must be done, in
/// which case break and return.
/// 3 - Write either the full first buffer of data, or just enough bytes to get back down to having
/// the required `n` bytes of data queued.
/// 4 - Go back to (1).
pub fn copy_all_but_n_bytes(
reader: &mut impl Read,
writer: &mut impl Write,
n: usize,
) -> std::io::Result<usize> {
let mut buffers: VecDeque<TakeAllBuffer> = VecDeque::new();
let mut empty_buffer_pool: Vec<TakeAllBuffer> = vec![];
let mut buffered_bytes: usize = 0;
let mut total_bytes_copied = 0;
loop {
loop {
// Try to buffer at least enough to write the entire first buffer.
let front_buffer = buffers.front();
if let Some(front_buffer) = front_buffer {
if buffered_bytes >= n + front_buffer.remaining_bytes() {
break;
}
Some(x) => x,
};
buf.push_back(value);
}
let mut new_buffer = empty_buffer_pool.pop().unwrap_or_else(TakeAllBuffer::new);
let filled_bytes = new_buffer.fill_buffer(reader)?;
if filled_bytes == 0 {
// filled_bytes==0 => Eof
break;
}
buffers.push_back(new_buffer);
buffered_bytes += filled_bytes;
}
Self { iter, buf }
// If we've got <=n bytes buffered here we have nothing left to do.
if buffered_bytes <= n {
break;
}
let excess_buffered_bytes = buffered_bytes - n;
// Since we have some data buffered, can assume we have >=1 buffer - i.e. safe to unwrap.
let front_buffer = buffers.front_mut().unwrap();
let bytes_written = front_buffer.write_bytes_limit(writer, excess_buffered_bytes)?;
buffered_bytes -= bytes_written;
total_bytes_copied += bytes_written;
// If the front buffer is empty (which it probably is), push it into the empty-buffer-pool.
if front_buffer.is_empty() {
empty_buffer_pool.push(buffers.pop_front().unwrap());
}
}
Ok(total_bytes_copied)
}
struct TakeAllLinesBuffer {
inner: TakeAllBuffer,
terminated_lines: usize,
partial_line: bool,
}
struct BytesAndLines {
bytes: usize,
terminated_lines: usize,
}
impl TakeAllLinesBuffer {
fn new() -> Self {
TakeAllLinesBuffer {
inner: TakeAllBuffer::new(),
terminated_lines: 0,
partial_line: false,
}
}
fn fill_buffer(
&mut self,
reader: &mut impl Read,
separator: u8,
) -> std::io::Result<BytesAndLines> {
let bytes_read = self.inner.fill_buffer(reader)?;
// Count the number of lines...
self.terminated_lines = memchr_iter(separator, self.inner.remaining_buffer()).count();
if let Some(last_char) = self.inner.remaining_buffer().last() {
if *last_char != separator {
self.partial_line = true;
}
}
Ok(BytesAndLines {
bytes: bytes_read,
terminated_lines: self.terminated_lines,
})
}
fn write_lines(
&mut self,
writer: &mut impl Write,
max_lines: usize,
separator: u8,
) -> std::io::Result<BytesAndLines> {
assert!(max_lines > 0, "Must request at least 1 line.");
let ret;
if max_lines > self.terminated_lines {
ret = BytesAndLines {
bytes: self.inner.write_all(writer)?,
terminated_lines: self.terminated_lines,
};
self.terminated_lines = 0;
} else {
let index = memchr_iter(separator, self.inner.remaining_buffer()).nth(max_lines - 1);
assert!(
index.is_some(),
"Somehow we're being asked to write more lines than we have, that's a bug in copy_all_but_lines."
);
let index = index.unwrap();
// index is the offset of the separator character, zero indexed. Need to add 1 to get the number
// of bytes to write.
let bytes_to_write = index + 1;
self.inner.write_bytes_exact(writer, bytes_to_write)?;
ret = BytesAndLines {
bytes: bytes_to_write,
terminated_lines: max_lines,
};
self.terminated_lines -= max_lines;
}
Ok(ret)
}
fn is_empty(&self) -> bool {
self.inner.is_empty()
}
fn terminated_lines(&self) -> usize {
self.terminated_lines
}
fn partial_line(&self) -> bool {
self.partial_line
}
}
impl<I: Iterator> Iterator for TakeAllBut<I>
where
I: Iterator,
{
type Item = <I as Iterator>::Item;
/// Function to copy all but `n` lines from the reader to the writer.
///
/// Lines are inferred from the `separator` value passed in by the client.
/// If `n` exceeds the number of lines in the input file then nothing is copied.
/// The last line in the file is not required to end with a `separator` character.
/// If no errors are encountered then they function returns the number of bytes
/// copied.
///
/// Algorithm for this function is as follows...
/// 1 - Chunks of the input file are read into a queue of TakeAllLinesBuffer instances.
/// Chunks are read until at least we have enough lines that we can write out the entire
/// contents of the first TakeAllLinesBuffer in the queue whilst still retaining at least
/// `n` lines in the queue.
/// If we hit EoF at any point, stop reading.
/// 2 - Asses whether we managed to queue up greater-than `n` lines. If not, we must be done, in
/// which case break and return.
/// 3 - Write either the full first buffer of data, or just enough lines to get back down to
/// having the required `n` lines of data queued.
/// 4 - Go back to (1).
///
/// Note that lines will regularly straddle multiple TakeAllLinesBuffer instances. The partial_line
/// flag on TakeAllLinesBuffer tracks this, and we use that to ensure that we write out enough
/// lines in the case that the input file doesn't end with a `separator` character.
pub fn copy_all_but_n_lines<R: Read, W: Write>(
mut reader: R,
writer: &mut W,
n: usize,
separator: u8,
) -> std::io::Result<usize> {
// This function requires `n` > 0. Assert it!
assert!(n > 0);
let mut buffers: VecDeque<TakeAllLinesBuffer> = VecDeque::new();
let mut buffered_terminated_lines: usize = 0;
let mut empty_buffers = vec![];
let mut total_bytes_copied = 0;
loop {
// Try to buffer enough such that we can write out the entire first buffer.
loop {
// First check if we have enough lines buffered that we can write out the entire
// front buffer. If so, break.
let front_buffer = buffers.front();
if let Some(front_buffer) = front_buffer {
if buffered_terminated_lines > n + front_buffer.terminated_lines() {
break;
}
}
// Else we need to try to buffer more data...
let mut new_buffer = empty_buffers.pop().unwrap_or_else(TakeAllLinesBuffer::new);
let fill_result = new_buffer.fill_buffer(&mut reader, separator)?;
if fill_result.bytes == 0 {
// fill_result.bytes == 0 => EoF.
break;
}
buffered_terminated_lines += fill_result.terminated_lines;
buffers.push_back(new_buffer);
}
fn next(&mut self) -> Option<<I as Iterator>::Item> {
match self.iter.next() {
Some(value) => self.buf.push_back(value),
None => None,
// If we've not buffered more lines than we need to hold back we must be done.
if buffered_terminated_lines < n
|| (buffered_terminated_lines == n && !buffers.back().unwrap().partial_line())
{
break;
}
let excess_buffered_terminated_lines = buffered_terminated_lines - n;
// Since we have some data buffered can assume we have at least 1 buffer, so safe to unwrap.
let lines_to_write = if buffers.back().unwrap().partial_line() {
excess_buffered_terminated_lines + 1
} else {
excess_buffered_terminated_lines
};
let front_buffer = buffers.front_mut().unwrap();
let write_result = front_buffer.write_lines(writer, lines_to_write, separator)?;
buffered_terminated_lines -= write_result.terminated_lines;
total_bytes_copied += write_result.bytes;
// If the front buffer is empty (which it probably is), push it into the empty-buffer-pool.
if front_buffer.is_empty() {
empty_buffers.push(buffers.pop_front().unwrap());
}
}
Ok(total_bytes_copied)
}
/// Like `std::io::Take`, but for lines instead of bytes.
@ -118,38 +359,284 @@ pub fn take_lines<R>(reader: R, limit: u64, separator: u8) -> TakeLines<R> {
#[cfg(test)]
mod tests {
use std::io::BufRead;
use std::io::BufReader;
use std::io::{BufRead, BufReader};
use crate::take::take_all_but;
use crate::take::take_lines;
use crate::take::{
copy_all_but_n_bytes, copy_all_but_n_lines, take_lines, TakeAllBuffer, TakeAllLinesBuffer,
};
#[test]
fn test_fewer_elements() {
let mut iter = take_all_but([0, 1, 2].iter(), 2);
assert_eq!(Some(&0), iter.next());
assert_eq!(None, iter.next());
fn test_take_all_buffer_exact_bytes() {
let input_buffer = "abc";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut take_all_buffer = TakeAllBuffer::new();
let bytes_read = take_all_buffer.fill_buffer(&mut input_reader).unwrap();
assert_eq!(bytes_read, input_buffer.len());
assert_eq!(take_all_buffer.remaining_bytes(), input_buffer.len());
assert_eq!(take_all_buffer.remaining_buffer(), input_buffer.as_bytes());
assert!(!take_all_buffer.is_empty());
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
for (index, c) in input_buffer.bytes().enumerate() {
take_all_buffer
.write_bytes_exact(&mut output_reader, 1)
.unwrap();
let buf_ref = output_reader.get_ref();
assert_eq!(buf_ref.len(), index + 1);
assert_eq!(buf_ref[index], c);
assert_eq!(
take_all_buffer.remaining_bytes(),
input_buffer.len() - (index + 1)
);
assert_eq!(
take_all_buffer.remaining_buffer(),
&input_buffer.as_bytes()[index + 1..]
);
}
assert!(take_all_buffer.is_empty());
assert_eq!(take_all_buffer.remaining_bytes(), 0);
assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes());
}
#[test]
fn test_same_number_of_elements() {
let mut iter = take_all_but([0, 1].iter(), 2);
assert_eq!(None, iter.next());
fn test_take_all_buffer_all_bytes() {
let input_buffer = "abc";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut take_all_buffer = TakeAllBuffer::new();
let bytes_read = take_all_buffer.fill_buffer(&mut input_reader).unwrap();
assert_eq!(bytes_read, input_buffer.len());
assert_eq!(take_all_buffer.remaining_bytes(), input_buffer.len());
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_written = take_all_buffer.write_all(&mut output_reader).unwrap();
assert_eq!(bytes_written, input_buffer.len());
assert_eq!(output_reader.get_ref().as_slice(), input_buffer.as_bytes());
assert!(take_all_buffer.is_empty());
assert_eq!(take_all_buffer.remaining_bytes(), 0);
assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes());
// Now do a write_all on an empty TakeAllBuffer. Confirm correct behavior.
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_written = take_all_buffer.write_all(&mut output_reader).unwrap();
assert_eq!(bytes_written, 0);
assert_eq!(output_reader.get_ref().as_slice().len(), 0);
}
#[test]
fn test_more_elements() {
let mut iter = take_all_but([0].iter(), 2);
assert_eq!(None, iter.next());
fn test_take_all_buffer_limit_bytes() {
let input_buffer = "abc";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut take_all_buffer = TakeAllBuffer::new();
let bytes_read = take_all_buffer.fill_buffer(&mut input_reader).unwrap();
assert_eq!(bytes_read, input_buffer.len());
assert_eq!(take_all_buffer.remaining_bytes(), input_buffer.len());
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
// Write all but 1 bytes.
let bytes_to_write = input_buffer.len() - 1;
let bytes_written = take_all_buffer
.write_bytes_limit(&mut output_reader, bytes_to_write)
.unwrap();
assert_eq!(bytes_written, bytes_to_write);
assert_eq!(
output_reader.get_ref().as_slice(),
&input_buffer.as_bytes()[..bytes_to_write]
);
assert!(!take_all_buffer.is_empty());
assert_eq!(take_all_buffer.remaining_bytes(), 1);
assert_eq!(
take_all_buffer.remaining_buffer(),
&input_buffer.as_bytes()[bytes_to_write..]
);
// Write 1 more byte - i.e. last byte in buffer.
let bytes_to_write = 1;
let bytes_written = take_all_buffer
.write_bytes_limit(&mut output_reader, bytes_to_write)
.unwrap();
assert_eq!(bytes_written, bytes_to_write);
assert_eq!(output_reader.get_ref().as_slice(), input_buffer.as_bytes());
assert!(take_all_buffer.is_empty());
assert_eq!(take_all_buffer.remaining_bytes(), 0);
assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes());
// Write 1 more byte - i.e. confirm behavior on already empty buffer.
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_to_write = 1;
let bytes_written = take_all_buffer
.write_bytes_limit(&mut output_reader, bytes_to_write)
.unwrap();
assert_eq!(bytes_written, 0);
assert_eq!(output_reader.get_ref().as_slice().len(), 0);
assert!(take_all_buffer.is_empty());
assert_eq!(take_all_buffer.remaining_bytes(), 0);
assert_eq!(take_all_buffer.remaining_buffer(), "".as_bytes());
}
#[test]
fn test_zero_elements() {
let mut iter = take_all_but([0, 1, 2].iter(), 0);
assert_eq!(Some(&0), iter.next());
assert_eq!(Some(&1), iter.next());
assert_eq!(Some(&2), iter.next());
assert_eq!(None, iter.next());
fn test_take_all_lines_buffer() {
// 3 lines with new-lines and one partial line.
let input_buffer = "a\nb\nc\ndef";
let separator = b'\n';
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut take_all_lines_buffer = TakeAllLinesBuffer::new();
let fill_result = take_all_lines_buffer
.fill_buffer(&mut input_reader, separator)
.unwrap();
assert_eq!(fill_result.bytes, input_buffer.len());
assert_eq!(fill_result.terminated_lines, 3);
assert_eq!(take_all_lines_buffer.terminated_lines(), 3);
assert!(!take_all_lines_buffer.is_empty());
assert!(take_all_lines_buffer.partial_line());
// Write 1st line.
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let lines_to_write = 1;
let write_result = take_all_lines_buffer
.write_lines(&mut output_reader, lines_to_write, separator)
.unwrap();
assert_eq!(write_result.bytes, 2);
assert_eq!(write_result.terminated_lines, lines_to_write);
assert_eq!(output_reader.get_ref().as_slice(), "a\n".as_bytes());
assert!(!take_all_lines_buffer.is_empty());
assert_eq!(take_all_lines_buffer.terminated_lines(), 2);
// Write 2nd line.
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let lines_to_write = 1;
let write_result = take_all_lines_buffer
.write_lines(&mut output_reader, lines_to_write, separator)
.unwrap();
assert_eq!(write_result.bytes, 2);
assert_eq!(write_result.terminated_lines, lines_to_write);
assert_eq!(output_reader.get_ref().as_slice(), "b\n".as_bytes());
assert!(!take_all_lines_buffer.is_empty());
assert_eq!(take_all_lines_buffer.terminated_lines(), 1);
// Now try to write 3 lines even though we have only 1 line remaining. Should write everything left in the buffer.
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let lines_to_write = 3;
let write_result = take_all_lines_buffer
.write_lines(&mut output_reader, lines_to_write, separator)
.unwrap();
assert_eq!(write_result.bytes, 5);
assert_eq!(write_result.terminated_lines, 1);
assert_eq!(output_reader.get_ref().as_slice(), "c\ndef".as_bytes());
assert!(take_all_lines_buffer.is_empty());
assert_eq!(take_all_lines_buffer.terminated_lines(), 0);
// Test empty buffer.
let input_buffer = "";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut take_all_lines_buffer = TakeAllLinesBuffer::new();
let fill_result = take_all_lines_buffer
.fill_buffer(&mut input_reader, separator)
.unwrap();
assert_eq!(fill_result.bytes, 0);
assert_eq!(fill_result.terminated_lines, 0);
assert_eq!(take_all_lines_buffer.terminated_lines(), 0);
assert!(take_all_lines_buffer.is_empty());
assert!(!take_all_lines_buffer.partial_line());
// Test buffer that ends with newline.
let input_buffer = "\n";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut take_all_lines_buffer = TakeAllLinesBuffer::new();
let fill_result = take_all_lines_buffer
.fill_buffer(&mut input_reader, separator)
.unwrap();
assert_eq!(fill_result.bytes, 1);
assert_eq!(fill_result.terminated_lines, 1);
assert_eq!(take_all_lines_buffer.terminated_lines(), 1);
assert!(!take_all_lines_buffer.is_empty());
assert!(!take_all_lines_buffer.partial_line());
}
#[test]
fn test_copy_all_but_n_bytes() {
// Test the copy_all_but_bytes fn. Test several scenarios...
// 1 - Hold back more bytes than the input will provide. Should have nothing written to output.
let input_buffer = "a\nb\nc\ndef";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied = copy_all_but_n_bytes(
&mut input_reader,
&mut output_reader,
input_buffer.len() + 1,
)
.unwrap();
assert_eq!(bytes_copied, 0);
// 2 - Hold back exactly the number of bytes the input will provide. Should have nothing written to output.
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_bytes(&mut input_reader, &mut output_reader, input_buffer.len())
.unwrap();
assert_eq!(bytes_copied, 0);
// 3 - Hold back 1 fewer byte than input will provide. Should have one byte written to output.
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied = copy_all_but_n_bytes(
&mut input_reader,
&mut output_reader,
input_buffer.len() - 1,
)
.unwrap();
assert_eq!(bytes_copied, 1);
assert_eq!(output_reader.get_ref()[..], input_buffer.as_bytes()[0..1]);
}
#[test]
fn test_copy_all_but_n_lines() {
// Test the copy_all_but_lines fn. Test several scenarios...
// 1 - Hold back more lines than the input will provide. Should have nothing written to output.
let input_buffer = "a\nb\nc\ndef";
let separator = b'\n';
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_lines(&mut input_reader, &mut output_reader, 5, separator).unwrap();
assert_eq!(bytes_copied, 0);
// 2 - Hold back exactly the number of lines the input will provide. Should have nothing written to output.
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_lines(&mut input_reader, &mut output_reader, 4, separator).unwrap();
assert_eq!(bytes_copied, 0);
// 3 - Hold back 1 fewer lines than input will provide. Should have one line written to output.
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_lines(&mut input_reader, &mut output_reader, 3, separator).unwrap();
assert_eq!(bytes_copied, 2);
assert_eq!(output_reader.get_ref()[..], input_buffer.as_bytes()[0..2]);
// Now test again with an input that has a new-line ending...
// 4 - Hold back more lines than the input will provide. Should have nothing written to output.
let input_buffer = "a\nb\nc\ndef\n";
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_lines(&mut input_reader, &mut output_reader, 5, separator).unwrap();
assert_eq!(bytes_copied, 0);
// 5 - Hold back exactly the number of lines the input will provide. Should have nothing written to output.
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_lines(&mut input_reader, &mut output_reader, 4, separator).unwrap();
assert_eq!(bytes_copied, 0);
// 6 - Hold back 1 fewer lines than input will provide. Should have one line written to output.
let mut input_reader = std::io::Cursor::new(input_buffer);
let mut output_reader = std::io::Cursor::new(vec![0x10; 0]);
let bytes_copied =
copy_all_but_n_lines(&mut input_reader, &mut output_reader, 3, separator).unwrap();
assert_eq!(bytes_copied, 2);
assert_eq!(output_reader.get_ref()[..], input_buffer.as_bytes()[0..2]);
}
#[test]