From cba0696b90091d9efc115ca2cf1c6b67db94befd Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 29 Jan 2022 22:15:23 -0500 Subject: [PATCH 1/2] head: don't add trailing newline to end of file Prevent `head` from adding a trailing newline to the end of a file that did not originally have one when using `head --lines=-0`. --- src/uu/head/src/head.rs | 5 ++- src/uu/head/src/lines.rs | 80 +++++++++++++++++++++++++++++++++++++- tests/by-util/test_head.rs | 8 +++- 3 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index eded419df..959d87604 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -38,6 +38,7 @@ mod options { mod lines; mod parse; mod take; +use lines::lines; use lines::zlines; use take::take_all_but; use take::take_lines; @@ -285,8 +286,8 @@ fn read_but_last_n_lines( stdout.write_all(&bytes?)?; } } else { - for line in take_all_but(input.lines(), n) { - println!("{}", line?); + for line in take_all_but(lines(input), n) { + print!("{}", line?); } } Ok(()) diff --git a/src/uu/head/src/lines.rs b/src/uu/head/src/lines.rs index 474f5717d..5c1b23b27 100644 --- a/src/uu/head/src/lines.rs +++ b/src/uu/head/src/lines.rs @@ -1,11 +1,75 @@ +// * This file is part of the uutils coreutils package. +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. // spell-checker:ignore (vars) zline zlines - -//! Iterate over zero-terminated lines. +//! Iterate over lines, including the line ending character(s). +//! +//! This module provides the [`lines`] and [`zlines`] functions, +//! similar to the [`BufRead::lines`] method. While the +//! [`BufRead::lines`] method yields [`String`] instances that do not +//! include the line ending characters (`"\n"` or `"\r\n"`), our +//! functions yield [`String`] instances that include the line ending +//! characters. 
This is useful if the input data does not end with a +//! newline character and you want to preserve the exact form of the +//! input data. use std::io::BufRead; /// The zero byte, representing the null character. const ZERO: u8 = 0; +/// Returns an iterator over the lines, including line ending characters. +/// +/// This function is just like [`BufRead::lines`], but it includes the +/// line ending characters in each yielded [`String`] if the input +/// data has them. +/// +/// # Examples +/// +/// If the input data does not end with a newline character (`'\n'`), +/// then the last [`String`] yielded by this iterator also does not +/// end with a newline: +/// +/// ```rust,ignore +/// use std::io::BufRead; +/// use std::io::Cursor; +/// +/// let cursor = Cursor::new(b"x\ny\nz"); +/// let mut it = cursor.lines(); +/// +/// assert_eq!(it.next(), Some(String::from("x\n"))); +/// assert_eq!(it.next(), Some(String::from("y\n"))); +/// assert_eq!(it.next(), Some(String::from("z"))); +/// assert_eq!(it.next(), None); +/// ``` +pub(crate) fn lines(reader: B) -> Lines +where + B: BufRead, +{ + Lines { buf: reader } +} + +/// An iterator over the lines of an instance of `BufRead`. +/// +/// This struct is generally created by calling [`lines`] on a `BufRead`. +/// Please see the documentation of [`lines`] for more details. +pub(crate) struct Lines { + buf: B, +} + +impl Iterator for Lines { + type Item = std::io::Result; + + fn next(&mut self) -> Option> { + let mut buf = String::new(); + match self.buf.read_line(&mut buf) { + Ok(0) => None, + Ok(_n) => Some(Ok(buf)), + Err(e) => Some(Err(e)), + } + } +} + /// Returns an iterator over the lines of the given reader. 
/// /// The iterator returned from this function will yield instances of @@ -50,6 +114,7 @@ impl Iterator for ZLines { #[cfg(test)] mod tests { + use crate::lines::lines; use crate::lines::zlines; use std::io::Cursor; @@ -72,4 +137,15 @@ mod tests { assert_eq!(iter.next(), Some(b"z".to_vec())); assert_eq!(iter.next(), None); } + + #[test] + fn test_lines() { + let cursor = Cursor::new(b"x\ny\nz"); + let mut it = lines(cursor).map(|l| l.unwrap()); + + assert_eq!(it.next(), Some(String::from("x\n"))); + assert_eq!(it.next(), Some(String::from("y\n"))); + assert_eq!(it.next(), Some(String::from("z"))); + assert_eq!(it.next(), None); + } } diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 246f5b62a..8f4932edf 100644 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -157,11 +157,17 @@ fn test_negative_byte_syntax() { #[test] fn test_negative_zero_lines() { new_ucmd!() - .args(&["--lines=-0"]) + .arg("--lines=-0") .pipe_in("a\nb\n") .succeeds() .stdout_is("a\nb\n"); + new_ucmd!() + .arg("--lines=-0") + .pipe_in("a\nb") + .succeeds() + .stdout_is("a\nb"); } + #[test] fn test_negative_zero_bytes() { new_ucmd!() From b9c2066ee9a6c517aceb7a089c5a4e83a84f8dc5 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 29 Jan 2022 22:24:27 -0500 Subject: [PATCH 2/2] uucore: move lines.rs to be a uucore feature Refactor the `lines.rs` module to be a feature in `uucore`. It was common to both `head` and `tail`. 
--- src/uu/head/Cargo.toml | 2 +- src/uu/head/src/head.rs | 12 +- src/uu/head/src/lines.rs | 151 ------------------ src/uu/tail/Cargo.toml | 2 +- src/uu/tail/src/tail.rs | 3 +- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features.rs | 2 + .../src => uucore/src/lib/features}/lines.rs | 14 +- src/uucore/src/lib/lib.rs | 2 + 9 files changed, 22 insertions(+), 167 deletions(-) delete mode 100644 src/uu/head/src/lines.rs rename src/{uu/tail/src => uucore/src/lib/features}/lines.rs (89%) diff --git a/src/uu/head/Cargo.toml b/src/uu/head/Cargo.toml index 5d05f1921..04a512492 100644 --- a/src/uu/head/Cargo.toml +++ b/src/uu/head/Cargo.toml @@ -17,7 +17,7 @@ path = "src/head.rs" [dependencies] clap = { version = "3.0", features = ["wrap_help", "cargo"] } memchr = "2" -uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer"] } +uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] } [[bin]] name = "head" diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 959d87604..e78dec78b 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -11,6 +11,7 @@ use std::ffi::OsString; use std::io::{self, BufWriter, ErrorKind, Read, Seek, SeekFrom, Write}; use uucore::display::Quotable; use uucore::error::{FromIo, UError, UResult, USimpleError}; +use uucore::lines::lines; use uucore::show; const BUF_SIZE: usize = 65536; @@ -35,11 +36,8 @@ mod options { pub const ZERO_NAME: &str = "ZERO"; pub const FILES_NAME: &str = "FILE"; } -mod lines; mod parse; mod take; -use lines::lines; -use lines::zlines; use take::take_all_but; use take::take_lines; @@ -282,12 +280,14 @@ fn read_but_last_n_lines( if zero { let stdout = std::io::stdout(); let mut stdout = stdout.lock(); - for bytes in take_all_but(zlines(input), n) { + for bytes in take_all_but(lines(input, b'\0'), n) { stdout.write_all(&bytes?)?; } } else { - for line in take_all_but(lines(input), n) { - print!("{}", line?); + let 
stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + for bytes in take_all_but(lines(input, b'\n'), n) { + stdout.write_all(&bytes?)?; } } Ok(()) diff --git a/src/uu/head/src/lines.rs b/src/uu/head/src/lines.rs deleted file mode 100644 index 5c1b23b27..000000000 --- a/src/uu/head/src/lines.rs +++ /dev/null @@ -1,151 +0,0 @@ -// * This file is part of the uutils coreutils package. -// * -// * For the full copyright and license information, please view the LICENSE -// * file that was distributed with this source code. -// spell-checker:ignore (vars) zline zlines -//! Iterate over lines, including the line ending character(s). -//! -//! This module provides the [`lines`] and [`zlines`] functions, -//! similar to the [`BufRead::lines`] method. While the -//! [`BufRead::lines`] method yields [`String`] instances that do not -//! include the line ending characters (`"\n"` or `"\r\n"`), our -//! functions yield [`String`] instances that include the line ending -//! characters. This is useful if the input data does not end with a -//! newline character and you want to preserve the exact form of the -//! input data. -use std::io::BufRead; - -/// The zero byte, representing the null character. -const ZERO: u8 = 0; - -/// Returns an iterator over the lines, including line ending characters. -/// -/// This function is just like [`BufRead::lines`], but it includes the -/// line ending characters in each yielded [`String`] if the input -/// data has them. 
-/// -/// # Examples -/// -/// If the input data does not end with a newline character (`'\n'`), -/// then the last [`String`] yielded by this iterator also does not -/// end with a newline: -/// -/// ```rust,ignore -/// use std::io::BufRead; -/// use std::io::Cursor; -/// -/// let cursor = Cursor::new(b"x\ny\nz"); -/// let mut it = cursor.lines(); -/// -/// assert_eq!(it.next(), Some(String::from("x\n"))); -/// assert_eq!(it.next(), Some(String::from("y\n"))); -/// assert_eq!(it.next(), Some(String::from("z"))); -/// assert_eq!(it.next(), None); -/// ``` -pub(crate) fn lines(reader: B) -> Lines -where - B: BufRead, -{ - Lines { buf: reader } -} - -/// An iterator over the lines of an instance of `BufRead`. -/// -/// This struct is generally created by calling [`lines`] on a `BufRead`. -/// Please see the documentation of [`lines`] for more details. -pub(crate) struct Lines { - buf: B, -} - -impl Iterator for Lines { - type Item = std::io::Result; - - fn next(&mut self) -> Option> { - let mut buf = String::new(); - match self.buf.read_line(&mut buf) { - Ok(0) => None, - Ok(_n) => Some(Ok(buf)), - Err(e) => Some(Err(e)), - } - } -} - -/// Returns an iterator over the lines of the given reader. -/// -/// The iterator returned from this function will yield instances of -/// [`std::io::Result`]<[`Vec`]<[`u8`]>>, representing the bytes of the line -/// *including* the null character (with the possible exception of the -/// last line, which may not have one). -/// -/// # Examples -/// -/// ```rust,ignore -/// use std::io::Cursor; -/// -/// let cursor = Cursor::new(b"x\0y\0z\0"); -/// let mut iter = zlines(cursor).map(|l| l.unwrap()); -/// assert_eq!(iter.next(), Some(b"x\0".to_vec())); -/// assert_eq!(iter.next(), Some(b"y\0".to_vec())); -/// assert_eq!(iter.next(), Some(b"z\0".to_vec())); -/// assert_eq!(iter.next(), None); -/// ``` -pub fn zlines(buf: B) -> ZLines { - ZLines { buf } -} - -/// An iterator over the zero-terminated lines of an instance of `BufRead`. 
-pub struct ZLines { - buf: B, -} - -impl Iterator for ZLines { - type Item = std::io::Result>; - - fn next(&mut self) -> Option>> { - let mut buf = Vec::new(); - match self.buf.read_until(ZERO, &mut buf) { - Ok(0) => None, - Ok(_) => Some(Ok(buf)), - Err(e) => Some(Err(e)), - } - } -} - -#[cfg(test)] -mod tests { - - use crate::lines::lines; - use crate::lines::zlines; - use std::io::Cursor; - - #[test] - fn test_null_terminated() { - let cursor = Cursor::new(b"x\0y\0z\0"); - let mut iter = zlines(cursor).map(|l| l.unwrap()); - assert_eq!(iter.next(), Some(b"x\0".to_vec())); - assert_eq!(iter.next(), Some(b"y\0".to_vec())); - assert_eq!(iter.next(), Some(b"z\0".to_vec())); - assert_eq!(iter.next(), None); - } - - #[test] - fn test_not_null_terminated() { - let cursor = Cursor::new(b"x\0y\0z"); - let mut iter = zlines(cursor).map(|l| l.unwrap()); - assert_eq!(iter.next(), Some(b"x\0".to_vec())); - assert_eq!(iter.next(), Some(b"y\0".to_vec())); - assert_eq!(iter.next(), Some(b"z".to_vec())); - assert_eq!(iter.next(), None); - } - - #[test] - fn test_lines() { - let cursor = Cursor::new(b"x\ny\nz"); - let mut it = lines(cursor).map(|l| l.unwrap()); - - assert_eq!(it.next(), Some(String::from("x\n"))); - assert_eq!(it.next(), Some(String::from("y\n"))); - assert_eq!(it.next(), Some(String::from("z"))); - assert_eq!(it.next(), None); - } -} diff --git a/src/uu/tail/Cargo.toml b/src/uu/tail/Cargo.toml index d70502dab..4f40431b1 100644 --- a/src/uu/tail/Cargo.toml +++ b/src/uu/tail/Cargo.toml @@ -17,7 +17,7 @@ path = "src/tail.rs" [dependencies] clap = { version = "3.0", features = ["wrap_help", "cargo"] } libc = "0.2.42" -uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer"] } +uucore = { version=">=0.0.11", package="uucore", path="../../uucore", features=["ringbuffer", "lines"] } [target.'cfg(windows)'.dependencies] winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi", "synchapi", "winbase"] } diff 
--git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 951399866..2c9a248f0 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -16,11 +16,9 @@ extern crate clap; extern crate uucore; mod chunks; -mod lines; mod parse; mod platform; use chunks::ReverseChunks; -use lines::lines; use clap::{App, AppSettings, Arg}; use std::collections::VecDeque; @@ -33,6 +31,7 @@ use std::thread::sleep; use std::time::Duration; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; +use uucore::lines::lines; use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::ringbuffer::RingBuffer; diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 3a6bf25c1..5bd5994cc 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -55,6 +55,7 @@ encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"] entries = ["libc"] fs = ["libc", "nix", "winapi-util"] fsext = ["libc", "time"] +lines = [] memo = ["itertools"] mode = ["libc"] perms = ["libc", "walkdir"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 999d8af6c..b1b87a613 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -6,6 +6,8 @@ pub mod encoding; pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; +#[cfg(feature = "lines")] +pub mod lines; #[cfg(feature = "memo")] pub mod memo; #[cfg(feature = "ringbuffer")] diff --git a/src/uu/tail/src/lines.rs b/src/uucore/src/lib/features/lines.rs similarity index 89% rename from src/uu/tail/src/lines.rs rename to src/uucore/src/lib/features/lines.rs index ee8b36662..a7f4df76d 100644 --- a/src/uu/tail/src/lines.rs +++ b/src/uucore/src/lib/features/lines.rs @@ -2,15 +2,17 @@ // * // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. +// spell-checker:ignore (vars) //! Iterate over lines, including the line ending character(s). //! //! 
This module provides the [`lines`] function, similar to the //! [`BufRead::lines`] method. While the [`BufRead::lines`] method //! yields [`String`] instances that do not include the line ending -//! characters (`"\n"` or `"\r\n"`), our function yields [`String`] -//! instances that include the line ending characters. This is useful -//! if the input data does not end with a newline character and you -//! want to preserve the exact form of the input data. +//! characters (`"\n"` or `"\r\n"`), our function yields +//! [`Vec`]<[`u8`]> instances that include the line ending +//! characters. This is useful if the input data does not end with a +//! newline character and you want to preserve the exact form of the +//! input data. use std::io::BufRead; /// Returns an iterator over the lines, including line ending characters. @@ -51,7 +53,7 @@ use std::io::BufRead; /// assert_eq!(it.next(), Some(Vec::from("z"))); /// assert_eq!(it.next(), None); /// ``` -pub(crate) fn lines(reader: B, sep: u8) -> Lines +pub fn lines(reader: B, sep: u8) -> Lines where B: BufRead, { @@ -62,7 +64,7 @@ where /// /// This struct is generally created by calling [`lines`] on a `BufRead`. /// Please see the documentation of [`lines`] for more details. -pub(crate) struct Lines { +pub struct Lines { buf: B, sep: u8, } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index ae7788e05..4dc5e6987 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -38,6 +38,8 @@ pub use crate::features::encoding; pub use crate::features::fs; #[cfg(feature = "fsext")] pub use crate::features::fsext; +#[cfg(feature = "lines")] +pub use crate::features::lines; #[cfg(feature = "memo")] pub use crate::features::memo; #[cfg(feature = "ringbuffer")]