From e041fda51d59a63783648815f345830f1d0163d9 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 6 Oct 2021 20:33:13 +0200 Subject: [PATCH 01/12] tac: do not re-compile regular expression for each file --- src/uu/tac/src/tac.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 4a93a7c65..caf77c4a7 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -44,9 +44,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { raw_separator }; - let files: Vec = match matches.values_of(options::FILE) { - Some(v) => v.map(|v| v.to_owned()).collect(), - None => vec!["-".to_owned()], + let files: Vec<&str> = match matches.values_of(options::FILE) { + Some(v) => v.collect(), + None => vec!["-"], }; tac(files, before, regex, separator) @@ -102,7 +102,7 @@ pub fn uu_app() -> App<'static, 'static> { /// returns [`std::io::Error`]. fn buffer_tac_regex( data: &[u8], - pattern: regex::bytes::Regex, + pattern: ®ex::bytes::Regex, before: bool, ) -> std::io::Result<()> { let mut out = stdout(); @@ -208,10 +208,16 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> Ok(()) } -fn tac(filenames: Vec, before: bool, regex: bool, separator: &str) -> i32 { +fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 { let mut exit_code = 0; - for filename in &filenames { + let pattern = if regex { + Some(crash_if_err!(1, regex::bytes::Regex::new(separator))) + } else { + None + }; + + for &filename in &filenames { let mut file = BufReader::new(if filename == "-" { Box::new(stdin()) as Box } else { @@ -244,8 +250,7 @@ fn tac(filenames: Vec, before: bool, regex: bool, separator: &str) -> i3 exit_code = 1; continue; }; - if regex { - let pattern = crash_if_err!(1, regex::bytes::Regex::new(separator)); + if let Some(pattern) = &pattern { buffer_tac_regex(&data, pattern, before) } else { buffer_tac(&data, before, separator) From 0d583754cad9f4d6587469a428fb41a69059818c Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 6 Oct 2021 21:38:08 +0200 Subject: [PATCH 02/12] tac: lock stdout only once instead for each line --- src/uu/tac/src/tac.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index caf77c4a7..eab6a943b 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -105,7 +105,8 @@ fn buffer_tac_regex( pattern: ®ex::bytes::Regex, before: bool, ) -> std::io::Result<()> { - let mut out = stdout(); + let out = stdout(); + let mut out = out.lock(); // The index of the line separator for the current line. // @@ -171,7 +172,8 @@ fn buffer_tac_regex( /// `separator` appears at the beginning of each line, as in /// `"/abc/def"`. fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> { - let mut out = stdout(); + let out = stdout(); + let mut out = out.lock(); // The number of bytes in the line separator. let slen = separator.as_bytes().len(); From 28b04fa89950ff1bac7863934050ef9c26a0022d Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 6 Oct 2021 20:50:34 +0200 Subject: [PATCH 03/12] tac: do not use a buffered read as fs::read is more efficient and io::Stdin is buffered internally --- src/uu/tac/src/tac.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index eab6a943b..92bf1ea00 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -12,8 +12,8 @@ extern crate uucore; use clap::{crate_version, App, Arg}; use memchr::memmem; -use std::io::{stdin, stdout, BufReader, Read, Write}; -use std::{fs::File, path::Path}; +use std::io::{stdin, stdout, Read, Write}; +use std::{fs::read, path::Path}; use uucore::display::Quotable; use uucore::InvalidEncodingHandling; @@ -220,8 +220,14 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 }; for &filename in &filenames { - let mut file = BufReader::new(if filename == "-" { - Box::new(stdin()) as Box + let data = if filename == "-" { + let mut data = Vec::new(); + if let Err(e) = stdin().read_to_end(&mut data) { + show_error!("failed to read from stdin: {}", e); + exit_code = 1; + continue; + } + data } else { let path = Path::new(filename); if path.is_dir() || path.metadata().is_err() { @@ -236,22 +242,16 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 exit_code = 1; continue; } - match File::open(path) { - Ok(f) => Box::new(f) as Box, + match read(path) { + Ok(data) => data, Err(e) => { - show_error!("failed to open {} for reading: {}", filename.quote(), e); + show_error!("failed to read {}: {}", filename.quote(), e); exit_code = 1; continue; } } - }); - - let mut data = Vec::new(); - if let Err(e) = file.read_to_end(&mut data) { - show_error!("failed to read {}: {}", filename.quote(), e); - exit_code = 1; - continue; }; + if let Some(pattern) = &pattern { buffer_tac_regex(&data, pattern, before) } else { From 4eab275235f2d89ea48bb7b2b921ea33a8db3fa7 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sat, 9 Oct 2021 12:12:57 +0200 Subject: [PATCH 04/12] tac: buffer stdout more coarsely than line-based following the GNU tac implementation. --- src/uu/tac/src/tac.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 92bf1ea00..370e92230 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -12,7 +12,7 @@ extern crate uucore; use clap::{crate_version, App, Arg}; use memchr::memmem; -use std::io::{stdin, stdout, Read, Write}; +use std::io::{stdin, stdout, BufWriter, Read, Write}; use std::{fs::read, path::Path}; use uucore::display::Quotable; use uucore::InvalidEncodingHandling; @@ -106,7 +106,7 @@ fn buffer_tac_regex( before: bool, ) -> std::io::Result<()> { let out = stdout(); - let mut out = out.lock(); + let mut out = BufWriter::new(out.lock()); // The index of the line separator for the current line. // @@ -173,7 +173,7 @@ fn buffer_tac_regex( /// `"/abc/def"`. fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> { let out = stdout(); - let mut out = out.lock(); + let mut out = BufWriter::new(out.lock()); // The number of bytes in the line separator. let slen = separator.as_bytes().len(); From c526df57b8e10b94b096476c5eaf20eae52e7061 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 6 Oct 2021 21:08:11 +0200 Subject: [PATCH 05/12] tac: opportunistically use memory maps Since tac must read its input files completely to start processing them from the end, it is particularly suited to use memory maps to benefit from the page cache maintained by the operating systems to bring the necessary data into memory as required. This does also include situations where the input is stdin, but not via a pipe but for example a file descriptor set up by the user's shell through an input redirection. --- Cargo.lock | 10 ++++++ src/uu/tac/Cargo.toml | 3 ++ src/uu/tac/src/tac.rs | 72 +++++++++++++++++++++++++++++++++---------- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c30a182d..3633928c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1064,6 +1064,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +[[package]] +name = "memmap2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4647a11b578fead29cdbb34d4adef8dd3dc35b876c9c6d5240d83f205abfe96e" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.4" @@ -3025,6 +3034,7 @@ version = "0.0.7" dependencies = [ "clap", "memchr 2.4.0", + "memmap2", "regex", "uucore", "uucore_procs", diff --git a/src/uu/tac/Cargo.toml b/src/uu/tac/Cargo.toml index 1e436e916..00803c8d2 100644 --- a/src/uu/tac/Cargo.toml +++ b/src/uu/tac/Cargo.toml @@ -1,3 +1,5 @@ +# spell-checker:ignore memmap + [package] name = "uu_tac" version = "0.0.7" @@ -16,6 +18,7 @@ path = "src/tac.rs" [dependencies] memchr = "2" +memmap2 = "0.5" regex = "1" clap = { version = "2.33", features = ["wrap_help"] } uucore = { version=">=0.0.9", package="uucore", path="../../uucore" } diff --git a/src/uu/tac/src/tac.rs b/src/uu/tac/src/tac.rs index 370e92230..cdb2d74e3 100644 --- a/src/uu/tac/src/tac.rs +++ b/src/uu/tac/src/tac.rs @@ -5,15 +5,19 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (ToDO) sbytes slen dlen memmem +// spell-checker:ignore (ToDO) sbytes slen dlen memmem memmap Mmap mmap SIGBUS #[macro_use] extern crate uucore; use clap::{crate_version, App, Arg}; use memchr::memmem; +use memmap2::Mmap; use std::io::{stdin, stdout, BufWriter, Read, Write}; -use std::{fs::read, path::Path}; +use std::{ + fs::{read, File}, + path::Path, +}; use uucore::display::Quotable; use uucore::InvalidEncodingHandling; @@ -220,14 +224,23 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 }; for &filename in &filenames { - let data = if filename == "-" { - let mut data = Vec::new(); - if let Err(e) = stdin().read_to_end(&mut data) { - show_error!("failed to read from stdin: {}", e); - exit_code = 1; - continue; + let mmap; + let buf; + + let data: &[u8] = if filename == "-" { + if let Some(mmap1) = try_mmap_stdin() { + mmap = mmap1; + &mmap + } else { + let mut buf1 = Vec::new(); + if let Err(e) = stdin().read_to_end(&mut buf1) { + show_error!("failed to read from stdin: {}", e); + exit_code = 1; + continue; + } + buf = buf1; + &buf } - data } else { let path = Path::new(filename); if path.is_dir() || path.metadata().is_err() { @@ -242,22 +255,47 @@ fn tac(filenames: Vec<&str>, before: bool, regex: bool, separator: &str) -> i32 exit_code = 1; continue; } - match read(path) { - Ok(data) => data, - Err(e) => { - show_error!("failed to read {}: {}", filename.quote(), e); - exit_code = 1; - continue; + + if let Some(mmap1) = try_mmap_path(path) { + mmap = mmap1; + &mmap + } else { + match read(path) { + Ok(buf1) => { + buf = buf1; + &buf + } + Err(e) => { + show_error!("failed to read {}: {}", filename.quote(), e); + exit_code = 1; + continue; + } } } }; if let Some(pattern) = &pattern { - buffer_tac_regex(&data, pattern, before) + buffer_tac_regex(data, pattern, before) } else { - buffer_tac(&data, before, separator) + buffer_tac(data, before, separator) } .unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e)); } exit_code } + +fn try_mmap_stdin() -> Option { + // SAFETY: If the file is truncated while we map it, SIGBUS will be raised + // and our process will be terminated, thus preventing access of invalid memory. + unsafe { Mmap::map(&stdin()).ok() } +} + +fn try_mmap_path(path: &Path) -> Option { + let file = File::open(path).ok()?; + + // SAFETY: If the file is truncated while we map it, SIGBUS will be raised + // and our process will be terminated, thus preventing access of invalid memory. + let mmap = unsafe { Mmap::map(&file).ok()? }; + + Some(mmap) +} From 86d22aaa1d4a1bd252083cfd2a144bbfa5198c8f Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sun, 10 Oct 2021 13:54:35 +0200 Subject: [PATCH 06/12] tac: Add a simple how to for benchmarking --- src/uu/tac/BENCHMARKING.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/uu/tac/BENCHMARKING.md diff --git a/src/uu/tac/BENCHMARKING.md b/src/uu/tac/BENCHMARKING.md new file mode 100644 index 000000000..4e6d13ea2 --- /dev/null +++ b/src/uu/tac/BENCHMARKING.md @@ -0,0 +1,25 @@ +## Benchmarking `tac` + + + +`tac` is often used to process log files in reverse chronological order, i.e. from newer towards older entries. In this case, the performance target to yield results as fast as possible, i.e. without reading in the whole file that is to be reversed line-by-line. Therefore, a sensible benchmark is to read a large log file containing N lines and measure how long it takes to produce the last K lines from that file. + +Large text files can for example be found in the [Wikipedia database dumps](https://dumps.wikimedia.org/wikidatawiki/latest/), usually sized at multiple gigabytes and comprising more than 100M lines. + +After you have obtained and uncompressed such a file, you need to build `tac` in release mode + +```shell +$ cargo build --release --package uu_tac +``` + +and then you can time how it long it takes to extract the last 10M lines by running + +```shell +$ /usr/bin/time ./target/release/tac wikidatawiki-20211001-pages-logging.xml | head -n10000000 >/dev/null +``` + +For more systematic measurements that include warm-ups, repetitions and comparisons, [Hyperfine](https://github.com/sharkdp/hyperfine) can be helpful. For example, to compare this implementation to the one provided by your distribution run + +```shell +$ hyperfine "./target/release/tac wikidatawiki-20211001-pages-logging.xml | head -n10000000 >/dev/null" "/usr/bin/tac wikidatawiki-20211001-pages-logging.xml | head -n10000000 >/dev/null" +``` From 9e21d26b7ca92233a717ebc95cf7a670e393000d Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 11 Oct 2021 16:51:14 -0400 Subject: [PATCH 07/12] tests: add template string to assert! statements Add missing "{}" template strings to `assert!()` statements. --- tests/common/util.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/common/util.rs b/tests/common/util.rs index 61576a087..1f6a4f6aa 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -864,7 +864,7 @@ impl UCommand { /// Add a parameter to the invocation. Path arguments are treated relative /// to the test environment directory. pub fn arg>(&mut self, arg: S) -> &mut UCommand { - assert!(!self.has_run, ALREADY_RUN); + assert!(!self.has_run, "{}", ALREADY_RUN); self.comm_string.push(' '); self.comm_string .push_str(arg.as_ref().to_str().unwrap_or_default()); @@ -875,7 +875,7 @@ impl UCommand { /// Add multiple parameters to the invocation. Path arguments are treated relative /// to the test environment directory. pub fn args>(&mut self, args: &[S]) -> &mut UCommand { - assert!(!self.has_run, MULTIPLE_STDIN_MEANINGLESS); + assert!(!self.has_run, "{}", MULTIPLE_STDIN_MEANINGLESS); let strings = args .iter() .map(|s| s.as_ref().to_os_string()) @@ -893,7 +893,11 @@ impl UCommand { /// provides standard input to feed in to the command when spawned pub fn pipe_in>>(&mut self, input: T) -> &mut UCommand { - assert!(!self.bytes_into_stdin.is_some(), MULTIPLE_STDIN_MEANINGLESS); + assert!( + !self.bytes_into_stdin.is_some(), + "{}", + MULTIPLE_STDIN_MEANINGLESS + ); self.bytes_into_stdin = Some(input.into()); self } @@ -908,7 +912,7 @@ impl UCommand { /// This is typically useful to test non-standard workflows /// like feeding something to a command that does not read it pub fn ignore_stdin_write_error(&mut self) -> &mut UCommand { - assert!(!self.bytes_into_stdin.is_none(), NO_STDIN_MEANINGLESS); + assert!(!self.bytes_into_stdin.is_none(), "{}", NO_STDIN_MEANINGLESS); self.ignore_stdin_write_error = true; self } @@ -918,7 +922,7 @@ impl UCommand { K: AsRef, V: AsRef, { - assert!(!self.has_run, ALREADY_RUN); + assert!(!self.has_run, "{}", ALREADY_RUN); self.raw.env(key, val); self } @@ -937,7 +941,7 @@ impl UCommand { /// Spawns the command, feeds the stdin if any, and returns the /// child process immediately. pub fn run_no_wait(&mut self) -> Child { - assert!(!self.has_run, ALREADY_RUN); + assert!(!self.has_run, "{}", ALREADY_RUN); self.has_run = true; log_info("run", &self.comm_string); let mut child = self From c50b5ac11015a19fb6782b43ce5b4fe1ca93936d Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 11 Oct 2021 17:36:08 -0400 Subject: [PATCH 08/12] hashsum: fix handling of \r\n in Windows text mode Fix a bug in which "\r\n" was not being replaced with "\n" in text mode on Windows. This would happen only if one call to `write()` ended with a "\r" character and the next call to `write()` started with a "\n" character. This commit fixes the bug by buffering a "\r" character if it appears at the end of one call to `write()` and only writing if the first character in the next call to `write()` is *not* a "\n" character. Fixes issue #2681. --- src/uu/hashsum/src/digest.rs | 93 ++++++++++++++++++++++++++++++----- src/uu/hashsum/src/hashsum.rs | 8 +++ 2 files changed, 89 insertions(+), 12 deletions(-) diff --git a/src/uu/hashsum/src/digest.rs b/src/uu/hashsum/src/digest.rs index 531dc7e4f..809223fa7 100644 --- a/src/uu/hashsum/src/digest.rs +++ b/src/uu/hashsum/src/digest.rs @@ -189,13 +189,31 @@ pub struct DigestWriter<'a> { /// "\n" before passing input bytes to the [`digest`]. #[allow(dead_code)] binary: bool, - // TODO This is dead code only on non-Windows operating systems. It - // might be better to use a `#[cfg(windows)]` guard here. + + /// Whether the previous + #[allow(dead_code)] + was_last_character_carriage_return: bool, + // TODO These are dead code only on non-Windows operating systems. + // It might be better to use a `#[cfg(windows)]` guard here. } impl<'a> DigestWriter<'a> { pub fn new(digest: &'a mut Box, binary: bool) -> DigestWriter { - DigestWriter { digest, binary } + let was_last_character_carriage_return = false; + DigestWriter { + digest, + binary, + was_last_character_carriage_return, + } + } + + pub fn finalize(&mut self) -> bool { + if self.was_last_character_carriage_return { + self.digest.input(&[b'\r']); + true + } else { + false + } } } @@ -213,22 +231,40 @@ impl<'a> Write for DigestWriter<'a> { return Ok(buf.len()); } - // In Windows text mode, replace each occurrence of "\r\n" - // with "\n". + // The remaining code handles Windows text mode, where we must + // replace each occurrence of "\r\n" with "\n". // - // Find all occurrences of "\r\n", inputting the slice just - // before the "\n" in the previous instance of "\r\n" and - // the beginning of this "\r\n". - // - // FIXME This fails if one call to `write()` ends with the - // "\r" and the next call to `write()` begins with the "\n". + // First, if the last character written was "\r" and the first + // character in the current buffer to write is not "\n", then we + // need to write the "\r" that we buffered from the previous + // call to `write()`. let n = buf.len(); + if self.was_last_character_carriage_return && n > 0 && buf[0] != b'\n' { + self.digest.input(&[b'\r']); + } + + // Next, find all occurrences of "\r\n", inputting the slice + // just before the "\n" in the previous instance of "\r\n" and + // the beginning of this "\r\n". let mut i_prev = 0; for i in memmem::find_iter(buf, b"\r\n") { self.digest.input(&buf[i_prev..i]); i_prev = i + 1; } - self.digest.input(&buf[i_prev..n]); + + // Finally, check whether the last character is "\r". If so, + // buffer it until we know that the next character is not "\n", + // which can only be known on the next call to `write()`. + // + // This all assumes that `write()` will be called on adjacent + // blocks of the input. + if n > 0 && buf[n - 1] == b'\r' { + self.was_last_character_carriage_return = true; + self.digest.input(&buf[i_prev..n - 1]); + } else { + self.was_last_character_carriage_return = false; + self.digest.input(&buf[i_prev..n]); + } // Even though we dropped a "\r" for each "\r\n" we found, we // still report the number of bytes written as `n`. This is @@ -243,3 +279,36 @@ impl<'a> Write for DigestWriter<'a> { Ok(()) } } + +#[cfg(test)] +mod tests { + + /// Test for replacing a "\r\n" sequence with "\n" when the "\r" is + /// at the end of one block and the "\n" is at the beginning of the + /// next block, when reading in blocks. + #[cfg(windows)] + #[test] + fn test_crlf_across_blocks() { + use std::io::Write; + + use crate::digest::Digest; + use crate::digest::DigestWriter; + + // Writing "\r" in one call to `write()`, and then "\n" in another. + let mut digest = Box::new(md5::Context::new()) as Box; + let mut writer_crlf = DigestWriter::new(&mut digest, false); + writer_crlf.write(&[b'\r']).unwrap(); + writer_crlf.write(&[b'\n']).unwrap(); + writer_crlf.finalize(); + let result_crlf = digest.result_str(); + + // We expect "\r\n" to be replaced with "\n" in text mode on Windows. + let mut digest = Box::new(md5::Context::new()) as Box; + let mut writer_lf = DigestWriter::new(&mut digest, false); + writer_lf.write(&[b'\n']).unwrap(); + writer_lf.finalize(); + let result_lf = digest.result_str(); + + assert_eq!(result_crlf, result_lf); + } +} diff --git a/src/uu/hashsum/src/hashsum.rs b/src/uu/hashsum/src/hashsum.rs index 4186043f5..07070ed1b 100644 --- a/src/uu/hashsum/src/hashsum.rs +++ b/src/uu/hashsum/src/hashsum.rs @@ -611,8 +611,16 @@ fn digest_reader( // If `binary` is `false` and the operating system is Windows, then // `DigestWriter` replaces "\r\n" with "\n" before it writes the // bytes into `digest`. Otherwise, it just inserts the bytes as-is. + // + // In order to support replacing "\r\n", we must call `finalize()` + // in order to support the possibility that the last character read + // from the reader was "\r". (This character gets buffered by + // `DigestWriter` and only written if the following character is + // "\n". But when "\r" is the last character read, we need to force + // it to be written.) let mut digest_writer = DigestWriter::new(digest, binary); std::io::copy(reader, &mut digest_writer)?; + digest_writer.finalize(); if digest.output_bits() > 0 { Ok(digest.result_str()) From d1e02665bfcb2c530f147ad89678eb6eb8a11a32 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 11 Oct 2021 18:18:29 -0400 Subject: [PATCH 09/12] fixup! hashsum: fix handling of \r\n in Windows text mode --- src/uu/hashsum/src/digest.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/hashsum/src/digest.rs b/src/uu/hashsum/src/digest.rs index 809223fa7..61f425662 100644 --- a/src/uu/hashsum/src/digest.rs +++ b/src/uu/hashsum/src/digest.rs @@ -297,15 +297,15 @@ mod tests { // Writing "\r" in one call to `write()`, and then "\n" in another. let mut digest = Box::new(md5::Context::new()) as Box; let mut writer_crlf = DigestWriter::new(&mut digest, false); - writer_crlf.write(&[b'\r']).unwrap(); - writer_crlf.write(&[b'\n']).unwrap(); + writer_crlf.write_all(&[b'\r']).unwrap(); + writer_crlf.write_all(&[b'\n']).unwrap(); writer_crlf.finalize(); let result_crlf = digest.result_str(); // We expect "\r\n" to be replaced with "\n" in text mode on Windows. let mut digest = Box::new(md5::Context::new()) as Box; let mut writer_lf = DigestWriter::new(&mut digest, false); - writer_lf.write(&[b'\n']).unwrap(); + writer_lf.write_all(&[b'\n']).unwrap(); writer_lf.finalize(); let result_lf = digest.result_str(); From 429e1d0f12eadf6a709bbdbf55aa2c0ce70657df Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 11 Oct 2021 18:29:08 -0400 Subject: [PATCH 10/12] head: use default() instead of new() for options Remove the `HeadOptions::new()` function in favor of the `Default` implementation. Both were implemented, but only `Default::default()` is needed. --- src/uu/head/src/head.rs | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index ead734088..c6635ac67 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -108,6 +108,12 @@ enum Modes { Bytes(usize), } +impl Default for Modes { + fn default() -> Self { + Modes::Lines(10) + } +} + fn parse_mode(src: &str, closure: F) -> Result<(Modes, bool), String> where F: FnOnce(usize) -> Modes, @@ -144,7 +150,7 @@ fn arg_iterate<'a>( } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Default)] struct HeadOptions { pub quiet: bool, pub verbose: bool, @@ -155,22 +161,11 @@ struct HeadOptions { } impl HeadOptions { - pub fn new() -> HeadOptions { - HeadOptions { - quiet: false, - verbose: false, - zeroed: false, - all_but_last: false, - mode: Modes::Lines(10), - files: Vec::new(), - } - } - ///Construct options from matches pub fn get_from(args: impl uucore::Args) -> Result { let matches = uu_app().get_matches_from(arg_iterate(args)?); - let mut options = HeadOptions::new(); + let mut options: HeadOptions = Default::default(); options.quiet = matches.is_present(options::QUIET_NAME); options.verbose = matches.is_present(options::VERBOSE_NAME); @@ -197,12 +192,6 @@ impl HeadOptions { Ok(options) } } -// to make clippy shut up -impl Default for HeadOptions { - fn default() -> Self { - Self::new() - } -} fn read_n_bytes(input: R, n: usize) -> std::io::Result<()> where @@ -523,17 +512,13 @@ mod tests { assert!(options("-c IsThisJustFantasy").is_err()); } #[test] - #[allow(clippy::bool_comparison)] fn test_options_correct_defaults() { - let opts = HeadOptions::new(); - let opts2: HeadOptions = Default::default(); + let opts: HeadOptions = Default::default(); - assert_eq!(opts, opts2); - - assert!(opts.verbose == false); - assert!(opts.quiet == false); - assert!(opts.zeroed == false); - assert!(opts.all_but_last == false); + assert!(!opts.verbose); + assert!(!opts.quiet); + assert!(!opts.zeroed); + assert!(!opts.all_but_last); assert_eq!(opts.mode, Modes::Lines(10)); assert!(opts.files.is_empty()); } From cb34b660cbba77c94c35e23b38909246f3558591 Mon Sep 17 00:00:00 2001 From: Smicry Date: Tue, 19 Oct 2021 00:15:17 +0800 Subject: [PATCH 11/12] add tail usage --- src/uu/head/src/head.rs | 1 - src/uu/tail/src/tail.rs | 13 +++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index c6635ac67..42b7c0fda 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -19,7 +19,6 @@ const BUF_SIZE: usize = 65536; const ABOUT: &str = "\ Print the first 10 lines of each FILE to standard output.\n\ With more than one FILE, precede each with a header giving the file name.\n\ - \n\ With no FILE, or when FILE is -, read standard input.\n\ \n\ Mandatory arguments to long flags are mandatory for short flags too.\ diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 89fbe4d36..eaf7bf8bf 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -35,6 +35,15 @@ use crate::platform::stdin_is_pipe_or_fifo; #[cfg(unix)] use std::os::unix::fs::MetadataExt; +const ABOUT: &str = "\ + Print the last 10 lines of each FILE to standard output.\n\ + With more than one FILE, precede each with a header giving the file name.\n\ + With no FILE, or when FILE is -, read standard input.\n\ + \n\ + Mandatory arguments to long flags are mandatory for short flags too.\ + "; +const USAGE: &str = "tail [FLAG]... [FILE]..."; + pub mod options { pub mod verbosity { pub static QUIET: &str = "quiet"; @@ -218,8 +227,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { pub fn uu_app() -> App<'static, 'static> { App::new(uucore::util_name()) .version(crate_version!()) - .about("output the last part of files") - // TODO: add usage + .about(ABOUT) + .usage(USAGE) .arg( Arg::with_name(options::BYTES) .short("c") From bfa8a2a068f58eb63228ea9a6d8392d70144b721 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 19 Oct 2021 01:12:12 +0200 Subject: [PATCH 12/12] tests/util: add more wrappers for common file handling tasks truncate, rename, remove, copy, rmdir, etc. --- tests/common/util.rs | 57 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/tests/common/util.rs b/tests/common/util.rs index 61576a087..d60fb3952 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -510,43 +510,86 @@ impl AtPath { } pub fn write(&self, name: &str, contents: &str) { - log_info("open(write)", self.plus_as_string(name)); + log_info("write(default)", self.plus_as_string(name)); std::fs::write(self.plus(name), contents) .unwrap_or_else(|e| panic!("Couldn't write {}: {}", name, e)); } pub fn write_bytes(&self, name: &str, contents: &[u8]) { - log_info("open(write)", self.plus_as_string(name)); + log_info("write(default)", self.plus_as_string(name)); std::fs::write(self.plus(name), contents) .unwrap_or_else(|e| panic!("Couldn't write {}: {}", name, e)); } pub fn append(&self, name: &str, contents: &str) { - log_info("open(append)", self.plus_as_string(name)); + log_info("write(append)", self.plus_as_string(name)); let mut f = OpenOptions::new() .write(true) .append(true) + .create(true) .open(self.plus(name)) .unwrap(); f.write_all(contents.as_bytes()) - .unwrap_or_else(|e| panic!("Couldn't write {}: {}", name, e)); + .unwrap_or_else(|e| panic!("Couldn't write(append) {}: {}", name, e)); } pub fn append_bytes(&self, name: &str, contents: &[u8]) { - log_info("open(append)", self.plus_as_string(name)); + log_info("write(append)", self.plus_as_string(name)); let mut f = OpenOptions::new() .write(true) .append(true) + .create(true) .open(self.plus(name)) .unwrap(); f.write_all(contents) - .unwrap_or_else(|e| panic!("Couldn't append to {}: {}", name, e)); + .unwrap_or_else(|e| panic!("Couldn't write(append) to {}: {}", name, e)); + } + + pub fn truncate(&self, name: &str, contents: &str) { + log_info("write(truncate)", self.plus_as_string(name)); + let mut f = OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(self.plus(name)) + .unwrap(); + f.write_all(contents.as_bytes()) + .unwrap_or_else(|e| panic!("Couldn't write(truncate) {}: {}", name, e)); + } + + pub fn rename(&self, source: &str, target: &str) { + let source = self.plus(source); + let target = self.plus(target); + log_info("rename", format!("{:?} {:?}", source, target)); + std::fs::rename(&source, &target) + .unwrap_or_else(|e| panic!("Couldn't rename {:?} -> {:?}: {}", source, target, e)); + } + + pub fn remove(&self, source: &str) { + let source = self.plus(source); + log_info("remove", format!("{:?}", source)); + std::fs::remove_file(&source) + .unwrap_or_else(|e| panic!("Couldn't remove {:?}: {}", source, e)); + } + + pub fn copy(&self, source: &str, target: &str) { + let source = self.plus(source); + let target = self.plus(target); + log_info("copy", format!("{:?} {:?}", source, target)); + std::fs::copy(&source, &target) + .unwrap_or_else(|e| panic!("Couldn't copy {:?} -> {:?}: {}", source, target, e)); + } + + pub fn rmdir(&self, dir: &str) { + log_info("rmdir", self.plus_as_string(dir)); + fs::remove_dir(&self.plus(dir)).unwrap(); } pub fn mkdir(&self, dir: &str) { log_info("mkdir", self.plus_as_string(dir)); fs::create_dir(&self.plus(dir)).unwrap(); } + pub fn mkdir_all(&self, dir: &str) { log_info("mkdir_all", self.plus_as_string(dir)); fs::create_dir_all(self.plus(dir)).unwrap(); @@ -1020,6 +1063,8 @@ impl UCommand { } } +/// Wrapper for `child.stdout.read_exact()`. +/// Careful, this blocks indefinitely if `size` bytes is never reached. pub fn read_size(child: &mut Child, size: usize) -> String { let mut output = Vec::new(); output.resize(size, 0);