From c69afa00ffcd5721b7bcbaa452495f8b8e673007 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 29 Apr 2021 18:25:34 +0200 Subject: [PATCH 001/148] ls: implement device symbol and id --- src/uu/ls/src/ls.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index d78e1977a..777f16e7f 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1623,10 +1623,18 @@ fn format_prefixed(prefixed: NumberPrefix) -> String { fn display_file_size(metadata: &Metadata, config: &Config) -> String { // NOTE: The human-readable behaviour deviates from the GNU ls. // The GNU ls uses binary prefixes by default. - match config.size_format { - SizeFormat::Binary => format_prefixed(NumberPrefix::binary(metadata.len() as f64)), - SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(metadata.len() as f64)), - SizeFormat::Bytes => metadata.len().to_string(), + let ft = metadata.file_type(); + if ft.is_char_device() || ft.is_block_device() { + let dev: u64 = metadata.rdev(); + let major = (dev >> 8) as u8; + let minor = dev as u8; + return format!("{}, {}", major, minor); + } else { + match config.size_format { + SizeFormat::Binary => format_prefixed(NumberPrefix::binary(metadata.len() as f64)), + SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(metadata.len() as f64)), + SizeFormat::Bytes => metadata.len().to_string(), + } } } @@ -1635,6 +1643,10 @@ fn display_file_type(file_type: FileType) -> char { 'd' } else if file_type.is_symlink() { 'l' + } else if file_type.is_block_device() { + 'b' + } else if file_type.is_char_device() { + 'c' } else { '-' } From d6248279133a001f66d6c2138a0ffbbb8f3f36fb Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 29 Apr 2021 18:44:46 +0200 Subject: [PATCH 002/148] ls: fix windows and add more file types --- src/uu/ls/src/ls.rs | 48 ++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git 
a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 777f16e7f..adfc654ba 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1621,20 +1621,23 @@ fn format_prefixed(prefixed: NumberPrefix) -> String { } fn display_file_size(metadata: &Metadata, config: &Config) -> String { + #[cfg(unix)] + { + let ft = metadata.file_type(); + if ft.is_char_device() || ft.is_block_device() { + let dev: u64 = metadata.rdev(); + let major = (dev >> 8) as u8; + let minor = dev as u8; + return format!("{}, {}", major, minor); + } + } + // NOTE: The human-readable behaviour deviates from the GNU ls. // The GNU ls uses binary prefixes by default. - let ft = metadata.file_type(); - if ft.is_char_device() || ft.is_block_device() { - let dev: u64 = metadata.rdev(); - let major = (dev >> 8) as u8; - let minor = dev as u8; - return format!("{}, {}", major, minor); - } else { - match config.size_format { - SizeFormat::Binary => format_prefixed(NumberPrefix::binary(metadata.len() as f64)), - SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(metadata.len() as f64)), - SizeFormat::Bytes => metadata.len().to_string(), - } + match config.size_format { + SizeFormat::Binary => format_prefixed(NumberPrefix::binary(metadata.len() as f64)), + SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(metadata.len() as f64)), + SizeFormat::Bytes => metadata.len().to_string(), } } @@ -1643,11 +1646,24 @@ fn display_file_type(file_type: FileType) -> char { 'd' } else if file_type.is_symlink() { 'l' - } else if file_type.is_block_device() { - 'b' - } else if file_type.is_char_device() { - 'c' } else { + #[cfg(unix)] + { + if file_type.is_block_device() { + 'b' + } else if file_type.is_char_device() { + 'c' + } else if file_type.is_fifo() { + 'p' + } else if file_type.is_socket() { + 's' + } else if file_type.is_file() { + '-' + } else { + '?' 
+ } + } + #[cfg(not(unix))] '-' } } From 01d178cf172bb205721be32c4e9d52e0e38b722d Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 1 May 2021 16:53:34 +0200 Subject: [PATCH 003/148] sort: don't rely on serde-json for extsort It is much faster to just write the lines to disk, separated by \n (or \0 if zero-terminated is enabled), instead of serializing to json. external_sort now knows of the Line struct instead of interacting with it using the ExternallySortable trait. Similarly, it now uses the crash_if_err! macro to handle errors, instead of bubbling them up. Some functions were changed from taking &[Line] as the input to taking an Iterator. This removes the need to collect to a Vec when not necessary. --- Cargo.lock | 8 - src/uu/sort/Cargo.toml | 4 +- src/uu/sort/src/external_sort/mod.rs | 356 ++++++++++----------------- src/uu/sort/src/numeric_str_cmp.rs | 7 +- src/uu/sort/src/sort.rs | 220 ++++++++--------- 5 files changed, 249 insertions(+), 346 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 31787e626..9ffc56720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1372,9 +1372,6 @@ name = "serde" version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" -dependencies = [ - "serde_derive", -] [[package]] name = "serde_cbor" @@ -1453,9 +1450,6 @@ name = "smallvec" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" -dependencies = [ - "serde", -] [[package]] name = "strsim" @@ -2391,8 +2385,6 @@ dependencies = [ "rand 0.7.3", "rayon", "semver", - "serde", - "serde_json", "smallvec 1.6.1", "tempdir", "unicode-width", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 80ffc92c9..3784ccbb0 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -15,15 +15,13 @@ edition = "2018" path = "src/sort.rs" [dependencies] 
-serde_json = { version = "1.0.64", default-features = false, features = ["alloc"] } -serde = { version = "1.0", features = ["derive"] } rayon = "1.5" rand = "0.7" clap = "2.33" fnv = "1.0.7" itertools = "0.10.0" semver = "0.9.0" -smallvec = { version="1.6.1", features=["serde"] } +smallvec = "1.6.1" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs index fd942d4a7..725b17bbd 100644 --- a/src/uu/sort/src/external_sort/mod.rs +++ b/src/uu/sort/src/external_sort/mod.rs @@ -1,50 +1,32 @@ -use std::clone::Clone; -use std::cmp::Ordering::Less; +use std::cmp::Ordering; use std::collections::VecDeque; -use std::error::Error; use std::fs::{File, OpenOptions}; -use std::io::SeekFrom::Start; +use std::io::SeekFrom; use std::io::{BufRead, BufReader, BufWriter, Seek, Write}; -use std::marker::PhantomData; -use std::path::PathBuf; +use std::path::Path; -use serde::de::DeserializeOwned; -use serde::Serialize; -use serde_json; use tempdir::TempDir; use super::{GlobalSettings, Line}; -/// Trait for types that can be used by -/// [ExternalSorter](struct.ExternalSorter.html). Must be sortable, cloneable, -/// serializeable, and able to report on it's size -pub trait ExternallySortable: Clone + Serialize + DeserializeOwned { - /// Get the size, in bytes, of this object (used to constrain the buffer - /// used in the external sort). 
- fn get_size(&self) -> u64; -} - /// Iterator that provides sorted `T`s -pub struct ExtSortedIterator { +pub struct ExtSortedIterator { buffers: Vec>, chunk_offsets: Vec, - max_per_chunk: u64, - chunks: u64, + max_per_chunk: usize, + chunks: usize, tmp_dir: TempDir, settings: GlobalSettings, failed: bool, } -impl Iterator for ExtSortedIterator -where - Line: ExternallySortable, -{ - type Item = Result>; +impl Iterator for ExtSortedIterator { + type Item = Line; /// # Errors /// /// This method can fail due to issues reading intermediate sorted chunks - /// from disk, or due to serde deserialization issues + /// from disk fn next(&mut self) -> Option { if self.failed { return None; @@ -53,29 +35,18 @@ where let mut empty = true; for chunk_num in 0..self.chunks { if self.buffers[chunk_num as usize].is_empty() { - let mut f = match File::open(self.tmp_dir.path().join(chunk_num.to_string())) { - Ok(f) => f, - Err(e) => { - self.failed = true; - return Some(Err(Box::new(e))); - } - }; - match f.seek(Start(self.chunk_offsets[chunk_num as usize])) { - Ok(_) => (), - Err(e) => { - self.failed = true; - return Some(Err(Box::new(e))); - } - } - let bytes_read = - match fill_buff(&mut self.buffers[chunk_num as usize], f, self.max_per_chunk) { - Ok(bytes_read) => bytes_read, - Err(e) => { - self.failed = true; - return Some(Err(e)); - } - }; - self.chunk_offsets[chunk_num as usize] += bytes_read; + let mut f = crash_if_err!( + 1, + File::open(self.tmp_dir.path().join(chunk_num.to_string())) + ); + crash_if_err!(1, f.seek(SeekFrom::Start(self.chunk_offsets[chunk_num]))); + let bytes_read = fill_buff( + &mut self.buffers[chunk_num as usize], + f, + self.max_per_chunk, + &self.settings, + ); + self.chunk_offsets[chunk_num as usize] += bytes_read as u64; if !self.buffers[chunk_num as usize].is_empty() { empty = false; } @@ -91,205 +62,150 @@ where // check is_empty() before unwrap()ing let mut idx = 0; for chunk_num in 0..self.chunks as usize { - if 
!self.buffers[chunk_num].is_empty() { - if self.buffers[idx].is_empty() - || (super::compare_by)( + if !self.buffers[chunk_num].is_empty() + && (self.buffers[idx].is_empty() + || super::compare_by( self.buffers[chunk_num].front().unwrap(), self.buffers[idx].front().unwrap(), &self.settings, - ) == Less - { - idx = chunk_num; - } + ) == Ordering::Less) + { + idx = chunk_num; } } // unwrap due to checks above let r = self.buffers[idx].pop_front().unwrap(); - Some(Ok(r)) + Some(r) } } -/// Perform an external sort on an unsorted stream of incoming data -pub struct ExternalSorter -where - Line: ExternallySortable, -{ - tmp_dir: Option, - buffer_bytes: u64, - phantom: PhantomData, - settings: GlobalSettings, -} +/// Sort (based on `compare`) the `T`s provided by `unsorted` and return an +/// iterator +/// +/// # Errors +/// +/// This method can fail due to issues writing intermediate sorted chunks +/// to disk. +pub fn ext_sort( + unsorted: impl Iterator, + settings: &GlobalSettings, +) -> ExtSortedIterator { + let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); -impl ExternalSorter -where - Line: ExternallySortable, -{ - /// Create a new `ExternalSorter` with a specified memory buffer and - /// temporary directory - pub fn new( - buffer_bytes: u64, - tmp_dir: Option, - settings: GlobalSettings, - ) -> ExternalSorter { - ExternalSorter { - buffer_bytes, - tmp_dir, - phantom: PhantomData, + let mut iter = ExtSortedIterator { + buffers: Vec::new(), + chunk_offsets: Vec::new(), + max_per_chunk: 0, + chunks: 0, + tmp_dir, + settings: settings.clone(), + failed: false, + }; + + let mut total_read = 0; + let mut chunk = Vec::new(); + + // make the initial chunks on disk + for seq in unsorted { + let seq_size = seq.estimate_size(); + total_read += seq_size; + + chunk.push(seq); + + if total_read >= settings.buffer_size { + super::sort_by(&mut chunk, &settings); + write_chunk( + settings, + &iter.tmp_dir.path().join(iter.chunks.to_string()), + 
&mut chunk, + ); + chunk.clear(); + total_read = 0; + iter.chunks += 1; + } + } + // write the last chunk + if !chunk.is_empty() { + super::sort_by(&mut chunk, &settings); + write_chunk( settings, - } + &iter.tmp_dir.path().join(iter.chunks.to_string()), + &mut chunk, + ); + iter.chunks += 1; } - /// Sort (based on `compare`) the `T`s provided by `unsorted` and return an - /// iterator - /// - /// # Errors - /// - /// This method can fail due to issues writing intermediate sorted chunks - /// to disk, or due to serde serialization issues - pub fn sort_by( - &self, - unsorted: I, - settings: GlobalSettings, - ) -> Result, Box> - where - I: Iterator, - { - let tmp_dir = match self.tmp_dir { - Some(ref p) => TempDir::new_in(p, "uutils_sort")?, - None => TempDir::new("uutils_sort")?, - }; - // creating the thing we need to return first due to the face that we need to - // borrow tmp_dir and move it out - let mut iter = ExtSortedIterator { - buffers: Vec::new(), - chunk_offsets: Vec::new(), - max_per_chunk: 0, - chunks: 0, - tmp_dir, - settings, - failed: false, - }; + // initialize buffers for each chunk + // + // Having a right sized buffer for each chunk for smallish values seems silly to me? + // + // We will have to have the entire iter in memory sometime right? + // Set minimum to the size of the writer buffer, ~8K - { - let mut total_read = 0; - let mut chunk = Vec::new(); - // Initial buffer is specified by user - let mut adjusted_buffer_size = self.buffer_bytes; - let (iter_size, _) = unsorted.size_hint(); - - // make the initial chunks on disk - for seq in unsorted { - let seq_size = seq.get_size(); - total_read += seq_size; - - // GNU minimum is 16 * (sizeof struct + 2), but GNU uses about - // 1/10 the memory that we do. And GNU even says in the code it may - // not work on small buffer sizes. - // - // The following seems to work pretty well, and has about the same max - // RSS as lower minimum values. 
- // - let minimum_buffer_size: u64 = iter_size as u64 * seq_size / 8; - - adjusted_buffer_size = - // Grow buffer size for a struct/Line larger than buffer - if adjusted_buffer_size < seq_size { - seq_size - } else if adjusted_buffer_size < minimum_buffer_size { - minimum_buffer_size - } else { - adjusted_buffer_size - }; - chunk.push(seq); - - if total_read >= adjusted_buffer_size { - super::sort_by(&mut chunk, &self.settings); - self.write_chunk( - &iter.tmp_dir.path().join(iter.chunks.to_string()), - &mut chunk, - )?; - chunk.clear(); - total_read = 0; - iter.chunks += 1; - } - } - // write the last chunk - if chunk.len() > 0 { - super::sort_by(&mut chunk, &self.settings); - self.write_chunk( - &iter.tmp_dir.path().join(iter.chunks.to_string()), - &mut chunk, - )?; - iter.chunks += 1; - } - - // initialize buffers for each chunk - // - // Having a right sized buffer for each chunk for smallish values seems silly to me? - // - // We will have to have the entire iter in memory sometime right? 
- // Set minimum to the size of the writer buffer, ~8K - // - const MINIMUM_READBACK_BUFFER: u64 = 8200; - let right_sized_buffer = adjusted_buffer_size - .checked_div(iter.chunks) - .unwrap_or(adjusted_buffer_size); - iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER { - right_sized_buffer - } else { - MINIMUM_READBACK_BUFFER - }; - iter.buffers = vec![VecDeque::new(); iter.chunks as usize]; - iter.chunk_offsets = vec![0 as u64; iter.chunks as usize]; - for chunk_num in 0..iter.chunks { - let offset = fill_buff( - &mut iter.buffers[chunk_num as usize], - File::open(iter.tmp_dir.path().join(chunk_num.to_string()))?, - iter.max_per_chunk, - )?; - iter.chunk_offsets[chunk_num as usize] = offset; - } - } - - Ok(iter) + const MINIMUM_READBACK_BUFFER: usize = 8200; + let right_sized_buffer = settings + .buffer_size + .checked_div(iter.chunks) + .unwrap_or(settings.buffer_size); + iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER { + right_sized_buffer + } else { + MINIMUM_READBACK_BUFFER + }; + iter.buffers = vec![VecDeque::new(); iter.chunks]; + iter.chunk_offsets = vec![0; iter.chunks]; + for chunk_num in 0..iter.chunks { + let offset = fill_buff( + &mut iter.buffers[chunk_num], + crash_if_err!( + 1, + File::open(iter.tmp_dir.path().join(chunk_num.to_string())) + ), + iter.max_per_chunk, + &settings, + ); + iter.chunk_offsets[chunk_num] = offset as u64; } - fn write_chunk(&self, file: &PathBuf, chunk: &mut Vec) -> Result<(), Box> { - let new_file = OpenOptions::new().create(true).append(true).open(file)?; - let mut buf_write = Box::new(BufWriter::new(new_file)) as Box; - for s in chunk { - let mut serialized = serde_json::to_string(&s).expect("JSON write error: "); - serialized.push_str("\n"); - buf_write.write(serialized.as_bytes())?; - } - buf_write.flush()?; - - Ok(()) - } + iter } -fn fill_buff( +fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec) { + let new_file = crash_if_err!(1, 
OpenOptions::new().create(true).append(true).open(file)); + let mut buf_write = BufWriter::new(new_file); + for s in chunk { + crash_if_err!(1, buf_write.write_all(s.line.as_bytes())); + crash_if_err!( + 1, + buf_write.write_all(if settings.zero_terminated { "\0" } else { "\n" }.as_bytes(),) + ); + } + crash_if_err!(1, buf_write.flush()); +} + +fn fill_buff( vec: &mut VecDeque, file: File, - max_bytes: u64, -) -> Result> -where - Line: ExternallySortable, -{ + max_bytes: usize, + settings: &GlobalSettings, +) -> usize { let mut total_read = 0; let mut bytes_read = 0; - for line in BufReader::new(file).lines() { - let line_s = line?; + for line in BufReader::new(file).split(if settings.zero_terminated { + b'\0' + } else { + b'\n' + }) { + let line_s = String::from_utf8(crash_if_err!(1, line)).unwrap(); bytes_read += line_s.len() + 1; - // This is where the bad stuff happens usually - let deserialized: Line = serde_json::from_str(&line_s).expect("JSON read error: "); - total_read += deserialized.get_size(); + let deserialized = Line::new(line_s, settings); + total_read += deserialized.estimate_size(); vec.push_back(deserialized); if total_read > max_bytes { break; } } - Ok(bytes_read as u64) + bytes_read } diff --git a/src/uu/sort/src/numeric_str_cmp.rs b/src/uu/sort/src/numeric_str_cmp.rs index b74d97867..f8666b701 100644 --- a/src/uu/sort/src/numeric_str_cmp.rs +++ b/src/uu/sort/src/numeric_str_cmp.rs @@ -14,21 +14,20 @@ //! More specifically, exponent can be understood so that the original number is in (1..10)*10^exponent. //! From that follows the constraints of this algorithm: It is able to compare numbers in ±(1*10^[i64::MIN]..10*10^[i64::MAX]). 
-use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, ops::Range}; -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, Clone)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] enum Sign { Negative, Positive, } -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct NumInfo { exponent: i64, sign: Sign, } -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct NumInfoParseSettings { pub accept_si_units: bool, pub thousands_separator: Option, diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 18d9304fa..7c053e4ad 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -19,7 +19,7 @@ mod external_sort; mod numeric_str_cmp; use clap::{App, Arg}; -use external_sort::{ExternalSorter, ExternallySortable}; +use external_sort::ext_sort; use fnv::FnvHasher; use itertools::Itertools; use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings}; @@ -27,14 +27,13 @@ use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; use semver::Version; -use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::env; use std::fs::File; use std::hash::{Hash, Hasher}; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write}; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; use std::mem::replace; use std::ops::Range; use std::path::Path; @@ -104,7 +103,7 @@ enum SortMode { Default, } #[derive(Clone)] -struct GlobalSettings { +pub struct GlobalSettings { mode: SortMode, debug: bool, ignore_blanks: bool, @@ -204,7 +203,7 @@ impl From<&GlobalSettings> for KeySettings { } } -#[derive(Debug, Serialize, Deserialize, Clone)] +#[derive(Debug, Clone)] /// Represents the string selected by a FieldSelector. 
enum SelectionRange { /// If we had to transform this selection, we have to store a new string. @@ -236,7 +235,7 @@ impl SelectionRange { } } -#[derive(Serialize, Deserialize, Clone)] +#[derive(Clone)] enum NumCache { AsF64(GeneralF64ParseResult), WithInfo(NumInfo), @@ -257,7 +256,8 @@ impl NumCache { } } } -#[derive(Serialize, Deserialize, Clone)] + +#[derive(Clone)] struct Selection { range: SelectionRange, num_cache: NumCache, @@ -272,22 +272,19 @@ impl Selection { type Field = Range; -#[derive(Serialize, Deserialize, Clone)] -struct Line { +#[derive(Clone)] +pub struct Line { line: String, // The common case is not to specify fields. Let's make this fast. selections: SmallVec<[Selection; 1]>, } -impl ExternallySortable for Line { - fn get_size(&self) -> u64 { - // Currently 96 bytes, but that could change, so we get that size here - std::mem::size_of::() as u64 - } -} - impl Line { - fn new(line: String, settings: &GlobalSettings) -> Self { + pub fn estimate_size(&self) -> usize { + self.line.capacity() + self.selections.capacity() * std::mem::size_of::() + } + + pub fn new(line: String, settings: &GlobalSettings) -> Self { let fields = if settings .selectors .iter() @@ -299,7 +296,7 @@ impl Line { None }; - let selections = settings + let selections: SmallVec<[Selection; 1]> = settings .selectors .iter() .map(|selector| { @@ -725,7 +722,7 @@ impl FieldSelector { } struct MergeableFile<'a> { - lines: Lines>>, + lines: Box + 'a>, current_line: Line, settings: &'a GlobalSettings, } @@ -765,11 +762,11 @@ impl<'a> FileMerger<'a> { settings, } } - fn push_file(&mut self, mut lines: Lines>>) { - if let Some(Ok(next_line)) = lines.next() { + fn push_file(&mut self, mut lines: Box + 'a>) { + if let Some(next_line) = lines.next() { let mergeable_file = MergeableFile { lines, - current_line: Line::new(next_line, &self.settings), + current_line: next_line, settings: &self.settings, }; self.heap.push(mergeable_file); @@ -783,11 +780,8 @@ impl<'a> Iterator for 
FileMerger<'a> { match self.heap.pop() { Some(mut current) => { match current.lines.next() { - Some(Ok(next_line)) => { - let ret = replace( - &mut current.current_line, - Line::new(next_line, &self.settings), - ); + Some(next_line) => { + let ret = replace(&mut current.current_line, next_line); self.heap.push(current); Some(ret) } @@ -1155,90 +1149,108 @@ pub fn uumain(args: impl uucore::Args) -> i32 { exec(files, settings) } -fn exec(files: Vec, settings: GlobalSettings) -> i32 { - let mut lines = Vec::new(); - let mut file_merger = FileMerger::new(&settings); +fn file_to_lines_iter<'a>( + file: &str, + settings: &'a GlobalSettings, +) -> Option + 'a> { + let (reader, _) = match open(file) { + Some(x) => x, + None => return None, + }; - for path in &files { - let (reader, _) = match open(path) { - Some(x) => x, - None => continue, - }; + let buf_reader = BufReader::new(reader); - let buf_reader = BufReader::new(reader); - - if settings.merge { - file_merger.push_file(buf_reader.lines()); - } else if settings.zero_terminated { - for line in buf_reader.split(b'\0').flatten() { - lines.push(Line::new( - std::str::from_utf8(&line) - .expect("Could not parse string from zero terminated input.") + Some( + buf_reader + .split(if settings.zero_terminated { + b'\0' + } else { + b'\n' + }) + .map(move |line| { + Line::new( + std::str::from_utf8(&line.unwrap()) + .expect("input is not valid utf-8") .to_string(), - &settings, - )); - } + settings, + ) + }), + ) +} + +fn output_sorted_lines(iter: impl Iterator, settings: &GlobalSettings) { + if settings.unique { + print_sorted( + iter.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), + &settings, + ); + } else { + print_sorted(iter, &settings); + } +} + +fn exec(files: Vec, settings: GlobalSettings) -> i32 { + if settings.merge { + let mut file_merger = FileMerger::new(&settings); + for lines in files + .iter() + .filter_map(|file| file_to_lines_iter(file, &settings)) + { + 
file_merger.push_file(Box::new(lines)); + } + output_sorted_lines(file_merger, &settings); + } else { + let lines = files + .iter() + .filter_map(|file| file_to_lines_iter(file, &settings)) + .flatten(); + + if settings.check { + return exec_check_file(lines, &settings); + } + + // Only use ext_sorter when we need to. + // Probably faster that we don't create + // an owned value each run + if settings.ext_sort { + let sorted_lines = ext_sort(lines, &settings); + output_sorted_lines(sorted_lines, &settings); } else { - for line in buf_reader.lines() { - if let Ok(n) = line { - lines.push(Line::new(n, &settings)); + let mut lines = vec![]; + + // This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression. + for (file, _) in files.iter().map(|file| open(file)).flatten() { + let buf_reader = BufReader::new(file); + for line in buf_reader.split(if settings.zero_terminated { + b'\0' } else { - break; + b'\n' + }) { + let string = String::from_utf8(line.unwrap()).unwrap(); + lines.push(Line::new(string, &settings)); } } + + sort_by(&mut lines, &settings); + output_sorted_lines(lines.into_iter(), &settings); } } - if settings.check { - return exec_check_file(&lines, &settings); - } - - // Only use ext_sorter when we need to. 
- // Probably faster that we don't create - // an owned value each run - if settings.ext_sort { - lines = ext_sort_by(lines, settings.clone()); - } else { - sort_by(&mut lines, &settings); - } - - if settings.merge { - if settings.unique { - print_sorted( - file_merger.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), - &settings, - ) - } else { - print_sorted(file_merger, &settings) - } - } else if settings.unique { - print_sorted( - lines - .into_iter() - .dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), - &settings, - ) - } else { - print_sorted(lines.into_iter(), &settings) - } - 0 } -fn exec_check_file(unwrapped_lines: &[Line], settings: &GlobalSettings) -> i32 { +fn exec_check_file(unwrapped_lines: impl Iterator, settings: &GlobalSettings) -> i32 { // errors yields the line before each disorder, // plus the last line (quirk of .coalesce()) - let mut errors = - unwrapped_lines - .iter() - .enumerate() - .coalesce(|(last_i, last_line), (i, line)| { - if compare_by(&last_line, &line, &settings) == Ordering::Greater { - Err(((last_i, last_line), (i, line))) - } else { - Ok((i, line)) - } - }); + let mut errors = unwrapped_lines + .enumerate() + .coalesce(|(last_i, last_line), (i, line)| { + if compare_by(&last_line, &line, &settings) == Ordering::Greater { + Err(((last_i, last_line), (i, line))) + } else { + Ok((i, line)) + } + }); if let Some((first_error_index, _line)) = errors.next() { // Check for a second "error", as .coalesce() always returns the last // line, no matter what our merging function does. 
@@ -1257,20 +1269,6 @@ fn exec_check_file(unwrapped_lines: &[Line], settings: &GlobalSettings) -> i32 { } } -fn ext_sort_by(unsorted: Vec, settings: GlobalSettings) -> Vec { - let external_sorter = ExternalSorter::new( - settings.buffer_size as u64, - Some(settings.tmp_dir.clone()), - settings.clone(), - ); - let iter = external_sorter - .sort_by(unsorted.into_iter(), settings.clone()) - .unwrap() - .map(|x| x.unwrap()) - .collect::>(); - iter -} - fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { if settings.stable || settings.unique { unsorted.par_sort_by(|a, b| compare_by(a, b, &settings)) @@ -1375,7 +1373,7 @@ fn get_leading_gen(input: &str) -> Range { leading_whitespace_len..input.len() } -#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, PartialOrd)] +#[derive(Copy, Clone, PartialEq, PartialOrd)] enum GeneralF64ParseResult { Invalid, NaN, From 83554f4475a2c1c9ddbfe33bedc9ef53bafd922d Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 1 May 2021 17:48:01 +0200 Subject: [PATCH 004/148] add benchmarking instructions --- src/uu/sort/BENCHMARKING.md | 6 ++++++ src/uu/sort/src/sort.rs | 6 ++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index 1caea0326..1810e8a4e 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -69,6 +69,12 @@ Run `cargo build --release` before benchmarking after you make a change! - Benchmark numeric sorting with hyperfine: `hyperfine "target/release/coreutils sort shuffled_numbers_si.txt -h -o output.txt"`. +## External sorting + +Try running commands with the `-S` option set to an amount of memory to be used, such as `1M`. Additionally, you could try sorting +huge files (ideally multiple Gigabytes) with `-S`. Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` +multiple times (this will add the contents of `shuffled_wordlist.txt` to itself). 
+ ## Stdout and stdin performance Try to run the above benchmarks by piping the input through stdin (standard input) and redirect the diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 7c053e4ad..be7944a0f 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1169,9 +1169,7 @@ fn file_to_lines_iter<'a>( }) .map(move |line| { Line::new( - std::str::from_utf8(&line.unwrap()) - .expect("input is not valid utf-8") - .to_string(), + crash_if_err!(1, String::from_utf8(crash_if_err!(1, line))), settings, ) }), @@ -1226,7 +1224,7 @@ fn exec(files: Vec, settings: GlobalSettings) -> i32 { } else { b'\n' }) { - let string = String::from_utf8(line.unwrap()).unwrap(); + let string = crash_if_err!(1, String::from_utf8(crash_if_err!(1, line))); lines.push(Line::new(string, &settings)); } } From b21a309c3f75d77b7e1c4f21a145e8ff893509e3 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 1 May 2021 21:29:18 +0200 Subject: [PATCH 005/148] add a benchmarking example --- src/uu/sort/BENCHMARKING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index 1810e8a4e..17a944b29 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -74,6 +74,7 @@ Run `cargo build --release` before benchmarking after you make a change! Try running commands with the `-S` option set to an amount of memory to be used, such as `1M`. Additionally, you could try sorting huge files (ideally multiple Gigabytes) with `-S`. Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` multiple times (this will add the contents of `shuffled_wordlist.txt` to itself). 
+Example: Run `hyperfine "target/release/coreutils sort shuffled_wordlist.txt -S 1M"` ## Stdout and stdin performance From 484558e37dbc95ba583a4210205164ef5521655b Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 1 May 2021 21:38:36 +0200 Subject: [PATCH 006/148] Update src/uu/sort/BENCHMARKING.md Co-authored-by: Sylvestre Ledru --- src/uu/sort/BENCHMARKING.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index 17a944b29..71c331105 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -74,7 +74,8 @@ Run `cargo build --release` before benchmarking after you make a change! Try running commands with the `-S` option set to an amount of memory to be used, such as `1M`. Additionally, you could try sorting huge files (ideally multiple Gigabytes) with `-S`. Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` multiple times (this will add the contents of `shuffled_wordlist.txt` to itself). 
-Example: Run `hyperfine "target/release/coreutils sort shuffled_wordlist.txt -S 1M"` +Example: Run `hyperfine './target/release/coreutils sort shuffled_wordlist.txt -S 1M' 'sort shuffled_wordlist.txt -S 1M'` +` ## Stdout and stdin performance From dc5bd9f0bed39fa659edd43ac7c670c71ebf53bc Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 2 May 2021 17:27:44 +0200 Subject: [PATCH 007/148] improve memory usage estimation --- src/uu/sort/src/sort.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index be7944a0f..7436b9fda 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -281,7 +281,9 @@ pub struct Line { impl Line { pub fn estimate_size(&self) -> usize { - self.line.capacity() + self.selections.capacity() * std::mem::size_of::() + self.line.capacity() + + self.selections.capacity() * std::mem::size_of::() + + std::mem::size_of::() } pub fn new(line: String, settings: &GlobalSettings) -> Self { From 8b35dd974141218987749cf3b60ba8f5a190c1a0 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 2 May 2021 17:27:52 +0200 Subject: [PATCH 008/148] add requested tests --- tests/by-util/test_sort.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index eac9490a5..4465e861f 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -37,7 +37,29 @@ fn test_larger_than_specified_segment() { .arg("50K") .arg("ext_sort.txt") .succeeds() - .stdout_is_fixture(format!("{}", "ext_sort.expected")); + .stdout_is_fixture("ext_sort.expected"); +} + +#[test] +fn test_smaller_than_specified_segment() { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg("100M") + .arg("ext_sort.txt") + .succeeds() + .stdout_is_fixture("ext_sort.expected"); +} + +#[test] +fn test_extsort_zero_terminated() { + new_ucmd!() + .arg("-z") + .arg("-S") + .arg("10K") + .arg("zero-terminated.txt") + 
.succeeds() + .stdout_is_fixture("zero-terminated.expected"); } #[test] From 56761ba584112c84f71523b2a3ef101bbc14ae49 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 3 May 2021 22:30:56 +0200 Subject: [PATCH 009/148] stat: implement support for macos --- src/uu/stat/Cargo.toml | 1 + src/uu/stat/src/fsext.rs | 187 +++++++++++++++++++++++++++++++++++++ src/uu/stat/src/stat.rs | 13 +-- tests/by-util/test_stat.rs | 14 +++ 4 files changed, 205 insertions(+), 10 deletions(-) diff --git a/src/uu/stat/Cargo.toml b/src/uu/stat/Cargo.toml index 96bf63ffe..43c5432f8 100644 --- a/src/uu/stat/Cargo.toml +++ b/src/uu/stat/Cargo.toml @@ -17,6 +17,7 @@ path = "src/stat.rs" [dependencies] clap = "2.33" time = "0.1.40" +libc = "0.2" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/stat/src/fsext.rs b/src/uu/stat/src/fsext.rs index d90099892..4e949047d 100644 --- a/src/uu/stat/src/fsext.rs +++ b/src/uu/stat/src/fsext.rs @@ -9,6 +9,12 @@ extern crate time; +#[cfg(target_os = "linux")] +static LINUX_MTAB: &str = "/etc/mtab"; +#[cfg(target_os = "linux")] +static LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; +static MOUNT_OPT_BIND: &str = "bind"; + use self::time::Timespec; use std::time::UNIX_EPOCH; pub use uucore::libc::{ @@ -413,3 +419,184 @@ pub fn pretty_fstype<'a>(fstype: i64) -> Cow<'a, str> { other => format!("UNKNOWN ({:#x})", other).into(), } } + +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +extern "C" { + #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] + #[link_name = "getmntinfo$INODE64"] + fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; + + #[cfg(any( + all(target_os = "freebsd"), + all(target_vendor = "apple", target_arch = "aarch64") + ))] + fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; +} + +#[derive(Debug, Clone)] +pub struct MountInfo { + // 
it stores `volume_name` in windows platform and `dev_id` in unix platform + dev_id: String, + dev_name: String, + fs_type: String, + pub mount_dir: String, + mount_option: String, // we only care "bind" option + mount_root: String, + remote: bool, + dummy: bool, +} + +impl MountInfo { + fn set_missing_fields(&mut self) { + #[cfg(unix)] + { + // We want to keep the dev_id on Windows + // but set dev_id + let path = CString::new(self.mount_dir.clone()).unwrap(); + unsafe { + let mut stat = mem::zeroed(); + if libc::stat(path.as_ptr(), &mut stat) == 0 { + self.dev_id = (stat.st_dev as i32).to_string(); + } else { + self.dev_id = "".to_string(); + } + } + } + // set MountInfo::dummy + match self.fs_type.as_ref() { + "autofs" | "proc" | "subfs" + /* for Linux 2.6/3.x */ + | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" + /* FreeBSD, Linux 2.4 */ + | "devfs" + /* for NetBSD 3.0 */ + | "kernfs" + /* for Irix 6.5 */ + | "ignore" => self.dummy = true, + _ => self.dummy = self.fs_type == "none" + && self.mount_option.find(MOUNT_OPT_BIND).is_none(), + } + // set MountInfo::remote + #[cfg(unix)] + { + if self.dev_name.find(':').is_some() + || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" + || self.fs_type == "cifs") + || self.dev_name == "-hosts" + { + self.remote = true; + } else { + self.remote = false; + } + } + } + + #[cfg(target_os = "linux")] + fn new(file_name: &str, raw: Vec<&str>) -> Option { + match file_name { + // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + // "man proc" for more details + "/proc/self/mountinfo" => { + let mut m = MountInfo { + dev_id: "".to_string(), + dev_name: raw[9].to_string(), + fs_type: raw[8].to_string(), + mount_root: raw[3].to_string(), + mount_dir: raw[4].to_string(), + mount_option: raw[5].to_string(), + remote: false, + dummy: false, + }; + m.set_missing_fields(); + Some(m) + } + "/etc/mtab" => { + let mut m = MountInfo { + dev_id: "".to_string(), + 
dev_name: raw[0].to_string(), + fs_type: raw[2].to_string(), + mount_root: "".to_string(), + mount_dir: raw[1].to_string(), + mount_option: raw[3].to_string(), + remote: false, + dummy: false, + }; + m.set_missing_fields(); + Some(m) + } + _ => None, + } + } +} + +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::ffi::CStr; +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +impl From for MountInfo { + fn from(statfs: Sstatfs) -> Self { + let mut info = MountInfo { + dev_id: "".to_string(), + dev_name: unsafe { + CStr::from_ptr(&statfs.f_mntfromname[0]) + .to_string_lossy() + .into_owned() + }, + fs_type: unsafe { + CStr::from_ptr(&statfs.f_fstypename[0]) + .to_string_lossy() + .into_owned() + }, + mount_dir: unsafe { + CStr::from_ptr(&statfs.f_mntonname[0]) + .to_string_lossy() + .into_owned() + }, + mount_root: "".to_string(), + mount_option: "".to_string(), + remote: false, + dummy: false, + }; + info.set_missing_fields(); + info + } +} + +#[cfg(target_os = "linux")] +use std::fs::File; +#[cfg(target_os = "linux")] +use std::io::{BufRead, BufReader}; +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::ptr; +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::slice; +pub fn read_fs_list() -> Vec { + #[cfg(target_os = "linux")] + { + let (file_name, fobj) = File::open(LINUX_MOUNTINFO) + .map(|f| (LINUX_MOUNTINFO, f)) + .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f))) + .expect("failed to find mount list files"); + let reader = BufReader::new(fobj); + reader + .lines() + .filter_map(|line| line.ok()) + .filter_map(|line| { + let raw_data = line.split_whitespace().collect::>(); + MountInfo::new(file_name, raw_data) + }) + .collect::>() + } + #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] + { + let mut mptr: *mut Sstatfs = ptr::null_mut(); + let len = unsafe { getmntinfo(&mut mptr, 1 as c_int) }; + if len < 0 { + crash!(1, "getmntinfo failed"); + } + let mounts = unsafe { 
slice::from_raw_parts(mptr, len as usize) }; + mounts + .iter() + .map(|m| MountInfo::from(*m)) + .collect::>() + } +} diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 5216fb293..dab5f6d97 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -18,8 +18,6 @@ use uucore::entries; use clap::{App, Arg, ArgMatches}; use std::borrow::Cow; use std::convert::AsRef; -use std::fs::File; -use std::io::{BufRead, BufReader}; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::path::Path; use std::{cmp, fs, iter}; @@ -97,7 +95,6 @@ pub mod options { static ARG_FILES: &str = "files"; -const MOUNT_INFO: &str = "/etc/mtab"; pub const F_ALTER: u8 = 1; pub const F_ZERO: u8 = 1 << 1; pub const F_LEFT: u8 = 1 << 2; @@ -490,13 +487,9 @@ impl Stater { // mount points aren't displayed when showing filesystem information None } else { - let reader = BufReader::new( - File::open(MOUNT_INFO).unwrap_or_else(|_| panic!("Failed to read {}", MOUNT_INFO)), - ); - let mut mount_list = reader - .lines() - .filter_map(Result::ok) - .filter_map(|line| line.split_whitespace().nth(1).map(ToOwned::to_owned)) + let mut mount_list = read_fs_list() + .iter() + .map(|mi| mi.mount_dir.clone()) .collect::>(); // Reverse sort. The longer comes first. 
mount_list.sort(); diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 60d735c51..0069d2f0d 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -317,6 +317,20 @@ fn test_multi_files() { .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_os = "freebsd", target_vendor = "apple"))] +#[test] +fn test_one_file() { + let (at, mut ucmd) = at_and_ucmd!(); + let file = "TEST_FILE.mp4"; + at.touch(file); + + ucmd.arg(file) + .succeeds() + .stdout_contains(format!("File: `{}'", file)) + .stdout_contains(format!("Size: 0")) + .stdout_contains(format!("Access: (0644/-rw-r--r--)")); +} + #[test] #[cfg(target_os = "linux")] fn test_printf() { From 5bcfa88f0ab75c32b5278150531b8affdce53f94 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 3 May 2021 23:09:45 +0200 Subject: [PATCH 010/148] stat: fix test to ignore selinux related output --- tests/by-util/test_stat.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 60d735c51..7b7e990f4 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -198,9 +198,16 @@ fn test_terse_normal_format() { let expect = expected_result(&args); println!("actual: {:?}", actual); println!("expect: {:?}", expect); - let v_actual: Vec<&str> = actual.split(' ').collect(); - let v_expect: Vec<&str> = expect.split(' ').collect(); + let v_actual: Vec<&str> = actual.trim().split(' ').collect(); + let mut v_expect: Vec<&str> = expect.trim().split(' ').collect(); assert!(!v_expect.is_empty()); + + // uu_stat does not support selinux + if v_actual.len() == v_expect.len() - 1 && v_expect[v_expect.len() - 1].contains(":") { + // assume last element contains: `SELinux security context string` + v_expect.pop(); + } + // * allow for inequality if `stat` (aka, expect) returns "0" (unknown value) assert!( expect == "0" From 231bb7be93639576bdc553ae7a6fa2f7f5568ddc Mon Sep 17 00:00:00 
2001 From: rethab Date: Wed, 5 May 2021 22:59:40 +0200 Subject: [PATCH 011/148] Migrate mknod to clap, closes #2051 (#2056) * mknod: add tests for fifo * mknod: add test for character device --- .gitignore | 1 + Cargo.lock | 2 +- src/uu/mknod/Cargo.toml | 2 +- src/uu/mknod/src/mknod.rs | 304 +++++++++++++++------------- src/uu/mknod/src/parsemode.rs | 54 +++++ src/uucore/src/lib/features/mode.rs | 33 ++- tests/by-util/test_mknod.rs | 125 +++++++++++- tests/common/util.rs | 16 +- 8 files changed, 371 insertions(+), 166 deletions(-) create mode 100644 src/uu/mknod/src/parsemode.rs diff --git a/.gitignore b/.gitignore index b1ac52506..11f46e13e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ target/ Cargo.lock lib*.a /docs/_build +*.iml diff --git a/Cargo.lock b/Cargo.lock index 6ff3cd5c1..62fa80c2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2116,7 +2116,7 @@ dependencies = [ name = "uu_mknod" version = "0.0.6" dependencies = [ - "getopts", + "clap", "libc", "uucore", "uucore_procs", diff --git a/src/uu/mknod/Cargo.toml b/src/uu/mknod/Cargo.toml index 2c3ac8fb9..1320e3546 100644 --- a/src/uu/mknod/Cargo.toml +++ b/src/uu/mknod/Cargo.toml @@ -16,7 +16,7 @@ name = "uu_mknod" path = "src/mknod.rs" [dependencies] -getopts = "0.2.18" +clap = "2.33" libc = "^0.2.42" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["mode"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/mknod/src/mknod.rs b/src/uu/mknod/src/mknod.rs index fc6fb0870..5b6c2fa8c 100644 --- a/src/uu/mknod/src/mknod.rs +++ b/src/uu/mknod/src/mknod.rs @@ -5,21 +5,41 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore (ToDO) parsemode makedev sysmacros makenod newmode perror IFBLK IFCHR IFIFO +// spell-checker:ignore (ToDO) parsemode makedev sysmacros perror IFBLK IFCHR IFIFO #[macro_use] extern crate uucore; +use std::ffi::CString; + +use clap::{App, Arg, ArgMatches}; use libc::{dev_t, mode_t}; use libc::{S_IFBLK, S_IFCHR, S_IFIFO, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR}; -use getopts::Options; - -use std::ffi::CString; use uucore::InvalidEncodingHandling; static NAME: &str = "mknod"; static VERSION: &str = env!("CARGO_PKG_VERSION"); +static ABOUT: &str = "Create the special file NAME of the given TYPE."; +static USAGE: &str = "mknod [OPTION]... NAME TYPE [MAJOR MINOR]"; +static LONG_HELP: &str = "Mandatory arguments to long options are mandatory for short options too. +-m, --mode=MODE set file permission bits to MODE, not a=rw - umask +--help display this help and exit +--version output version information and exit + +Both MAJOR and MINOR must be specified when TYPE is b, c, or u, and they +must be omitted when TYPE is p. If MAJOR or MINOR begins with 0x or 0X, +it is interpreted as hexadecimal; otherwise, if it begins with 0, as octal; +otherwise, as decimal. TYPE may be: + +b create a block (buffered) special file +c, u create a character (unbuffered) special file +p create a FIFO + +NOTE: your shell may have its own version of mknod, which usually supersedes +the version described here. Please refer to your shell's documentation +for details about the options it supports. 
+"; const MODE_RW_UGO: mode_t = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; @@ -30,13 +50,35 @@ fn makedev(maj: u64, min: u64) -> dev_t { } #[cfg(windows)] -fn _makenod(path: CString, mode: mode_t, dev: dev_t) -> i32 { +fn _makenod(file_name: &str, mode: mode_t, dev: dev_t) -> i32 { panic!("Unsupported for windows platform") } #[cfg(unix)] -fn _makenod(path: CString, mode: mode_t, dev: dev_t) -> i32 { - unsafe { libc::mknod(path.as_ptr(), mode, dev) } +fn _makenod(file_name: &str, mode: mode_t, dev: dev_t) -> i32 { + let c_str = CString::new(file_name).expect("Failed to convert to CString"); + + // the user supplied a mode + let set_umask = mode & MODE_RW_UGO != MODE_RW_UGO; + + unsafe { + // store prev umask + let last_umask = if set_umask { libc::umask(0) } else { 0 }; + + let errno = libc::mknod(c_str.as_ptr(), mode, dev); + + // set umask back to original value + if set_umask { + libc::umask(last_umask); + } + + if errno == -1 { + let c_str = CString::new(NAME).expect("Failed to convert to CString"); + // shows the error from the mknod syscall + libc::perror(c_str.as_ptr()); + } + errno + } } #[allow(clippy::cognitive_complexity)] @@ -44,156 +86,136 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); - - let mut opts = Options::new(); - // Linux-specific options, not implemented // opts.optflag("Z", "", "set the SELinux security context to default type"); // opts.optopt("", "context", "like -Z, or if CTX is specified then set the SELinux or SMACK security context to CTX"); - opts.optopt( - "m", - "mode", - "set file permission bits to MODE, not a=rw - umask", - "MODE", - ); - opts.optflag("", "help", "display this help and exit"); - opts.optflag("", "version", "output version information and exit"); + let matches = App::new(executable!()) + .version(VERSION) + .usage(USAGE) + .after_help(LONG_HELP) + .about(ABOUT) + .arg( + Arg::with_name("mode") + .short("m") + 
.long("mode") + .value_name("MODE") + .help("set file permission bits to MODE, not a=rw - umask"), + ) + .arg( + Arg::with_name("name") + .value_name("NAME") + .help("name of the new file") + .required(true) + .index(1), + ) + .arg( + Arg::with_name("type") + .value_name("TYPE") + .help("type of the new file (b, c, u or p)") + .required(true) + .validator(valid_type) + .index(2), + ) + .arg( + Arg::with_name("major") + .value_name("MAJOR") + .help("major file type") + .validator(valid_u64) + .index(3), + ) + .arg( + Arg::with_name("minor") + .value_name("MINOR") + .help("minor file type") + .validator(valid_u64) + .index(4), + ) + .get_matches_from(args); - let matches = match opts.parse(&args[1..]) { - Ok(m) => m, - Err(f) => crash!(1, "{}\nTry '{} --help' for more information.", f, NAME), + let mode = match get_mode(&matches) { + Ok(mode) => mode, + Err(err) => { + show_info!("{}", err); + return 1; + } }; - if matches.opt_present("help") { - println!( - "Usage: {0} [OPTION]... NAME TYPE [MAJOR MINOR] + let file_name = matches.value_of("name").expect("Missing argument 'NAME'"); -Mandatory arguments to long options are mandatory for short options too. - -m, --mode=MODE set file permission bits to MODE, not a=rw - umask - --help display this help and exit - --version output version information and exit + // Only check the first character, to allow mnemonic usage like + // 'mknod /dev/rst0 character 18 0'. + let ch = matches + .value_of("type") + .expect("Missing argument 'TYPE'") + .chars() + .next() + .expect("Failed to get the first char"); -Both MAJOR and MINOR must be specified when TYPE is b, c, or u, and they -must be omitted when TYPE is p. If MAJOR or MINOR begins with 0x or 0X, -it is interpreted as hexadecimal; otherwise, if it begins with 0, as octal; -otherwise, as decimal. 
TYPE may be: - - b create a block (buffered) special file - c, u create a character (unbuffered) special file - p create a FIFO - -NOTE: your shell may have its own version of mknod, which usually supersedes -the version described here. Please refer to your shell's documentation -for details about the options it supports.", - NAME - ); - return 0; - } - - if matches.opt_present("version") { - println!("{} {}", NAME, VERSION); - return 0; - } - - let mut last_umask: mode_t = 0; - let mut newmode: mode_t = MODE_RW_UGO; - if matches.opt_present("mode") { - match uucore::mode::parse_mode(matches.opt_str("mode")) { - Ok(parsed) => { - if parsed > 0o777 { - show_info!("mode must specify only file permission bits"); - return 1; - } - newmode = parsed; - } - Err(e) => { - show_info!("{}", e); - return 1; - } + if ch == 'p' { + if matches.is_present("major") || matches.is_present("minor") { + eprintln!("Fifos do not have major and minor device numbers."); + eprintln!("Try '{} --help' for more information.", NAME); + 1 + } else { + _makenod(file_name, S_IFIFO | mode, 0) } - unsafe { - last_umask = libc::umask(0); - } - } + } else { + match (matches.value_of("major"), matches.value_of("minor")) { + (None, None) | (_, None) | (None, _) => { + eprintln!("Special files require major and minor device numbers."); + eprintln!("Try '{} --help' for more information.", NAME); + 1 + } + (Some(major), Some(minor)) => { + let major = major.parse::().expect("validated by clap"); + let minor = minor.parse::().expect("validated by clap"); - let mut ret = 0i32; - match matches.free.len() { - 0 => show_usage_error!("missing operand"), - 1 => show_usage_error!("missing operand after ‘{}’", matches.free[0]), - _ => { - let args = &matches.free; - let c_str = CString::new(args[0].as_str()).expect("Failed to convert to CString"); - - // Only check the first character, to allow mnemonic usage like - // 'mknod /dev/rst0 character 18 0'. 
- let ch = args[1] - .chars() - .next() - .expect("Failed to get the first char"); - - if ch == 'p' { - if args.len() > 2 { - show_info!("{}: extra operand ‘{}’", NAME, args[2]); - if args.len() == 4 { - eprintln!("Fifos do not have major and minor device numbers."); - } - eprintln!("Try '{} --help' for more information.", NAME); - return 1; - } - - ret = _makenod(c_str, S_IFIFO | newmode, 0); - } else { - if args.len() < 4 { - show_info!("missing operand after ‘{}’", args[args.len() - 1]); - if args.len() == 2 { - eprintln!("Special files require major and minor device numbers."); - } - eprintln!("Try '{} --help' for more information.", NAME); - return 1; - } else if args.len() > 4 { - show_usage_error!("extra operand ‘{}’", args[4]); - return 1; - } else if !"bcu".contains(ch) { - show_usage_error!("invalid device type ‘{}’", args[1]); - return 1; - } - - let maj = args[2].parse::(); - let min = args[3].parse::(); - if maj.is_err() { - show_info!("invalid major device number ‘{}’", args[2]); - return 1; - } else if min.is_err() { - show_info!("invalid minor device number ‘{}’", args[3]); - return 1; - } - - let (maj, min) = (maj.unwrap(), min.unwrap()); - let dev = makedev(maj, min); + let dev = makedev(major, minor); if ch == 'b' { // block special file - ret = _makenod(c_str, S_IFBLK | newmode, dev); - } else { + _makenod(file_name, S_IFBLK | mode, dev) + } else if ch == 'c' || ch == 'u' { // char special file - ret = _makenod(c_str, S_IFCHR | newmode, dev); + _makenod(file_name, S_IFCHR | mode, dev) + } else { + unreachable!("{} was validated to be only b, c or u", ch); } } } } - - if last_umask != 0 { - unsafe { - libc::umask(last_umask); - } - } - if ret == -1 { - let c_str = CString::new(format!("{}: {}", NAME, matches.free[0]).as_str()) - .expect("Failed to convert to CString"); - unsafe { - libc::perror(c_str.as_ptr()); - } - } - - ret +} + +fn get_mode(matches: &ArgMatches) -> Result { + match matches.value_of("mode") { + None => Ok(MODE_RW_UGO), + 
Some(str_mode) => uucore::mode::parse_mode(str_mode) + .map_err(|e| format!("invalid mode ({})", e)) + .and_then(|mode| { + if mode > 0o777 { + Err("mode must specify only file permission bits".to_string()) + } else { + Ok(mode) + } + }), + } +} + +fn valid_type(tpe: String) -> Result<(), String> { + // Only check the first character, to allow mnemonic usage like + // 'mknod /dev/rst0 character 18 0'. + tpe.chars() + .next() + .ok_or_else(|| "missing device type".to_string()) + .and_then(|first_char| { + if vec!['b', 'c', 'u', 'p'].contains(&first_char) { + Ok(()) + } else { + Err(format!("invalid device type ‘{}’", tpe)) + } + }) +} + +fn valid_u64(num: String) -> Result<(), String> { + num.parse::().map(|_| ()).map_err(|_| num) } diff --git a/src/uu/mknod/src/parsemode.rs b/src/uu/mknod/src/parsemode.rs new file mode 100644 index 000000000..026fc4a56 --- /dev/null +++ b/src/uu/mknod/src/parsemode.rs @@ -0,0 +1,54 @@ +// spell-checker:ignore (ToDO) fperm + +use libc::{mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR}; + +use uucore::mode; + +pub const MODE_RW_UGO: mode_t = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + +pub fn parse_mode(mode: &str) -> Result { + let arr: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; + let result = if mode.contains(arr) { + mode::parse_numeric(MODE_RW_UGO as u32, mode) + } else { + mode::parse_symbolic(MODE_RW_UGO as u32, mode, true) + }; + result.map(|mode| mode as mode_t) +} + +#[cfg(test)] +mod test { + /// Test if the program is running under WSL + // ref: @@ + // ToDO: test on WSL2 which likely doesn't need special handling; plan change to `is_wsl_1()` if WSL2 is less needy + pub fn is_wsl() -> bool { + #[cfg(target_os = "linux")] + { + if let Ok(b) = std::fs::read("/proc/sys/kernel/osrelease") { + if let Ok(s) = std::str::from_utf8(&b) { + let a = s.to_ascii_lowercase(); + return a.contains("microsoft") || a.contains("wsl"); + } + } + } + false + } + + #[test] + fn symbolic_modes() 
{ + assert_eq!(super::parse_mode("u+x").unwrap(), 0o766); + assert_eq!( + super::parse_mode("+x").unwrap(), + if !is_wsl() { 0o777 } else { 0o776 } + ); + assert_eq!(super::parse_mode("a-w").unwrap(), 0o444); + assert_eq!(super::parse_mode("g-r").unwrap(), 0o626); + } + + #[test] + fn numeric_modes() { + assert_eq!(super::parse_mode("644").unwrap(), 0o644); + assert_eq!(super::parse_mode("+100").unwrap(), 0o766); + assert_eq!(super::parse_mode("-4").unwrap(), 0o662); + } +} diff --git a/src/uucore/src/lib/features/mode.rs b/src/uucore/src/lib/features/mode.rs index 1bb79ac03..4fb5a6509 100644 --- a/src/uucore/src/lib/features/mode.rs +++ b/src/uucore/src/lib/features/mode.rs @@ -132,19 +132,15 @@ fn parse_change(mode: &str, fperm: u32, considering_dir: bool) -> (u32, usize) { (srwx, pos) } -pub fn parse_mode(mode: Option) -> Result { +pub fn parse_mode(mode: &str) -> Result { let fperm = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - if let Some(mode) = mode { - let arr: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; - let result = if mode.contains(arr) { - parse_numeric(fperm as u32, mode.as_str()) - } else { - parse_symbolic(fperm as u32, mode.as_str(), true) - }; - result.map(|mode| mode as mode_t) + let arr: &[char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']; + let result = if mode.contains(arr) { + parse_numeric(fperm as u32, mode) } else { - Ok(fperm) - } + parse_symbolic(fperm as u32, mode, true) + }; + result.map(|mode| mode as mode_t) } #[cfg(test)] @@ -152,20 +148,19 @@ mod test { #[test] fn symbolic_modes() { - assert_eq!(super::parse_mode(Some("u+x".to_owned())).unwrap(), 0o766); + assert_eq!(super::parse_mode("u+x").unwrap(), 0o766); assert_eq!( - super::parse_mode(Some("+x".to_owned())).unwrap(), + super::parse_mode("+x").unwrap(), if !crate::os::is_wsl_1() { 0o777 } else { 0o776 } ); - assert_eq!(super::parse_mode(Some("a-w".to_owned())).unwrap(), 0o444); - 
assert_eq!(super::parse_mode(Some("g-r".to_owned())).unwrap(), 0o626); + assert_eq!(super::parse_mode("a-w").unwrap(), 0o444); + assert_eq!(super::parse_mode("g-r").unwrap(), 0o626); } #[test] fn numeric_modes() { - assert_eq!(super::parse_mode(Some("644".to_owned())).unwrap(), 0o644); - assert_eq!(super::parse_mode(Some("+100".to_owned())).unwrap(), 0o766); - assert_eq!(super::parse_mode(Some("-4".to_owned())).unwrap(), 0o662); - assert_eq!(super::parse_mode(None).unwrap(), 0o666); + assert_eq!(super::parse_mode("644").unwrap(), 0o644); + assert_eq!(super::parse_mode("+100").unwrap(), 0o766); + assert_eq!(super::parse_mode("-4").unwrap(), 0o662); } } diff --git a/tests/by-util/test_mknod.rs b/tests/by-util/test_mknod.rs index 651491045..1d39372ac 100644 --- a/tests/by-util/test_mknod.rs +++ b/tests/by-util/test_mknod.rs @@ -1 +1,124 @@ -// ToDO: add tests +use crate::common::util::*; + +#[cfg(not(windows))] +#[test] +fn test_mknod_help() { + new_ucmd!() + .arg("--help") + .succeeds() + .no_stderr() + .stdout_contains("USAGE:"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_version() { + assert!(new_ucmd!() + .arg("--version") + .succeeds() + .no_stderr() + .stdout_str() + .starts_with("mknod")); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_default_writable() { + let ts = TestScenario::new(util_name!()); + ts.ucmd().arg("test_file").arg("p").succeeds(); + assert!(ts.fixtures.is_fifo("test_file")); + assert!(!ts.fixtures.metadata("test_file").permissions().readonly()); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_mnemonic_usage() { + let ts = TestScenario::new(util_name!()); + ts.ucmd().arg("test_file").arg("pipe").succeeds(); + assert!(ts.fixtures.is_fifo("test_file")); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_read_only() { + let ts = TestScenario::new(util_name!()); + ts.ucmd() + .arg("-m") + .arg("a=r") + .arg("test_file") + .arg("p") + .succeeds(); + assert!(ts.fixtures.is_fifo("test_file")); + 
assert!(ts.fixtures.metadata("test_file").permissions().readonly()); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_fifo_invalid_extra_operand() { + new_ucmd!() + .arg("test_file") + .arg("p") + .arg("1") + .arg("2") + .fails() + .stderr_contains(&"Fifos do not have major and minor device numbers"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_character_device_requires_major_and_minor() { + new_ucmd!() + .arg("test_file") + .arg("c") + .fails() + .status_code(1) + .stderr_contains(&"Special files require major and minor device numbers."); + new_ucmd!() + .arg("test_file") + .arg("c") + .arg("1") + .fails() + .status_code(1) + .stderr_contains(&"Special files require major and minor device numbers."); + new_ucmd!() + .arg("test_file") + .arg("c") + .arg("1") + .arg("c") + .fails() + .status_code(1) + .stderr_contains(&"Invalid value for ''"); + new_ucmd!() + .arg("test_file") + .arg("c") + .arg("c") + .arg("1") + .fails() + .status_code(1) + .stderr_contains(&"Invalid value for ''"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_invalid_arg() { + new_ucmd!() + .arg("--foo") + .fails() + .status_code(1) + .no_stdout() + .stderr_contains(&"Found argument '--foo' which wasn't expected"); +} + +#[test] +#[cfg(not(windows))] +fn test_mknod_invalid_mode() { + new_ucmd!() + .arg("--mode") + .arg("rw") + .arg("test_file") + .arg("p") + .fails() + .no_stdout() + .status_code(1) + .stderr_contains(&"invalid mode"); +} diff --git a/tests/common/util.rs b/tests/common/util.rs index 1ade70127..719849afc 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -163,7 +163,7 @@ impl CmdResult { /// asserts that the command's exit code is the same as the given one pub fn status_code(&self, code: i32) -> &CmdResult { - assert!(self.code == Some(code)); + assert_eq!(self.code, Some(code)); self } @@ -295,12 +295,22 @@ impl CmdResult { } pub fn stdout_contains>(&self, cmp: T) -> &CmdResult { - assert!(self.stdout_str().contains(cmp.as_ref())); + assert!( + 
self.stdout_str().contains(cmp.as_ref()), + "'{}' does not contain '{}'", + self.stdout_str(), + cmp.as_ref() + ); self } pub fn stderr_contains>(&self, cmp: T) -> &CmdResult { - assert!(self.stderr_str().contains(cmp.as_ref())); + assert!( + self.stderr_str().contains(cmp.as_ref()), + "'{}' does not contain '{}'", + self.stderr_str(), + cmp.as_ref() + ); self } From 7d2b051866f77000d253de324ad69e3fbe31ce9d Mon Sep 17 00:00:00 2001 From: Anup Mahindre Date: Thu, 6 May 2021 02:33:25 +0530 Subject: [PATCH 012/148] Implement Total size feature (#2170) * ls: Implement total size feature - Implement total size reporting that was missing - Fix minor formatting / readability nits * tests: Add tests for ls total sizes feature * ls: Fix MSRV build errors due to unsupported attributes for if blocks * ls: Add windows support for total sizes feature - Add windows support (defaults to file size as block sizes related information is not available on windows) - Renamed some functions --- Cargo.lock | 2 ++ src/uu/ls/src/ls.rs | 74 ++++++++++++++++++++++++++++------------ tests/by-util/test_ls.rs | 45 ++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62fa80c2d..3cd0c7cda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1692,6 +1692,7 @@ dependencies = [ name = "uu_basename" version = "0.0.6" dependencies = [ + "clap", "uucore", "uucore_procs", ] @@ -2645,6 +2646,7 @@ dependencies = [ name = "uu_who" version = "0.0.6" dependencies = [ + "clap", "uucore", "uucore_procs", ] diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 0e2754f07..f24bf513e 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1179,31 +1179,32 @@ impl PathData { } fn list(locs: Vec, config: Config) -> i32 { - let number_of_locs = locs.len(); - let mut files = Vec::::new(); let mut dirs = Vec::::new(); let mut has_failed = false; let mut out = BufWriter::new(stdout()); - for loc in locs { + for loc in &locs { let p = 
PathBuf::from(&loc); if !p.exists() { show_error!("'{}': {}", &loc, "No such file or directory"); - // We found an error, the return code of ls should not be 0 - // And no need to continue the execution + /* + We found an error, the return code of ls should not be 0 + And no need to continue the execution + */ has_failed = true; continue; } let path_data = PathData::new(p, None, None, &config, true); - let show_dir_contents = if let Some(ft) = path_data.file_type() { - !config.directory && ft.is_dir() - } else { - has_failed = true; - false + let show_dir_contents = match path_data.file_type() { + Some(ft) => !config.directory && ft.is_dir(), + None => { + has_failed = true; + false + } }; if show_dir_contents { @@ -1217,7 +1218,7 @@ fn list(locs: Vec, config: Config) -> i32 { sort_entries(&mut dirs, &config); for dir in dirs { - if number_of_locs > 1 { + if locs.len() > 1 { let _ = writeln!(out, "\n{}:", dir.p_buf.display()); } enter_directory(&dir, &config, &mut out); @@ -1331,7 +1332,7 @@ fn display_dir_entry_size(entry: &PathData, config: &Config) -> (usize, usize) { if let Some(md) = entry.md() { ( display_symlink_count(&md).len(), - display_file_size(&md, config).len(), + display_size(md.len(), config).len(), ) } else { (0, 0) @@ -1344,14 +1345,22 @@ fn pad_left(string: String, count: usize) -> String { fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter) { if config.format == Format::Long { - let (mut max_links, mut max_size) = (1, 1); + let (mut max_links, mut max_width) = (1, 1); + let mut total_size = 0; + for item in items { - let (links, size) = display_dir_entry_size(item, config); + let (links, width) = display_dir_entry_size(item, config); max_links = links.max(max_links); - max_size = size.max(max_size); + max_width = width.max(max_width); + total_size += item.md().map_or(0, |md| get_block_size(md, config)); } + + if total_size > 0 { + let _ = writeln!(out, "total {}", display_size(total_size, config)); + } + for item in items 
{ - display_item_long(item, max_links, max_size, config, out); + display_item_long(item, max_links, max_width, config, out); } } else { let names = items.iter().filter_map(|i| display_file_name(&i, config)); @@ -1396,6 +1405,29 @@ fn display_items(items: &[PathData], config: &Config, out: &mut BufWriter u64 { + /* GNU ls will display sizes in terms of block size + md.len() will differ from this value when the file has some holes + */ + #[cfg(unix)] + { + // hard-coded for now - enabling setting this remains a TODO + let ls_block_size = 1024; + return match config.size_format { + SizeFormat::Binary => md.blocks() * 512, + SizeFormat::Decimal => md.blocks() * 512, + SizeFormat::Bytes => md.blocks() * 512 / ls_block_size, + }; + } + + #[cfg(not(unix))] + { + let _ = config; + // no way to get block size for windows, fall-back to file size + md.len() + } +} + fn display_grid( names: impl Iterator, width: u16, @@ -1471,7 +1503,7 @@ fn display_item_long( let _ = writeln!( out, " {} {} {}", - pad_left(display_file_size(&md, config), max_size), + pad_left(display_size(md.len(), config), max_size), display_date(&md, config), // unwrap is fine because it fails when metadata is not available // but we already know that it is because it's checked at the @@ -1626,13 +1658,13 @@ fn format_prefixed(prefixed: NumberPrefix) -> String { } } -fn display_file_size(metadata: &Metadata, config: &Config) -> String { +fn display_size(len: u64, config: &Config) -> String { // NOTE: The human-readable behaviour deviates from the GNU ls. // The GNU ls uses binary prefixes by default. 
match config.size_format { - SizeFormat::Binary => format_prefixed(NumberPrefix::binary(metadata.len() as f64)), - SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(metadata.len() as f64)), - SizeFormat::Bytes => metadata.len().to_string(), + SizeFormat::Binary => format_prefixed(NumberPrefix::binary(len as f64)), + SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(len as f64)), + SizeFormat::Bytes => len.to_string(), } } diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 110764aa5..0985ba719 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -5,6 +5,7 @@ use crate::common::util::*; extern crate regex; use self::regex::Regex; +use std::collections::HashMap; use std::path::Path; use std::thread::sleep; use std::time::Duration; @@ -308,6 +309,50 @@ fn test_ls_long() { } } +#[test] +fn test_ls_long_total_size() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + at.touch(&at.plus_as_string("test-long")); + at.append("test-long", "1"); + at.touch(&at.plus_as_string("test-long2")); + at.append("test-long2", "2"); + + let expected_prints: HashMap<_, _> = if cfg!(unix) { + [ + ("long_vanilla", "total 8"), + ("long_human_readable", "total 8.0K"), + ("long_si", "total 8.2k"), + ] + .iter() + .cloned() + .collect() + } else { + [ + ("long_vanilla", "total 2"), + ("long_human_readable", "total 2"), + ("long_si", "total 2"), + ] + .iter() + .cloned() + .collect() + }; + + for arg in &["-l", "--long", "--format=long", "--format=verbose"] { + let result = scene.ucmd().arg(arg).succeeds(); + result.stdout_contains(expected_prints["long_vanilla"]); + + for arg2 in &["-h", "--human-readable", "--si"] { + let result = scene.ucmd().arg(arg).arg(arg2).succeeds(); + result.stdout_contains(if *arg2 == "--si" { + expected_prints["long_si"] + } else { + expected_prints["long_human_readable"] + }); + } + } +} + #[test] fn test_ls_long_formats() { let scene = TestScenario::new(util_name!()); From 
a2658250fc12f7b1d55978afa3774dd8263955c9 Mon Sep 17 00:00:00 2001 From: jaggededgedjustice Date: Wed, 5 May 2021 22:12:17 +0100 Subject: [PATCH 013/148] Fix fmt crashing on subtracting unsigned numbers (#2178) --- src/uu/fmt/src/linebreak.rs | 2 +- tests/by-util/test_fmt.rs | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index 50cb6f77f..fe9f8568e 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -296,7 +296,7 @@ fn find_kp_breakpoints<'a, T: Iterator>>( (0, 0.0) } else { compute_demerits( - (args.opts.goal - tlen) as isize, + args.opts.goal as isize - tlen as isize, stretch, w.word_nchars as isize, active.prev_rat, diff --git a/tests/by-util/test_fmt.rs b/tests/by-util/test_fmt.rs index 21a5f3396..a83fae58e 100644 --- a/tests/by-util/test_fmt.rs +++ b/tests/by-util/test_fmt.rs @@ -33,18 +33,16 @@ fn test_fmt_w_too_big() { "fmt: error: invalid width: '2501': Numerical result out of range" ); } -/* #[test] - Fails for now, see https://github.com/uutils/coreutils/issues/1501 +#[test] fn test_fmt_w() { let result = new_ucmd!() .arg("-w") .arg("10") .arg("one-word-per-line.txt") .run(); - //.stdout_is_fixture("call_graph.expected"); - assert_eq!(result.stdout_str().trim(), "this is a file with one word per line"); + //.stdout_is_fixture("call_graph.expected"); + assert_eq!( + result.stdout_str().trim(), + "this is\na file\nwith one\nword per\nline" + ); } - - -fmt is pretty broken in general, needs more works to have more tests - */ From 9f9735694db35ac47b4ad1b01b146c3d80f377d7 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 5 May 2021 22:52:07 +0200 Subject: [PATCH 014/148] refresh cargo.lock with recent updates --- Cargo.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cd0c7cda..a0169b412 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -618,7 +618,7 @@ checksum = 
"1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" dependencies = [ "cfg-if 1.0.0", "libc", - "redox_syscall 0.2.7", + "redox_syscall 0.2.8", "winapi 0.3.9", ] @@ -1259,9 +1259,9 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_syscall" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2" +checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc" dependencies = [ "bitflags", ] @@ -1272,7 +1272,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f" dependencies = [ - "redox_syscall 0.2.7", + "redox_syscall 0.2.8", ] [[package]] @@ -1537,7 +1537,7 @@ checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e" dependencies = [ "libc", "numtoa", - "redox_syscall 0.2.7", + "redox_syscall 0.2.8", "redox_termios", ] From 928fc59845d9854fd16ae324d4ed82882bec99bd Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 6 May 2021 10:43:48 +0200 Subject: [PATCH 015/148] Ignore test_lookup until issue #2181 is fixed --- tests/by-util/test_who.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index a5637f23a..8aeecfb55 100644 --- a/tests/by-util/test_who.rs +++ b/tests/by-util/test_who.rs @@ -162,6 +162,7 @@ fn test_users() { #[cfg(target_os = "linux")] #[test] +#[ignore] fn test_lookup() { for opt in vec!["--lookup"] { new_ucmd!() From cdd3998a445ea31a14320d27b97f1f8d74d3af4d Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Thu, 6 May 2021 14:10:16 +0200 Subject: [PATCH 016/148] gitignore: add ds_store files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 11f46e13e..77e8f717e 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 
+13,5 @@ Cargo.lock lib*.a /docs/_build *.iml +### macOS ### +.DS_Store From b24b9d501bfc84702f635669d703a7a7aa0156fc Mon Sep 17 00:00:00 2001 From: Idan Attias Date: Thu, 6 May 2021 10:52:35 +0300 Subject: [PATCH 017/148] logname: replace getopts with clap --- Cargo.lock | 1 + src/uu/logname/Cargo.toml | 1 + src/uu/logname/src/logname.rs | 16 ++++++++-------- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0169b412..24c008040 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2068,6 +2068,7 @@ dependencies = [ name = "uu_logname" version = "0.0.6" dependencies = [ + "clap", "libc", "uucore", "uucore_procs", diff --git a/src/uu/logname/Cargo.toml b/src/uu/logname/Cargo.toml index 416f817d7..4aa4d68f4 100644 --- a/src/uu/logname/Cargo.toml +++ b/src/uu/logname/Cargo.toml @@ -16,6 +16,7 @@ path = "src/logname.rs" [dependencies] libc = "0.2.42" +clap = "2.33" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/logname/src/logname.rs b/src/uu/logname/src/logname.rs index 8c6a946f5..9f9319e65 100644 --- a/src/uu/logname/src/logname.rs +++ b/src/uu/logname/src/logname.rs @@ -13,7 +13,8 @@ extern crate uucore; use std::ffi::CStr; -use uucore::InvalidEncodingHandling; + +use clap::App; extern "C" { // POSIX requires using getlogin (or equivalent code) @@ -31,15 +32,14 @@ fn get_userlogin() -> Option { } } -static SYNTAX: &str = ""; static SUMMARY: &str = "Print user's login name"; -static LONG_HELP: &str = ""; +static VERSION: &str = env!("CARGO_PKG_VERSION"); -pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse( - args.collect_str(InvalidEncodingHandling::ConvertLossy) - .accept_any(), - ); +pub fn uumain(_: impl uucore::Args) -> i32 { + let _ = App::new(executable!()) + .version(VERSION) + .about(SUMMARY) + .get_matches(); match get_userlogin() { Some(userlogin) => 
println!("{}", userlogin), From 41eb930292ba994b11e03dd700a0180f008c5d9b Mon Sep 17 00:00:00 2001 From: Idan Attias Date: Thu, 6 May 2021 11:06:38 +0300 Subject: [PATCH 018/148] logname: align profile --- src/uu/logname/src/logname.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/uu/logname/src/logname.rs b/src/uu/logname/src/logname.rs index 9f9319e65..ae0f93533 100644 --- a/src/uu/logname/src/logname.rs +++ b/src/uu/logname/src/logname.rs @@ -13,6 +13,7 @@ extern crate uucore; use std::ffi::CStr; +use uucore::InvalidEncodingHandling; use clap::App; @@ -35,10 +36,21 @@ fn get_userlogin() -> Option { static SUMMARY: &str = "Print user's login name"; static VERSION: &str = env!("CARGO_PKG_VERSION"); -pub fn uumain(_: impl uucore::Args) -> i32 { +fn get_usage() -> String { + format!("{0}", executable!()) +} + +pub fn uumain(args: impl uucore::Args) -> i32 { + let _ = args + .collect_str(InvalidEncodingHandling::Ignore) + .accept_any(); + + let usage = get_usage(); + let _ = App::new(executable!()) .version(VERSION) .about(SUMMARY) + .usage(&usage[..]) .get_matches(); match get_userlogin() { From 34b9809223ac5d260fa9931da6206597bf49865a Mon Sep 17 00:00:00 2001 From: Idan Attias Date: Thu, 6 May 2021 11:59:58 +0300 Subject: [PATCH 019/148] logname: fix test & style warning --- src/uu/logname/src/logname.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/uu/logname/src/logname.rs b/src/uu/logname/src/logname.rs index ae0f93533..14bf7ef3b 100644 --- a/src/uu/logname/src/logname.rs +++ b/src/uu/logname/src/logname.rs @@ -37,21 +37,20 @@ static SUMMARY: &str = "Print user's login name"; static VERSION: &str = env!("CARGO_PKG_VERSION"); fn get_usage() -> String { - format!("{0}", executable!()) + String::from(executable!()) } pub fn uumain(args: impl uucore::Args) -> i32 { - let _ = args + let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); let usage = get_usage(); - let _ = 
App::new(executable!()) .version(VERSION) .about(SUMMARY) .usage(&usage[..]) - .get_matches(); + .get_matches_from(args); match get_userlogin() { Some(userlogin) => println!("{}", userlogin), From 704c6865b1d3f5a3523b354b60ee30f8ee6c59f2 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 7 May 2021 09:57:31 +0200 Subject: [PATCH 020/148] refresh cargo.lock with recent updates --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24c008040..2362342d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1277,9 +1277,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce5f1ceb7f74abbce32601642fcf8e8508a8a8991e0621c7d750295b9095702b" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr 2.4.0", @@ -1312,9 +1312,9 @@ dependencies = [ [[package]] name = "retain_mut" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53552c6c49e1e13f1a203ef0080ab3bbef0beb570a528993e83df057a9d9bba1" +checksum = "e9c17925a9027d298a4603d286befe3f9dc0e8ed02523141914eb628798d6e5b" [[package]] name = "rust-ini" From c38373946a6a24afa02050a9dc41d88bc2afcdd7 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 7 May 2021 21:49:44 +0200 Subject: [PATCH 021/148] sort: optimize the Line struct --- Cargo.lock | 9 +-- src/uu/sort/Cargo.toml | 1 - src/uu/sort/src/sort.rs | 118 +++++++++++++++++++++++++--------------- 3 files changed, 74 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2362342d4..13441d4fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1445,12 +1445,6 @@ dependencies = [ "maybe-uninit", ] -[[package]] -name = "smallvec" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" - [[package]] name = "strsim" version = "0.8.0" @@ -1912,7 +1906,7 @@ dependencies = [ "quickcheck", "rand 0.7.3", "rand_chacha", - "smallvec 0.6.14", + "smallvec", "uucore", "uucore_procs", ] @@ -2392,7 +2386,6 @@ dependencies = [ "rand 0.7.3", "rayon", "semver", - "smallvec 1.6.1", "tempdir", "unicode-width", "uucore", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 3784ccbb0..5221f1f4e 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -21,7 +21,6 @@ clap = "2.33" fnv = "1.0.7" itertools = "0.10.0" semver = "0.9.0" -smallvec = "1.6.1" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index d8978cb2b..71d912f33 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -20,8 +20,8 @@ mod external_sort; mod numeric_str_cmp; use clap::{App, Arg}; -use external_sort::ext_sort; use custom_str_cmp::custom_str_cmp; +use external_sort::ext_sort; use fnv::FnvHasher; use itertools::Itertools; use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings}; @@ -29,7 +29,6 @@ use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; use semver::Version; -use smallvec::SmallVec; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::env; @@ -231,7 +230,6 @@ impl SelectionRange { enum NumCache { AsF64(GeneralF64ParseResult), WithInfo(NumInfo), - None, } impl NumCache { @@ -252,7 +250,7 @@ impl NumCache { #[derive(Clone)] struct Selection { range: SelectionRange, - num_cache: NumCache, + num_cache: Option>, } impl Selection { @@ -266,15 +264,17 @@ type Field = Range; #[derive(Clone)] pub struct Line { - line: String, + line: Box, // The common case is not to specify fields. Let's make this fast. 
- selections: SmallVec<[Selection; 1]>, + first_selection: Selection, + other_selections: Box<[Selection]>, } impl Line { + /// Estimate the number of bytes that this Line is occupying pub fn estimate_size(&self) -> usize { - self.line.capacity() - + self.selections.capacity() * std::mem::size_of::() + self.line.len() + + self.other_selections.len() * std::mem::size_of::() + std::mem::size_of::() } @@ -290,35 +290,22 @@ impl Line { None }; - let selections: SmallVec<[Selection; 1]> = settings - .selectors - .iter() - .map(|selector| { - let mut range = - SelectionRange::new(selector.get_selection(&line, fields.as_deref())); - let num_cache = if selector.settings.mode == SortMode::Numeric - || selector.settings.mode == SortMode::HumanNumeric - { - let (info, num_range) = NumInfo::parse( - range.get_str(&line), - NumInfoParseSettings { - accept_si_units: selector.settings.mode == SortMode::HumanNumeric, - thousands_separator: Some(THOUSANDS_SEP), - decimal_pt: Some(DECIMAL_PT), - }, - ); - range.shorten(num_range); - NumCache::WithInfo(info) - } else if selector.settings.mode == SortMode::GeneralNumeric { - let str = range.get_str(&line); - NumCache::AsF64(general_f64_parse(&str[get_leading_gen(str)])) - } else { - NumCache::None - }; - Selection { range, num_cache } - }) + let mut selectors = settings.selectors.iter(); + + let first_selection = selectors + .next() + .unwrap() + .get_selection(&line, fields.as_deref()); + + let other_selections: Vec = selectors + .map(|selector| selector.get_selection(&line, fields.as_deref())) .collect(); - Self { line, selections } + + Self { + line: line.into_boxed_str(), + first_selection, + other_selections: other_selections.into_boxed_slice(), + } } /// Writes indicators for the selections this line matched. 
The original line content is NOT expected @@ -337,7 +324,7 @@ impl Line { let fields = tokenize(&self.line, settings.separator); for selector in settings.selectors.iter() { - let mut selection = selector.get_selection(&self.line, Some(&fields)); + let mut selection = selector.get_range(&self.line, Some(&fields)); match selector.settings.mode { SortMode::Numeric | SortMode::HumanNumeric => { // find out which range is used for numeric comparisons @@ -594,9 +581,35 @@ impl FieldSelector { self.from.field != 1 || self.from.char == 0 || self.to.is_some() } + fn get_selection(&self, line: &str, fields: Option<&[Field]>) -> Selection { + let mut range = SelectionRange::new(self.get_range(&line, fields)); + let num_cache = if self.settings.mode == SortMode::Numeric + || self.settings.mode == SortMode::HumanNumeric + { + let (info, num_range) = NumInfo::parse( + range.get_str(&line), + NumInfoParseSettings { + accept_si_units: self.settings.mode == SortMode::HumanNumeric, + thousands_separator: Some(THOUSANDS_SEP), + decimal_pt: Some(DECIMAL_PT), + }, + ); + range.shorten(num_range); + Some(Box::new(NumCache::WithInfo(info))) + } else if self.settings.mode == SortMode::GeneralNumeric { + let str = range.get_str(&line); + Some(Box::new(NumCache::AsF64(general_f64_parse( + &str[get_leading_gen(str)], + )))) + } else { + None + }; + Selection { range, num_cache } + } + /// Look up the slice that corresponds to this selector for the given line. - /// If needs_fields returned false, fields may be None. - fn get_selection<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { + /// If needs_fields returned false, tokens may be None. 
+ fn get_range<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { enum Resolution { // The start index of the resolved character, inclusive StartOfChar(usize), @@ -1237,8 +1250,11 @@ fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering { for (idx, selector) in global_settings.selectors.iter().enumerate() { - let a_selection = &a.selections[idx]; - let b_selection = &b.selections[idx]; + let (a_selection, b_selection) = if idx == 0 { + (&a.first_selection, &b.first_selection) + } else { + (&a.other_selections[idx - 1], &b.other_selections[idx - 1]) + }; let a_str = a_selection.get_str(a); let b_str = b_selection.get_str(b); let settings = &selector.settings; @@ -1248,12 +1264,12 @@ fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering } else { match settings.mode { SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( - (a_str, a_selection.num_cache.as_num_info()), - (b_str, b_selection.num_cache.as_num_info()), + (a_str, a_selection.num_cache.as_ref().unwrap().as_num_info()), + (b_str, b_selection.num_cache.as_ref().unwrap().as_num_info()), ), SortMode::GeneralNumeric => general_numeric_compare( - a_selection.num_cache.as_f64(), - b_selection.num_cache.as_f64(), + a_selection.num_cache.as_ref().unwrap().as_f64(), + b_selection.num_cache.as_ref().unwrap().as_f64(), ), SortMode::Month => month_compare(a_str, b_str), SortMode::Version => version_compare(a_str, b_str), @@ -1591,4 +1607,16 @@ mod tests { let line = "..a..a"; assert_eq!(tokenize(line, Some('a')), vec![0..2, 3..5]); } + + #[test] + #[cfg(target_pointer_width = "64")] + fn test_line_size() { + // We should make sure to not regress the size of the Line struct because + // it is unconditional overhead for every line we sort. 
+ assert_eq!(std::mem::size_of::(), 56); + // These are the fields of Line: + assert_eq!(std::mem::size_of::>(), 16); + assert_eq!(std::mem::size_of::(), 24); + assert_eq!(std::mem::size_of::>(), 16); + } } From 8c9faa16b94c9ef9064b9a2e9d521046619bcc66 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 7 May 2021 21:50:33 +0200 Subject: [PATCH 022/148] sort: improve memory usage for extsort --- src/uu/sort/src/external_sort/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs index 725b17bbd..662250e1d 100644 --- a/src/uu/sort/src/external_sort/mod.rs +++ b/src/uu/sort/src/external_sort/mod.rs @@ -113,7 +113,7 @@ pub fn ext_sort( chunk.push(seq); - if total_read >= settings.buffer_size { + if total_read + chunk.len() * std::mem::size_of::() >= settings.buffer_size { super::sort_by(&mut chunk, &settings); write_chunk( settings, @@ -136,6 +136,9 @@ pub fn ext_sort( iter.chunks += 1; } + // We manually drop here to not go over our memory limit when we allocate below. + drop(chunk); + // initialize buffers for each chunk // // Having a right sized buffer for each chunk for smallish values seems silly to me? 
From 3b6c7bc9e97c9d01d31cef2e8fd3641743985bc6 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sat, 8 May 2021 00:50:36 +0200 Subject: [PATCH 023/148] Fix mistakes with merging --- src/uu/ls/src/ls.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 06bbeddea..bacd4176a 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1332,7 +1332,7 @@ fn display_dir_entry_size(entry: &PathData, config: &Config) -> (usize, usize) { if let Some(md) = entry.md() { ( display_symlink_count(&md).len(), - display_size(md.len(), config).len(), + display_size_or_rdev(&md, config).len(), ) } else { (0, 0) @@ -1503,7 +1503,7 @@ fn display_item_long( let _ = writeln!( out, " {} {} {}", - pad_left(display_size(md.len(), config), max_size), + pad_left(display_size_or_rdev(md, config), max_size), display_date(&md, config), // unwrap is fine because it fails when metadata is not available // but we already know that it is because it's checked at the @@ -1658,7 +1658,7 @@ fn format_prefixed(prefixed: NumberPrefix) -> String { } } -fn display_size(metadata: &Metadata, config: &Config) -> String { +fn display_size_or_rdev(metadata: &Metadata, config: &Config) -> String { #[cfg(unix)] { let ft = metadata.file_type(); @@ -1670,12 +1670,16 @@ fn display_size(metadata: &Metadata, config: &Config) -> String { } } + display_size(metadata.len(), config) +} + +fn display_size(size: u64, config: &Config) -> String { // NOTE: The human-readable behaviour deviates from the GNU ls. // The GNU ls uses binary prefixes by default. 
match config.size_format { - SizeFormat::Binary => format_prefixed(NumberPrefix::binary(len as f64)), - SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(len as f64)), - SizeFormat::Bytes => len.to_string(), + SizeFormat::Binary => format_prefixed(NumberPrefix::binary(size as f64)), + SizeFormat::Decimal => format_prefixed(NumberPrefix::decimal(size as f64)), + SizeFormat::Bytes => size.to_string(), } } From c0c240f194da65e1cc53d7d878cd89fb00d346bb Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 5 May 2021 19:05:03 +0100 Subject: [PATCH 024/148] du: fix couple of du unit tests for FreeBSD. --- tests/by-util/test_du.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 111f2dc90..c1b7fcb7b 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -53,7 +53,11 @@ fn _du_basics_subdir(s: &str) { fn _du_basics_subdir(s: &str) { assert_eq!(s, "0\tsubdir/deeper\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_basics_subdir(s: &str) { + assert_eq!(s, "8\tsubdir/deeper\n"); +} +#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] fn _du_basics_subdir(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -100,7 +104,11 @@ fn _du_soft_link(s: &str) { fn _du_soft_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_soft_link(s: &str) { + assert_eq!(s, "16\tsubdir/links\n"); +} +#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] fn _du_soft_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -141,7 +149,11 @@ fn _du_hard_link(s: &str) { fn _du_hard_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n") } 
-#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_hard_link(s: &str) { + assert_eq!(s, "16\tsubdir/links\n") +} +#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] fn _du_hard_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -181,7 +193,11 @@ fn _du_d_flag(s: &str) { fn _du_d_flag(s: &str) { assert_eq!(s, "8\t./subdir\n8\t./\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_d_flag(s: &str) { + assert_eq!(s, "28\t./subdir\n36\t./\n"); +} +#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] fn _du_d_flag(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { From 64c1f164211d6f7bb147cf2be9c14e963aad2cf2 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 7 May 2021 23:40:07 +0200 Subject: [PATCH 025/148] sort: allow some functions to be called with OsStr --- src/uu/sort/src/sort.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index d8978cb2b..730be0039 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -20,8 +20,8 @@ mod external_sort; mod numeric_str_cmp; use clap::{App, Arg}; -use external_sort::ext_sort; use custom_str_cmp::custom_str_cmp; +use external_sort::ext_sort; use fnv::FnvHasher; use itertools::Itertools; use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings}; @@ -33,6 +33,7 @@ use smallvec::SmallVec; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::env; +use std::ffi::OsStr; use std::fs::File; use std::hash::{Hash, Hasher}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; @@ -1109,10 +1110,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { exec(files, settings) } -fn file_to_lines_iter<'a>( - 
file: &str, - settings: &'a GlobalSettings, -) -> Option + 'a> { +fn file_to_lines_iter( + file: impl AsRef, + settings: &'_ GlobalSettings, +) -> Option + '_> { let (reader, _) = match open(file) { Some(x) => x, None => return None, @@ -1177,7 +1178,7 @@ fn exec(files: Vec, settings: GlobalSettings) -> i32 { let mut lines = vec![]; // This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression. - for (file, _) in files.iter().map(|file| open(file)).flatten() { + for (file, _) in files.iter().map(open).flatten() { let buf_reader = BufReader::new(file); for line in buf_reader.split(if settings.zero_terminated { b'\0' @@ -1501,7 +1502,8 @@ fn print_sorted>(iter: T, settings: &GlobalSettings) { } // from cat.rs -fn open(path: &str) -> Option<(Box, bool)> { +fn open(path: impl AsRef) -> Option<(Box, bool)> { + let path = path.as_ref(); if path == "-" { let stdin = stdin(); return Some((Box::new(stdin) as Box, is_stdin_interactive())); @@ -1510,7 +1512,7 @@ fn open(path: &str) -> Option<(Box, bool)> { match File::open(Path::new(path)) { Ok(f) => Some((Box::new(f) as Box, false)), Err(e) => { - show_error!("{0}: {1}", path, e.to_string()); + show_error!("{0:?}: {1}", path, e.to_string()); None } } From 38effc93b3d8a34a1136a9911eb9b1e0da7359c7 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 7 May 2021 23:39:00 +0200 Subject: [PATCH 026/148] sort: use FileMerger for extsort merge step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FileMerger is much more efficient than the previous algorithm, which looped over all elements every time to determine the next element. FileMerger uses a BinaryHeap, which should bring the complexity for the merge step down from O(n²) to O(n log n). 
--- src/uu/sort/src/external_sort/mod.rs | 178 +++++---------------------- 1 file changed, 30 insertions(+), 148 deletions(-) diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs index 725b17bbd..af6902367 100644 --- a/src/uu/sort/src/external_sort/mod.rs +++ b/src/uu/sort/src/external_sort/mod.rs @@ -1,91 +1,33 @@ -use std::cmp::Ordering; -use std::collections::VecDeque; -use std::fs::{File, OpenOptions}; -use std::io::SeekFrom; -use std::io::{BufRead, BufReader, BufWriter, Seek, Write}; +use std::fs::OpenOptions; +use std::io::{BufWriter, Write}; use std::path::Path; use tempdir::TempDir; +use crate::{file_to_lines_iter, FileMerger}; + use super::{GlobalSettings, Line}; /// Iterator that provides sorted `T`s -pub struct ExtSortedIterator { - buffers: Vec>, - chunk_offsets: Vec, - max_per_chunk: usize, - chunks: usize, - tmp_dir: TempDir, - settings: GlobalSettings, - failed: bool, +pub struct ExtSortedIterator<'a> { + file_merger: FileMerger<'a>, + // Keep tmp_dir around, it is deleted when dropped. 
+ _tmp_dir: TempDir, } -impl Iterator for ExtSortedIterator { +impl<'a> Iterator for ExtSortedIterator<'a> { type Item = Line; - - /// # Errors - /// - /// This method can fail due to issues reading intermediate sorted chunks - /// from disk fn next(&mut self) -> Option { - if self.failed { - return None; - } - // fill up any empty buffers - let mut empty = true; - for chunk_num in 0..self.chunks { - if self.buffers[chunk_num as usize].is_empty() { - let mut f = crash_if_err!( - 1, - File::open(self.tmp_dir.path().join(chunk_num.to_string())) - ); - crash_if_err!(1, f.seek(SeekFrom::Start(self.chunk_offsets[chunk_num]))); - let bytes_read = fill_buff( - &mut self.buffers[chunk_num as usize], - f, - self.max_per_chunk, - &self.settings, - ); - self.chunk_offsets[chunk_num as usize] += bytes_read as u64; - if !self.buffers[chunk_num as usize].is_empty() { - empty = false; - } - } else { - empty = false; - } - } - if empty { - return None; - } - - // find the next record to write - // check is_empty() before unwrap()ing - let mut idx = 0; - for chunk_num in 0..self.chunks as usize { - if !self.buffers[chunk_num].is_empty() - && (self.buffers[idx].is_empty() - || super::compare_by( - self.buffers[chunk_num].front().unwrap(), - self.buffers[idx].front().unwrap(), - &self.settings, - ) == Ordering::Less) - { - idx = chunk_num; - } - } - - // unwrap due to checks above - let r = self.buffers[idx].pop_front().unwrap(); - Some(r) + self.file_merger.next() } } /// Sort (based on `compare`) the `T`s provided by `unsorted` and return an /// iterator /// -/// # Errors +/// # Panics /// -/// This method can fail due to issues writing intermediate sorted chunks +/// This method can panic due to issues writing intermediate sorted chunks /// to disk. 
pub fn ext_sort( unsorted: impl Iterator, @@ -93,19 +35,12 @@ pub fn ext_sort( ) -> ExtSortedIterator { let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); - let mut iter = ExtSortedIterator { - buffers: Vec::new(), - chunk_offsets: Vec::new(), - max_per_chunk: 0, - chunks: 0, - tmp_dir, - settings: settings.clone(), - failed: false, - }; - let mut total_read = 0; let mut chunk = Vec::new(); + let mut chunks_read = 0; + let mut file_merger = FileMerger::new(settings); + // make the initial chunks on disk for seq in unsorted { let seq_size = seq.estimate_size(); @@ -113,62 +48,35 @@ pub fn ext_sort( chunk.push(seq); - if total_read >= settings.buffer_size { + if total_read >= settings.buffer_size && chunk.len() >= 2 { super::sort_by(&mut chunk, &settings); - write_chunk( - settings, - &iter.tmp_dir.path().join(iter.chunks.to_string()), - &mut chunk, - ); + + let file_path = tmp_dir.path().join(chunks_read.to_string()); + write_chunk(settings, &file_path, &mut chunk); chunk.clear(); total_read = 0; - iter.chunks += 1; + chunks_read += 1; + + file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap())) } } // write the last chunk if !chunk.is_empty() { super::sort_by(&mut chunk, &settings); + + let file_path = tmp_dir.path().join(chunks_read.to_string()); write_chunk( settings, - &iter.tmp_dir.path().join(iter.chunks.to_string()), + &tmp_dir.path().join(chunks_read.to_string()), &mut chunk, ); - iter.chunks += 1; + + file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap())); } - - // initialize buffers for each chunk - // - // Having a right sized buffer for each chunk for smallish values seems silly to me? - // - // We will have to have the entire iter in memory sometime right? 
- // Set minimum to the size of the writer buffer, ~8K - - const MINIMUM_READBACK_BUFFER: usize = 8200; - let right_sized_buffer = settings - .buffer_size - .checked_div(iter.chunks) - .unwrap_or(settings.buffer_size); - iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER { - right_sized_buffer - } else { - MINIMUM_READBACK_BUFFER - }; - iter.buffers = vec![VecDeque::new(); iter.chunks]; - iter.chunk_offsets = vec![0; iter.chunks]; - for chunk_num in 0..iter.chunks { - let offset = fill_buff( - &mut iter.buffers[chunk_num], - crash_if_err!( - 1, - File::open(iter.tmp_dir.path().join(chunk_num.to_string())) - ), - iter.max_per_chunk, - &settings, - ); - iter.chunk_offsets[chunk_num] = offset as u64; + ExtSortedIterator { + file_merger, + _tmp_dir: tmp_dir, } - - iter } fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec) { @@ -183,29 +91,3 @@ fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec) { } crash_if_err!(1, buf_write.flush()); } - -fn fill_buff( - vec: &mut VecDeque, - file: File, - max_bytes: usize, - settings: &GlobalSettings, -) -> usize { - let mut total_read = 0; - let mut bytes_read = 0; - for line in BufReader::new(file).split(if settings.zero_terminated { - b'\0' - } else { - b'\n' - }) { - let line_s = String::from_utf8(crash_if_err!(1, line)).unwrap(); - bytes_read += line_s.len() + 1; - let deserialized = Line::new(line_s, settings); - total_read += deserialized.estimate_size(); - vec.push_back(deserialized); - if total_read > max_bytes { - break; - } - } - - bytes_read -} From a8853765831ae747affb6af22491ae428a1b6d0e Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 7 May 2021 23:36:36 +0200 Subject: [PATCH 027/148] uucore: refactor - reduce duplicate code related to `fs::display_permissions` This is a refactor to reduce duplicate code, it affects chmod/ls/stat. 
* merge `stat/src/fsext::pretty_access` into `uucore/src/lib/features/fs::display_permissions_unix` * move tests for `fs::display_permissions` from `test_stat::test_access` to `uucore/src/lib/features/fs::test_display_permissions` * adjust `uu_chmod`, `uu_ls` and `uu_stat` to use `uucore::fs::display_permissions` --- src/uu/chmod/src/chmod.rs | 11 ++-- src/uu/ls/src/ls.rs | 15 +---- src/uu/stat/Cargo.toml | 2 +- src/uu/stat/src/fsext.rs | 66 --------------------- src/uu/stat/src/stat.rs | 4 +- src/uucore/src/lib/features/fs.rs | 97 +++++++++++++++++++++++++++---- tests/by-util/test_stat.rs | 36 ------------ 7 files changed, 97 insertions(+), 134 deletions(-) diff --git a/src/uu/chmod/src/chmod.rs b/src/uu/chmod/src/chmod.rs index d01f0316e..88e3403fe 100644 --- a/src/uu/chmod/src/chmod.rs +++ b/src/uu/chmod/src/chmod.rs @@ -15,6 +15,7 @@ use std::fs; use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::Path; use uucore::fs::display_permissions_unix; +use uucore::libc::mode_t; #[cfg(not(windows))] use uucore::mode; use uucore::InvalidEncodingHandling; @@ -306,7 +307,7 @@ impl Chmoder { "mode of '{}' retained as {:04o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), ); } Ok(()) @@ -319,9 +320,9 @@ impl Chmoder { "failed to change mode of file '{}' from {:o} ({}) to {:o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), mode, - display_permissions_unix(mode) + display_permissions_unix(mode as mode_t, false) ); } Err(1) @@ -331,9 +332,9 @@ impl Chmoder { "mode of '{}' changed from {:o} ({}) to {:o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), mode, - display_permissions_unix(mode) + display_permissions_unix(mode as mode_t, false) ); } Ok(()) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index f24bf513e..36f0ad758 100644 ---
a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1480,9 +1480,8 @@ fn display_item_long( let _ = write!( out, - "{}{} {}", - display_file_type(md.file_type()), - display_permissions(&md), + "{} {}", + display_permissions(&md, true), pad_left(display_symlink_count(&md), max_links), ); @@ -1668,16 +1667,6 @@ fn display_size(len: u64, config: &Config) -> String { } } -fn display_file_type(file_type: FileType) -> char { - if file_type.is_dir() { - 'd' - } else if file_type.is_symlink() { - 'l' - } else { - '-' - } -} - #[cfg(unix)] fn file_is_executable(md: &Metadata) -> bool { // Mode always returns u32, but the flags might not be, based on the platform diff --git a/src/uu/stat/Cargo.toml b/src/uu/stat/Cargo.toml index 96bf63ffe..c325c20db 100644 --- a/src/uu/stat/Cargo.toml +++ b/src/uu/stat/Cargo.toml @@ -17,7 +17,7 @@ path = "src/stat.rs" [dependencies] clap = "2.33" time = "0.1.40" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/stat/src/fsext.rs b/src/uu/stat/src/fsext.rs index d90099892..53280790e 100644 --- a/src/uu/stat/src/fsext.rs +++ b/src/uu/stat/src/fsext.rs @@ -41,13 +41,6 @@ impl BirthTime for Metadata { } } -#[macro_export] -macro_rules! 
has { - ($mode:expr, $perm:expr) => { - $mode & $perm != 0 - }; -} - pub fn pretty_time(sec: i64, nsec: i64) -> String { // sec == seconds since UNIX_EPOCH // nsec == nanoseconds since (UNIX_EPOCH + sec) @@ -81,65 +74,6 @@ pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { } } -pub fn pretty_access(mode: mode_t) -> String { - let mut result = String::with_capacity(10); - result.push(match mode & S_IFMT { - S_IFDIR => 'd', - S_IFCHR => 'c', - S_IFBLK => 'b', - S_IFREG => '-', - S_IFIFO => 'p', - S_IFLNK => 'l', - S_IFSOCK => 's', - // TODO: Other file types - _ => '?', - }); - - result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISUID as mode_t) { - if has!(mode, S_IXUSR) { - 's' - } else { - 'S' - } - } else if has!(mode, S_IXUSR) { - 'x' - } else { - '-' - }); - - result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISGID as mode_t) { - if has!(mode, S_IXGRP) { - 's' - } else { - 'S' - } - } else if has!(mode, S_IXGRP) { - 'x' - } else { - '-' - }); - - result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISVTX as mode_t) { - if has!(mode, S_IXOTH) { - 't' - } else { - 'T' - } - } else if has!(mode, S_IXOTH) { - 'x' - } else { - '-' - }); - - result -} - use std::borrow::Cow; use std::convert::{AsRef, From}; use std::ffi::CString; diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 5216fb293..d46c54910 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -7,13 +7,13 @@ // spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE -#[macro_use] mod fsext; pub use crate::fsext::*; #[macro_use] extern crate uucore; use 
uucore::entries; +use uucore::fs::display_permissions; use clap::{App, Arg, ArgMatches}; use std::borrow::Cow; @@ -575,7 +575,7 @@ impl Stater { } // access rights in human readable form 'A' => { - arg = pretty_access(meta.mode() as mode_t); + arg = display_permissions(&meta, true); otype = OutputType::Str; } // number of blocks allocated (see %B) diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index a72d6ea82..040c36e95 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -8,8 +8,9 @@ #[cfg(unix)] use libc::{ - mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, - S_IXGRP, S_IXOTH, S_IXUSR, + mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP, + S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH, + S_IXUSR, }; use std::borrow::Cow; use std::env; @@ -23,9 +24,10 @@ use std::os::unix::fs::MetadataExt; use std::path::{Component, Path, PathBuf}; #[cfg(unix)] +#[macro_export] macro_rules! 
has { ($mode:expr, $perm:expr) => { - $mode & ($perm as u32) != 0 + $mode & $perm != 0 }; } @@ -240,22 +242,42 @@ pub fn is_stderr_interactive() -> bool { #[cfg(not(unix))] #[allow(unused_variables)] -pub fn display_permissions(metadata: &fs::Metadata) -> String { +pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { + if display_file_type { + return String::from("----------"); + } String::from("---------") } #[cfg(unix)] -pub fn display_permissions(metadata: &fs::Metadata) -> String { +pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { let mode: mode_t = metadata.mode() as mode_t; - display_permissions_unix(mode as u32) + display_permissions_unix(mode, display_file_type) } #[cfg(unix)] -pub fn display_permissions_unix(mode: u32) -> String { - let mut result = String::with_capacity(9); +pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String { + let mut result; + if display_file_type { + result = String::with_capacity(10); + result.push(match mode & S_IFMT { + S_IFDIR => 'd', + S_IFCHR => 'c', + S_IFBLK => 'b', + S_IFREG => '-', + S_IFIFO => 'p', + S_IFLNK => 'l', + S_IFSOCK => 's', + // TODO: Other file types + _ => '?', + }); + } else { + result = String::with_capacity(9); + } + result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISUID) { + result.push(if has!(mode, S_ISUID as mode_t) { if has!(mode, S_IXUSR) { 's' } else { @@ -269,7 +291,7 @@ pub fn display_permissions_unix(mode: u32) -> String { result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISGID) { + result.push(if has!(mode, S_ISGID as mode_t) { if has!(mode, S_IXGRP) { 's' } else { @@ -283,7 +305,7 @@ pub fn display_permissions_unix(mode: u32) -> String { result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); 
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISVTX) { + result.push(if has!(mode, S_ISVTX as mode_t) { if has!(mode, S_IXOTH) { 't' } else { @@ -355,4 +377,57 @@ mod tests { ); } } + + #[cfg(unix)] + #[test] + fn test_display_permissions() { + assert_eq!( + "drwxr-xr-x", + display_permissions_unix(S_IFDIR | 0o755, true) + ); + assert_eq!( + "rwxr-xr-x", + display_permissions_unix(S_IFDIR | 0o755, false) + ); + assert_eq!( + "-rw-r--r--", + display_permissions_unix(S_IFREG | 0o644, true) + ); + assert_eq!( + "srw-r-----", + display_permissions_unix(S_IFSOCK | 0o640, true) + ); + assert_eq!( + "lrw-r-xr-x", + display_permissions_unix(S_IFLNK | 0o655, true) + ); + assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true)); + + assert_eq!( + "brwSr-xr-x", + display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true) + ); + assert_eq!( + "brwsr-xr-x", + display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true) + ); + + assert_eq!( + "prw---sr--", + display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true) + ); + assert_eq!( + "prw---Sr--", + display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true) + ); + + assert_eq!( + "c---r-xr-t", + display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true) + ); + assert_eq!( + "c---r-xr-T", + display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true) + ); + } } diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 7b7e990f4..5c4e62610 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -9,42 +9,6 @@ pub use self::stat::*; mod test_fsext { use super::*; - #[test] - fn test_access() { - assert_eq!("drwxr-xr-x", pretty_access(S_IFDIR | 0o755)); - assert_eq!("-rw-r--r--", pretty_access(S_IFREG | 0o644)); - assert_eq!("srw-r-----", pretty_access(S_IFSOCK | 0o640)); - assert_eq!("lrw-r-xr-x", pretty_access(S_IFLNK | 0o655)); - assert_eq!("?rw-r-xr-x", pretty_access(0o655)); - - assert_eq!( - 
"brwSr-xr-x", - pretty_access(S_IFBLK | S_ISUID as mode_t | 0o655) - ); - assert_eq!( - "brwsr-xr-x", - pretty_access(S_IFBLK | S_ISUID as mode_t | 0o755) - ); - - assert_eq!( - "prw---sr--", - pretty_access(S_IFIFO | S_ISGID as mode_t | 0o614) - ); - assert_eq!( - "prw---Sr--", - pretty_access(S_IFIFO | S_ISGID as mode_t | 0o604) - ); - - assert_eq!( - "c---r-xr-t", - pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o055) - ); - assert_eq!( - "c---r-xr-T", - pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o054) - ); - } - #[test] fn test_file_type() { assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); From 525f71badafd5191f202ce7fdf95d4b9ceb2a208 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Wed, 5 May 2021 20:59:37 -0400 Subject: [PATCH 028/148] wc: rm leading space when printing multiple counts Remove the leading space from the output of `wc` when printing two or more types of counts. Fixes #2173. --- src/uu/wc/src/wc.rs | 55 ++++++++++++++++++++++++++++++++-------- tests/by-util/test_wc.rs | 34 +++++++++---------------- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 3b70856fa..43ce11aa8 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -323,7 +323,12 @@ fn wc(files: Vec, settings: &Settings) -> Result<(), u32> { error_count += 1; WordCount::default() }); - max_width = max(max_width, word_count.bytes.to_string().len() + 1); + // Compute the number of digits needed to display the number + // of bytes in the file. Even if the settings indicate that we + // won't *display* the number of bytes, we still use the + // number of digits in the byte count as the width when + // formatting each count as a string for output. 
+ max_width = max(max_width, word_count.bytes.to_string().len()); total_word_count += word_count; results.push(word_count.with_title(path)); } @@ -364,24 +369,54 @@ fn print_stats( min_width = 0; } + let mut is_first: bool = true; + if settings.show_lines { - write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; + if is_first { + write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; + } else { + write!(stdout_lock, " {:1$}", result.count.lines, min_width)?; + } + is_first = false; } if settings.show_words { - write!(stdout_lock, "{:1$}", result.count.words, min_width)?; + if is_first { + write!(stdout_lock, "{:1$}", result.count.words, min_width)?; + } else { + write!(stdout_lock, " {:1$}", result.count.words, min_width)?; + } + is_first = false; } if settings.show_bytes { - write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; + if is_first { + write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; + } else { + write!(stdout_lock, " {:1$}", result.count.bytes, min_width)?; + } + is_first = false; } if settings.show_chars { - write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; + if is_first { + write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; + } else { + write!(stdout_lock, " {:1$}", result.count.chars, min_width)?; + } + is_first = false; } if settings.show_max_line_length { - write!( - stdout_lock, - "{:1$}", - result.count.max_line_length, min_width - )?; + if is_first { + write!( + stdout_lock, + "{:1$}", + result.count.max_line_length, min_width + )?; + } else { + write!( + stdout_lock, + " {:1$}", + result.count.max_line_length, min_width + )?; + } } if result.title == "-" { diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index a16f1854e..87a86fca4 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -33,7 +33,7 @@ fn test_stdin_default() { new_ucmd!() .pipe_in_fixture("lorem_ipsum.txt") .run() - .stdout_is(" 13 109 772\n"); + .stdout_is(" 13 109 772\n"); } #[test] @@ 
-42,7 +42,7 @@ fn test_utf8() { .args(&["-lwmcL"]) .pipe_in_fixture("UTF_8_test.txt") .run() - .stdout_is(" 300 4969 22781 22213 79\n"); + .stdout_is(" 300 4969 22781 22213 79\n"); // GNU returns " 300 2086 22219 22781 79" // TODO: we should fix that to match GNU's behavior } @@ -71,7 +71,7 @@ fn test_stdin_all_counts() { .args(&["-c", "-m", "-l", "-L", "-w"]) .pipe_in_fixture("alice_in_wonderland.txt") .run() - .stdout_is(" 5 57 302 302 66\n"); + .stdout_is(" 5 57 302 302 66\n"); } #[test] @@ -79,7 +79,7 @@ fn test_single_default() { new_ucmd!() .arg("moby_dick.txt") .run() - .stdout_is(" 18 204 1115 moby_dick.txt\n"); + .stdout_is(" 18 204 1115 moby_dick.txt\n"); } #[test] @@ -95,7 +95,7 @@ fn test_single_all_counts() { new_ucmd!() .args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"]) .run() - .stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n"); + .stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n"); } #[test] @@ -108,64 +108,54 @@ fn test_multiple_default() { ]) .run() .stdout_is( - " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ - alice_in_wonderland.txt\n 36 370 2189 total\n", + " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ + alice_in_wonderland.txt\n 36 370 2189 total\n", ); } /// Test for an empty file. #[test] fn test_file_empty() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "emptyfile.txt"]) .run() - .stdout_is(" 0 0 0 0 0 emptyfile.txt\n"); + .stdout_is("0 0 0 0 0 emptyfile.txt\n"); } /// Test for an file containing a single non-whitespace character /// *without* a trailing newline. #[test] fn test_file_single_line_no_trailing_newline() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. 
new_ucmd!() .args(&["-clmwL", "notrailingnewline.txt"]) .run() - .stdout_is(" 1 1 2 2 1 notrailingnewline.txt\n"); + .stdout_is("1 1 2 2 1 notrailingnewline.txt\n"); } /// Test for a file that has 100 empty lines (that is, the contents of /// the file are the newline character repeated one hundred times). #[test] fn test_file_many_empty_lines() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "manyemptylines.txt"]) .run() - .stdout_is(" 100 0 100 100 0 manyemptylines.txt\n"); + .stdout_is("100 0 100 100 0 manyemptylines.txt\n"); } /// Test for a file that has one long line comprising only spaces. #[test] fn test_file_one_long_line_only_spaces() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "onelongemptyline.txt"]) .run() - .stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n"); + .stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n"); } /// Test for a file that has one long line comprising a single "word". #[test] fn test_file_one_long_word() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "onelongword.txt"]) .run() - .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); + .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); } From ee43655bdbb836ee6e5d461b75b9dd0a12c65ce5 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 7 May 2021 13:59:31 -0400 Subject: [PATCH 029/148] fixup! 
wc: rm leading space when printing multiple counts --- src/uu/wc/src/wc.rs | 47 ++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 43ce11aa8..b5f2a273b 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -372,51 +372,42 @@ fn print_stats( let mut is_first: bool = true; if settings.show_lines { - if is_first { - write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; - } else { - write!(stdout_lock, " {:1$}", result.count.lines, min_width)?; + if !is_first { + write!(stdout_lock, " ")?; } + write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; is_first = false; } if settings.show_words { - if is_first { - write!(stdout_lock, "{:1$}", result.count.words, min_width)?; - } else { - write!(stdout_lock, " {:1$}", result.count.words, min_width)?; + if !is_first { + write!(stdout_lock, " ")?; } + write!(stdout_lock, "{:1$}", result.count.words, min_width)?; is_first = false; } if settings.show_bytes { - if is_first { - write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; - } else { - write!(stdout_lock, " {:1$}", result.count.bytes, min_width)?; + if !is_first { + write!(stdout_lock, " ")?; } + write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; is_first = false; } if settings.show_chars { - if is_first { - write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; - } else { - write!(stdout_lock, " {:1$}", result.count.chars, min_width)?; + if !is_first { + write!(stdout_lock, " ")?; } + write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; is_first = false; } if settings.show_max_line_length { - if is_first { - write!( - stdout_lock, - "{:1$}", - result.count.max_line_length, min_width - )?; - } else { - write!( - stdout_lock, - " {:1$}", - result.count.max_line_length, min_width - )?; + if !is_first { + write!(stdout_lock, " ")?; } + write!( + stdout_lock, + "{:1$}", + result.count.max_line_length, min_width + 
)?; } if result.title == "-" { From a74a729aa8c6f7b0b38b907a1e227b9d5a61bf4d Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 8 May 2021 13:13:52 +0200 Subject: [PATCH 030/148] rustfmt the recent change --- tests/by-util/test_df.rs | 2 +- tests/by-util/test_du.rs | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/by-util/test_df.rs b/tests/by-util/test_df.rs index e3b7141d1..ac3776b96 100644 --- a/tests/by-util/test_df.rs +++ b/tests/by-util/test_df.rs @@ -27,7 +27,7 @@ fn test_df_output() { stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n"); } else { new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only( - "Filesystem Size Used Available Use% Mounted on \n" + "Filesystem Size Used Available Use% Mounted on \n", ); } } diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index c1b7fcb7b..c72bd02a6 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -57,7 +57,11 @@ fn _du_basics_subdir(s: &str) { fn _du_basics_subdir(s: &str) { assert_eq!(s, "8\tsubdir/deeper\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_basics_subdir(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -108,7 +112,11 @@ fn _du_soft_link(s: &str) { fn _du_soft_link(s: &str) { assert_eq!(s, "16\tsubdir/links\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_soft_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -153,7 +161,11 @@ fn _du_hard_link(s: &str) { fn _du_hard_link(s: &str) { assert_eq!(s, "16\tsubdir/links\n") } -#[cfg(all(not(target_vendor = "apple"), 
not(target_os = "windows"), not(target_os = "freebsd")))] +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_hard_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -197,7 +209,11 @@ fn _du_d_flag(s: &str) { fn _du_d_flag(s: &str) { assert_eq!(s, "28\t./subdir\n36\t./\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows"), not(target_os = "freebsd")))] +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_d_flag(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { From 50f4941d4903d13c018a7aeb35eac4f4ebfa103f Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 4 May 2021 19:04:23 -0400 Subject: [PATCH 031/148] wc: refactor WordCount into its own module Move the `WordCount` struct and its implementations into the `wordcount.rs`. --- src/uu/wc/src/wc.rs | 48 ++------------------------------------ src/uu/wc/src/wordcount.rs | 47 +++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 46 deletions(-) create mode 100644 src/uu/wc/src/wordcount.rs diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index b5f2a273b..8e973ccbd 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -12,8 +12,10 @@ extern crate uucore; mod count_bytes; mod countable; +mod wordcount; use count_bytes::count_bytes_fast; use countable::WordCountable; +use wordcount::{TitledWordCount, WordCount}; use clap::{App, Arg, ArgMatches}; use thiserror::Error; @@ -21,7 +23,6 @@ use thiserror::Error; use std::cmp::max; use std::fs::File; use std::io::{self, Write}; -use std::ops::{Add, AddAssign}; use std::path::Path; use std::str::from_utf8; @@ -82,51 +83,6 @@ impl Settings { } } -#[derive(Debug, Default, Copy, Clone)] -struct WordCount { - bytes: usize, - chars: usize, - lines: usize, - words: usize, - max_line_length: usize, -} - -impl Add for WordCount 
{ - type Output = Self; - - fn add(self, other: Self) -> Self { - Self { - bytes: self.bytes + other.bytes, - chars: self.chars + other.chars, - lines: self.lines + other.lines, - words: self.words + other.words, - max_line_length: max(self.max_line_length, other.max_line_length), - } - } -} - -impl AddAssign for WordCount { - fn add_assign(&mut self, other: Self) { - *self = *self + other - } -} - -impl WordCount { - fn with_title(self, title: &str) -> TitledWordCount { - TitledWordCount { title, count: self } - } -} - -/// This struct supplements the actual word count with a title that is displayed -/// to the user at the end of the program. -/// The reason we don't simply include title in the `WordCount` struct is that -/// it would result in unneccesary copying of `String`. -#[derive(Debug, Default, Clone)] -struct TitledWordCount<'a> { - title: &'a str, - count: WordCount, -} - static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if more than one FILE is specified."; static VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/src/uu/wc/src/wordcount.rs b/src/uu/wc/src/wordcount.rs new file mode 100644 index 000000000..38efb216f --- /dev/null +++ b/src/uu/wc/src/wordcount.rs @@ -0,0 +1,47 @@ +use std::cmp::max; +use std::ops::{Add, AddAssign}; + +#[derive(Debug, Default, Copy, Clone)] +pub struct WordCount { + pub bytes: usize, + pub chars: usize, + pub lines: usize, + pub words: usize, + pub max_line_length: usize, +} + +impl Add for WordCount { + type Output = Self; + + fn add(self, other: Self) -> Self { + Self { + bytes: self.bytes + other.bytes, + chars: self.chars + other.chars, + lines: self.lines + other.lines, + words: self.words + other.words, + max_line_length: max(self.max_line_length, other.max_line_length), + } + } +} + +impl AddAssign for WordCount { + fn add_assign(&mut self, other: Self) { + *self = *self + other + } +} + +impl WordCount { + pub fn with_title(self, title: &str) -> TitledWordCount { + 
TitledWordCount { title, count: self } + } +} + +/// This struct supplements the actual word count with a title that is displayed +/// to the user at the end of the program. +/// The reason we don't simply include title in the `WordCount` struct is that +/// it would result in unneccesary copying of `String`. +#[derive(Debug, Default, Clone)] +pub struct TitledWordCount<'a> { + pub title: &'a str, + pub count: WordCount, +} From ba8f4ea67041c500a2ca55fc09e87408d8a531f2 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 4 May 2021 22:13:28 -0400 Subject: [PATCH 032/148] wc: move counting code into WordCount::from_line() Refactor the counting code from the inner loop of the `wc` program into the `WordCount::from_line()` associated function. This commit also splits that function up into other helper functions that encapsulate decoding characters and finding word boundaries from raw bytes. This commit also implements the `Sum` trait for the `WordCount` struct, so that we can simply call `sum()` on an iterator that yields `WordCount` instances. 
--- src/uu/wc/src/wc.rs | 73 +++++---------------------------- src/uu/wc/src/wordcount.rs | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 62 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 8e973ccbd..33b2ba5ec 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -24,7 +24,6 @@ use std::cmp::max; use std::fs::File; use std::io::{self, Write}; use std::path::Path; -use std::str::from_utf8; #[derive(Error, Debug)] pub enum WcError { @@ -163,18 +162,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } -const CR: u8 = b'\r'; -const LF: u8 = b'\n'; -const SPACE: u8 = b' '; -const TAB: u8 = b'\t'; -const SYN: u8 = 0x16_u8; -const FF: u8 = 0x0C_u8; - -#[inline(always)] -fn is_word_separator(byte: u8) -> bool { - byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF -} - fn word_count_from_reader( mut reader: T, settings: &Settings, @@ -195,58 +182,20 @@ fn word_count_from_reader( // we do not need to decode the byte stream if we're only counting bytes/newlines let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length; - let mut line_count: usize = 0; - let mut word_count: usize = 0; - let mut byte_count: usize = 0; - let mut char_count: usize = 0; - let mut longest_line_length: usize = 0; - let mut ends_lf: bool; - - // reading from a TTY seems to raise a condition on, rather than return Some(0) like a file. - // hence the option wrapped in a result here - for line_result in reader.lines() { - let raw_line = match line_result { - Ok(l) => l, + // Sum the WordCount for each line. Show a warning for each line + // that results in an IO error when trying to read it. 
+ let total = reader + .lines() + .filter_map(|res| match res { + Ok(line) => Some(line), Err(e) => { show_warning!("Error while reading {}: {}", path, e); - continue; + None } - }; - - // GNU 'wc' only counts lines that end in LF as lines - ends_lf = *raw_line.last().unwrap() == LF; - line_count += ends_lf as usize; - - byte_count += raw_line.len(); - - if decode_chars { - // try and convert the bytes to UTF-8 first - let current_char_count; - match from_utf8(&raw_line[..]) { - Ok(line) => { - word_count += line.split_whitespace().count(); - current_char_count = line.chars().count(); - } - Err(..) => { - word_count += raw_line.split(|&x| is_word_separator(x)).count(); - current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count() - } - } - char_count += current_char_count; - if current_char_count > longest_line_length { - // -L is a GNU 'wc' extension so same behavior on LF - longest_line_length = current_char_count - (ends_lf as usize); - } - } - } - - Ok(WordCount { - bytes: byte_count, - chars: char_count, - lines: line_count, - words: word_count, - max_line_length: longest_line_length, - }) + }) + .map(|line| WordCount::from_line(&line, decode_chars)) + .sum(); + Ok(total) } fn word_count_from_path(path: &str, settings: &Settings) -> WcResult { diff --git a/src/uu/wc/src/wordcount.rs b/src/uu/wc/src/wordcount.rs index 38efb216f..785e57eff 100644 --- a/src/uu/wc/src/wordcount.rs +++ b/src/uu/wc/src/wordcount.rs @@ -1,5 +1,19 @@ use std::cmp::max; +use std::iter::Sum; use std::ops::{Add, AddAssign}; +use std::str::from_utf8; + +const CR: u8 = b'\r'; +const LF: u8 = b'\n'; +const SPACE: u8 = b' '; +const TAB: u8 = b'\t'; +const SYN: u8 = 0x16_u8; +const FF: u8 = 0x0C_u8; + +#[inline(always)] +fn is_word_separator(byte: u8) -> bool { + byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF +} #[derive(Debug, Default, Copy, Clone)] pub struct WordCount { @@ -30,10 +44,80 @@ impl AddAssign for WordCount { } } +impl Sum for WordCount { + fn 
sum(iter: I) -> WordCount + where + I: Iterator, + { + iter.fold(WordCount::default(), |acc, x| acc + x) + } +} + impl WordCount { + /// Count the characters and whitespace-separated words in the given bytes. + /// + /// `line` is a slice of bytes that will be decoded as ASCII characters. + fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) { + let word_count = line.split(|&x| is_word_separator(x)).count(); + let char_count = line.iter().filter(|c| c.is_ascii()).count(); + (word_count, char_count) + } + + /// Create a [`WordCount`] from a sequence of bytes representing a line. + /// + /// If the last byte of `line` encodes a newline character (`\n`), + /// then the [`lines`] field will be set to 1. Otherwise, it will + /// be set to 0. The [`bytes`] field is simply the length of + /// `line`. + /// + /// If `decode_chars` is `false`, the [`chars`] and [`words`] + /// fields will be set to 0. If it is `true`, this function will + /// attempt to decode the bytes first as UTF-8, and failing that, + /// as ASCII. + pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount { + // GNU 'wc' only counts lines that end in LF as lines + let lines = (*line.last().unwrap() == LF) as usize; + let bytes = line.len(); + let (words, chars) = if decode_chars { + WordCount::word_and_char_count(line) + } else { + (0, 0) + }; + // -L is a GNU 'wc' extension so same behavior on LF + let max_line_length = if chars > 0 { chars - lines } else { 0 }; + WordCount { + bytes, + chars, + lines, + words, + max_line_length, + } + } + + /// Count the UTF-8 characters and words in the given string slice. + /// + /// `s` is a string slice that is assumed to be a UTF-8 string. 
+ fn utf8_word_and_char_count(s: &str) -> (usize, usize) { + let word_count = s.split_whitespace().count(); + let char_count = s.chars().count(); + (word_count, char_count) + } + pub fn with_title(self, title: &str) -> TitledWordCount { TitledWordCount { title, count: self } } + + /// Count the characters and words in the given slice of bytes. + /// + /// `line` is a slice of bytes that will be decoded as UTF-8 + /// characters, or if that fails, as ASCII characters. + fn word_and_char_count(line: &[u8]) -> (usize, usize) { + // try and convert the bytes to UTF-8 first + match from_utf8(line) { + Ok(s) => WordCount::utf8_word_and_char_count(s), + Err(..) => WordCount::ascii_word_and_char_count(line), + } + } } /// This struct supplements the actual word count with a title that is displayed From a9ac7af9e14a9baa674a43e4164f85234a2d952b Mon Sep 17 00:00:00 2001 From: Samuel Ainsworth Date: Tue, 4 May 2021 00:21:39 -0700 Subject: [PATCH 033/148] Simplify parsing of --bytes for the split command --- src/uu/split/src/split.rs | 60 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 4f80e25a3..445c1f205 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -261,31 +261,41 @@ struct ByteSplitter { impl ByteSplitter { fn new(settings: &Settings) -> ByteSplitter { - let mut strategy_param: Vec = settings.strategy_param.chars().collect(); - let suffix = strategy_param.pop().unwrap(); - let multiplier = match suffix { - '0'..='9' => 1usize, - 'b' => 512usize, - 'k' => 1024usize, - 'm' => 1024usize * 1024usize, - _ => crash!(1, "invalid number of bytes"), - }; - let n = if suffix.is_alphabetic() { - match strategy_param - .iter() - .cloned() - .collect::() - .parse::() - { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of bytes: {}", e), - } - } else { - match settings.strategy_param.parse::() { - Ok(a) => a, - Err(e) => crash!(1, "invalid 
number of bytes: {}", e), - } - }; + // These multipliers are the same as supported by GNU coreutils with the + // exception of zetabytes (2^70) and yottabytes (2^80) as they overflow + // standard machine usize (2^64), so we disable for now. Note however + // that they are supported by the GNU coreutils split. Ignored for now. + let modifiers: Vec<(&str, usize)> = vec![ + ("K", 1024usize), + ("M", 1024 * 1024), + ("G", 1024 * 1024 * 1024), + ("T", 1024 * 1024 * 1024 * 1024), + ("P", 1024 * 1024 * 1024 * 1024 * 1024), + ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + // ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + // ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("KB", 1000), + ("MB", 1000 * 1000), + ("GB", 1000 * 1000 * 1000), + ("TB", 1000 * 1000 * 1000 * 1000), + ("PB", 1000 * 1000 * 1000 * 1000 * 1000), + ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + // ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + // ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ]; + + // This sequential find is acceptable since none of the modifiers are + // suffixes of any other modifiers, a la Huffman codes. + let (suffix, multiplier) = modifiers + .iter() + .find(|(suffix, _)| settings.strategy_param.ends_with(suffix)) + .unwrap_or(&("", 1)); + + // Try to parse the actual numeral. 
+ let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] + .parse::() + .unwrap_or_else(|_| crash!(1, "invalid number of bytes")); + ByteSplitter { saved_bytes_to_write: n * multiplier, bytes_to_write: n * multiplier, From 7c1395366e151ade7ac3fc3c056e55150d62dedc Mon Sep 17 00:00:00 2001 From: Samuel Ainsworth Date: Tue, 4 May 2021 04:01:01 -0700 Subject: [PATCH 034/148] Fix split's handling of non-UTF-8 files --- src/uu/split/src/split.rs | 191 +++++++++++++++++++----------------- tests/by-util/test_split.rs | 98 +++++++++++++----- 2 files changed, 176 insertions(+), 113 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 445c1f205..128ef73c6 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -13,11 +13,11 @@ extern crate uucore; mod platform; use clap::{App, Arg}; -use std::char; use std::env; use std::fs::File; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write}; use std::path::Path; +use std::{char, fs::remove_file}; static NAME: &str = "split"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -213,50 +213,65 @@ struct Settings { verbose: bool, } -struct SplitControl { - current_line: String, // Don't touch - request_new_file: bool, // Splitter implementation requests new file -} - trait Splitter { - // Consume the current_line and return the consumed string - fn consume(&mut self, _: &mut SplitControl) -> String; + // Consume as much as possible from `reader` so as to saturate `writer`. + // Equivalent to finishing one of the part files. Returns the number of + // bytes that have been moved. 
+ fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> usize; } struct LineSplitter { - saved_lines_to_write: usize, - lines_to_write: usize, + lines_per_split: usize, } impl LineSplitter { fn new(settings: &Settings) -> LineSplitter { - let n = match settings.strategy_param.parse() { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of lines: {}", e), - }; LineSplitter { - saved_lines_to_write: n, - lines_to_write: n, + lines_per_split: settings + .strategy_param + .parse() + .unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)), } } } impl Splitter for LineSplitter { - fn consume(&mut self, control: &mut SplitControl) -> String { - self.lines_to_write -= 1; - if self.lines_to_write == 0 { - self.lines_to_write = self.saved_lines_to_write; - control.request_new_file = true; + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> usize { + let mut bytes_consumed = 0usize; + let mut buffer = String::with_capacity(1024); + for _ in 0..self.lines_per_split { + let bytes_read = reader + .read_line(&mut buffer) + .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); + // If we ever read 0 bytes then we know we've hit EOF. + if bytes_read == 0 { + return bytes_consumed; + } + + writer + .write_all(buffer.as_bytes()) + .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); + // Empty out the String buffer since `read_line` appends instead of + // replaces. + buffer.clear(); + + bytes_consumed += bytes_read; } - control.current_line.clone() + + bytes_consumed } } struct ByteSplitter { - saved_bytes_to_write: usize, - bytes_to_write: usize, - break_on_line_end: bool, - require_whole_line: bool, + bytes_per_split: usize, } impl ByteSplitter { @@ -294,36 +309,44 @@ impl ByteSplitter { // Try to parse the actual numeral. 
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] .parse::() - .unwrap_or_else(|_| crash!(1, "invalid number of bytes")); + .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e)); ByteSplitter { - saved_bytes_to_write: n * multiplier, - bytes_to_write: n * multiplier, - break_on_line_end: settings.strategy == "b", - require_whole_line: false, + bytes_per_split: n * multiplier, } } } impl Splitter for ByteSplitter { - fn consume(&mut self, control: &mut SplitControl) -> String { - let line = control.current_line.clone(); - let n = std::cmp::min(line.chars().count(), self.bytes_to_write); - if self.require_whole_line && n < line.chars().count() { - self.bytes_to_write = self.saved_bytes_to_write; - control.request_new_file = true; - self.require_whole_line = false; - return "".to_owned(); + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> usize { + // We buffer reads and writes. We proceed until `bytes_consumed` is + // equal to `self.bytes_per_split` or we reach EOF. + let mut bytes_consumed = 0usize; + const BUFFER_SIZE: usize = 1024; + let mut buffer = [0u8; BUFFER_SIZE]; + while bytes_consumed < self.bytes_per_split { + // Don't overshoot `self.bytes_per_split`! + let bytes_desired = std::cmp::min(BUFFER_SIZE, self.bytes_per_split - bytes_consumed); + let bytes_read = reader + .read(&mut buffer[0..bytes_desired]) + .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); + // If we ever read 0 bytes then we know we've hit EOF. 
+ if bytes_read == 0 { + return bytes_consumed; + } + + writer + .write_all(&buffer[0..bytes_read]) + .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); + + bytes_consumed += bytes_read; } - self.bytes_to_write -= n; - if n == 0 { - self.bytes_to_write = self.saved_bytes_to_write; - control.request_new_file = true; - } - if self.break_on_line_end && n == line.chars().count() { - self.require_whole_line = self.break_on_line_end; - } - line[..n].to_owned() + + bytes_consumed } } @@ -363,14 +386,13 @@ fn split(settings: &Settings) -> i32 { let mut reader = BufReader::new(if settings.input == "-" { Box::new(stdin()) as Box } else { - let r = match File::open(Path::new(&settings.input)) { - Ok(a) => a, - Err(_) => crash!( + let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| { + crash!( 1, "cannot open '{}' for reading: No such file or directory", settings.input - ), - }; + ) + }); Box::new(r) as Box }); @@ -380,48 +402,39 @@ fn split(settings: &Settings) -> i32 { a => crash!(1, "strategy {} not supported", a), }; - let mut control = SplitControl { - current_line: "".to_owned(), // Request new line - request_new_file: true, // Request new file - }; - - let mut writer = BufWriter::new(Box::new(stdout()) as Box); let mut fileno = 0; loop { - if control.current_line.chars().count() == 0 { - match reader.read_line(&mut control.current_line) { - Ok(0) | Err(_) => break, - _ => {} + // Get a new part file set up, and construct `writer` for it. 
+ let mut filename = settings.prefix.clone(); + filename.push_str( + if settings.numeric_suffix { + num_prefix(fileno, settings.suffix_length) + } else { + str_prefix(fileno, settings.suffix_length) } - } - if control.request_new_file { - let mut filename = settings.prefix.clone(); - filename.push_str( - if settings.numeric_suffix { - num_prefix(fileno, settings.suffix_length) - } else { - str_prefix(fileno, settings.suffix_length) - } - .as_ref(), - ); - filename.push_str(settings.additional_suffix.as_ref()); + .as_ref(), + ); + filename.push_str(settings.additional_suffix.as_ref()); + let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); - crash_if_err!(1, writer.flush()); - fileno += 1; - writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); - control.request_new_file = false; - if settings.verbose { - println!("creating file '{}'", filename); + let bytes_consumed = splitter.consume(&mut reader, &mut writer); + writer + .flush() + .unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e)); + + // If we didn't write anything we should clean up the empty file, and + // break from the loop. + if bytes_consumed == 0 { + // The output file is only ever created if filter's aren't used. + // Complicated, I know... 
+ if settings.filter.is_none() { + remove_file(filename) + .unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e)); } + break; } - let consumed = splitter.consume(&mut control); - crash_if_err!(1, writer.write_all(consumed.as_bytes())); - - let advance = consumed.chars().count(); - let clone = control.current_line.clone(); - let sl = clone; - control.current_line = sl[advance..sl.chars().count()].to_owned(); + fileno += 1; } 0 } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 521cbbe9a..37856f419 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -4,11 +4,15 @@ extern crate regex; use self::rand::{thread_rng, Rng}; use self::regex::Regex; use crate::common::util::*; +use rand::SeedableRng; #[cfg(not(windows))] use std::env; -use std::fs::{read_dir, File}; use std::io::Write; use std::path::Path; +use std::{ + fs::{read_dir, File}, + io::BufWriter, +}; fn random_chars(n: usize) -> String { thread_rng() @@ -58,7 +62,7 @@ impl Glob { files.sort(); let mut data: Vec = vec![]; for name in &files { - data.extend(self.directory.read(name).into_bytes()); + data.extend(self.directory.read_bytes(name)); } data } @@ -81,20 +85,30 @@ impl RandomFile { } fn add_bytes(&mut self, bytes: usize) { - let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes }; - let mut n = bytes; - while n > chunk_size { - let _ = write!(self.inner, "{}", random_chars(chunk_size)); - n -= chunk_size; + // Note that just writing random characters isn't enough to cover all + // cases. We need truly random bytes. + let mut writer = BufWriter::new(&self.inner); + + // Seed the rng so as to avoid spurious test failures. 
+ let mut rng = rand::rngs::StdRng::seed_from_u64(123); + let mut buffer = [0; 1024]; + let mut remaining_size = bytes; + + while remaining_size > 0 { + let to_write = std::cmp::min(remaining_size, buffer.len()); + let buf = &mut buffer[..to_write]; + rng.fill(buf); + writer.write(buf).unwrap(); + + remaining_size -= to_write; } - let _ = write!(self.inner, "{}", random_chars(n)); } /// Add n lines each of size `RandomFile::LINESIZE` fn add_lines(&mut self, lines: usize) { let mut n = lines; while n > 0 { - let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)); + writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap(); n -= 1; } } @@ -104,18 +118,18 @@ impl RandomFile { fn test_split_default() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_default"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_lines(2000); ucmd.args(&[name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 2); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_numeric_prefixed_chunks_by_bytes() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_num_prefixed_chunks_by_bytes"; - let glob = Glob::new(&at, ".", r"a\d\d$"); RandomFile::new(&at, name).add_bytes(10000); ucmd.args(&[ "-d", // --numeric-suffixes @@ -123,52 +137,86 @@ fn test_split_numeric_prefixed_chunks_by_bytes() { "1000", name, "a", ]) .succeeds(); + + let glob = Glob::new(&at, ".", r"a\d\d$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_str_prefixed_chunks_by_bytes() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_str_prefixed_chunks_by_bytes"; - let glob = Glob::new(&at, ".", 
r"b[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_bytes(10000); + // Important that this is less than 1024 since that's our internal buffer + // size. Good to test that we don't overshoot. ucmd.args(&["-b", "1000", name, "b"]).succeeds(); + + let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); +} + +// This is designed to test what happens when the desired part size is not a +// multiple of the buffer size and we hopefully don't overshoot the desired part +// size. +#[test] +fn test_split_bytes_prime_part_size() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "test_split_bytes_prime_part_size"; + RandomFile::new(&at, name).add_bytes(10000); + // 1753 is prime and greater than the buffer size, 1024. + ucmd.args(&["-b", "1753", name, "b"]).succeeds(); + + let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 6); + for i in 0..5 { + assert_eq!(glob.directory.metadata(&glob.collect()[i]).len(), 1753); + } + assert_eq!(glob.directory.metadata(&glob.collect()[5]).len(), 1235); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_num_prefixed_chunks_by_lines() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_num_prefixed_chunks_by_lines"; - let glob = Glob::new(&at, ".", r"c\d\d$"); RandomFile::new(&at, name).add_lines(10000); ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds(); + + let glob = Glob::new(&at, ".", r"c\d\d$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_str_prefixed_chunks_by_lines() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_str_prefixed_chunks_by_lines"; - let glob = Glob::new(&at, ".", 
r"d[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_lines(10000); ucmd.args(&["-l", "1000", name, "d"]).succeeds(); + + let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_additional_suffix() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_additional_suffix"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$"); RandomFile::new(&at, name).add_lines(2000); ucmd.args(&["--additional-suffix", ".txt", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$"); assert_eq!(glob.count(), 2); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } // note: the test_filter* tests below are unix-only @@ -182,15 +230,16 @@ fn test_filter() { // like `test_split_default()` but run a command before writing let (at, mut ucmd) = at_and_ucmd!(); let name = "filtered"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); let n_lines = 3; RandomFile::new(&at, name).add_lines(n_lines); // change all characters to 'i' ucmd.args(&["--filter=sed s/./i/g > $FILE", name]) .succeeds(); + // assert all characters are 'i' / no character is not 'i' // (assert that command succeded) + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); assert!( glob.collate().iter().find(|&&c| { // is not i @@ -209,7 +258,6 @@ fn test_filter_with_env_var_set() { // implemented like `test_split_default()` but run a command before writing let (at, mut ucmd) = at_and_ucmd!(); let name = "filtered"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); let n_lines = 3; RandomFile::new(&at, name).add_lines(n_lines); @@ -217,7 +265,9 @@ fn test_filter_with_env_var_set() { env::set_var("FILE", &env_var_value); ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name]) .succeeds(); - 
assert_eq!(glob.collate(), at.read(name).into_bytes()); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.collate(), at.read_bytes(name)); assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value); } From b8a3a8995f875fb4615fd3bbf51e0820cf6bdc95 Mon Sep 17 00:00:00 2001 From: Samuel Ainsworth Date: Tue, 4 May 2021 15:19:35 -0700 Subject: [PATCH 035/148] Fix test_split_bytes_prime_part_size --- tests/by-util/test_split.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 37856f419..d83de4323 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -176,10 +176,13 @@ fn test_split_bytes_prime_part_size() { let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 6); + let mut fns = glob.collect(); + // glob.collect() is not guaranteed to return in sorted order, so we sort. + fns.sort(); for i in 0..5 { - assert_eq!(glob.directory.metadata(&glob.collect()[i]).len(), 1753); + assert_eq!(glob.directory.metadata(&fns[i]).len(), 1753); } - assert_eq!(glob.directory.metadata(&glob.collect()[5]).len(), 1235); + assert_eq!(glob.directory.metadata(&fns[5]).len(), 1235); assert_eq!(glob.collate(), at.read_bytes(name)); } From bacad8ed93d00315746fdc5142180a8517f2731c Mon Sep 17 00:00:00 2001 From: Samuel Ainsworth Date: Tue, 4 May 2021 15:21:35 -0700 Subject: [PATCH 036/148] Use u128 instead of usize for large numbers, and consistency across architectures --- src/uu/split/src/split.rs | 47 ++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 128ef73c6..b2d141b8a 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -221,7 +221,7 @@ trait Splitter { &mut self, reader: &mut BufReader>, writer: &mut BufWriter>, - ) -> usize; + ) -> u128; } struct LineSplitter { @@ 
-244,8 +244,8 @@ impl Splitter for LineSplitter { &mut self, reader: &mut BufReader>, writer: &mut BufWriter>, - ) -> usize { - let mut bytes_consumed = 0usize; + ) -> u128 { + let mut bytes_consumed = 0u128; let mut buffer = String::with_capacity(1024); for _ in 0..self.lines_per_split { let bytes_read = reader @@ -263,7 +263,7 @@ impl Splitter for LineSplitter { // replaces. buffer.clear(); - bytes_consumed += bytes_read; + bytes_consumed += bytes_read as u128; } bytes_consumed @@ -271,32 +271,29 @@ impl Splitter for LineSplitter { } struct ByteSplitter { - bytes_per_split: usize, + bytes_per_split: u128, } impl ByteSplitter { fn new(settings: &Settings) -> ByteSplitter { - // These multipliers are the same as supported by GNU coreutils with the - // exception of zetabytes (2^70) and yottabytes (2^80) as they overflow - // standard machine usize (2^64), so we disable for now. Note however - // that they are supported by the GNU coreutils split. Ignored for now. - let modifiers: Vec<(&str, usize)> = vec![ - ("K", 1024usize), + // These multipliers are the same as supported by GNU coreutils. 
+ let modifiers: Vec<(&str, u128)> = vec![ + ("K", 1024u128), ("M", 1024 * 1024), ("G", 1024 * 1024 * 1024), ("T", 1024 * 1024 * 1024 * 1024), ("P", 1024 * 1024 * 1024 * 1024 * 1024), ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), - // ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), - // ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), ("KB", 1000), ("MB", 1000 * 1000), ("GB", 1000 * 1000 * 1000), ("TB", 1000 * 1000 * 1000 * 1000), ("PB", 1000 * 1000 * 1000 * 1000 * 1000), ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000), - // ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), - // ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), ]; // This sequential find is acceptable since none of the modifiers are @@ -308,7 +305,7 @@ impl ByteSplitter { // Try to parse the actual numeral. let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] - .parse::() + .parse::() .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e)); ByteSplitter { @@ -322,15 +319,23 @@ impl Splitter for ByteSplitter { &mut self, reader: &mut BufReader>, writer: &mut BufWriter>, - ) -> usize { + ) -> u128 { // We buffer reads and writes. We proceed until `bytes_consumed` is // equal to `self.bytes_per_split` or we reach EOF. - let mut bytes_consumed = 0usize; + let mut bytes_consumed = 0u128; const BUFFER_SIZE: usize = 1024; let mut buffer = [0u8; BUFFER_SIZE]; while bytes_consumed < self.bytes_per_split { - // Don't overshoot `self.bytes_per_split`! - let bytes_desired = std::cmp::min(BUFFER_SIZE, self.bytes_per_split - bytes_consumed); + // Don't overshoot `self.bytes_per_split`! 
Note: Using std::cmp::min + // doesn't really work since we have to get types to match which + // can't be done in a way that keeps all conversions safe. + let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed { + BUFFER_SIZE + } else { + // This is a safe conversion since the difference must be less + // than BUFFER_SIZE in this branch. + (self.bytes_per_split - bytes_consumed) as usize + }; let bytes_read = reader .read(&mut buffer[0..bytes_desired]) .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); @@ -343,7 +348,7 @@ impl Splitter for ByteSplitter { .write_all(&buffer[0..bytes_read]) .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); - bytes_consumed += bytes_read; + bytes_consumed += bytes_read as u128; } bytes_consumed From 2ff9cc657039739b3fa22c5a2c47f02544faec3a Mon Sep 17 00:00:00 2001 From: Samuel Ainsworth Date: Tue, 4 May 2021 15:28:46 -0700 Subject: [PATCH 037/148] Typo in comment --- src/uu/split/src/split.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index b2d141b8a..726c9b8cd 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -430,7 +430,7 @@ fn split(settings: &Settings) -> i32 { // If we didn't write anything we should clean up the empty file, and // break from the loop. if bytes_consumed == 0 { - // The output file is only ever created if filter's aren't used. + // The output file is only ever created if --filter isn't used. // Complicated, I know... 
if settings.filter.is_none() { remove_file(filename) From d686f7e48f929c7fccfade292dce0a1dd2ff8eea Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 8 May 2021 22:31:53 +0200 Subject: [PATCH 038/148] sort: improve comments --- src/uu/sort/src/sort.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 02e54baf8..776f71058 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -582,11 +582,14 @@ impl FieldSelector { self.from.field != 1 || self.from.char == 0 || self.to.is_some() } - fn get_selection(&self, line: &str, fields: Option<&[Field]>) -> Selection { - let mut range = SelectionRange::new(self.get_range(&line, fields)); + /// Get the selection that corresponds to this selector for the line. + /// If needs_fields returned false, tokens may be None. + fn get_selection(&self, line: &str, tokens: Option<&[Field]>) -> Selection { + let mut range = SelectionRange::new(self.get_range(&line, tokens)); let num_cache = if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric { + // Parse NumInfo for this number. let (info, num_range) = NumInfo::parse( range.get_str(&line), NumInfoParseSettings { @@ -595,20 +598,23 @@ impl FieldSelector { decimal_pt: Some(DECIMAL_PT), }, ); + // Shorten the range to what we need to pass to numeric_str_cmp later. range.shorten(num_range); Some(Box::new(NumCache::WithInfo(info))) } else if self.settings.mode == SortMode::GeneralNumeric { + // Parse this number as f64, as this is the requirement for general numeric sorting. let str = range.get_str(&line); Some(Box::new(NumCache::AsF64(general_f64_parse( &str[get_leading_gen(str)], )))) } else { + // This is not a numeric sort, so we don't need a NumCache. None }; Selection { range, num_cache } } - /// Look up the slice that corresponds to this selector for the given line. + /// Look up the range in the line that corresponds to this selector. 
/// If needs_fields returned false, tokens may be None. fn get_range<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Range { enum Resolution { @@ -1356,7 +1362,8 @@ enum GeneralF64ParseResult { Infinity, } -/// Parse the beginning string into an f64, returning -inf instead of NaN on errors. +/// Parse the beginning string into a GeneralF64ParseResult. +/// Using a GeneralF64ParseResult instead of f64 is necessary to correctly order floats. #[inline(always)] fn general_f64_parse(a: &str) -> GeneralF64ParseResult { // The actual behavior here relies on Rust's implementation of parsing floating points. From e0ebf907a4fec88d81bb20b226f0ade6ae932d60 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 8 May 2021 23:06:17 +0200 Subject: [PATCH 039/148] sort: make merging stable When merging files we need to prioritize files that occur earlier in the command line arguments with -m. This also makes the extsort merge step (and thus extsort itself) stable again. --- src/uu/sort/src/sort.rs | 11 ++++++++++- tests/by-util/test_sort.rs | 24 +++++++++++++++++++++++ tests/fixtures/sort/ext_stable.expected | 4 ++++ tests/fixtures/sort/ext_stable.txt | 4 ++++ tests/fixtures/sort/merge_stable.expected | 3 +++ tests/fixtures/sort/merge_stable_1.txt | 2 ++ tests/fixtures/sort/merge_stable_2.txt | 1 + 7 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/sort/ext_stable.expected create mode 100644 tests/fixtures/sort/ext_stable.txt create mode 100644 tests/fixtures/sort/merge_stable.expected create mode 100644 tests/fixtures/sort/merge_stable_1.txt create mode 100644 tests/fixtures/sort/merge_stable_2.txt diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 730be0039..d35c62f87 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -686,6 +686,7 @@ struct MergeableFile<'a> { lines: Box + 'a>, current_line: Line, settings: &'a GlobalSettings, + file_index: usize, } // BinaryHeap depends on `Ord`. 
Note that we want to pop smallest items @@ -693,7 +694,14 @@ struct MergeableFile<'a> { // trick it into the right order by calling reverse() here. impl<'a> Ord for MergeableFile<'a> { fn cmp(&self, other: &MergeableFile) -> Ordering { - compare_by(&self.current_line, &other.current_line, self.settings).reverse() + let comparison = compare_by(&self.current_line, &other.current_line, self.settings); + if comparison == Ordering::Equal { + // If lines are equal, the earlier file takes precedence. + self.file_index.cmp(&other.file_index) + } else { + comparison + } + .reverse() } } @@ -729,6 +737,7 @@ impl<'a> FileMerger<'a> { lines, current_line: next_line, settings: &self.settings, + file_index: self.heap.len(), }; self.heap.push(mergeable_file); } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 4465e861f..bad9d577e 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -51,6 +51,18 @@ fn test_smaller_than_specified_segment() { .stdout_is_fixture("ext_sort.expected"); } +#[test] +fn test_ext_sort_stable() { + new_ucmd!() + .arg("-n") + .arg("--stable") + .arg("-S") + .arg("0M") + .arg("ext_stable.txt") + .succeeds() + .stdout_only_fixture("ext_stable.expected"); +} + #[test] fn test_extsort_zero_terminated() { new_ucmd!() @@ -566,6 +578,18 @@ fn test_merge_unique() { .stdout_only_fixture("merge_ints_interleaved.expected"); } +#[test] +fn test_merge_stable() { + new_ucmd!() + .arg("-m") + .arg("--stable") + .arg("-n") + .arg("merge_stable_1.txt") + .arg("merge_stable_2.txt") + .succeeds() + .stdout_only_fixture("merge_stable.expected"); +} + #[test] fn test_merge_reversed() { new_ucmd!() diff --git a/tests/fixtures/sort/ext_stable.expected b/tests/fixtures/sort/ext_stable.expected new file mode 100644 index 000000000..11ca4deb7 --- /dev/null +++ b/tests/fixtures/sort/ext_stable.expected @@ -0,0 +1,4 @@ +0a +0a +0b +0b diff --git a/tests/fixtures/sort/ext_stable.txt b/tests/fixtures/sort/ext_stable.txt new file mode 
100644 index 000000000..11ca4deb7 --- /dev/null +++ b/tests/fixtures/sort/ext_stable.txt @@ -0,0 +1,4 @@ +0a +0a +0b +0b diff --git a/tests/fixtures/sort/merge_stable.expected b/tests/fixtures/sort/merge_stable.expected new file mode 100644 index 000000000..49f57888d --- /dev/null +++ b/tests/fixtures/sort/merge_stable.expected @@ -0,0 +1,3 @@ +0a +0c +0b diff --git a/tests/fixtures/sort/merge_stable_1.txt b/tests/fixtures/sort/merge_stable_1.txt new file mode 100644 index 000000000..20528104f --- /dev/null +++ b/tests/fixtures/sort/merge_stable_1.txt @@ -0,0 +1,2 @@ +0a +0c \ No newline at end of file diff --git a/tests/fixtures/sort/merge_stable_2.txt b/tests/fixtures/sort/merge_stable_2.txt new file mode 100644 index 000000000..d3523d976 --- /dev/null +++ b/tests/fixtures/sort/merge_stable_2.txt @@ -0,0 +1 @@ +0b \ No newline at end of file From 112b04276922a3c10f39abf88907bccf714d6b30 Mon Sep 17 00:00:00 2001 From: Nicolas Thery Date: Sun, 9 May 2021 15:42:55 +0200 Subject: [PATCH 040/148] wc: emit '-' in ouput when set on command-line When stdin is explicitly specified on the command-line with '-', emit it in the output stats to match GNU wc output. Fixes #2188. --- src/uu/wc/src/wc.rs | 100 ++++++++++++++++++++++++++----------- src/uu/wc/src/wordcount.rs | 8 +-- tests/by-util/test_wc.rs | 9 ++++ 3 files changed, 85 insertions(+), 32 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 33b2ba5ec..226608d40 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -104,6 +104,34 @@ fn get_usage() -> String { ) } +enum StdinKind { + /// Stdin specified on command-line with "-". + Explicit, + + /// Stdin implicitly specified on command-line by not passing any positional argument. + Implicit, +} + +/// Supported inputs. +enum Input { + /// A regular file. + Path(String), + + /// Standard input. + Stdin(StdinKind), +} + +impl Input { + /// Converts input to title that appears in stats. 
+ fn to_title(&self) -> Option<&str> { + match self { + Input::Path(path) => Some(path), + Input::Stdin(StdinKind::Explicit) => Some("-"), + Input::Stdin(StdinKind::Implicit) => None, + } + } +} + pub fn uumain(args: impl uucore::Args) -> i32 { let usage = get_usage(); @@ -144,18 +172,27 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .arg(Arg::with_name(ARG_FILES).multiple(true).takes_value(true)) .get_matches_from(args); - let mut files: Vec = matches + let mut inputs: Vec = matches .values_of(ARG_FILES) - .map(|v| v.map(ToString::to_string).collect()) + .map(|v| { + v.map(|i| { + if i == "-" { + Input::Stdin(StdinKind::Explicit) + } else { + Input::Path(ToString::to_string(i)) + } + }) + .collect() + }) .unwrap_or_default(); - if files.is_empty() { - files.push("-".to_owned()); + if inputs.is_empty() { + inputs.push(Input::Stdin(StdinKind::Implicit)); } let settings = Settings::new(&matches); - if wc(files, &settings).is_ok() { + if wc(inputs, &settings).is_ok() { 0 } else { 1 @@ -198,32 +235,35 @@ fn word_count_from_reader( Ok(total) } -fn word_count_from_path(path: &str, settings: &Settings) -> WcResult { - if path == "-" { - let stdin = io::stdin(); - let stdin_lock = stdin.lock(); - word_count_from_reader(stdin_lock, settings, path) - } else { - let path_obj = Path::new(path); - if path_obj.is_dir() { - Err(WcError::IsDirectory(path.to_owned())) - } else { - let file = File::open(path)?; - word_count_from_reader(file, settings, path) +fn word_count_from_input(input: &Input, settings: &Settings) -> WcResult { + match input { + Input::Stdin(_) => { + let stdin = io::stdin(); + let stdin_lock = stdin.lock(); + word_count_from_reader(stdin_lock, settings, "-") + } + Input::Path(path) => { + let path_obj = Path::new(path); + if path_obj.is_dir() { + Err(WcError::IsDirectory(path.to_owned())) + } else { + let file = File::open(path)?; + word_count_from_reader(file, settings, path) + } } } } -fn wc(files: Vec, settings: &Settings) -> Result<(), u32> { +fn 
wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { let mut total_word_count = WordCount::default(); let mut results = vec![]; let mut max_width: usize = 0; let mut error_count = 0; - let num_files = files.len(); + let num_inputs = inputs.len(); - for path in &files { - let word_count = word_count_from_path(&path, settings).unwrap_or_else(|err| { + for input in &inputs { + let word_count = word_count_from_input(&input, settings).unwrap_or_else(|err| { show_error!("{}", err); error_count += 1; WordCount::default() @@ -235,18 +275,22 @@ fn wc(files: Vec, settings: &Settings) -> Result<(), u32> { // formatting each count as a string for output. max_width = max(max_width, word_count.bytes.to_string().len()); total_word_count += word_count; - results.push(word_count.with_title(path)); + results.push(word_count.with_title(input.to_title())); } for result in &results { if let Err(err) = print_stats(settings, &result, max_width) { - show_warning!("failed to print result for {}: {}", result.title, err); + show_warning!( + "failed to print result for {}: {}", + result.title.unwrap_or(""), + err + ); error_count += 1; } } - if num_files > 1 { - let total_result = total_word_count.with_title("total"); + if num_inputs > 1 { + let total_result = total_word_count.with_title(Some("total")); if let Err(err) = print_stats(settings, &total_result, max_width) { show_warning!("failed to print total: {}", err); error_count += 1; @@ -315,10 +359,10 @@ fn print_stats( )?; } - if result.title == "-" { - writeln!(stdout_lock)?; + if let Some(title) = result.title { + writeln!(stdout_lock, " {}", title)?; } else { - writeln!(stdout_lock, " {}", result.title)?; + writeln!(stdout_lock)?; } Ok(()) diff --git a/src/uu/wc/src/wordcount.rs b/src/uu/wc/src/wordcount.rs index 785e57eff..9e2a81fca 100644 --- a/src/uu/wc/src/wordcount.rs +++ b/src/uu/wc/src/wordcount.rs @@ -103,7 +103,7 @@ impl WordCount { (word_count, char_count) } - pub fn with_title(self, title: &str) -> TitledWordCount { + 
pub fn with_title(self, title: Option<&str>) -> TitledWordCount { TitledWordCount { title, count: self } } @@ -120,12 +120,12 @@ impl WordCount { } } -/// This struct supplements the actual word count with a title that is displayed -/// to the user at the end of the program. +/// This struct supplements the actual word count with an optional title that is +/// displayed to the user at the end of the program. /// The reason we don't simply include title in the `WordCount` struct is that /// it would result in unneccesary copying of `String`. #[derive(Debug, Default, Clone)] pub struct TitledWordCount<'a> { - pub title: &'a str, + pub title: Option<&'a str>, pub count: WordCount, } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 87a86fca4..b61d7e3aa 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -36,6 +36,15 @@ fn test_stdin_default() { .stdout_is(" 13 109 772\n"); } +#[test] +fn test_stdin_explicit() { + new_ucmd!() + .pipe_in_fixture("lorem_ipsum.txt") + .arg("-") + .run() + .stdout_is(" 13 109 772 -\n"); +} + #[test] fn test_utf8() { new_ucmd!() From 33206e1adcac4a938d879a815b7804e5ced42d4b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 9 May 2021 18:42:16 +0200 Subject: [PATCH 041/148] Ignore test_domain_socket as it fails too often --- tests/by-util/test_cat.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index c8ae29a9d..67722daa2 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -395,6 +395,7 @@ fn test_dev_full_show_all() { #[test] #[cfg(unix)] +#[ignore] fn test_domain_socket() { use std::io::prelude::*; use std::sync::{Arc, Barrier}; From 8747800697d7d58532c71805cb8f7fbfc32783d6 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Sun, 9 May 2021 21:53:03 +0300 Subject: [PATCH 042/148] Switched 'arch' to use clap instead of getopts --- Cargo.lock | 2 ++ src/uu/arch/Cargo.toml | 1 + src/uu/arch/src/arch.rs | 17 
++++++++++------- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13441d4fe..730c53547 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1658,6 +1658,7 @@ dependencies = [ name = "uu_arch" version = "0.0.6" dependencies = [ + "clap", "platform-info", "uucore", "uucore_procs", @@ -2406,6 +2407,7 @@ name = "uu_stat" version = "0.0.6" dependencies = [ "clap", + "libc", "time", "uucore", "uucore_procs", diff --git a/src/uu/arch/Cargo.toml b/src/uu/arch/Cargo.toml index 0b4359620..b3fe1f8cb 100644 --- a/src/uu/arch/Cargo.toml +++ b/src/uu/arch/Cargo.toml @@ -16,6 +16,7 @@ path = "src/arch.rs" [dependencies] platform-info = "0.1" +clap = "2.33" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/arch/src/arch.rs b/src/uu/arch/src/arch.rs index a4c57e282..31278f000 100644 --- a/src/uu/arch/src/arch.rs +++ b/src/uu/arch/src/arch.rs @@ -10,17 +10,20 @@ extern crate uucore; use platform_info::*; -use uucore::InvalidEncodingHandling; -static SYNTAX: &str = "Display machine architecture"; +use clap::App; + +static VERSION: &str = env!("CARGO_PKG_VERSION"); +static ABOUT: &str = "Display machine architecture"; static SUMMARY: &str = "Determine architecture name for current machine."; -static LONG_HELP: &str = ""; pub fn uumain(args: impl uucore::Args) -> i32 { - app!(SYNTAX, SUMMARY, LONG_HELP).parse( - args.collect_str(InvalidEncodingHandling::ConvertLossy) - .accept_any(), - ); + App::new(executable!()) + .version(VERSION) + .about(ABOUT) + .after_help(SUMMARY) + .get_matches_from(args); + let uts = return_if_err!(1, PlatformInfo::new()); println!("{}", uts.machine().trim()); 0 From 0cc779c73360199b661246fa343101ef07bfece1 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 9 May 2021 21:36:39 -0400 Subject: [PATCH 043/148] tail: simplify unbounded_tail() function Refactor common code out of two 
branches of the `unbounded_tail()` function into a new `unbounded_tail_collect()` helper function, that collects from an iterator into a `VecDeque` and keeps either the last `n` elements or all but the first `n` elements. This commit also adds a new struct, `RingBuffer`, in a new module, `ringbuffer.rs`, to be responsible for keeping the last `n` elements of an iterator. --- src/uu/tail/src/ringbuffer.rs | 61 ++++++++++++++++++++++ src/uu/tail/src/tail.rs | 98 +++++++++++++---------------------- 2 files changed, 96 insertions(+), 63 deletions(-) create mode 100644 src/uu/tail/src/ringbuffer.rs diff --git a/src/uu/tail/src/ringbuffer.rs b/src/uu/tail/src/ringbuffer.rs new file mode 100644 index 000000000..86483b8ed --- /dev/null +++ b/src/uu/tail/src/ringbuffer.rs @@ -0,0 +1,61 @@ +//! A fixed-size ring buffer. +use std::collections::VecDeque; + +/// A fixed-size ring buffer backed by a `VecDeque`. +/// +/// If the ring buffer is not full, then calling the [`push_back`] +/// method appends elements, as in a [`VecDeque`]. If the ring buffer +/// is full, then calling [`push_back`] removes the element at the +/// front of the buffer (in a first-in, first-out manner) before +/// appending the new element to the back of the buffer. +/// +/// Use [`from_iter`] to take the last `size` elements from an +/// iterator. 
+/// +/// # Examples +/// +/// After exceeding the size limit, the oldest elements are dropped in +/// favor of the newest element: +/// +/// ```rust,ignore +/// let buffer: RingBuffer = RingBuffer::new(2); +/// buffer.push_back(0); +/// buffer.push_back(1); +/// buffer.push_back(2); +/// assert_eq!(vec![1, 2], buffer.data); +/// ``` +/// +/// Take the last `n` elements from an iterator: +/// +/// ```rust,ignore +/// let iter = vec![0, 1, 2, 3].iter(); +/// assert_eq!(vec![2, 3], RingBuffer::from_iter(iter, 2).data); +/// ``` +pub struct RingBuffer { + pub data: VecDeque, + size: usize, +} + +impl RingBuffer { + pub fn new(size: usize) -> RingBuffer { + RingBuffer { + data: VecDeque::new(), + size, + } + } + + pub fn from_iter(iter: impl Iterator, size: usize) -> RingBuffer { + let mut ringbuf = RingBuffer::new(size); + for value in iter { + ringbuf.push_back(value); + } + ringbuf + } + + pub fn push_back(&mut self, value: T) { + if self.size <= self.data.len() { + self.data.pop_front(); + } + self.data.push_back(value) + } +} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index fec88e841..0a3ff778d 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -16,6 +16,8 @@ extern crate clap; extern crate uucore; mod platform; +mod ringbuffer; +use ringbuffer::RingBuffer; use clap::{App, Arg}; use std::collections::VecDeque; @@ -482,71 +484,46 @@ fn bounded_tail(mut file: &File, settings: &Settings) { } } +/// Collect the last elements of an iterator into a `VecDeque`. +/// +/// This function returns a [`VecDeque`] containing either the last +/// `count` elements of `iter`, an [`Iterator`] over [`Result`] +/// instances, or all but the first `count` elements of `iter`. If +/// `beginning` is `true`, then all but the first `count` elements are +/// returned. +/// +/// # Panics +/// +/// If any element of `iter` is an [`Err`], then this function panics. 
+fn unbounded_tail_collect( + iter: impl Iterator>, + count: u64, + beginning: bool, +) -> VecDeque +where + E: fmt::Debug, +{ + if beginning { + iter.skip(count as usize).map(|r| r.unwrap()).collect() + } else { + RingBuffer::from_iter(iter.map(|r| r.unwrap()), count as usize).data + } +} + fn unbounded_tail(reader: &mut BufReader, settings: &Settings) { // Read through each line/char and store them in a ringbuffer that always // contains count lines/chars. When reaching the end of file, output the // data in the ringbuf. match settings.mode { - FilterMode::Lines(mut count, _delimiter) => { - let mut ringbuf: VecDeque = VecDeque::new(); - let mut skip = if settings.beginning { - let temp = count; - count = ::std::u64::MAX; - temp - 1 - } else { - 0 - }; - loop { - let mut datum = String::new(); - match reader.read_line(&mut datum) { - Ok(0) => break, - Ok(_) => { - if skip > 0 { - skip -= 1; - } else { - if count <= ringbuf.len() as u64 { - ringbuf.pop_front(); - } - ringbuf.push_back(datum); - } - } - Err(err) => panic!("{}", err), - } - } - let mut stdout = stdout(); - for datum in &ringbuf { - print_string(&mut stdout, datum); + FilterMode::Lines(count, _) => { + for line in unbounded_tail_collect(reader.lines(), count, settings.beginning) { + println!("{}", line); } } - FilterMode::Bytes(mut count) => { - let mut ringbuf: VecDeque = VecDeque::new(); - let mut skip = if settings.beginning { - let temp = count; - count = ::std::u64::MAX; - temp - 1 - } else { - 0 - }; - loop { - let mut datum = [0; 1]; - match reader.read(&mut datum) { - Ok(0) => break, - Ok(_) => { - if skip > 0 { - skip -= 1; - } else { - if count <= ringbuf.len() as u64 { - ringbuf.pop_front(); - } - ringbuf.push_back(datum[0]); - } - } - Err(err) => panic!("{}", err), - } - } - let mut stdout = stdout(); - for datum in &ringbuf { - print_byte(&mut stdout, *datum); + FilterMode::Bytes(count) => { + for byte in unbounded_tail_collect(reader.bytes(), count, settings.beginning) { + let mut 
stdout = stdout(); + print_byte(&mut stdout, byte); } } } @@ -562,8 +539,3 @@ fn print_byte(stdout: &mut T, ch: u8) { crash!(1, "{}", err); } } - -#[inline] -fn print_string(_: &mut T, s: &str) { - print!("{}", s); -} From 881bbf512ed544142214066d2b60ff6419b27d73 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 10 May 2021 08:59:45 +0200 Subject: [PATCH 044/148] refresh cargo.lock with recent updates --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 13441d4fe..e729bfcd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2406,6 +2406,7 @@ name = "uu_stat" version = "0.0.6" dependencies = [ "clap", + "libc", "time", "uucore", "uucore_procs", From 203ee463c74fd0c7f134c2f1009d9a5accf8fbf9 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Sun, 9 May 2021 14:21:15 +0200 Subject: [PATCH 045/148] stat/uucore: refactor - move fsext.rs to uucore --- src/uu/stat/Cargo.toml | 4 +- src/uu/stat/src/stat.rs | 9 ++-- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features.rs | 2 + .../src => uucore/src/lib/features}/fsext.rs | 43 ++++++++++++++++--- src/uucore/src/lib/lib.rs | 2 + tests/by-util/test_stat.rs | 27 ------------ 7 files changed, 47 insertions(+), 41 deletions(-) rename src/{uu/stat/src => uucore/src/lib/features}/fsext.rs (93%) diff --git a/src/uu/stat/Cargo.toml b/src/uu/stat/Cargo.toml index c51f972a9..86b7da139 100644 --- a/src/uu/stat/Cargo.toml +++ b/src/uu/stat/Cargo.toml @@ -16,9 +16,7 @@ path = "src/stat.rs" [dependencies] clap = "2.33" -time = "0.1.40" -libc = "0.2" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs"] } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs", "fsext"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 905058766..8b148d39d 100644 --- a/src/uu/stat/src/stat.rs +++ 
b/src/uu/stat/src/stat.rs @@ -5,15 +5,16 @@ // For the full copyright and license information, please view the LICENSE file // that was distributed with this source code. -// spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE - -mod fsext; -pub use crate::fsext::*; +// spell-checker:ignore (ToDO) showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE #[macro_use] extern crate uucore; use uucore::entries; use uucore::fs::display_permissions; +use uucore::fsext::{ + pretty_filetype, pretty_fstype, pretty_time, read_fs_list, statfs, BirthTime, FsMeta, +}; +use uucore::libc::mode_t; use clap::{App, Arg, ArgMatches}; use std::borrow::Cow; diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 291456760..51bb4c66e 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -38,6 +38,7 @@ default = [] encoding = ["data-encoding", "thiserror"] entries = ["libc"] fs = ["libc"] +fsext = ["libc", "time"] mode = ["libc"] parse_time = [] perms = ["libc"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index c26225cb7..0287b9675 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -4,6 +4,8 @@ pub mod encoding; #[cfg(feature = "fs")] pub mod fs; +#[cfg(feature = "fsext")] +pub mod fsext; #[cfg(feature = "parse_time")] pub mod parse_time; #[cfg(feature = "zero-copy")] diff --git a/src/uu/stat/src/fsext.rs b/src/uucore/src/lib/features/fsext.rs similarity index 93% rename from src/uu/stat/src/fsext.rs rename to src/uucore/src/lib/features/fsext.rs index e831a159e..3c95af73e 100644 --- a/src/uu/stat/src/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -9,6 +9,8 @@ extern crate time; +pub use crate::*; // import macros from `../../macros.rs` + #[cfg(target_os = "linux")] static LINUX_MTAB: &str = "/etc/mtab"; 
#[cfg(target_os = "linux")] @@ -16,12 +18,12 @@ static LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; static MOUNT_OPT_BIND: &str = "bind"; use self::time::Timespec; -use std::time::UNIX_EPOCH; -pub use uucore::libc::{ +pub use libc::{ c_int, mode_t, strerror, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH, S_IXUSR, }; +use std::time::UNIX_EPOCH; pub trait BirthTime { fn pretty_birth(&self) -> String; @@ -93,7 +95,7 @@ use std::path::Path; target_os = "android", target_os = "freebsd" ))] -use uucore::libc::statfs as Sstatfs; +use libc::statfs as Sstatfs; #[cfg(any( target_os = "openbsd", target_os = "netbsd", @@ -101,7 +103,7 @@ use uucore::libc::statfs as Sstatfs; target_os = "bitrig", target_os = "dragonfly" ))] -use uucore::libc::statvfs as Sstatfs; +use libc::statvfs as Sstatfs; #[cfg(any( target_os = "linux", @@ -109,7 +111,7 @@ use uucore::libc::statvfs as Sstatfs; target_os = "android", target_os = "freebsd" ))] -use uucore::libc::statfs as statfs_fn; +use libc::statfs as statfs_fn; #[cfg(any( target_os = "openbsd", target_os = "netbsd", @@ -117,7 +119,7 @@ use uucore::libc::statfs as statfs_fn; target_os = "bitrig", target_os = "dragonfly" ))] -use uucore::libc::statvfs as statfs_fn; +use libc::statvfs as statfs_fn; pub trait FsMeta { fn fs_type(&self) -> i64; @@ -184,7 +186,7 @@ impl FsMeta for Sstatfs { #[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux"))] fn fsid(&self) -> u64 { let f_fsid: &[u32; 2] = - unsafe { &*(&self.f_fsid as *const uucore::libc::fsid_t as *const [u32; 2]) }; + unsafe { &*(&self.f_fsid as *const libc::fsid_t as *const [u32; 2]) }; (u64::from(f_fsid[0])) << 32 | u64::from(f_fsid[1]) } #[cfg(not(any(target_vendor = "apple", target_os = "freebsd", target_os = "linux")))] @@ -534,3 +536,30 @@ pub fn read_fs_list() -> Vec { .collect::>() } } + +#[cfg(test)] +mod tests { + use super::*; + 
+ #[test] + fn test_file_type() { + assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); + assert_eq!("character special file", pretty_filetype(S_IFCHR, 0)); + assert_eq!("regular file", pretty_filetype(S_IFREG, 1)); + assert_eq!("regular empty file", pretty_filetype(S_IFREG, 0)); + assert_eq!("weird file", pretty_filetype(0, 0)); + } + + #[test] + fn test_fs_type() { + assert_eq!("ext2/ext3", pretty_fstype(0xEF53)); + assert_eq!("tmpfs", pretty_fstype(0x01021994)); + assert_eq!("nfs", pretty_fstype(0x6969)); + assert_eq!("btrfs", pretty_fstype(0x9123683e)); + assert_eq!("xfs", pretty_fstype(0x58465342)); + assert_eq!("zfs", pretty_fstype(0x2FC12FC1)); + assert_eq!("ntfs", pretty_fstype(0x5346544e)); + assert_eq!("fat", pretty_fstype(0x4006)); + assert_eq!("UNKNOWN (0x1234)", pretty_fstype(0x1234)); + } +} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 6dddf8696..f2a4292fb 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -35,6 +35,8 @@ pub use crate::mods::ranges; pub use crate::features::encoding; #[cfg(feature = "fs")] pub use crate::features::fs; +#[cfg(feature = "fsext")] +pub use crate::features::fsext; #[cfg(feature = "parse_time")] pub use crate::features::parse_time; #[cfg(feature = "zero-copy")] diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 569d6873e..308dcb9f5 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -5,33 +5,6 @@ use crate::common::util::*; extern crate stat; pub use self::stat::*; -#[cfg(test)] -mod test_fsext { - use super::*; - - #[test] - fn test_file_type() { - assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); - assert_eq!("character special file", pretty_filetype(S_IFCHR, 0)); - assert_eq!("regular file", pretty_filetype(S_IFREG, 1)); - assert_eq!("regular empty file", pretty_filetype(S_IFREG, 0)); - assert_eq!("weird file", pretty_filetype(0, 0)); - } - - #[test] - fn test_fs_type() { - assert_eq!("ext2/ext3", 
pretty_fstype(0xEF53)); - assert_eq!("tmpfs", pretty_fstype(0x01021994)); - assert_eq!("nfs", pretty_fstype(0x6969)); - assert_eq!("btrfs", pretty_fstype(0x9123683e)); - assert_eq!("xfs", pretty_fstype(0x58465342)); - assert_eq!("zfs", pretty_fstype(0x2FC12FC1)); - assert_eq!("ntfs", pretty_fstype(0x5346544e)); - assert_eq!("fat", pretty_fstype(0x4006)); - assert_eq!("UNKNOWN (0x1234)", pretty_fstype(0x1234)); - } -} - #[test] fn test_scanutil() { assert_eq!(Some((-5, 2)), "-5zxc".scan_num::()); From 4ac75898c377c433a2cb2698dd09c94dae14540e Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 10 May 2021 13:28:35 +0200 Subject: [PATCH 046/148] fix clippy warnings --- src/uu/factor/src/factor.rs | 3 ++- src/uu/ls/src/ls.rs | 4 ++-- src/uu/test/src/parser.rs | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 138254b51..ebe06a1c5 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -163,6 +163,7 @@ pub fn factor(mut n: u64) -> Factors { let (factors, n) = table::factor(n, factors); + #[allow(clippy::let_and_return)] let r = if n < (1 << 32) { _factor::>(n, factors) } else { @@ -280,6 +281,6 @@ impl std::ops::BitXor for Factors { } debug_assert_eq!(r.product(), self.product().pow(rhs.into())); - return r; + r } } diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 16f2ce8ff..c5389295b 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1413,11 +1413,11 @@ fn get_block_size(md: &Metadata, config: &Config) -> u64 { { // hard-coded for now - enabling setting this remains a TODO let ls_block_size = 1024; - return match config.size_format { + match config.size_format { SizeFormat::Binary => md.blocks() * 512, SizeFormat::Decimal => md.blocks() * 512, SizeFormat::Bytes => md.blocks() * 512 / ls_block_size, - }; + } } #[cfg(not(unix))] diff --git a/src/uu/test/src/parser.rs b/src/uu/test/src/parser.rs index f1ca9dad6..2c9c9db30 100644 --- 
a/src/uu/test/src/parser.rs +++ b/src/uu/test/src/parser.rs @@ -121,6 +121,8 @@ impl Parser { /// Test if the next token in the stream is a BOOLOP (-a or -o), without /// removing the token from the stream. fn peek_is_boolop(&mut self) -> bool { + // TODO: change to `matches!(self.peek(), Symbol::BoolOp(_))` once MSRV is 1.42 + // #[allow(clippy::match_like_matches_macro)] // needs MSRV 1.43 if let Symbol::BoolOp(_) = self.peek() { true } else { From 381f8dafc6eed456f4a9298baba635ccfd5c826b Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 10 May 2021 10:56:33 +0200 Subject: [PATCH 047/148] df/uucore: refactor - move duplicate code to uucore/fsext.rs --- src/uu/df/Cargo.toml | 6 +- src/uu/df/src/df.rs | 486 +---------------- src/uucore/Cargo.toml | 3 + src/uucore/src/lib/features/fsext.rs | 754 ++++++++++++++++++--------- src/uucore/src/lib/lib.rs | 4 +- 5 files changed, 519 insertions(+), 734 deletions(-) diff --git a/src/uu/df/Cargo.toml b/src/uu/df/Cargo.toml index 4770cb557..0e65fdb32 100644 --- a/src/uu/df/Cargo.toml +++ b/src/uu/df/Cargo.toml @@ -16,14 +16,10 @@ path = "src/df.rs" [dependencies] clap = "2.33" -libc = "0.2" number_prefix = "0.4" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["libc", "fsext"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } -[target.'cfg(target_os = "windows")'.dependencies] -winapi = { version = "0.3", features = ["errhandlingapi", "fileapi", "handleapi", "winerror"] } - [[bin]] name = "df" path = "src/main.rs" diff --git a/src/uu/df/src/df.rs b/src/uu/df/src/df.rs index c917eb2e8..8219b0a27 100644 --- a/src/uu/df/src/df.rs +++ b/src/uu/df/src/df.rs @@ -6,22 +6,17 @@ // For the full copyright and license information, please view the LICENSE file // that was distributed with this source code. 
-// spell-checker:ignore (ToDO) mountinfo mtab BLOCKSIZE getmntinfo fobj mptr noatime Iused overmounted -// spell-checker:ignore (libc/fs) asyncreads asyncwrites autofs bavail bfree bsize charspare cifs debugfs devfs devpts ffree frsize fsid fstypename fusectl inode inodes iosize kernfs mntbufp mntfromname mntonname mqueue namemax pipefs smbfs statfs statvfs subfs syncreads syncwrites sysfs wcslen +// spell-checker:ignore (ToDO) mountinfo BLOCKSIZE fobj mptr noatime Iused overmounted +// spell-checker:ignore (libc/fs) asyncreads asyncwrites autofs bavail bfree bsize charspare cifs debugfs devfs devpts ffree frsize fsid fstypename fusectl inode inodes iosize kernfs mntbufp mntfromname mntonname mqueue namemax pipefs smbfs statvfs subfs syncreads syncwrites sysfs wcslen #[macro_use] extern crate uucore; +#[cfg(unix)] +use uucore::fsext::statfs_fn; +use uucore::fsext::{read_fs_list, FsUsage, MountInfo}; use clap::{App, Arg}; -#[cfg(windows)] -use winapi::um::errhandlingapi::GetLastError; -#[cfg(windows)] -use winapi::um::fileapi::{ - FindFirstVolumeW, FindNextVolumeW, FindVolumeClose, GetDriveTypeW, GetVolumeInformationW, - GetVolumePathNamesForVolumeNameW, QueryDosDeviceW, -}; - use number_prefix::NumberPrefix; use std::cell::Cell; use std::collections::HashMap; @@ -32,41 +27,11 @@ use std::ffi::CString; #[cfg(unix)] use std::mem; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use libc::c_int; -#[cfg(target_vendor = "apple")] -use libc::statfs; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::ffi::CStr; -#[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "windows"))] -use std::ptr; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::slice; - #[cfg(target_os = "freebsd")] -use libc::{c_char, fsid_t, uid_t}; +use uucore::libc::{c_char, fsid_t, uid_t}; -#[cfg(target_os = "linux")] -use std::fs::File; -#[cfg(target_os = "linux")] -use std::io::{BufRead, BufReader}; - -#[cfg(windows)] -use 
std::ffi::OsString; -#[cfg(windows)] -use std::os::windows::ffi::OsStrExt; -#[cfg(windows)] -use std::os::windows::ffi::OsStringExt; #[cfg(windows)] use std::path::Path; -#[cfg(windows)] -use winapi::shared::minwindef::DWORD; -#[cfg(windows)] -use winapi::um::fileapi::GetDiskFreeSpaceW; -#[cfg(windows)] -use winapi::um::handleapi::INVALID_HANDLE_VALUE; -#[cfg(windows)] -use winapi::um::winbase::DRIVE_REMOTE; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Show information about the file system on which each FILE resides,\n\ @@ -75,14 +40,6 @@ static ABOUT: &str = "Show information about the file system on which each FILE static EXIT_OK: i32 = 0; static EXIT_ERR: i32 = 1; -#[cfg(windows)] -const MAX_PATH: usize = 266; - -#[cfg(target_os = "linux")] -static LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; -#[cfg(target_os = "linux")] -static LINUX_MTAB: &str = "/etc/mtab"; - static OPT_ALL: &str = "all"; static OPT_BLOCKSIZE: &str = "blocksize"; static OPT_DIRECT: &str = "direct"; @@ -101,8 +58,6 @@ static OPT_TYPE: &str = "type"; static OPT_PRINT_TYPE: &str = "print-type"; static OPT_EXCLUDE_TYPE: &str = "exclude-type"; -static MOUNT_OPT_BIND: &str = "bind"; - /// Store names of file systems as a selector. /// Note: `exclude` takes priority over `include`. 
struct FsSelector { @@ -121,136 +76,16 @@ struct Options { fs_selector: FsSelector, } -#[derive(Debug, Clone)] -struct MountInfo { - // it stores `volume_name` in windows platform and `dev_id` in unix platform - dev_id: String, - dev_name: String, - fs_type: String, - mount_dir: String, - mount_option: String, // we only care "bind" option - mount_root: String, - remote: bool, - dummy: bool, -} - -#[cfg(all( - target_os = "freebsd", - not(all(target_vendor = "apple", target_arch = "x86_64")) -))] -#[repr(C)] -#[derive(Copy, Clone)] -#[allow(non_camel_case_types)] -struct statfs { - f_version: u32, - f_type: u32, - f_flags: u64, - f_bsize: u64, - f_iosize: u64, - f_blocks: u64, - f_bfree: u64, - f_bavail: i64, - f_files: u64, - f_ffree: i64, - f_syncwrites: u64, - f_asyncwrites: u64, - f_syncreads: u64, - f_asyncreads: u64, - f_spare: [u64; 10usize], - f_namemax: u32, - f_owner: uid_t, - f_fsid: fsid_t, - f_charspare: [c_char; 80usize], - f_fstypename: [c_char; 16usize], - f_mntfromname: [c_char; 88usize], - f_mntonname: [c_char; 88usize], -} - -#[derive(Debug, Clone)] -struct FsUsage { - blocksize: u64, - blocks: u64, - bfree: u64, - bavail: u64, - bavail_top_bit_set: bool, - files: u64, - ffree: u64, -} - #[derive(Debug, Clone)] struct Filesystem { mountinfo: MountInfo, usage: FsUsage, } -#[cfg(windows)] -macro_rules! String2LPWSTR { - ($str: expr) => { - OsString::from($str.clone()) - .as_os_str() - .encode_wide() - .chain(Some(0)) - .collect::>() - .as_ptr() - }; -} - -#[cfg(windows)] -#[allow(non_snake_case)] -fn LPWSTR2String(buf: &[u16]) -> String { - let len = unsafe { libc::wcslen(buf.as_ptr()) }; - OsString::from_wide(&buf[..len as usize]) - .into_string() - .unwrap() -} - fn get_usage() -> String { format!("{0} [OPTION]... 
[FILE]...", executable!()) } -#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] -extern "C" { - #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] - #[link_name = "getmntinfo$INODE64"] - fn getmntinfo(mntbufp: *mut *mut statfs, flags: c_int) -> c_int; - - #[cfg(any( - all(target_os = "freebsd"), - all(target_vendor = "apple", target_arch = "aarch64") - ))] - fn getmntinfo(mntbufp: *mut *mut statfs, flags: c_int) -> c_int; -} - -#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] -impl From for MountInfo { - fn from(statfs: statfs) -> Self { - let mut info = MountInfo { - dev_id: "".to_string(), - dev_name: unsafe { - CStr::from_ptr(&statfs.f_mntfromname[0]) - .to_string_lossy() - .into_owned() - }, - fs_type: unsafe { - CStr::from_ptr(&statfs.f_fstypename[0]) - .to_string_lossy() - .into_owned() - }, - mount_dir: unsafe { - CStr::from_ptr(&statfs.f_mntonname[0]) - .to_string_lossy() - .into_owned() - }, - mount_root: "".to_string(), - mount_option: "".to_string(), - remote: false, - dummy: false, - }; - info.set_missing_fields(); - info - } -} - impl FsSelector { fn new() -> FsSelector { FsSelector { @@ -295,239 +130,6 @@ impl Options { } } -impl MountInfo { - fn set_missing_fields(&mut self) { - #[cfg(unix)] - { - // We want to keep the dev_id on Windows - // but set dev_id - let path = CString::new(self.mount_dir.clone()).unwrap(); - unsafe { - let mut stat = mem::zeroed(); - if libc::stat(path.as_ptr(), &mut stat) == 0 { - self.dev_id = (stat.st_dev as i32).to_string(); - } else { - self.dev_id = "".to_string(); - } - } - } - // set MountInfo::dummy - match self.fs_type.as_ref() { - "autofs" | "proc" | "subfs" - /* for Linux 2.6/3.x */ - | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" - /* FreeBSD, Linux 2.4 */ - | "devfs" - /* for NetBSD 3.0 */ - | "kernfs" - /* for Irix 6.5 */ - | "ignore" => self.dummy = true, - _ => self.dummy = self.fs_type == "none" - && self.mount_option.find(MOUNT_OPT_BIND).is_none(), - 
} - // set MountInfo::remote - #[cfg(windows)] - { - self.remote = DRIVE_REMOTE == unsafe { GetDriveTypeW(String2LPWSTR!(self.mount_root)) }; - } - #[cfg(unix)] - { - if self.dev_name.find(':').is_some() - || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" - || self.fs_type == "cifs") - || self.dev_name == "-hosts" - { - self.remote = true; - } else { - self.remote = false; - } - } - } - - #[cfg(target_os = "linux")] - fn new(file_name: &str, raw: Vec<&str>) -> Option { - match file_name { - // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue - // "man proc" for more details - "/proc/self/mountinfo" => { - let mut m = MountInfo { - dev_id: "".to_string(), - dev_name: raw[9].to_string(), - fs_type: raw[8].to_string(), - mount_root: raw[3].to_string(), - mount_dir: raw[4].to_string(), - mount_option: raw[5].to_string(), - remote: false, - dummy: false, - }; - m.set_missing_fields(); - Some(m) - } - "/etc/mtab" => { - let mut m = MountInfo { - dev_id: "".to_string(), - dev_name: raw[0].to_string(), - fs_type: raw[2].to_string(), - mount_root: "".to_string(), - mount_dir: raw[1].to_string(), - mount_option: raw[3].to_string(), - remote: false, - dummy: false, - }; - m.set_missing_fields(); - Some(m) - } - _ => None, - } - } - #[cfg(windows)] - fn new(mut volume_name: String) -> Option { - let mut dev_name_buf = [0u16; MAX_PATH]; - volume_name.pop(); - unsafe { - QueryDosDeviceW( - OsString::from(volume_name.clone()) - .as_os_str() - .encode_wide() - .chain(Some(0)) - .skip(4) - .collect::>() - .as_ptr(), - dev_name_buf.as_mut_ptr(), - dev_name_buf.len() as DWORD, - ) - }; - volume_name.push('\\'); - let dev_name = LPWSTR2String(&dev_name_buf); - - let mut mount_root_buf = [0u16; MAX_PATH]; - let success = unsafe { - GetVolumePathNamesForVolumeNameW( - String2LPWSTR!(volume_name), - mount_root_buf.as_mut_ptr(), - mount_root_buf.len() as DWORD, - ptr::null_mut(), - ) - }; - if 0 == success { - // TODO: support the case 
when `GetLastError()` returns `ERROR_MORE_DATA` - return None; - } - let mount_root = LPWSTR2String(&mount_root_buf); - - let mut fs_type_buf = [0u16; MAX_PATH]; - let success = unsafe { - GetVolumeInformationW( - String2LPWSTR!(mount_root), - ptr::null_mut(), - 0 as DWORD, - ptr::null_mut(), - ptr::null_mut(), - ptr::null_mut(), - fs_type_buf.as_mut_ptr(), - fs_type_buf.len() as DWORD, - ) - }; - let fs_type = if 0 != success { - Some(LPWSTR2String(&fs_type_buf)) - } else { - None - }; - let mut mn_info = MountInfo { - dev_id: volume_name, - dev_name, - fs_type: fs_type.unwrap_or_else(|| "".to_string()), - mount_root, - mount_dir: "".to_string(), - mount_option: "".to_string(), - remote: false, - dummy: false, - }; - mn_info.set_missing_fields(); - Some(mn_info) - } -} - -impl FsUsage { - #[cfg(unix)] - fn new(statvfs: libc::statvfs) -> FsUsage { - { - FsUsage { - blocksize: if statvfs.f_frsize != 0 { - statvfs.f_frsize as u64 - } else { - statvfs.f_bsize as u64 - }, - blocks: statvfs.f_blocks as u64, - bfree: statvfs.f_bfree as u64, - bavail: statvfs.f_bavail as u64, - bavail_top_bit_set: ((statvfs.f_bavail as u64) & (1u64.rotate_right(1))) != 0, - files: statvfs.f_files as u64, - ffree: statvfs.f_ffree as u64, - } - } - } - #[cfg(not(unix))] - fn new(path: &Path) -> FsUsage { - let mut root_path = [0u16; MAX_PATH]; - let success = unsafe { - GetVolumePathNamesForVolumeNameW( - //path_utf8.as_ptr(), - String2LPWSTR!(path.as_os_str()), - root_path.as_mut_ptr(), - root_path.len() as DWORD, - ptr::null_mut(), - ) - }; - if 0 == success { - crash!( - EXIT_ERR, - "GetVolumePathNamesForVolumeNameW failed: {}", - unsafe { GetLastError() } - ); - } - - let mut sectors_per_cluster = 0; - let mut bytes_per_sector = 0; - let mut number_of_free_clusters = 0; - let mut total_number_of_clusters = 0; - - let success = unsafe { - GetDiskFreeSpaceW( - String2LPWSTR!(path.as_os_str()), - &mut sectors_per_cluster, - &mut bytes_per_sector, - &mut number_of_free_clusters, - &mut 
total_number_of_clusters, - ) - }; - if 0 == success { - // Fails in case of CD for example - //crash!(EXIT_ERR, "GetDiskFreeSpaceW failed: {}", unsafe { - //GetLastError() - //}); - } - - let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; - FsUsage { - // f_bsize File system block size. - blocksize: bytes_per_cluster as u64, - // f_blocks - Total number of blocks on the file system, in units of f_frsize. - // frsize = Fundamental file system block size (fragment size). - blocks: total_number_of_clusters as u64, - // Total number of free blocks. - bfree: number_of_free_clusters as u64, - // Total number of free blocks available to non-privileged processes. - bavail: 0 as u64, - bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, - // Total number of file nodes (inodes) on the file system. - files: 0 as u64, // Not available on windows - // Total number of free file nodes (inodes). - ffree: 4096 as u64, // Meaningless on Windows - } - } -} - impl Filesystem { // TODO: resolve uuid in `mountinfo.dev_name` if exists fn new(mountinfo: MountInfo) -> Option { @@ -548,7 +150,7 @@ impl Filesystem { unsafe { let path = CString::new(_stat_path).unwrap(); let mut statvfs = mem::zeroed(); - if libc::statvfs(path.as_ptr(), &mut statvfs) < 0 { + if statfs_fn(path.as_ptr(), &mut statvfs) < 0 { None } else { Some(Filesystem { @@ -565,80 +167,6 @@ impl Filesystem { } } -/// Read file system list. 
-fn read_fs_list() -> Vec { - #[cfg(target_os = "linux")] - { - let (file_name, fobj) = File::open(LINUX_MOUNTINFO) - .map(|f| (LINUX_MOUNTINFO, f)) - .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f))) - .expect("failed to find mount list files"); - let reader = BufReader::new(fobj); - reader - .lines() - .filter_map(|line| line.ok()) - .filter_map(|line| { - let raw_data = line.split_whitespace().collect::>(); - MountInfo::new(file_name, raw_data) - }) - .collect::>() - } - #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] - { - let mut mptr: *mut statfs = ptr::null_mut(); - let len = unsafe { getmntinfo(&mut mptr, 1 as c_int) }; - if len < 0 { - crash!(EXIT_ERR, "getmntinfo failed"); - } - let mounts = unsafe { slice::from_raw_parts(mptr, len as usize) }; - mounts - .iter() - .map(|m| MountInfo::from(*m)) - .collect::>() - } - #[cfg(windows)] - { - let mut volume_name_buf = [0u16; MAX_PATH]; - // As recommended in the MS documentation, retrieve the first volume before the others - let find_handle = unsafe { - FindFirstVolumeW(volume_name_buf.as_mut_ptr(), volume_name_buf.len() as DWORD) - }; - if INVALID_HANDLE_VALUE == find_handle { - crash!(EXIT_ERR, "FindFirstVolumeW failed: {}", unsafe { - GetLastError() - }); - } - let mut mounts = Vec::::new(); - loop { - let volume_name = LPWSTR2String(&volume_name_buf); - if !volume_name.starts_with("\\\\?\\") || !volume_name.ends_with('\\') { - show_warning!("A bad path was skipped: {}", volume_name); - continue; - } - if let Some(m) = MountInfo::new(volume_name) { - mounts.push(m); - } - if 0 == unsafe { - FindNextVolumeW( - find_handle, - volume_name_buf.as_mut_ptr(), - volume_name_buf.len() as DWORD, - ) - } { - let err = unsafe { GetLastError() }; - if err != winapi::shared::winerror::ERROR_NO_MORE_FILES { - crash!(EXIT_ERR, "FindNextVolumeW failed: {}", err); - } - break; - } - } - unsafe { - FindVolumeClose(find_handle); - } - mounts - } -} - fn filter_mount_list(vmi: Vec, paths: &[String], 
opt: &Options) -> Vec { vmi.into_iter() .filter_map(|mi| { diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 51bb4c66e..da51f7ca4 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -29,6 +29,9 @@ time = { version="<= 0.1.43", optional=true } data-encoding = { version="~2.1", optional=true } ## data-encoding: require v2.1; but v2.2.0 breaks the build for MinSRV v1.31.0 libc = { version="0.2.15, <= 0.2.85", optional=true } ## libc: initial utmp support added in v0.2.15; but v0.2.68 breaks the build for MinSRV v1.31.0 +[target.'cfg(target_os = "windows")'.dependencies] +winapi = { version = "0.3", features = ["errhandlingapi", "fileapi", "handleapi", "winerror"] } + [target.'cfg(target_os = "redox")'.dependencies] termion = "1.5" diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 3c95af73e..887c31e01 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -1,6 +1,8 @@ // This file is part of the uutils coreutils package. // // (c) Jian Zeng +// (c) Fangxu Hu +// (c) Sylvestre Ledru // // For the full copyright and license information, please view the LICENSE file // that was distributed with this source code. 
@@ -12,19 +14,106 @@ extern crate time; pub use crate::*; // import macros from `../../macros.rs` #[cfg(target_os = "linux")] -static LINUX_MTAB: &str = "/etc/mtab"; +const LINUX_MTAB: &str = "/etc/mtab"; #[cfg(target_os = "linux")] -static LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; +const LINUX_MOUNTINFO: &str = "/proc/self/mountinfo"; static MOUNT_OPT_BIND: &str = "bind"; +#[cfg(windows)] +const MAX_PATH: usize = 266; +#[cfg(not(unix))] +static EXIT_ERR: i32 = 1; + +#[cfg(windows)] +use std::ffi::OsString; +#[cfg(windows)] +use std::os::windows::ffi::OsStrExt; +#[cfg(windows)] +use std::os::windows::ffi::OsStringExt; +#[cfg(windows)] +use winapi::shared::minwindef::DWORD; +#[cfg(windows)] +use winapi::um::errhandlingapi::GetLastError; +#[cfg(windows)] +use winapi::um::fileapi::GetDiskFreeSpaceW; +#[cfg(windows)] +use winapi::um::fileapi::{ + FindFirstVolumeW, FindNextVolumeW, FindVolumeClose, GetDriveTypeW, GetVolumeInformationW, + GetVolumePathNamesForVolumeNameW, QueryDosDeviceW, +}; +#[cfg(windows)] +use winapi::um::handleapi::INVALID_HANDLE_VALUE; +#[cfg(windows)] +use winapi::um::winbase::DRIVE_REMOTE; + +#[cfg(windows)] +macro_rules! 
String2LPWSTR { + ($str: expr) => { + OsString::from($str.clone()) + .as_os_str() + .encode_wide() + .chain(Some(0)) + .collect::>() + .as_ptr() + }; +} + +#[cfg(windows)] +#[allow(non_snake_case)] +fn LPWSTR2String(buf: &[u16]) -> String { + let len = unsafe { libc::wcslen(buf.as_ptr()) }; + OsString::from_wide(&buf[..len as usize]) + .into_string() + .unwrap() +} use self::time::Timespec; -pub use libc::{ - c_int, mode_t, strerror, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, - S_IFSOCK, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, - S_IXGRP, S_IXOTH, S_IXUSR, +#[cfg(unix)] +use libc::{ + mode_t, strerror, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, }; +use std::borrow::Cow; +use std::convert::{AsRef, From}; +#[cfg(unix)] +use std::ffi::CString; +#[cfg(unix)] +use std::io::Error as IOError; +#[cfg(unix)] +use std::mem; +use std::path::Path; use std::time::UNIX_EPOCH; +#[cfg(any( + target_os = "linux", + target_vendor = "apple", + target_os = "android", + target_os = "freebsd" +))] +pub use libc::statfs as Sstatfs; +#[cfg(any( + target_os = "openbsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "bitrig", + target_os = "dragonfly" +))] +pub use libc::statvfs as Sstatfs; + +#[cfg(any( + target_os = "linux", + target_vendor = "apple", + target_os = "android", + target_os = "freebsd" +))] +pub use libc::statfs as statfs_fn; +#[cfg(any( + target_os = "openbsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "bitrig", + target_os = "dragonfly" +))] +pub use libc::statvfs as statfs_fn; + pub trait BirthTime { fn pretty_birth(&self) -> String; fn birth(&self) -> String; @@ -49,78 +138,389 @@ impl BirthTime for Metadata { } } -pub fn pretty_time(sec: i64, nsec: i64) -> String { - // sec == seconds since UNIX_EPOCH - // nsec == nanoseconds since (UNIX_EPOCH + sec) - let tm = time::at(Timespec::new(sec, nsec as i32)); - let res = time::strftime("%Y-%m-%d 
%H:%M:%S.%f %z", &tm).unwrap(); - if res.ends_with(" -0000") { - res.replace(" -0000", " +0000") - } else { - res - } +#[derive(Debug, Clone)] +pub struct MountInfo { + // it stores `volume_name` in windows platform and `dev_id` in unix platform + pub dev_id: String, + pub dev_name: String, + pub fs_type: String, + pub mount_dir: String, + pub mount_option: String, // we only care "bind" option + pub mount_root: String, + pub remote: bool, + pub dummy: bool, } -pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { - match mode & S_IFMT { - S_IFREG => { - if size != 0 { - "regular file" - } else { - "regular empty file" +impl MountInfo { + fn set_missing_fields(&mut self) { + #[cfg(unix)] + { + // We want to keep the dev_id on Windows + // but set dev_id + let path = CString::new(self.mount_dir.clone()).unwrap(); + unsafe { + let mut stat = mem::zeroed(); + if libc::stat(path.as_ptr(), &mut stat) == 0 { + self.dev_id = (stat.st_dev as i32).to_string(); + } else { + self.dev_id = "".to_string(); + } } } - S_IFDIR => "directory", - S_IFLNK => "symbolic link", - S_IFCHR => "character special file", - S_IFBLK => "block special file", - S_IFIFO => "fifo", - S_IFSOCK => "socket", - // TODO: Other file types - // See coreutils/gnulib/lib/file-type.c - _ => "weird file", + // set MountInfo::dummy + match self.fs_type.as_ref() { + "autofs" | "proc" | "subfs" + /* for Linux 2.6/3.x */ + | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" + /* FreeBSD, Linux 2.4 */ + | "devfs" + /* for NetBSD 3.0 */ + | "kernfs" + /* for Irix 6.5 */ + | "ignore" => self.dummy = true, + _ => self.dummy = self.fs_type == "none" + && self.mount_option.find(MOUNT_OPT_BIND).is_none(), + } + // set MountInfo::remote + #[cfg(windows)] + { + self.remote = DRIVE_REMOTE == unsafe { GetDriveTypeW(String2LPWSTR!(self.mount_root)) }; + } + #[cfg(unix)] + { + if self.dev_name.find(':').is_some() + || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" + || self.fs_type 
== "cifs") + || self.dev_name == "-hosts" + { + self.remote = true; + } else { + self.remote = false; + } + } + } + + #[cfg(target_os = "linux")] + fn new(file_name: &str, raw: Vec<&str>) -> Option { + match file_name { + // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + // "man proc" for more details + LINUX_MOUNTINFO => { + let mut m = MountInfo { + dev_id: "".to_string(), + dev_name: raw[9].to_string(), + fs_type: raw[8].to_string(), + mount_root: raw[3].to_string(), + mount_dir: raw[4].to_string(), + mount_option: raw[5].to_string(), + remote: false, + dummy: false, + }; + m.set_missing_fields(); + Some(m) + } + LINUX_MTAB => { + let mut m = MountInfo { + dev_id: "".to_string(), + dev_name: raw[0].to_string(), + fs_type: raw[2].to_string(), + mount_root: "".to_string(), + mount_dir: raw[1].to_string(), + mount_option: raw[3].to_string(), + remote: false, + dummy: false, + }; + m.set_missing_fields(); + Some(m) + } + _ => None, + } + } + #[cfg(windows)] + fn new(mut volume_name: String) -> Option { + let mut dev_name_buf = [0u16; MAX_PATH]; + volume_name.pop(); + unsafe { + QueryDosDeviceW( + OsString::from(volume_name.clone()) + .as_os_str() + .encode_wide() + .chain(Some(0)) + .skip(4) + .collect::>() + .as_ptr(), + dev_name_buf.as_mut_ptr(), + dev_name_buf.len() as DWORD, + ) + }; + volume_name.push('\\'); + let dev_name = LPWSTR2String(&dev_name_buf); + + let mut mount_root_buf = [0u16; MAX_PATH]; + let success = unsafe { + GetVolumePathNamesForVolumeNameW( + String2LPWSTR!(volume_name), + mount_root_buf.as_mut_ptr(), + mount_root_buf.len() as DWORD, + ptr::null_mut(), + ) + }; + if 0 == success { + // TODO: support the case when `GetLastError()` returns `ERROR_MORE_DATA` + return None; + } + let mount_root = LPWSTR2String(&mount_root_buf); + + let mut fs_type_buf = [0u16; MAX_PATH]; + let success = unsafe { + GetVolumeInformationW( + String2LPWSTR!(mount_root), + ptr::null_mut(), + 0 as DWORD, + ptr::null_mut(), + 
ptr::null_mut(), + ptr::null_mut(), + fs_type_buf.as_mut_ptr(), + fs_type_buf.len() as DWORD, + ) + }; + let fs_type = if 0 != success { + Some(LPWSTR2String(&fs_type_buf)) + } else { + None + }; + let mut mn_info = MountInfo { + dev_id: volume_name, + dev_name, + fs_type: fs_type.unwrap_or_else(|| "".to_string()), + mount_root, + mount_dir: "".to_string(), + mount_option: "".to_string(), + remote: false, + dummy: false, + }; + mn_info.set_missing_fields(); + Some(mn_info) } } -use std::borrow::Cow; -use std::convert::{AsRef, From}; -use std::ffi::CString; -use std::io::Error as IOError; -use std::mem; -use std::path::Path; +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::ffi::CStr; +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +impl From for MountInfo { + fn from(statfs: Sstatfs) -> Self { + let mut info = MountInfo { + dev_id: "".to_string(), + dev_name: unsafe { + CStr::from_ptr(&statfs.f_mntfromname[0]) + .to_string_lossy() + .into_owned() + }, + fs_type: unsafe { + CStr::from_ptr(&statfs.f_fstypename[0]) + .to_string_lossy() + .into_owned() + }, + mount_dir: unsafe { + CStr::from_ptr(&statfs.f_mntonname[0]) + .to_string_lossy() + .into_owned() + }, + mount_root: "".to_string(), + mount_option: "".to_string(), + remote: false, + dummy: false, + }; + info.set_missing_fields(); + info + } +} -#[cfg(any( - target_os = "linux", - target_vendor = "apple", - target_os = "android", - target_os = "freebsd" -))] -use libc::statfs as Sstatfs; -#[cfg(any( - target_os = "openbsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "bitrig", - target_os = "dragonfly" -))] -use libc::statvfs as Sstatfs; +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +use libc::c_int; +#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] +extern "C" { + #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] + #[link_name = "getmntinfo$INODE64"] + fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; 
-#[cfg(any( - target_os = "linux", - target_vendor = "apple", - target_os = "android", - target_os = "freebsd" -))] -use libc::statfs as statfs_fn; -#[cfg(any( - target_os = "openbsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "bitrig", - target_os = "dragonfly" -))] -use libc::statvfs as statfs_fn; + #[cfg(any( + all(target_os = "freebsd"), + all(target_vendor = "apple", target_arch = "aarch64") + ))] + fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; +} +#[cfg(target_os = "linux")] +use std::fs::File; +#[cfg(target_os = "linux")] +use std::io::{BufRead, BufReader}; +#[cfg(any(target_vendor = "apple", target_os = "freebsd", target_os = "windows"))] +use std::ptr; +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +use std::slice; +/// Read file system list. +pub fn read_fs_list() -> Vec { + #[cfg(target_os = "linux")] + { + let (file_name, fobj) = File::open(LINUX_MOUNTINFO) + .map(|f| (LINUX_MOUNTINFO, f)) + .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f))) + .expect("failed to find mount list files"); + let reader = BufReader::new(fobj); + reader + .lines() + .filter_map(|line| line.ok()) + .filter_map(|line| { + let raw_data = line.split_whitespace().collect::>(); + MountInfo::new(file_name, raw_data) + }) + .collect::>() + } + #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] + { + let mut mptr: *mut Sstatfs = ptr::null_mut(); + let len = unsafe { getmntinfo(&mut mptr, 1_i32) }; + if len < 0 { + crash!(1, "getmntinfo failed"); + } + let mounts = unsafe { slice::from_raw_parts(mptr, len as usize) }; + mounts + .iter() + .map(|m| MountInfo::from(*m)) + .collect::>() + } + #[cfg(windows)] + { + let mut volume_name_buf = [0u16; MAX_PATH]; + // As recommended in the MS documentation, retrieve the first volume before the others + let find_handle = unsafe { + FindFirstVolumeW(volume_name_buf.as_mut_ptr(), volume_name_buf.len() as DWORD) + }; + if INVALID_HANDLE_VALUE == find_handle { + 
crash!(EXIT_ERR, "FindFirstVolumeW failed: {}", unsafe { + GetLastError() + }); + } + let mut mounts = Vec::::new(); + loop { + let volume_name = LPWSTR2String(&volume_name_buf); + if !volume_name.starts_with("\\\\?\\") || !volume_name.ends_with('\\') { + show_warning!("A bad path was skipped: {}", volume_name); + continue; + } + if let Some(m) = MountInfo::new(volume_name) { + mounts.push(m); + } + if 0 == unsafe { + FindNextVolumeW( + find_handle, + volume_name_buf.as_mut_ptr(), + volume_name_buf.len() as DWORD, + ) + } { + let err = unsafe { GetLastError() }; + if err != winapi::shared::winerror::ERROR_NO_MORE_FILES { + crash!(EXIT_ERR, "FindNextVolumeW failed: {}", err); + } + break; + } + } + unsafe { + FindVolumeClose(find_handle); + } + mounts + } +} + +#[derive(Debug, Clone)] +pub struct FsUsage { + pub blocksize: u64, + pub blocks: u64, + pub bfree: u64, + pub bavail: u64, + pub bavail_top_bit_set: bool, + pub files: u64, + pub ffree: u64, +} + +impl FsUsage { + #[cfg(unix)] + pub fn new(statvfs: Sstatfs) -> FsUsage { + { + FsUsage { + blocksize: statvfs.f_bsize as u64, // or `statvfs.f_frsize` ? 
+ blocks: statvfs.f_blocks as u64, + bfree: statvfs.f_bfree as u64, + bavail: statvfs.f_bavail as u64, + bavail_top_bit_set: ((statvfs.f_bavail as u64) & (1u64.rotate_right(1))) != 0, + files: statvfs.f_files as u64, + ffree: statvfs.f_ffree as u64, + } + } + } + #[cfg(not(unix))] + pub fn new(path: &Path) -> FsUsage { + let mut root_path = [0u16; MAX_PATH]; + let success = unsafe { + GetVolumePathNamesForVolumeNameW( + //path_utf8.as_ptr(), + String2LPWSTR!(path.as_os_str()), + root_path.as_mut_ptr(), + root_path.len() as DWORD, + ptr::null_mut(), + ) + }; + if 0 == success { + crash!( + EXIT_ERR, + "GetVolumePathNamesForVolumeNameW failed: {}", + unsafe { GetLastError() } + ); + } + + let mut sectors_per_cluster = 0; + let mut bytes_per_sector = 0; + let mut number_of_free_clusters = 0; + let mut total_number_of_clusters = 0; + + let success = unsafe { + GetDiskFreeSpaceW( + String2LPWSTR!(path.as_os_str()), + &mut sectors_per_cluster, + &mut bytes_per_sector, + &mut number_of_free_clusters, + &mut total_number_of_clusters, + ) + }; + if 0 == success { + // Fails in case of CD for example + //crash!(EXIT_ERR, "GetDiskFreeSpaceW failed: {}", unsafe { + //GetLastError() + //}); + } + + let bytes_per_cluster = sectors_per_cluster as u64 * bytes_per_sector as u64; + FsUsage { + // f_bsize File system block size. + blocksize: bytes_per_cluster as u64, + // f_blocks - Total number of blocks on the file system, in units of f_frsize. + // frsize = Fundamental file system block size (fragment size). + blocks: total_number_of_clusters as u64, + // Total number of free blocks. + bfree: number_of_free_clusters as u64, + // Total number of free blocks available to non-privileged processes. + bavail: 0 as u64, + bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, + // Total number of file nodes (inodes) on the file system. + files: 0 as u64, // Not available on windows + // Total number of free file nodes (inodes). 
+ ffree: 4096 as u64, // Meaningless on Windows + } + } +} + +#[cfg(unix)] pub trait FsMeta { fn fs_type(&self) -> i64; fn iosize(&self) -> u64; @@ -134,6 +534,7 @@ pub trait FsMeta { fn namelen(&self) -> u64; } +#[cfg(unix)] impl FsMeta for Sstatfs { fn blksize(&self) -> i64 { self.f_bsize as i64 @@ -213,6 +614,7 @@ impl FsMeta for Sstatfs { } } +#[cfg(unix)] pub fn statfs>(path: P) -> Result where Vec: From

, @@ -236,6 +638,40 @@ where } } +pub fn pretty_time(sec: i64, nsec: i64) -> String { + // sec == seconds since UNIX_EPOCH + // nsec == nanoseconds since (UNIX_EPOCH + sec) + let tm = time::at(Timespec::new(sec, nsec as i32)); + let res = time::strftime("%Y-%m-%d %H:%M:%S.%f %z", &tm).unwrap(); + if res.ends_with(" -0000") { + res.replace(" -0000", " +0000") + } else { + res + } +} + +#[cfg(unix)] +pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { + match mode & S_IFMT { + S_IFREG => { + if size != 0 { + "regular file" + } else { + "regular empty file" + } + } + S_IFDIR => "directory", + S_IFLNK => "symbolic link", + S_IFCHR => "character special file", + S_IFBLK => "block special file", + S_IFIFO => "fifo", + S_IFSOCK => "socket", + // TODO: Other file types + // See coreutils/gnulib/lib/file-type.c + _ => "weird file", + } +} + pub fn pretty_fstype<'a>(fstype: i64) -> Cow<'a, str> { match fstype { 0x6163_6673 => "acfs".into(), @@ -356,192 +792,12 @@ pub fn pretty_fstype<'a>(fstype: i64) -> Cow<'a, str> { } } -#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] -extern "C" { - #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] - #[link_name = "getmntinfo$INODE64"] - fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; - - #[cfg(any( - all(target_os = "freebsd"), - all(target_vendor = "apple", target_arch = "aarch64") - ))] - fn getmntinfo(mntbufp: *mut *mut Sstatfs, flags: c_int) -> c_int; -} - -#[derive(Debug, Clone)] -pub struct MountInfo { - // it stores `volume_name` in windows platform and `dev_id` in unix platform - dev_id: String, - dev_name: String, - fs_type: String, - pub mount_dir: String, - mount_option: String, // we only care "bind" option - mount_root: String, - remote: bool, - dummy: bool, -} - -impl MountInfo { - fn set_missing_fields(&mut self) { - #[cfg(unix)] - { - // We want to keep the dev_id on Windows - // but set dev_id - let path = CString::new(self.mount_dir.clone()).unwrap(); - unsafe { - 
let mut stat = mem::zeroed(); - if libc::stat(path.as_ptr(), &mut stat) == 0 { - self.dev_id = (stat.st_dev as i32).to_string(); - } else { - self.dev_id = "".to_string(); - } - } - } - // set MountInfo::dummy - match self.fs_type.as_ref() { - "autofs" | "proc" | "subfs" - /* for Linux 2.6/3.x */ - | "debugfs" | "devpts" | "fusectl" | "mqueue" | "rpc_pipefs" | "sysfs" - /* FreeBSD, Linux 2.4 */ - | "devfs" - /* for NetBSD 3.0 */ - | "kernfs" - /* for Irix 6.5 */ - | "ignore" => self.dummy = true, - _ => self.dummy = self.fs_type == "none" - && self.mount_option.find(MOUNT_OPT_BIND).is_none(), - } - // set MountInfo::remote - #[cfg(unix)] - { - if self.dev_name.find(':').is_some() - || (self.dev_name.starts_with("//") && self.fs_type == "smbfs" - || self.fs_type == "cifs") - || self.dev_name == "-hosts" - { - self.remote = true; - } else { - self.remote = false; - } - } - } - - #[cfg(target_os = "linux")] - fn new(file_name: &str, raw: Vec<&str>) -> Option { - match file_name { - // Format: 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue - // "man proc" for more details - "/proc/self/mountinfo" => { - let mut m = MountInfo { - dev_id: "".to_string(), - dev_name: raw[9].to_string(), - fs_type: raw[8].to_string(), - mount_root: raw[3].to_string(), - mount_dir: raw[4].to_string(), - mount_option: raw[5].to_string(), - remote: false, - dummy: false, - }; - m.set_missing_fields(); - Some(m) - } - "/etc/mtab" => { - let mut m = MountInfo { - dev_id: "".to_string(), - dev_name: raw[0].to_string(), - fs_type: raw[2].to_string(), - mount_root: "".to_string(), - mount_dir: raw[1].to_string(), - mount_option: raw[3].to_string(), - remote: false, - dummy: false, - }; - m.set_missing_fields(); - Some(m) - } - _ => None, - } - } -} - -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::ffi::CStr; -#[cfg(any(target_os = "freebsd", target_vendor = "apple"))] -impl From for MountInfo { - fn from(statfs: Sstatfs) -> Self { - let mut 
info = MountInfo { - dev_id: "".to_string(), - dev_name: unsafe { - CStr::from_ptr(&statfs.f_mntfromname[0]) - .to_string_lossy() - .into_owned() - }, - fs_type: unsafe { - CStr::from_ptr(&statfs.f_fstypename[0]) - .to_string_lossy() - .into_owned() - }, - mount_dir: unsafe { - CStr::from_ptr(&statfs.f_mntonname[0]) - .to_string_lossy() - .into_owned() - }, - mount_root: "".to_string(), - mount_option: "".to_string(), - remote: false, - dummy: false, - }; - info.set_missing_fields(); - info - } -} - -#[cfg(target_os = "linux")] -use std::fs::File; -#[cfg(target_os = "linux")] -use std::io::{BufRead, BufReader}; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::ptr; -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -use std::slice; -pub fn read_fs_list() -> Vec { - #[cfg(target_os = "linux")] - { - let (file_name, fobj) = File::open(LINUX_MOUNTINFO) - .map(|f| (LINUX_MOUNTINFO, f)) - .or_else(|_| File::open(LINUX_MTAB).map(|f| (LINUX_MTAB, f))) - .expect("failed to find mount list files"); - let reader = BufReader::new(fobj); - reader - .lines() - .filter_map(|line| line.ok()) - .filter_map(|line| { - let raw_data = line.split_whitespace().collect::>(); - MountInfo::new(file_name, raw_data) - }) - .collect::>() - } - #[cfg(any(target_os = "freebsd", target_vendor = "apple"))] - { - let mut mptr: *mut Sstatfs = ptr::null_mut(); - let len = unsafe { getmntinfo(&mut mptr, 1 as c_int) }; - if len < 0 { - crash!(1, "getmntinfo failed"); - } - let mounts = unsafe { slice::from_raw_parts(mptr, len as usize) }; - mounts - .iter() - .map(|m| MountInfo::from(*m)) - .collect::>() - } -} - #[cfg(test)] mod tests { use super::*; #[test] + #[cfg(unix)] fn test_file_type() { assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); assert_eq!("character special file", pretty_filetype(S_IFCHR, 0)); diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index f2a4292fb..28bae08cb 100644 --- a/src/uucore/src/lib/lib.rs +++ 
b/src/uucore/src/lib/lib.rs @@ -189,6 +189,7 @@ mod tests { vec.into_iter().collect_str(handling) } + #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_lossy(os_str: &OsStr) { //assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); @@ -212,6 +213,7 @@ mod tests { ); } + #[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args_ignore(os_str: &OsStr) { //assert our string is invalid utf8 assert!(os_str.to_os_string().into_string().is_err()); @@ -236,7 +238,7 @@ mod tests { //create a vector containing only correct encoding let test_vec = make_os_vec(&OsString::from("test2")); //expect complete conversion without losses, even when lossy conversion is accepted - let _ = collect_os_str(test_vec.clone(), InvalidEncodingHandling::ConvertLossy) + let _ = collect_os_str(test_vec, InvalidEncodingHandling::ConvertLossy) .expect_complete("Lossy conversion not expected in this test"); } From 2ec4bee350bf5d974665d3391fc91c6d90729456 Mon Sep 17 00:00:00 2001 From: Daniel Rocco Date: Thu, 6 May 2021 08:28:54 -0400 Subject: [PATCH 048/148] test: improve handling of inverted Boolean expressions - add `==` as undocumented alias of `=` - handle negated comparison of `=` as literal - negation generally applies to only the first expression of a Boolean chain, except when combining evaluation of two literal strings --- src/uu/test/src/parser.rs | 88 ++++++++++++++++++++++++++---------- src/uu/test/src/test.rs | 2 +- tests/by-util/test_test.rs | 93 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 25 deletions(-) diff --git a/src/uu/test/src/parser.rs b/src/uu/test/src/parser.rs index 2c9c9db30..aa44bc5f2 100644 --- a/src/uu/test/src/parser.rs +++ b/src/uu/test/src/parser.rs @@ -33,7 +33,7 @@ impl Symbol { "(" => Symbol::LParen, "!" 
=> Symbol::Bang, "-a" | "-o" => Symbol::BoolOp(s), - "=" | "!=" => Symbol::StringOp(s), + "=" | "==" | "!=" => Symbol::StringOp(s), "-eq" | "-ge" | "-gt" | "-le" | "-lt" | "-ne" => Symbol::IntOp(s), "-ef" | "-nt" | "-ot" => Symbol::FileOp(s), "-n" | "-z" => Symbol::StrlenOp(s), @@ -83,7 +83,7 @@ impl Symbol { /// TERM → str OP str /// TERM → str | 𝜖 /// OP → STRINGOP | INTOP | FILEOP -/// STRINGOP → = | != +/// STRINGOP → = | == | != /// INTOP → -eq | -ge | -gt | -le | -lt | -ne /// FILEOP → -ef | -nt | -ot /// STRLEN → -n | -z @@ -163,7 +163,7 @@ impl Parser { match self.peek() { // lparen is a literal when followed by nothing or comparison Symbol::None | Symbol::StringOp(_) | Symbol::IntOp(_) | Symbol::FileOp(_) => { - self.literal(Symbol::Literal(OsString::from("("))); + self.literal(Symbol::LParen.into_literal()); } // empty parenthetical Symbol::Literal(s) if s == ")" => {} @@ -183,27 +183,67 @@ impl Parser { /// /// * `! =`: negate the result of the implicit string length test of `=` /// * `! = foo`: compare the literal strings `!` and `foo` - /// * `! `: negate the result of the expression + /// * `! = = str`: negate comparison of literal `=` and `str` + /// * `!`: bang followed by nothing is literal + /// * `! EXPR`: negate the result of the expression + /// + /// Combined Boolean & negation: + /// + /// * `! ( EXPR ) [BOOLOP EXPR]`: negate the parenthesized expression only + /// * `! UOP str BOOLOP EXPR`: negate the unary subexpression + /// * `! str BOOLOP str`: negate the entire Boolean expression + /// * `! str BOOLOP EXPR BOOLOP EXPR`: negate the value of the first `str` term /// fn bang(&mut self) { - if let Symbol::StringOp(_) | Symbol::IntOp(_) | Symbol::FileOp(_) = self.peek() { - // we need to peek ahead one more token to disambiguate the first - // two cases listed above: case 1 — `! ` — and - // case 2: ` OP str`. 
- let peek2 = self.tokens.clone().nth(1); + match self.peek() { + Symbol::StringOp(_) | Symbol::IntOp(_) | Symbol::FileOp(_) | Symbol::BoolOp(_) => { + // we need to peek ahead one more token to disambiguate the first + // three cases listed above + let peek2 = Symbol::new(self.tokens.clone().nth(1)); - if peek2.is_none() { - // op is literal - let op = self.next_token().into_literal(); - self.stack.push(op); - self.stack.push(Symbol::Bang); - } else { - // bang is literal; parsing continues with op - self.literal(Symbol::Literal(OsString::from("!"))); + match peek2 { + // case 1: `! ` + // case 3: `! = OP str` + Symbol::StringOp(_) | Symbol::None => { + // op is literal + let op = self.next_token().into_literal(); + self.literal(op); + self.stack.push(Symbol::Bang); + } + // case 2: ` OP str [BOOLOP EXPR]`. + _ => { + // bang is literal; parsing continues with op + self.literal(Symbol::Bang.into_literal()); + self.maybe_boolop(); + } + } + } + + // bang followed by nothing is literal + Symbol::None => self.stack.push(Symbol::Bang.into_literal()), + + _ => { + // peek ahead up to 4 tokens to determine if we need to negate + // the entire expression or just the first term + let peek4: Vec = self + .tokens + .clone() + .take(4) + .map(|token| Symbol::new(Some(token))) + .collect(); + + match peek4.as_slice() { + // we peeked ahead 4 but there were only 3 tokens left + [Symbol::Literal(_), Symbol::BoolOp(_), Symbol::Literal(_)] => { + self.expr(); + self.stack.push(Symbol::Bang); + } + _ => { + self.term(); + self.stack.push(Symbol::Bang); + } + } } - } else { - self.expr(); - self.stack.push(Symbol::Bang); } } @@ -211,13 +251,14 @@ impl Parser { /// as appropriate. 
fn maybe_boolop(&mut self) { if self.peek_is_boolop() { - let token = self.tokens.next().unwrap(); // safe because we peeked + let symbol = self.next_token(); // BoolOp by itself interpreted as Literal if let Symbol::None = self.peek() { - self.literal(Symbol::Literal(token)) + self.literal(symbol.into_literal()); } else { - self.boolop(Symbol::BoolOp(token)) + self.boolop(symbol); + self.maybe_boolop(); } } } @@ -231,7 +272,6 @@ impl Parser { if op == Symbol::BoolOp(OsString::from("-a")) { self.term(); self.stack.push(op); - self.maybe_boolop(); } else { self.expr(); self.stack.push(op); diff --git a/src/uu/test/src/test.rs b/src/uu/test/src/test.rs index 3e97af0a6..86950ecc2 100644 --- a/src/uu/test/src/test.rs +++ b/src/uu/test/src/test.rs @@ -57,7 +57,7 @@ fn eval(stack: &mut Vec) -> Result { Some(Symbol::StringOp(op)) => { let b = stack.pop(); let a = stack.pop(); - Ok(if op == "=" { a == b } else { a != b }) + Ok(if op == "!=" { a != b } else { a == b }) } Some(Symbol::IntOp(op)) => { let b = pop_literal!(); diff --git a/tests/by-util/test_test.rs b/tests/by-util/test_test.rs index 000013d9c..0dfc0c620 100644 --- a/tests/by-util/test_test.rs +++ b/tests/by-util/test_test.rs @@ -122,6 +122,13 @@ fn test_zero_len_not_equals_zero_len_is_false() { new_ucmd!().args(&["", "!=", ""]).run().status_code(1); } +#[test] +fn test_double_equal_is_string_comparison_op() { + // undocumented but part of the GNU test suite + new_ucmd!().args(&["t", "==", "t"]).succeeds(); + new_ucmd!().args(&["t", "==", "f"]).run().status_code(1); +} + #[test] fn test_string_comparison() { let scenario = TestScenario::new(util_name!()); @@ -131,11 +138,22 @@ fn test_string_comparison() { ["(", "=", "("], ["(", "!=", ")"], ["!", "=", "!"], + ["=", "=", "="], ]; for test in &tests { scenario.ucmd().args(&test[..]).succeeds(); } + + // run the inverse of all these tests + for test in &tests { + scenario + .ucmd() + .arg("!") + .args(&test[..]) + .run() + .status_code(1); + } } #[test] @@ -485,6 
+503,81 @@ fn test_op_prec_and_or_2_overridden_by_parentheses() { .status_code(1); } +#[test] +fn test_negated_boolean_precedence() { + let scenario = TestScenario::new(util_name!()); + + let tests = [ + vec!["!", "(", "foo", ")", "-o", "bar"], + vec!["!", "", "-o", "", "-a", ""], + vec!["!", "(", "", "-a", "", ")", "-o", ""], + ]; + + for test in &tests { + scenario.ucmd().args(&test[..]).succeeds(); + } + + let negative_tests = [ + vec!["!", "-n", "", "-a", ""], + vec!["", "-a", "", "-o", ""], + vec!["!", "", "-a", "", "-o", ""], + vec!["!", "(", "", "-a", "", ")", "-a", ""], + ]; + + for test in &negative_tests { + scenario.ucmd().args(&test[..]).run().status_code(1); + } +} + +#[test] +fn test_bang_boolop_precedence() { + // For a Boolean combination of two literals, bang inverts the entire expression + new_ucmd!().args(&["!", "", "-a", ""]).succeeds(); + new_ucmd!().args(&["!", "", "-o", ""]).succeeds(); + + new_ucmd!() + .args(&["!", "a value", "-o", "another value"]) + .run() + .status_code(1); + + // Introducing a UOP — even one that is equivalent to a bare string — causes + // bang to invert only the first term + new_ucmd!() + .args(&["!", "-n", "", "-a", ""]) + .run() + .status_code(1); + new_ucmd!() + .args(&["!", "", "-a", "-n", ""]) + .run() + .status_code(1); + + // for compound Boolean expressions, bang inverts the _next_ expression + // only, not the entire compound expression + new_ucmd!() + .args(&["!", "", "-a", "", "-a", ""]) + .run() + .status_code(1); + + // parentheses can override this + new_ucmd!() + .args(&["!", "(", "", "-a", "", "-a", "", ")"]) + .succeeds(); +} + +#[test] +fn test_inverted_parenthetical_boolop_precedence() { + // For a Boolean combination of two literals, bang inverts the entire expression + new_ucmd!() + .args(&["!", "a value", "-o", "another value"]) + .run() + .status_code(1); + + // only the parenthetical is inverted, not the entire expression + new_ucmd!() + .args(&["!", "(", "a value", ")", "-o", "another value"]) 
+ .succeeds(); +} + #[test] #[ignore = "fixme: error reporting"] fn test_dangling_parenthesis() { From 6aee792a9318dd47b9c9738a3a6c440e99f6fb85 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 11 May 2021 09:29:46 +0200 Subject: [PATCH 049/148] Remove travis CI * it is redundant with github action * less integrated * fails someone for some unexpected reasons * it is blocking code coverage results ?! --- .travis.yml | 72 ------------------------------------------------- CONTRIBUTING.md | 4 --- README.md | 1 - 3 files changed, 77 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 389ba44b0..000000000 --- a/.travis.yml +++ /dev/null @@ -1,72 +0,0 @@ -language: rust - -rust: - - stable - - beta - -os: - - linux - # - osx - -env: - # sphinx v1.8.0 is bugged & fails for linux builds; so, force specific `sphinx` version - global: FEATURES='' TEST_INSTALL='' SPHINX_VERSIONED='sphinx==1.7.8' - -matrix: - allow_failures: - - rust: beta - - rust: nightly - fast_finish: true - include: - - rust: 1.40.0 - env: FEATURES=unix - # - rust: stable - # os: linux - # env: FEATURES=unix TEST_INSTALL=true - # - rust: stable - # os: osx - # env: FEATURES=macos TEST_INSTALL=true - - rust: nightly - os: linux - env: FEATURES=nightly,unix TEST_INSTALL=true - - rust: nightly - os: osx - env: FEATURES=nightly,macos TEST_INSTALL=true - - rust: nightly - os: linux - env: FEATURES=nightly,feat_os_unix_redox CC=x86_64-unknown-redox-gcc CARGO_ARGS='--no-default-features --target=x86_64-unknown-redox' REDOX=1 - -cache: - directories: - - $HOME/.cargo - -sudo: true - -before_install: - - if [ $REDOX ]; then ./.travis/redox-toolchain.sh; fi - -install: - - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install python-pip && sudo pip install $SPHINX_VERSIONED; fi - - | - if [ $TRAVIS_OS_NAME = osx ]; then - brew update - brew upgrade python - pip3 install $SPHINX_VERSIONED - fi - -script: - - cargo build $CARGO_ARGS --features 
"$FEATURES" - - if [ ! $REDOX ]; then cargo test $CARGO_ARGS -p uucore -p coreutils --features "$FEATURES" --no-fail-fast; fi - - if [ -n "$TEST_INSTALL" ]; then mkdir installdir_test; DESTDIR=installdir_test make install; [ `ls installdir_test/usr/local/bin | wc -l` -gt 0 ]; fi - -addons: - apt: - packages: - - libssl-dev - -after_success: | - if [ "$TRAVIS_OS_NAME" = linux -a "$TRAVIS_RUST_VERSION" = stable ]; then - bash <(curl https://raw.githubusercontent.com/xd009642/tarpaulin/master/travis-install.sh) - cargo tarpaulin --out Xml - bash <(curl -s https://codecov.io/bash) - fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bcb1f8fff..3793a0968 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,10 +70,6 @@ lines for non-utility modules include: README: add help ``` -``` -travis: fix build -``` - ``` uucore: add new modules ``` diff --git a/README.md b/README.md index 95dc036fd..7de4419af 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ [![LOC](https://tokei.rs/b1/github/uutils/coreutils?category=code)](https://github.com/Aaronepower/tokei) [![dependency status](https://deps.rs/repo/github/uutils/coreutils/status.svg)](https://deps.rs/repo/github/uutils/coreutils) -[![Build Status](https://api.travis-ci.org/uutils/coreutils.svg?branch=master)](https://travis-ci.org/uutils/coreutils) [![Build Status (FreeBSD)](https://api.cirrus-ci.com/github/uutils/coreutils.svg)](https://cirrus-ci.com/github/uutils/coreutils/master) [![CodeCov](https://codecov.io/gh/uutils/coreutils/branch/master/graph/badge.svg)](https://codecov.io/gh/uutils/coreutils) From b9d44facb9a8c03b968280ff4a085e863a97e829 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 11 May 2021 10:27:13 +0200 Subject: [PATCH 050/148] refresh cargo.lock with recent updates --- Cargo.lock | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e729bfcd2..a3f870fc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -783,9 
+783,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "js-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d99f9e3e84b8f67f846ef5b4cbbc3b1c29f6c759fcbce6f01aa0e73d932a24c" +checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062" dependencies = [ "wasm-bindgen", ] @@ -2723,9 +2723,9 @@ checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] name = "wasm-bindgen" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83240549659d187488f91f33c0f8547cbfef0b2088bc470c116d1d260ef623d9" +checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" dependencies = [ "cfg-if 1.0.0", "wasm-bindgen-macro", @@ -2733,9 +2733,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae70622411ca953215ca6d06d3ebeb1e915f0f6613e3b495122878d7ebec7dae" +checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" dependencies = [ "bumpalo", "lazy_static", @@ -2748,9 +2748,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e734d91443f177bfdb41969de821e15c516931c3c3db3d318fa1b68975d0f6f" +checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" dependencies = [ "quote 1.0.9", "wasm-bindgen-macro-support", @@ -2758,9 +2758,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53739ff08c8a68b0fdbcd54c372b8ab800b1449ab3c9d706503bc7dd1621b2c" +checksum = 
"be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" dependencies = [ "proc-macro2", "quote 1.0.9", @@ -2771,15 +2771,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9a543ae66aa233d14bb765ed9af4a33e81b8b58d1584cf1b47ff8cd0b9e4489" +checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" [[package]] name = "web-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a905d57e488fec8861446d3393670fb50d27a262344013181c2cdf9fff5481be" +checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" dependencies = [ "js-sys", "wasm-bindgen", From 8200d399e8d5bc7d7da0d7a65f693620b73e0ed2 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 11 May 2021 23:03:59 +0200 Subject: [PATCH 051/148] date: fix format for nanoseconds --- src/uu/date/src/date.rs | 13 +++++++++---- tests/by-util/test_date.rs | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 317fd72d4..577cba460 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -207,11 +207,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .alias(OPT_UNIVERSAL_2) .help("print or set Coordinated Universal Time (UTC)"), ) - .arg(Arg::with_name(OPT_FORMAT).multiple(true)) + .arg(Arg::with_name(OPT_FORMAT).multiple(false)) .get_matches_from(args); let format = if let Some(form) = matches.value_of(OPT_FORMAT) { - let form = form[1..].into(); + if !form.starts_with('+') { + eprintln!("date: invalid date ‘{}’", form); + return 1; + } + // GNU `date` uses `%N` for nano seconds, however crate::chrono uses `%f` + let form = form[1..].replace("%N", "%f"); Format::Custom(form) } else if let Some(fmt) = matches .values_of(OPT_ISO_8601) @@ -237,7 +242,7 @@ pub fn uumain(args: impl 
uucore::Args) -> i32 { let set_to = match matches.value_of(OPT_SET).map(parse_date) { None => None, Some(Err((input, _err))) => { - eprintln!("date: invalid date '{}'", input); + eprintln!("date: invalid date ‘{}’", input); return 1; } Some(Ok(date)) => Some(date), @@ -301,7 +306,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { println!("{}", formatted); } Err((input, _err)) => { - println!("date: invalid date '{}'", input); + println!("date: invalid date ‘{}’", input); } } } diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 0ca0a74ea..464655315 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -104,6 +104,23 @@ fn test_date_format_full_day() { .stdout_matches(&re); } +#[test] +fn test_date_nano_seconds() { + // %N nanoseconds (000000000..999999999) + let re = Regex::new(r"^\d{1,9}$").unwrap(); + new_ucmd!().arg("+%N").succeeds().stdout_matches(&re); +} + +#[test] +fn test_date_format_without_plus() { + // [+FORMAT] + new_ucmd!() + .arg("%s") + .fails() + .stderr_contains("date: invalid date ‘%s’") + .code_is(1); +} + #[test] #[cfg(all(unix, not(target_os = "macos")))] fn test_date_set_valid() { From 12a43d6eb3e1ec5d6a249f548e7b05c2bfb29065 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 12 May 2021 10:21:24 +0200 Subject: [PATCH 052/148] date: fix format literal for nanoseconds --- src/uu/date/src/date.rs | 7 ++++--- tests/by-util/test_date.rs | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index 577cba460..1fe80c03f 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -215,8 +215,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { eprintln!("date: invalid date ‘{}’", form); return 1; } - // GNU `date` uses `%N` for nano seconds, however crate::chrono uses `%f` - let form = form[1..].replace("%N", "%f"); + let form = form[1..].to_string(); Format::Custom(form) } else if let Some(fmt) = matches 
.values_of(OPT_ISO_8601) @@ -302,7 +301,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 { for date in dates { match date { Ok(date) => { - let formatted = date.format(format_string); + // GNU `date` uses `%N` for nano seconds, however crate::chrono uses `%f` + let format_string = &format_string.replace("%N", "%f"); + let formatted = date.format(format_string).to_string().replace("%f", "%N"); println!("{}", formatted); } Err((input, _err)) => { diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index 464655315..f4990566a 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -121,6 +121,12 @@ fn test_date_format_without_plus() { .code_is(1); } +#[test] +fn test_date_format_literal() { + new_ucmd!().arg("+%%s").succeeds().stdout_is("%s\n"); + new_ucmd!().arg("+%%N").succeeds().stdout_is("%N\n"); +} + #[test] #[cfg(all(unix, not(target_os = "macos")))] fn test_date_set_valid() { From 0669c89ef311111f1cd1d81d89032457bb2fd0c8 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 12 May 2021 14:47:45 +0200 Subject: [PATCH 053/148] refresh cargo.lock with recent updates --- Cargo.lock | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d942c04d4..7bd97c917 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1820,11 +1820,9 @@ name = "uu_df" version = "0.0.6" dependencies = [ "clap", - "libc", "number_prefix", "uucore", "uucore_procs", - "winapi 0.3.9", ] [[package]] @@ -2407,8 +2405,6 @@ name = "uu_stat" version = "0.0.6" dependencies = [ "clap", - "libc", - "time", "uucore", "uucore_procs", ] @@ -2682,6 +2678,7 @@ dependencies = [ "thiserror", "time", "wild", + "winapi 0.3.9", ] [[package]] From 3114fd77be546db6e72046d57d0abfb956486f20 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 11 May 2021 20:13:38 -0400 Subject: [PATCH 054/148] tail: use &mut File instead of mut file: &File --- src/uu/tail/src/tail.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff 
--git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 0a3ff778d..9246f4f43 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -241,7 +241,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let mut file = File::open(&path).unwrap(); if is_seekable(&mut file) { - bounded_tail(&file, &settings); + bounded_tail(&mut file, &settings); if settings.follow { let reader = BufReader::new(file); readers.push(reader); @@ -400,7 +400,7 @@ fn follow(readers: &mut [BufReader], filenames: &[String], settings: /// true. The `file` is left seek'd to the position just after the byte that /// `should_stop` returned true for. fn backwards_thru_file( - mut file: &File, + file: &mut File, size: u64, buf: &mut Vec, delimiter: u8, @@ -448,14 +448,14 @@ fn backwards_thru_file( /// end of the file, and then read the file "backwards" in blocks of size /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. -fn bounded_tail(mut file: &File, settings: &Settings) { +fn bounded_tail(file: &mut File, settings: &Settings) { let size = file.seek(SeekFrom::End(0)).unwrap(); let mut buf = vec![0; BLOCK_SIZE as usize]; // Find the position in the file to start printing from. match settings.mode { FilterMode::Lines(mut count, delimiter) => { - backwards_thru_file(&file, size, &mut buf, delimiter, &mut |byte| { + backwards_thru_file(file, size, &mut buf, delimiter, &mut |byte| { if byte == delimiter { count -= 1; count == 0 From 2e621759b255391b379ce34fa42e86bc68ed4701 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 11 May 2021 21:10:30 -0400 Subject: [PATCH 055/148] tail: refactor code into ReverseChunks iterator Refactor code from the `backwards_thru_file()` function into a new `ReverseChunks` iterator, and use that iterator to simplify the implementation of the `backwards_thru_file()` function. 
The `ReverseChunks` iterator yields `Vec` objects, each of which references bytes of a given file. --- src/uu/tail/src/chunks.rs | 83 ++++++++++++++++++++++++++++++++++++ src/uu/tail/src/tail.rs | 89 ++++++++++++++++----------------------- 2 files changed, 120 insertions(+), 52 deletions(-) create mode 100644 src/uu/tail/src/chunks.rs diff --git a/src/uu/tail/src/chunks.rs b/src/uu/tail/src/chunks.rs new file mode 100644 index 000000000..57a26dabf --- /dev/null +++ b/src/uu/tail/src/chunks.rs @@ -0,0 +1,83 @@ +//! Iterating over a file by chunks, starting at the end of the file. +//! +//! Use [`ReverseChunks::new`] to create a new iterator over chunks of +//! bytes from the file. +use std::fs::File; +use std::io::{Read, Seek, SeekFrom}; + +/// When reading files in reverse in `bounded_tail`, this is the size of each +/// block read at a time. +pub const BLOCK_SIZE: u64 = 1 << 16; + +/// An iterator over a file in non-overlapping chunks from the end of the file. +/// +/// Each chunk is a [`Vec`]<[`u8`]> of size [`BLOCK_SIZE`] (except +/// possibly the last chunk, which might be smaller). Each call to +/// [`next`] will seek backwards through the given file. +pub struct ReverseChunks<'a> { + /// The file to iterate over, by blocks, from the end to the beginning. + file: &'a File, + + /// The total number of bytes in the file. + size: u64, + + /// The total number of blocks to read. + max_blocks_to_read: usize, + + /// The index of the next block to read. + block_idx: usize, +} + +impl<'a> ReverseChunks<'a> { + pub fn new(file: &'a mut File) -> ReverseChunks<'a> { + let size = file.seek(SeekFrom::End(0)).unwrap(); + let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize; + let block_idx = 0; + ReverseChunks { + file, + size, + max_blocks_to_read, + block_idx, + } + } +} + +impl<'a> Iterator for ReverseChunks<'a> { + type Item = Vec; + + fn next(&mut self) -> Option { + // If there are no more chunks to read, terminate the iterator. 
+ if self.block_idx >= self.max_blocks_to_read { + return None; + } + + // The chunk size is `BLOCK_SIZE` for all but the last chunk + // (that is, the chunk closest to the beginning of the file), + // which contains the remainder of the bytes. + let block_size = if self.block_idx == self.max_blocks_to_read - 1 { + self.size % BLOCK_SIZE + } else { + BLOCK_SIZE + }; + + // Seek backwards by the next chunk, read the full chunk into + // `buf`, and then seek back to the start of the chunk again. + let mut buf = vec![0; BLOCK_SIZE as usize]; + let pos = self + .file + .seek(SeekFrom::Current(-(block_size as i64))) + .unwrap(); + self.file + .read_exact(&mut buf[0..(block_size as usize)]) + .unwrap(); + let pos2 = self + .file + .seek(SeekFrom::Current(-(block_size as i64))) + .unwrap(); + assert_eq!(pos, pos2); + + self.block_idx += 1; + + Some(buf[0..(block_size as usize)].to_vec()) + } +} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 9246f4f43..6dafee184 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -15,8 +15,11 @@ extern crate clap; #[macro_use] extern crate uucore; +mod chunks; mod platform; mod ringbuffer; +use chunks::ReverseChunks; +use chunks::BLOCK_SIZE; use ringbuffer::RingBuffer; use clap::{App, Arg}; @@ -355,10 +358,6 @@ pub fn parse_size(mut size_slice: &str) -> Result { } } -/// When reading files in reverse in `bounded_tail`, this is the size of each -/// block read at a time. -const BLOCK_SIZE: u64 = 1 << 16; - fn follow(readers: &mut [BufReader], filenames: &[String], settings: &Settings) { assert!(settings.follow); let mut last = readers.len() - 1; @@ -396,48 +395,42 @@ fn follow(readers: &mut [BufReader], filenames: &[String], settings: } } -/// Iterate over bytes in the file, in reverse, until `should_stop` returns -/// true. The `file` is left seek'd to the position just after the byte that -/// `should_stop` returned true for. 
-fn backwards_thru_file( - file: &mut File, - size: u64, - buf: &mut Vec, - delimiter: u8, - should_stop: &mut F, -) where - F: FnMut(u8) -> bool, -{ - assert!(buf.len() >= BLOCK_SIZE as usize); +/// Iterate over bytes in the file, in reverse, until we find the +/// `num_delimiters` instance of `delimiter`. The `file` is left seek'd to the +/// position just after that delimiter. +fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) { + // This variable counts the number of delimiters found in the file + // so far (reading from the end of the file toward the beginning). + let mut counter = 0; - let max_blocks_to_read = (size as f64 / BLOCK_SIZE as f64).ceil() as usize; + for (block_idx, slice) in ReverseChunks::new(file).enumerate() { + // Iterate over each byte in the slice in reverse order. + let mut iter = slice.iter().enumerate().rev(); - for block_idx in 0..max_blocks_to_read { - let block_size = if block_idx == max_blocks_to_read - 1 { - size % BLOCK_SIZE - } else { - BLOCK_SIZE - }; - - // Seek backwards by the next block, read the full block into - // `buf`, and then seek back to the start of the block again. - let pos = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap(); - file.read_exact(&mut buf[0..(block_size as usize)]).unwrap(); - let pos2 = file.seek(SeekFrom::Current(-(block_size as i64))).unwrap(); - assert_eq!(pos, pos2); - - // Iterate backwards through the bytes, calling `should_stop` on each - // one. - let slice = &buf[0..(block_size as usize)]; - for (i, ch) in slice.iter().enumerate().rev() { - // Ignore one trailing newline. - if block_idx == 0 && i as u64 == block_size - 1 && *ch == delimiter { - continue; + // Ignore a trailing newline in the last block, if there is one. 
+ if block_idx == 0 { + if let Some(c) = slice.last() { + if *c == delimiter { + iter.next(); + } } + } - if should_stop(*ch) { - file.seek(SeekFrom::Current((i + 1) as i64)).unwrap(); - return; + // For each byte, increment the count of the number of + // delimiters found. If we have found more than the specified + // number of delimiters, terminate the search and seek to the + // appropriate location in the file. + for (i, ch) in iter { + if *ch == delimiter { + counter += 1; + if counter >= num_delimiters { + // After each iteration of the outer loop, the + // cursor in the file is at the *beginning* of the + // block, so seeking forward by `i + 1` bytes puts + // us right after the found delimiter. + file.seek(SeekFrom::Current((i + 1) as i64)).unwrap(); + return; + } } } } @@ -449,20 +442,12 @@ fn backwards_thru_file( /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. fn bounded_tail(file: &mut File, settings: &Settings) { - let size = file.seek(SeekFrom::End(0)).unwrap(); let mut buf = vec![0; BLOCK_SIZE as usize]; // Find the position in the file to start printing from. match settings.mode { - FilterMode::Lines(mut count, delimiter) => { - backwards_thru_file(file, size, &mut buf, delimiter, &mut |byte| { - if byte == delimiter { - count -= 1; - count == 0 - } else { - false - } - }); + FilterMode::Lines(count, delimiter) => { + backwards_thru_file(file, count as usize, delimiter); } FilterMode::Bytes(count) => { file.seek(SeekFrom::End(-(count as i64))).unwrap(); From a4fc2b5106ad1a9226c6c396b5bb2ea191ac9814 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Thu, 13 May 2021 10:17:57 +0200 Subject: [PATCH 056/148] who: fix `--lookup` This closes #2181. `who --lookup` is failing with a runtime panic (double free). Since `crate::dns-lookup` already includes a safe wrapper for `getaddrinfo` I used this crate instead of further debugging the existing code in utmpx::canon_host(). 
* It was necessary to remove the version constraint for libc in uucore. --- Cargo.lock | 29 +++++++++++-- src/uu/pinky/src/pinky.rs | 15 ++----- src/uu/who/src/who.rs | 16 ++----- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features/utmpx.rs | 65 +++++++++++++--------------- tests/by-util/test_pinky.rs | 17 ++++++++ tests/by-util/test_who.rs | 1 - 7 files changed, 79 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d942c04d4..77957de80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -576,6 +576,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "dns-lookup" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093d88961fd18c4ecacb8c80cd0b356463ba941ba11e0e01f9cf5271380b79dc" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "socket2", + "winapi 0.3.9", +] + [[package]] name = "dunce" version = "1.0.1" @@ -1445,6 +1457,17 @@ dependencies = [ "maybe-uninit", ] +[[package]] +name = "socket2" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "winapi 0.3.9", +] + [[package]] name = "strsim" version = "0.8.0" @@ -1820,11 +1843,9 @@ name = "uu_df" version = "0.0.6" dependencies = [ "clap", - "libc", "number_prefix", "uucore", "uucore_procs", - "winapi 0.3.9", ] [[package]] @@ -2407,8 +2428,6 @@ name = "uu_stat" version = "0.0.6" dependencies = [ "clap", - "libc", - "time", "uucore", "uucore_procs", ] @@ -2672,6 +2691,7 @@ name = "uucore" version = "0.0.8" dependencies = [ "data-encoding", + "dns-lookup", "dunce", "getopts", "lazy_static", @@ -2682,6 +2702,7 @@ dependencies = [ "thiserror", "time", "wild", + "winapi 0.3.9", ] [[package]] diff --git a/src/uu/pinky/src/pinky.rs b/src/uu/pinky/src/pinky.rs index e116a2382..f0ab44e5f 100644 --- a/src/uu/pinky/src/pinky.rs +++ b/src/uu/pinky/src/pinky.rs @@ -286,17 +286,10 @@ impl Pinky { 
print!(" {}", time_string(&ut)); - if self.include_where && !ut.host().is_empty() { - let ut_host = ut.host(); - let mut res = ut_host.splitn(2, ':'); - let host = match res.next() { - Some(_) => ut.canon_host().unwrap_or_else(|_| ut_host.clone()), - None => ut_host.clone(), - }; - match res.next() { - Some(d) => print!(" {}:{}", host, d), - None => print!(" {}", host), - } + let mut s = ut.host(); + if self.include_where && !s.is_empty() { + s = safe_unwrap!(ut.canon_host()); + print!(" {}", s); } println!(); diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index ba1360eff..aef23b3a2 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -548,20 +548,10 @@ impl Who { " ?".into() }; - let mut buf = vec![]; - let ut_host = ut.host(); - let mut res = ut_host.splitn(2, ':'); - if let Some(h) = res.next() { - if self.do_lookup { - buf.push(ut.canon_host().unwrap_or_else(|_| h.to_owned())); - } else { - buf.push(h.to_owned()); - } + let mut s = ut.host(); + if self.do_lookup { + s = safe_unwrap!(ut.canon_host()); } - if let Some(h) = res.next() { - buf.push(h.to_owned()); - } - let s = buf.join(":"); let hoststr = if s.is_empty() { s } else { format!("({})", s) }; self.print_line( diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index da51f7ca4..85efe0434 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -16,6 +16,7 @@ edition = "2018" path="src/lib/lib.rs" [dependencies] +dns-lookup = "1.0.5" dunce = "1.0.0" getopts = "<= 0.2.21" wild = "2.0.4" diff --git a/src/uucore/src/lib/features/utmpx.rs b/src/uucore/src/lib/features/utmpx.rs index 0308d8a5e..96db33c35 100644 --- a/src/uucore/src/lib/features/utmpx.rs +++ b/src/uucore/src/lib/features/utmpx.rs @@ -188,47 +188,40 @@ impl Utmpx { /// Canonicalize host name using DNS pub fn canon_host(&self) -> IOResult { - const AI_CANONNAME: libc::c_int = 0x2; let host = self.host(); - let host = host.split(':').next().unwrap(); - let hints = libc::addrinfo { - ai_flags: AI_CANONNAME, 
- ai_family: 0, - ai_socktype: 0, - ai_protocol: 0, - ai_addrlen: 0, - ai_addr: ptr::null_mut(), - ai_canonname: ptr::null_mut(), - ai_next: ptr::null_mut(), - }; - let c_host = CString::new(host).unwrap(); - let mut res = ptr::null_mut(); - let status = unsafe { - libc::getaddrinfo( - c_host.as_ptr(), - ptr::null(), - &hints as *const _, - &mut res as *mut _, - ) - }; - if status == 0 { - let info: libc::addrinfo = unsafe { ptr::read(res as *const _) }; - // http://lists.gnu.org/archive/html/bug-coreutils/2006-09/msg00300.html - // says Darwin 7.9.0 getaddrinfo returns 0 but sets - // res->ai_canonname to NULL. - let ret = if info.ai_canonname.is_null() { - Ok(String::from(host)) - } else { - Ok(unsafe { CString::from_raw(info.ai_canonname).into_string().unwrap() }) + + // TODO: change to use `split_once` when MSRV hits 1.52.0 + // let (hostname, display) = host.split_once(':').unwrap_or((&host, "")); + let mut h = host.split(':'); + let hostname = h.next().unwrap_or(&host); + let display = h.next().unwrap_or(""); + + if !hostname.is_empty() { + extern crate dns_lookup; + use dns_lookup::{getaddrinfo, AddrInfoHints}; + + const AI_CANONNAME: i32 = 0x2; + let hints = AddrInfoHints { + flags: AI_CANONNAME, + ..AddrInfoHints::default() }; - unsafe { - libc::freeaddrinfo(res); + let sockets = getaddrinfo(Some(&hostname), None, Some(hints)) + .unwrap() + .collect::>>()?; + for socket in sockets { + if let Some(ai_canonname) = socket.canonname { + return Ok(if display.is_empty() { + ai_canonname + } else { + format!("{}:{}", ai_canonname, display) + }); + } } - ret - } else { - Err(IOError::last_os_error()) } + + Ok(host.to_string()) } + pub fn iter_all_records() -> UtmpxIter { UtmpxIter } diff --git a/tests/by-util/test_pinky.rs b/tests/by-util/test_pinky.rs index 1a7ef8b61..904a05f93 100644 --- a/tests/by-util/test_pinky.rs +++ b/tests/by-util/test_pinky.rs @@ -98,6 +98,23 @@ fn test_short_format_q() { assert_eq!(v_actual, v_expect); } +#[cfg(target_os = "linux")] 
+#[test] +fn test_no_flag() { + let scene = TestScenario::new(util_name!()); + + let actual = scene.ucmd().succeeds().stdout_move_str(); + let expect = scene + .cmd_keepenv(util_name!()) + .env("LANGUAGE", "C") + .succeeds() + .stdout_move_str(); + + let v_actual: Vec<&str> = actual.split_whitespace().collect(); + let v_expect: Vec<&str> = expect.split_whitespace().collect(); + assert_eq!(v_actual, v_expect); +} + #[cfg(target_os = "linux")] fn expected_result(args: &[&str]) -> String { TestScenario::new(util_name!()) diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index 8aeecfb55..a5637f23a 100644 --- a/tests/by-util/test_who.rs +++ b/tests/by-util/test_who.rs @@ -162,7 +162,6 @@ fn test_users() { #[cfg(target_os = "linux")] #[test] -#[ignore] fn test_lookup() { for opt in vec!["--lookup"] { new_ucmd!() From e8d911d9d5ccf1c92d53a02ed8cc8fc729950b36 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 15 May 2021 10:32:03 -0400 Subject: [PATCH 057/148] wc: correct some error messages for invalid inputs Change the error messages that get printed to `stderr` for compatibility with GNU `wc` when an input is a directory and when an input does not exist. Fixes #2211. 
--- src/uu/wc/src/wc.rs | 27 +++++++++++++++++++++++++-- tests/by-util/test_wc.rs | 23 +++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 226608d40..5670508f4 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -22,7 +22,7 @@ use thiserror::Error; use std::cmp::max; use std::fs::File; -use std::io::{self, Write}; +use std::io::{self, ErrorKind, Write}; use std::path::Path; #[derive(Error, Debug)] @@ -254,6 +254,29 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> WcResult { + show_error_custom_description!(path, "Is a directory"); + } + (Input::Path(path), WcError::Io(e)) if e.kind() == ErrorKind::NotFound => { + show_error_custom_description!(path, "No such file or directory"); + } + (_, e) => { + show_error!("{}", e); + } + }; +} + fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { let mut total_word_count = WordCount::default(); let mut results = vec![]; @@ -264,7 +287,7 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { for input in &inputs { let word_count = word_count_from_input(&input, settings).unwrap_or_else(|err| { - show_error!("{}", err); + show_error(&input, err); error_count += 1; WordCount::default() }); diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index b61d7e3aa..8036d0eaa 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -168,3 +168,26 @@ fn test_file_one_long_word() { .run() .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); } + +/// Test that getting counts from a directory is an error. +#[test] +fn test_read_from_directory_error() { + // TODO To match GNU `wc`, the `stdout` should be: + // + // " 0 0 0 .\n" + // + new_ucmd!() + .args(&["."]) + .fails() + .stderr_contains(".: Is a directory\n") + .stdout_is("0 0 0 .\n"); +} + +/// Test that getting counts from nonexistent file is an error. 
+#[test] +fn test_read_from_nonexistent_file() { + new_ucmd!() + .args(&["bogusfile"]) + .fails() + .stderr_contains("bogusfile: No such file or directory\n"); +} From 97a49c7c95ec51839650b3b95ebff7d44ea401db Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Fri, 7 May 2021 15:07:17 -0400 Subject: [PATCH 058/148] wc: compute min width to format counts up front Fix two issues with the string formatting width for counts displayed by `wc`. First, the output was previously not using the default minimum width (seven characters) when reading from `stdin`. This commit corrects this behavior to match GNU `wc`. For example, $ cat alice_in_wonderland.txt | wc 5 57 302 Second, if at least 10^7 bytes were read from `stdin` *after* reading from a smaller regular file, then every output row would have width 8. This disagrees with GNU `wc`, in which only the `stdin` row and the total row would have width 8. This commit corrects this behavior to match GNU `wc`. For example, $ printf "%.0s0" {1..10000000} | wc emptyfile.txt - 0 0 0 emptyfile.txt 0 1 10000000 0 1 10000000 total Fixes #2186. --- src/uu/wc/src/wc.rs | 106 +++++++++++++++++++++++++++++++++++---- tests/by-util/test_wc.rs | 38 +++++++++++--- 2 files changed, 128 insertions(+), 16 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 5670508f4..b323f7261 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -20,11 +20,13 @@ use wordcount::{TitledWordCount, WordCount}; use clap::{App, Arg, ArgMatches}; use thiserror::Error; -use std::cmp::max; -use std::fs::File; +use std::fs::{self, File}; use std::io::{self, ErrorKind, Write}; use std::path::Path; +/// The minimum character width for formatting counts when reading from stdin. +const MINIMUM_WIDTH: usize = 7; + #[derive(Error, Debug)] pub enum WcError { #[error("{0}")] @@ -277,11 +279,101 @@ fn show_error(input: &Input, err: WcError) { }; } +/// Compute the number of digits needed to represent any count for this input. 
+/// +/// If `input` is [`Input::Stdin`], then this function returns +/// [`MINIMUM_WIDTH`]. Otherwise, if metadata could not be read from +/// `input` then this function returns 1. +/// +/// # Errors +/// +/// This function will return an error if `input` is a [`Input::Path`] +/// and there is a problem accessing the metadata of the given `input`. +/// +/// # Examples +/// +/// A [`Input::Stdin`] gets a default minimum width: +/// +/// ```rust,ignore +/// let input = Input::Stdin(StdinKind::Explicit); +/// assert_eq!(7, digit_width(input)); +/// ``` +fn digit_width(input: &Input) -> WcResult> { + match input { + Input::Stdin(_) => Ok(Some(MINIMUM_WIDTH)), + Input::Path(filename) => { + let path = Path::new(filename); + let metadata = fs::metadata(path)?; + if metadata.is_file() { + // TODO We are now computing the number of bytes in a file + // twice: once here and once in `WordCount::from_line()` (or + // in `count_bytes_fast()` if that function is called + // instead). See GitHub issue #2201. + let num_bytes = metadata.len(); + let num_digits = num_bytes.to_string().len(); + Ok(Some(num_digits)) + } else { + Ok(None) + } + } + } +} + +/// Compute the number of digits needed to represent all counts in all inputs. +/// +/// `inputs` may include zero or more [`Input::Stdin`] entries, each of +/// which represents reading from `stdin`. The presence of any such +/// entry causes this function to return a width that is at least +/// [`MINIMUM_WIDTH`]. +/// +/// If `input` is empty, then this function returns 1. If file metadata +/// could not be read from any of the [`Input::Path`] inputs and there +/// are no [`Input::Stdin`] inputs, then this function returns 1. +/// +/// If there is a problem accessing the metadata, this function will +/// silently ignore the error and assume that the number of digits +/// needed to display the counts for that file is 1. 
+/// +/// # Examples +/// +/// An empty slice implies a width of 1: +/// +/// ```rust,ignore +/// assert_eq!(1, max_width(&vec![])); +/// ``` +/// +/// The presence of [`Input::Stdin`] implies a minimum width: +/// +/// ```rust,ignore +/// let inputs = vec![Input::Stdin(StdinKind::Explicit)]; +/// assert_eq!(7, max_width(&inputs)); +/// ``` +fn max_width(inputs: &[Input]) -> usize { + let mut result = 1; + for input in inputs { + match digit_width(input) { + Ok(maybe_n) => { + if let Some(n) = maybe_n { + result = result.max(n); + } + } + Err(_) => continue, + } + } + result +} + fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { + // Compute the width, in digits, to use when formatting counts. + // + // The width is the number of digits needed to print the number of + // bytes in the largest file. This is true regardless of whether + // the `settings` indicate that the bytes will be displayed. + let mut error_count = 0; + let max_width = max_width(&inputs); + let mut total_word_count = WordCount::default(); let mut results = vec![]; - let mut max_width: usize = 0; - let mut error_count = 0; let num_inputs = inputs.len(); @@ -291,12 +383,6 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { error_count += 1; WordCount::default() }); - // Compute the number of digits needed to display the number - // of bytes in the file. Even if the settings indicate that we - // won't *display* the number of bytes, we still use the - // number of digits in the byte count as the width when - // formatting each count as a string for output. 
- max_width = max(max_width, word_count.bytes.to_string().len()); total_word_count += word_count; results.push(word_count.with_title(input.to_title())); } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 8036d0eaa..1203c0b1d 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -33,7 +33,7 @@ fn test_stdin_default() { new_ucmd!() .pipe_in_fixture("lorem_ipsum.txt") .run() - .stdout_is(" 13 109 772\n"); + .stdout_is(" 13 109 772\n"); } #[test] @@ -42,7 +42,7 @@ fn test_stdin_explicit() { .pipe_in_fixture("lorem_ipsum.txt") .arg("-") .run() - .stdout_is(" 13 109 772 -\n"); + .stdout_is(" 13 109 772 -\n"); } #[test] @@ -51,9 +51,11 @@ fn test_utf8() { .args(&["-lwmcL"]) .pipe_in_fixture("UTF_8_test.txt") .run() - .stdout_is(" 300 4969 22781 22213 79\n"); - // GNU returns " 300 2086 22219 22781 79" - // TODO: we should fix that to match GNU's behavior + .stdout_is(" 300 4969 22781 22213 79\n"); + // GNU returns " 300 2086 22219 22781 79" + // + // TODO: we should fix the word, character, and byte count to + // match the behavior of GNU wc } #[test] @@ -80,7 +82,7 @@ fn test_stdin_all_counts() { .args(&["-c", "-m", "-l", "-L", "-w"]) .pipe_in_fixture("alice_in_wonderland.txt") .run() - .stdout_is(" 5 57 302 302 66\n"); + .stdout_is(" 5 57 302 302 66\n"); } #[test] @@ -169,6 +171,30 @@ fn test_file_one_long_word() { .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); } +/// Test that the number of bytes in the file dictate the display width. +/// +/// The width in digits of any count is the width in digits of the +/// number of bytes in the file, regardless of whether the number of +/// bytes are displayed. +#[test] +fn test_file_bytes_dictate_width() { + // This file has 10,001 bytes. Five digits are required to + // represent that. Even though the number of lines is 1 and the + // number of words is 0, each of those counts is formatted with + // five characters, filled with whitespace. 
+ new_ucmd!() + .args(&["-lw", "onelongemptyline.txt"]) + .run() + .stdout_is(" 1 0 onelongemptyline.txt\n"); + + // This file has zero bytes. Only one digit is required to + // represent that. + new_ucmd!() + .args(&["-lw", "emptyfile.txt"]) + .run() + .stdout_is("0 0 emptyfile.txt\n"); +} + /// Test that getting counts from a directory is an error. #[test] fn test_read_from_directory_error() { From 733d347fa86839a84d5cc153f0428dacd3547632 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 11 May 2021 23:18:32 -0400 Subject: [PATCH 059/148] head: simplify rbuf_n_bytes() in head.rs Simplify the code in `rbuf_n_bytes()` to use existing abstractions provided by the standard library. --- src/uu/head/src/head.rs | 40 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 807d04314..e050d26f6 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -1,7 +1,7 @@ use clap::{App, Arg}; use std::convert::TryFrom; use std::ffi::OsString; -use std::io::{ErrorKind, Read, Seek, SeekFrom, Write}; +use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write}; use uucore::{crash, executable, show_error}; const EXIT_FAILURE: i32 = 1; @@ -206,38 +206,20 @@ impl Default for HeadOptions { } } -fn rbuf_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io::Result<()> { - if n == 0 { - return Ok(()); - } - let mut readbuf = [0u8; BUF_SIZE]; - let mut i = 0usize; +fn rbuf_n_bytes(input: R, n: usize) -> std::io::Result<()> +where + R: Read, +{ + // Read the first `n` bytes from the `input` reader. + let mut reader = input.take(n as u64); + // Write those bytes to `stdout`. 
let stdout = std::io::stdout(); let mut stdout = stdout.lock(); - loop { - let read = loop { - match input.read(&mut readbuf) { - Ok(n) => break n, - Err(e) => match e.kind() { - ErrorKind::Interrupted => {} - _ => return Err(e), - }, - } - }; - if read == 0 { - // might be unexpected if - // we haven't read `n` bytes - // but this mirrors GNU's behavior - return Ok(()); - } - stdout.write_all(&readbuf[..read.min(n - i)])?; - i += read.min(n - i); - if i == n { - return Ok(()); - } - } + io::copy(&mut reader, &mut stdout)?; + + Ok(()) } fn rbuf_n_lines(input: &mut impl std::io::BufRead, n: usize, zero: bool) -> std::io::Result<()> { From 659bf58a4c80201db406e4d7c2b3e8cef56931a3 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 16 May 2021 11:30:10 -0400 Subject: [PATCH 060/148] head: print headings when reading multiple files Fix a bug in which `head` failed to print headings for `stdin` inputs when reading from multiple files, and fix another bug in which `head` failed to print a blank line between the contents of a file and the heading for the next file when reading multiple files. The output now matches that of GNU `head`. 
--- src/uu/head/src/head.rs | 5 ++++- tests/by-util/test_head.rs | 25 +++++++++++++++++++++++++ tests/fixtures/head/emptyfile.txt | 0 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/head/emptyfile.txt diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index e050d26f6..faaeedd3f 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -405,7 +405,7 @@ fn uu_head(options: &HeadOptions) { for fname in &options.files { let res = match fname.as_str() { "-" => { - if options.verbose { + if (options.files.len() > 1 && !options.quiet) || options.verbose { if !first { println!(); } @@ -459,6 +459,9 @@ fn uu_head(options: &HeadOptions) { }, }; if (options.files.len() > 1 && !options.quiet) || options.verbose { + if !first { + println!(); + } println!("==> {} <==", name) } head_file(&mut file, options) diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 4f009c800..2aedbdcbe 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -196,3 +196,28 @@ fn test_obsolete_extras() { .succeeds() .stdout_is("==> standard input <==\n1\02\03\04\05\0"); } + +#[test] +fn test_multiple_files() { + new_ucmd!() + .args(&["emptyfile.txt", "emptyfile.txt"]) + .succeeds() + .stdout_is("==> emptyfile.txt <==\n\n==> emptyfile.txt <==\n"); +} + +#[test] +fn test_multiple_files_with_stdin() { + new_ucmd!() + .args(&["emptyfile.txt", "-", "emptyfile.txt"]) + .pipe_in("hello\n") + .succeeds() + .stdout_is( + "==> emptyfile.txt <== + +==> standard input <== +hello + +==> emptyfile.txt <== +", + ); +} diff --git a/tests/fixtures/head/emptyfile.txt b/tests/fixtures/head/emptyfile.txt new file mode 100644 index 000000000..e69de29bb From fcd48813e01b279604f286e87f7dcbacafe9759b Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 16 May 2021 21:13:37 +0200 Subject: [PATCH 061/148] sort: read files as chunks, off-thread Instead of using a BufReader and reading each line separately, allocating a String 
for each one, we read to a chunk. Lines are references to this chunk. This makes the allocator's job much easier and yields performance improvements. Chunks are read on a separate thread to further improve performance. --- Cargo.lock | 141 +++++++- src/uu/sort/BENCHMARKING.md | 15 +- src/uu/sort/Cargo.toml | 10 +- src/uu/sort/src/check.rs | 102 ++++++ src/uu/sort/src/chunks.rs | 202 +++++++++++ src/uu/sort/src/ext_sort.rs | 160 +++++++++ src/uu/sort/src/external_sort/LICENSE | 19 - src/uu/sort/src/external_sort/mod.rs | 93 ----- src/uu/sort/src/merge.rs | 223 ++++++++++++ src/uu/sort/src/sort.rs | 486 +++++++++----------------- tests/by-util/test_sort.rs | 4 +- 11 files changed, 1003 insertions(+), 452 deletions(-) create mode 100644 src/uu/sort/src/check.rs create mode 100644 src/uu/sort/src/chunks.rs create mode 100644 src/uu/sort/src/ext_sort.rs delete mode 100644 src/uu/sort/src/external_sort/LICENSE delete mode 100644 src/uu/sort/src/external_sort/mod.rs create mode 100644 src/uu/sort/src/merge.rs diff --git a/Cargo.lock b/Cargo.lock index 77957de80..feda68de5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,11 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" + [[package]] name = "advapi32-sys" version = "0.2.0" @@ -63,6 +69,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "binary-heap-plus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f068638f8ff9e118a9361e66a411eff410e7fb3ecaa23bf9272324f8fc606d7" +dependencies = [ + "compare", +] + [[package]] name = "bit-set" version = "0.5.2" @@ -136,9 +151,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cast" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75" +checksum = "57cdfa5d50aad6cb4d44dcab6101a7f79925bd59d82ca42f38a9856a28865374" dependencies = [ "rustc_version", ] @@ -198,6 +213,12 @@ dependencies = [ "bitflags", ] +[[package]] +name = "compare" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120133d4db2ec47efe2e26502ee984747630c67f51974fca0b6c1340cf2368d3" + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -999,6 +1020,29 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "ouroboros" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc1f52300b81ac4eeeb6c00c20f7e86556c427d9fb2d92b68fc73c22f331cd15" +dependencies = [ + "ouroboros_macro", + "stable_deref_trait", +] + +[[package]] +name = "ouroboros_macro" +version = "0.9.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "41db02c8f8731cdd7a72b433c7900cce4bf245465b452c364bfd21f4566ab055" +dependencies = [ + "Inflector", + "proc-macro-error", + "proc-macro2", + "quote 1.0.9", + "syn", +] + [[package]] name = "output_vt100" version = "0.1.2" @@ -1027,6 +1071,15 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + [[package]] name = "pkg-config" version = "0.3.19" @@ -1089,6 +1142,30 @@ dependencies = [ "output_vt100", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote 1.0.9", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote 1.0.9", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -1336,11 +1413,11 @@ checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" [[package]] name = "rustc_version" -version = "0.2.3" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" dependencies = [ - "semver", + "semver 0.11.0", ] [[package]] @@ -1370,7 +1447,16 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 
dependencies = [ - "semver-parser", + "semver-parser 0.7.0", +] + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser 0.10.2", ] [[package]] @@ -1380,10 +1466,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] -name = "serde" -version = "1.0.125" +name = "semver-parser" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" [[package]] name = "serde_cbor" @@ -1397,9 +1492,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.125" +version = "1.0.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" dependencies = [ "proc-macro2", "quote 1.0.9", @@ -1468,6 +1563,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.8.0" @@ -1627,6 +1728,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" +[[package]] +name = "ucd-trie" +version = "0.1.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + [[package]] name = "unicode-segmentation" version = "1.7.1" @@ -2402,12 +2509,16 @@ dependencies = [ name = "uu_sort" version = "0.0.6" dependencies = [ + "binary-heap-plus", "clap", + "compare", "fnv", "itertools 0.10.0", + "memchr 2.4.0", + "ouroboros", "rand 0.7.3", "rayon", - "semver", + "semver 0.9.0", "tempdir", "unicode-width", "uucore", @@ -2720,6 +2831,12 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + [[package]] name = "void" version = "1.0.2" diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index 71c331105..52866719d 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -75,7 +75,20 @@ Try running commands with the `-S` option set to an amount of memory to be used, huge files (ideally multiple Gigabytes) with `-S`. Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` multiple times (this will add the contents of `shuffled_wordlist.txt` to itself). Example: Run `hyperfine './target/release/coreutils sort shuffled_wordlist.txt -S 1M' 'sort shuffled_wordlist.txt -S 1M'` -` + +## Merging + +"Merge" sort merges already sorted files. It is a sub-step of external sorting, so benchmarking it separately may be helpful. 
+ +- Splitting `shuffled_wordlist.txt` can be achieved by running `split shuffled_wordlist.txt shuffled_wordlist_slice_ --additional-suffix=.txt` +- Sort each part by running `for f in shuffled_wordlist_slice_*; do sort $f -o $f; done` +- Benchmark merging by running `hyperfine "target/release/coreutils sort -m shuffled_wordlist_slice_*"` + +## Check + +When invoked with -c, we simply check if the input is already ordered. The input for benchmarking should be an already sorted file. + +- Benchmark checking by running `hyperfine "target/release/coreutils sort -c sorted_wordlist.txt"` ## Stdout and stdin performance diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 5221f1f4e..724744dc4 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -15,16 +15,20 @@ edition = "2018" path = "src/sort.rs" [dependencies] -rayon = "1.5" -rand = "0.7" +binary-heap-plus = "0.4.1" clap = "2.33" +compare = "0.1.0" fnv = "1.0.7" itertools = "0.10.0" +memchr = "2.4.0" +ouroboros = "0.9.3" +rand = "0.7" +rayon = "1.5" semver = "0.9.0" +tempdir = "0.3.7" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } -tempdir = "0.3.7" [[bin]] name = "sort" diff --git a/src/uu/sort/src/check.rs b/src/uu/sort/src/check.rs new file mode 100644 index 000000000..fe815b624 --- /dev/null +++ b/src/uu/sort/src/check.rs @@ -0,0 +1,102 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Debertol +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +//! 
Check if a file is ordered + +use crate::{ + chunks::{self, Chunk}, + compare_by, open, GlobalSettings, +}; +use itertools::Itertools; +use std::{ + cmp::Ordering, + io::Read, + iter, + sync::mpsc::{sync_channel, Receiver, SyncSender}, + thread, +}; + +/// Check if the file at `path` is ordered. +/// +/// # Returns +/// +/// The code we should exit with. +pub fn check(path: &str, settings: &GlobalSettings) -> i32 { + let file = open(path).expect("failed to open input file"); + let (recycled_sender, recycled_receiver) = sync_channel(2); + let (loaded_sender, loaded_receiver) = sync_channel(2); + thread::spawn({ + let settings = settings.clone(); + move || reader(file, recycled_receiver, loaded_sender, &settings) + }); + for _ in 0..2 { + recycled_sender + .send(Chunk::new(vec![0; 100 * 1024], |_| Vec::new())) + .unwrap(); + } + + let mut prev_chunk: Option = None; + let mut line_idx = 0; + for chunk in loaded_receiver.iter() { + line_idx += 1; + if let Some(prev_chunk) = prev_chunk.take() { + // Check if the first element of the new chunk is greater than the last + // element from the previous chunk + let prev_last = prev_chunk.borrow_lines().last().unwrap(); + let new_first = chunk.borrow_lines().first().unwrap(); + + if compare_by(prev_last, new_first, &settings) == Ordering::Greater { + if !settings.check_silent { + println!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line); + } + return 1; + } + recycled_sender.send(prev_chunk).ok(); + } + + for (a, b) in chunk.borrow_lines().iter().tuple_windows() { + line_idx += 1; + if compare_by(a, b, &settings) == Ordering::Greater { + if !settings.check_silent { + println!("sort: {}:{}: disorder: {}", path, line_idx, b.line); + } + return 1; + } + } + + prev_chunk = Some(chunk); + } + 0 +} + +/// The function running on the reader thread. 
+fn reader( + mut file: Box, + receiver: Receiver, + sender: SyncSender, + settings: &GlobalSettings, +) { + let mut sender = Some(sender); + let mut carry_over = vec![]; + for chunk in receiver.iter() { + let (recycled_lines, recycled_buffer) = chunk.recycle(); + chunks::read( + &mut sender, + recycled_buffer, + &mut carry_over, + &mut file, + &mut iter::empty(), + if settings.zero_terminated { + b'\0' + } else { + b'\n' + }, + recycled_lines, + settings, + ) + } +} diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs new file mode 100644 index 000000000..c679980ec --- /dev/null +++ b/src/uu/sort/src/chunks.rs @@ -0,0 +1,202 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Debertol +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +//! Utilities for reading files as chunks. + +use std::{ + io::{ErrorKind, Read}, + sync::mpsc::SyncSender, +}; + +use memchr::memchr_iter; +use ouroboros::self_referencing; + +use crate::{GlobalSettings, Line}; + +/// The chunk that is passed around between threads. +/// `lines` consist of slices into `buffer`. +#[self_referencing(pub_extras)] +#[derive(Debug)] +pub struct Chunk { + pub buffer: Vec, + #[borrows(buffer)] + #[covariant] + pub lines: Vec>, +} + +impl Chunk { + /// Destroy this chunk and return its components to be reused. + /// + /// # Returns + /// + /// * The `lines` vector, emptied + /// * The `buffer` vector, **not** emptied + pub fn recycle(mut self) -> (Vec>, Vec) { + let recycled_lines = self.with_lines_mut(|lines| { + lines.clear(); + unsafe { + // SAFETY: It is safe to (temporarily) transmute to a vector of lines with a longer lifetime, + // because the vector is empty. + // Transmuting is necessary to make recycling possible. See https://github.com/rust-lang/rfcs/pull/2802 + // for a rfc to make this unnecessary. Its example is similar to the code here. 
+ std::mem::transmute::>, Vec>>(std::mem::take(lines)) + } + }); + (recycled_lines, self.into_heads().buffer) + } +} + +/// Read a chunk, parse lines and send them. +/// +/// No empty chunk will be sent. +/// +/// # Arguments +/// +/// * `sender_option`: The sender to send the lines to the sorter. If `None`, does nothing. +/// * `buffer`: The recycled buffer. All contents will be overwritten, but it must already be filled. +/// (i.e. `buffer.len()` should be equal to `buffer.capacity()`) +/// * `carry_over`: The bytes that must be carried over in between invocations. +/// * `file`: The current file. +/// * `next_files`: What `file` should be updated to next. +/// * `separator`: The line separator. +/// * `lines`: The recycled vector to fill with lines. Must be empty. +/// * `settings`: The global settings. +#[allow(clippy::too_many_arguments)] +pub fn read( + sender_option: &mut Option>, + mut buffer: Vec, + carry_over: &mut Vec, + file: &mut Box, + next_files: &mut impl Iterator>, + separator: u8, + lines: Vec>, + settings: &GlobalSettings, +) { + assert!(lines.is_empty()); + if let Some(sender) = sender_option { + if buffer.len() < carry_over.len() { + buffer.resize(carry_over.len() + 10 * 1024, 0); + } + buffer[..carry_over.len()].copy_from_slice(&carry_over); + let (read, should_continue) = + read_to_buffer(file, next_files, &mut buffer, carry_over.len(), separator); + carry_over.clear(); + carry_over.extend_from_slice(&buffer[read..]); + + let payload = Chunk::new(buffer, |buf| { + let mut lines = unsafe { + // SAFETY: It is safe to transmute to a vector of lines with shorter lifetime, + // because it was only temporarily transmuted to a Vec> to make recycling possible. 
+ std::mem::transmute::>, Vec>>(lines) + }; + let read = crash_if_err!(1, std::str::from_utf8(&buf[..read])); + parse_lines(read, &mut lines, separator, &settings); + lines + }); + if !payload.borrow_lines().is_empty() { + sender.send(payload).unwrap(); + } + if !should_continue { + *sender_option = None; + } + } +} + +/// Split `read` into `Line`s, and add them to `lines`. +fn parse_lines<'a>( + mut read: &'a str, + lines: &mut Vec>, + separator: u8, + settings: &GlobalSettings, +) { + // Strip a trailing separator. TODO: Once our MinRustV is 1.45 or above, use strip_suffix() instead. + if read.ends_with(separator as char) { + read = &read[..read.len() - 1]; + } + + lines.extend( + read.split(separator as char) + .map(|line| Line::create(line, settings)), + ); +} + +/// Read from `file` into `buffer`. +/// +/// This function makes sure that at least two lines are read (unless we reach EOF and there's no next file), +/// growing the buffer if necessary. +/// The last line is likely to not have been fully read into the buffer. Its bytes must be copied to +/// the front of the buffer for the next invocation so that it can be continued to be read +/// (see the return values and `start_offset`). +/// +/// # Arguments +/// +/// * `file`: The file to start reading from. +/// * `next_files`: When `file` reaches EOF, it is updated to `next_files.next()` if that is `Some`, +/// and this function continues reading. +/// * `buffer`: The buffer that is filled with bytes. Its contents will mostly be overwritten (see `start_offset` +/// as well). It will not be grown by default, unless that is necessary to read at least two lines. +/// * `start_offset`: The amount of bytes at the start of `buffer` that were carried over +/// from the previous read and should not be overwritten. +/// * `separator`: The byte that separates lines. +/// +/// # Returns +/// +/// * The amount of bytes in `buffer` that can now be interpreted as lines. 
+/// The remaining bytes must be copied to the start of the buffer for the next invocation, +/// if another invocation is necessary, which is determined by the other return value. +/// * Whether this function should be called again. +fn read_to_buffer( + file: &mut Box, + next_files: &mut impl Iterator>, + buffer: &mut Vec, + start_offset: usize, + separator: u8, +) -> (usize, bool) { + let mut read_target = &mut buffer[start_offset..]; + loop { + match file.read(read_target) { + Ok(0) => { + if read_target.is_empty() { + // chunk is full + let mut sep_iter = memchr_iter(separator, &buffer).rev(); + let last_line_end = sep_iter.next(); + if sep_iter.next().is_some() { + // We read enough lines. + let end = last_line_end.unwrap(); + // We want to include the separator here, because it shouldn't be carried over. + return (end + 1, true); + } else { + // We need to read more lines + let len = buffer.len(); + // resize the vector to 10 KB more + buffer.resize(len + 1024 * 10, 0); + read_target = &mut buffer[len..]; + } + } else { + // This file is empty. + if let Some(next_file) = next_files.next() { + // There is another file. + *file = next_file; + } else { + // This was the last file. + let leftover_len = read_target.len(); + return (buffer.len() - leftover_len, false); + } + } + } + Ok(n) => { + read_target = &mut read_target[n..]; + } + Err(e) if e.kind() == ErrorKind::Interrupted => { + // retry + } + Err(e) => { + crash!(1, "{}", e) + } + } + } +} diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs new file mode 100644 index 000000000..629ebb714 --- /dev/null +++ b/src/uu/sort/src/ext_sort.rs @@ -0,0 +1,160 @@ +// * This file is part of the uutils coreutils package. +// * +// * (c) Michael Debertol +// * +// * For the full copyright and license information, please view the LICENSE +// * file that was distributed with this source code. + +//! Sort big files by using files for storing intermediate chunks. +//! +//! 
Files are read into chunks of memory which are then sorted individually and +//! written to temporary files. There are two threads: One sorter, and one reader/writer. +//! The buffers for the individual chunks are recycled. There are two buffers. + +use std::io::{BufWriter, Write}; +use std::path::Path; +use std::{ + fs::OpenOptions, + io::Read, + sync::mpsc::{Receiver, SyncSender}, + thread, +}; + +use tempdir::TempDir; + +use crate::{ + chunks::{self, Chunk}, + merge::{self, FileMerger}, + sort_by, GlobalSettings, +}; + +/// Iterator that wraps the +pub struct ExtSortedMerger<'a> { + pub file_merger: FileMerger<'a>, + // Keep _tmp_dir around, as it is deleted when dropped. + _tmp_dir: TempDir, +} + +/// Sort big files by using files for storing intermediate chunks. +/// +/// # Returns +/// +/// An iterator that merges intermediate files back together. +pub fn ext_sort<'a>( + files: &mut impl Iterator>, + settings: &'a GlobalSettings, +) -> ExtSortedMerger<'a> { + let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); + let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1); + let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1); + thread::spawn({ + let settings = settings.clone(); + move || sorter(recycled_receiver, sorted_sender, settings) + }); + let chunks_read = reader_writer( + files, + &tmp_dir, + if settings.zero_terminated { + b'\0' + } else { + b'\n' + }, + // Heuristically chosen: Dividing by 10 seems to keep our memory usage roughly + // around settings.buffer_size as a whole. + settings.buffer_size / 10, + settings.clone(), + sorted_receiver, + recycled_sender, + ); + let files = (0..chunks_read) + .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string())) + .collect::>(); + + ExtSortedMerger { + file_merger: merge::merge(&files, settings), + _tmp_dir: tmp_dir, + } +} + +/// The function that is executed on the sorter thread. 
+fn sorter(receiver: Receiver, sender: SyncSender, settings: GlobalSettings) { + while let Ok(mut payload) = receiver.recv() { + payload.with_lines_mut(|lines| sort_by(lines, &settings)); + sender.send(payload).unwrap(); + } +} + +/// The function that is executed on the reader/writer thread. +/// +/// # Returns +/// * The number of chunks read. +fn reader_writer( + mut files: impl Iterator>, + tmp_dir: &TempDir, + separator: u8, + buffer_size: usize, + settings: GlobalSettings, + receiver: Receiver, + sender: SyncSender, +) -> usize { + let mut sender_option = Some(sender); + + let mut file = files.next().unwrap(); + + let mut carry_over = vec![]; + // kick things off with two reads + for _ in 0..2 { + chunks::read( + &mut sender_option, + vec![0; buffer_size], + &mut carry_over, + &mut file, + &mut files, + separator, + Vec::new(), + &settings, + ) + } + + let mut file_number = 0; + loop { + let mut chunk = match receiver.recv() { + Ok(it) => it, + _ => return file_number, + }; + + write( + &mut chunk, + &tmp_dir.path().join(file_number.to_string()), + separator, + ); + + let (recycled_lines, recycled_buffer) = chunk.recycle(); + + file_number += 1; + + chunks::read( + &mut sender_option, + recycled_buffer, + &mut carry_over, + &mut file, + &mut files, + separator, + recycled_lines, + &settings, + ); + } +} + +/// Write the lines in `chunk` to `file`, separated by `separator`. 
+fn write(chunk: &mut Chunk, file: &Path, separator: u8) { + chunk.with_lines_mut(|lines| { + // Write the lines to the file + let file = crash_if_err!(1, OpenOptions::new().create(true).write(true).open(file)); + let mut writer = BufWriter::new(file); + for s in lines.iter() { + crash_if_err!(1, writer.write_all(s.line.as_bytes())); + crash_if_err!(1, writer.write_all(&[separator])); + } + }); +} diff --git a/src/uu/sort/src/external_sort/LICENSE b/src/uu/sort/src/external_sort/LICENSE deleted file mode 100644 index e26c89c9f..000000000 --- a/src/uu/sort/src/external_sort/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright 2018 Battelle Memorial Institute - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs deleted file mode 100644 index af6902367..000000000 --- a/src/uu/sort/src/external_sort/mod.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::fs::OpenOptions; -use std::io::{BufWriter, Write}; -use std::path::Path; - -use tempdir::TempDir; - -use crate::{file_to_lines_iter, FileMerger}; - -use super::{GlobalSettings, Line}; - -/// Iterator that provides sorted `T`s -pub struct ExtSortedIterator<'a> { - file_merger: FileMerger<'a>, - // Keep tmp_dir around, it is deleted when dropped. - _tmp_dir: TempDir, -} - -impl<'a> Iterator for ExtSortedIterator<'a> { - type Item = Line; - fn next(&mut self) -> Option { - self.file_merger.next() - } -} - -/// Sort (based on `compare`) the `T`s provided by `unsorted` and return an -/// iterator -/// -/// # Panics -/// -/// This method can panic due to issues writing intermediate sorted chunks -/// to disk. -pub fn ext_sort( - unsorted: impl Iterator, - settings: &GlobalSettings, -) -> ExtSortedIterator { - let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); - - let mut total_read = 0; - let mut chunk = Vec::new(); - - let mut chunks_read = 0; - let mut file_merger = FileMerger::new(settings); - - // make the initial chunks on disk - for seq in unsorted { - let seq_size = seq.estimate_size(); - total_read += seq_size; - - chunk.push(seq); - - if total_read >= settings.buffer_size && chunk.len() >= 2 { - super::sort_by(&mut chunk, &settings); - - let file_path = tmp_dir.path().join(chunks_read.to_string()); - write_chunk(settings, &file_path, &mut chunk); - chunk.clear(); - total_read = 0; - chunks_read += 1; - - file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap())) - } - } - // write the last chunk - if !chunk.is_empty() { - super::sort_by(&mut chunk, &settings); - - let file_path = tmp_dir.path().join(chunks_read.to_string()); - write_chunk( - settings, - 
&tmp_dir.path().join(chunks_read.to_string()), - &mut chunk, - ); - - file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap())); - } - ExtSortedIterator { - file_merger, - _tmp_dir: tmp_dir, - } -} - -fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec) { - let new_file = crash_if_err!(1, OpenOptions::new().create(true).append(true).open(file)); - let mut buf_write = BufWriter::new(new_file); - for s in chunk { - crash_if_err!(1, buf_write.write_all(s.line.as_bytes())); - crash_if_err!( - 1, - buf_write.write_all(if settings.zero_terminated { "\0" } else { "\n" }.as_bytes(),) - ); - } - crash_if_err!(1, buf_write.flush()); -} diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs new file mode 100644 index 000000000..6f7cdfed7 --- /dev/null +++ b/src/uu/sort/src/merge.rs @@ -0,0 +1,223 @@ +//! Merge already sorted files. +//! +//! We achieve performance by splitting the tasks of sorting and writing, and reading and parsing between two threads. +//! The threads communicate over channels. There's one channel per file in the direction reader -> sorter, but only +//! one channel from the sorter back to the reader. The channels to the sorter are used to send the read chunks. +//! The sorter reads the next chunk from the channel whenever it needs the next chunk after running out of lines +//! from the previous read of the file. The channel back from the sorter to the reader has two purposes: To allow the reader +//! to reuse memory allocations and to tell the reader which file to read from next. + +use std::{ + cmp::Ordering, + ffi::OsStr, + io::{Read, Write}, + iter, + rc::Rc, + sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender}, + thread, +}; + +use compare::Compare; + +use crate::{ + chunks::{self, Chunk}, + compare_by, open, GlobalSettings, +}; + +// Merge already sorted files. 
+pub fn merge<'a>(files: &[impl AsRef], settings: &'a GlobalSettings) -> FileMerger<'a> { + let (request_sender, request_receiver) = channel(); + let mut reader_files = Vec::with_capacity(files.len()); + let mut loaded_receivers = Vec::with_capacity(files.len()); + for (file_number, file) in files.iter().filter_map(open).enumerate() { + let (sender, receiver) = sync_channel(2); + loaded_receivers.push(receiver); + reader_files.push(ReaderFile { + file, + sender: Some(sender), + carry_over: vec![], + }); + request_sender + .send((file_number, Chunk::new(vec![0; 8 * 1024], |_| Vec::new()))) + .unwrap(); + } + + for file_number in 0..reader_files.len() { + request_sender + .send((file_number, Chunk::new(vec![0; 8 * 1024], |_| Vec::new()))) + .unwrap(); + } + + thread::spawn({ + let settings = settings.clone(); + move || { + reader( + request_receiver, + &mut reader_files, + &settings, + if settings.zero_terminated { + b'\0' + } else { + b'\n' + }, + ) + } + }); + + let mut mergeable_files = vec![]; + + for (file_number, receiver) in loaded_receivers.into_iter().enumerate() { + mergeable_files.push(MergeableFile { + current_chunk: Rc::new(receiver.recv().unwrap()), + file_number, + line_idx: 0, + receiver, + }) + } + + FileMerger { + heap: binary_heap_plus::BinaryHeap::from_vec_cmp( + mergeable_files, + FileComparator { settings }, + ), + request_sender, + prev: None, + } +} +/// The struct on the reader thread representing an input file +struct ReaderFile { + file: Box, + sender: Option>, + carry_over: Vec, +} + +/// The function running on the reader thread. 
+fn reader( + recycled_receiver: Receiver<(usize, Chunk)>, + files: &mut [ReaderFile], + settings: &GlobalSettings, + separator: u8, +) { + for (file_idx, chunk) in recycled_receiver.iter() { + let (recycled_lines, recycled_buffer) = chunk.recycle(); + let ReaderFile { + file, + sender, + carry_over, + } = &mut files[file_idx]; + chunks::read( + sender, + recycled_buffer, + carry_over, + file, + &mut iter::empty(), + separator, + recycled_lines, + settings, + ); + } +} +/// The struct on the main thread representing an input file +pub struct MergeableFile { + current_chunk: Rc, + line_idx: usize, + receiver: Receiver, + file_number: usize, +} + +/// A struct to keep track of the previous line we encountered. +/// +/// This is required for deduplication purposes. +struct PreviousLine { + chunk: Rc, + line_idx: usize, + file_number: usize, +} + +/// Merges files together. This is **not** an iterator because of lifetime problems. +pub struct FileMerger<'a> { + heap: binary_heap_plus::BinaryHeap>, + request_sender: Sender<(usize, Chunk)>, + prev: Option, +} + +impl<'a> FileMerger<'a> { + /// Write the merged contents to the output file. 
+ pub fn write_all(&mut self, settings: &GlobalSettings) { + let mut out = settings.out_writer(); + while self.write_next(settings, &mut out) {} + } + + fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool { + if let Some(file) = self.heap.peek() { + let prev = self.prev.replace(PreviousLine { + chunk: file.current_chunk.clone(), + line_idx: file.line_idx, + file_number: file.file_number, + }); + + file.current_chunk.with_lines(|lines| { + let current_line = &lines[file.line_idx]; + if settings.unique { + if let Some(prev) = &prev { + let cmp = compare_by( + &prev.chunk.borrow_lines()[prev.line_idx], + current_line, + settings, + ); + if cmp == Ordering::Equal { + return; + } + } + } + current_line.print(out, settings); + }); + + let was_last_line_for_file = + file.current_chunk.borrow_lines().len() == file.line_idx + 1; + + if was_last_line_for_file { + if let Ok(next_chunk) = file.receiver.recv() { + let mut file = self.heap.peek_mut().unwrap(); + file.current_chunk = Rc::new(next_chunk); + file.line_idx = 0; + } else { + self.heap.pop(); + } + } else { + self.heap.peek_mut().unwrap().line_idx += 1; + } + + if let Some(prev) = prev { + if let Ok(prev_chunk) = Rc::try_unwrap(prev.chunk) { + self.request_sender + .send((prev.file_number, prev_chunk)) + .ok(); + } + } + } + !self.heap.is_empty() + } +} + +/// Compares files by their current line. +struct FileComparator<'a> { + settings: &'a GlobalSettings, +} + +impl<'a> Compare for FileComparator<'a> { + fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering { + let mut cmp = compare_by( + &a.current_chunk.borrow_lines()[a.line_idx], + &b.current_chunk.borrow_lines()[b.line_idx], + self.settings, + ); + if cmp == Ordering::Equal { + // To make sorting stable, we need to consider the file number as well, + // as lines from a file with a lower number are to be considered "earlier". + cmp = a.file_number.cmp(&b.file_number); + } + // Our BinaryHeap is a max heap. 
We use it as a min heap, so we need to reverse the ordering. + cmp.reverse() + } +} diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 2697d7df4..b6ab5a2b1 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -15,13 +15,16 @@ #[macro_use] extern crate uucore; +mod check; +mod chunks; mod custom_str_cmp; -mod external_sort; +mod ext_sort; +mod merge; mod numeric_str_cmp; use clap::{App, Arg}; use custom_str_cmp::custom_str_cmp; -use external_sort::ext_sort; +use ext_sort::ext_sort; use fnv::FnvHasher; use itertools::Itertools; use numeric_str_cmp::{numeric_str_cmp, NumInfo, NumInfoParseSettings}; @@ -30,18 +33,15 @@ use rand::{thread_rng, Rng}; use rayon::prelude::*; use semver::Version; use std::cmp::Ordering; -use std::collections::BinaryHeap; use std::env; use std::ffi::OsStr; use std::fs::File; use std::hash::{Hash, Hasher}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; -use std::mem::replace; use std::ops::Range; use std::path::Path; use std::path::PathBuf; use unicode_width::UnicodeWidthStr; -use uucore::fs::is_stdin_interactive; // for Iterator::dedup() use uucore::InvalidEncodingHandling; static NAME: &str = "sort"; @@ -150,6 +150,19 @@ impl GlobalSettings { }; num_usize * suf_usize } + + fn out_writer(&self) -> BufWriter> { + match self.outfile { + Some(ref filename) => match File::create(Path::new(&filename)) { + Ok(f) => BufWriter::new(Box::new(f) as Box), + Err(e) => { + show_error!("{0}: {1}", filename, e.to_string()); + panic!("Could not open output file"); + } + }, + None => BufWriter::new(Box::new(stdout()) as Box), + } + } } impl Default for GlobalSettings { @@ -205,29 +218,7 @@ impl From<&GlobalSettings> for KeySettings { } } -#[derive(Debug, Clone)] -/// Represents the string selected by a FieldSelector. 
-struct SelectionRange { - range: Range, -} - -impl SelectionRange { - fn new(range: Range) -> Self { - Self { range } - } - - /// Gets the actual string slice represented by this Selection. - fn get_str<'a>(&self, line: &'a str) -> &'a str { - &line[self.range.to_owned()] - } - - fn shorten(&mut self, new_range: Range) { - self.range.end = self.range.start + new_range.end; - self.range.start += new_range.start; - } -} - -#[derive(Clone)] +#[derive(Clone, Debug)] enum NumCache { AsF64(GeneralF64ParseResult), WithInfo(NumInfo), @@ -248,64 +239,53 @@ impl NumCache { } } -#[derive(Clone)] -struct Selection { - range: SelectionRange, +#[derive(Clone, Debug)] +struct Selection<'a> { + slice: &'a str, num_cache: Option>, } -impl Selection { - /// Gets the actual string slice represented by this Selection. - fn get_str<'a>(&'a self, line: &'a Line) -> &'a str { - self.range.get_str(&line.line) - } -} - type Field = Range; -#[derive(Clone)] -pub struct Line { - line: Box, - // The common case is not to specify fields. Let's make this fast. - first_selection: Selection, - other_selections: Box<[Selection]>, +#[derive(Clone, Debug)] +pub struct Line<'a> { + line: &'a str, + selections: Box<[Selection<'a>]>, } -impl Line { - /// Estimate the number of bytes that this Line is occupying - pub fn estimate_size(&self) -> usize { - self.line.len() - + self.other_selections.len() * std::mem::size_of::() - + std::mem::size_of::() - } - - pub fn new(line: String, settings: &GlobalSettings) -> Self { +impl<'a> Line<'a> { + fn create(string: &'a str, settings: &GlobalSettings) -> Self { let fields = if settings .selectors .iter() - .any(|selector| selector.needs_tokens()) + .any(|selector| selector.needs_tokens) { // Only tokenize if we will need tokens. 
- Some(tokenize(&line, settings.separator)) + Some(tokenize(string, settings.separator)) } else { None }; - let mut selectors = settings.selectors.iter(); + Line { + line: string, + selections: settings + .selectors + .iter() + .filter(|selector| !selector.is_default_selection) + .map(|selector| selector.get_selection(string, fields.as_deref())) + .collect(), + } + } - let first_selection = selectors - .next() - .unwrap() - .get_selection(&line, fields.as_deref()); - - let other_selections: Vec = selectors - .map(|selector| selector.get_selection(&line, fields.as_deref())) - .collect(); - - Self { - line: line.into_boxed_str(), - first_selection, - other_selections: other_selections.into_boxed_slice(), + fn print(&self, writer: &mut impl Write, settings: &GlobalSettings) { + if settings.zero_terminated && !settings.debug { + crash_if_err!(1, writer.write_all(self.line.as_bytes())); + crash_if_err!(1, writer.write_all("\0".as_bytes())); + } else if !settings.debug { + crash_if_err!(1, writer.write_all(self.line.as_bytes())); + crash_if_err!(1, writer.write_all("\n".as_bytes())); + } else { + crash_if_err!(1, self.print_debug(settings, writer)); } } @@ -314,7 +294,7 @@ impl Line { fn print_debug( &self, settings: &GlobalSettings, - writer: &mut dyn Write, + writer: &mut impl Write, ) -> std::io::Result<()> { // We do not consider this function performance critical, as debug output is only useful for small files, // which are not a performance problem in any case. 
Therefore there aren't any special performance @@ -575,23 +555,39 @@ struct FieldSelector { from: KeyPosition, to: Option, settings: KeySettings, + needs_tokens: bool, + // Whether the selection for each line is going to be the whole line with no NumCache + is_default_selection: bool, } impl FieldSelector { - fn needs_tokens(&self) -> bool { - self.from.field != 1 || self.from.char == 0 || self.to.is_some() + fn new(from: KeyPosition, to: Option, settings: KeySettings) -> Self { + Self { + is_default_selection: from.field == 1 + && from.char == 1 + && to.is_none() + // TODO: Once our MinRustV is 1.42 or higher, change this to the matches! macro + && match settings.mode { + SortMode::Numeric | SortMode::GeneralNumeric | SortMode::HumanNumeric => false, + _ => true, + }, + needs_tokens: from.field != 1 || from.char == 0 || to.is_some(), + from, + to, + settings, + } } /// Get the selection that corresponds to this selector for the line. /// If needs_fields returned false, tokens may be None. - fn get_selection(&self, line: &str, tokens: Option<&[Field]>) -> Selection { - let mut range = SelectionRange::new(self.get_range(&line, tokens)); + fn get_selection<'a>(&self, line: &'a str, tokens: Option<&[Field]>) -> Selection<'a> { + let mut range = &line[self.get_range(&line, tokens)]; let num_cache = if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric { // Parse NumInfo for this number. let (info, num_range) = NumInfo::parse( - range.get_str(&line), + range, NumInfoParseSettings { accept_si_units: self.settings.mode == SortMode::HumanNumeric, thousands_separator: Some(THOUSANDS_SEP), @@ -599,19 +595,21 @@ impl FieldSelector { }, ); // Shorten the range to what we need to pass to numeric_str_cmp later. 
- range.shorten(num_range); + range = &range[num_range]; Some(Box::new(NumCache::WithInfo(info))) } else if self.settings.mode == SortMode::GeneralNumeric { // Parse this number as f64, as this is the requirement for general numeric sorting. - let str = range.get_str(&line); Some(Box::new(NumCache::AsF64(general_f64_parse( - &str[get_leading_gen(str)], + &range[get_leading_gen(range)], )))) } else { // This is not a numeric sort, so we don't need a NumCache. None }; - Selection { range, num_cache } + Selection { + slice: range, + num_cache, + } } /// Look up the range in the line that corresponds to this selector. @@ -701,91 +699,6 @@ impl FieldSelector { } } -struct MergeableFile<'a> { - lines: Box + 'a>, - current_line: Line, - settings: &'a GlobalSettings, - file_index: usize, -} - -// BinaryHeap depends on `Ord`. Note that we want to pop smallest items -// from the heap first, and BinaryHeap.pop() returns the largest, so we -// trick it into the right order by calling reverse() here. -impl<'a> Ord for MergeableFile<'a> { - fn cmp(&self, other: &MergeableFile) -> Ordering { - let comparison = compare_by(&self.current_line, &other.current_line, self.settings); - if comparison == Ordering::Equal { - // If lines are equal, the earlier file takes precedence. 
- self.file_index.cmp(&other.file_index) - } else { - comparison - } - .reverse() - } -} - -impl<'a> PartialOrd for MergeableFile<'a> { - fn partial_cmp(&self, other: &MergeableFile) -> Option { - Some(self.cmp(other)) - } -} - -impl<'a> PartialEq for MergeableFile<'a> { - fn eq(&self, other: &MergeableFile) -> bool { - Ordering::Equal == self.cmp(other) - } -} - -impl<'a> Eq for MergeableFile<'a> {} - -struct FileMerger<'a> { - heap: BinaryHeap>, - settings: &'a GlobalSettings, -} - -impl<'a> FileMerger<'a> { - fn new(settings: &'a GlobalSettings) -> FileMerger<'a> { - FileMerger { - heap: BinaryHeap::new(), - settings, - } - } - fn push_file(&mut self, mut lines: Box + 'a>) { - if let Some(next_line) = lines.next() { - let mergeable_file = MergeableFile { - lines, - current_line: next_line, - settings: &self.settings, - file_index: self.heap.len(), - }; - self.heap.push(mergeable_file); - } - } -} - -impl<'a> Iterator for FileMerger<'a> { - type Item = Line; - fn next(&mut self) -> Option { - match self.heap.pop() { - Some(mut current) => { - match current.lines.next() { - Some(next_line) => { - let ret = replace(&mut current.current_line, next_line); - self.heap.push(current); - Some(ret) - } - _ => { - // Don't put it back in the heap (it's empty/erroring) - // but its first line is still valid. 
- Some(current.current_line) - } - } - } - None => None, - } - } -} - fn get_usage() -> String { format!( "{0} {1} @@ -985,7 +898,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mut files = Vec::new(); for path in &files0_from { - let (reader, _) = open(path.as_str()).expect("Could not read from file specified."); + let reader = open(path.as_str()).expect("Could not read from file specified."); let buf_reader = BufReader::new(reader); for line in buf_reader.split(b'\0').flatten() { files.push( @@ -1112,11 +1025,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let to = from_to .next() .map(|to| KeyPosition::parse(to, 0, &mut key_settings)); - let field_selector = FieldSelector { - from, - to, - settings: key_settings, - }; + let field_selector = FieldSelector::new(from, to, key_settings); settings.selectors.push(field_selector); } } @@ -1124,48 +1033,21 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if !settings.stable || !matches.is_present(OPT_KEY) { // add a default selector matching the whole line let key_settings = KeySettings::from(&settings); - settings.selectors.push(FieldSelector { - from: KeyPosition { + settings.selectors.push(FieldSelector::new( + KeyPosition { field: 1, char: 1, ignore_blanks: key_settings.ignore_blanks, }, - to: None, - settings: key_settings, - }); + None, + key_settings, + )); } - exec(files, settings) + exec(&files, &settings) } -fn file_to_lines_iter( - file: impl AsRef, - settings: &'_ GlobalSettings, -) -> Option + '_> { - let (reader, _) = match open(file) { - Some(x) => x, - None => return None, - }; - - let buf_reader = BufReader::new(reader); - - Some( - buf_reader - .split(if settings.zero_terminated { - b'\0' - } else { - b'\n' - }) - .map(move |line| { - Line::new( - crash_if_err!(1, String::from_utf8(crash_if_err!(1, line))), - settings, - ) - }), - ) -} - -fn output_sorted_lines(iter: impl Iterator, settings: &GlobalSettings) { +fn output_sorted_lines<'a>(iter: impl Iterator>, settings: &GlobalSettings) 
{ if settings.unique { print_sorted( iter.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), @@ -1176,87 +1058,48 @@ fn output_sorted_lines(iter: impl Iterator, settings: &GlobalSettin } } -fn exec(files: Vec, settings: GlobalSettings) -> i32 { +fn exec(files: &[String], settings: &GlobalSettings) -> i32 { if settings.merge { - let mut file_merger = FileMerger::new(&settings); - for lines in files - .iter() - .filter_map(|file| file_to_lines_iter(file, &settings)) - { - file_merger.push_file(Box::new(lines)); + let mut file_merger = merge::merge(files, settings); + file_merger.write_all(settings); + } else if settings.check { + if files.len() > 1 { + crash!(1, "only one file allowed with -c"); } - output_sorted_lines(file_merger, &settings); + return check::check(files.first().unwrap(), settings); + } else if settings.ext_sort { + let mut lines = files.iter().filter_map(open); + + let mut sorted = ext_sort(&mut lines, &settings); + sorted.file_merger.write_all(settings); } else { - let lines = files - .iter() - .filter_map(|file| file_to_lines_iter(file, &settings)) - .flatten(); + let separator = if settings.zero_terminated { '\0' } else { '\n' }; + let mut lines = vec![]; + let mut full_string = String::new(); - if settings.check { - return exec_check_file(lines, &settings); - } + for mut file in files.iter().filter_map(open) { + crash_if_err!(1, file.read_to_string(&mut full_string)); - // Only use ext_sorter when we need to. - // Probably faster that we don't create - // an owned value each run - if settings.ext_sort { - let sorted_lines = ext_sort(lines, &settings); - output_sorted_lines(sorted_lines, &settings); - } else { - let mut lines = vec![]; - - // This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression. 
- for (file, _) in files.iter().map(open).flatten() { - let buf_reader = BufReader::new(file); - for line in buf_reader.split(if settings.zero_terminated { - b'\0' - } else { - b'\n' - }) { - let string = crash_if_err!(1, String::from_utf8(crash_if_err!(1, line))); - lines.push(Line::new(string, &settings)); - } + if !full_string.ends_with(separator) { + full_string.push(separator); } - - sort_by(&mut lines, &settings); - output_sorted_lines(lines.into_iter(), &settings); } - } + if full_string.ends_with(separator) { + full_string.pop(); + } + + for line in full_string.split(if settings.zero_terminated { '\0' } else { '\n' }) { + lines.push(Line::create(line, &settings)); + } + + sort_by(&mut lines, &settings); + output_sorted_lines(lines.into_iter(), &settings); + } 0 } -fn exec_check_file(unwrapped_lines: impl Iterator, settings: &GlobalSettings) -> i32 { - // errors yields the line before each disorder, - // plus the last line (quirk of .coalesce()) - let mut errors = unwrapped_lines - .enumerate() - .coalesce(|(last_i, last_line), (i, line)| { - if compare_by(&last_line, &line, &settings) == Ordering::Greater { - Err(((last_i, last_line), (i, line))) - } else { - Ok((i, line)) - } - }); - if let Some((first_error_index, _line)) = errors.next() { - // Check for a second "error", as .coalesce() always returns the last - // line, no matter what our merging function does. - if let Some(_last_line_or_next_error) = errors.next() { - if !settings.check_silent { - println!("sort: disorder in line {}", first_error_index); - }; - 1 - } else { - // first "error" was actually the last line. - 0 - } - } else { - // unwrapped_lines was empty. Empty files are defined to be sorted. 
- 0 - } -} - -fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { +fn sort_by<'a>(unsorted: &mut Vec>, settings: &GlobalSettings) { if settings.stable || settings.unique { unsorted.par_sort_by(|a, b| compare_by(a, b, &settings)) } else { @@ -1264,19 +1107,39 @@ fn sort_by(unsorted: &mut Vec, settings: &GlobalSettings) { } } -fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering { - for (idx, selector) in global_settings.selectors.iter().enumerate() { - let (a_selection, b_selection) = if idx == 0 { - (&a.first_selection, &b.first_selection) +fn compare_by<'a>(a: &Line<'a>, b: &Line<'a>, global_settings: &GlobalSettings) -> Ordering { + let mut idx = 0; + for selector in &global_settings.selectors { + let mut _selections = None; + let (a_selection, b_selection) = if selector.is_default_selection { + // We can select the whole line. + // We have to store the selections outside of the if-block so that they live long enough. + _selections = Some(( + Selection { + slice: a.line, + num_cache: None, + }, + Selection { + slice: b.line, + num_cache: None, + }, + )); + // Unwrap the selections again, and return references to them. 
+ ( + &_selections.as_ref().unwrap().0, + &_selections.as_ref().unwrap().1, + ) } else { - (&a.other_selections[idx - 1], &b.other_selections[idx - 1]) + let selections = (&a.selections[idx], &b.selections[idx]); + idx += 1; + selections }; - let a_str = a_selection.get_str(a); - let b_str = b_selection.get_str(b); + let a_str = a_selection.slice; + let b_str = b_selection.slice; let settings = &selector.settings; let cmp: Ordering = if settings.random { - random_shuffle(a_str, b_str, global_settings.salt.clone()) + random_shuffle(a_str, b_str, &global_settings.salt) } else { match settings.mode { SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( @@ -1307,7 +1170,7 @@ fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering let cmp = if global_settings.random || global_settings.stable || global_settings.unique { Ordering::Equal } else { - a.line.cmp(&b.line) + a.line.cmp(b.line) }; if global_settings.reverse { @@ -1362,7 +1225,7 @@ fn get_leading_gen(input: &str) -> Range { leading_whitespace_len..input.len() } -#[derive(Copy, Clone, PartialEq, PartialOrd)] +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] enum GeneralF64ParseResult { Invalid, NaN, @@ -1408,12 +1271,11 @@ fn get_hash(t: &T) -> u64 { s.finish() } -fn random_shuffle(a: &str, b: &str, x: String) -> Ordering { +fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering { #![allow(clippy::comparison_chain)] - let salt_slice = x.as_str(); - let da = get_hash(&[a, salt_slice].concat()); - let db = get_hash(&[b, salt_slice].concat()); + let da = get_hash(&[a, salt].concat()); + let db = get_hash(&[b, salt].concat()); da.cmp(&db) } @@ -1504,45 +1366,23 @@ fn version_compare(a: &str, b: &str) -> Ordering { } } -fn print_sorted>(iter: T, settings: &GlobalSettings) { - let mut file: Box = match settings.outfile { - Some(ref filename) => match File::create(Path::new(&filename)) { - Ok(f) => Box::new(BufWriter::new(f)) as Box, - Err(e) => { - show_error!("{0}: {1}", 
filename, e.to_string()); - panic!("Could not open output file"); - } - }, - None => Box::new(BufWriter::new(stdout())) as Box, - }; - if settings.zero_terminated && !settings.debug { - for line in iter { - crash_if_err!(1, file.write_all(line.line.as_bytes())); - crash_if_err!(1, file.write_all("\0".as_bytes())); - } - } else { - for line in iter { - if !settings.debug { - crash_if_err!(1, file.write_all(line.line.as_bytes())); - crash_if_err!(1, file.write_all("\n".as_bytes())); - } else { - crash_if_err!(1, line.print_debug(settings, &mut file)); - } - } +fn print_sorted<'a, T: Iterator>>(iter: T, settings: &GlobalSettings) { + let mut writer = settings.out_writer(); + for line in iter { + line.print(&mut writer, settings); } - crash_if_err!(1, file.flush()); } // from cat.rs -fn open(path: impl AsRef) -> Option<(Box, bool)> { +fn open(path: impl AsRef) -> Option> { let path = path.as_ref(); if path == "-" { let stdin = stdin(); - return Some((Box::new(stdin) as Box, is_stdin_interactive())); + return Some(Box::new(stdin) as Box); } match File::open(Path::new(path)) { - Ok(f) => Some((Box::new(f) as Box, false)), + Ok(f) => Some(Box::new(f) as Box), Err(e) => { show_error!("{0:?}: {1}", path, e.to_string()); None @@ -1568,7 +1408,7 @@ mod tests { let b = "Ted"; let c = get_rand_string(); - assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); + assert_eq!(Ordering::Equal, random_shuffle(a, b, &c)); } #[test] @@ -1592,7 +1432,7 @@ mod tests { let b = "9"; let c = get_rand_string(); - assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); + assert_eq!(Ordering::Equal, random_shuffle(a, b, &c)); } #[test] @@ -1631,10 +1471,12 @@ mod tests { fn test_line_size() { // We should make sure to not regress the size of the Line struct because // it is unconditional overhead for every line we sort. 
- assert_eq!(std::mem::size_of::(), 56); + assert_eq!(std::mem::size_of::(), 32); // These are the fields of Line: - assert_eq!(std::mem::size_of::>(), 16); - assert_eq!(std::mem::size_of::(), 24); + assert_eq!(std::mem::size_of::<&str>(), 16); assert_eq!(std::mem::size_of::>(), 16); + + // How big is a selection? Constant cost all lines pay when we need selections. + assert_eq!(std::mem::size_of::(), 24); } } diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index bad9d577e..e89d18054 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -122,7 +122,7 @@ fn test_check_zero_terminated_failure() { .arg("-c") .arg("zero-terminated.txt") .fails() - .stdout_is("sort: disorder in line 0\n"); + .stdout_is("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n"); } #[test] @@ -621,7 +621,7 @@ fn test_check() { .arg("-c") .arg("check_fail.txt") .fails() - .stdout_is("sort: disorder in line 4\n"); + .stdout_is("sort: check_fail.txt:6: disorder: 5\n"); new_ucmd!() .arg("-c") From 2f84f59573128927b5f8ffd9cad6a09ef895ffad Mon Sep 17 00:00:00 2001 From: Chad Brewbaker Date: Sun, 16 May 2021 19:43:53 -0500 Subject: [PATCH 062/148] fixing regex to take negative time offsets --- tests/by-util/test_ls.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 0985ba719..a57525e4b 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -685,7 +685,7 @@ fn test_ls_styles() { at.touch("test"); let re_full = Regex::new( - r"[a-z-]* \d* \w* \w* \d* \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d* \+\d{4} test\n", + r"[a-z-]* \d* \w* \w* \d* \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d* (\+|\-)\d{4} test\n", ) .unwrap(); let re_long = From eeef8290df9e5ea24eb8ca4ae5fb3e0fc40576b3 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sun, 16 May 2021 21:21:20 -0400 Subject: [PATCH 063/148] head: display errors for each input file Change the behavior of `head` to display 
an error for each problematic file, instead of displaying an error message for the first problematic file and terminating immediately at that point. This change now matches the behavior of GNU `head`. Before this commit, the first error caused the program to terminate immediately: $ head a b c head: error: head: cannot open 'a' for reading: No such file or directory After this commit: $ head a b c head: cannot open 'a' for reading: No such file or directory head: cannot open 'b' for reading: No such file or directory head: cannot open 'c' for reading: No such file or directory --- src/uu/head/src/head.rs | 73 ++++++++++++++++++-------------------- tests/by-util/test_head.rs | 12 +++++++ tests/common/util.rs | 7 +++- 3 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index faaeedd3f..0c8b3bc88 100644 --- a/src/uu/head/src/head.rs +++ b/src/uu/head/src/head.rs @@ -2,7 +2,7 @@ use clap::{App, Arg}; use std::convert::TryFrom; use std::ffi::OsString; use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write}; -use uucore::{crash, executable, show_error}; +use uucore::{crash, executable, show_error, show_error_custom_description}; const EXIT_FAILURE: i32 = 1; const EXIT_SUCCESS: i32 = 0; @@ -400,7 +400,8 @@ fn head_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Resul } } -fn uu_head(options: &HeadOptions) { +fn uu_head(options: &HeadOptions) -> Result<(), u32> { + let mut error_count = 0; let mut first = true; for fname in &options.files { let res = match fname.as_str() { @@ -433,30 +434,22 @@ fn uu_head(options: &HeadOptions) { name => { let mut file = match std::fs::File::open(name) { Ok(f) => f, - Err(err) => match err.kind() { - ErrorKind::NotFound => { - crash!( - EXIT_FAILURE, - "head: cannot open '{}' for reading: No such file or directory", - name - ); + Err(err) => { + let prefix = format!("cannot open '{}' for reading", name); + match err.kind() { + ErrorKind::NotFound => { + 
show_error_custom_description!(prefix, "No such file or directory"); + } + ErrorKind::PermissionDenied => { + show_error_custom_description!(prefix, "Permission denied"); + } + _ => { + show_error_custom_description!(prefix, "{}", err); + } } - ErrorKind::PermissionDenied => { - crash!( - EXIT_FAILURE, - "head: cannot open '{}' for reading: Permission denied", - name - ); - } - _ => { - crash!( - EXIT_FAILURE, - "head: cannot open '{}' for reading: {}", - name, - err - ); - } - }, + error_count += 1; + continue; + } }; if (options.files.len() > 1 && !options.quiet) || options.verbose { if !first { @@ -468,21 +461,22 @@ fn uu_head(options: &HeadOptions) { } }; if res.is_err() { - if fname.as_str() == "-" { - crash!( - EXIT_FAILURE, - "head: error reading standard input: Input/output error" - ); + let name = if fname.as_str() == "-" { + "standard input" } else { - crash!( - EXIT_FAILURE, - "head: error reading {}: Input/output error", - fname - ); - } + fname + }; + let prefix = format!("error reading {}", name); + show_error_custom_description!(prefix, "Input/output error"); + error_count += 1; } first = false; } + if error_count > 0 { + Err(error_count) + } else { + Ok(()) + } } pub fn uumain(args: impl uucore::Args) -> i32 { @@ -492,9 +486,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { crash!(EXIT_FAILURE, "head: {}", s); } }; - uu_head(&args); - - EXIT_SUCCESS + match uu_head(&args) { + Ok(_) => EXIT_SUCCESS, + Err(_) => EXIT_FAILURE, + } } #[cfg(test)] diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 2aedbdcbe..88df1f068 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -162,6 +162,18 @@ fn test_no_such_file_or_directory() { .stderr_contains("cannot open 'no_such_file.toml' for reading: No such file or directory"); } +/// Test that each non-existent files gets its own error message printed. 
+#[test] +fn test_multiple_nonexistent_files() { + new_ucmd!() + .args(&["bogusfile1", "bogusfile2"]) + .fails() + .stdout_does_not_contain("==> bogusfile1 <==") + .stderr_contains("cannot open 'bogusfile1' for reading: No such file or directory") + .stdout_does_not_contain("==> bogusfile2 <==") + .stderr_contains("cannot open 'bogusfile2' for reading: No such file or directory"); +} + // there was a bug not caught by previous tests // where for negative n > 3, the total amount of lines // was correct, but it would eat from the second line diff --git a/tests/common/util.rs b/tests/common/util.rs index 719849afc..611baadd4 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -315,7 +315,12 @@ impl CmdResult { } pub fn stdout_does_not_contain>(&self, cmp: T) -> &CmdResult { - assert!(!self.stdout_str().contains(cmp.as_ref())); + assert!( + !self.stdout_str().contains(cmp.as_ref()), + "'{}' contains '{}' but should not", + self.stdout_str(), + cmp.as_ref(), + ); self } From c68c83c6ddc8b629d178b47ba6d7a6f987f54817 Mon Sep 17 00:00:00 2001 From: nicoo Date: Thu, 29 Apr 2021 13:50:31 +0200 Subject: [PATCH 064/148] factor::table: Take mutable refs This will be easier to adapt to working with multiple numbers to process at once. 
--- src/uu/factor/src/factor.rs | 2 +- src/uu/factor/src/table.rs | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index ebe06a1c5..f53abd772 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -161,7 +161,7 @@ pub fn factor(mut n: u64) -> Factors { return factors; } - let (factors, n) = table::factor(n, factors); + table::factor(&mut n, &mut factors); #[allow(clippy::let_and_return)] let r = if n < (1 << 32) { diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index 94ad6df4c..cbd4af5e4 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -8,15 +8,13 @@ // spell-checker: ignore (ToDO) INVS -use std::num::Wrapping; - use crate::Factors; include!(concat!(env!("OUT_DIR"), "/prime_table.rs")); -pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { +pub(crate) fn factor(num: &mut u64, factors: &mut Factors) { for &(prime, inv, ceil) in P_INVS_U64 { - if num == 1 { + if *num == 1 { break; } @@ -27,11 +25,11 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { // for a nice explanation. let mut k = 0; loop { - let Wrapping(x) = Wrapping(num) * Wrapping(inv); + let x = num.wrapping_mul(inv); // While prime divides num if x <= ceil { - num = x; + *num = x; k += 1; #[cfg(feature = "coz")] coz::progress!("factor found"); @@ -43,6 +41,4 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { } } } - - (factors, num) } From cd047425aaefbf9ea4bb059651dffddbcbace251 Mon Sep 17 00:00:00 2001 From: nicoo Date: Thu, 29 Apr 2021 14:15:40 +0200 Subject: [PATCH 065/148] factor::table: Add chunked implementation and microbenchmarks The factor_chunk implementation is a strawman, but getting it in place allows us to set up the microbenchmarking etc. 
--- src/uu/factor/Cargo.toml | 5 ++++ src/uu/factor/benches/table.rs | 44 ++++++++++++++++++++++++++++++++++ src/uu/factor/src/cli.rs | 4 ++-- src/uu/factor/src/table.rs | 9 ++++++- 4 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 src/uu/factor/benches/table.rs diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index c4e7e8469..cb77c5d19 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -23,6 +23,7 @@ uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [dev-dependencies] +array-init = "2.0.0" criterion = "0.3" paste = "0.1.18" quickcheck = "0.9.2" @@ -32,6 +33,10 @@ rand_chacha = "0.2.2" name = "gcd" harness = false +[[bench]] +name = "table" +harness = false + [[bin]] name = "factor" path = "src/main.rs" diff --git a/src/uu/factor/benches/table.rs b/src/uu/factor/benches/table.rs new file mode 100644 index 000000000..8fae7cef6 --- /dev/null +++ b/src/uu/factor/benches/table.rs @@ -0,0 +1,44 @@ +use array_init::array_init; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use uu_factor::{table::*, Factors}; + +fn table(c: &mut Criterion) { + let inputs = { + // Deterministic RNG; use an explicitely-named RNG to guarantee stability + use rand::{RngCore, SeedableRng}; + use rand_chacha::ChaCha8Rng; + const SEED: u64 = 0xdead_bebe_ea75_cafe; + let mut rng = ChaCha8Rng::seed_from_u64(SEED); + + std::iter::repeat_with(move || array_init(|_| rng.next_u64())) + }; + + let mut group = c.benchmark_group("table"); + for a in inputs.take(10) { + let a_str = format!("{:?}", a); + group.bench_with_input( + BenchmarkId::from_parameter("chunked_".to_owned() + &a_str), + &a, + |b, &a| { + b.iter(|| factor_chunk(&mut a.clone(), &mut array_init(|_| Factors::one()))); + }, + ); + group.bench_with_input( + BenchmarkId::from_parameter("seq_".to_owned() + &a_str), + &a, + |b, &a| { + b.iter(|| { + let 
mut n_s = a.clone(); + let mut f_s: [_; CHUNK_SIZE] = array_init(|_| Factors::one()); + for (n, f) in n_s.iter_mut().zip(f_s.iter_mut()) { + factor(n, f) + } + }) + }, + ); + } + group.finish() +} + +criterion_group!(benches, table); +criterion_main!(benches); diff --git a/src/uu/factor/src/cli.rs b/src/uu/factor/src/cli.rs index fb7b3f192..ee4c8a4c4 100644 --- a/src/uu/factor/src/cli.rs +++ b/src/uu/factor/src/cli.rs @@ -13,13 +13,13 @@ use std::error::Error; use std::io::{self, stdin, stdout, BufRead, Write}; mod factor; -pub(crate) use factor::*; +pub use factor::*; use uucore::InvalidEncodingHandling; mod miller_rabin; pub mod numeric; mod rho; -mod table; +pub mod table; static SYNTAX: &str = "[OPTION] [NUMBER]..."; static SUMMARY: &str = "Print the prime factors of the given number(s). diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index cbd4af5e4..72628054c 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -12,7 +12,7 @@ use crate::Factors; include!(concat!(env!("OUT_DIR"), "/prime_table.rs")); -pub(crate) fn factor(num: &mut u64, factors: &mut Factors) { +pub fn factor(num: &mut u64, factors: &mut Factors) { for &(prime, inv, ceil) in P_INVS_U64 { if *num == 1 { break; @@ -42,3 +42,10 @@ pub(crate) fn factor(num: &mut u64, factors: &mut Factors) { } } } + +pub const CHUNK_SIZE: usize = 4; +pub fn factor_chunk(n_s: &mut [u64; CHUNK_SIZE], f_s: &mut [Factors; CHUNK_SIZE]) { + for (n, s) in n_s.iter_mut().zip(f_s.iter_mut()) { + factor(n, s); + } +} From 1fd5f9da25d9ce7be5206e5b1eabc0364064cdfb Mon Sep 17 00:00:00 2001 From: nicoo Date: Thu, 29 Apr 2021 14:29:59 +0200 Subject: [PATCH 066/148] factor::table::factor_chunk: Turn loop inside-out This keeps the traversal of `P_INVS_U64` (a large table) to a single pass in-order, rather than `CHUNK_SIZE` passes. 
--- src/uu/factor/src/table.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index 72628054c..45464ac27 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -45,7 +45,30 @@ pub fn factor(num: &mut u64, factors: &mut Factors) { pub const CHUNK_SIZE: usize = 4; pub fn factor_chunk(n_s: &mut [u64; CHUNK_SIZE], f_s: &mut [Factors; CHUNK_SIZE]) { - for (n, s) in n_s.iter_mut().zip(f_s.iter_mut()) { - factor(n, s); + for &(prime, inv, ceil) in P_INVS_U64 { + if n_s[0] == 1 && n_s[1] == 1 && n_s[2] == 1 && n_s[3] == 1 { + break; + } + + for (num, factors) in n_s.iter_mut().zip(f_s.iter_mut()) { + if *num == 1 { + continue; + } + let mut k = 0; + loop { + let x = num.wrapping_mul(inv); + + // While prime divides num + if x <= ceil { + *num = x; + k += 1; + } else { + if k > 0 { + factors.add(prime, k); + } + break; + } + } + } } } From 7c287542c7cd436520b3b07f124c6dcdf69e9f36 Mon Sep 17 00:00:00 2001 From: nicoo Date: Thu, 29 Apr 2021 15:45:04 +0200 Subject: [PATCH 067/148] factor::table: Fixup microbenchmark Previous version would perform an amount of work proportional to `CHUNK_SIZE`, so this wasn't a valid way to benchmark at multiple values of that constant. The `TryInto` implementation for `&mut [T]` to `&mut [T; N]` relies on `const` generics, and is available in (stable) Rust v1.51 and later. 
--- src/uu/factor/benches/table.rs | 20 +++++++++++++++++--- src/uu/factor/src/table.rs | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/uu/factor/benches/table.rs b/src/uu/factor/benches/table.rs index 8fae7cef6..ad8036d67 100644 --- a/src/uu/factor/benches/table.rs +++ b/src/uu/factor/benches/table.rs @@ -1,8 +1,16 @@ +use std::convert::TryInto; use array_init::array_init; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use uu_factor::{table::*, Factors}; fn table(c: &mut Criterion) { + const INPUT_SIZE: usize = 128; + assert!( + INPUT_SIZE % CHUNK_SIZE == 0, + "INPUT_SIZE ({}) is not divisible by CHUNK_SIZE ({})", + INPUT_SIZE, + CHUNK_SIZE + ); let inputs = { // Deterministic RNG; use an explicitely-named RNG to guarantee stability use rand::{RngCore, SeedableRng}; @@ -10,7 +18,7 @@ fn table(c: &mut Criterion) { const SEED: u64 = 0xdead_bebe_ea75_cafe; let mut rng = ChaCha8Rng::seed_from_u64(SEED); - std::iter::repeat_with(move || array_init(|_| rng.next_u64())) + std::iter::repeat_with(move || array_init::<_, _, INPUT_SIZE>(|_| rng.next_u64())) }; let mut group = c.benchmark_group("table"); @@ -20,7 +28,13 @@ fn table(c: &mut Criterion) { BenchmarkId::from_parameter("chunked_".to_owned() + &a_str), &a, |b, &a| { - b.iter(|| factor_chunk(&mut a.clone(), &mut array_init(|_| Factors::one()))); + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); + for (n_s, f_s) in n_s.chunks_mut(CHUNK_SIZE).zip(f_s.chunks_mut(CHUNK_SIZE)) { + factor_chunk(n_s.try_into().unwrap(), f_s.try_into().unwrap()) + } + }) }, ); group.bench_with_input( @@ -29,7 +43,7 @@ fn table(c: &mut Criterion) { |b, &a| { b.iter(|| { let mut n_s = a.clone(); - let mut f_s: [_; CHUNK_SIZE] = array_init(|_| Factors::one()); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); for (n, f) in n_s.iter_mut().zip(f_s.iter_mut()) { factor(n, f) } diff --git a/src/uu/factor/src/table.rs 
b/src/uu/factor/src/table.rs index 45464ac27..db2698e4b 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -43,7 +43,7 @@ pub fn factor(num: &mut u64, factors: &mut Factors) { } } -pub const CHUNK_SIZE: usize = 4; +pub const CHUNK_SIZE: usize = 8; pub fn factor_chunk(n_s: &mut [u64; CHUNK_SIZE], f_s: &mut [Factors; CHUNK_SIZE]) { for &(prime, inv, ceil) in P_INVS_U64 { if n_s[0] == 1 && n_s[1] == 1 && n_s[2] == 1 && n_s[3] == 1 { From 12efaa6add6d5ceab0c9c73459088faa806ec82e Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 3 May 2021 12:26:05 +0200 Subject: [PATCH 068/148] factor: Add BENCHMARKING.md --- src/uu/factor/BENCHMARKING.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/uu/factor/BENCHMARKING.md diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md new file mode 100644 index 000000000..e93bed95e --- /dev/null +++ b/src/uu/factor/BENCHMARKING.md @@ -0,0 +1,12 @@ +# Benchmarking `factor` + +## Microbenchmarking deterministic functions + +We currently use [`criterion`] to benchmark deterministic functions, +such as `gcd` and `table::factor`. + +Those benchmarks can be simply executed with `cargo bench` as usual, +but may require a recent version of Rust, *i.e.* the project's minimum +supported version of Rust does not apply to the benchmarks. 
+ +[`criterion`]: https://bheisler.github.io/criterion.rs/book/index.html From ae15bf16a83328c08fd77f31da40af192eeedb5e Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 3 May 2021 14:42:26 +0200 Subject: [PATCH 069/148] factor::benches::table: Report throughput (in numbers/s) --- src/uu/factor/benches/table.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/uu/factor/benches/table.rs b/src/uu/factor/benches/table.rs index ad8036d67..44ea1c863 100644 --- a/src/uu/factor/benches/table.rs +++ b/src/uu/factor/benches/table.rs @@ -1,6 +1,6 @@ -use std::convert::TryInto; use array_init::array_init; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use std::convert::TryInto; use uu_factor::{table::*, Factors}; fn table(c: &mut Criterion) { @@ -22,6 +22,7 @@ fn table(c: &mut Criterion) { }; let mut group = c.benchmark_group("table"); + group.throughput(Throughput::Elements(INPUT_SIZE as _)); for a in inputs.take(10) { let a_str = format!("{:?}", a); group.bench_with_input( From e9f8194266125f72d8bf99ab83e8562a21c9d048 Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 3 May 2021 14:45:00 +0200 Subject: [PATCH 070/148] =?UTF-8?q?factor::benchmarking(doc):=20Add=20guid?= =?UTF-8?q?ance=20on=20running=20=C2=B5benches?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/uu/factor/BENCHMARKING.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md index e93bed95e..e87c965c2 100644 --- a/src/uu/factor/BENCHMARKING.md +++ b/src/uu/factor/BENCHMARKING.md @@ -9,4 +9,26 @@ Those benchmarks can be simply executed with `cargo bench` as usual, but may require a recent version of Rust, *i.e.* the project's minimum supported version of Rust does not apply to the benchmarks. 
+ +However, µbenchmarks are by nature unstable: not only are they specific to +the hardware, operating system version, etc., but they are noisy and affected +by other tasks on the system (browser, compile jobs, etc.), which can cause +`criterion` to report spurious performance improvements and regressions. + +This can be mitigated by getting as close to [idealised conditions][lemire] +as possible: +- minimize the amount of computation and I/O running concurrently to the + benchmark, *i.e.* close your browser and IM clients, don't compile at the + same time, etc. ; +- ensure the CPU's [frequency stays constant] during the benchmark ; +- [isolate a **physical** core], set it to `nohz_full`, and pin the benchmark + to it, so it won't be preempted in the middle of a measurement ; +- disable ASLR by running `setarch -R cargo bench`, so we can compare results + across multiple executions. + **TODO**: check this propagates to the benchmark process + + [`criterion`]: https://bheisler.github.io/criterion.rs/book/index.html +[lemire]: https://lemire.me/blog/2018/01/16/microbenchmarking-calls-for-idealized-conditions/ +[isolate a **physical** core]: https://pyperf.readthedocs.io/en/latest/system.html#isolate-cpus-on-linux +[frequency stays constant]: XXXTODO From 1d75f09743a8fbe436da4d84701200f844cfae9f Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 3 May 2021 14:45:24 +0200 Subject: [PATCH 071/148] =?UTF-8?q?factor::benchmarking(doc):=20Add=20guid?= =?UTF-8?q?ance=20on=20writing=20=C2=B5benches?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/uu/factor/BENCHMARKING.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md index e87c965c2..c629252b8 100644 --- a/src/uu/factor/BENCHMARKING.md +++ b/src/uu/factor/BENCHMARKING.md @@ -32,3 +32,34 @@ as possible: [lemire]: 
https://lemire.me/blog/2018/01/16/microbenchmarking-calls-for-idealized-conditions/ [isolate a **physical** core]: https://pyperf.readthedocs.io/en/latest/system.html#isolate-cpus-on-linux [frequency stays constant]: XXXTODO + + +### Guidance for designing µbenchmarks + +*Note:* this guidance is specific to `factor` and takes its application domain +into account; do not expect it to generalise to other projects. It is based +on Daniel Lemire's [*Microbenchmarking calls for idealized conditions*][lemire], +which I recommend reading if you want to add benchmarks to `factor`. + +1. Select a small, self-contained, deterministic component + `gcd` and `table::factor` are good examples of such: + - no I/O or access to external data structures ; + - no call into other components ; + - behaviour is deterministic: no RNG, no concurrency, ... ; + - the test's body is *fast* (~100ns for `gcd`, ~10µs for `factor::table`), + so each sample takes a very short time, minimizing variability and + maximizing the number of samples we can take in a given time. + +2. Benchmarks are immutable (once merged in `uutils`) + Modifying a benchmark means previously-collected values cannot meaningfully + be compared, silently giving nonsensical results. If you must modify an + existing benchmark, rename it. + +3. Test common cases + We are interested in overall performance, rather than specific edge-cases; + use **reproducibly-randomised inputs**, sampling from either all possible + input values or some subset of interest. + +4. Use [`criterion`], `criterion::black_box`, ... + `criterion` isn't perfect, but it is also much better than ad-hoc + solutions in each benchmark. 
From ddfcd2eb14d8046c6246c753b00e2c0466e43c17 Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 3 May 2021 15:04:06 +0200 Subject: [PATCH 072/148] factor::benchmarking: Add wishlist / planned work --- src/uu/factor/BENCHMARKING.md | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md index c629252b8..3ad038c15 100644 --- a/src/uu/factor/BENCHMARKING.md +++ b/src/uu/factor/BENCHMARKING.md @@ -63,3 +63,52 @@ which I recommend reading if you want to add benchmarks to `factor`. 4. Use [`criterion`], `criterion::black_box`, ... `criterion` isn't perfect, but it is also much better than ad-hoc solutions in each benchmark. + + +## Wishlist + +### Configurable statistical estimators + +`criterion` always uses the arithmetic average as estimator; in µbenchmarks, +where the code under test is fully deterministic and the measurements are +subject to additive, positive noise, [the minimum is more appropriate][lemire]. + + +### CI & reproducible performance testing + +Measuring performance on real hardware is important, as it relates directly +to what users of `factor` experience; however, such measurements are subject +to the constraints of the real-world, and aren't perfectly reproducible. +Moreover, the mitigations for it (described above) aren't achievable in +virtualized, multi-tenant environments such as CI. + +Instead, we could run the µbenchmarks in a simulated CPU with [`cachegrind`], +measure execution “time” in that model (in CI), and use it to detect and report +performance improvements and regressions. + +[`iai`] is an implementation of this idea for Rust. + +[`cachegrind`]: https://www.valgrind.org/docs/manual/cg-manual.html +[`iai`]: https://bheisler.github.io/criterion.rs/book/iai/iai.html + + +### Comparing randomised implementations across multiple inputs + +`factor` is a challenging target for system benchmarks as it combines two +characteristics: + +1. 
integer factoring algorithms are randomised, with large variance in + execution time ; + +2. various inputs also have large differences in factoring time, which + correspond to no natural, linear ordering of the inputs. + + +If (1) was untrue (i.e. if execution time wasn't random), we could faithfully +compare 2 implementations (2 successive versions, or `uutils` and GNU) using +a scatter plot, where each axis corresponds to the perf. of one implementation. + +Similarly, without (2) we could plot numbers on the X axis and their factoring +time on the Y axis, using multiple lines for various quantiles. The large +differences in factoring times for successive numbers mean that such a plot +would be unreadable. From 7c649bc74ecd425bdf15f5376979eeac973821c0 Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 3 May 2021 16:14:38 +0200 Subject: [PATCH 073/148] factor::benches: Add check against ASLR --- src/uu/factor/BENCHMARKING.md | 1 - src/uu/factor/benches/table.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md index 3ad038c15..cf3bb35d0 100644 --- a/src/uu/factor/BENCHMARKING.md +++ b/src/uu/factor/BENCHMARKING.md @@ -25,7 +25,6 @@ as possible: to it, so it won't be preempted in the middle of a measurement ; - disable ASLR by running `setarch -R cargo bench`, so we can compare results across multiple executions. 
- **TODO**: check this propagates to the benchmark process [`criterion`]: https://bheisler.github.io/criterion.rs/book/index.html diff --git a/src/uu/factor/benches/table.rs b/src/uu/factor/benches/table.rs index 44ea1c863..232e59053 100644 --- a/src/uu/factor/benches/table.rs +++ b/src/uu/factor/benches/table.rs @@ -4,6 +4,9 @@ use std::convert::TryInto; use uu_factor::{table::*, Factors}; fn table(c: &mut Criterion) { + #[cfg(target_os = "linux")] + check_personality(); + const INPUT_SIZE: usize = 128; assert!( INPUT_SIZE % CHUNK_SIZE == 0, @@ -55,5 +58,29 @@ fn table(c: &mut Criterion) { group.finish() } +#[cfg(target_os = "linux")] +fn check_personality() { + use std::fs; + const ADDR_NO_RANDOMIZE: u64 = 0x0040000; + const PERSONALITY_PATH: &'static str = "/proc/self/personality"; + + let p_string = fs::read_to_string(PERSONALITY_PATH) + .expect(&format!("Couldn't read '{}'", PERSONALITY_PATH)) + .strip_suffix("\n") + .unwrap() + .to_owned(); + + let personality = u64::from_str_radix(&p_string, 16).expect(&format!( + "Expected a hex value for personality, got '{:?}'", + p_string + )); + if personality & ADDR_NO_RANDOMIZE == 0 { + eprintln!( + "WARNING: Benchmarking with ASLR enabled (personality is {:x}), results might not be reproducible.", + personality + ); + } +} + criterion_group!(benches, table); criterion_main!(benches); From 1cd001f529c80484f52175370972015abef425b9 Mon Sep 17 00:00:00 2001 From: nicoo Date: Sat, 8 May 2021 17:56:18 +0200 Subject: [PATCH 074/148] factor::benches::table: Match BenchmarkId w/ criterion's conventions See https://bheisler.github.io/criterion.rs/book/user_guide/comparing_functions.html --- src/uu/factor/benches/table.rs | 44 ++++++++++++++-------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/src/uu/factor/benches/table.rs b/src/uu/factor/benches/table.rs index 232e59053..0b31b2b4c 100644 --- a/src/uu/factor/benches/table.rs +++ b/src/uu/factor/benches/table.rs @@ -28,32 +28,24 @@ fn table(c: &mut 
Criterion) { group.throughput(Throughput::Elements(INPUT_SIZE as _)); for a in inputs.take(10) { let a_str = format!("{:?}", a); - group.bench_with_input( - BenchmarkId::from_parameter("chunked_".to_owned() + &a_str), - &a, - |b, &a| { - b.iter(|| { - let mut n_s = a.clone(); - let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); - for (n_s, f_s) in n_s.chunks_mut(CHUNK_SIZE).zip(f_s.chunks_mut(CHUNK_SIZE)) { - factor_chunk(n_s.try_into().unwrap(), f_s.try_into().unwrap()) - } - }) - }, - ); - group.bench_with_input( - BenchmarkId::from_parameter("seq_".to_owned() + &a_str), - &a, - |b, &a| { - b.iter(|| { - let mut n_s = a.clone(); - let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); - for (n, f) in n_s.iter_mut().zip(f_s.iter_mut()) { - factor(n, f) - } - }) - }, - ); + group.bench_with_input(BenchmarkId::new("factor_chunk", &a_str), &a, |b, &a| { + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); + for (n_s, f_s) in n_s.chunks_mut(CHUNK_SIZE).zip(f_s.chunks_mut(CHUNK_SIZE)) { + factor_chunk(n_s.try_into().unwrap(), f_s.try_into().unwrap()) + } + }) + }); + group.bench_with_input(BenchmarkId::new("factor", &a_str), &a, |b, &a| { + b.iter(|| { + let mut n_s = a.clone(); + let mut f_s: [_; INPUT_SIZE] = array_init(|_| Factors::one()); + for (n, f) in n_s.iter_mut().zip(f_s.iter_mut()) { + factor(n, f) + } + }) + }); } group.finish() } From 00322b986bfe9311985f64f3401112db12600813 Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 17 May 2021 19:22:56 +0200 Subject: [PATCH 075/148] factor: Move benchmarks out-of-crate --- Cargo.toml | 3 +++ src/uu/factor/BENCHMARKING.md | 13 ++++++---- src/uu/factor/Cargo.toml | 18 +++---------- tests/benches/factor/Cargo.toml | 26 +++++++++++++++++++ .../benches}/factor/benches/gcd.rs | 0 .../benches}/factor/benches/table.rs | 0 6 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 tests/benches/factor/Cargo.toml rename {src/uu => 
tests/benches}/factor/benches/gcd.rs (100%) rename {src/uu => tests/benches}/factor/benches/table.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 7c1a771fd..745393260 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -324,6 +324,9 @@ wc = { optional=true, version="0.0.6", package="uu_wc", path="src/uu/wc" } who = { optional=true, version="0.0.6", package="uu_who", path="src/uu/who" } whoami = { optional=true, version="0.0.6", package="uu_whoami", path="src/uu/whoami" } yes = { optional=true, version="0.0.6", package="uu_yes", path="src/uu/yes" } + +factor_benches = { optional = true, version = "0.0.0", package = "uu_factor_benches", path = "tests/benches/factor" } + # # * pinned transitive dependencies # Not needed for now. Keep as examples: diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md index cf3bb35d0..e174d62b7 100644 --- a/src/uu/factor/BENCHMARKING.md +++ b/src/uu/factor/BENCHMARKING.md @@ -1,15 +1,18 @@ # Benchmarking `factor` +The benchmarks for `factor` are located under `tests/benches/factor` +and can be invoked with `cargo bench` in that directory. + +They are located outside the `uu_factor` crate, as they do not comply +with the project's minimum supported Rust version, *i.e.* may require +a newer version of `rustc`. + + ## Microbenchmarking deterministic functions We currently use [`criterion`] to benchmark deterministic functions, such as `gcd` and `table::factor`. -Those benchmarks can be simply executed with `cargo bench` as usual, -but may require a recent version of Rust, *i.e.* the project's minimum -supported version of Rust does not apply to the benchmarks. 
- - However, µbenchmarks are by nature unstable: not only are they specific to the hardware, operating system version, etc., but they are noisy and affected by other tasks on the system (browser, compile jobs, etc.), which can cause diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index cb77c5d19..eb34519f1 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -17,25 +17,15 @@ num-traits = "0.2.13" # used in src/numerics.rs, which is included by build.rs [dependencies] coz = { version = "0.1.3", optional = true } num-traits = "0.2.13" # Needs at least version 0.2.13 for "OverflowingAdd" -rand = { version="0.7", features=["small_rng"] } -smallvec = { version="0.6.14, < 1.0" } -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } -uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } +rand = { version = "0.7", features = ["small_rng"] } +smallvec = { version = "0.6.14, < 1.0" } +uucore = { version = ">=0.0.8", package = "uucore", path = "../../uucore" } +uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" } [dev-dependencies] -array-init = "2.0.0" -criterion = "0.3" paste = "0.1.18" quickcheck = "0.9.2" -rand_chacha = "0.2.2" -[[bench]] -name = "gcd" -harness = false - -[[bench]] -name = "table" -harness = false [[bin]] name = "factor" diff --git a/tests/benches/factor/Cargo.toml b/tests/benches/factor/Cargo.toml new file mode 100644 index 000000000..b3b718477 --- /dev/null +++ b/tests/benches/factor/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "uu_factor_benches" +version = "0.0.0" +authors = ["nicoo "] +license = "MIT" +description = "Benchmarks for the uu_factor integer factorization tool" +homepage = "https://github.com/uutils/coreutils" +edition = "2018" + +[dependencies] +uu_factor = { path = "../../../src/uu/factor" } + +[dev-dependencies] +array-init = "2.0.0" +criterion = "0.3" +rand = "0.7" +rand_chacha = "0.2.2" + + +[[bench]] 
+name = "gcd" +harness = false + +[[bench]] +name = "table" +harness = false diff --git a/src/uu/factor/benches/gcd.rs b/tests/benches/factor/benches/gcd.rs similarity index 100% rename from src/uu/factor/benches/gcd.rs rename to tests/benches/factor/benches/gcd.rs diff --git a/src/uu/factor/benches/table.rs b/tests/benches/factor/benches/table.rs similarity index 100% rename from src/uu/factor/benches/table.rs rename to tests/benches/factor/benches/table.rs From 9afed1f25f244305a1e24fc17aae061d0bd58c75 Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 17 May 2021 19:41:32 +0200 Subject: [PATCH 076/148] Update Cargo.lock Adding array-init v2.0.0 Updating cast v0.2.5 -> v0.2.6 Adding pest v2.1.3 Updating rustc_version v0.2.3 -> v0.3.3 Adding semver v0.11.0 Adding semver-parser v0.10.2 Updating serde v1.0.125 -> v1.0.126 Updating serde_derive v1.0.125 -> v1.0.126 Adding ucd-trie v0.1.3 Adding uu_factor_benches v0.0.0 (#tests/benches/factor) --- Cargo.lock | 77 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 77957de80..34e918b45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "array-init" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6945cc5422176fc5e602e590c2878d2c2acd9a4fe20a4baa7c28022521698ec6" + [[package]] name = "arrayvec" version = "0.4.12" @@ -136,9 +142,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cast" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc38c385bfd7e444464011bb24820f40dd1c76bcdfa1b78611cb7c2e5cafab75" +checksum = "57cdfa5d50aad6cb4d44dcab6101a7f79925bd59d82ca42f38a9856a28865374" dependencies = [ "rustc_version", ] @@ -258,6 +264,7 @@ dependencies = [ "uu_expand", "uu_expr", "uu_factor", + 
"uu_factor_benches", "uu_false", "uu_fmt", "uu_fold", @@ -1027,6 +1034,15 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + [[package]] name = "pkg-config" version = "0.3.19" @@ -1336,11 +1352,11 @@ checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" [[package]] name = "rustc_version" -version = "0.2.3" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" dependencies = [ - "semver", + "semver 0.11.0", ] [[package]] @@ -1370,7 +1386,16 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ - "semver-parser", + "semver-parser 0.7.0", +] + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser 0.10.2", ] [[package]] @@ -1380,10 +1405,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] -name = "serde" -version = "1.0.125" +name = "semver-parser" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + +[[package]] +name = "serde" +version = "1.0.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" [[package]] name = "serde_cbor" @@ -1397,9 +1431,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.125" +version = "1.0.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" dependencies = [ "proc-macro2", "quote 1.0.9", @@ -1627,6 +1661,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + [[package]] name = "unicode-segmentation" version = "1.7.1" @@ -1922,17 +1962,26 @@ name = "uu_factor" version = "0.0.6" dependencies = [ "coz", - "criterion", "num-traits", "paste", "quickcheck", "rand 0.7.3", - "rand_chacha", "smallvec", "uucore", "uucore_procs", ] +[[package]] +name = "uu_factor_benches" +version = "0.0.0" +dependencies = [ + "array-init", + "criterion", + "rand 0.7.3", + "rand_chacha", + "uu_factor", +] + [[package]] name = "uu_false" version = "0.0.6" @@ -2407,7 +2456,7 @@ dependencies = [ "itertools 0.10.0", "rand 0.7.3", "rayon", - "semver", + "semver 0.9.0", "tempdir", "unicode-width", "uucore", From f46b119493bfe419437e3bd97638f8553eb5ad8a Mon Sep 17 00:00:00 2001 From: nicoo Date: Mon, 17 May 2021 20:37:26 +0200 Subject: [PATCH 077/148] CI: Stabilise the version of GNU tests used in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “GNU tests” task is routinely broken on `master`. Broken CI is worse than no CI, as it teaches people to ignore errors. 
This PR pins the versions of the GNU testsuite (and GNUlib) used, to current stable versions, so this task stops breaking unexpectedly. Presumably, someone will update `GNU.yml` when a new stable version of the GNU coreutils is released, but I'm not volunteering. --- .github/workflows/GNU.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index a68f0a083..d721eb8b1 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -12,16 +12,18 @@ jobs: uses: actions/checkout@v2 with: path: 'uutils' - - name: Chechout GNU coreutils + - name: Checkout GNU coreutils uses: actions/checkout@v2 with: repository: 'coreutils/coreutils' path: 'gnu' - - name: Chechout GNU corelib + ref: v8.32 + - name: Checkout GNU corelib uses: actions/checkout@v2 with: repository: 'coreutils/gnulib' path: 'gnulib' + ref: 8e99f24c0931a38880c6ee9b8287c7da80b0036b fetch-depth: 0 # gnu gets upset if gnulib is a shallow checkout - name: Install `rust` toolchain uses: actions-rs/toolchain@v1 From 047d775e5ee44dddfe77a98f019bda920458d554 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 17 May 2021 21:24:58 +0200 Subject: [PATCH 078/148] gh action: fix the GNU testsuite job --- .github/workflows/GNU.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index a68f0a083..f976b4633 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -92,7 +92,7 @@ jobs: sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh sed -i 's|truncate |/usr/bin/truncate |' tests/split/fail.sh - sed -i 's|dd |/usr/bin/dd |' tests/du/8gb.sh tests/tail-2/big-4gb.sh tests/cp/fiemap-2.sh init.cfg + sed -i 's|dd |/usr/bin/dd |' tests/du/8gb.sh tests/tail-2/big-4gb.sh init.cfg sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh sed -i 's|touch 
|/usr/bin/touch |' tests/cp/preserve-link.sh tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/ls/abmon-align.sh tests/ls/rt-1.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh @@ -104,7 +104,6 @@ jobs: #Add specific timeout to tests that currently hang to limit time spent waiting sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh - test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" - name: Run GNU tests From dc93f29fe3532318059f9d5b7e39c1048f6bbc81 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Mon, 17 May 2021 22:22:18 +0200 Subject: [PATCH 079/148] CICD: install GNU coreutils on macOS --- .github/workflows/CICD.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index cc0972bf9..a42d2f335 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -235,6 +235,9 @@ jobs: arm-unknown-linux-gnueabihf) sudo apt-get -y update ; sudo apt-get -y install gcc-arm-linux-gnueabihf ;; aarch64-unknown-linux-gnu) sudo apt-get -y update ; sudo apt-get -y install gcc-aarch64-linux-gnu ;; esac + case '${{ matrix.job.os }}' in + macos-latest) brew install coreutils ;; # needed for testing + esac - name: Initialize workflow variables id: vars shell: bash From fea1026669ec4f01b121eb1b2c027ad00595cc5c Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 17 May 2021 18:15:39 -0400 Subject: [PATCH 080/148] tail: use std::io::copy() to write bytes to stdout --- src/uu/tail/src/tail.rs | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 6dafee184..371f0e2ed 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -19,7 +19,6 @@ mod chunks; 
mod platform; mod ringbuffer; use chunks::ReverseChunks; -use chunks::BLOCK_SIZE; use ringbuffer::RingBuffer; use clap::{App, Arg}; @@ -442,8 +441,6 @@ fn backwards_thru_file(file: &mut File, num_delimiters: usize, delimiter: u8) { /// `BLOCK_SIZE` until we find the location of the first line/byte. This ends up /// being a nice performance win for very large files. fn bounded_tail(file: &mut File, settings: &Settings) { - let mut buf = vec![0; BLOCK_SIZE as usize]; - // Find the position in the file to start printing from. match settings.mode { FilterMode::Lines(count, delimiter) => { @@ -455,18 +452,9 @@ fn bounded_tail(file: &mut File, settings: &Settings) { } // Print the target section of the file. - loop { - let bytes_read = file.read(&mut buf).unwrap(); - - let mut stdout = stdout(); - for b in &buf[0..bytes_read] { - print_byte(&mut stdout, *b); - } - - if bytes_read == 0 { - break; - } - } + let stdout = stdout(); + let mut stdout = stdout.lock(); + std::io::copy(file, &mut stdout).unwrap(); } /// Collect the last elements of an iterator into a `VecDeque`. From bc296455316a418ede9238f0b0a3745e261a0b46 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Mon, 17 May 2021 19:33:49 -0400 Subject: [PATCH 081/148] tail: fix off-by-one issue for +NUM args Fix an off-by-one issue for `tail -c +NUM` and `tail -n +NUM` command line options. --- src/uu/tail/src/tail.rs | 5 ++++- tests/by-util/test_tail.rs | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 6dafee184..6af6d4b97 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -489,7 +489,10 @@ where E: fmt::Debug, { if beginning { - iter.skip(count as usize).map(|r| r.unwrap()).collect() + // GNU `tail` seems to index bytes and lines starting at 1, not + // at 0. It seems to treat `+0` and `+1` as the same thing. 
+ let i = count.max(1) - 1; + iter.skip(i as usize).map(|r| r.unwrap()).collect() } else { RingBuffer::from_iter(iter.map(|r| r.unwrap()), count as usize).data } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index 1c025cf4c..dddbb9c31 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -348,3 +348,47 @@ fn test_negative_indexing() { fn test_sleep_interval() { new_ucmd!().arg("-s").arg("10").arg(FOOBAR_TXT).succeeds(); } + + +/// Test for reading all but the first NUM bytes: `tail -c +3`. +#[test] +fn test_positive_bytes() { + new_ucmd!() + .args(&["-c", "+3"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("cde"); +} + + +/// Test for reading all bytes, specified by `tail -c +0`. +#[test] +fn test_positive_zero_bytes() { + new_ucmd!() + .args(&["-c", "+0"]) + .pipe_in("abcde") + .succeeds() + .stdout_is("abcde"); +} + + +/// Test for reading all but the first NUM lines: `tail -n +3`. +#[test] +fn test_positive_lines() { + new_ucmd!() + .args(&["-n", "+3"]) + .pipe_in("a\nb\nc\nd\ne\n") + .succeeds() + .stdout_is("c\nd\ne\n"); +} + + +/// Test for reading all lines, specified by `tail -n +0`. 
+#[test] +fn test_positive_zero_lines() { + new_ucmd!() + .args(&["-n", "+0"]) + .pipe_in("a\nb\nc\nd\ne\n") + .succeeds() + .stdout_is("a\nb\nc\nd\ne\n"); +} From 7c7d622d540449cc77e9adf84ee6c32dcb73e30b Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 18 May 2021 02:00:16 +0200 Subject: [PATCH 082/148] tests: add test for issue #2223 --- tests/by-util/test_ls.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index a57525e4b..8d32172b0 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -1966,3 +1966,38 @@ fn test_ls_sort_extension() { expected, ); } + +// This tests for the open issue described in #2223 +#[cfg_attr(not(feature = "test_unimplemented"), ignore)] +#[test] +fn test_ls_path() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let file1 = "file1"; + let file2 = "file2"; + let dir = "dir"; + let path = &format!("{}/{}", dir, file2); + + at.mkdir(dir); + at.touch(file1); + at.touch(path); + + let expected_stdout = &format!("{}\n", path); + scene.ucmd().arg(path).run().stdout_is(expected_stdout); + + let expected_stdout = &format!("./{}\n", path); + scene.ucmd().arg(path).run().stdout_is(expected_stdout); + + let abs_path = format!("{}/{}\n", at.as_string(), path); + println!(":{}", abs_path); + scene.ucmd().arg(&abs_path).run().stdout_is(&abs_path); + + let expected_stdout = &format!("{}\n{}\n", file1, path); + scene + .ucmd() + .arg(file1) + .arg(path) + .run() + .stdout_is(expected_stdout); +} From ce5b852a3121bf8069c1d8b2948fa8c3b31f6134 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Tue, 18 May 2021 19:58:33 +0200 Subject: [PATCH 083/148] stat: remove unused/duplicate tests --- src/uu/stat/src/test_stat.rs | 76 ------------------------------------ 1 file changed, 76 deletions(-) delete mode 100644 src/uu/stat/src/test_stat.rs diff --git a/src/uu/stat/src/test_stat.rs b/src/uu/stat/src/test_stat.rs deleted 
file mode 100644 index 05e91fb84..000000000 --- a/src/uu/stat/src/test_stat.rs +++ /dev/null @@ -1,76 +0,0 @@ -// spell-checker:ignore (ToDO) scanutil qzxc dqzxc - -pub use super::*; - -#[test] -fn test_scanutil() { - assert_eq!(Some((-5, 2)), "-5zxc".scan_num::()); - assert_eq!(Some((51, 2)), "51zxc".scan_num::()); - assert_eq!(Some((192, 4)), "+192zxc".scan_num::()); - assert_eq!(None, "z192zxc".scan_num::()); - - assert_eq!(Some(('a', 3)), "141zxc".scan_char(8)); - assert_eq!(Some(('\n', 2)), "12qzxc".scan_char(8)); - assert_eq!(Some(('\r', 1)), "dqzxc".scan_char(16)); - assert_eq!(None, "z2qzxc".scan_char(8)); -} - -#[cfg(test)] -mod test_generate_tokens { - use super::*; - - #[test] - fn test_normal_format() { - let s = "%10.2ac%-5.w\n"; - let expected = vec![ - Token::Directive { - flag: 0, - width: 10, - precision: 2, - format: 'a', - }, - Token::Char('c'), - Token::Directive { - flag: F_LEFT, - width: 5, - precision: 0, - format: 'w', - }, - Token::Char('\n'), - ]; - assert_eq!(&expected, &Stater::generate_tokens(s, false).unwrap()); - } - - #[test] - fn test_printf_format() { - let s = "%-# 15a\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.-23w\\x12\\167\\132\\112\\n"; - let expected = vec![ - Token::Directive { - flag: F_LEFT | F_ALTER | F_SPACE, - width: 15, - precision: -1, - format: 'a', - }, - Token::Char('\r'), - Token::Char('"'), - Token::Char('\\'), - Token::Char('\x07'), - Token::Char('\x08'), - Token::Char('\x1B'), - Token::Char('\x0C'), - Token::Char('\x0B'), - Token::Directive { - flag: F_SIGN | F_ZERO, - width: 20, - precision: -1, - format: 'w', - }, - Token::Char('\x12'), - Token::Char('w'), - Token::Char('Z'), - Token::Char('J'), - Token::Char('\n'), - ]; - assert_eq!(&expected, &Stater::generate_tokens(s, true).unwrap()); - } -} From c60d3866c33243d2c186ba15e314eca791053f02 Mon Sep 17 00:00:00 2001 From: Chad Brewbaker Date: Tue, 18 May 2021 15:10:51 -0500 Subject: [PATCH 084/148] dev random blocks on linux --- tests/by-util/test_cat.rs | 10 +++++++++- 1 
file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index 67722daa2..6ec021ffa 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -347,10 +347,18 @@ fn test_squeeze_blank_before_numbering() { #[cfg(unix)] fn test_dev_random() { let mut buf = [0; 2048]; - let mut proc = new_ucmd!().args(&["/dev/random"]).run_no_wait(); + #[cfg(target_os = "linux")] + fn rand_gen() -> &'static str { "/dev/urandom"} + + #[cfg(not(target_os = "linux"))] + fn rand_gen() -> &'static str { "/dev/random"} + + let mut proc = new_ucmd!().args(&[rand_gen()]).run_no_wait(); let mut proc_stdout = proc.stdout.take().unwrap(); + println!("I got to 1"); proc_stdout.read_exact(&mut buf).unwrap(); + println!("I got to 3"); let num_zeroes = buf.iter().fold(0, |mut acc, &n| { if n == 0 { acc += 1; From a69cb11de9de0d51e679a78f4a1055d199d2915c Mon Sep 17 00:00:00 2001 From: Chad Brewbaker Date: Tue, 18 May 2021 15:17:07 -0500 Subject: [PATCH 085/148] Removing debug code --- tests/by-util/test_cat.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index 6ec021ffa..997a7964c 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -355,10 +355,8 @@ fn test_dev_random() { let mut proc = new_ucmd!().args(&[rand_gen()]).run_no_wait(); let mut proc_stdout = proc.stdout.take().unwrap(); - println!("I got to 1"); proc_stdout.read_exact(&mut buf).unwrap(); - println!("I got to 3"); let num_zeroes = buf.iter().fold(0, |mut acc, &n| { if n == 0 { acc += 1; From 7bf342fa52300fdbe17fdb86cd630b40b267598e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 18 May 2021 21:31:55 +0200 Subject: [PATCH 086/148] publish the results of the gnu testsuite as a json file too --- .github/workflows/GNU.yml | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index 
f976b4633..3fd39aa15 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -34,7 +34,7 @@ jobs: shell: bash run: | sudo apt-get update - sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify python3-sphinx + sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify python3-sphinx jq pushd uutils make PROFILE=release BUILDDIR="$PWD/target/release/" @@ -117,15 +117,24 @@ jobs: - name: Extract tests info shell: bash run: | - if test -f gnu/tests/test-suite.log + LOG_FILE=gnu/tests/test-suite.log + if test -f "$LOG_FILE" then - TOTAL=$( grep "# TOTAL:" gnu/tests/test-suite.log|cut -d' ' -f2-) - PASS=$( grep "# PASS:" gnu/tests/test-suite.log|cut -d' ' -f2-) - SKIP=$( grep "# SKIP:" gnu/tests/test-suite.log|cut -d' ' -f2-) - FAIL=$( grep "# FAIL:" gnu/tests/test-suite.log|cut -d' ' -f2-) - XPASS=$( grep "# XPASS:" gnu/tests/test-suite.log|cut -d' ' -f2-) - ERROR=$( grep "# ERROR:" gnu/tests/test-suite.log|cut -d' ' -f2-) - echo "::warning ::GNU testsuite = $TOTAL / $PASS / $FAIL / $ERROR" + TOTAL=$(sed -n "s/.*# TOTAL: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + PASS=$(sed -n "s/.*# PASS: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + SKIP=$(sed -n "s/.*# SKIP: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + FAIL=$(sed -n "s/.*# FAIL: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + XPASS=$(sed -n "s/.*# XPASS: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + ERROR=$(sed -n "s/.*# ERROR: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) + echo "::warning ::GNU testsuite = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR" + jq -n \ + --arg total "$TOTAL" \ + --arg pass "$PASS" \ + --arg skip "$SKIP" \ + --arg fail "$FAIL" \ + --arg xpass "$XPASS" \ + --arg error "$ERROR" \ + '{total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }' > gnu-result.json else echo "::error ::Failed to get summary of test results" fi @@ -134,3 +143,8 @@ jobs: with: name: 
test-report path: gnu/tests/**/*.log + + - uses: actions/upload-artifact@v2 + with: + name: gnu-result + path: gnu-result.json From 8032c6d750f4a97b0d5880fc0180554f10360d8f Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 19 May 2021 01:04:24 +0200 Subject: [PATCH 087/148] fix clippy warnings --- src/uucore/src/lib/features/fsext.rs | 8 ++++---- tests/by-util/test_basename.rs | 2 ++ tests/by-util/test_relpath.rs | 6 +++--- tests/by-util/test_uname.rs | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/uucore/src/lib/features/fsext.rs b/src/uucore/src/lib/features/fsext.rs index 887c31e01..19c634b0b 100644 --- a/src/uucore/src/lib/features/fsext.rs +++ b/src/uucore/src/lib/features/fsext.rs @@ -276,7 +276,7 @@ impl MountInfo { GetVolumeInformationW( String2LPWSTR!(mount_root), ptr::null_mut(), - 0 as DWORD, + 0, ptr::null_mut(), ptr::null_mut(), ptr::null_mut(), @@ -510,12 +510,12 @@ impl FsUsage { // Total number of free blocks. bfree: number_of_free_clusters as u64, // Total number of free blocks available to non-privileged processes. - bavail: 0 as u64, + bavail: 0, bavail_top_bit_set: ((bytes_per_sector as u64) & (1u64.rotate_right(1))) != 0, // Total number of file nodes (inodes) on the file system. - files: 0 as u64, // Not available on windows + files: 0, // Not available on windows // Total number of free file nodes (inodes). 
- ffree: 4096 as u64, // Meaningless on Windows + ffree: 4096, // Meaningless on Windows } } } diff --git a/tests/by-util/test_basename.rs b/tests/by-util/test_basename.rs index 2a40ba4b9..baf15f78a 100644 --- a/tests/by-util/test_basename.rs +++ b/tests/by-util/test_basename.rs @@ -1,4 +1,5 @@ use crate::common::util::*; +#[cfg(any(unix, target_os = "redox"))] use std::ffi::OsStr; #[test] @@ -123,6 +124,7 @@ fn test_too_many_args_output() { ); } +#[cfg(any(unix, target_os = "redox"))] fn test_invalid_utf8_args(os_str: &OsStr) { let test_vec = vec![os_str.to_os_string()]; new_ucmd!().args(&test_vec).succeeds().stdout_is("fo�o\n"); diff --git a/tests/by-util/test_relpath.rs b/tests/by-util/test_relpath.rs index 5094d25a8..70d9f2a5d 100644 --- a/tests/by-util/test_relpath.rs +++ b/tests/by-util/test_relpath.rs @@ -155,7 +155,7 @@ fn test_relpath_no_from_with_d() { at.mkdir_all(to); // d is part of subpath -> expect relative path - let mut result_stdout = scene + let _result_stdout = scene .ucmd() .arg(to) .arg(&format!("-d{}", pwd)) @@ -163,10 +163,10 @@ fn test_relpath_no_from_with_d() { .stdout_move_str(); // relax rules for windows test environment #[cfg(not(windows))] - assert!(Path::new(&result_stdout).is_relative()); + assert!(Path::new(&_result_stdout).is_relative()); // d is not part of subpath -> expect absolut path - result_stdout = scene + let result_stdout = scene .ucmd() .arg(to) .arg("-dnon_existing") diff --git a/tests/by-util/test_uname.rs b/tests/by-util/test_uname.rs index da901d985..d878ed7ac 100644 --- a/tests/by-util/test_uname.rs +++ b/tests/by-util/test_uname.rs @@ -43,5 +43,5 @@ fn test_uname_kernel() { } #[cfg(not(target_os = "linux"))] - let result = ucmd.arg("-o").succeeds(); + ucmd.arg("-o").succeeds(); } From 9167a4128da23f14831819bd2b6d89f624ab9cfb Mon Sep 17 00:00:00 2001 From: Chad Brewbaker Date: Wed, 19 May 2021 04:06:46 -0500 Subject: [PATCH 088/148] Update test_cat.rs Refactored to constants --- tests/by-util/test_cat.rs | 6 +++--- 
1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index 997a7964c..4bb673b95 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -348,12 +348,12 @@ fn test_squeeze_blank_before_numbering() { fn test_dev_random() { let mut buf = [0; 2048]; #[cfg(target_os = "linux")] - fn rand_gen() -> &'static str { "/dev/urandom"} + const DEV_RANDOM: &str = "/dev/urandom"; #[cfg(not(target_os = "linux"))] - fn rand_gen() -> &'static str { "/dev/random"} + const DEV_RANDOM: &str = "/dev/random"; - let mut proc = new_ucmd!().args(&[rand_gen()]).run_no_wait(); + let mut proc = new_ucmd!().args(&[DEV_RANDOM]).run_no_wait(); let mut proc_stdout = proc.stdout.take().unwrap(); proc_stdout.read_exact(&mut buf).unwrap(); From 0c6a84831452b4c00216a498be053cda0ecc3912 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 19 May 2021 20:33:28 +0200 Subject: [PATCH 089/148] gnu results: also add the date (#2236) --- .github/workflows/GNU.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index 3fd39aa15..213137acc 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -128,13 +128,14 @@ jobs: ERROR=$(sed -n "s/.*# ERROR: \(.*\)/\1/p" "$LOG_FILE"|tr -d '\r'|head -n1) echo "::warning ::GNU testsuite = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR" jq -n \ + --arg date "$(date --rfc-email)" \ --arg total "$TOTAL" \ --arg pass "$PASS" \ --arg skip "$SKIP" \ --arg fail "$FAIL" \ --arg xpass "$XPASS" \ --arg error "$ERROR" \ - '{total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }' > gnu-result.json + '{($date): { total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > gnu-result.json else echo "::error ::Failed to get summary of test results" fi From 63b496eaa8a87a696fd9ea21eb3bffc86fe0a971 Mon Sep 17 00:00:00 2001 From: Jeffrey 
Finkelstein Date: Wed, 19 May 2021 22:23:28 -0400 Subject: [PATCH 090/148] truncate: refactor parse_size() function Change the interface provided by the `parse_size()` function to reduce its responsibilities to just a single task: parsing a number of bytes from a string of the form '123KB', etc. Previously, the function was also responsible for deciding which mode truncate would operate in. Furthermore, this commit simplifies the code for parsing the number and unit to be less verbose and use less mutable state. Finally, this commit adds some unit tests for the `parse_size()` function. --- src/uu/truncate/src/truncate.rs | 171 +++++++++++++++++++++----------- tests/by-util/test_truncate.rs | 10 ++ 2 files changed, 122 insertions(+), 59 deletions(-) diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 91f705bd1..3190e6ad4 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -133,7 +133,35 @@ fn truncate( filenames: Vec, ) { let (modsize, mode) = match size { - Some(size_string) => parse_size(&size_string), + Some(size_string) => { + // Trim any whitespace. + let size_string = size_string.trim(); + + // Get the modifier character from the size string, if any. For + // example, if the argument is "+123", then the modifier is '+'. + let c = size_string.chars().next().unwrap(); + + let mode = match c { + '+' => TruncateMode::Extend, + '-' => TruncateMode::Reduce, + '<' => TruncateMode::AtMost, + '>' => TruncateMode::AtLeast, + '/' => TruncateMode::RoundDown, + '*' => TruncateMode::RoundUp, + _ => TruncateMode::Absolute, /* assume that the size is just a number */ + }; + + // If there was a modifier character, strip it. 
+ let size_string = match mode { + TruncateMode::Absolute => size_string, + _ => &size_string[1..], + }; + let num_bytes = match parse_size(size_string) { + Ok(b) => b, + Err(_) => crash!(1, "Invalid number: ‘{}’", size_string), + }; + (num_bytes, mode) + } None => (0, TruncateMode::Reference), }; @@ -208,64 +236,89 @@ fn truncate( } } -fn parse_size(size: &str) -> (u64, TruncateMode) { - let clean_size = size.replace(" ", ""); - let mode = match clean_size.chars().next().unwrap() { - '+' => TruncateMode::Extend, - '-' => TruncateMode::Reduce, - '<' => TruncateMode::AtMost, - '>' => TruncateMode::AtLeast, - '/' => TruncateMode::RoundDown, - '*' => TruncateMode::RoundUp, - _ => TruncateMode::Absolute, /* assume that the size is just a number */ +/// Parse a size string into a number of bytes. +/// +/// A size string comprises an integer and an optional unit. The unit +/// may be K, M, G, T, P, E, Z, or Y (powers of 1024) or KB, MB, +/// etc. (powers of 1000). +/// +/// # Errors +/// +/// This function returns an error if the string does not begin with a +/// numeral, or if the unit is not one of the supported units described +/// in the preceding section. +/// +/// # Examples +/// +/// ```rust,ignore +/// assert_eq!(parse_size("123").unwrap(), 123); +/// assert_eq!(parse_size("123K").unwrap(), 123 * 1024); +/// assert_eq!(parse_size("123KB").unwrap(), 123 * 1000); +/// ``` +fn parse_size(size: &str) -> Result { + // Get the numeric part of the size argument. For example, if the + // argument is "123K", then the numeric part is "123". + let numeric_string: String = size.chars().take_while(|c| c.is_digit(10)).collect(); + let number: u64 = match numeric_string.parse() { + Ok(n) => n, + Err(_) => return Err(()), }; - let bytes = { - let mut slice = if mode == TruncateMode::Absolute { - &clean_size - } else { - &clean_size[1..] 
- }; - if slice.chars().last().unwrap().is_alphabetic() { - slice = &slice[..slice.len() - 1]; - if !slice.is_empty() && slice.chars().last().unwrap().is_alphabetic() { - slice = &slice[..slice.len() - 1]; - } - } - slice - } - .to_owned(); - let mut number: u64 = match bytes.parse() { - Ok(num) => num, - Err(e) => crash!(1, "'{}' is not a valid number: {}", size, e), + + // Get the alphabetic units part of the size argument and compute + // the factor it represents. For example, if the argument is "123K", + // then the unit part is "K" and the factor is 1024. This may be the + // empty string, in which case, the factor is 1. + let n = numeric_string.len(); + let (base, exponent): (u64, u32) = match &size[n..] { + "" => (1, 0), + "K" | "k" => (1024, 1), + "M" | "m" => (1024, 2), + "G" | "g" => (1024, 3), + "T" | "t" => (1024, 4), + "P" | "p" => (1024, 5), + "E" | "e" => (1024, 6), + "Z" | "z" => (1024, 7), + "Y" | "y" => (1024, 8), + "KB" | "kB" => (1000, 1), + "MB" | "mB" => (1000, 2), + "GB" | "gB" => (1000, 3), + "TB" | "tB" => (1000, 4), + "PB" | "pB" => (1000, 5), + "EB" | "eB" => (1000, 6), + "ZB" | "zB" => (1000, 7), + "YB" | "yB" => (1000, 8), + _ => return Err(()), }; - if clean_size.chars().last().unwrap().is_alphabetic() { - number *= match clean_size.chars().last().unwrap().to_ascii_uppercase() { - 'B' => match clean_size - .chars() - .nth(clean_size.len() - 2) - .unwrap() - .to_ascii_uppercase() - { - 'K' => 1000u64, - 'M' => 1000u64.pow(2), - 'G' => 1000u64.pow(3), - 'T' => 1000u64.pow(4), - 'P' => 1000u64.pow(5), - 'E' => 1000u64.pow(6), - 'Z' => 1000u64.pow(7), - 'Y' => 1000u64.pow(8), - letter => crash!(1, "'{}B' is not a valid suffix.", letter), - }, - 'K' => 1024u64, - 'M' => 1024u64.pow(2), - 'G' => 1024u64.pow(3), - 'T' => 1024u64.pow(4), - 'P' => 1024u64.pow(5), - 'E' => 1024u64.pow(6), - 'Z' => 1024u64.pow(7), - 'Y' => 1024u64.pow(8), - letter => crash!(1, "'{}' is not a valid suffix.", letter), - }; - } - (number, mode) + let factor = 
base.pow(exponent); + Ok(number * factor) +} + +#[cfg(test)] +mod tests { + use crate::parse_size; + + #[test] + fn test_parse_size_zero() { + assert_eq!(parse_size("0").unwrap(), 0); + assert_eq!(parse_size("0K").unwrap(), 0); + assert_eq!(parse_size("0KB").unwrap(), 0); + } + + #[test] + fn test_parse_size_without_factor() { + assert_eq!(parse_size("123").unwrap(), 123); + } + + #[test] + fn test_parse_size_kilobytes() { + assert_eq!(parse_size("123K").unwrap(), 123 * 1024); + assert_eq!(parse_size("123KB").unwrap(), 123 * 1000); + } + + #[test] + fn test_parse_size_megabytes() { + assert_eq!(parse_size("123").unwrap(), 123); + assert_eq!(parse_size("123M").unwrap(), 123 * 1024 * 1024); + assert_eq!(parse_size("123MB").unwrap(), 123 * 1000 * 1000); + } } diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index 8f88f4c74..b1f806f82 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -235,3 +235,13 @@ fn test_size_and_reference() { actual ); } + +#[test] +fn test_invalid_numbers() { + // TODO For compatibility with GNU, `truncate -s 0X` should cause + // the same error as `truncate -s 0X file`, but currently it returns + // a different error. 
+ new_ucmd!().args(&["-s", "0X", "file"]).fails().stderr_contains("Invalid number: ‘0X’"); + new_ucmd!().args(&["-s", "0XB", "file"]).fails().stderr_contains("Invalid number: ‘0XB’"); + new_ucmd!().args(&["-s", "0B", "file"]).fails().stderr_contains("Invalid number: ‘0B’"); +} From 998b3c11d3af933afdd714f1678e2080740cbb07 Mon Sep 17 00:00:00 2001 From: nicoo Date: Thu, 20 May 2021 17:00:49 +0200 Subject: [PATCH 091/148] factor: Make random Factors instance generatable for tests --- src/uu/factor/src/factor.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index f53abd772..b279de7fc 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -239,9 +239,13 @@ mod tests { } #[cfg(test)] -impl quickcheck::Arbitrary for Factors { - fn arbitrary(gen: &mut G) -> Self { - use rand::Rng; +use rand::{ + distributions::{Distribution, Standard}, + Rng, +}; +#[cfg(test)] +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Factors { let mut f = Factors::one(); let mut g = 1u64; let mut n = u64::MAX; @@ -252,7 +256,7 @@ impl quickcheck::Arbitrary for Factors { // See Generating Random Factored Numbers, Easily, J. 
Cryptology (2003) 'attempt: loop { while n > 1 { - n = gen.gen_range(1, n); + n = rng.gen_range(1, n); if miller_rabin::is_prime(n) { if let Some(h) = g.checked_mul(n) { f.push(n); @@ -269,6 +273,13 @@ impl quickcheck::Arbitrary for Factors { } } +#[cfg(test)] +impl quickcheck::Arbitrary for Factors { + fn arbitrary(g: &mut G) -> Self { + g.gen() + } +} + #[cfg(test)] impl std::ops::BitXor for Factors { type Output = Self; From a0a103b15e52b20f63d36aee93083c81b007326d Mon Sep 17 00:00:00 2001 From: nicoo Date: Thu, 20 May 2021 17:01:33 +0200 Subject: [PATCH 092/148] factor::table::chunked: Add test (equivalent to the single-number version) --- src/uu/factor/src/table.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index db2698e4b..518d4f241 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -72,3 +72,30 @@ pub fn factor_chunk(n_s: &mut [u64; CHUNK_SIZE], f_s: &mut [Factors; CHUNK_SIZE] } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::Factors; + use quickcheck::quickcheck; + use rand::{rngs::SmallRng, Rng, SeedableRng}; + + quickcheck! 
{ + fn chunk_vs_iter(seed: u64) -> () { + let mut rng = SmallRng::seed_from_u64(seed); + let mut n_c: [u64; CHUNK_SIZE] = rng.gen(); + let mut f_c: [Factors; CHUNK_SIZE] = rng.gen(); + + let mut n_i = n_c.clone(); + let mut f_i = f_c.clone(); + for (n, f) in n_i.iter_mut().zip(f_i.iter_mut()) { + factor(n, f); + } + + factor_chunk(&mut n_c, &mut f_c); + + assert_eq!(n_i, n_c); + assert_eq!(f_i, f_c); + } + } +} From d30393089f4d9f02f492933fd8bdd283724e401f Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 20 May 2021 20:57:28 -0400 Subject: [PATCH 093/148] truncate: rustfmt test_truncate.rs file --- tests/by-util/test_truncate.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index b1f806f82..fc302dcba 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -241,7 +241,16 @@ fn test_invalid_numbers() { // TODO For compatibility with GNU, `truncate -s 0X` should cause // the same error as `truncate -s 0X file`, but currently it returns // a different error. 
- new_ucmd!().args(&["-s", "0X", "file"]).fails().stderr_contains("Invalid number: ‘0X’"); - new_ucmd!().args(&["-s", "0XB", "file"]).fails().stderr_contains("Invalid number: ‘0XB’"); - new_ucmd!().args(&["-s", "0B", "file"]).fails().stderr_contains("Invalid number: ‘0B’"); + new_ucmd!() + .args(&["-s", "0X", "file"]) + .fails() + .stderr_contains("Invalid number: ‘0X’"); + new_ucmd!() + .args(&["-s", "0XB", "file"]) + .fails() + .stderr_contains("Invalid number: ‘0XB’"); + new_ucmd!() + .args(&["-s", "0B", "file"]) + .fails() + .stderr_contains("Invalid number: ‘0B’"); } From fc29846b45e20956ed3ebb37b67aa172290767e2 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 20 May 2021 20:55:11 -0400 Subject: [PATCH 094/148] truncate: fix error message for file not found Change the error message for when the reference file (the `-r` argument) is not found to match GNU coreutils. This commit also eliminates a redundant call to `File::open`; the file need not be opened because the size in bytes can be read from the result of `std::fs::metadata()`. 
--- src/uu/truncate/src/truncate.rs | 14 ++++++++------ tests/by-util/test_truncate.rs | 8 ++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 3190e6ad4..c74171373 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -11,7 +11,8 @@ extern crate uucore; use clap::{App, Arg}; -use std::fs::{metadata, File, OpenOptions}; +use std::fs::{metadata, OpenOptions}; +use std::io::ErrorKind; use std::path::Path; #[derive(Eq, PartialEq)] @@ -174,13 +175,14 @@ fn truncate( TruncateMode::Reduce => (), _ => crash!(1, "you must specify a relative ‘--size’ with ‘--reference’"), }; - let _ = match File::open(Path::new(rfilename)) { - Ok(m) => m, - Err(f) => crash!(1, "{}", f.to_string()), - }; match metadata(rfilename) { Ok(meta) => meta.len(), - Err(f) => crash!(1, "{}", f.to_string()), + Err(f) => match f.kind() { + ErrorKind::NotFound => { + crash!(1, "cannot stat '{}': No such file or directory", rfilename) + } + _ => crash!(1, "{}", f.to_string()), + }, } } None => 0, diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index b1f806f82..e14836fcf 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -245,3 +245,11 @@ fn test_invalid_numbers() { new_ucmd!().args(&["-s", "0XB", "file"]).fails().stderr_contains("Invalid number: ‘0XB’"); new_ucmd!().args(&["-s", "0B", "file"]).fails().stderr_contains("Invalid number: ‘0B’"); } + +#[test] +fn test_reference_file_not_found() { + new_ucmd!() + .args(&["-r", "a", "b"]) + .fails() + .stderr_contains("cannot stat 'a': No such file or directory"); +} From 17b95246cdcb4df0807af0aebcac19590837df57 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 20 May 2021 21:24:43 -0400 Subject: [PATCH 095/148] truncate: use min() and max() instead of if stmts --- src/uu/truncate/src/truncate.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) 
diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 3190e6ad4..3fb1f2663 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -209,20 +209,8 @@ fn truncate( TruncateMode::Reference => fsize, TruncateMode::Extend => fsize + modsize, TruncateMode::Reduce => fsize - modsize, - TruncateMode::AtMost => { - if fsize > modsize { - modsize - } else { - fsize - } - } - TruncateMode::AtLeast => { - if fsize < modsize { - modsize - } else { - fsize - } - } + TruncateMode::AtMost => fsize.min(modsize), + TruncateMode::AtLeast => fsize.max(modsize), TruncateMode::RoundDown => fsize - fsize % modsize, TruncateMode::RoundUp => fsize + fsize % modsize, }; From a23555e857e89f1cd7a9c08429886b102e95f01a Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 20 May 2021 23:19:58 -0400 Subject: [PATCH 096/148] truncate: fix character used to indicate round up Fix a bug in which the incorrect character was being used to indicate "round up to the nearest multiple" mode. The character was "*" but it should be "%". This commit corrects that. 
--- src/uu/truncate/src/truncate.rs | 2 +- tests/by-util/test_truncate.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/truncate/src/truncate.rs b/src/uu/truncate/src/truncate.rs index 3190e6ad4..7537987e0 100644 --- a/src/uu/truncate/src/truncate.rs +++ b/src/uu/truncate/src/truncate.rs @@ -147,7 +147,7 @@ fn truncate( '<' => TruncateMode::AtMost, '>' => TruncateMode::AtLeast, '/' => TruncateMode::RoundDown, - '*' => TruncateMode::RoundUp, + '%' => TruncateMode::RoundUp, _ => TruncateMode::Absolute, /* assume that the size is just a number */ }; diff --git a/tests/by-util/test_truncate.rs b/tests/by-util/test_truncate.rs index b1f806f82..7fa3df98c 100644 --- a/tests/by-util/test_truncate.rs +++ b/tests/by-util/test_truncate.rs @@ -206,7 +206,7 @@ fn test_round_up() { let (at, mut ucmd) = at_and_ucmd!(); let mut file = at.make_file(TFILE2); file.write_all(b"1234567890").unwrap(); - ucmd.args(&["--size", "*4", TFILE2]).succeeds(); + ucmd.args(&["--size", "%4", TFILE2]).succeeds(); file.seek(SeekFrom::End(0)).unwrap(); let actual = file.seek(SeekFrom::Current(0)).unwrap(); assert!( From 007e0a4e7f915989298278f0a777a63ab2af1185 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Thu, 20 May 2021 23:11:40 +0200 Subject: [PATCH 097/148] who/stat/pinky: adjust tests to be compatible with running on macOS A lot of tests depend on GNU's coreutils to be installed in order to obtain reference values during testing. In these cases testing is limited to `target_os = linux`. This PR installs GNU's coreutils on "github actions" and adjusts the tests for `who`, `stat` and `pinky` in order to be compatible with macOS. * `brew install coreutils` (prefix is 'g', e.g. `gwho`, `gstat`, etc. * switch paths for testing to something that's available on both OSs, e.g. `/boot` -> `/bin`, etc. * switch paths for testing to the macOS equivalent, e.g. `/dev/pts/ptmx` -> `/dev/ptmx`, etc. * exclude paths when no equivalent is available, e.g. 
`/proc`, `/etc/fstab`, etc. * refactor tests to make better use of the testing API * fix a warning in utmpx.rs to print to stderr instead of stdout * fix long_usage text in `who` * fix minor output formatting in `stat` * the `expected_result` function should be refactored to reduce duplicate code * more tests should be adjusted to not only run on `target_os = linux` --- src/uu/stat/src/stat.rs | 2 +- src/uu/who/src/who.rs | 7 +- src/uucore/src/lib/features/utmpx.rs | 4 +- tests/by-util/test_pinky.rs | 95 +++++++-------- tests/by-util/test_stat.rs | 127 +++++++++++--------- tests/by-util/test_who.rs | 168 +++++++++++++-------------- 6 files changed, 207 insertions(+), 196 deletions(-) diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 8b148d39d..5bb0e5f12 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -657,7 +657,7 @@ impl Stater { dst.to_string_lossy() ); } else { - arg = format!("`{}'", file); + arg = file.to_string(); } otype = OutputType::Str; } diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index aef23b3a2..1ae4f1c8d 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -46,9 +46,10 @@ fn get_usage() -> String { } fn get_long_usage() -> String { - String::from( - "If FILE is not specified, use /var/run/utmp. /var/log/wtmp as FILE is common.\n\ -If ARG1 ARG2 given, -m presumed: 'am i' or 'mom likes' are usual.", + format!( + "If FILE is not specified, use {}. 
/var/log/wtmp as FILE is common.\n\ + If ARG1 ARG2 given, -m presumed: 'am i' or 'mom likes' are usual.", + utmpx::DEFAULT_FILE, ) } diff --git a/src/uucore/src/lib/features/utmpx.rs b/src/uucore/src/lib/features/utmpx.rs index 96db33c35..826831ba6 100644 --- a/src/uucore/src/lib/features/utmpx.rs +++ b/src/uucore/src/lib/features/utmpx.rs @@ -54,6 +54,8 @@ pub unsafe extern "C" fn utmpxname(_file: *const libc::c_char) -> libc::c_int { 0 } +pub use crate::*; // import macros from `../../macros.rs` + // In case the c_char array doesn't end with NULL macro_rules! chars2string { ($arr:expr) => { @@ -240,7 +242,7 @@ impl UtmpxIter { utmpxname(cstr.as_ptr()) }; if res != 0 { - println!("Warning: {}", IOError::last_os_error()); + show_warning!("utmpxname: {}", IOError::last_os_error()); } unsafe { setutxent(); diff --git a/tests/by-util/test_pinky.rs b/tests/by-util/test_pinky.rs index 904a05f93..ccabb7345 100644 --- a/tests/by-util/test_pinky.rs +++ b/tests/by-util/test_pinky.rs @@ -20,42 +20,37 @@ fn test_long_format() { let ulogin = "root"; let pw: Passwd = Passwd::locate(ulogin).unwrap(); let real_name = pw.user_info().replace("&", &pw.name().capitalize()); - new_ucmd!().arg("-l").arg(ulogin).run().stdout_is(format!( - "Login name: {:<28}In real life: {}\nDirectory: {:<29}Shell: {}\n\n", - ulogin, - real_name, - pw.user_dir(), - pw.user_shell() - )); + new_ucmd!() + .arg("-l") + .arg(ulogin) + .succeeds() + .stdout_is(format!( + "Login name: {:<28}In real life: {}\nDirectory: {:<29}Shell: {}\n\n", + ulogin, + real_name, + pw.user_dir(), + pw.user_shell() + )); - new_ucmd!().arg("-lb").arg(ulogin).run().stdout_is(format!( - "Login name: {:<28}In real life: {1}\n\n", - ulogin, real_name - )); + new_ucmd!() + .arg("-lb") + .arg(ulogin) + .succeeds() + .stdout_is(format!( + "Login name: {:<28}In real life: {1}\n\n", + ulogin, real_name + )); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn 
test_long_format_multiple_users() { - let scene = TestScenario::new(util_name!()); + let args = ["-l", "root", "root", "root"]; - let expected = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .arg("-l") - .arg("root") - .arg("root") - .arg("root") - .succeeds(); - - scene - .ucmd() - .arg("-l") - .arg("root") - .arg("root") - .arg("root") + new_ucmd!() + .args(&args) .succeeds() - .stdout_is(expected.stdout_str()); + .stdout_is(expected_result(&args)); } #[test] @@ -64,63 +59,53 @@ fn test_long_format_wo_user() { new_ucmd!().arg("-l").fails().code_is(1); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_short_format_i() { // allow whitespace variation // * minor whitespace differences occur between platform built-in outputs; specifically, the number of trailing TABs may be variant let args = ["-i"]; - let actual = TestScenario::new(util_name!()) - .ucmd() - .args(&args) - .succeeds() - .stdout_move_str(); + let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); let v_actual: Vec<&str> = actual.split_whitespace().collect(); let v_expect: Vec<&str> = expect.split_whitespace().collect(); assert_eq!(v_actual, v_expect); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_short_format_q() { // allow whitespace variation // * minor whitespace differences occur between platform built-in outputs; specifically, the number of trailing TABs may be variant let args = ["-q"]; - let actual = TestScenario::new(util_name!()) - .ucmd() - .args(&args) - .succeeds() - .stdout_move_str(); + let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); let v_actual: Vec<&str> = actual.split_whitespace().collect(); let v_expect: Vec<&str> = expect.split_whitespace().collect(); assert_eq!(v_actual, v_expect); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", 
target_os = "linux"))] #[test] fn test_no_flag() { - let scene = TestScenario::new(util_name!()); - - let actual = scene.ucmd().succeeds().stdout_move_str(); - let expect = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .succeeds() - .stdout_move_str(); - + let actual = new_ucmd!().succeeds().stdout_move_str(); + let expect = expected_result(&[]); let v_actual: Vec<&str> = actual.split_whitespace().collect(); let v_expect: Vec<&str> = expect.split_whitespace().collect(); assert_eq!(v_actual, v_expect); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] fn expected_result(args: &[&str]) -> String { - TestScenario::new(util_name!()) - .cmd_keepenv(util_name!()) + #[cfg(target_os = "linux")] + let util_name = util_name!(); + #[cfg(target_vendor = "apple")] + let util_name = format!("g{}", util_name!()); + + TestScenario::new(&util_name) + .cmd_keepenv(util_name) .env("LANGUAGE", "C") .args(args) - .run() + .succeeds() .stdout_move_str() } diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 308dcb9f5..44bce9cd8 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -96,10 +96,10 @@ fn test_invalid_option() { new_ucmd!().arg("-w").arg("-q").arg("/").fails(); } -#[cfg(target_os = "linux")] +#[cfg(any(target_os = "linux", target_vendor = "apple"))] const NORMAL_FMTSTR: &'static str = "%a %A %b %B %d %D %f %F %g %G %h %i %m %n %o %s %u %U %x %X %y %Y %z %Z"; // avoid "%w %W" (birth/creation) due to `stat` limitations and linux kernel & rust version capability variations -#[cfg(target_os = "linux")] +#[cfg(any(target_os = "linux"))] const DEV_FMTSTR: &'static str = "%a %A %b %B %d %D %f %F %g %G %h %i %m %n %o %s (%t/%T) %u %U %w %W %x %X %y %Y %z %Z"; #[cfg(target_os = "linux")] @@ -125,8 +125,8 @@ fn test_fs_format() { .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn 
test_terse_normal_format() { // note: contains birth/creation date which increases test fragility // * results may vary due to built-in `stat` limitations as well as linux kernel and rust version capability variations @@ -156,10 +156,10 @@ fn test_terse_normal_format() { ); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_format_created_time() { - let args = ["-c", "%w", "/boot"]; + let args = ["-c", "%w", "/bin"]; let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); println!("actual: {:?}", actual); @@ -180,10 +180,10 @@ fn test_format_created_time() { ); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_format_created_seconds() { - let args = ["-c", "%W", "/boot"]; + let args = ["-c", "%W", "/bin"]; let actual = new_ucmd!().args(&args).succeeds().stdout_move_str(); let expect = expected_result(&args); println!("actual: {:?}", actual); @@ -204,79 +204,97 @@ fn test_format_created_seconds() { ); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_normal_format() { - let args = ["-c", NORMAL_FMTSTR, "/boot"]; + let args = ["-c", NORMAL_FMTSTR, "/bin"]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] -fn test_follow_symlink() { - let args = ["-L", "-c", DEV_FMTSTR, "/dev/cdrom"]; - new_ucmd!() - .args(&args) - .run() - .stdout_is(expected_result(&args)); +fn test_symlinks() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let mut tested: bool = false; + // arbitrarily chosen symlinks with hope that the CI environment provides at least one of them + for file in vec![ + "/bin/sh", + "/bin/sudoedit", + "/usr/bin/ex", + "/etc/localtime", + "/etc/aliases", + ] { + if at.file_exists(file) && 
at.is_symlink(file) { + tested = true; + let args = ["-c", NORMAL_FMTSTR, file]; + scene + .ucmd() + .args(&args) + .succeeds() + .stdout_is(expected_result(&args)); + // -L, --dereference follow links + let args = ["-L", "-c", NORMAL_FMTSTR, file]; + scene + .ucmd() + .args(&args) + .succeeds() + .stdout_is(expected_result(&args)); + } + } + if !tested { + panic!("No symlink found to test in this environment"); + } } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] -fn test_symlink() { - let args = ["-c", DEV_FMTSTR, "/dev/cdrom"]; - new_ucmd!() - .args(&args) - .run() - .stdout_is(expected_result(&args)); -} - -#[test] -#[cfg(target_os = "linux")] fn test_char() { - let args = ["-c", DEV_FMTSTR, "/dev/pts/ptmx"]; + // TODO: "(%t) (%x) (%w)" deviate from GNU stat for `character special file` on macOS + // Diff < left / right > : + // <"(f0000) (2021-05-20 23:08:03.442555000 +0200) (1970-01-01 01:00:00.000000000 +0100)\n" + // >"(f) (2021-05-20 23:08:03.455598000 +0200) (-)\n" + let args = [ + "-c", + #[cfg(target_os = "linux")] + DEV_FMTSTR, + #[cfg(target_os = "linux")] + "/dev/pts/ptmx", + #[cfg(any(target_vendor = "apple"))] + "%a %A %b %B %d %D %f %F %g %G %h %i %m %n %o %s (/%T) %u %U %W %X %y %Y %z %Z", + #[cfg(any(target_vendor = "apple"))] + "/dev/ptmx", + ]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } +#[cfg(any(target_os = "linux", target_vendor = "apple"))] #[test] -#[cfg(target_os = "linux")] fn test_multi_files() { let args = [ "-c", NORMAL_FMTSTR, "/dev", "/usr/lib", + #[cfg(target_os = "linux")] "/etc/fstab", "/var", ]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } -#[cfg(any(target_os = "linux", target_os = "freebsd", target_vendor = "apple"))] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] -fn test_one_file() { - let (at, mut ucmd) = at_and_ucmd!(); - let file = "TEST_FILE.mp4"; - at.touch(file); - - 
ucmd.arg(file) - .succeeds() - .stdout_contains(format!("File: `{}'", file)) - .stdout_contains(format!("Size: 0")) - .stdout_contains(format!("Access: (0644/-rw-r--r--)")); -} - -#[test] -#[cfg(target_os = "linux")] fn test_printf() { let args = [ "--printf=123%-# 15q\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.23m\\x12\\167\\132\\112\\n", @@ -284,16 +302,21 @@ fn test_printf() { ]; new_ucmd!() .args(&args) - .run() + .succeeds() .stdout_is(expected_result(&args)); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] fn expected_result(args: &[&str]) -> String { - TestScenario::new(util_name!()) - .cmd_keepenv(util_name!()) + #[cfg(target_os = "linux")] + let util_name = util_name!(); + #[cfg(target_vendor = "apple")] + let util_name = format!("g{}", util_name!()); + + TestScenario::new(&util_name) + .cmd_keepenv(util_name) .env("LANGUAGE", "C") .args(args) - .run() + .succeeds() .stdout_move_str() } diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index a5637f23a..725ec0b1e 100644 --- a/tests/by-util/test_who.rs +++ b/tests/by-util/test_who.rs @@ -1,28 +1,28 @@ use crate::common::util::*; -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_count() { for opt in vec!["-q", "--count"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_boot() { for opt in vec!["-b", "--boot"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_heading() { for opt in vec!["-H", "--heading"] { @@ -30,7 +30,7 @@ fn test_heading() { // * minor whitespace differences occur between platform built-in outputs; // specifically number of TABs between "TIME" and "COMMENT" 
may be variant let actual = new_ucmd!().arg(opt).succeeds().stdout_move_str(); - let expect = expected_result(opt); + let expect = expected_result(&[opt]); println!("actual: {:?}", actual); println!("expect: {:?}", expect); let v_actual: Vec<&str> = actual.split_whitespace().collect(); @@ -39,205 +39,205 @@ fn test_heading() { } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_short() { for opt in vec!["-s", "--short"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_login() { for opt in vec!["-l", "--login"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_m() { for opt in vec!["-m"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_process() { for opt in vec!["-p", "--process"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_runlevel() { for opt in vec!["-r", "--runlevel"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_time() { for opt in vec!["-t", "--time"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn 
test_mesg() { - for opt in vec!["-w", "-T", "--users", "--message", "--writable"] { + // -T, -w, --mesg + // add user's message status as +, - or ? + // --message + // same as -T + // --writable + // same as -T + for opt in vec!["-T", "-w", "--mesg", "--message", "--writable"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] #[test] fn test_arg1_arg2() { - let scene = TestScenario::new(util_name!()); + let args = ["am", "i"]; - let expected = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .arg("am") - .arg("i") - .succeeds(); - - scene - .ucmd() - .arg("am") - .arg("i") + new_ucmd!() + .args(&args) .succeeds() - .stdout_is(expected.stdout_str()); + .stdout_is(expected_result(&args)); } #[test] fn test_too_many_args() { - let expected = + const EXPECTED: &str = "error: The value 'u' was provided to '...', but it wasn't expecting any more values"; - new_ucmd!() - .arg("am") - .arg("i") - .arg("u") - .fails() - .stderr_contains(expected); + let args = ["am", "i", "u"]; + new_ucmd!().args(&args).fails().stderr_contains(EXPECTED); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_users() { for opt in vec!["-u", "--users"] { - new_ucmd!() - .arg(opt) - .succeeds() - .stdout_is(expected_result(opt)); + let actual = new_ucmd!().arg(opt).succeeds().stdout_move_str(); + let expect = expected_result(&[opt]); + println!("actual: {:?}", actual); + println!("expect: {:?}", expect); + + let mut v_actual: Vec<&str> = actual.split_whitespace().collect(); + let mut v_expect: Vec<&str> = expect.split_whitespace().collect(); + + // TODO: `--users` differs from GNU's output on manOS running in CI + // Diff < left / right > : + // <"runner console 2021-05-20 22:03 00:08 196\n" + // >"runner console 2021-05-20 22:03 old 196\n" + if is_ci() && cfg!(target_os = "macos") { + v_actual.remove(4); + v_expect.remove(4); + } + + 
assert_eq!(v_actual, v_expect); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_lookup() { for opt in vec!["--lookup"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_dead() { for opt in vec!["-d", "--dead"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_all_separately() { + if is_ci() && cfg!(target_os = "macos") { + // TODO: fix `-u`, see: test_users + return; + } + // -a, --all same as -b -d --login -p -r -t -T -u + let args = ["-b", "-d", "--login", "-p", "-r", "-t", "-T", "-u"]; let scene = TestScenario::new(util_name!()); - - let expected = scene - .cmd_keepenv(util_name!()) - .env("LANGUAGE", "C") - .arg("-b") - .arg("-d") - .arg("--login") - .arg("-p") - .arg("-r") - .arg("-t") - .arg("-T") - .arg("-u") - .succeeds(); - scene .ucmd() - .arg("-b") - .arg("-d") - .arg("--login") - .arg("-p") - .arg("-r") - .arg("-t") - .arg("-T") - .arg("-u") + .args(&args) .succeeds() - .stdout_is(expected.stdout_str()); - + .stdout_is(expected_result(&args)); scene .ucmd() .arg("--all") .succeeds() - .stdout_is(expected.stdout_str()); + .stdout_is(expected_result(&args)); } -#[cfg(target_os = "linux")] +#[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_all() { + if is_ci() && cfg!(target_os = "macos") { + // TODO: fix `-u`, see: test_users + return; + } + for opt in vec!["-a", "--all"] { new_ucmd!() .arg(opt) .succeeds() - .stdout_is(expected_result(opt)); + .stdout_is(expected_result(&[opt])); } } -#[cfg(target_os = "linux")] -fn expected_result(arg: &str) -> String { - TestScenario::new(util_name!()) - .cmd_keepenv(util_name!()) +#[cfg(any(target_vendor 
= "apple", target_os = "linux"))] +fn expected_result(args: &[&str]) -> String { + #[cfg(target_os = "linux")] + let util_name = util_name!(); + #[cfg(target_vendor = "apple")] + let util_name = format!("g{}", util_name!()); + + TestScenario::new(&util_name) + .cmd_keepenv(util_name) .env("LANGUAGE", "C") - .args(&[arg]) + .args(args) .succeeds() .stdout_move_str() } From 6ed080cf97d7bece6163a190bc07b0179d90f5ac Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 21 May 2021 12:39:48 +0200 Subject: [PATCH 098/148] CICD: install GNU coreutils on macOS (Code Coverage) --- .github/workflows/CICD.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index a42d2f335..bb29355cf 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -489,6 +489,13 @@ jobs: - { os: windows-latest , features: windows } steps: - uses: actions/checkout@v1 + - name: Install/setup prerequisites + shell: bash + run: | + ## install/setup prerequisites + case '${{ matrix.job.os }}' in + macos-latest) brew install coreutils ;; # needed for testing + esac # - name: Reattach HEAD ## may be needed for accurate code coverage info # run: git checkout ${{ github.head_ref }} - name: Initialize workflow variables From 0dafbfccca27154b6c831fed0338c1c54da63e3d Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Fri, 21 May 2021 13:30:24 +0200 Subject: [PATCH 099/148] CI-Trigger From 73b47b8c765284f7cc1586a46358e86eb56ae22a Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 21 May 2021 13:27:35 +0200 Subject: [PATCH 100/148] gnu/ci: install the dep into a separate task --- .github/workflows/GNU.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index a72cb0cfc..57730aee7 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -32,11 +32,14 @@ jobs: default: true profile: minimal # minimal component installation (ie, no 
documentation) components: rustfmt - - name: Build binaries + - name: Install deps shell: bash run: | sudo apt-get update sudo apt-get install autoconf autopoint bison texinfo gperf gcc g++ gdb python-pyinotify python3-sphinx jq + - name: Build binaries + shell: bash + run: | pushd uutils make PROFILE=release BUILDDIR="$PWD/target/release/" From 414c92eed79d5b8687db332e84c6d67f9b2b2593 Mon Sep 17 00:00:00 2001 From: Anup Mahindre Date: Fri, 21 May 2021 22:04:24 +0530 Subject: [PATCH 101/148] ls: Fix printing paths behavior For any commandline arguments, ls should print the argument as is (and not truncate to just the file name) For any other files it reaches (say through recursive exploration), ls should print just the filename (as path is printed once when we enter the directory) --- src/uu/ls/src/ls.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index c5389295b..d467d431a 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1110,7 +1110,7 @@ struct PathData { md: OnceCell>, ft: OnceCell>, // Name of the file - will be empty for . or .. - file_name: String, + display_name: String, // PathBuf that all above data corresponds to p_buf: PathBuf, must_dereference: bool, @@ -1126,14 +1126,18 @@ impl PathData { ) -> Self { // We cannot use `Path::ends_with` or `Path::Components`, because they remove occurrences of '.' 
// For '..', the filename is None - let name = if let Some(name) = file_name { + let display_name = if let Some(name) = file_name { name } else { - p_buf - .file_name() - .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) - .to_string_lossy() - .into_owned() + let display_osstr = if command_line { + p_buf.as_os_str() + } else { + p_buf + .file_name() + .unwrap_or_else(|| p_buf.iter().next_back().unwrap()) + }; + + display_osstr.to_string_lossy().into_owned() }; let must_dereference = match &config.dereference { Dereference::All => true, @@ -1159,7 +1163,7 @@ impl PathData { Self { md: OnceCell::new(), ft, - file_name: name, + display_name, p_buf, must_dereference, } @@ -1243,7 +1247,7 @@ fn sort_entries(entries: &mut Vec, config: &Config) { entries.sort_by_key(|k| Reverse(k.md().as_ref().map(|md| md.len()).unwrap_or(0))) } // The default sort in GNU ls is case insensitive - Sort::Name => entries.sort_by(|a, b| a.file_name.cmp(&b.file_name)), + Sort::Name => entries.sort_by(|a, b| a.display_name.cmp(&b.display_name)), Sort::Version => entries.sort_by(|a, b| version_cmp::version_cmp(&a.p_buf, &b.p_buf)), Sort::Extension => entries.sort_by(|a, b| { a.p_buf @@ -1719,7 +1723,7 @@ fn classify_file(path: &PathData) -> Option { } fn display_file_name(path: &PathData, config: &Config) -> Option { - let mut name = escape_name(&path.file_name, &config.quoting_style); + let mut name = escape_name(&path.display_name, &config.quoting_style); #[cfg(unix)] { From 31545258ac6b6ef11ef8c04f2564825e08f7ad50 Mon Sep 17 00:00:00 2001 From: Anup Mahindre Date: Fri, 21 May 2021 22:20:54 +0530 Subject: [PATCH 102/148] tests: Fix test_ls_path --- tests/by-util/test_ls.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 8d32172b0..fc4051039 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -1967,8 +1967,6 @@ fn test_ls_sort_extension() { ); } -// This tests for the open issue 
described in #2223 -#[cfg_attr(not(feature = "test_unimplemented"), ignore)] #[test] fn test_ls_path() { let scene = TestScenario::new(util_name!()); @@ -1987,13 +1985,17 @@ fn test_ls_path() { scene.ucmd().arg(path).run().stdout_is(expected_stdout); let expected_stdout = &format!("./{}\n", path); - scene.ucmd().arg(path).run().stdout_is(expected_stdout); + scene + .ucmd() + .arg(format!("./{}", path)) + .run() + .stdout_is(expected_stdout); - let abs_path = format!("{}/{}\n", at.as_string(), path); - println!(":{}", abs_path); - scene.ucmd().arg(&abs_path).run().stdout_is(&abs_path); + let abs_path = format!("{}/{}", at.as_string(), path); + let expected_stdout = &format!("{}\n", abs_path); + scene.ucmd().arg(&abs_path).run().stdout_is(expected_stdout); - let expected_stdout = &format!("{}\n{}\n", file1, path); + let expected_stdout = &format!("{}\n{}\n", path, file1); scene .ucmd() .arg(file1) From e7da8058dc27dc106af66b206318faeec6cf9938 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Fri, 21 May 2021 23:00:13 +0200 Subject: [PATCH 103/148] sort: automatically fall back to extsort To make this work we make default sort a special case of external sort. External sorting uses auxiliary files for intermediate chunks. However, when we can keep our intermediate chunks in memory, we don't write them to the file system at all. Only when we notice that we can't keep them in memory they are written to the disk. Additionally, we don't allocate buffers with the capacity of their maximum size anymore. Instead, they start with a capacity of 8kb and are grown only when needed. This makes sorting smaller files about as fast as it was before (I'm seeing a regression of ~3%), and allows us to seamlessly continue with auxiliary files when needed. 
--- src/uu/sort/BENCHMARKING.md | 3 +- src/uu/sort/src/check.rs | 1 + src/uu/sort/src/chunks.rs | 39 ++++++++++++-- src/uu/sort/src/ext_sort.rs | 105 +++++++++++++++++++++++++----------- src/uu/sort/src/merge.rs | 1 + src/uu/sort/src/sort.rs | 68 ++++++----------------- tests/by-util/test_sort.rs | 33 ++++-------- 7 files changed, 138 insertions(+), 112 deletions(-) diff --git a/src/uu/sort/BENCHMARKING.md b/src/uu/sort/BENCHMARKING.md index 52866719d..fd728c41d 100644 --- a/src/uu/sort/BENCHMARKING.md +++ b/src/uu/sort/BENCHMARKING.md @@ -72,7 +72,8 @@ Run `cargo build --release` before benchmarking after you make a change! ## External sorting Try running commands with the `-S` option set to an amount of memory to be used, such as `1M`. Additionally, you could try sorting -huge files (ideally multiple Gigabytes) with `-S`. Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` +huge files (ideally multiple Gigabytes) with `-S` (or without `-S` to benchmark with our default value). +Creating such a large file can be achieved by running `cat shuffled_wordlist.txt | sort -R >> shuffled_wordlist.txt` multiple times (this will add the contents of `shuffled_wordlist.txt` to itself). Example: Run `hyperfine './target/release/coreutils sort shuffled_wordlist.txt -S 1M' 'sort shuffled_wordlist.txt -S 1M'` diff --git a/src/uu/sort/src/check.rs b/src/uu/sort/src/check.rs index fe815b624..01b5a25b5 100644 --- a/src/uu/sort/src/check.rs +++ b/src/uu/sort/src/check.rs @@ -87,6 +87,7 @@ fn reader( chunks::read( &mut sender, recycled_buffer, + None, &mut carry_over, &mut file, &mut iter::empty(), diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index c679980ec..7a7749003 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -52,13 +52,20 @@ impl Chunk { /// Read a chunk, parse lines and send them. /// -/// No empty chunk will be sent. +/// No empty chunk will be sent. 
If we reach the end of the input, sender_option +/// is set to None. If this function however does not set sender_option to None, +/// it is not guaranteed that there is still input left: If the input fits _exactly_ +/// into a buffer, we will only notice that there's nothing more to read at the next +/// invocation. /// /// # Arguments /// -/// * `sender_option`: The sender to send the lines to the sorter. If `None`, does nothing. +/// (see also `read_to_chunk` for a more detailed documentation) +/// +/// * `sender_option`: The sender to send the lines to the sorter. If `None`, this function does nothing. /// * `buffer`: The recycled buffer. All contents will be overwritten, but it must already be filled. /// (i.e. `buffer.len()` should be equal to `buffer.capacity()`) +/// * `max_buffer_size`: How big `buffer` can be. /// * `carry_over`: The bytes that must be carried over in between invocations. /// * `file`: The current file. /// * `next_files`: What `file` should be updated to next. @@ -69,6 +76,7 @@ impl Chunk { pub fn read( sender_option: &mut Option>, mut buffer: Vec, + max_buffer_size: Option, carry_over: &mut Vec, file: &mut Box, next_files: &mut impl Iterator>, @@ -82,8 +90,14 @@ pub fn read( buffer.resize(carry_over.len() + 10 * 1024, 0); } buffer[..carry_over.len()].copy_from_slice(&carry_over); - let (read, should_continue) = - read_to_buffer(file, next_files, &mut buffer, carry_over.len(), separator); + let (read, should_continue) = read_to_buffer( + file, + next_files, + &mut buffer, + max_buffer_size, + carry_over.len(), + separator, + ); carry_over.clear(); carry_over.extend_from_slice(&buffer[read..]); @@ -138,7 +152,8 @@ fn parse_lines<'a>( /// * `next_files`: When `file` reaches EOF, it is updated to `next_files.next()` if that is `Some`, /// and this function continues reading. /// * `buffer`: The buffer that is filled with bytes. Its contents will mostly be overwritten (see `start_offset` -/// as well). 
It will not be grown by default, unless that is necessary to read at least two lines. +/// as well). It will be grown up to `max_buffer_size` if necessary, but it will always grow to read at least two lines. +/// * `max_buffer_size`: Grow the buffer to at most this length. If None, the buffer will not grow, unless needed to read at least two lines. /// * `start_offset`: The amount of bytes at the start of `buffer` that were carried over /// from the previous read and should not be overwritten. /// * `separator`: The byte that separates lines. @@ -153,6 +168,7 @@ fn read_to_buffer( file: &mut Box, next_files: &mut impl Iterator>, buffer: &mut Vec, + max_buffer_size: Option, start_offset: usize, separator: u8, ) -> (usize, bool) { @@ -162,6 +178,19 @@ fn read_to_buffer( Ok(0) => { if read_target.is_empty() { // chunk is full + if let Some(max_buffer_size) = max_buffer_size { + if max_buffer_size > buffer.len() { + // we can grow the buffer + let prev_len = buffer.len(); + if buffer.len() < max_buffer_size / 2 { + buffer.resize(buffer.len() * 2, 0); + } else { + buffer.resize(max_buffer_size, 0); + } + read_target = &mut buffer[prev_len..]; + continue; + } + } let mut sep_iter = memchr_iter(separator, &buffer).rev(); let last_line_end = sep_iter.next(); if sep_iter.next().is_some() { diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index 629ebb714..a304bf7c0 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -5,12 +5,13 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -//! Sort big files by using files for storing intermediate chunks. +//! Sort big files by using auxiliary files for storing intermediate chunks. //! //! Files are read into chunks of memory which are then sorted individually and //! written to temporary files. There are two threads: One sorter, and one reader/writer. //! The buffers for the individual chunks are recycled. 
There are two buffers. +use std::cmp::Ordering; use std::io::{BufWriter, Write}; use std::path::Path; use std::{ @@ -20,30 +21,19 @@ use std::{ thread, }; +use itertools::Itertools; + use tempdir::TempDir; use crate::{ chunks::{self, Chunk}, - merge::{self, FileMerger}, - sort_by, GlobalSettings, + compare_by, merge, output_sorted_lines, sort_by, GlobalSettings, }; -/// Iterator that wraps the -pub struct ExtSortedMerger<'a> { - pub file_merger: FileMerger<'a>, - // Keep _tmp_dir around, as it is deleted when dropped. - _tmp_dir: TempDir, -} +const MIN_BUFFER_SIZE: usize = 8_000; -/// Sort big files by using files for storing intermediate chunks. -/// -/// # Returns -/// -/// An iterator that merges intermediate files back together. -pub fn ext_sort<'a>( - files: &mut impl Iterator>, - settings: &'a GlobalSettings, -) -> ExtSortedMerger<'a> { +/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. +pub fn ext_sort(files: &mut impl Iterator>, settings: &GlobalSettings) { let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1); let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1); @@ -51,7 +41,7 @@ pub fn ext_sort<'a>( let settings = settings.clone(); move || sorter(recycled_receiver, sorted_sender, settings) }); - let chunks_read = reader_writer( + let read_result = reader_writer( files, &tmp_dir, if settings.zero_terminated { @@ -66,13 +56,29 @@ pub fn ext_sort<'a>( sorted_receiver, recycled_sender, ); - let files = (0..chunks_read) - .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string())) - .collect::>(); - - ExtSortedMerger { - file_merger: merge::merge(&files, settings), - _tmp_dir: tmp_dir, + match read_result { + ReadResult::WroteChunksToFile { chunks_written } => { + let files = (0..chunks_written) + .map(|chunk_num| tmp_dir.path().join(chunk_num.to_string())) + .collect::>(); + let 
mut merger = merge::merge(&files, settings); + merger.write_all(settings); + } + ReadResult::SortedSingleChunk(chunk) => { + output_sorted_lines(chunk.borrow_lines().iter(), settings); + } + ReadResult::SortedTwoChunks([a, b]) => { + let merged_iter = a + .borrow_lines() + .iter() + .merge_by(b.borrow_lines().iter(), |line_a, line_b| { + compare_by(line_a, line_b, settings) != Ordering::Greater + }); + output_sorted_lines(merged_iter, settings); + } + ReadResult::EmptyInput => { + // don't output anything + } } } @@ -84,6 +90,21 @@ fn sorter(receiver: Receiver, sender: SyncSender, settings: Global } } +/// Describes how we read the chunks from the input. +enum ReadResult { + /// The input was empty. Nothing was read. + EmptyInput, + /// The input fits into a single Chunk, which was kept in memory. + SortedSingleChunk(Chunk), + /// The input fits into two chunks, which were kept in memory. + SortedTwoChunks([Chunk; 2]), + /// The input was read into multiple chunks, which were written to auxiliary files. + WroteChunksToFile { + /// The number of chunks written to auxiliary files. + chunks_written: usize, + }, +} + /// The function that is executed on the reader/writer thread. /// /// # Returns @@ -96,7 +117,7 @@ fn reader_writer( settings: GlobalSettings, receiver: Receiver, sender: SyncSender, -) -> usize { +) -> ReadResult { let mut sender_option = Some(sender); let mut file = files.next().unwrap(); @@ -106,21 +127,40 @@ fn reader_writer( for _ in 0..2 { chunks::read( &mut sender_option, - vec![0; buffer_size], + vec![0; MIN_BUFFER_SIZE], + Some(buffer_size), &mut carry_over, &mut file, &mut files, separator, Vec::new(), &settings, - ) + ); + if sender_option.is_none() { + // We have already read the whole input. Since we are in our first two reads, + // this means that we can fit the whole input into memory. Bypass writing below and + // handle this case in a more straightforward way. 
+ return if let Ok(first_chunk) = receiver.recv() { + if let Ok(second_chunk) = receiver.recv() { + ReadResult::SortedTwoChunks([first_chunk, second_chunk]) + } else { + ReadResult::SortedSingleChunk(first_chunk) + } + } else { + ReadResult::EmptyInput + }; + } } let mut file_number = 0; loop { let mut chunk = match receiver.recv() { Ok(it) => it, - _ => return file_number, + _ => { + return ReadResult::WroteChunksToFile { + chunks_written: file_number, + } + } }; write( @@ -129,13 +169,14 @@ fn reader_writer( separator, ); - let (recycled_lines, recycled_buffer) = chunk.recycle(); - file_number += 1; + let (recycled_lines, recycled_buffer) = chunk.recycle(); + chunks::read( &mut sender_option, recycled_buffer, + None, &mut carry_over, &mut file, &mut files, diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs index 6f7cdfed7..48d48ad40 100644 --- a/src/uu/sort/src/merge.rs +++ b/src/uu/sort/src/merge.rs @@ -108,6 +108,7 @@ fn reader( chunks::read( sender, recycled_buffer, + None, carry_over, file, &mut iter::empty(), diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index b6ab5a2b1..78388a298 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -93,7 +93,10 @@ static THOUSANDS_SEP: char = ','; static NEGATIVE: char = '-'; static POSITIVE: char = '+'; -static DEFAULT_BUF_SIZE: usize = std::usize::MAX; +/// Choosing a higher buffer size does not result in performance improvements +/// (at least not on my machine). TODO: In the future, we should also take the amount of +/// available memory into consideration, instead of relying on this constant only. 
+static DEFAULT_BUF_SIZE: usize = 1_000_000_000; #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] enum SortMode { @@ -127,7 +130,6 @@ pub struct GlobalSettings { zero_terminated: bool, buffer_size: usize, tmp_dir: PathBuf, - ext_sort: bool, } impl GlobalSettings { @@ -189,7 +191,6 @@ impl Default for GlobalSettings { zero_terminated: false, buffer_size: DEFAULT_BUF_SIZE, tmp_dir: PathBuf::new(), - ext_sort: false, } } } @@ -941,28 +942,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 { env::set_var("RAYON_NUM_THREADS", &settings.threads); } - if matches.is_present(OPT_BUF_SIZE) { - settings.buffer_size = { - let input = matches - .value_of(OPT_BUF_SIZE) - .map(String::from) - .unwrap_or(format!("{}", DEFAULT_BUF_SIZE)); + settings.buffer_size = matches + .value_of(OPT_BUF_SIZE) + .map(GlobalSettings::human_numeric_convert) + .unwrap_or(DEFAULT_BUF_SIZE); - GlobalSettings::human_numeric_convert(&input) - }; - settings.ext_sort = true; - } - - if matches.is_present(OPT_TMP_DIR) { - let result = matches - .value_of(OPT_TMP_DIR) - .map(String::from) - .unwrap_or(format!("{}", env::temp_dir().display())); - settings.tmp_dir = PathBuf::from(result); - settings.ext_sort = true; - } else { - settings.tmp_dir = env::temp_dir(); - } + settings.tmp_dir = matches + .value_of(OPT_TMP_DIR) + .map(PathBuf::from) + .unwrap_or_else(env::temp_dir); settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED); settings.merge = matches.is_present(OPT_MERGE); @@ -1047,7 +1035,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { exec(&files, &settings) } -fn output_sorted_lines<'a>(iter: impl Iterator>, settings: &GlobalSettings) { +fn output_sorted_lines<'a>(iter: impl Iterator>, settings: &GlobalSettings) { if settings.unique { print_sorted( iter.dedup_by(|a, b| compare_by(a, b, &settings) == Ordering::Equal), @@ -1067,34 +1055,10 @@ fn exec(files: &[String], settings: &GlobalSettings) -> i32 { crash!(1, "only one file allowed with -c"); } return 
check::check(files.first().unwrap(), settings); - } else if settings.ext_sort { + } else { let mut lines = files.iter().filter_map(open); - let mut sorted = ext_sort(&mut lines, &settings); - sorted.file_merger.write_all(settings); - } else { - let separator = if settings.zero_terminated { '\0' } else { '\n' }; - let mut lines = vec![]; - let mut full_string = String::new(); - - for mut file in files.iter().filter_map(open) { - crash_if_err!(1, file.read_to_string(&mut full_string)); - - if !full_string.ends_with(separator) { - full_string.push(separator); - } - } - - if full_string.ends_with(separator) { - full_string.pop(); - } - - for line in full_string.split(if settings.zero_terminated { '\0' } else { '\n' }) { - lines.push(Line::create(line, &settings)); - } - - sort_by(&mut lines, &settings); - output_sorted_lines(lines.into_iter(), &settings); + ext_sort(&mut lines, &settings); } 0 } @@ -1366,7 +1330,7 @@ fn version_compare(a: &str, b: &str) -> Ordering { } } -fn print_sorted<'a, T: Iterator>>(iter: T, settings: &GlobalSettings) { +fn print_sorted<'a, T: Iterator>>(iter: T, settings: &GlobalSettings) { let mut writer = settings.out_writer(); for line in iter { line.print(&mut writer, settings); diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index e89d18054..59058d5bc 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -15,29 +15,18 @@ fn test_helper(file_name: &str, args: &str) { .stdout_is_fixture(format!("{}.expected.debug", file_name)); } -// FYI, the initialization size of our Line struct is 96 bytes. -// -// At very small buffer sizes, with that overhead we are certainly going -// to overrun our buffer way, way, way too quickly because of these excess -// bytes for the struct. 
-// -// For instance, seq 0..20000 > ...text = 108894 bytes -// But overhead is 1920000 + 108894 = 2028894 bytes -// -// Or kjvbible-random.txt = 4332506 bytes, but minimum size of its -// 99817 lines in memory * 96 bytes = 9582432 bytes -// -// Here, we test 108894 bytes with a 50K buffer -// #[test] -fn test_larger_than_specified_segment() { - new_ucmd!() - .arg("-n") - .arg("-S") - .arg("50K") - .arg("ext_sort.txt") - .succeeds() - .stdout_is_fixture("ext_sort.expected"); +fn test_buffer_sizes() { + let buffer_sizes = ["0", "50K", "1M", "1000G"]; + for buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-n") + .arg("-S") + .arg(buffer_size) + .arg("ext_sort.txt") + .succeeds() + .stdout_is_fixture("ext_sort.expected"); + } } #[test] From adaba5402691ee1286abc4ce8be7dd8f66862aa7 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 21 May 2021 18:25:23 +0200 Subject: [PATCH 104/148] gnu/ci: move the operations into script to run them locally --- .github/workflows/GNU.yml | 79 ++--------------------------------- util/build-gnu.sh | 86 +++++++++++++++++++++++++++++++++++++++ util/run-gnu-test.sh | 7 ++++ 3 files changed, 96 insertions(+), 76 deletions(-) create mode 100644 util/build-gnu.sh create mode 100644 util/run-gnu-test.sh diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index 57730aee7..c94902bbc 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -40,85 +40,12 @@ jobs: - name: Build binaries shell: bash run: | - pushd uutils - make PROFILE=release - BUILDDIR="$PWD/target/release/" - cp "${BUILDDIR}/install" "${BUILDDIR}/ginstall" # The GNU tests rename this script before running, to avoid confusion with the make target - # Create *sum binaries - for sum in b2sum md5sum sha1sum sha224sum sha256sum sha384sum sha512sum - do - sum_path="${BUILDDIR}/${sum}" - test -f "${sum_path}" || cp "${BUILDDIR}/hashsum" "${sum_path}" - done - test -f "${BUILDDIR}/[" || cp "${BUILDDIR}/test" "${BUILDDIR}/[" - popd - 
GNULIB_SRCDIR="$PWD/gnulib" - pushd gnu/ - - # Any binaries that aren't built become `false` so their tests fail - for binary in $(./build-aux/gen-lists-of-programs.sh --list-progs) - do - bin_path="${BUILDDIR}/${binary}" - test -f "${bin_path}" || { echo "'${binary}' was not built with uutils, using the 'false' program"; cp "${BUILDDIR}/false" "${bin_path}"; } - done - - ./bootstrap --gnulib-srcdir="$GNULIB_SRCDIR" - ./configure --quiet --disable-gcc-warnings - #Add timeout to to protect against hangs - sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver - # Change the PATH in the Makefile to test the uutils coreutils instead of the GNU coreutils - sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile - sed -i 's| tr | /usr/bin/tr |' tests/init.sh - make - # Generate the factor tests, so they can be fixed - for i in {00..36} - do - make tests/factor/t${i}.sh - done - grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||' - sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh - - # Remove tests checking for --version & --help - # Not really interesting for us and logs are too big - sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \ - -e '/tests\/misc\/help-version.sh/ D' \ - -e '/tests\/misc\/help-version-getopt.sh/ D' \ - Makefile - - # printf doesn't limit the values used in its arg, so this produced ~2GB of output - sed -i '/INT_OFLOW/ D' tests/misc/printf.sh - - # Use the system coreutils where the test fails due to error in a util that is not the one being tested - sed -i 's|stat|/usr/bin/stat|' tests/chgrp/basic.sh tests/cp/existing-perm-dir.sh tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh - sed -i 's|ls -|/usr/bin/ls -|' tests/chgrp/posix-H.sh tests/chown/deref.sh tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh tests/du/8gb.sh - sed -i 's|mkdir |/usr/bin/mkdir |' tests/cp/existing-perm-dir.sh tests/rm/empty-inacc.sh 
- sed -i 's|timeout \([[:digit:]]\)| /usr/bin/timeout \1|' tests/tail-2/inotify-rotate.sh tests/tail-2/inotify-dir-recreate.sh tests/tail-2/inotify-rotate-resources.sh tests/cp/parent-perm-race.sh tests/ls/infloop.sh tests/misc/sort-exit-early.sh tests/misc/sort-NaN-infloop.sh tests/misc/uniq-perf.sh tests/tail-2/inotify-only-regular.sh tests/tail-2/pipe-f2.sh tests/tail-2/retry.sh tests/tail-2/symlink.sh tests/tail-2/wait.sh tests/tail-2/pid.sh tests/dd/stats.sh tests/tail-2/follow-name.sh tests/misc/shuf.sh # Don't break the function called 'grep_timeout' - sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh tests/mkdir/p-3.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/du/inaccessible-cwd.sh tests/mv/i-2.sh tests/chgrp/basic.sh tests/misc/shuf.sh - sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh - sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh - sed -i 's|truncate |/usr/bin/truncate |' tests/split/fail.sh - sed -i 's|dd |/usr/bin/dd |' tests/du/8gb.sh tests/tail-2/big-4gb.sh init.cfg - sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh - sed -i 's|touch |/usr/bin/touch |' tests/cp/preserve-link.sh tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/ls/abmon-align.sh tests/ls/rt-1.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh - sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh - sed -i 's|printf |/usr/bin/printf |' tests/dd/ascii.sh - sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh - sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh - sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh - - #Add specific timeout to tests that currently hang to limit time spent waiting - sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh - sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh - - test -f "${BUILDDIR}/getlimits" || cp src/getlimits 
"${BUILDDIR}" + cd uutils + bash util/build-gnu.sh - name: Run GNU tests shell: bash run: | - BUILDDIR="${PWD}/uutils/target/release" - GNULIB_DIR="${PWD}/gnulib" - pushd gnu - - timeout -sKILL 2h make -j "$(nproc)" check SUBDIRS=. RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no || : # Kill after 4 hours in case something gets stuck in make + bash uutils/util/run-gnu-test.sh - name: Extract tests info shell: bash run: | diff --git a/util/build-gnu.sh b/util/build-gnu.sh new file mode 100644 index 000000000..667dc8e46 --- /dev/null +++ b/util/build-gnu.sh @@ -0,0 +1,86 @@ +#!/bin/bash +set -e +if test ! -d ../gnu; then + echo "Could not find ../gnu" + echo "git clone git@github.com:coreutils/coreutils.git ../gnu" + exit 1 +fi +if test ! -d ../gnulib; then + echo "Could not find ../gnulib" + echo "git clone git@github.com:coreutils/gnulib.git ../gnulib" + exit 1 +fi + + +pushd $(pwd) +make PROFILE=release +BUILDDIR="$PWD/target/release/" +cp "${BUILDDIR}/install" "${BUILDDIR}/ginstall" # The GNU tests rename this script before running, to avoid confusion with the make target +# Create *sum binaries +for sum in b2sum md5sum sha1sum sha224sum sha256sum sha384sum sha512sum +do + sum_path="${BUILDDIR}/${sum}" + test -f "${sum_path}" || cp "${BUILDDIR}/hashsum" "${sum_path}" +done +test -f "${BUILDDIR}/[" || cp "${BUILDDIR}/test" "${BUILDDIR}/[" +popd +GNULIB_SRCDIR="$PWD/../gnulib" +pushd ../gnu/ + +# Any binaries that aren't built become `false` so their tests fail +for binary in $(./build-aux/gen-lists-of-programs.sh --list-progs) +do + bin_path="${BUILDDIR}/${binary}" + test -f "${bin_path}" || { echo "'${binary}' was not built with uutils, using the 'false' program"; cp "${BUILDDIR}/false" "${bin_path}"; } +done + +./bootstrap --gnulib-srcdir="$GNULIB_SRCDIR" +./configure --quiet --disable-gcc-warnings +#Add timeout to to protect against hangs +sed -i 's|"\$@|/usr/bin/timeout 600 "\$@|' build-aux/test-driver +# Change the PATH in the Makefile to test 
the uutils coreutils instead of the GNU coreutils +sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\$\$PATH\" \\\/" Makefile +sed -i 's| tr | /usr/bin/tr |' tests/init.sh +make +# Generate the factor tests, so they can be fixed +for i in {00..36} +do + make tests/factor/t${i}.sh +done +grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||' +sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh + +# Remove tests checking for --version & --help +# Not really interesting for us and logs are too big +sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \ + -e '/tests\/misc\/help-version.sh/ D' \ + -e '/tests\/misc\/help-version-getopt.sh/ D' \ + Makefile + +# printf doesn't limit the values used in its arg, so this produced ~2GB of output +sed -i '/INT_OFLOW/ D' tests/misc/printf.sh + +# Use the system coreutils where the test fails due to error in a util that is not the one being tested +sed -i 's|stat|/usr/bin/stat|' tests/chgrp/basic.sh tests/cp/existing-perm-dir.sh tests/touch/60-seconds.sh tests/misc/sort-compress-proc.sh +sed -i 's|ls -|/usr/bin/ls -|' tests/chgrp/posix-H.sh tests/chown/deref.sh tests/cp/same-file.sh tests/misc/mknod.sh tests/mv/part-symlink.sh tests/du/8gb.sh +sed -i 's|mkdir |/usr/bin/mkdir |' tests/cp/existing-perm-dir.sh tests/rm/empty-inacc.sh +sed -i 's|timeout \([[:digit:]]\)| /usr/bin/timeout \1|' tests/tail-2/inotify-rotate.sh tests/tail-2/inotify-dir-recreate.sh tests/tail-2/inotify-rotate-resources.sh tests/cp/parent-perm-race.sh tests/ls/infloop.sh tests/misc/sort-exit-early.sh tests/misc/sort-NaN-infloop.sh tests/misc/uniq-perf.sh tests/tail-2/inotify-only-regular.sh tests/tail-2/pipe-f2.sh tests/tail-2/retry.sh tests/tail-2/symlink.sh tests/tail-2/wait.sh tests/tail-2/pid.sh tests/dd/stats.sh tests/tail-2/follow-name.sh tests/misc/shuf.sh # Don't break the function called 'grep_timeout' +sed -i 's|chmod |/usr/bin/chmod |' tests/du/inacc-dir.sh 
tests/mkdir/p-3.sh tests/tail-2/tail-n0f.sh tests/cp/fail-perm.sh tests/du/inaccessible-cwd.sh tests/mv/i-2.sh tests/chgrp/basic.sh tests/misc/shuf.sh +sed -i 's|sort |/usr/bin/sort |' tests/ls/hyperlink.sh tests/misc/test-N.sh +sed -i 's|split |/usr/bin/split |' tests/misc/factor-parallel.sh +sed -i 's|truncate |/usr/bin/truncate |' tests/split/fail.sh +sed -i 's|dd |/usr/bin/dd |' tests/du/8gb.sh tests/tail-2/big-4gb.sh init.cfg +sed -i 's|id -|/usr/bin/id -|' tests/misc/runcon-no-reorder.sh +sed -i 's|touch |/usr/bin/touch |' tests/cp/preserve-link.sh tests/cp/reflink-perm.sh tests/ls/block-size.sh tests/ls/abmon-align.sh tests/ls/rt-1.sh tests/mv/update.sh tests/misc/ls-time.sh tests/misc/stat-nanoseconds.sh tests/misc/time-style.sh tests/misc/test-N.sh +sed -i 's|ln -|/usr/bin/ln -|' tests/cp/link-deref.sh +sed -i 's|printf |/usr/bin/printf |' tests/dd/ascii.sh +sed -i 's|cp |/usr/bin/cp |' tests/mv/hard-2.sh +sed -i 's|paste |/usr/bin/paste |' tests/misc/od-endian.sh +sed -i 's|seq |/usr/bin/seq |' tests/misc/sort-discrim.sh + +#Add specific timeout to tests that currently hang to limit time spent waiting +sed -i 's|seq \$|/usr/bin/timeout 0.1 seq \$|' tests/misc/seq-precision.sh tests/misc/seq-long-double.sh +sed -i 's|cat |/usr/bin/timeout 0.1 cat |' tests/misc/cat-self.sh + +test -f "${BUILDDIR}/getlimits" || cp src/getlimits "${BUILDDIR}" + diff --git a/util/run-gnu-test.sh b/util/run-gnu-test.sh new file mode 100644 index 000000000..5031863c4 --- /dev/null +++ b/util/run-gnu-test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e +BUILDDIR="${PWD}/uutils/target/release" +GNULIB_DIR="${PWD}/gnulib" +pushd gnu + +timeout -sKILL 2h make -j "$(nproc)" check SUBDIRS=. 
RUN_EXPENSIVE_TESTS=no RUN_VERY_EXPENSIVE_TESTS=no VERBOSE=no || : # Kill after 4 hours in case something gets stuck in make From ddcd6be37afb0b9a23431415399ad6cf4f881b74 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 21 May 2021 18:31:21 +0200 Subject: [PATCH 105/148] gnu: document how to run the tests --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 7de4419af..6b29fa854 100644 --- a/README.md +++ b/README.md @@ -318,6 +318,16 @@ To pass an argument like "-v" to the busybox test runtime $ make UTILS='UTILITY_1 UTILITY_2' RUNTEST_ARGS='-v' busytest ``` +## Comparing with GNU + +![Evolution over time](https://github.com/uutils/coreutils-tracking/blob/main/gnu-results.png?raw=true) + +To run locally: +```bash +$ bash util/build-gnu.sh +$ bash util/run-gnu-test.sh +``` + ## Contribute To contribute to uutils, please see [CONTRIBUTING](CONTRIBUTING.md). From 373776e0713580935b3eb8395585655bd492e019 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 22 May 2021 09:40:35 +0200 Subject: [PATCH 106/148] freebsd/circus: workaround the timeout https://github.com/rust-lang/rustup/issues/2774 It is failing currently on: ``` info: installing component 'cargo' error: error: 'sysinfo not supported on this platform' ``` with 1.52.1 --- .cirrus.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.cirrus.yml b/.cirrus.yml index 5d16dce92..fb9b038a8 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,3 +1,10 @@ +env: + # Temporary workaround for error `error: sysinfo not supported on + # this platform` seen on FreeBSD platforms, affecting Rustup + # + # References: https://github.com/rust-lang/rustup/issues/2774 + RUSTUP_IO_THREADS: 1 + task: name: stable x86_64-unknown-freebsd-12 freebsd_instance: From 33fb491c6e89dccb0f6532f1bd2c6bb024c4640d Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 22 May 2021 11:05:55 +0200 Subject: [PATCH 107/148] freebsd/circus: update to freebsd 12.2 --- .cirrus.yml 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index fb9b038a8..50f8a25b1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -8,7 +8,7 @@ env: task: name: stable x86_64-unknown-freebsd-12 freebsd_instance: - image: freebsd-12-1-release-amd64 + image: freebsd-12-2-release-amd64 setup_script: - pkg install -y curl gmake - curl https://sh.rustup.rs -sSf --output rustup.sh From 0d1b14ee333bdaae9942cf45f1d5cb7e395f8ab6 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 22 May 2021 11:08:03 +0200 Subject: [PATCH 108/148] Bring back the run expensive tests --- util/run-gnu-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/run-gnu-test.sh b/util/run-gnu-test.sh index 5031863c4..b9948ccd3 100644 --- a/util/run-gnu-test.sh +++ b/util/run-gnu-test.sh @@ -4,4 +4,4 @@ BUILDDIR="${PWD}/uutils/target/release" GNULIB_DIR="${PWD}/gnulib" pushd gnu -timeout -sKILL 2h make -j "$(nproc)" check SUBDIRS=. RUN_EXPENSIVE_TESTS=no RUN_VERY_EXPENSIVE_TESTS=no VERBOSE=no || : # Kill after 4 hours in case something gets stuck in make +timeout -sKILL 2h make -j "$(nproc)" check SUBDIRS=. 
RUN_EXPENSIVE_TESTS=yes RUN_VERY_EXPENSIVE_TESTS=yes VERBOSE=no || : # Kill after 4 hours in case something gets stuck in make From 9f88963764ad2e02836b6583ac556503b4f3c687 Mon Sep 17 00:00:00 2001 From: Anup Mahindre Date: Sat, 22 May 2021 14:51:50 +0530 Subject: [PATCH 109/148] tests: fix test_ls_path for windows --- tests/by-util/test_ls.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index fc4051039..6d6c65194 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -1992,10 +1992,18 @@ fn test_ls_path() { .stdout_is(expected_stdout); let abs_path = format!("{}/{}", at.as_string(), path); - let expected_stdout = &format!("{}\n", abs_path); + let expected_stdout = if cfg!(windows) { + format!("\'{}\'\n", abs_path) + } else { + format!("{}\n", abs_path) + }; scene.ucmd().arg(&abs_path).run().stdout_is(expected_stdout); - let expected_stdout = &format!("{}\n{}\n", path, file1); + let expected_stdout = if cfg!(windows) { + format!("{} {}\n", path, file1) + } else { + format!("{}\n{}\n", path, file1) + }; scene .ucmd() .arg(file1) From 628684af4f8f8fbeed87ba0b8dd592b255f634ab Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 22 May 2021 12:20:13 +0200 Subject: [PATCH 110/148] refresh cargo.lock with recent updates --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index feda68de5..997d52fab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1098,9 +1098,9 @@ dependencies = [ [[package]] name = "plotters" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" dependencies = [ "num-traits", "plotters-backend", @@ -1174,9 +1174,9 @@ checksum = 
"dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" +checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" dependencies = [ "unicode-xid 0.2.2", ] @@ -1308,9 +1308,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" dependencies = [ "autocfg", "crossbeam-deque", @@ -1320,9 +1320,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" dependencies = [ "crossbeam-channel", "crossbeam-deque", From 088443276a6d0229dc93b5a9502ff3d09870d782 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 22 May 2021 14:00:07 +0200 Subject: [PATCH 111/148] sort: improve handling of buffer size cmd arg Instead of overflowing when calculating the buffer size, use saturating_{pow, mul}. When failing to parse the buffer size, we now crash instead of silently ignoring the error. 
--- src/uu/sort/src/sort.rs | 54 ++++++++++++++++++++++---------------- tests/by-util/test_sort.rs | 24 ++++++++++------- 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 78388a298..bc3b65492 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -93,10 +93,10 @@ static THOUSANDS_SEP: char = ','; static NEGATIVE: char = '-'; static POSITIVE: char = '+'; -/// Choosing a higher buffer size does not result in performance improvements -/// (at least not on my machine). TODO: In the future, we should also take the amount of -/// available memory into consideration, instead of relying on this constant only. -static DEFAULT_BUF_SIZE: usize = 1_000_000_000; +// Choosing a higher buffer size does not result in performance improvements +// (at least not on my machine). TODO: In the future, we should also take the amount of +// available memory into consideration, instead of relying on this constant only. +static DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB #[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] enum SortMode { @@ -133,24 +133,32 @@ pub struct GlobalSettings { } impl GlobalSettings { - // It's back to do conversions for command line opts! - // Probably want to do through numstrcmp somehow now? - fn human_numeric_convert(a: &str) -> usize { - let num_str = &a[get_leading_gen(a)]; - let (_, suf_str) = a.split_at(num_str.len()); - let num_usize = num_str - .parse::() - .expect("Error parsing buffer size: "); - let suf_usize: usize = match suf_str.to_uppercase().as_str() { - // SI Units - "B" => 1usize, - "K" => 1000usize, - "M" => 1000000usize, - "G" => 1000000000usize, - // GNU regards empty human numeric values as K by default - _ => 1000usize, - }; - num_usize * suf_usize + /// Interpret this `&str` as a number with an optional trailing si unit. + /// + /// If there is no trailing si unit, the implicit unit is K. 
+ /// The suffix B causes the number to be interpreted as a byte count. + fn parse_byte_count(input: &str) -> usize { + const SI_UNITS: &[char] = &['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']; + + let input = input.trim(); + + let (num_str, si_unit) = + if input.ends_with(|c: char| SI_UNITS.contains(&c.to_ascii_uppercase())) { + let mut chars = input.chars(); + let si_suffix = chars.next_back().unwrap().to_ascii_uppercase(); + let si_unit = SI_UNITS.iter().position(|&c| c == si_suffix).unwrap(); + let num_str = chars.as_str(); + (num_str, si_unit) + } else { + (input, 1) + }; + + let num_usize: usize = num_str + .trim() + .parse() + .unwrap_or_else(|e| crash!(1, "failed to parse buffer size `{}`: {}", num_str, e)); + + num_usize.saturating_mul(1000usize.saturating_pow(si_unit as u32)) } fn out_writer(&self) -> BufWriter> { @@ -944,7 +952,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.buffer_size = matches .value_of(OPT_BUF_SIZE) - .map(GlobalSettings::human_numeric_convert) + .map(GlobalSettings::parse_byte_count) .unwrap_or(DEFAULT_BUF_SIZE); settings.tmp_dir = matches diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 59058d5bc..23705d2ee 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -17,7 +17,9 @@ fn test_helper(file_name: &str, args: &str) { #[test] fn test_buffer_sizes() { - let buffer_sizes = ["0", "50K", "1M", "1000G"]; + let buffer_sizes = [ + "0", "50K", "50k", "1M", "100M", "1000G", "10T", "500E", "1Y", + ]; for buffer_size in &buffer_sizes { new_ucmd!() .arg("-n") @@ -30,14 +32,18 @@ fn test_buffer_sizes() { } #[test] -fn test_smaller_than_specified_segment() { - new_ucmd!() - .arg("-n") - .arg("-S") - .arg("100M") - .arg("ext_sort.txt") - .succeeds() - .stdout_is_fixture("ext_sort.expected"); +fn test_invalid_buffer_size() { + let buffer_sizes = ["asd", "100f"]; + for invalid_buffer_size in &buffer_sizes { + new_ucmd!() + .arg("-S") + .arg(invalid_buffer_size) + .fails() + 
.stderr_only(format!( + "sort: error: failed to parse buffer size `{}`: invalid digit found in string", + invalid_buffer_size + )); + } } #[test] From bee3b1237c589f7c2e22d94d0c6d2e99bea0b5b5 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 22 May 2021 10:44:50 -0400 Subject: [PATCH 112/148] uucore::fs: don't canonicalize last component Change the behavior of `uucore::fs::canonicalize()` when `can_mode` is `CanonicalizeMode::None` so that it does not attempt to resolve the final component if it is a symbolic link. This matches the behavior of the function for the non-final components of a path when `can_mode` is `None`. --- src/uucore/src/lib/features/fs.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index 040c36e95..afaa07af1 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -54,11 +54,19 @@ pub fn resolve_relative_path(path: &Path) -> Cow { result.into() } +/// Controls how symbolic links should be handled when canonicalizing a path. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum CanonicalizeMode { + /// Do not resolve any symbolic links. None, + + /// Resolve all symbolic links. Normal, + + /// Resolve symbolic links, ignoring errors on the final component. Existing, + + /// Resolve symbolic links, ignoring errors on the non-final components. Missing, } @@ -125,6 +133,24 @@ fn resolve>(original: P) -> IOResult { Ok(result) } +/// Return the canonical, absolute form of a path. +/// +/// This function is a generalization of [`std::fs::canonicalize`] that +/// allows controlling how symbolic links are resolved and how to deal +/// with missing components. It returns the canonical, absolute form of +/// a path. 
The `can_mode` parameter controls how symbolic links are +/// resolved: +/// +/// * [`CanonicalizeMode::Normal`] makes this function behave like +/// [`std::fs::canonicalize`], resolving symbolic links and returning +/// an error if the path does not exist. +/// * [`CanonicalizeMode::Missing`] makes this function ignore non-final +/// components of the path that could not be resolved. +/// * [`CanonicalizeMode::Existing`] makes this function return an error +/// if the final component of the path does not exist. +/// * [`CanonicalizeMode::None`] makes this function not try to resolve +/// any symbolic links. +/// pub fn canonicalize>(original: P, can_mode: CanonicalizeMode) -> IOResult { // Create an absolute path let original = original.as_ref(); @@ -180,6 +206,10 @@ pub fn canonicalize>(original: P, can_mode: CanonicalizeMode) -> result.push(parts.last().unwrap()); + if can_mode == CanonicalizeMode::None { + return Ok(result); + } + match resolve(&result) { Err(e) => { if can_mode == CanonicalizeMode::Existing { From 4b5c3efe85bf3dd735401f40d44eda16dd1c67c2 Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Sat, 22 May 2021 10:50:00 -0400 Subject: [PATCH 113/148] realpath: use uucore::fs::canonicalize() Use the `uucore::fs::canonicalize()` function to simplify the implementation of `realpath`. 
--- src/uu/realpath/src/realpath.rs | 78 ++++++++++----------------------- 1 file changed, 24 insertions(+), 54 deletions(-) diff --git a/src/uu/realpath/src/realpath.rs b/src/uu/realpath/src/realpath.rs index 37ff70fb2..937cee5bd 100644 --- a/src/uu/realpath/src/realpath.rs +++ b/src/uu/realpath/src/realpath.rs @@ -11,7 +11,6 @@ extern crate uucore; use clap::{App, Arg}; -use std::fs; use std::path::{Path, PathBuf}; use uucore::fs::{canonicalize, CanonicalizeMode}; @@ -75,64 +74,35 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let quiet = matches.is_present(OPT_QUIET); let mut retcode = 0; for path in &paths { - if !resolve_path(path, strip, zero, quiet) { + if let Err(e) = resolve_path(path, strip, zero) { + if !quiet { + show_error!("{}: {}", e, path.display()); + } retcode = 1 }; } retcode } -fn resolve_path(p: &Path, strip: bool, zero: bool, quiet: bool) -> bool { - let abs = canonicalize(p, CanonicalizeMode::Normal).unwrap(); - - if strip { - if zero { - print!("{}\0", p.display()); - } else { - println!("{}", p.display()) - } - return true; - } - - let mut result = PathBuf::new(); - let mut links_left = 256; - - for part in abs.components() { - result.push(part.as_os_str()); - loop { - if links_left == 0 { - if !quiet { - show_error!("Too many symbolic links: {}", p.display()) - }; - return false; - } - match fs::metadata(result.as_path()) { - Err(_) => break, - Ok(ref m) if !m.file_type().is_symlink() => break, - Ok(_) => { - links_left -= 1; - match fs::read_link(result.as_path()) { - Ok(x) => { - result.pop(); - result.push(x.as_path()); - } - _ => { - if !quiet { - show_error!("Invalid path: {}", p.display()) - }; - return false; - } - } - } - } - } - } - - if zero { - print!("{}\0", result.display()); +/// Resolve a path to an absolute form and print it. +/// +/// If `strip` is `true`, then this function does not attempt to resolve +/// symbolic links in the path. 
If `zero` is `true`, then this function +/// prints the path followed by the null byte (`'\0'`) instead of a +/// newline character (`'\n'`). +/// +/// # Errors +/// +/// This function returns an error if there is a problem resolving +/// symbolic links. +fn resolve_path(p: &Path, strip: bool, zero: bool) -> std::io::Result<()> { + let mode = if strip { + CanonicalizeMode::None } else { - println!("{}", result.display()); - } - - true + CanonicalizeMode::Normal + }; + let abs = canonicalize(p, mode)?; + let line_ending = if zero { '\0' } else { '\n' }; + print!("{}{}", abs.display(), line_ending); + Ok(()) } From fcb079e20e6cd6008d7146a9a5dc04ddda0e5bdd Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 20 May 2021 20:09:41 +0100 Subject: [PATCH 114/148] who freebsd build fix unsupported RUN_LVL option only for other platforms. --- src/uu/who/src/who.rs | 17 +++++++++++------ tests/by-util/test_who.rs | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index 1ae4f1c8d..81fc2a687 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -29,7 +29,7 @@ mod options { pub const ONLY_HOSTNAME_USER: &str = "only_hostname_user"; pub const PROCESS: &str = "process"; pub const COUNT: &str = "count"; - #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "android"))] + #[cfg(any(target_os = "linux", target_os = "android"))] pub const RUNLEVEL: &str = "runlevel"; pub const SHORT: &str = "short"; pub const TIME: &str = "time"; @@ -119,11 +119,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("all login names and number of users logged on"), ) .arg( - #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "android"))] + #[cfg(any(target_os = "linux", target_os = "android"))] Arg::with_name(options::RUNLEVEL) .long(options::RUNLEVEL) .short("r") .help("print current runlevel"), + #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] + 
Arg::with_name(""), ) .arg( Arg::with_name(options::SHORT) @@ -265,10 +267,13 @@ pub fn uumain(args: impl uucore::Args) -> i32 { assumptions = false; } - if matches.is_present(options::RUNLEVEL) { - need_runlevel = true; - include_idle = true; - assumptions = false; + #[cfg(any(target_os = "linux", target_os = "android"))] + { + if matches.is_present(options::RUNLEVEL) { + need_runlevel = true; + include_idle = true; + assumptions = false; + } } if matches.is_present(options::SHORT) { diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index 725ec0b1e..1aa8d604d 100644 --- a/tests/by-util/test_who.rs +++ b/tests/by-util/test_who.rs @@ -83,7 +83,7 @@ fn test_process() { } } -#[cfg(any(target_vendor = "apple", target_os = "linux"))] +#[cfg(target_os = "linux")] #[test] fn test_runlevel() { for opt in vec!["-r", "--runlevel"] { @@ -94,6 +94,19 @@ fn test_runlevel() { } } +#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] +#[test] +fn test_runlevel() { + let expected = + "error: Found argument"; + for opt in vec!["-r", "--runlevel"] { + new_ucmd!() + .arg(opt) + .fails() + .stderr_contains(expected); + } +} + #[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_time() { @@ -122,6 +135,7 @@ fn test_mesg() { } } +#[cfg(target_os = "linux")] #[test] fn test_arg1_arg2() { let args = ["am", "i"]; @@ -132,6 +146,7 @@ fn test_arg1_arg2() { .stdout_is(expected_result(&args)); } +#[cfg(target_os = "linux")] #[test] fn test_too_many_args() { const EXPECTED: &str = From 4521aa2659c2e44516415913d63da0774e1fd1bc Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Thu, 6 May 2021 22:39:39 -0400 Subject: [PATCH 115/148] wc: print counts for each file as soon as computed Change the behavior of `wc` to print the counts for a file as soon as it is computed, instead of waiting to compute the counts for all files before writing any output to `stdout`. The new behavior matches the behavior of GNU `wc`. 
The old behavior looked like this (the word "hello" is entered on `stdin`): $ wc emptyfile.txt - hello 0 0 0 emptyfile.txt 1 1 6 1 1 6 total The new behavior looks like this: $ wc emptyfile.txt - 0 0 0 emptyfile.txt hello 1 1 6 1 1 6 total --- src/uu/wc/src/wc.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index b323f7261..6e95254ee 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -373,7 +373,6 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { let max_width = max_width(&inputs); let mut total_word_count = WordCount::default(); - let mut results = vec![]; let num_inputs = inputs.len(); @@ -384,10 +383,7 @@ fn wc(inputs: Vec, settings: &Settings) -> Result<(), u32> { WordCount::default() }); total_word_count += word_count; - results.push(word_count.with_title(input.to_title())); - } - - for result in &results { + let result = word_count.with_title(input.to_title()); if let Err(err) = print_stats(settings, &result, max_width) { show_warning!( "failed to print result for {}: {}", From 9f0ef3ba54a83e4ed5e5a87bf8f3663eeccfa1c2 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 22 May 2021 21:59:54 +0200 Subject: [PATCH 116/148] gnu/ci: also store the hash in the json --- .github/workflows/GNU.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/GNU.yml b/.github/workflows/GNU.yml index c94902bbc..1f9250900 100644 --- a/.github/workflows/GNU.yml +++ b/.github/workflows/GNU.yml @@ -61,13 +61,14 @@ jobs: echo "::warning ::GNU testsuite = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR" jq -n \ --arg date "$(date --rfc-email)" \ + --arg sha "$GITHUB_SHA" \ --arg total "$TOTAL" \ --arg pass "$PASS" \ --arg skip "$SKIP" \ --arg fail "$FAIL" \ --arg xpass "$XPASS" \ --arg error "$ERROR" \ - '{($date): { total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > gnu-result.json + '{($date): { sha: 
$sha, total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > gnu-result.json else echo "::error ::Failed to get summary of test results" fi From c1f67ed775bb33cdf17d563c20ebce8cdf886cde Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 22 May 2021 23:06:30 +0200 Subject: [PATCH 117/148] sort: support --sort flag and check for conflicts `sort` supports three ways to specify the sort mode: a long option (e.g. --numeric-sort), a short option (e.g. -n) and the sort flag (e.g. --sort=numeric). This adds support for the sort flag. Additionally, sort modes now conflict, which means that an error is shown when multiple modes are passed, instead of silently picking a mode. For consistency, I added the `random` sort mode to the `SortMode` enum, instead of it being a bool flag. --- src/uu/sort/src/sort.rs | 178 +++++++++++++++++++++++-------------- tests/by-util/test_sort.rs | 139 +++++++++++++++++++---------- 2 files changed, 202 insertions(+), 115 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index bc3b65492..81787ece6 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -64,6 +64,17 @@ static OPT_NUMERIC_SORT: &str = "numeric-sort"; static OPT_GENERAL_NUMERIC_SORT: &str = "general-numeric-sort"; static OPT_VERSION_SORT: &str = "version-sort"; +static OPT_SORT: &str = "sort"; + +static ALL_SORT_MODES: &[&str] = &[ + OPT_GENERAL_NUMERIC_SORT, + OPT_HUMAN_NUMERIC_SORT, + OPT_MONTH_SORT, + OPT_NUMERIC_SORT, + OPT_VERSION_SORT, + OPT_RANDOM, +]; + static OPT_DICTIONARY_ORDER: &str = "dictionary-order"; static OPT_MERGE: &str = "merge"; static OPT_CHECK: &str = "check"; @@ -105,6 +116,7 @@ enum SortMode { GeneralNumeric, Month, Version, + Random, Default, } #[derive(Clone)] @@ -122,7 +134,6 @@ pub struct GlobalSettings { unique: bool, check: bool, check_silent: bool, - random: bool, salt: String, selectors: Vec, separator: Option, @@ -191,7 +202,6 @@ impl Default for GlobalSettings { 
unique: false, check: false, check_silent: false, - random: false, salt: String::new(), selectors: vec![], separator: None, @@ -209,7 +219,6 @@ struct KeySettings { ignore_case: bool, dictionary_order: bool, ignore_non_printing: bool, - random: bool, reverse: bool, } @@ -220,7 +229,6 @@ impl From<&GlobalSettings> for KeySettings { ignore_blanks: settings.ignore_blanks, ignore_case: settings.ignore_case, ignore_non_printing: settings.ignore_non_printing, - random: settings.random, reverse: settings.reverse, dictionary_order: settings.dictionary_order, } @@ -398,7 +406,7 @@ impl<'a> Line<'a> { } } } - if !(settings.random + if !(settings.mode == SortMode::Random || settings.stable || settings.unique || !(settings.dictionary_order @@ -502,7 +510,7 @@ impl KeyPosition { 'h' => settings.mode = SortMode::HumanNumeric, 'i' => settings.ignore_non_printing = true, 'n' => settings.mode = SortMode::Numeric, - 'R' => settings.random = true, + 'R' => settings.mode = SortMode::Random, 'r' => settings.reverse = true, 'V' => settings.mode = SortMode::Version, c => { @@ -526,7 +534,9 @@ impl KeyPosition { | SortMode::GeneralNumeric | SortMode::Month => SortMode::Default, // Only SortMode::Default and SortMode::Version work with dictionary_order and ignore_non_printing - m @ SortMode::Default | m @ SortMode::Version => m, + m @ SortMode::Default + | m @ SortMode::Version + | m @ SortMode::Random => m, } } _ => {} @@ -720,6 +730,16 @@ With no FILE, or when FILE is -, read standard input.", ) } +fn make_sort_mode_arg<'a, 'b>(mode: &'a str, short: &'b str, help: &'b str) -> Arg<'a, 'b> { + let mut arg = Arg::with_name(mode).short(short).long(mode).help(help); + for possible_mode in ALL_SORT_MODES { + if *possible_mode != mode { + arg = arg.conflicts_with(possible_mode); + } + } + arg +} + pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) @@ -732,34 +752,62 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .about(ABOUT) 
.usage(&usage[..]) .arg( - Arg::with_name(OPT_HUMAN_NUMERIC_SORT) - .short("h") - .long(OPT_HUMAN_NUMERIC_SORT) - .help("compare according to human readable sizes, eg 1M > 100k"), + Arg::with_name(OPT_SORT) + .long(OPT_SORT) + .takes_value(true) + .possible_values( + &[ + "general-numeric", + "human-numeric", + "month", + "numeric", + "version", + "random", + ] + ) + .conflicts_with_all(ALL_SORT_MODES) ) .arg( - Arg::with_name(OPT_MONTH_SORT) - .short("M") - .long(OPT_MONTH_SORT) - .help("compare according to month name abbreviation"), + make_sort_mode_arg( + OPT_HUMAN_NUMERIC_SORT, + "h", + "compare according to human readable sizes, eg 1M > 100k" + ), ) .arg( - Arg::with_name(OPT_NUMERIC_SORT) - .short("n") - .long(OPT_NUMERIC_SORT) - .help("compare according to string numerical value"), + make_sort_mode_arg( + OPT_MONTH_SORT, + "M", + "compare according to month name abbreviation" + ), ) .arg( - Arg::with_name(OPT_GENERAL_NUMERIC_SORT) - .short("g") - .long(OPT_GENERAL_NUMERIC_SORT) - .help("compare according to string general numerical value"), + make_sort_mode_arg( + OPT_NUMERIC_SORT, + "n", + "compare according to string numerical value" + ), ) .arg( - Arg::with_name(OPT_VERSION_SORT) - .short("V") - .long(OPT_VERSION_SORT) - .help("Sort by SemVer version number, eg 1.12.2 > 1.1.2"), + make_sort_mode_arg( + OPT_GENERAL_NUMERIC_SORT, + "g", + "compare according to string general numerical value" + ), + ) + .arg( + make_sort_mode_arg( + OPT_VERSION_SORT, + "V", + "Sort by SemVer version number, eg 1.12.2 > 1.1.2", + ), + ) + .arg( + make_sort_mode_arg( + OPT_RANDOM, + "R", + "shuffle in random order", + ), ) .arg( Arg::with_name(OPT_DICTIONARY_ORDER) @@ -813,12 +861,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .takes_value(true) .value_name("FILENAME"), ) - .arg( - Arg::with_name(OPT_RANDOM) - .short("R") - .long(OPT_RANDOM) - .help("shuffle in random order"), - ) .arg( Arg::with_name(OPT_REVERSE) .short("r") @@ -925,16 +967,25 @@ pub fn uumain(args: 
impl uucore::Args) -> i32 { .unwrap_or_default() }; - settings.mode = if matches.is_present(OPT_HUMAN_NUMERIC_SORT) { + settings.mode = if matches.is_present(OPT_HUMAN_NUMERIC_SORT) + || matches.value_of(OPT_SORT) == Some("human-numeric") + { SortMode::HumanNumeric - } else if matches.is_present(OPT_MONTH_SORT) { + } else if matches.is_present(OPT_MONTH_SORT) || matches.value_of(OPT_SORT) == Some("month") { SortMode::Month - } else if matches.is_present(OPT_GENERAL_NUMERIC_SORT) { + } else if matches.is_present(OPT_GENERAL_NUMERIC_SORT) + || matches.value_of(OPT_SORT) == Some("general-numeric") + { SortMode::GeneralNumeric - } else if matches.is_present(OPT_NUMERIC_SORT) { + } else if matches.is_present(OPT_NUMERIC_SORT) || matches.value_of(OPT_SORT) == Some("numeric") + { SortMode::Numeric - } else if matches.is_present(OPT_VERSION_SORT) { + } else if matches.is_present(OPT_VERSION_SORT) || matches.value_of(OPT_SORT) == Some("version") + { SortMode::Version + } else if matches.is_present(OPT_RANDOM) || matches.value_of(OPT_SORT) == Some("random") { + settings.salt = get_rand_string(); + SortMode::Random } else { SortMode::Default }; @@ -978,11 +1029,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { settings.stable = matches.is_present(OPT_STABLE); settings.unique = matches.is_present(OPT_UNIQUE); - if matches.is_present(OPT_RANDOM) { - settings.random = matches.is_present(OPT_RANDOM); - settings.salt = get_rand_string(); - } - if files.is_empty() { /* if no file, default to stdin */ files.push("-".to_owned()); @@ -1110,28 +1156,25 @@ fn compare_by<'a>(a: &Line<'a>, b: &Line<'a>, global_settings: &GlobalSettings) let b_str = b_selection.slice; let settings = &selector.settings; - let cmp: Ordering = if settings.random { - random_shuffle(a_str, b_str, &global_settings.salt) - } else { - match settings.mode { - SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( - (a_str, a_selection.num_cache.as_ref().unwrap().as_num_info()), - (b_str, 
b_selection.num_cache.as_ref().unwrap().as_num_info()), - ), - SortMode::GeneralNumeric => general_numeric_compare( - a_selection.num_cache.as_ref().unwrap().as_f64(), - b_selection.num_cache.as_ref().unwrap().as_f64(), - ), - SortMode::Month => month_compare(a_str, b_str), - SortMode::Version => version_compare(a_str, b_str), - SortMode::Default => custom_str_cmp( - a_str, - b_str, - settings.ignore_non_printing, - settings.dictionary_order, - settings.ignore_case, - ), - } + let cmp: Ordering = match settings.mode { + SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt), + SortMode::Numeric | SortMode::HumanNumeric => numeric_str_cmp( + (a_str, a_selection.num_cache.as_ref().unwrap().as_num_info()), + (b_str, b_selection.num_cache.as_ref().unwrap().as_num_info()), + ), + SortMode::GeneralNumeric => general_numeric_compare( + a_selection.num_cache.as_ref().unwrap().as_f64(), + b_selection.num_cache.as_ref().unwrap().as_f64(), + ), + SortMode::Month => month_compare(a_str, b_str), + SortMode::Version => version_compare(a_str, b_str), + SortMode::Default => custom_str_cmp( + a_str, + b_str, + settings.ignore_non_printing, + settings.dictionary_order, + settings.ignore_case, + ), }; if cmp != Ordering::Equal { return if settings.reverse { cmp.reverse() } else { cmp }; @@ -1139,7 +1182,10 @@ fn compare_by<'a>(a: &Line<'a>, b: &Line<'a>, global_settings: &GlobalSettings) } // Call "last resort compare" if all selectors returned Equal - let cmp = if global_settings.random || global_settings.stable || global_settings.unique { + let cmp = if global_settings.mode == SortMode::Random + || global_settings.stable + || global_settings.unique + { Ordering::Equal } else { a.line.cmp(b.line) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 23705d2ee..e4676b379 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1,18 +1,20 @@ use crate::common::util::*; -fn test_helper(file_name: &str, args: &str) { - 
new_ucmd!() - .arg(format!("{}.txt", file_name)) - .args(&args.split(' ').collect::>()) - .succeeds() - .stdout_is_fixture(format!("{}.expected", file_name)); +fn test_helper(file_name: &str, possible_args: &[&str]) { + for args in possible_args { + new_ucmd!() + .arg(format!("{}.txt", file_name)) + .args(&args.split(' ').collect::>()) + .succeeds() + .stdout_is_fixture(format!("{}.expected", file_name)); - new_ucmd!() - .arg(format!("{}.txt", file_name)) - .arg("--debug") - .args(&args.split(' ').collect::>()) - .succeeds() - .stdout_is_fixture(format!("{}.expected.debug", file_name)); + new_ucmd!() + .arg(format!("{}.txt", file_name)) + .arg("--debug") + .args(&args.split(' ').collect::>()) + .succeeds() + .stdout_is_fixture(format!("{}.expected.debug", file_name)); + } } #[test] @@ -71,7 +73,7 @@ fn test_extsort_zero_terminated() { #[test] fn test_months_whitespace() { - test_helper("months-whitespace", "-M"); + test_helper("months-whitespace", &["-M", "--month-sort", "--sort=month"]); } #[test] @@ -85,7 +87,10 @@ fn test_version_empty_lines() { #[test] fn test_human_numeric_whitespace() { - test_helper("human-numeric-whitespace", "-h"); + test_helper( + "human-numeric-whitespace", + &["-h", "--human-numeric-sort", "--sort=human-numeric"], + ); } // This tests where serde often fails when reading back JSON @@ -102,12 +107,18 @@ fn test_extsort_as64_bailout() { #[test] fn test_multiple_decimals_general() { - test_helper("multiple_decimals_general", "-g") + test_helper( + "multiple_decimals_general", + &["-g", "--general-numeric-sort", "--sort=general-numeric"], + ) } #[test] fn test_multiple_decimals_numeric() { - test_helper("multiple_decimals_numeric", "-n") + test_helper( + "multiple_decimals_numeric", + &["-n", "--numeric-sort", "--sort=numeric"], + ) } #[test] @@ -186,72 +197,93 @@ fn test_random_shuffle_contains_two_runs_not_the_same() { #[test] fn test_numeric_floats_and_ints() { - test_helper("numeric_floats_and_ints", "-n"); + test_helper( + 
"numeric_floats_and_ints", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_floats() { - test_helper("numeric_floats", "-n"); + test_helper( + "numeric_floats", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_floats_with_nan() { - test_helper("numeric_floats_with_nan", "-n"); + test_helper( + "numeric_floats_with_nan", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_unfixed_floats() { - test_helper("numeric_unfixed_floats", "-n"); + test_helper( + "numeric_unfixed_floats", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_fixed_floats() { - test_helper("numeric_fixed_floats", "-n"); + test_helper( + "numeric_fixed_floats", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_numeric_unsorted_ints() { - test_helper("numeric_unsorted_ints", "-n"); + test_helper( + "numeric_unsorted_ints", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_human_block_sizes() { - test_helper("human_block_sizes", "-h"); + test_helper( + "human_block_sizes", + &["-h", "--human-numeric-sort", "--sort=human-numeric"], + ); } #[test] fn test_month_default() { - test_helper("month_default", "-M"); + test_helper("month_default", &["-M", "--month-sort", "--sort=month"]); } #[test] fn test_month_stable() { - test_helper("month_stable", "-Ms"); + test_helper("month_stable", &["-Ms"]); } #[test] fn test_default_unsorted_ints() { - test_helper("default_unsorted_ints", ""); + test_helper("default_unsorted_ints", &[""]); } #[test] fn test_numeric_unique_ints() { - test_helper("numeric_unsorted_ints_unique", "-nu"); + test_helper("numeric_unsorted_ints_unique", &["-nu"]); } #[test] fn test_version() { - test_helper("version", "-V"); + test_helper("version", &["-V"]); } #[test] fn test_ignore_case() { - test_helper("ignore_case", "-f"); + test_helper("ignore_case", &["-f"]); } #[test] fn test_dictionary_order() { - test_helper("dictionary_order", 
"-d"); + test_helper("dictionary_order", &["-d"]); } #[test] @@ -278,47 +310,53 @@ fn test_non_printing_chars() { #[test] fn test_exponents_positive_general_fixed() { - test_helper("exponents_general", "-g"); + test_helper("exponents_general", &["-g"]); } #[test] fn test_exponents_positive_numeric() { - test_helper("exponents-positive-numeric", "-n"); + test_helper( + "exponents-positive-numeric", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_months_dedup() { - test_helper("months-dedup", "-Mu"); + test_helper("months-dedup", &["-Mu"]); } #[test] fn test_mixed_floats_ints_chars_numeric() { - test_helper("mixed_floats_ints_chars_numeric", "-n"); + test_helper( + "mixed_floats_ints_chars_numeric", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_mixed_floats_ints_chars_numeric_unique() { - test_helper("mixed_floats_ints_chars_numeric_unique", "-nu"); + test_helper("mixed_floats_ints_chars_numeric_unique", &["-nu"]); } #[test] fn test_words_unique() { - test_helper("words_unique", "-u"); + test_helper("words_unique", &["-u"]); } #[test] fn test_numeric_unique() { - test_helper("numeric_unique", "-nu"); + test_helper("numeric_unique", &["-nu"]); } #[test] fn test_mixed_floats_ints_chars_numeric_reverse() { - test_helper("mixed_floats_ints_chars_numeric_unique_reverse", "-nur"); + test_helper("mixed_floats_ints_chars_numeric_unique_reverse", &["-nur"]); } #[test] fn test_mixed_floats_ints_chars_numeric_stable() { - test_helper("mixed_floats_ints_chars_numeric_stable", "-ns"); + test_helper("mixed_floats_ints_chars_numeric_stable", &["-ns"]); } #[test] @@ -347,12 +385,15 @@ fn test_numeric_floats2() { #[test] fn test_numeric_floats_with_nan2() { - test_helper("numeric-floats-with-nan2", "-n"); + test_helper( + "numeric-floats-with-nan2", + &["-n", "--numeric-sort", "--sort=numeric"], + ); } #[test] fn test_human_block_sizes2() { - for human_numeric_sort_param in vec!["-h", "--human-numeric-sort"] { + for 
human_numeric_sort_param in &["-h", "--human-numeric-sort", "--sort=human-numeric"] { let input = "8981K\n909991M\n-8T\n21G\n0.8M"; new_ucmd!() .arg(human_numeric_sort_param) @@ -364,7 +405,7 @@ fn test_human_block_sizes2() { #[test] fn test_month_default2() { - for month_sort_param in vec!["-M", "--month-sort"] { + for month_sort_param in &["-M", "--month-sort", "--sort=month"] { let input = "JAn\nMAY\n000may\nJun\nFeb"; new_ucmd!() .arg(month_sort_param) @@ -397,32 +438,32 @@ fn test_numeric_unique_ints2() { #[test] fn test_keys_open_ended() { - test_helper("keys_open_ended", "-k 2.3"); + test_helper("keys_open_ended", &["-k 2.3"]); } #[test] fn test_keys_closed_range() { - test_helper("keys_closed_range", "-k 2.2,2.2"); + test_helper("keys_closed_range", &["-k 2.2,2.2"]); } #[test] fn test_keys_multiple_ranges() { - test_helper("keys_multiple_ranges", "-k 2,2 -k 3,3"); + test_helper("keys_multiple_ranges", &["-k 2,2 -k 3,3"]); } #[test] fn test_keys_no_field_match() { - test_helper("keys_no_field_match", "-k 4,4"); + test_helper("keys_no_field_match", &["-k 4,4"]); } #[test] fn test_keys_no_char_match() { - test_helper("keys_no_char_match", "-k 1.2"); + test_helper("keys_no_char_match", &["-k 1.2"]); } #[test] fn test_keys_custom_separator() { - test_helper("keys_custom_separator", "-k 2.2,2.2 -t x"); + test_helper("keys_custom_separator", &["-k 2.2,2.2 -t x"]); } #[test] @@ -534,7 +575,7 @@ aaaa #[test] fn test_zero_terminated() { - test_helper("zero-terminated", "-z"); + test_helper("zero-terminated", &["-z"]); } #[test] From 4aaeede3d8475058531daf740059ed44c3a12850 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 23 May 2021 00:13:53 +0200 Subject: [PATCH 118/148] rustfmt the recent change --- src/uu/pinky/src/pinky.rs | 2 +- src/uu/sort/src/chunks.rs | 4 +--- src/uu/sort/src/sort.rs | 4 +--- src/uu/stdbuf/src/stdbuf.rs | 25 +++++++++++++------------ tests/by-util/test_stdbuf.rs | 12 ++++++------ tests/by-util/test_tail.rs | 4 ---- 
tests/by-util/test_who.rs | 8 ++------ 7 files changed, 24 insertions(+), 35 deletions(-) diff --git a/src/uu/pinky/src/pinky.rs b/src/uu/pinky/src/pinky.rs index f0ab44e5f..d65775c2d 100644 --- a/src/uu/pinky/src/pinky.rs +++ b/src/uu/pinky/src/pinky.rs @@ -48,7 +48,7 @@ fn get_usage() -> String { fn get_long_usage() -> String { format!( "A lightweight 'finger' program; print user information.\n\ - The utmp file will be {}.", + The utmp file will be {}.", utmpx::DEFAULT_FILE ) } diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 7a7749003..6ec759211 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -223,9 +223,7 @@ fn read_to_buffer( Err(e) if e.kind() == ErrorKind::Interrupted => { // retry } - Err(e) => { - crash!(1, "{}", e) - } + Err(e) => crash!(1, "{}", e), } } } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index bc3b65492..1bbfdc5c5 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -505,9 +505,7 @@ impl KeyPosition { 'R' => settings.random = true, 'r' => settings.reverse = true, 'V' => settings.mode = SortMode::Version, - c => { - crash!(1, "invalid option for key: `{}`", c) - } + c => crash!(1, "invalid option for key: `{}`", c), } // All numeric sorts and month sort conflict with dictionary_order and ignore_non_printing. // Instad of reporting an error, let them overwrite each other. 
diff --git a/src/uu/stdbuf/src/stdbuf.rs b/src/uu/stdbuf/src/stdbuf.rs index 77f6d9dad..485b3c70e 100644 --- a/src/uu/stdbuf/src/stdbuf.rs +++ b/src/uu/stdbuf/src/stdbuf.rs @@ -24,18 +24,19 @@ use uucore::InvalidEncodingHandling; static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Run COMMAND, with modified buffering operations for its standard streams.\n\n\ - Mandatory arguments to long options are mandatory for short options too."; -static LONG_HELP: &str = "If MODE is 'L' the corresponding stream will be line buffered.\n\ - This option is invalid with standard input.\n\n\ - If MODE is '0' the corresponding stream will be unbuffered.\n\n\ - Otherwise MODE is a number which may be followed by one of the following:\n\n\ - KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\ - In this case the corresponding stream will be fully buffered with the buffer size set to \ - MODE bytes.\n\n\ - NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does for e.g.) then \ - that will override corresponding settings changed by 'stdbuf'.\n\ - Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O, \ - and are thus unaffected by 'stdbuf' settings.\n"; + Mandatory arguments to long options are mandatory for short options too."; +static LONG_HELP: &str = + "If MODE is 'L' the corresponding stream will be line buffered.\n\ + This option is invalid with standard input.\n\n\ + If MODE is '0' the corresponding stream will be unbuffered.\n\n\ + Otherwise MODE is a number which may be followed by one of the following:\n\n\ + KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\ + In this case the corresponding stream will be fully buffered with the buffer size set to \ + MODE bytes.\n\n\ + NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does for e.g.) 
then \ + that will override corresponding settings changed by 'stdbuf'.\n\ + Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O, \ + and are thus unaffected by 'stdbuf' settings.\n"; mod options { pub const INPUT: &str = "input"; diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index 808b7382a..4105cb7a2 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -27,12 +27,12 @@ fn test_stdbuf_line_buffered_stdout() { fn test_stdbuf_no_buffer_option_fails() { new_ucmd!().args(&["head"]).fails().stderr_is( "error: The following required arguments were not provided:\n \ - --error \n \ - --input \n \ - --output \n\n\ - USAGE:\n \ - stdbuf OPTION... COMMAND\n\n\ - For more information try --help", + --error \n \ + --input \n \ + --output \n\n\ + USAGE:\n \ + stdbuf OPTION... COMMAND\n\n\ + For more information try --help", ); } diff --git a/tests/by-util/test_tail.rs b/tests/by-util/test_tail.rs index dddbb9c31..f3c9a7b11 100644 --- a/tests/by-util/test_tail.rs +++ b/tests/by-util/test_tail.rs @@ -349,7 +349,6 @@ fn test_sleep_interval() { new_ucmd!().arg("-s").arg("10").arg(FOOBAR_TXT).succeeds(); } - /// Test for reading all but the first NUM bytes: `tail -c +3`. #[test] fn test_positive_bytes() { @@ -360,7 +359,6 @@ fn test_positive_bytes() { .stdout_is("cde"); } - /// Test for reading all bytes, specified by `tail -c +0`. #[test] fn test_positive_zero_bytes() { @@ -371,7 +369,6 @@ fn test_positive_zero_bytes() { .stdout_is("abcde"); } - /// Test for reading all but the first NUM lines: `tail -n +3`. #[test] fn test_positive_lines() { @@ -382,7 +379,6 @@ fn test_positive_lines() { .stdout_is("c\nd\ne\n"); } - /// Test for reading all lines, specified by `tail -n +0`. 
#[test] fn test_positive_zero_lines() { diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index 1aa8d604d..df023bb0a 100644 --- a/tests/by-util/test_who.rs +++ b/tests/by-util/test_who.rs @@ -97,13 +97,9 @@ fn test_runlevel() { #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] #[test] fn test_runlevel() { - let expected = - "error: Found argument"; + let expected = "error: Found argument"; for opt in vec!["-r", "--runlevel"] { - new_ucmd!() - .arg(opt) - .fails() - .stderr_contains(expected); + new_ucmd!().arg(opt).fails().stderr_contains(expected); } } From 95092e64402cf5feadeb4d5f496cc1f8cbdd239e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 23 May 2021 00:33:54 +0200 Subject: [PATCH 119/148] ignore test_should_calculate_implicit_padding_per_free_argument Fails from time to time with ``` ---- test_numfmt::test_should_calculate_implicit_padding_per_free_argument stdout ---- current_directory_resolved: run: /target/x86_64-unknown-linux-musl/debug/coreutils numfmt --from=auto 1Ki 2K thread 'test_numfmt::test_should_calculate_implicit_padding_per_free_argument' panicked at 'failed to write to stdin of child: Broken pipe (os error 32)', tests/common/util.rs:859:21 ``` --- tests/by-util/test_numfmt.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/by-util/test_numfmt.rs b/tests/by-util/test_numfmt.rs index 64fc5360d..b52dbc359 100644 --- a/tests/by-util/test_numfmt.rs +++ b/tests/by-util/test_numfmt.rs @@ -281,6 +281,7 @@ fn test_leading_whitespace_in_free_argument_should_imply_padding() { } #[test] +#[ignore] fn test_should_calculate_implicit_padding_per_free_argument() { new_ucmd!() .args(&["--from=auto", " 1Ki", " 2K"]) From 44c033a013a12adf4123d3c112c38700fc462001 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Sun, 23 May 2021 02:07:32 +0200 Subject: [PATCH 120/148] who: exclude --runlevel from non Linux targets (fix #2239) --- src/uu/who/src/who.rs | 39 ++++++++++++++++++--------------------- 
tests/by-util/test_who.rs | 26 +++++++------------------- 2 files changed, 25 insertions(+), 40 deletions(-) diff --git a/src/uu/who/src/who.rs b/src/uu/who/src/who.rs index 81fc2a687..19ae3addb 100644 --- a/src/uu/who/src/who.rs +++ b/src/uu/who/src/who.rs @@ -29,7 +29,6 @@ mod options { pub const ONLY_HOSTNAME_USER: &str = "only_hostname_user"; pub const PROCESS: &str = "process"; pub const COUNT: &str = "count"; - #[cfg(any(target_os = "linux", target_os = "android"))] pub const RUNLEVEL: &str = "runlevel"; pub const SHORT: &str = "short"; pub const TIME: &str = "time"; @@ -41,6 +40,11 @@ mod options { static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Print information about users who are currently logged in."; +#[cfg(any(target_os = "linux"))] +static RUNLEVEL_HELP: &str = "print current runlevel"; +#[cfg(not(target_os = "linux"))] +static RUNLEVEL_HELP: &str = "print current runlevel (This is meaningless on non Linux)"; + fn get_usage() -> String { format!("{0} [OPTION]... 
[ FILE | ARG1 ARG2 ]", executable!()) } @@ -119,13 +123,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("all login names and number of users logged on"), ) .arg( - #[cfg(any(target_os = "linux", target_os = "android"))] Arg::with_name(options::RUNLEVEL) .long(options::RUNLEVEL) .short("r") - .help("print current runlevel"), - #[cfg(any(target_vendor = "apple", target_os = "freebsd"))] - Arg::with_name(""), + .help(RUNLEVEL_HELP), ) .arg( Arg::with_name(options::SHORT) @@ -267,13 +268,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { assumptions = false; } - #[cfg(any(target_os = "linux", target_os = "android"))] - { - if matches.is_present(options::RUNLEVEL) { - need_runlevel = true; - include_idle = true; - assumptions = false; - } + if matches.is_present(options::RUNLEVEL) { + need_runlevel = true; + include_idle = true; + assumptions = false; } if matches.is_present(options::SHORT) { @@ -389,15 +387,12 @@ fn current_tty() -> String { impl Who { fn exec(&mut self) { - let run_level_chk = |record: i16| { - #[allow(unused_assignments)] - let mut res = false; + let run_level_chk = |_record: i16| { + #[cfg(not(target_os = "linux"))] + return false; - #[cfg(any(target_vendor = "apple", target_os = "linux", target_os = "android"))] - { - res = record == utmpx::RUN_LVL; - } - res + #[cfg(target_os = "linux")] + return _record == utmpx::RUN_LVL; }; let f = if self.args.len() == 1 { @@ -430,7 +425,9 @@ impl Who { if self.need_users && ut.is_user_process() { self.print_user(&ut); } else if self.need_runlevel && run_level_chk(ut.record_type()) { - self.print_runlevel(&ut); + if cfg!(target_os = "linux") { + self.print_runlevel(&ut); + } } else if self.need_boottime && ut.record_type() == utmpx::BOOT_TIME { self.print_boottime(&ut); } else if self.need_clockchange && ut.record_type() == utmpx::NEW_TIME { diff --git a/tests/by-util/test_who.rs b/tests/by-util/test_who.rs index 1aa8d604d..21b5eb93e 100644 --- a/tests/by-util/test_who.rs +++ 
b/tests/by-util/test_who.rs @@ -83,27 +83,17 @@ fn test_process() { } } -#[cfg(target_os = "linux")] #[test] fn test_runlevel() { for opt in vec!["-r", "--runlevel"] { + #[cfg(any(target_vendor = "apple", target_os = "linux"))] new_ucmd!() .arg(opt) .succeeds() .stdout_is(expected_result(&[opt])); - } -} -#[cfg(any(target_vendor = "apple", target_os = "freebsd"))] -#[test] -fn test_runlevel() { - let expected = - "error: Found argument"; - for opt in vec!["-r", "--runlevel"] { - new_ucmd!() - .arg(opt) - .fails() - .stderr_contains(expected); + #[cfg(not(target_os = "linux"))] + new_ucmd!().arg(opt).succeeds().stdout_is(""); } } @@ -135,7 +125,6 @@ fn test_mesg() { } } -#[cfg(target_os = "linux")] #[test] fn test_arg1_arg2() { let args = ["am", "i"]; @@ -146,7 +135,6 @@ fn test_arg1_arg2() { .stdout_is(expected_result(&args)); } -#[cfg(target_os = "linux")] #[test] fn test_too_many_args() { const EXPECTED: &str = @@ -168,11 +156,11 @@ fn test_users() { let mut v_actual: Vec<&str> = actual.split_whitespace().collect(); let mut v_expect: Vec<&str> = expect.split_whitespace().collect(); - // TODO: `--users` differs from GNU's output on manOS running in CI + // TODO: `--users` differs from GNU's output on macOS // Diff < left / right > : // <"runner console 2021-05-20 22:03 00:08 196\n" // >"runner console 2021-05-20 22:03 old 196\n" - if is_ci() && cfg!(target_os = "macos") { + if cfg!(target_os = "macos") { v_actual.remove(4); v_expect.remove(4); } @@ -206,7 +194,7 @@ fn test_dead() { #[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_all_separately() { - if is_ci() && cfg!(target_os = "macos") { + if cfg!(target_os = "macos") { // TODO: fix `-u`, see: test_users return; } @@ -229,7 +217,7 @@ fn test_all_separately() { #[cfg(any(target_vendor = "apple", target_os = "linux"))] #[test] fn test_all() { - if is_ci() && cfg!(target_os = "macos") { + if cfg!(target_os = "macos") { // TODO: fix `-u`, see: test_users return; } From 
bc9db289e8edad21de4b2e57a457542d6b1280ee Mon Sep 17 00:00:00 2001 From: Jeffrey Finkelstein Date: Tue, 11 May 2021 23:48:06 -0400 Subject: [PATCH 121/148] head: add abstractions for "all but last n lines" Add some abstractions to simplify the `rbuf_but_last_n_lines()` function, which implements the "take all but the last `n` lines" functionality of the `head` program. This commit adds - `RingBuffer`, a fixed-size ring buffer, - `ZLines`, an iterator over zero-terminated "lines", - `TakeAllBut`, an iterator over all but the last `n` elements of an iterator. These three together make the implementation of `rbuf_but_last_n_lines()` concise. --- src/uu/head/Cargo.toml | 2 +- src/uu/head/src/head.rs | 42 +++---- src/uu/head/src/lines.rs | 73 ++++++++++++ src/uu/head/src/take.rs | 93 +++++++++++++++ src/uu/tail/Cargo.toml | 2 +- src/uu/tail/src/ringbuffer.rs | 61 ---------- src/uu/tail/src/tail.rs | 3 +- src/uucore/Cargo.toml | 1 + src/uucore/src/lib/features.rs | 2 + src/uucore/src/lib/features/ringbuffer.rs | 134 ++++++++++++++++++++++ src/uucore/src/lib/lib.rs | 2 + tests/by-util/test_head.rs | 9 ++ 12 files changed, 333 insertions(+), 91 deletions(-) create mode 100644 src/uu/head/src/lines.rs create mode 100644 src/uu/head/src/take.rs delete mode 100644 src/uu/tail/src/ringbuffer.rs create mode 100644 src/uucore/src/lib/features/ringbuffer.rs diff --git a/src/uu/head/Cargo.toml b/src/uu/head/Cargo.toml index 3c383cb6f..661052f58 100644 --- a/src/uu/head/Cargo.toml +++ b/src/uu/head/Cargo.toml @@ -16,7 +16,7 @@ path = "src/head.rs" [dependencies] clap = "2.33" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["ringbuffer"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/head/src/head.rs b/src/uu/head/src/head.rs index 0c8b3bc88..3602b4a73 100644 --- a/src/uu/head/src/head.rs +++ 
b/src/uu/head/src/head.rs @@ -27,8 +27,12 @@ mod options { pub const ZERO_NAME: &str = "ZERO"; pub const FILES_NAME: &str = "FILE"; } +mod lines; mod parse; mod split; +mod take; +use lines::zlines; +use take::take_all_but; fn app<'a>() -> App<'a, 'a> { App::new(executable!()) @@ -293,36 +297,22 @@ fn rbuf_but_last_n_bytes(input: &mut impl std::io::BufRead, n: usize) -> std::io } fn rbuf_but_last_n_lines( - input: &mut impl std::io::BufRead, + input: impl std::io::BufRead, n: usize, zero: bool, ) -> std::io::Result<()> { - if n == 0 { - //prints everything - return rbuf_n_bytes(input, std::usize::MAX); + if zero { + let stdout = std::io::stdout(); + let mut stdout = stdout.lock(); + for bytes in take_all_but(zlines(input), n) { + stdout.write_all(&bytes?)?; + } + } else { + for line in take_all_but(input.lines(), n) { + println!("{}", line?); + } } - let mut ringbuf = vec![Vec::new(); n]; - let stdout = std::io::stdout(); - let mut stdout = stdout.lock(); - let mut line = Vec::new(); - let mut lines = 0usize; - split::walk_lines(input, zero, |e| match e { - split::Event::Data(dat) => { - line.extend_from_slice(dat); - Ok(true) - } - split::Event::Line => { - if lines < n { - ringbuf[lines] = std::mem::replace(&mut line, Vec::new()); - lines += 1; - } else { - stdout.write_all(&ringbuf[0])?; - ringbuf.rotate_left(1); - ringbuf[n - 1] = std::mem::replace(&mut line, Vec::new()); - } - Ok(true) - } - }) + Ok(()) } fn head_backwards_file(input: &mut std::fs::File, options: &HeadOptions) -> std::io::Result<()> { diff --git a/src/uu/head/src/lines.rs b/src/uu/head/src/lines.rs new file mode 100644 index 000000000..dcae27bc8 --- /dev/null +++ b/src/uu/head/src/lines.rs @@ -0,0 +1,73 @@ +//! Iterate over zero-terminated lines. +use std::io::BufRead; + +/// The zero byte, representing the null character. +const ZERO: u8 = 0; + +/// Returns an iterator over the lines of the given reader. 
+/// +/// The iterator returned from this function will yield instances of +/// [`io::Result`]<[`Vec`]<[`u8`]>>, representing the bytes of the line +/// *including* the null character (with the possible exception of the +/// last line, which may not have one). +/// +/// # Examples +/// +/// ```rust,ignore +/// use std::io::Cursor; +/// +/// let cursor = Cursor::new(b"x\0y\0z\0"); +/// let mut iter = zlines(cursor).map(|l| l.unwrap()); +/// assert_eq!(iter.next(), Some(b"x\0".to_vec())); +/// assert_eq!(iter.next(), Some(b"y\0".to_vec())); +/// assert_eq!(iter.next(), Some(b"z\0".to_vec())); +/// assert_eq!(iter.next(), None); +/// ``` +pub fn zlines(buf: B) -> ZLines { + ZLines { buf } +} + +/// An iterator over the zero-terminated lines of an instance of `BufRead`. +pub struct ZLines { + buf: B, +} + +impl Iterator for ZLines { + type Item = std::io::Result>; + + fn next(&mut self) -> Option>> { + let mut buf = Vec::new(); + match self.buf.read_until(ZERO, &mut buf) { + Ok(0) => None, + Ok(_) => Some(Ok(buf)), + Err(e) => Some(Err(e)), + } + } +} + +#[cfg(test)] +mod tests { + + use crate::lines::zlines; + use std::io::Cursor; + + #[test] + fn test_null_terminated() { + let cursor = Cursor::new(b"x\0y\0z\0"); + let mut iter = zlines(cursor).map(|l| l.unwrap()); + assert_eq!(iter.next(), Some(b"x\0".to_vec())); + assert_eq!(iter.next(), Some(b"y\0".to_vec())); + assert_eq!(iter.next(), Some(b"z\0".to_vec())); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_not_null_terminated() { + let cursor = Cursor::new(b"x\0y\0z"); + let mut iter = zlines(cursor).map(|l| l.unwrap()); + assert_eq!(iter.next(), Some(b"x\0".to_vec())); + assert_eq!(iter.next(), Some(b"y\0".to_vec())); + assert_eq!(iter.next(), Some(b"z".to_vec())); + assert_eq!(iter.next(), None); + } +} diff --git a/src/uu/head/src/take.rs b/src/uu/head/src/take.rs new file mode 100644 index 000000000..94fa012be --- /dev/null +++ b/src/uu/head/src/take.rs @@ -0,0 +1,93 @@ +//! 
Take all but the last elements of an iterator. +use uucore::ringbuffer::RingBuffer; + +/// Create an iterator over all but the last `n` elements of `iter`. +/// +/// # Examples +/// +/// ```rust,ignore +/// let data = [1, 2, 3, 4, 5]; +/// let n = 2; +/// let mut iter = take_all_but(data.iter(), n); +/// assert_eq!(Some(4), iter.next()); +/// assert_eq!(Some(5), iter.next()); +/// assert_eq!(None, iter.next()); +/// ``` +pub fn take_all_but(iter: I, n: usize) -> TakeAllBut { + TakeAllBut::new(iter, n) +} + +/// An iterator that only iterates over the last elements of another iterator. +pub struct TakeAllBut { + iter: I, + buf: RingBuffer<::Item>, +} + +impl TakeAllBut { + pub fn new(mut iter: I, n: usize) -> TakeAllBut { + // Create a new ring buffer and fill it up. + // + // If there are fewer than `n` elements in `iter`, then we + // exhaust the iterator so that whenever `TakeAllBut::next()` is + // called, it will return `None`, as expected. + let mut buf = RingBuffer::new(n); + for _ in 0..n { + let value = match iter.next() { + None => { + break; + } + Some(x) => x, + }; + buf.push_back(value); + } + TakeAllBut { iter, buf } + } +} + +impl Iterator for TakeAllBut +where + I: Iterator, +{ + type Item = ::Item; + + fn next(&mut self) -> Option<::Item> { + match self.iter.next() { + Some(value) => self.buf.push_back(value), + None => None, + } + } +} + +#[cfg(test)] +mod tests { + + use crate::take::take_all_but; + + #[test] + fn test_fewer_elements() { + let mut iter = take_all_but([0, 1, 2].iter(), 2); + assert_eq!(Some(&0), iter.next()); + assert_eq!(None, iter.next()); + } + + #[test] + fn test_same_number_of_elements() { + let mut iter = take_all_but([0, 1].iter(), 2); + assert_eq!(None, iter.next()); + } + + #[test] + fn test_more_elements() { + let mut iter = take_all_but([0].iter(), 2); + assert_eq!(None, iter.next()); + } + + #[test] + fn test_zero_elements() { + let mut iter = take_all_but([0, 1, 2].iter(), 0); + assert_eq!(Some(&0), iter.next()); + 
assert_eq!(Some(&1), iter.next()); + assert_eq!(Some(&2), iter.next()); + assert_eq!(None, iter.next()); + } +} diff --git a/src/uu/tail/Cargo.toml b/src/uu/tail/Cargo.toml index d3f60e09b..273c67bb3 100644 --- a/src/uu/tail/Cargo.toml +++ b/src/uu/tail/Cargo.toml @@ -17,7 +17,7 @@ path = "src/tail.rs" [dependencies] clap = "2.33" libc = "0.2.42" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["ringbuffer"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi", "synchapi", "winbase"] } diff --git a/src/uu/tail/src/ringbuffer.rs b/src/uu/tail/src/ringbuffer.rs deleted file mode 100644 index 86483b8ed..000000000 --- a/src/uu/tail/src/ringbuffer.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! A fixed-size ring buffer. -use std::collections::VecDeque; - -/// A fixed-size ring buffer backed by a `VecDeque`. -/// -/// If the ring buffer is not full, then calling the [`push_back`] -/// method appends elements, as in a [`VecDeque`]. If the ring buffer -/// is full, then calling [`push_back`] removes the element at the -/// front of the buffer (in a first-in, first-out manner) before -/// appending the new element to the back of the buffer. -/// -/// Use [`from_iter`] to take the last `size` elements from an -/// iterator. 
-/// -/// # Examples -/// -/// After exceeding the size limit, the oldest elements are dropped in -/// favor of the newest element: -/// -/// ```rust,ignore -/// let buffer: RingBuffer = RingBuffer::new(2); -/// buffer.push_back(0); -/// buffer.push_back(1); -/// buffer.push_back(2); -/// assert_eq!(vec![1, 2], buffer.data); -/// ``` -/// -/// Take the last `n` elements from an iterator: -/// -/// ```rust,ignore -/// let iter = vec![0, 1, 2, 3].iter(); -/// assert_eq!(vec![2, 3], RingBuffer::from_iter(iter, 2).data); -/// ``` -pub struct RingBuffer { - pub data: VecDeque, - size: usize, -} - -impl RingBuffer { - pub fn new(size: usize) -> RingBuffer { - RingBuffer { - data: VecDeque::new(), - size, - } - } - - pub fn from_iter(iter: impl Iterator, size: usize) -> RingBuffer { - let mut ringbuf = RingBuffer::new(size); - for value in iter { - ringbuf.push_back(value); - } - ringbuf - } - - pub fn push_back(&mut self, value: T) { - if self.size <= self.data.len() { - self.data.pop_front(); - } - self.data.push_back(value) - } -} diff --git a/src/uu/tail/src/tail.rs b/src/uu/tail/src/tail.rs index 06d0e6fdb..15a819d35 100644 --- a/src/uu/tail/src/tail.rs +++ b/src/uu/tail/src/tail.rs @@ -17,9 +17,7 @@ extern crate uucore; mod chunks; mod platform; -mod ringbuffer; use chunks::ReverseChunks; -use ringbuffer::RingBuffer; use clap::{App, Arg}; use std::collections::VecDeque; @@ -30,6 +28,7 @@ use std::io::{stdin, stdout, BufRead, BufReader, Read, Seek, SeekFrom, Write}; use std::path::Path; use std::thread::sleep; use std::time::Duration; +use uucore::ringbuffer::RingBuffer; pub mod options { pub mod verbosity { diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 85efe0434..482252680 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -47,6 +47,7 @@ mode = ["libc"] parse_time = [] perms = ["libc"] process = ["libc"] +ringbuffer = [] signals = [] utf8 = [] utmpx = ["time", "libc"] diff --git a/src/uucore/src/lib/features.rs 
b/src/uucore/src/lib/features.rs index 0287b9675..310a41fe1 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -8,6 +8,8 @@ pub mod fs; pub mod fsext; #[cfg(feature = "parse_time")] pub mod parse_time; +#[cfg(feature = "ringbuffer")] +pub mod ringbuffer; #[cfg(feature = "zero-copy")] pub mod zero_copy; diff --git a/src/uucore/src/lib/features/ringbuffer.rs b/src/uucore/src/lib/features/ringbuffer.rs new file mode 100644 index 000000000..60847df8f --- /dev/null +++ b/src/uucore/src/lib/features/ringbuffer.rs @@ -0,0 +1,134 @@ +//! A fixed-size ring buffer. +use std::collections::VecDeque; + +/// A fixed-size ring buffer backed by a `VecDeque`. +/// +/// If the ring buffer is not full, then calling the [`push_back`] +/// method appends elements, as in a [`VecDeque`]. If the ring buffer +/// is full, then calling [`push_back`] removes the element at the +/// front of the buffer (in a first-in, first-out manner) before +/// appending the new element to the back of the buffer. +/// +/// Use [`from_iter`] to take the last `size` elements from an +/// iterator. 
+/// +/// # Examples +/// +/// After exceeding the size limit, the oldest elements are dropped in +/// favor of the newest element: +/// +/// ```rust,ignore +/// let mut buffer: RingBuffer = RingBuffer::new(2); +/// buffer.push_back(0); +/// buffer.push_back(1); +/// buffer.push_back(2); +/// assert_eq!(vec![1, 2], buffer.data); +/// ``` +/// +/// Take the last `n` elements from an iterator: +/// +/// ```rust,ignore +/// let iter = [0, 1, 2].iter(); +/// let actual = RingBuffer::from_iter(iter, 2).data; +/// let expected = VecDeque::from_iter([1, 2].iter()); +/// assert_eq!(expected, actual); +/// ``` +pub struct RingBuffer { + pub data: VecDeque, + size: usize, +} + +impl RingBuffer { + pub fn new(size: usize) -> RingBuffer { + RingBuffer { + data: VecDeque::new(), + size, + } + } + + pub fn from_iter(iter: impl Iterator, size: usize) -> RingBuffer { + let mut ringbuf = RingBuffer::new(size); + for value in iter { + ringbuf.push_back(value); + } + ringbuf + } + + /// Append a value to the end of the ring buffer. + /// + /// If the ring buffer is not full, this method return [`None`]. If + /// the ring buffer is full, appending a new element will cause the + /// oldest element to be evicted. In that case this method returns + /// that element, or `None`. + /// + /// In the special case where the size limit is zero, each call to + /// this method with input `value` returns `Some(value)`, because + /// the input is immediately evicted. 
+ /// + /// # Examples + /// + /// Appending an element when the buffer is full returns the oldest + /// element: + /// + /// ```rust,ignore + /// let mut buf = RingBuffer::new(3); + /// assert_eq!(None, buf.push_back(0)); + /// assert_eq!(None, buf.push_back(1)); + /// assert_eq!(None, buf.push_back(2)); + /// assert_eq!(Some(0), buf.push_back(3)); + /// ``` + /// + /// If the size limit is zero, then this method always returns the + /// input value: + /// + /// ```rust,ignore + /// let mut buf = RingBuffer::new(0); + /// assert_eq!(Some(0), buf.push_back(0)); + /// assert_eq!(Some(1), buf.push_back(1)); + /// assert_eq!(Some(2), buf.push_back(2)); + /// ``` + pub fn push_back(&mut self, value: T) -> Option { + if self.size == 0 { + return Some(value); + } + let result = if self.size <= self.data.len() { + self.data.pop_front() + } else { + None + }; + self.data.push_back(value); + result + } +} + +#[cfg(test)] +mod tests { + + use crate::ringbuffer::RingBuffer; + use std::collections::VecDeque; + use std::iter::FromIterator; + + #[test] + fn test_size_limit_zero() { + let mut buf = RingBuffer::new(0); + assert_eq!(Some(0), buf.push_back(0)); + assert_eq!(Some(1), buf.push_back(1)); + assert_eq!(Some(2), buf.push_back(2)); + } + + #[test] + fn test_evict_oldest() { + let mut buf = RingBuffer::new(2); + assert_eq!(None, buf.push_back(0)); + assert_eq!(None, buf.push_back(1)); + assert_eq!(Some(0), buf.push_back(2)); + } + + #[test] + fn test_from_iter() { + let iter = [0, 1, 2].iter(); + let actual = RingBuffer::from_iter(iter, 2).data; + let expected = VecDeque::from_iter([1, 2].iter()); + assert_eq!(expected, actual); + } +} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 28bae08cb..eb630f53a 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -39,6 +39,8 @@ pub use crate::features::fs; pub use crate::features::fsext; #[cfg(feature = "parse_time")] pub use crate::features::parse_time; +#[cfg(feature = "ringbuffer")] 
+pub use crate::features::ringbuffer; #[cfg(feature = "zero-copy")] pub use crate::features::zero_copy; diff --git a/tests/by-util/test_head.rs b/tests/by-util/test_head.rs index 88df1f068..b2a3cf0cb 100755 --- a/tests/by-util/test_head.rs +++ b/tests/by-util/test_head.rs @@ -129,6 +129,15 @@ fn test_zero_terminated_syntax_2() { .stdout_is("x\0y"); } +#[test] +fn test_zero_terminated_negative_lines() { + new_ucmd!() + .args(&["-z", "-n", "-1"]) + .pipe_in("x\0y\0z\0") + .run() + .stdout_is("x\0y\0"); +} + #[test] fn test_negative_byte_syntax() { new_ucmd!() From 1860e61f8344dc7bc7451ea67869190b8e0f21ee Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 23 May 2021 10:19:25 +0200 Subject: [PATCH 122/148] Workaround the Windows CI install issue. Fails from time to time with: ``` info: installing component 'rustc' memory allocation of 16777216 bytes failed Error: The process 'C:\Rust\.cargo\bin\rustup.exe' failed with exit code 3221226505 ``` on Build (windows-latest, i686-pc-windows-gnu, feat_os_windows) --- .github/workflows/CICD.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index bb29355cf..977a86915 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ -363,6 +363,10 @@ jobs: mkdir -p '${{ steps.vars.outputs.STAGING }}/dpkg' - name: rust toolchain ~ install uses: actions-rs/toolchain@v1 + env: + # Override auto-detection of RAM for Rustc install. + # https://github.com/rust-lang/rustup/issues/2229#issuecomment-585855925 + RUSTUP_UNPACK_RAM: "21474836480" with: toolchain: ${{ steps.vars.outputs.TOOLCHAIN }} target: ${{ matrix.job.target }} From 218f523e1b813e9f1f8fa239d78690e72524b202 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 23 May 2021 21:58:18 +0200 Subject: [PATCH 123/148] expr: make substr infallible Instead of returning an Err it should return the "null string" (in our case that's the empty string) when the offset or length is invalid.
--- src/uu/expr/src/syntax_tree.rs | 36 ++++++++++++---------------------- tests/by-util/test_expr.rs | 29 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index c81adf0c8..a75f4c742 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -153,7 +153,7 @@ impl AstNode { ":" | "match" => operator_match(&operand_values), "length" => Ok(prefix_operator_length(&operand_values)), "index" => Ok(prefix_operator_index(&operand_values)), - "substr" => prefix_operator_substr(&operand_values), + "substr" => Ok(prefix_operator_substr(&operand_values)), _ => Err(format!("operation not implemented: {}", op_type)), }, @@ -522,35 +522,23 @@ fn prefix_operator_index(values: &[String]) -> String { "0".to_string() } -fn prefix_operator_substr(values: &[String]) -> Result { +fn prefix_operator_substr(values: &[String]) -> String { assert!(values.len() == 3); let subj = &values[0]; - let mut idx = match values[1].parse::() { - Ok(i) => i, - Err(_) => return Err("expected integer as POS arg to 'substr'".to_string()), + let idx = match values[1] + .parse::() + .ok() + .and_then(|v| v.checked_sub(1)) + { + Some(i) => i, + None => return String::new(), }; - let mut len = match values[2].parse::() { + let len = match values[2].parse::() { Ok(i) => i, - Err(_) => return Err("expected integer as LENGTH arg to 'substr'".to_string()), + Err(_) => return String::new(), }; - if idx <= 0 || len <= 0 { - return Ok("".to_string()); - } - - let mut out_str = String::new(); - for ch in subj.chars() { - idx -= 1; - if idx <= 0 { - if len <= 0 { - break; - } - len -= 1; - - out_str.push(ch); - } - } - Ok(out_str) + subj.chars().skip(idx).take(len).collect() } fn bool_as_int(b: bool) -> i64 { diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index bb0760676..6a969b5e9 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -54,3 
+54,32 @@ fn test_and() { new_ucmd!().args(&["", "&", "1"]).run().stdout_is("0\n"); } + +#[test] +fn test_substr() { + new_ucmd!() + .args(&["substr", "abc", "1", "1"]) + .succeeds() + .stdout_only("a\n"); +} + +#[test] +fn test_invalid_substr() { + new_ucmd!() + .args(&["substr", "abc", "0", "1"]) + .fails() + .status_code(1) + .stdout_only("\n"); + + new_ucmd!() + .args(&["substr", "abc", &(std::usize::MAX.to_string() + "0"), "1"]) + .fails() + .status_code(1) + .stdout_only("\n"); + + new_ucmd!() + .args(&["substr", "abc", "0", &(std::usize::MAX.to_string() + "0")]) + .fails() + .status_code(1) + .stdout_only("\n"); +} From 991fcc548cce95318c73629ecbf710deecad4589 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Mon, 24 May 2021 21:07:45 +0300 Subject: [PATCH 124/148] fix: log error messages properly on permission errors --- src/uu/mv/src/mv.rs | 33 +++++++++++++++++++++++++-------- tests/by-util/test_mv.rs | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index f57178a09..95b2fd423 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -291,12 +291,22 @@ fn exec(files: &[PathBuf], b: Behavior) -> i32 { return match rename(source, target, &b) { Err(e) => { - show_error!( - "cannot move ‘{}’ to ‘{}’: {}", - source.display(), - target.display(), - e - ); + let error_as_str = e.to_string(); + let is_perm_denied = error_as_str.contains("Permission denied"); + match e.kind() { + _ => { + show_error!( + "cannot move ‘{}’ to ‘{}’: {}", + source.display(), + target.display(), + if is_perm_denied { + "Permission denied".to_string() + } else { + e.to_string() + } + ); + } + } 1 } _ => 0, @@ -357,15 +367,22 @@ fn move_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 }; if let Err(e) = rename(sourcepath, &targetpath, b) { + let error_as_str = e.to_string(); + let is_perm_denied = error_as_str.contains("Permission denied"); 
show_error!( - "mv: cannot move ‘{}’ to ‘{}’: {}", + "cannot move ‘{}’ to ‘{}’: {}", sourcepath.display(), targetpath.display(), - e + if is_perm_denied { + "Permission denied".to_string() + } else { + e.to_string() + } ); all_successful = false; } } + if all_successful { 0 } else { diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index e8ba43282..47532e2e5 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -587,6 +587,24 @@ fn test_mv_verbose() { )); } +#[test] +fn test_mv_permission_error() { + let scene = TestScenario::new("mkdir"); + let folder1 = "bar"; + let folder2 = "foo"; + let folder_to_move = "bar/foo"; + scene.ucmd().arg("-m444").arg(folder1).succeeds(); + scene.ucmd().arg("-m777").arg(folder2).succeeds(); + + scene + .cmd_keepenv(util_name!()) + .arg(folder2) + .arg(folder_to_move) + .run() + .stderr_str() + .ends_with("Permission denied"); +} + // Todo: // $ at.touch a b From e5e7ca8dc5cc4f4a2cdb9cdc9d24bb24d24b3fb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Mon, 24 May 2021 21:20:59 +0300 Subject: [PATCH 125/148] fix: simplify logic --- src/uu/mv/src/mv.rs | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index 95b2fd423..a0ff1bcc6 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -291,22 +291,12 @@ fn exec(files: &[PathBuf], b: Behavior) -> i32 { return match rename(source, target, &b) { Err(e) => { - let error_as_str = e.to_string(); - let is_perm_denied = error_as_str.contains("Permission denied"); - match e.kind() { - _ => { - show_error!( - "cannot move ‘{}’ to ‘{}’: {}", - source.display(), - target.display(), - if is_perm_denied { - "Permission denied".to_string() - } else { - e.to_string() - } - ); - } - } + show_error!( + "cannot move ‘{}’ to ‘{}’: {}", + source.display(), + target.display(), + e.to_string() + ); 1 } _ => 0, @@ -367,17 +357,11 @@ fn 
move_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 }; if let Err(e) = rename(sourcepath, &targetpath, b) { - let error_as_str = e.to_string(); - let is_perm_denied = error_as_str.contains("Permission denied"); show_error!( "cannot move ‘{}’ to ‘{}’: {}", sourcepath.display(), targetpath.display(), - if is_perm_denied { - "Permission denied".to_string() - } else { - e.to_string() - } + e.to_string() ); all_successful = false; } @@ -469,7 +453,13 @@ fn rename_with_fallback(from: &Path, to: &Path) -> io::Result<()> { ..DirCopyOptions::new() }; if let Err(err) = move_dir(from, to, &options) { - return Err(io::Error::new(io::ErrorKind::Other, format!("{:?}", err))); + return match err.kind { + fs_extra::error::ErrorKind::PermissionDenied => Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "Permission denied", + )), + _ => Err(io::Error::new(io::ErrorKind::Other, format!("{:?}", err))), + }; } } else { fs::copy(from, to).and_then(|_| fs::remove_file(from))?; From 962e4198b2a64b827d197b7ee011d986fe2da264 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 24 May 2021 22:32:23 +0200 Subject: [PATCH 126/148] gnu/ci: limit the number of factor runs --- util/build-gnu.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 667dc8e46..c2659ae11 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -43,7 +43,8 @@ sed -i "s/^[[:blank:]]*PATH=.*/ PATH='${BUILDDIR//\//\\/}\$(PATH_SEPARATOR)'\"\ sed -i 's| tr | /usr/bin/tr |' tests/init.sh make # Generate the factor tests, so they can be fixed -for i in {00..36} +# Used to be 36. 
Reduced to 20 to decrease the log size +for i in {00..20} do make tests/factor/t${i}.sh done From 98f09a6b8b08b705dca590924101fd24e4bc2f76 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 24 May 2021 22:32:48 +0200 Subject: [PATCH 127/148] gnu/ci: don't run seq-precision - logs are too long --- util/build-gnu.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index c2659ae11..e8ab4e44d 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -58,6 +58,10 @@ sed -i -e '/tests\/misc\/invalid-opt.pl/ D' \ -e '/tests\/misc\/help-version-getopt.sh/ D' \ Makefile +# logs are clotted because of this test +sed -i -e '/tests\/misc\/seq-precision.sh/ D' \ + Makefile + # printf doesn't limit the values used in its arg, so this produced ~2GB of output sed -i '/INT_OFLOW/ D' tests/misc/printf.sh From 97d15e34d984d3e73b4cc8f6102523c343ac9075 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 25 May 2021 14:58:56 +0200 Subject: [PATCH 128/148] Disable some factor tests --- util/build-gnu.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/util/build-gnu.sh b/util/build-gnu.sh index e8ab4e44d..9d73450f6 100644 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -48,6 +48,14 @@ for i in {00..20} do make tests/factor/t${i}.sh done + +# strip the long stuff +for i in {21..36} +do + sed -i -e "s/\$(tf)\/t${i}.sh//g" Makefile +done + + grep -rl 'path_prepend_' tests/* | xargs sed -i 's|path_prepend_ ./src||' sed -i -e 's|^seq |/usr/bin/seq |' -e 's|sha1sum |/usr/bin/sha1sum |' tests/factor/t*sh From a77e92cc96ec6f7e93fdf0ef6f8d3ac904323e61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Wed, 26 May 2021 01:53:40 +0300 Subject: [PATCH 129/148] chore: delete unused macros --- src/uucore/src/lib/macros.rs | 56 ------------------------------------ 1 file changed, 56 deletions(-) diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index 637e91f8f..00bdf2939 
100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -176,13 +176,6 @@ macro_rules! msg_invalid_input { }; } -#[macro_export] -macro_rules! snippet_no_file_at_path { - ($path:expr) => { - format!("nonexistent path {}", $path) - }; -} - // -- message templates : invalid input : flag #[macro_export] @@ -229,55 +222,6 @@ macro_rules! msg_opt_invalid_should_be { }; } -// -- message templates : invalid input : args - -#[macro_export] -macro_rules! msg_arg_invalid_value { - ($expects:expr, $received:expr) => { - msg_invalid_input!(format!( - "expects its argument to be {}, but was provided {}", - $expects, $received - )) - }; -} - -#[macro_export] -macro_rules! msg_args_invalid_value { - ($expects:expr, $received:expr) => { - msg_invalid_input!(format!( - "expects its arguments to be {}, but was provided {}", - $expects, $received - )) - }; - ($msg:expr) => { - msg_invalid_input!($msg) - }; -} - -#[macro_export] -macro_rules! msg_args_nonexistent_file { - ($received:expr) => { - msg_args_invalid_value!("paths to files", snippet_no_file_at_path!($received)) - }; -} - -#[macro_export] -macro_rules! 
msg_wrong_number_of_arguments { - () => { - msg_args_invalid_value!("wrong number of arguments") - }; - ($min:expr, $max:expr) => { - msg_args_invalid_value!(format!("expects {}-{} arguments", $min, $max)) - }; - ($exact:expr) => { - if $exact == 1 { - msg_args_invalid_value!("expects 1 argument") - } else { - msg_args_invalid_value!(format!("expects {} arguments", $exact)) - } - }; -} - // -- message templates : invalid input : input combinations #[macro_export] From c78a7937f8debc78b4a0016ad4f1cecd759a3437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Wed, 26 May 2021 02:27:10 +0300 Subject: [PATCH 130/148] chore: delete show_info macro and replace with show_error --- src/uu/cat/src/cat.rs | 2 +- src/uu/chgrp/src/chgrp.rs | 24 ++++++++++++------------ src/uu/chmod/src/chmod.rs | 4 ++-- src/uu/chown/src/chown.rs | 26 +++++++++++++------------- src/uu/dircolors/src/dircolors.rs | 6 +++--- src/uu/install/src/install.rs | 20 ++++++++++---------- src/uu/install/src/mode.rs | 2 +- src/uu/mkdir/src/mkdir.rs | 4 ++-- src/uu/mknod/src/mknod.rs | 2 +- src/uu/mktemp/src/mktemp.rs | 6 +++--- src/uu/nohup/src/nohup.rs | 10 +++++----- src/uu/numfmt/src/numfmt.rs | 2 +- src/uu/stat/src/stat.rs | 6 +++--- src/uu/tee/src/tee.rs | 6 +++--- src/uucore/src/lib/macros.rs | 9 --------- 15 files changed, 60 insertions(+), 69 deletions(-) diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index 8dea096be..69ea902e6 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -347,7 +347,7 @@ fn cat_files(files: Vec, options: &OutputOptions) -> Result<(), u32> { for path in &files { if let Err(err) = cat_path(path, &options, &mut state) { - show_info!("{}: {}", path, err); + show_error!("{}: {}", path, err); error_count += 1; } } diff --git a/src/uu/chgrp/src/chgrp.rs b/src/uu/chgrp/src/chgrp.rs index 2afef7de0..f6afc2805 100644 --- a/src/uu/chgrp/src/chgrp.rs +++ b/src/uu/chgrp/src/chgrp.rs @@ -97,7 +97,7 @@ pub fn uumain(args: 
impl uucore::Args) -> i32 { if recursive { if bit_flag == FTS_PHYSICAL { if derefer == 1 { - show_info!("-R --dereference requires -H or -L"); + show_error!("-R --dereference requires -H or -L"); return 1; } derefer = 0; @@ -132,7 +132,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_gid = meta.gid(); } Err(e) => { - show_info!("failed to get attributes of '{}': {}", file, e); + show_error!("failed to get attributes of '{}': {}", file, e); return 1; } } @@ -143,7 +143,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_gid = g; } _ => { - show_info!("invalid group: {}", matches.free[0].as_str()); + show_error!("invalid group: {}", matches.free[0].as_str()); return 1; } } @@ -235,8 +235,8 @@ impl Chgrper { if let Some(p) = may_exist { if p.parent().is_none() || self.is_bind_root(p) { - show_info!("it is dangerous to operate recursively on '/'"); - show_info!("use --no-preserve-root to override this failsafe"); + show_error!("it is dangerous to operate recursively on '/'"); + show_error!("use --no-preserve-root to override this failsafe"); return 1; } } @@ -250,12 +250,12 @@ impl Chgrper { self.verbosity.clone(), ) { Ok(n) => { - show_info!("{}", n); + show_error!("{}", n); 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -275,7 +275,7 @@ impl Chgrper { for entry in WalkDir::new(root).follow_links(follow).min_depth(1) { let entry = unwrap!(entry, e, { ret = 1; - show_info!("{}", e); + show_error!("{}", e); continue; }); let path = entry.path(); @@ -290,13 +290,13 @@ impl Chgrper { ret = match wrap_chgrp(path, &meta, self.dest_gid, follow, self.verbosity.clone()) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -313,7 +313,7 @@ impl Chgrper { unwrap!(path.metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot access '{}': 
{}", path.display(), e), + _ => show_error!("cannot access '{}': {}", path.display(), e), } return None; }) @@ -321,7 +321,7 @@ impl Chgrper { unwrap!(path.symlink_metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot dereference '{}': {}", path.display(), e), + _ => show_error!("cannot dereference '{}': {}", path.display(), e), } return None; }) diff --git a/src/uu/chmod/src/chmod.rs b/src/uu/chmod/src/chmod.rs index 88e3403fe..9dea3c842 100644 --- a/src/uu/chmod/src/chmod.rs +++ b/src/uu/chmod/src/chmod.rs @@ -316,7 +316,7 @@ impl Chmoder { show_error!("{}", err); } if self.verbose { - show_info!( + show_error!( "failed to change mode of file '{}' from {:o} ({}) to {:o} ({})", file.display(), fperm, @@ -328,7 +328,7 @@ impl Chmoder { Err(1) } else { if self.verbose || self.changes { - show_info!( + show_error!( "mode of '{}' changed from {:o} ({}) to {:o} ({})", file.display(), fperm, diff --git a/src/uu/chown/src/chown.rs b/src/uu/chown/src/chown.rs index ff9c42dd0..3d0b25814 100644 --- a/src/uu/chown/src/chown.rs +++ b/src/uu/chown/src/chown.rs @@ -199,7 +199,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if recursive { if bit_flag == FTS_PHYSICAL { if derefer == 1 { - show_info!("-R --dereference requires -H or -L"); + show_error!("-R --dereference requires -H or -L"); return 1; } derefer = 0; @@ -227,7 +227,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { Ok((Some(uid), Some(gid))) => IfFrom::UserGroup(uid, gid), Ok((None, None)) => IfFrom::All, Err(e) => { - show_info!("{}", e); + show_error!("{}", e); return 1; } } @@ -244,7 +244,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_uid = Some(meta.uid()); } Err(e) => { - show_info!("failed to get attributes of '{}': {}", file, e); + show_error!("failed to get attributes of '{}': {}", file, e); return 1; } } @@ -255,7 +255,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { dest_gid = g; } Err(e) => { - show_info!("{}", e); + show_error!("{}", e); return 1; } } @@ 
-377,8 +377,8 @@ impl Chowner { if let Some(p) = may_exist { if p.parent().is_none() { - show_info!("it is dangerous to operate recursively on '/'"); - show_info!("use --no-preserve-root to override this failsafe"); + show_error!("it is dangerous to operate recursively on '/'"); + show_error!("use --no-preserve-root to override this failsafe"); return 1; } } @@ -395,13 +395,13 @@ impl Chowner { ) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -424,7 +424,7 @@ impl Chowner { for entry in WalkDir::new(root).follow_links(follow).min_depth(1) { let entry = unwrap!(entry, e, { ret = 1; - show_info!("{}", e); + show_error!("{}", e); continue; }); let path = entry.path(); @@ -450,13 +450,13 @@ impl Chowner { ) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } 0 } Err(e) => { if self.verbosity != Verbosity::Silent { - show_info!("{}", e); + show_error!("{}", e); } 1 } @@ -472,7 +472,7 @@ impl Chowner { unwrap!(path.metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot access '{}': {}", path.display(), e), + _ => show_error!("cannot access '{}': {}", path.display(), e), } return None; }) @@ -480,7 +480,7 @@ impl Chowner { unwrap!(path.symlink_metadata(), e, { match self.verbosity { Silent => (), - _ => show_info!("cannot dereference '{}': {}", path.display(), e), + _ => show_error!("cannot dereference '{}': {}", path.display(), e), } return None; }) diff --git a/src/uu/dircolors/src/dircolors.rs b/src/uu/dircolors/src/dircolors.rs index a2d819620..b6942c2d2 100644 --- a/src/uu/dircolors/src/dircolors.rs +++ b/src/uu/dircolors/src/dircolors.rs @@ -105,7 +105,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { if out_format == OutputFmt::Unknown { match guess_syntax() { OutputFmt::Unknown => { - show_info!("no SHELL environment variable, and no shell type option given"); + 
show_error!("no SHELL environment variable, and no shell type option given"); return 1; } fmt => out_format = fmt, @@ -130,7 +130,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { ) } Err(e) => { - show_info!("{}: {}", matches.free[0], e); + show_error!("{}: {}", matches.free[0], e); return 1; } } @@ -141,7 +141,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { 0 } Err(s) => { - show_info!("{}", s); + show_error!("{}", s); 1 } } diff --git a/src/uu/install/src/install.rs b/src/uu/install/src/install.rs index 4ce665b80..bb51a7606 100644 --- a/src/uu/install/src/install.rs +++ b/src/uu/install/src/install.rs @@ -370,13 +370,13 @@ fn directory(paths: Vec, b: Behavior) -> i32 { // created ancestor directories will have the default mode. Hence it is safe to use // fs::create_dir_all and then only modify the target's dir mode. if let Err(e) = fs::create_dir_all(path) { - show_info!("{}: {}", path.display(), e); + show_error!("{}: {}", path.display(), e); all_successful = false; continue; } if b.verbose { - show_info!("creating directory '{}'", path.display()); + show_error!("creating directory '{}'", path.display()); } } @@ -461,7 +461,7 @@ fn copy_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 let mut all_successful = true; for sourcepath in files.iter() { if !sourcepath.exists() { - show_info!( + show_error!( "cannot stat '{}': No such file or directory", sourcepath.display() ); @@ -471,7 +471,7 @@ fn copy_files_into_dir(files: &[PathBuf], target_dir: &Path, b: &Behavior) -> i3 } if sourcepath.is_dir() { - show_info!("omitting directory '{}'", sourcepath.display()); + show_error!("omitting directory '{}'", sourcepath.display()); all_successful = false; continue; } @@ -588,10 +588,10 @@ fn copy(from: &Path, to: &Path, b: &Behavior) -> Result<(), ()> { ) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } } - Err(e) => show_info!("{}", e), + Err(e) => show_error!("{}", e), } } @@ -608,10 +608,10 @@ fn 
copy(from: &Path, to: &Path, b: &Behavior) -> Result<(), ()> { match wrap_chgrp(to, &meta, group_id, false, Verbosity::Normal) { Ok(n) => { if !n.is_empty() { - show_info!("{}", n); + show_error!("{}", n); } } - Err(e) => show_info!("{}", e), + Err(e) => show_error!("{}", e), } } @@ -626,12 +626,12 @@ fn copy(from: &Path, to: &Path, b: &Behavior) -> Result<(), ()> { match set_file_times(to, accessed_time, modified_time) { Ok(_) => {} - Err(e) => show_info!("{}", e), + Err(e) => show_error!("{}", e), } } if b.verbose { - show_info!("'{}' -> '{}'", from.display(), to.display()); + show_error!("'{}' -> '{}'", from.display(), to.display()); } Ok(()) diff --git a/src/uu/install/src/mode.rs b/src/uu/install/src/mode.rs index a3de40c68..b8d5cd839 100644 --- a/src/uu/install/src/mode.rs +++ b/src/uu/install/src/mode.rs @@ -23,7 +23,7 @@ pub fn parse(mode_string: &str, considering_dir: bool) -> Result { pub fn chmod(path: &Path, mode: u32) -> Result<(), ()> { use std::os::unix::fs::PermissionsExt; fs::set_permissions(path, fs::Permissions::from_mode(mode)).map_err(|err| { - show_info!("{}: chmod failed with error {}", path.display(), err); + show_error!("{}: chmod failed with error {}", path.display(), err); }) } diff --git a/src/uu/mkdir/src/mkdir.rs b/src/uu/mkdir/src/mkdir.rs index 6b9fd68ea..861ef5075 100644 --- a/src/uu/mkdir/src/mkdir.rs +++ b/src/uu/mkdir/src/mkdir.rs @@ -101,7 +101,7 @@ fn exec(dirs: Vec, recursive: bool, mode: u16, verbose: bool) -> i32 { if !recursive { if let Some(parent) = path.parent() { if parent != empty && !parent.exists() { - show_info!( + show_error!( "cannot create directory '{}': No such file or directory", path.display() ); @@ -125,7 +125,7 @@ fn mkdir(path: &Path, recursive: bool, mode: u16, verbose: bool) -> i32 { fs::create_dir }; if let Err(e) = create_dir(path) { - show_info!("{}: {}", path.display(), e.to_string()); + show_error!("{}: {}", path.display(), e.to_string()); return 1; } diff --git a/src/uu/mknod/src/mknod.rs 
b/src/uu/mknod/src/mknod.rs index 5b6c2fa8c..e0cf62024 100644 --- a/src/uu/mknod/src/mknod.rs +++ b/src/uu/mknod/src/mknod.rs @@ -136,7 +136,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let mode = match get_mode(&matches) { Ok(mode) => mode, Err(err) => { - show_info!("{}", err); + show_error!("{}", err); return 1; } }; diff --git a/src/uu/mktemp/src/mktemp.rs b/src/uu/mktemp/src/mktemp.rs index ed767ffe0..112c2fb94 100644 --- a/src/uu/mktemp/src/mktemp.rs +++ b/src/uu/mktemp/src/mktemp.rs @@ -157,7 +157,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } if matches.is_present(OPT_TMPDIR) && PathBuf::from(prefix).is_absolute() { - show_info!( + show_error!( "invalid template, ‘{}’; with --tmpdir, it may not be absolute", template ); @@ -229,7 +229,7 @@ fn exec( } Err(e) => { if !quiet { - show_info!("{}: {}", e, tmpdir.display()); + show_error!("{}: {}", e, tmpdir.display()); } return 1; } @@ -244,7 +244,7 @@ fn exec( Ok(f) => f, Err(e) => { if !quiet { - show_info!("failed to create tempfile: {}", e); + show_error!("failed to create tempfile: {}", e); } return 1; } diff --git a/src/uu/nohup/src/nohup.rs b/src/uu/nohup/src/nohup.rs index 83153ad37..93d9b5e45 100644 --- a/src/uu/nohup/src/nohup.rs +++ b/src/uu/nohup/src/nohup.rs @@ -122,13 +122,13 @@ fn find_stdout() -> File { .open(Path::new(NOHUP_OUT)) { Ok(t) => { - show_info!("ignoring input and appending output to '{}'", NOHUP_OUT); + show_error!("ignoring input and appending output to '{}'", NOHUP_OUT); t } Err(e1) => { let home = match env::var("HOME") { Err(_) => { - show_info!("failed to open '{}': {}", NOHUP_OUT, e1); + show_error!("failed to open '{}': {}", NOHUP_OUT, e1); exit!(internal_failure_code) } Ok(h) => h, @@ -143,12 +143,12 @@ fn find_stdout() -> File { .open(&homeout) { Ok(t) => { - show_info!("ignoring input and appending output to '{}'", homeout_str); + show_error!("ignoring input and appending output to '{}'", homeout_str); t } Err(e2) => { - show_info!("failed to open '{}': {}", 
NOHUP_OUT, e1); - show_info!("failed to open '{}': {}", homeout_str, e2); + show_error!("failed to open '{}': {}", NOHUP_OUT, e1); + show_error!("failed to open '{}': {}", homeout_str, e2); exit!(internal_failure_code) } } diff --git a/src/uu/numfmt/src/numfmt.rs b/src/uu/numfmt/src/numfmt.rs index e9a476956..6eba699b2 100644 --- a/src/uu/numfmt/src/numfmt.rs +++ b/src/uu/numfmt/src/numfmt.rs @@ -216,7 +216,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { match result { Err(e) => { std::io::stdout().flush().expect("error flushing stdout"); - show_info!("{}", e); + show_error!("{}", e); 1 } _ => 0, diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 5bb0e5f12..582d59841 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -749,7 +749,7 @@ impl Stater { } } Err(e) => { - show_info!("cannot stat '{}': {}", file, e); + show_error!("cannot stat '{}': {}", file, e); return 1; } } @@ -842,7 +842,7 @@ impl Stater { } } Err(e) => { - show_info!("cannot read file system information for '{}': {}", file, e); + show_error!("cannot read file system information for '{}': {}", file, e); return 1; } } @@ -1001,7 +1001,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { match Stater::new(matches) { Ok(stater) => stater.exec(), Err(e) => { - show_info!("{}", e); + show_error!("{}", e); 1 } } diff --git a/src/uu/tee/src/tee.rs b/src/uu/tee/src/tee.rs index 7c6a86b4c..c21559b3b 100644 --- a/src/uu/tee/src/tee.rs +++ b/src/uu/tee/src/tee.rs @@ -166,7 +166,7 @@ impl Write for MultiWriter { let result = writer.write_all(buf); match result { Err(f) => { - show_info!("{}: {}", writer.name, f.to_string()); + show_error!("{}: {}", writer.name, f.to_string()); false } _ => true, @@ -180,7 +180,7 @@ impl Write for MultiWriter { let result = writer.flush(); match result { Err(f) => { - show_info!("{}: {}", writer.name, f.to_string()); + show_error!("{}: {}", writer.name, f.to_string()); false } _ => true, @@ -213,7 +213,7 @@ impl Read for NamedReader { fn 
read(&mut self, buf: &mut [u8]) -> Result { match self.inner.read(buf) { Err(f) => { - show_info!("{}: {}", Path::new("stdin").display(), f.to_string()); + show_error!("{}: {}", Path::new("stdin").display(), f.to_string()); Err(f) } okay => okay, diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index 637e91f8f..dde059131 100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -47,15 +47,6 @@ macro_rules! show_warning( }) ); -/// Show an info message to stderr in a silimar style to GNU coreutils. -#[macro_export] -macro_rules! show_info( - ($($args:tt)+) => ({ - eprint!("{}: ", executable!()); - eprintln!($($args)+); - }) -); - /// Show a bad inocation help message in a similar style to GNU coreutils. #[macro_export] macro_rules! show_usage_error( From 898d2eb48956e38440b5a7328d0589b5d5793281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Wed, 26 May 2021 02:32:02 +0300 Subject: [PATCH 131/148] chore: delete 'error:' prefix on show_error --- src/uucore/src/lib/macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uucore/src/lib/macros.rs b/src/uucore/src/lib/macros.rs index dde059131..616a1b57c 100644 --- a/src/uucore/src/lib/macros.rs +++ b/src/uucore/src/lib/macros.rs @@ -25,7 +25,7 @@ macro_rules! executable( #[macro_export] macro_rules! show_error( ($($args:tt)+) => ({ - eprint!("{}: error: ", executable!()); + eprint!("{}: ", executable!()); eprintln!($($args)+); }) ); From 7240b1289523a985f6bc9999b0fec4a0cd903557 Mon Sep 17 00:00:00 2001 From: Matt Blessed Date: Tue, 25 May 2021 16:38:34 -0400 Subject: [PATCH 132/148] uucore: implement backup control Most of these changes were sourced from mv's existing backup control implementation. A later commit will update the mv utility to use this new share backup control. 
--- src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/mods.rs | 1 + src/uucore/src/lib/mods/backup_control.rs | 97 +++++++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 src/uucore/src/lib/mods/backup_control.rs diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index eb630f53a..c17f14516 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -25,6 +25,7 @@ mod features; // feature-gated code modules mod mods; // core cross-platform modules // * cross-platform modules +pub use crate::mods::backup_control; pub use crate::mods::coreopts; pub use crate::mods::os; pub use crate::mods::panic; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index 74725e141..2689361a0 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -1,5 +1,6 @@ // mods ~ cross-platforms modules (core/bundler file) +pub mod backup_control; pub mod coreopts; pub mod os; pub mod panic; diff --git a/src/uucore/src/lib/mods/backup_control.rs b/src/uucore/src/lib/mods/backup_control.rs new file mode 100644 index 000000000..6004ae84d --- /dev/null +++ b/src/uucore/src/lib/mods/backup_control.rs @@ -0,0 +1,97 @@ +use std::{ + env, + path::{Path, PathBuf}, +}; + +pub static BACKUP_CONTROL_VALUES: &[&str] = &[ + "simple", "never", "numbered", "t", "existing", "nil", "none", "off", +]; + +pub static BACKUP_CONTROL_LONG_HELP: &str = "The backup suffix is '~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX. 
Here are the version control values: + +none, off + never make backups (even if --backup is given) + +numbered, t + make numbered backups + +existing, nil + numbered if numbered backups exist, simple otherwise + +simple, never + always make simple backups"; + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum BackupMode { + NoBackup, + SimpleBackup, + NumberedBackup, + ExistingBackup, +} + +pub fn determine_backup_suffix(supplied_suffix: Option<&str>) -> String { + if let Some(suffix) = supplied_suffix { + String::from(suffix) + } else { + env::var("SIMPLE_BACKUP_SUFFIX").unwrap_or("~".to_owned()) + } +} + +pub fn determine_backup_mode(backup_opt_exists: bool, backup_opt: Option<&str>) -> BackupMode { + if backup_opt_exists { + match backup_opt.map(String::from) { + // default is existing, see: + // https://www.gnu.org/software/coreutils/manual/html_node/Backup-options.html + None => BackupMode::ExistingBackup, + Some(mode) => match &mode[..] { + "simple" | "never" => BackupMode::SimpleBackup, + "numbered" | "t" => BackupMode::NumberedBackup, + "existing" | "nil" => BackupMode::ExistingBackup, + "none" | "off" => BackupMode::NoBackup, + _ => panic!(), // cannot happen as it is managed by clap + }, + } + } else { + BackupMode::NoBackup + } +} + +pub fn get_backup_path( + backup_mode: BackupMode, + backup_path: &Path, + suffix: &str, +) -> Option { + match backup_mode { + BackupMode::NoBackup => None, + BackupMode::SimpleBackup => Some(simple_backup_path(backup_path, suffix)), + BackupMode::NumberedBackup => Some(numbered_backup_path(backup_path)), + BackupMode::ExistingBackup => Some(existing_backup_path(backup_path, suffix)), + } +} + +pub fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { + let mut p = path.to_string_lossy().into_owned(); + p.push_str(suffix); + PathBuf::from(p) +} + +pub fn numbered_backup_path(path: &Path) -> PathBuf { + for i in 1_u64.. 
{ + let path_str = &format!("{}.~{}~", path.to_string_lossy(), i); + let path = Path::new(path_str); + if !path.exists() { + return path.to_path_buf(); + } + } + panic!("cannot create backup") +} + +pub fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { + let test_path_str = &format!("{}.~1~", path.to_string_lossy()); + let test_path = Path::new(test_path_str); + if test_path.exists() { + numbered_backup_path(path) + } else { + simple_backup_path(path, suffix) + } +} From 071899d24d3d1c21d65d4295292ec3b5600c8da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Wed, 26 May 2021 02:45:53 +0300 Subject: [PATCH 133/148] tests: delete 'error:' prefix from the tests --- tests/by-util/test_base32.rs | 6 +-- tests/by-util/test_base64.rs | 6 +-- tests/by-util/test_basename.rs | 9 ++-- tests/by-util/test_chmod.rs | 2 +- tests/by-util/test_chroot.rs | 8 ++- tests/by-util/test_cksum.rs | 4 +- tests/by-util/test_csplit.rs | 98 +++++++++++++++++----------------- tests/by-util/test_cut.rs | 4 +- tests/by-util/test_du.rs | 2 +- tests/by-util/test_expr.rs | 4 +- tests/by-util/test_fmt.rs | 2 +- tests/by-util/test_id.rs | 2 +- tests/by-util/test_install.rs | 2 +- tests/by-util/test_join.rs | 4 +- tests/by-util/test_link.rs | 4 +- tests/by-util/test_ln.rs | 2 +- tests/by-util/test_logname.rs | 2 +- tests/by-util/test_ls.rs | 6 +-- tests/by-util/test_mkfifo.rs | 6 +-- tests/by-util/test_mktemp.rs | 2 +- tests/by-util/test_mv.rs | 4 +- tests/by-util/test_nice.rs | 2 +- tests/by-util/test_rm.rs | 2 +- tests/by-util/test_rmdir.rs | 8 +-- tests/by-util/test_sort.rs | 15 +++--- tests/by-util/test_stdbuf.rs | 11 ++-- tests/by-util/test_sum.rs | 6 +-- tests/by-util/test_uniq.rs | 2 +- tests/by-util/test_unlink.rs | 6 +-- tests/by-util/test_whoami.rs | 2 +- 30 files changed, 114 insertions(+), 119 deletions(-) diff --git a/tests/by-util/test_base32.rs b/tests/by-util/test_base32.rs index fd49aa951..e36c376be 100644 --- 
a/tests/by-util/test_base32.rs +++ b/tests/by-util/test_base32.rs @@ -98,7 +98,7 @@ fn test_wrap_bad_arg() { .arg(wrap_param) .arg("b") .fails() - .stderr_only("base32: error: Invalid wrap size: ‘b’: invalid digit found in string\n"); + .stderr_only("base32: Invalid wrap size: ‘b’: invalid digit found in string\n"); } } @@ -109,7 +109,7 @@ fn test_base32_extra_operand() { .arg("a.txt") .arg("a.txt") .fails() - .stderr_only("base32: error: extra operand ‘a.txt’"); + .stderr_only("base32: extra operand ‘a.txt’"); } #[test] @@ -117,5 +117,5 @@ fn test_base32_file_not_found() { new_ucmd!() .arg("a.txt") .fails() - .stderr_only("base32: error: a.txt: No such file or directory"); + .stderr_only("base32: a.txt: No such file or directory"); } diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs index 8d9dc5639..89405d791 100644 --- a/tests/by-util/test_base64.rs +++ b/tests/by-util/test_base64.rs @@ -88,7 +88,7 @@ fn test_wrap_bad_arg() { .arg(wrap_param) .arg("b") .fails() - .stderr_only("base64: error: Invalid wrap size: ‘b’: invalid digit found in string\n"); + .stderr_only("base64: Invalid wrap size: ‘b’: invalid digit found in string\n"); } } @@ -99,7 +99,7 @@ fn test_base64_extra_operand() { .arg("a.txt") .arg("a.txt") .fails() - .stderr_only("base64: error: extra operand ‘a.txt’"); + .stderr_only("base64: extra operand ‘a.txt’"); } #[test] @@ -107,5 +107,5 @@ fn test_base64_file_not_found() { new_ucmd!() .arg("a.txt") .fails() - .stderr_only("base64: error: a.txt: No such file or directory"); + .stderr_only("base64: a.txt: No such file or directory"); } diff --git a/tests/by-util/test_basename.rs b/tests/by-util/test_basename.rs index baf15f78a..1d26a922a 100644 --- a/tests/by-util/test_basename.rs +++ b/tests/by-util/test_basename.rs @@ -109,7 +109,7 @@ fn test_no_args() { fn test_no_args_output() { new_ucmd!() .fails() - .stderr_is("basename: error: missing operand\nTry 'basename --help' for more information."); + .stderr_is("basename: missing 
operand\nTry 'basename --help' for more information."); } #[test] @@ -119,9 +119,10 @@ fn test_too_many_args() { #[test] fn test_too_many_args_output() { - new_ucmd!().args(&["a", "b", "c"]).fails().stderr_is( - "basename: error: extra operand 'c'\nTry 'basename --help' for more information.", - ); + new_ucmd!() + .args(&["a", "b", "c"]) + .fails() + .stderr_is("basename: extra operand 'c'\nTry 'basename --help' for more information."); } #[cfg(any(unix, target_os = "redox"))] diff --git a/tests/by-util/test_chmod.rs b/tests/by-util/test_chmod.rs index 3958c0a36..733722b7c 100644 --- a/tests/by-util/test_chmod.rs +++ b/tests/by-util/test_chmod.rs @@ -338,7 +338,7 @@ fn test_chmod_preserve_root() { .arg("755") .arg("/") .fails() - .stderr_contains(&"chmod: error: it is dangerous to operate recursively on '/'"); + .stderr_contains(&"chmod: it is dangerous to operate recursively on '/'"); } #[test] diff --git a/tests/by-util/test_chroot.rs b/tests/by-util/test_chroot.rs index e2e355e14..0479e7c3a 100644 --- a/tests/by-util/test_chroot.rs +++ b/tests/by-util/test_chroot.rs @@ -21,7 +21,7 @@ fn test_enter_chroot_fails() { assert!(result .stderr_str() - .starts_with("chroot: error: cannot chroot to jail: Operation not permitted (os error 1)")); + .starts_with("chroot: cannot chroot to jail: Operation not permitted (os error 1)")); } #[test] @@ -32,7 +32,7 @@ fn test_no_such_directory() { ucmd.arg("a") .fails() - .stderr_is("chroot: error: cannot change root directory to `a`: no such directory"); + .stderr_is("chroot: cannot change root directory to `a`: no such directory"); } #[test] @@ -43,9 +43,7 @@ fn test_invalid_user_spec() { let result = ucmd.arg("a").arg("--userspec=ARABA:").fails(); - assert!(result - .stderr_str() - .starts_with("chroot: error: invalid userspec")); + assert!(result.stderr_str().starts_with("chroot: invalid userspec")); } #[test] diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index 592e45c58..81ef4c177 100644 --- 
a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -66,7 +66,7 @@ fn test_invalid_file() { .arg(folder_name) .fails() .no_stdout() - .stderr_contains("cksum: error: 'asdf' No such file or directory"); + .stderr_contains("cksum: 'asdf' No such file or directory"); // Then check when the file is of an invalid type at.mkdir(folder_name); @@ -74,7 +74,7 @@ fn test_invalid_file() { .arg(folder_name) .fails() .no_stdout() - .stderr_contains("cksum: error: 'asdf' Is a directory"); + .stderr_contains("cksum: 'asdf' Is a directory"); } // Make sure crc is correct for files larger than 32 bytes diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs index 51cab483c..ae0885ff8 100644 --- a/tests/by-util/test_csplit.rs +++ b/tests/by-util/test_csplit.rs @@ -208,7 +208,7 @@ fn test_up_to_match_repeat_over() { ucmd.args(&["numbers50.txt", "/9$/", "{50}"]) .fails() .stdout_is("16\n29\n30\n30\n30\n6\n") - .stderr_is("csplit: error: '/9$/': match not found on repetition 5"); + .stderr_is("csplit: '/9$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -219,7 +219,7 @@ fn test_up_to_match_repeat_over() { ucmd.args(&["numbers50.txt", "/9$/", "{50}", "-k"]) .fails() .stdout_is("16\n29\n30\n30\n30\n6\n") - .stderr_is("csplit: error: '/9$/': match not found on repetition 5"); + .stderr_is("csplit: '/9$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -365,7 +365,7 @@ fn test_option_keep() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["-k", "numbers50.txt", "/20/", "/nope/"]) .fails() - .stderr_is("csplit: error: '/nope/': match not found") + .stderr_is("csplit: '/nope/': match not found") .stdout_is("48\n93\n"); let count = glob(&at.plus_as_string("xx*")) @@ -541,7 +541,7 @@ fn test_up_to_match_context_overflow() { ucmd.args(&["numbers50.txt", "/45/+10"]) .fails() .stdout_is("141\n") - 
.stderr_is("csplit: error: '/45/+10': line number out of range"); + .stderr_is("csplit: '/45/+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -552,7 +552,7 @@ fn test_up_to_match_context_overflow() { ucmd.args(&["numbers50.txt", "/45/+10", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/45/+10': line number out of range"); + .stderr_is("csplit: '/45/+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -567,7 +567,7 @@ fn test_skip_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "%5%-10"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '%5%-10': line number out of range"); + .stderr_is("csplit: '%5%-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -578,7 +578,7 @@ fn test_skip_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "%5%-10", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '%5%-10': line number out of range"); + .stderr_is("csplit: '%5%-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -592,7 +592,7 @@ fn test_skip_to_match_context_overflow() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%45%+10"]) .fails() - .stderr_only("csplit: error: '%45%+10': line number out of range"); + .stderr_only("csplit: '%45%+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -602,7 +602,7 @@ fn test_skip_to_match_context_overflow() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%45%+10", "-k"]) .fails() - .stderr_only("csplit: error: '%45%+10': line number out of range"); + .stderr_only("csplit: '%45%+10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -616,7 +616,7 @@ fn test_up_to_no_match1() { ucmd.args(&["numbers50.txt", 
"/4/", "/nope/"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -627,7 +627,7 @@ fn test_up_to_no_match1() { ucmd.args(&["numbers50.txt", "/4/", "/nope/", "-k"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -643,7 +643,7 @@ fn test_up_to_no_match2() { ucmd.args(&["numbers50.txt", "/4/", "/nope/", "{50}"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -654,7 +654,7 @@ fn test_up_to_no_match2() { ucmd.args(&["numbers50.txt", "/4/", "/nope/", "{50}", "-k"]) .fails() .stdout_is("6\n135\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -670,7 +670,7 @@ fn test_up_to_no_match3() { ucmd.args(&["numbers50.txt", "/0$/", "{50}"]) .fails() .stdout_is("18\n30\n30\n30\n30\n3\n") - .stderr_is("csplit: error: '/0$/': match not found on repetition 5"); + .stderr_is("csplit: '/0$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -681,7 +681,7 @@ fn test_up_to_no_match3() { ucmd.args(&["numbers50.txt", "/0$/", "{50}", "-k"]) .fails() .stdout_is("18\n30\n30\n30\n30\n3\n") - .stderr_is("csplit: error: '/0$/': match not found on repetition 5"); + .stderr_is("csplit: '/0$/': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -701,7 +701,7 @@ fn test_up_to_no_match4() { ucmd.args(&["numbers50.txt", "/nope/", "/4/"]) .fails() 
.stdout_is("141\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -712,7 +712,7 @@ fn test_up_to_no_match4() { ucmd.args(&["numbers50.txt", "/nope/", "/4/", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -741,7 +741,7 @@ fn test_up_to_no_match6() { ucmd.args(&["numbers50.txt", "/nope/-5"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/-5': match not found"); + .stderr_is("csplit: '/nope/-5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -752,7 +752,7 @@ fn test_up_to_no_match6() { ucmd.args(&["numbers50.txt", "/nope/-5", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/-5': match not found"); + .stderr_is("csplit: '/nope/-5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -767,7 +767,7 @@ fn test_up_to_no_match7() { ucmd.args(&["numbers50.txt", "/nope/+5"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/+5': match not found"); + .stderr_is("csplit: '/nope/+5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -778,7 +778,7 @@ fn test_up_to_no_match7() { ucmd.args(&["numbers50.txt", "/nope/+5", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/+5': match not found"); + .stderr_is("csplit: '/nope/+5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -792,7 +792,7 @@ fn test_skip_to_no_match1() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = 
glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -805,7 +805,7 @@ fn test_skip_to_no_match2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%", "{50}"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -818,7 +818,7 @@ fn test_skip_to_no_match3() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%0$%", "{50}"]) .fails() - .stderr_only("csplit: error: '%0$%': match not found on repetition 5"); + .stderr_only("csplit: '%0$%': match not found on repetition 5"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -831,7 +831,7 @@ fn test_skip_to_no_match4() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%", "/4/"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -858,7 +858,7 @@ fn test_skip_to_no_match6() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%-5"]) .fails() - .stderr_only("csplit: error: '%nope%-5': match not found"); + .stderr_only("csplit: '%nope%-5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -871,7 +871,7 @@ fn test_skip_to_no_match7() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%+5"]) .fails() - .stderr_only("csplit: error: '%nope%+5': match not found"); + .stderr_only("csplit: '%nope%+5': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -884,7 +884,7 @@ fn test_no_match() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "%nope%"]) .fails() - .stderr_only("csplit: error: '%nope%': match not found"); + .stderr_only("csplit: '%nope%': match not found"); let count = 
glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -895,7 +895,7 @@ fn test_no_match() { ucmd.args(&["numbers50.txt", "/nope/"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '/nope/': match not found"); + .stderr_is("csplit: '/nope/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -992,7 +992,7 @@ fn test_too_small_linenum_repeat() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/20/", "10", "{*}"]) .fails() - .stderr_is("csplit: error: '10': line number out of range on repetition 5") + .stderr_is("csplit: '10': line number out of range on repetition 5") .stdout_is("48\n0\n0\n30\n30\n30\n3\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1003,7 +1003,7 @@ fn test_too_small_linenum_repeat() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/20/", "10", "{*}", "-k"]) .fails() - .stderr_is("csplit: error: '10': line number out of range on repetition 5") + .stderr_is("csplit: '10': line number out of range on repetition 5") .stdout_is("48\n0\n0\n30\n30\n30\n3\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1025,7 +1025,7 @@ fn test_linenum_out_of_range1() { ucmd.args(&["numbers50.txt", "100"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1036,7 +1036,7 @@ fn test_linenum_out_of_range1() { ucmd.args(&["numbers50.txt", "100", "-k"]) .fails() .stdout_is("141\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1051,7 +1051,7 @@ fn test_linenum_out_of_range2() { ucmd.args(&["numbers50.txt", "10", "100"]) .fails() .stdout_is("18\n123\n") - .stderr_is("csplit: error: '100': line number out of range"); + 
.stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1062,7 +1062,7 @@ fn test_linenum_out_of_range2() { ucmd.args(&["numbers50.txt", "10", "100", "-k"]) .fails() .stdout_is("18\n123\n") - .stderr_is("csplit: error: '100': line number out of range"); + .stderr_is("csplit: '100': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1078,7 +1078,7 @@ fn test_linenum_out_of_range3() { ucmd.args(&["numbers50.txt", "40", "{2}"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1089,7 +1089,7 @@ fn test_linenum_out_of_range3() { ucmd.args(&["numbers50.txt", "40", "{2}", "-k"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1105,7 +1105,7 @@ fn test_linenum_out_of_range4() { ucmd.args(&["numbers50.txt", "40", "{*}"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1116,7 +1116,7 @@ fn test_linenum_out_of_range4() { ucmd.args(&["numbers50.txt", "40", "{*}", "-k"]) .fails() .stdout_is("108\n33\n") - .stderr_is("csplit: error: '40': line number out of range on repetition 1"); + .stderr_is("csplit: '40': line number out of range on repetition 1"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1132,7 +1132,7 @@ fn 
test_skip_to_match_negative_offset_before_a_match() { ucmd.args(&["numbers50.txt", "/20/-10", "/15/"]) .fails() .stdout_is("18\n123\n") - .stderr_is("csplit: error: '/15/': match not found"); + .stderr_is("csplit: '/15/': match not found"); let count = glob(&at.plus_as_string("xx*")) .expect("there should be splits created") @@ -1177,7 +1177,7 @@ fn test_corner_case2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/10/-5", "/10/"]) .fails() - .stderr_is("csplit: error: '/10/': match not found") + .stderr_is("csplit: '/10/': match not found") .stdout_is("8\n133\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1191,7 +1191,7 @@ fn test_corner_case3() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "/15/-3", "14", "/15/"]) .fails() - .stderr_is("csplit: error: '/15/': match not found") + .stderr_is("csplit: '/15/': match not found") .stdout_is("24\n6\n111\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1223,7 +1223,7 @@ fn test_up_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "/5/-10"]) .fails() .stdout_is("0\n141\n") - .stderr_is("csplit: error: '/5/-10': line number out of range"); + .stderr_is("csplit: '/5/-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -1234,7 +1234,7 @@ fn test_up_to_match_context_underflow() { ucmd.args(&["numbers50.txt", "/5/-10", "-k"]) .fails() .stdout_is("0\n141\n") - .stderr_is("csplit: error: '/5/-10': line number out of range"); + .stderr_is("csplit: '/5/-10': line number out of range"); let count = glob(&at.plus_as_string("xx*")) .expect("counting splits") @@ -1251,7 +1251,7 @@ fn test_linenum_range_with_up_to_match1() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-5"]) .fails() - .stderr_is("csplit: error: '/12/-5': line number out of range") + .stderr_is("csplit: '/12/-5': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ 
-1262,7 +1262,7 @@ fn test_linenum_range_with_up_to_match1() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-5", "-k"]) .fails() - .stderr_is("csplit: error: '/12/-5': line number out of range") + .stderr_is("csplit: '/12/-5': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1281,7 +1281,7 @@ fn test_linenum_range_with_up_to_match2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-15"]) .fails() - .stderr_is("csplit: error: '/12/-15': line number out of range") + .stderr_is("csplit: '/12/-15': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1292,7 +1292,7 @@ fn test_linenum_range_with_up_to_match2() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/12/-15", "-k"]) .fails() - .stderr_is("csplit: error: '/12/-15': line number out of range") + .stderr_is("csplit: '/12/-15': line number out of range") .stdout_is("18\n0\n123\n"); let count = glob(&at.plus_as_string("xx*")) @@ -1310,7 +1310,7 @@ fn test_linenum_range_with_up_to_match3() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.args(&["numbers50.txt", "10", "/10/", "-k"]) .fails() - .stderr_is("csplit: error: '/10/': match not found") + .stderr_is("csplit: '/10/': match not found") .stdout_is("18\n123\n"); let count = glob(&at.plus_as_string("xx*")) diff --git a/tests/by-util/test_cut.rs b/tests/by-util/test_cut.rs index 875317721..413b73154 100644 --- a/tests/by-util/test_cut.rs +++ b/tests/by-util/test_cut.rs @@ -149,11 +149,11 @@ fn test_directory_and_no_such_file() { ucmd.arg("-b1") .arg("some") .run() - .stderr_is("cut: error: some: Is a directory\n"); + .stderr_is("cut: some: Is a directory\n"); new_ucmd!() .arg("-b1") .arg("some") .run() - .stderr_is("cut: error: some: No such file or directory\n"); + .stderr_is("cut: some: No such file or directory\n"); } diff --git a/tests/by-util/test_du.rs 
b/tests/by-util/test_du.rs index c72bd02a6..15620be52 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -76,7 +76,7 @@ fn test_du_basics_bad_name() { new_ucmd!() .arg("bad_name") .succeeds() // TODO: replace with ".fails()" once `du` is fixed - .stderr_only("du: error: bad_name: No such file or directory\n"); + .stderr_only("du: bad_name: No such file or directory\n"); } #[test] diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 6a969b5e9..f20739e13 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -17,11 +17,11 @@ fn test_complex_arithmetic() { .args(&["9223372036854775807", "+", "9223372036854775807"]) .run(); run.stdout_is(""); - run.stderr_is("expr: error: +: Numerical result out of range"); + run.stderr_is("expr: +: Numerical result out of range"); let run = new_ucmd!().args(&["9", "/", "0"]).run(); run.stdout_is(""); - run.stderr_is("expr: error: division by zero"); + run.stderr_is("expr: division by zero"); } #[test] diff --git a/tests/by-util/test_fmt.rs b/tests/by-util/test_fmt.rs index a83fae58e..0d6d9bb24 100644 --- a/tests/by-util/test_fmt.rs +++ b/tests/by-util/test_fmt.rs @@ -30,7 +30,7 @@ fn test_fmt_w_too_big() { //.stdout_is_fixture("call_graph.expected"); assert_eq!( result.stderr_str().trim(), - "fmt: error: invalid width: '2501': Numerical result out of range" + "fmt: invalid width: '2501': Numerical result out of range" ); } #[test] diff --git a/tests/by-util/test_id.rs b/tests/by-util/test_id.rs index 534736a32..1f8249aab 100644 --- a/tests/by-util/test_id.rs +++ b/tests/by-util/test_id.rs @@ -7,7 +7,7 @@ use crate::common::util::*; // From the Logs: "Build (ubuntu-18.04, x86_64-unknown-linux-gnu, feat_os_unix, use-cross)" // stderr: "whoami: cannot find name for user ID 1001" // Maybe: "adduser --uid 1001 username" can put things right? 
-// stderr = id: error: Could not find uid 1001: No such id: 1001 +// stderr = id: Could not find uid 1001: No such id: 1001 fn skipping_test_is_okay(result: &CmdResult, needle: &str) -> bool { if !result.succeeded() { println!("result.stdout = {}", result.stdout_str()); diff --git a/tests/by-util/test_install.rs b/tests/by-util/test_install.rs index fc4459072..68cd9700f 100644 --- a/tests/by-util/test_install.rs +++ b/tests/by-util/test_install.rs @@ -301,7 +301,7 @@ fn test_install_target_new_file_with_group() { .arg(format!("{}/{}", dir, file)) .run(); - if is_ci() && result.stderr_str().contains("error: no such group:") { + if is_ci() && result.stderr_str().contains("no such group:") { // In the CI, some server are failing to return the group. // As seems to be a configuration issue, ignoring it return; diff --git a/tests/by-util/test_join.rs b/tests/by-util/test_join.rs index b0311df84..a8f046851 100644 --- a/tests/by-util/test_join.rs +++ b/tests/by-util/test_join.rs @@ -148,7 +148,7 @@ fn multitab_character() { .arg("-t") .arg("э") .fails() - .stderr_is("join: error: multi-character tab э"); + .stderr_is("join: multi-character tab э"); } #[test] @@ -211,7 +211,7 @@ fn empty_format() { .arg("-o") .arg("") .fails() - .stderr_is("join: error: invalid file number in field spec: ''"); + .stderr_is("join: invalid file number in field spec: ''"); } #[test] diff --git a/tests/by-util/test_link.rs b/tests/by-util/test_link.rs index 99559a7fe..6ac3f35cc 100644 --- a/tests/by-util/test_link.rs +++ b/tests/by-util/test_link.rs @@ -23,7 +23,7 @@ fn test_link_no_circular() { ucmd.args(&[link, link]) .fails() - .stderr_is("link: error: No such file or directory (os error 2)\n"); + .stderr_is("link: No such file or directory (os error 2)\n"); assert!(!at.file_exists(link)); } @@ -35,7 +35,7 @@ fn test_link_nonexistent_file() { ucmd.args(&[file, link]) .fails() - .stderr_is("link: error: No such file or directory (os error 2)\n"); + .stderr_is("link: No such file or 
directory (os error 2)\n"); assert!(!at.file_exists(file)); assert!(!at.file_exists(link)); } diff --git a/tests/by-util/test_ln.rs b/tests/by-util/test_ln.rs index 646091b09..f2508ecbf 100644 --- a/tests/by-util/test_ln.rs +++ b/tests/by-util/test_ln.rs @@ -409,7 +409,7 @@ fn test_symlink_missing_destination() { at.touch(file); ucmd.args(&["-s", "-T", file]).fails().stderr_is(format!( - "ln: error: missing destination file operand after '{}'", + "ln: missing destination file operand after '{}'", file )); } diff --git a/tests/by-util/test_logname.rs b/tests/by-util/test_logname.rs index bd9d04a50..0e8125191 100644 --- a/tests/by-util/test_logname.rs +++ b/tests/by-util/test_logname.rs @@ -9,7 +9,7 @@ fn test_normal() { for (key, value) in env::vars() { println!("{}: {}", key, value); } - if (is_ci() || uucore::os::is_wsl_1()) && result.stderr_str().contains("error: no login name") { + if (is_ci() || uucore::os::is_wsl_1()) && result.stderr_str().contains("no login name") { // ToDO: investigate WSL failure // In the CI, some server are failing to return logname. 
// As seems to be a configuration issue, ignoring it diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 6d6c65194..2ae57ad7f 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -167,7 +167,7 @@ fn test_ls_width() { .ucmd() .args(&option.split(" ").collect::>()) .fails() - .stderr_only("ls: error: invalid line width: ‘1a’"); + .stderr_only("ls: invalid line width: ‘1a’"); } } @@ -875,7 +875,7 @@ fn test_ls_files_dirs() { .ucmd() .arg("doesntexist") .fails() - .stderr_contains(&"error: 'doesntexist': No such file or directory"); + .stderr_contains(&"'doesntexist': No such file or directory"); // One exists, the other doesn't scene @@ -883,7 +883,7 @@ fn test_ls_files_dirs() { .arg("a") .arg("doesntexist") .fails() - .stderr_contains(&"error: 'doesntexist': No such file or directory") + .stderr_contains(&"'doesntexist': No such file or directory") .stdout_contains(&"a:"); } diff --git a/tests/by-util/test_mkfifo.rs b/tests/by-util/test_mkfifo.rs index 23108d976..318a2ea5d 100644 --- a/tests/by-util/test_mkfifo.rs +++ b/tests/by-util/test_mkfifo.rs @@ -2,9 +2,7 @@ use crate::common::util::*; #[test] fn test_create_fifo_missing_operand() { - new_ucmd!() - .fails() - .stderr_is("mkfifo: error: missing operand"); + new_ucmd!().fails().stderr_is("mkfifo: missing operand"); } #[test] @@ -43,5 +41,5 @@ fn test_create_one_fifo_already_exists() { .arg("abcdef") .arg("abcdef") .fails() - .stderr_is("mkfifo: error: cannot create fifo 'abcdef': File exists"); + .stderr_is("mkfifo: cannot create fifo 'abcdef': File exists"); } diff --git a/tests/by-util/test_mktemp.rs b/tests/by-util/test_mktemp.rs index c273c407c..617f0fd06 100644 --- a/tests/by-util/test_mktemp.rs +++ b/tests/by-util/test_mktemp.rs @@ -120,7 +120,7 @@ fn test_mktemp_mktemp_t() { .arg(TEST_TEMPLATE8) .fails() .no_stdout() - .stderr_contains("error: suffix cannot contain any path separators"); + .stderr_contains("suffix cannot contain any path separators"); } #[test] diff 
--git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index 47532e2e5..e0723a479 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -472,7 +472,7 @@ fn test_mv_overwrite_nonempty_dir() { at.touch(dummy); // Not same error as GNU; the error message is a rust builtin // TODO: test (and implement) correct error message (or at least decide whether to do so) - // Current: "mv: error: couldn't rename path (Directory not empty; from=a; to=b)" + // Current: "mv: couldn't rename path (Directory not empty; from=a; to=b)" // GNU: "mv: cannot move ‘a’ to ‘b’: Directory not empty" // Verbose output for the move should not be shown on failure @@ -539,7 +539,7 @@ fn test_mv_errors() { .arg(dir) .fails() .stderr_is(format!( - "mv: error: cannot overwrite directory ‘{}’ with non-directory\n", + "mv: cannot overwrite directory ‘{}’ with non-directory\n", dir )); diff --git a/tests/by-util/test_nice.rs b/tests/by-util/test_nice.rs index d3457c686..9e004b98b 100644 --- a/tests/by-util/test_nice.rs +++ b/tests/by-util/test_nice.rs @@ -25,7 +25,7 @@ fn test_adjustment_with_no_command_should_error() { new_ucmd!() .args(&["-n", "19"]) .run() - .stderr_is("nice: error: A command must be given with an adjustment.\nTry \"nice --help\" for more information.\n"); + .stderr_is("nice: A command must be given with an adjustment.\nTry \"nice --help\" for more information.\n"); } #[test] diff --git a/tests/by-util/test_rm.rs b/tests/by-util/test_rm.rs index 9a068887c..2a87038d5 100644 --- a/tests/by-util/test_rm.rs +++ b/tests/by-util/test_rm.rs @@ -258,7 +258,7 @@ fn test_rm_no_operand() { let mut ucmd = new_ucmd!(); ucmd.fails() - .stderr_is("rm: error: missing an argument\nrm: error: for help, try 'rm --help'\n"); + .stderr_is("rm: missing an argument\nrm: for help, try 'rm --help'\n"); } #[test] diff --git a/tests/by-util/test_rmdir.rs b/tests/by-util/test_rmdir.rs index 34531cf22..eef2d50f5 100644 --- a/tests/by-util/test_rmdir.rs +++ b/tests/by-util/test_rmdir.rs @@ 
-39,7 +39,7 @@ fn test_rmdir_nonempty_directory_no_parents() { assert!(at.file_exists(file)); ucmd.arg(dir).fails().stderr_is( - "rmdir: error: failed to remove 'test_rmdir_nonempty_no_parents': Directory not \ + "rmdir: failed to remove 'test_rmdir_nonempty_no_parents': Directory not \ empty\n", ); @@ -59,9 +59,9 @@ fn test_rmdir_nonempty_directory_with_parents() { assert!(at.file_exists(file)); ucmd.arg("-p").arg(dir).fails().stderr_is( - "rmdir: error: failed to remove 'test_rmdir_nonempty/with/parents': Directory not \ - empty\nrmdir: error: failed to remove 'test_rmdir_nonempty/with': Directory not \ - empty\nrmdir: error: failed to remove 'test_rmdir_nonempty': Directory not \ + "rmdir: failed to remove 'test_rmdir_nonempty/with/parents': Directory not \ + empty\nrmdir: failed to remove 'test_rmdir_nonempty/with': Directory not \ + empty\nrmdir: failed to remove 'test_rmdir_nonempty': Directory not \ empty\n", ); diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index e4676b379..133dc0028 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -42,7 +42,7 @@ fn test_invalid_buffer_size() { .arg(invalid_buffer_size) .fails() .stderr_only(format!( - "sort: error: failed to parse buffer size `{}`: invalid digit found in string", + "sort: failed to parse buffer size `{}`: invalid digit found in string", invalid_buffer_size )); } @@ -471,7 +471,7 @@ fn test_keys_invalid_field() { new_ucmd!() .args(&["-k", "1."]) .fails() - .stderr_only("sort: error: failed to parse character index for key `1.`: cannot parse integer from empty string"); + .stderr_only("sort: failed to parse character index for key `1.`: cannot parse integer from empty string"); } #[test] @@ -479,7 +479,7 @@ fn test_keys_invalid_field_option() { new_ucmd!() .args(&["-k", "1.1x"]) .fails() - .stderr_only("sort: error: invalid option for key: `x`"); + .stderr_only("sort: invalid option for key: `x`"); } #[test] @@ -487,14 +487,15 @@ fn 
test_keys_invalid_field_zero() { new_ucmd!() .args(&["-k", "0.1"]) .fails() - .stderr_only("sort: error: field index was 0"); + .stderr_only("sort: field index was 0"); } #[test] fn test_keys_invalid_char_zero() { - new_ucmd!().args(&["-k", "1.0"]).fails().stderr_only( - "sort: error: invalid character index 0 in `1.0` for the start position of a field", - ); + new_ucmd!() + .args(&["-k", "1.0"]) + .fails() + .stderr_only("sort: invalid character index 0 in `1.0` for the start position of a field"); } #[test] diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index 4105cb7a2..4732d2def 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -26,7 +26,7 @@ fn test_stdbuf_line_buffered_stdout() { #[test] fn test_stdbuf_no_buffer_option_fails() { new_ucmd!().args(&["head"]).fails().stderr_is( - "error: The following required arguments were not provided:\n \ + "The following required arguments were not provided:\n \ --error \n \ --input \n \ --output \n\n\ @@ -49,10 +49,9 @@ fn test_stdbuf_trailing_var_arg() { #[cfg(not(target_os = "windows"))] #[test] fn test_stdbuf_line_buffering_stdin_fails() { - new_ucmd!() - .args(&["-i", "L", "head"]) - .fails() - .stderr_is("stdbuf: error: line buffering stdin is meaningless\nTry 'stdbuf --help' for more information."); + new_ucmd!().args(&["-i", "L", "head"]).fails().stderr_is( + "stdbuf: line buffering stdin is meaningless\nTry 'stdbuf --help' for more information.", + ); } #[cfg(not(target_os = "windows"))] @@ -61,5 +60,5 @@ fn test_stdbuf_invalid_mode_fails() { new_ucmd!() .args(&["-i", "1024R", "head"]) .fails() - .stderr_is("stdbuf: error: invalid mode 1024R\nTry 'stdbuf --help' for more information."); + .stderr_is("stdbuf: invalid mode 1024R\nTry 'stdbuf --help' for more information."); } diff --git a/tests/by-util/test_sum.rs b/tests/by-util/test_sum.rs index d12455749..f09ba9d00 100644 --- a/tests/by-util/test_sum.rs +++ b/tests/by-util/test_sum.rs @@ -59,9 +59,7 @@ fn 
test_invalid_file() { at.mkdir("a"); - ucmd.arg("a") - .fails() - .stderr_is("sum: error: 'a' Is a directory"); + ucmd.arg("a").fails().stderr_is("sum: 'a' Is a directory"); } #[test] @@ -70,5 +68,5 @@ fn test_invalid_metadata() { ucmd.arg("b") .fails() - .stderr_is("sum: error: 'b' No such file or directory"); + .stderr_is("sum: 'b' No such file or directory"); } diff --git a/tests/by-util/test_uniq.rs b/tests/by-util/test_uniq.rs index c1e53faf3..2645c38ca 100644 --- a/tests/by-util/test_uniq.rs +++ b/tests/by-util/test_uniq.rs @@ -145,7 +145,7 @@ fn test_invalid_utf8() { .arg("not-utf8-sequence.txt") .run() .failure() - .stderr_only("uniq: error: invalid utf-8 sequence of 1 bytes from index 0"); + .stderr_only("uniq: invalid utf-8 sequence of 1 bytes from index 0"); } #[test] diff --git a/tests/by-util/test_unlink.rs b/tests/by-util/test_unlink.rs index fa8f962c4..1999e965c 100644 --- a/tests/by-util/test_unlink.rs +++ b/tests/by-util/test_unlink.rs @@ -22,7 +22,7 @@ fn test_unlink_multiple_files() { at.touch(file_b); ucmd.arg(file_a).arg(file_b).fails().stderr_is( - "unlink: error: extra operand: 'test_unlink_multiple_file_b'\nTry 'unlink --help' \ + "unlink: extra operand: 'test_unlink_multiple_file_b'\nTry 'unlink --help' \ for more information.\n", ); } @@ -35,7 +35,7 @@ fn test_unlink_directory() { at.mkdir(dir); ucmd.arg(dir).fails().stderr_is( - "unlink: error: cannot unlink 'test_unlink_empty_directory': Not a regular file \ + "unlink: cannot unlink 'test_unlink_empty_directory': Not a regular file \ or symlink\n", ); } @@ -45,7 +45,7 @@ fn test_unlink_nonexistent() { let file = "test_unlink_nonexistent"; new_ucmd!().arg(file).fails().stderr_is( - "unlink: error: Cannot stat 'test_unlink_nonexistent': No such file or directory \ + "unlink: Cannot stat 'test_unlink_nonexistent': No such file or directory \ (os error 2)\n", ); } diff --git a/tests/by-util/test_whoami.rs b/tests/by-util/test_whoami.rs index dc6a1ceed..a98541b2d 100644 --- 
a/tests/by-util/test_whoami.rs +++ b/tests/by-util/test_whoami.rs @@ -5,7 +5,7 @@ use crate::common::util::*; // considered okay. If we are not inside the CI this calls assert!(result.success). // // From the Logs: "Build (ubuntu-18.04, x86_64-unknown-linux-gnu, feat_os_unix, use-cross)" -// stderr: "whoami: error: failed to get username" +// stderr: "whoami: failed to get username" // Maybe: "adduser --uid 1001 username" can put things right? fn skipping_test_is_okay(result: &CmdResult, needle: &str) -> bool { if !result.succeeded() { From 8fe34c72d2980f39d2d7664b92c776a9c66d1b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Wed, 26 May 2021 03:07:49 +0300 Subject: [PATCH 134/148] test: fix tests --- tests/by-util/test_stdbuf.rs | 2 +- tests/by-util/test_sync.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_stdbuf.rs b/tests/by-util/test_stdbuf.rs index 4732d2def..2e09601ce 100644 --- a/tests/by-util/test_stdbuf.rs +++ b/tests/by-util/test_stdbuf.rs @@ -26,7 +26,7 @@ fn test_stdbuf_line_buffered_stdout() { #[test] fn test_stdbuf_no_buffer_option_fails() { new_ucmd!().args(&["head"]).fails().stderr_is( - "The following required arguments were not provided:\n \ + "error: The following required arguments were not provided:\n \ --error \n \ --input \n \ --output \n\n\ diff --git a/tests/by-util/test_sync.rs b/tests/by-util/test_sync.rs index 436bfdef3..033651910 100644 --- a/tests/by-util/test_sync.rs +++ b/tests/by-util/test_sync.rs @@ -37,5 +37,5 @@ fn test_sync_no_existing_files() { .arg("--data") .arg("do-no-exist") .fails() - .stderr_contains("error: cannot stat"); + .stderr_contains("cannot stat"); } From 12f207a6d6f002e5c4f0aafc17b95c9d403e75c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ya=C4=9F=C4=B1z=20can=20De=C4=9Firmenci?= Date: Wed, 26 May 2021 03:21:53 +0300 Subject: [PATCH 135/148] test: fix tests --- tests/by-util/test_install.rs | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/tests/by-util/test_install.rs b/tests/by-util/test_install.rs index 68cd9700f..fb79454c1 100644 --- a/tests/by-util/test_install.rs +++ b/tests/by-util/test_install.rs @@ -328,7 +328,7 @@ fn test_install_target_new_file_with_owner() { .arg(format!("{}/{}", dir, file)) .run(); - if is_ci() && result.stderr_str().contains("error: no such user:") { + if is_ci() && result.stderr_str().contains("no such user:") { // In the CI, some server are failing to return the user id. // As seems to be a configuration issue, ignoring it return; From a8a1ec7faf7ef1994366f538f040dc866a2c9686 Mon Sep 17 00:00:00 2001 From: Matt Blessed Date: Tue, 25 May 2021 16:41:07 -0400 Subject: [PATCH 136/148] cp: implement backup control with tests --- src/uu/cp/src/cp.rs | 64 +++++++----- tests/by-util/test_cp.rs | 210 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 247 insertions(+), 27 deletions(-) diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 3d6faf66a..7eaa21c11 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -47,6 +47,7 @@ use std::os::windows::ffi::OsStrExt; use std::path::{Path, PathBuf, StripPrefixError}; use std::str::FromStr; use std::string::ToString; +use uucore::backup_control::{self, BackupMode}; use uucore::fs::resolve_relative_path; use uucore::fs::{canonicalize, CanonicalizeMode}; use walkdir::WalkDir; @@ -169,14 +170,6 @@ pub enum TargetType { File, } -#[derive(Clone, Eq, PartialEq)] -pub enum BackupMode { - ExistingBackup, - NoBackup, - NumberedBackup, - SimpleBackup, -} - pub enum CopyMode { Link, SymLink, @@ -201,7 +194,7 @@ pub enum Attribute { #[allow(dead_code)] pub struct Options { attributes_only: bool, - backup: bool, + backup: BackupMode, copy_contents: bool, copy_mode: CopyMode, dereference: bool, @@ -222,6 +215,7 @@ pub struct Options { static VERSION: &str = env!("CARGO_PKG_VERSION"); static ABOUT: &str = "Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY."; +static LONG_HELP: &str = ""; static 
EXIT_OK: i32 = 0; static EXIT_ERR: i32 = 1; @@ -301,6 +295,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let matches = App::new(executable!()) .version(VERSION) .about(ABOUT) + .after_help(&*format!("{}\n{}", LONG_HELP, backup_control::BACKUP_CONTROL_LONG_HELP)) .usage(&usage[..]) .arg(Arg::with_name(OPT_TARGET_DIRECTORY) .short("t") @@ -364,12 +359,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .arg(Arg::with_name(OPT_BACKUP) .short("b") .long(OPT_BACKUP) - .help("make a backup of each existing destination file")) + .help("make a backup of each existing destination file") + .takes_value(true) + .require_equals(true) + .min_values(0) + .possible_values(backup_control::BACKUP_CONTROL_VALUES) + .value_name("CONTROL") + ) .arg(Arg::with_name(OPT_SUFFIX) .short("S") .long(OPT_SUFFIX) .takes_value(true) - .default_value("~") .value_name("SUFFIX") .help("override the usual backup suffix")) .arg(Arg::with_name(OPT_UPDATE) @@ -585,7 +585,24 @@ impl Options { || matches.is_present(OPT_RECURSIVE_ALIAS) || matches.is_present(OPT_ARCHIVE); - let backup = matches.is_present(OPT_BACKUP) || (matches.occurrences_of(OPT_SUFFIX) > 0); + let backup_mode = backup_control::determine_backup_mode( + matches.is_present(OPT_BACKUP), + matches.value_of(OPT_BACKUP), + ); + let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)); + + let overwrite = OverwriteMode::from_matches(matches); + + if overwrite == OverwriteMode::NoClobber && backup_mode != BackupMode::NoBackup { + show_error!( + "options --backup and --no-clobber are mutually exclusive\n\ + Try '{} --help' for more information.", + executable!() + ); + return Err(Error::Error( + "options --backup and --no-clobber are mutually exclusive".to_owned(), + )); + } // Parse target directory options let no_target_dir = matches.is_present(OPT_NO_TARGET_DIRECTORY); @@ -631,9 +648,7 @@ impl Options { || matches.is_present(OPT_NO_DEREFERENCE_PRESERVE_LINKS) || matches.is_present(OPT_ARCHIVE), 
one_file_system: matches.is_present(OPT_ONE_FILE_SYSTEM), - overwrite: OverwriteMode::from_matches(matches), parents: matches.is_present(OPT_PARENTS), - backup_suffix: matches.value_of(OPT_SUFFIX).unwrap().to_string(), update: matches.is_present(OPT_UPDATE), verbose: matches.is_present(OPT_VERBOSE), strip_trailing_slashes: matches.is_present(OPT_STRIP_TRAILING_SLASHES), @@ -654,7 +669,9 @@ impl Options { ReflinkMode::Never } }, - backup, + backup: backup_mode, + backup_suffix: backup_suffix, + overwrite: overwrite, no_target_dir, preserve_attributes, recursive, @@ -1090,14 +1107,10 @@ fn context_for(src: &Path, dest: &Path) -> String { format!("'{}' -> '{}'", src.display(), dest.display()) } -/// Implements a relatively naive backup that is not as full featured -/// as GNU cp. No CONTROL version control method argument is taken -/// for backups. -/// TODO: Add version control methods -fn backup_file(path: &Path, suffix: &str) -> CopyResult { - let mut backup_path = path.to_path_buf().into_os_string(); - backup_path.push(suffix); - fs::copy(path, &backup_path)?; +/// Implements a simple backup copy for the destination file. +/// TODO: for the backup, should this function be replaced by `copy_file(...)`? 
+fn backup_dest(dest: &Path, backup_path: &PathBuf) -> CopyResult { + fs::copy(dest, &backup_path)?; Ok(backup_path.into()) } @@ -1108,8 +1121,9 @@ fn handle_existing_dest(source: &Path, dest: &Path, options: &Options) -> CopyRe options.overwrite.verify(dest)?; - if options.backup { - backup_file(dest, &options.backup_suffix)?; + let backup_path = backup_control::get_backup_path(options.backup, dest, &options.backup_suffix); + if let Some(backup_path) = backup_path { + backup_dest(dest, &backup_path)?; } match options.overwrite { diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index 1e99da0fb..dddba595c 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -214,8 +214,8 @@ fn test_cp_arg_symlink() { fn test_cp_arg_no_clobber() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.arg(TEST_HELLO_WORLD_SOURCE) - .arg("--no-clobber") .arg(TEST_HOW_ARE_YOU_SOURCE) + .arg("--no-clobber") .succeeds(); assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "How are you?\n"); @@ -305,7 +305,23 @@ fn test_cp_arg_backup() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.arg(TEST_HELLO_WORLD_SOURCE) - .arg("--backup") + .arg(TEST_HOW_ARE_YOU_SOURCE) + .arg("-b") + .succeeds(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_arg_backup_arg_first() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup") + .arg(TEST_HELLO_WORLD_SOURCE) .arg(TEST_HOW_ARE_YOU_SOURCE) .succeeds(); @@ -321,6 +337,7 @@ fn test_cp_arg_suffix() { let (at, mut ucmd) = at_and_ucmd!(); ucmd.arg(TEST_HELLO_WORLD_SOURCE) + .arg("-b") .arg("--suffix") .arg(".bak") .arg(TEST_HOW_ARE_YOU_SOURCE) @@ -333,6 +350,195 @@ fn test_cp_arg_suffix() { ); } +#[test] +fn test_cp_custom_backup_suffix_via_env() { + let (at, mut ucmd) = at_and_ucmd!(); + let suffix = "super-suffix-of-the-century"; + + ucmd.arg("-b") + .env("SIMPLE_BACKUP_SUFFIX", suffix) + 
.arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}{}", TEST_HOW_ARE_YOU_SOURCE, suffix)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_numbered_with_t() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=t") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_numbered() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=numbered") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=existing") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=nil") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_numbered_if_existing_backup_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + let existing_backup = &*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE); + at.touch(existing_backup); + + ucmd.arg("--backup=existing") + 
.arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(TEST_HOW_ARE_YOU_SOURCE)); + assert!(at.file_exists(existing_backup)); + assert_eq!( + at.read(&*format!("{}.~2~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_numbered_if_existing_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + let existing_backup = &*format!("{}.~1~", TEST_HOW_ARE_YOU_SOURCE); + + at.touch(existing_backup); + ucmd.arg("--backup=nil") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(TEST_HOW_ARE_YOU_SOURCE)); + assert!(at.file_exists(existing_backup)); + assert_eq!( + at.read(&*format!("{}.~2~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_simple() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=simple") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_never() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=never") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + +#[test] +fn test_cp_backup_none() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=none") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert!(!at.file_exists(&format!("{}~", TEST_HOW_ARE_YOU_SOURCE))); +} + +#[test] +fn test_cp_backup_off() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup=off") + 
.arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .succeeds() + .no_stderr(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert!(!at.file_exists(&format!("{}~", TEST_HOW_ARE_YOU_SOURCE))); +} + #[test] fn test_cp_deref_conflicting_options() { new_ucmd!() From eda72b52081c7b92d412735f578645a37bdf6490 Mon Sep 17 00:00:00 2001 From: Syukron Rifail M Date: Sat, 15 May 2021 21:29:45 +0700 Subject: [PATCH 137/148] du: replace getopts with clap --- Cargo.lock | 1 + src/uu/du/Cargo.toml | 1 + src/uu/du/src/du.rs | 371 +++++++++++++++++++++++++++---------------- 3 files changed, 234 insertions(+), 139 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0674d3de0..5e1470c88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1986,6 +1986,7 @@ name = "uu_du" version = "0.0.6" dependencies = [ "chrono", + "clap", "uucore", "uucore_procs", "winapi 0.3.9", diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index 3ce9d8361..023c0a021 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -15,6 +15,7 @@ edition = "2018" path = "src/du.rs" [dependencies] +clap = "2.33" chrono = "0.4" uucore = { version=">=0.0.8", package="uucore", path="../../uucore" } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 89dd3f739..6bd4f23e4 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -12,6 +12,7 @@ extern crate uucore; use chrono::prelude::DateTime; use chrono::Local; +use clap::{App, Arg}; use std::collections::HashSet; use std::env; use std::fs; @@ -37,6 +38,27 @@ use winapi::um::winbase::GetFileInformationByHandleEx; #[cfg(windows)] use winapi::um::winnt::{FILE_ID_128, ULONGLONG}; +mod options { + pub const NULL: &str = "0"; + pub const ALL: &str = "all"; + pub const APPARENT_SIZE: &str = "apparent-size"; + pub const BLOCK_SIZE: &str = "B"; + pub const BYTES: &str = "b"; + pub const TOTAL: &str = "c"; + pub const MAX_DEPTH: &str 
= "d"; + pub const HUMAN_READABLE: &str = "h"; + pub const BLOCK_SIZE_1K: &str = "k"; + pub const COUNT_LINKS: &str = "l"; + pub const BLOCK_SIZE_1M: &str = "m"; + pub const SEPARATE_DIRS: &str = "S"; + pub const SUMMARIZE: &str = "s"; + pub const SI: &str = "si"; + pub const TIME: &str = "time"; + pub const TIME_STYLE: &str = "time-style"; + pub const FILE: &str = "FILE"; +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); const NAME: &str = "du"; const SUMMARY: &str = "estimate file space usage"; const LONG_HELP: &str = " @@ -220,14 +242,14 @@ fn unit_string_to_number(s: &str) -> Option { Some(number * multiple.pow(unit)) } -fn translate_to_pure_number(s: &Option) -> Option { +fn translate_to_pure_number(s: &Option<&str>) -> Option { match *s { Some(ref s) => unit_string_to_number(s), None => None, } } -fn read_block_size(s: Option) -> u64 { +fn read_block_size(s: Option<&str>) -> u64 { match translate_to_pure_number(&s) { Some(v) => v, None => { @@ -236,7 +258,8 @@ fn read_block_size(s: Option) -> u64 { }; for env_var in &["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] { - if let Some(quantity) = translate_to_pure_number(&env::var(env_var).ok()) { + let env_size = env::var(env_var).ok(); + if let Some(quantity) = translate_to_pure_number(&env_size.as_deref()) { return quantity; } } @@ -361,126 +384,189 @@ fn convert_size_other(size: u64, _multiplier: u64, block_size: u64) -> String { format!("{}", ((size as f64) / (block_size as f64)).ceil()) } +fn get_usage() -> String { + format!( + "{0} [OPTION]... [FILE]... + {0} [OPTION]... --files0-from=F", + executable!() + ) +} + #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> i32 { let args = args .collect_str(InvalidEncodingHandling::Ignore) .accept_any(); - let syntax = format!( - "[OPTION]... [FILE]... - {0} [OPTION]... 
--files0-from=F", - NAME - ); - let matches = app!(&syntax, SUMMARY, LONG_HELP) - // In task - .optflag( - "a", - "all", - " write counts for all files, not just directories", - ) - // In main - .optflag( - "", - "apparent-size", - "print apparent sizes, rather than disk usage - although the apparent size is usually smaller, it may be larger due to holes - in ('sparse') files, internal fragmentation, indirect blocks, and the like", - ) - // In main - .optopt( - "B", - "block-size", - "scale sizes by SIZE before printing them. - E.g., '-BM' prints sizes in units of 1,048,576 bytes. See SIZE format below.", - "SIZE", - ) - // In main - .optflag( - "b", - "bytes", - "equivalent to '--apparent-size --block-size=1'", - ) - // In main - .optflag("c", "total", "produce a grand total") - // In task - // opts.optflag("D", "dereference-args", "dereference only symlinks that are listed - // on the command line"), - // In main - // opts.optopt("", "files0-from", "summarize disk usage of the NUL-terminated file - // names specified in file F; - // If F is - then read names from standard input", "F"), - // // In task - // opts.optflag("H", "", "equivalent to --dereference-args (-D)"), - // In main - .optflag( - "h", - "human-readable", - "print sizes in human readable format (e.g., 1K 234M 2G)", - ) - // In main - .optflag("", "si", "like -h, but use powers of 1000 not 1024") - // In main - .optflag("k", "", "like --block-size=1K") - // In task - .optflag("l", "count-links", "count sizes many times if hard linked") - // // In main - .optflag("m", "", "like --block-size=1M") - // // In task - // opts.optflag("L", "dereference", "dereference all symbolic links"), - // // In task - // opts.optflag("P", "no-dereference", "don't follow any symbolic links (this is the default)"), - // // In main - .optflag( - "0", - "null", - "end each output line with 0 byte rather than newline", - ) - // In main - .optflag( - "S", - "separate-dirs", - "do not include size of subdirectories", - ) - 
// In main - .optflag("s", "summarize", "display only a total for each argument") - // // In task - // opts.optflag("x", "one-file-system", "skip directories on different file systems"), - // // In task - // opts.optopt("X", "exclude-from", "exclude files that match any pattern in FILE", "FILE"), - // // In task - // opts.optopt("", "exclude", "exclude files that match PATTERN", "PATTERN"), - // In main - .optopt( - "d", - "max-depth", - "print the total for a directory (or file, with --all) - only if it is N or fewer levels below the command - line argument; --max-depth=0 is the same as --summarize", - "N", - ) - // In main - .optflagopt( - "", - "time", - "show time of the last modification of any file in the - directory, or any of its subdirectories. If WORD is given, show time as WORD instead - of modification time: atime, access, use, ctime or status", - "WORD", - ) - // In main - .optopt( - "", - "time-style", - "show times using style STYLE: - full-iso, long-iso, iso, +FORMAT FORMAT is interpreted like 'date'", - "STYLE", - ) - .parse(args); + let usage = get_usage(); - let summarize = matches.opt_present("summarize"); + let matches = App::new(executable!()) + .version(VERSION) + .about(SUMMARY) + .usage(&usage[..]) + .after_help(LONG_HELP) + .arg( + Arg::with_name(options::ALL) + .short("a") + .long(options::ALL) + .help("write counts for all files, not just directories"), + ) + .arg( + Arg::with_name(options::APPARENT_SIZE) + .long(options::APPARENT_SIZE) + .help( + "print apparent sizes, rather than disk usage \ + although the apparent size is usually smaller, it may be larger due to holes \ + in ('sparse') files, internal fragmentation, indirect blocks, and the like" + ) + ) + .arg( + Arg::with_name(options::BLOCK_SIZE) + .short("B") + .long("block-size") + .value_name("SIZE") + .help( + "scale sizes by SIZE before printing them. \ + E.g., '-BM' prints sizes in units of 1,048,576 bytes. See SIZE format below." 
+ ) + ) + .arg( + Arg::with_name(options::BYTES) + .short("b") + .long("bytes") + .help("equivalent to '--apparent-size --block-size=1'") + ) + .arg( + Arg::with_name(options::TOTAL) + .long("total") + .short("c") + .help("produce a grand total") + ) + .arg( + Arg::with_name(options::MAX_DEPTH) + .short("d") + .long("max-depth") + .value_name("N") + .help( + "print the total for a directory (or file, with --all) \ + only if it is N or fewer levels below the command \ + line argument; --max-depth=0 is the same as --summarize" + ) + ) + .arg( + Arg::with_name(options::HUMAN_READABLE) + .long("human-readable") + .short("h") + .help("print sizes in human readable format (e.g., 1K 234M 2G)") + ) + .arg( + Arg::with_name("inodes") + .long("inodes") + .help( + "list inode usage information instead of block usage like --block-size=1K" + ) + ) + .arg( + Arg::with_name(options::BLOCK_SIZE_1K) + .short("k") + .help("like --block-size=1K") + ) + .arg( + Arg::with_name(options::COUNT_LINKS) + .short("l") + .long("count-links") + .help("count sizes many times if hard linked") + ) + // .arg( + // Arg::with_name("dereference") + // .short("L") + // .long("dereference") + // .help("dereference all symbolic links") + // ) + // .arg( + // Arg::with_name("no-dereference") + // .short("P") + // .long("no-dereference") + // .help("don't follow any symbolic links (this is the default)") + // ) + .arg( + Arg::with_name(options::BLOCK_SIZE_1M) + .short("m") + .help("like --block-size=1M") + ) + .arg( + Arg::with_name(options::NULL) + .short("0") + .long("null") + .help("end each output line with 0 byte rather than newline") + ) + .arg( + Arg::with_name(options::SEPARATE_DIRS) + .short("S") + .long("separate-dirs") + .help("do not include size of subdirectories") + ) + .arg( + Arg::with_name(options::SUMMARIZE) + .short("s") + .long("summarize") + .help("display only a total for each argument") + ) + .arg( + Arg::with_name(options::SI) + .long(options::SI) + .help("like -h, but use powers 
of 1000 not 1024") + ) + // .arg( + // Arg::with_name("one-file-system") + // .short("x") + // .long("one-file-system") + // .help("skip directories on different file systems") + // ) + // .arg( + // Arg::with_name("") + // .short("x") + // .long("exclude-from") + // .value_name("FILE") + // .help("exclude files that match any pattern in FILE") + // ) + // .arg( + // Arg::with_name("exclude") + // .long("exclude") + // .value_name("PATTERN") + // .help("exclude files that match PATTERN") + // ) + .arg( + Arg::with_name(options::TIME) + .long(options::TIME) + .value_name("WORD") + .require_equals(true) + .min_values(0) + .help( + "show time of the last modification of any file in the \ + directory, or any of its subdirectories. If WORD is given, show time as WORD instead \ + of modification time: atime, access, use, ctime or status" + ) + ) + .arg( + Arg::with_name(options::TIME_STYLE) + .long(options::TIME_STYLE) + .value_name("STYLE") + .help( + "show times using style STYLE: \ + full-iso, long-iso, iso, +FORMAT FORMAT is interpreted like 'date'" + ) + ) + .arg( + Arg::with_name(options::FILE) + .hidden(true) + .multiple(true) + ) + .get_matches_from(args); - let max_depth_str = matches.opt_str("max-depth"); + let summarize = matches.is_present(options::SUMMARIZE); + + let max_depth_str = matches.value_of(options::MAX_DEPTH); let max_depth = max_depth_str.as_ref().and_then(|s| s.parse::().ok()); match (max_depth_str, max_depth) { (Some(ref s), _) if summarize => { @@ -495,34 +581,35 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } let options = Options { - all: matches.opt_present("all"), + all: matches.is_present(options::ALL), program_name: NAME.to_owned(), max_depth, - total: matches.opt_present("total"), - separate_dirs: matches.opt_present("S"), + total: matches.is_present(options::TOTAL), + separate_dirs: matches.is_present(options::SEPARATE_DIRS), }; - let strs = if matches.free.is_empty() { - vec!["./".to_owned()] // TODO: gnu `du` doesn't use trailing 
"/" here - } else { - matches.free.clone() + let strs = match matches.value_of(options::FILE) { + Some(_) => matches.values_of(options::FILE).unwrap().collect(), + None => { + vec!["./"] // TODO: gnu `du` doesn't use trailing "/" here + } }; - let block_size = read_block_size(matches.opt_str("block-size")); + let block_size = read_block_size(matches.value_of(options::BLOCK_SIZE)); - let multiplier: u64 = if matches.opt_present("si") { + let multiplier: u64 = if matches.is_present(options::SI) { 1000 } else { 1024 }; let convert_size_fn = { - if matches.opt_present("human-readable") || matches.opt_present("si") { + if matches.is_present(options::HUMAN_READABLE) || matches.is_present(options::SI) { convert_size_human - } else if matches.opt_present("b") { + } else if matches.is_present(options::BYTES) { convert_size_b - } else if matches.opt_present("k") { + } else if matches.is_present(options::BLOCK_SIZE_1K) { convert_size_k - } else if matches.opt_present("m") { + } else if matches.is_present(options::BLOCK_SIZE_1M) { convert_size_m } else { convert_size_other @@ -530,8 +617,8 @@ pub fn uumain(args: impl uucore::Args) -> i32 { }; let convert_size = |size| convert_size_fn(size, multiplier, block_size); - let time_format_str = match matches.opt_str("time-style") { - Some(s) => match &s[..] 
{ + let time_format_str = match matches.value_of("time-style") { + Some(s) => match s { "full-iso" => "%Y-%m-%d %H:%M:%S.%f %z", "long-iso" => "%Y-%m-%d %H:%M", "iso" => "%Y-%m-%d", @@ -552,7 +639,11 @@ Try '{} --help' for more information.", None => "%Y-%m-%d %H:%M", }; - let line_separator = if matches.opt_present("0") { "\0" } else { "\n" }; + let line_separator = if matches.is_present(options::NULL) { + "\0" + } else { + "\n" + }; let mut grand_total = 0; for path_str in strs { @@ -565,18 +656,20 @@ Try '{} --help' for more information.", let (_, len) = iter.size_hint(); let len = len.unwrap(); for (index, stat) in iter.enumerate() { - let size = if matches.opt_present("apparent-size") || matches.opt_present("b") { + let size = if matches.is_present(options::APPARENT_SIZE) + || matches.is_present(options::BYTES) + { stat.size } else { // C's stat is such that each block is assume to be 512 bytes // See: http://linux.die.net/man/2/stat stat.blocks * 512 }; - if matches.opt_present("time") { + if matches.is_present(options::TIME) { let tm = { let secs = { - match matches.opt_str("time") { - Some(s) => match &s[..] 
{ + match matches.value_of(options::TIME) { + Some(s) => match s { "accessed" => stat.accessed, "created" => stat.created, "modified" => stat.modified, @@ -649,8 +742,8 @@ mod test_du { (Some("900KB".to_string()), Some(900 * 1000)), (Some("BAD_STRING".to_string()), None), ]; - for it in test_data.into_iter() { - assert_eq!(translate_to_pure_number(&it.0), it.1); + for it in test_data.iter() { + assert_eq!(translate_to_pure_number(&it.0.as_deref()), it.1); } } @@ -661,8 +754,8 @@ mod test_du { (None, 1024), (Some("BAD_STRING".to_string()), 1024), ]; - for it in test_data.into_iter() { - assert_eq!(read_block_size(it.0.clone()), it.1); + for it in test_data.iter() { + assert_eq!(read_block_size(it.0.as_deref()), it.1); } } } From afb1b9efb4ff31fb934d459555f22cb82d94b483 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 26 May 2021 12:53:11 +0200 Subject: [PATCH 138/148] tests/util: add AtPath::hard_link --- tests/common/util.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/common/util.rs b/tests/common/util.rs index 611baadd4..6f9f779ef 100644 --- a/tests/common/util.rs +++ b/tests/common/util.rs @@ -7,7 +7,7 @@ use std::env; #[cfg(not(windows))] use std::ffi::CString; use std::ffi::OsStr; -use std::fs::{self, File, OpenOptions}; +use std::fs::{self, hard_link, File, OpenOptions}; use std::io::{Read, Result, Write}; #[cfg(unix)] use std::os::unix::fs::{symlink as symlink_dir, symlink as symlink_file}; @@ -524,6 +524,14 @@ impl AtPath { } } + pub fn hard_link(&self, src: &str, dst: &str) { + log_info( + "hard_link", + &format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)), + ); + hard_link(&self.plus(src), &self.plus(dst)).unwrap(); + } + pub fn symlink_file(&self, src: &str, dst: &str) { log_info( "symlink", @@ -680,6 +688,10 @@ impl TestScenario { cmd } + /// Returns builder for invoking any system command. Paths given are treated + /// relative to the environment's unique temporary test directory. 
+ /// Differs from the builder returned by `cmd` in that `cmd_keepenv` does not call + /// `Command::env_clear` (Clears the entire environment map for the child process.) pub fn cmd_keepenv>(&self, bin: S) -> UCommand { UCommand::new_from_tmp(bin, self.tmpd.clone(), false) } From 6a70d89e8ca43c979a3b8053ecc43122d8d23bf5 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 26 May 2021 12:55:53 +0200 Subject: [PATCH 139/148] tests/du: replace call to 'ln' with call to 'AtPath::hard_link' --- tests/by-util/test_du.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index c72bd02a6..04dbf9f37 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -129,11 +129,9 @@ fn _du_soft_link(s: &str) { #[test] fn test_du_hard_link() { let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; - let result_ln = scene.cmd("ln").arg(SUB_FILE).arg(SUB_LINK).run(); - if !result_ln.succeeded() { - scene.ccmd("ln").arg(SUB_FILE).arg(SUB_LINK).succeeds(); - } + at.hard_link(SUB_FILE, SUB_LINK); let result = scene.ucmd().arg(SUB_DIR_LINKS).succeeds(); From efd5921bdaade152a9a1d07d813f685980d13900 Mon Sep 17 00:00:00 2001 From: Jan Scheer Date: Wed, 26 May 2021 13:07:04 +0200 Subject: [PATCH 140/148] tests/test: replace call to 'ln -s' with call to 'AtPath::symlink_file' --- tests/by-util/test_test.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/by-util/test_test.rs b/tests/by-util/test_test.rs index 0dfc0c620..3a55f772a 100644 --- a/tests/by-util/test_test.rs +++ b/tests/by-util/test_test.rs @@ -437,10 +437,9 @@ fn test_not_is_not_empty() { #[cfg(not(windows))] fn test_symlink_is_symlink() { let scenario = TestScenario::new(util_name!()); - let mut ln = scenario.cmd("ln"); + let at = &scenario.fixtures; - // creating symlinks requires admin on Windows - ln.args(&["-s", "regular_file", "symlink"]).succeeds(); + at.symlink_file("regular_file", "symlink"); 
// FIXME: implement on Windows scenario.ucmd().args(&["-h", "symlink"]).succeeds(); From fe25b51a6658dffb2288c1b409b1c5ca50322d71 Mon Sep 17 00:00:00 2001 From: Dean Li Date: Wed, 26 May 2021 21:34:02 +0800 Subject: [PATCH 141/148] chmod: match GNU error Related to #2260 Signed-off-by: Dean Li --- src/uu/chmod/src/chmod.rs | 4 +++- tests/by-util/test_chmod.rs | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/uu/chmod/src/chmod.rs b/src/uu/chmod/src/chmod.rs index 9dea3c842..c4bf309d6 100644 --- a/src/uu/chmod/src/chmod.rs +++ b/src/uu/chmod/src/chmod.rs @@ -262,8 +262,10 @@ impl Chmoder { ); } return Ok(()); + } else if err.kind() == std::io::ErrorKind::PermissionDenied { + show_error!("'{}': Permission denied", file.display()); } else { - show_error!("{}: '{}'", err, file.display()); + show_error!("'{}': {}", file.display(), err); } return Err(1); } diff --git a/tests/by-util/test_chmod.rs b/tests/by-util/test_chmod.rs index 733722b7c..f20429a6e 100644 --- a/tests/by-util/test_chmod.rs +++ b/tests/by-util/test_chmod.rs @@ -282,6 +282,26 @@ fn test_chmod_reference_file() { run_single_test(&tests[0], at, ucmd); } +#[test] +fn test_permission_denied() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + at.mkdir("d/"); + at.mkdir("d/no-x"); + at.mkdir("d/no-x/y"); + + scene.ucmd().arg("u=rw").arg("d/no-x").succeeds(); + + scene + .ucmd() + .arg("-R") + .arg("o=r") + .arg("d") + .fails() + .stderr_is("chmod: 'd/no-x/y': Permission denied"); +} + #[test] fn test_chmod_recursive() { let _guard = UMASK_MUTEX.lock(); From 25ed5eeb0e470b7d1695f9caebee8a192efecc79 Mon Sep 17 00:00:00 2001 From: Matt Blessed Date: Wed, 26 May 2021 10:50:41 -0400 Subject: [PATCH 142/148] cp: move option check to uumain and use `show_usage_error` - add test for conflicting options `--backup` and `--no-clobber` --- src/uu/cp/src/cp.rs | 17 ++++++----------- tests/by-util/test_cp.rs | 12 ++++++++++++ 2 files changed, 18 
insertions(+), 11 deletions(-) diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 7eaa21c11..7e64a288c 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -463,6 +463,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .get_matches_from(args); let options = crash_if_err!(EXIT_ERR, Options::from_matches(&matches)); + + if options.overwrite == OverwriteMode::NoClobber && options.backup != BackupMode::NoBackup { + show_usage_error!("options --backup and --no-clobber are mutually exclusive"); + return 1; + } + let paths: Vec = matches .values_of(OPT_PATHS) .map(|v| v.map(ToString::to_string).collect()) @@ -593,17 +599,6 @@ impl Options { let overwrite = OverwriteMode::from_matches(matches); - if overwrite == OverwriteMode::NoClobber && backup_mode != BackupMode::NoBackup { - show_error!( - "options --backup and --no-clobber are mutually exclusive\n\ - Try '{} --help' for more information.", - executable!() - ); - return Err(Error::Error( - "options --backup and --no-clobber are mutually exclusive".to_owned(), - )); - } - // Parse target directory options let no_target_dir = matches.is_present(OPT_NO_TARGET_DIRECTORY); let target_dir = matches diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index dddba595c..d41d3f6ed 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -539,6 +539,18 @@ fn test_cp_backup_off() { assert!(!at.file_exists(&format!("{}~", TEST_HOW_ARE_YOU_SOURCE))); } +#[test] +fn test_cp_backup_no_clobber_conflicting_options() { + let (_, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup") + .arg("--no-clobber") + .arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .fails() + .stderr_is("cp: options --backup and --no-clobber are mutually exclusive\nTry 'cp --help' for more information."); +} + #[test] fn test_cp_deref_conflicting_options() { new_ucmd!() From f11f5f3abba82be1e4697f4c7a8a0c804b3cfb07 Mon Sep 17 00:00:00 2001 From: Matt Blessed Date: Tue, 25 May 2021 20:34:49 -0400 
Subject: [PATCH 143/148] mv: refactor backup logic to use shared uucore backup control - add mv backup tests --- src/uu/mv/src/mv.rs | 95 ++++------------------- tests/by-util/test_mv.rs | 161 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 164 insertions(+), 92 deletions(-) diff --git a/src/uu/mv/src/mv.rs b/src/uu/mv/src/mv.rs index a0ff1bcc6..c61c7caf1 100644 --- a/src/uu/mv/src/mv.rs +++ b/src/uu/mv/src/mv.rs @@ -20,6 +20,7 @@ use std::os::unix; #[cfg(windows)] use std::os::windows; use std::path::{Path, PathBuf}; +use uucore::backup_control::{self, BackupMode}; use fs_extra::dir::{move_dir, CopyOptions as DirCopyOptions}; @@ -40,16 +41,9 @@ pub enum OverwriteMode { Force, } -#[derive(Clone, Copy, Eq, PartialEq)] -pub enum BackupMode { - NoBackup, - SimpleBackup, - NumberedBackup, - ExistingBackup, -} - static ABOUT: &str = "Move SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY."; static VERSION: &str = env!("CARGO_PKG_VERSION"); +static LONG_HELP: &str = ""; static OPT_BACKUP: &str = "backup"; static OPT_BACKUP_NO_ARG: &str = "b"; @@ -80,20 +74,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 { let matches = App::new(executable!()) .version(VERSION) .about(ABOUT) + .after_help(&*format!("{}\n{}", LONG_HELP, backup_control::BACKUP_CONTROL_LONG_HELP)) .usage(&usage[..]) .arg( Arg::with_name(OPT_BACKUP) .long(OPT_BACKUP) .help("make a backup of each existing destination file") .takes_value(true) - .possible_value("simple") - .possible_value("never") - .possible_value("numbered") - .possible_value("t") - .possible_value("existing") - .possible_value("nil") - .possible_value("none") - .possible_value("off") + .require_equals(true) + .min_values(0) + .possible_values(backup_control::BACKUP_CONTROL_VALUES) .value_name("CONTROL") ) .arg( @@ -172,18 +162,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .unwrap_or_default(); let overwrite_mode = determine_overwrite_mode(&matches); - let backup_mode = determine_backup_mode(&matches); + let backup_mode 
= backup_control::determine_backup_mode( + matches.is_present(OPT_BACKUP_NO_ARG) || matches.is_present(OPT_BACKUP), + matches.value_of(OPT_BACKUP), + ); if overwrite_mode == OverwriteMode::NoClobber && backup_mode != BackupMode::NoBackup { - show_error!( - "options --backup and --no-clobber are mutually exclusive\n\ - Try '{} --help' for more information.", - executable!() - ); + show_usage_error!("options --backup and --no-clobber are mutually exclusive"); return 1; } - let backup_suffix = determine_backup_suffix(backup_mode, &matches); + let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)); let behavior = Behavior { overwrite: overwrite_mode, @@ -227,37 +216,6 @@ fn determine_overwrite_mode(matches: &ArgMatches) -> OverwriteMode { } } -fn determine_backup_mode(matches: &ArgMatches) -> BackupMode { - if matches.is_present(OPT_BACKUP_NO_ARG) { - BackupMode::SimpleBackup - } else if matches.is_present(OPT_BACKUP) { - match matches.value_of(OPT_BACKUP).map(String::from) { - None => BackupMode::SimpleBackup, - Some(mode) => match &mode[..] 
{ - "simple" | "never" => BackupMode::SimpleBackup, - "numbered" | "t" => BackupMode::NumberedBackup, - "existing" | "nil" => BackupMode::ExistingBackup, - "none" | "off" => BackupMode::NoBackup, - _ => panic!(), // cannot happen as it is managed by clap - }, - } - } else { - BackupMode::NoBackup - } -} - -fn determine_backup_suffix(backup_mode: BackupMode, matches: &ArgMatches) -> String { - if matches.is_present(OPT_SUFFIX) { - matches.value_of(OPT_SUFFIX).map(String::from).unwrap() - } else if let (Ok(s), BackupMode::SimpleBackup) = - (env::var("SIMPLE_BACKUP_SUFFIX"), backup_mode) - { - s - } else { - "~".to_owned() - } -} - fn exec(files: &[PathBuf], b: Behavior) -> i32 { if let Some(ref name) = b.target_dir { return move_files_into_dir(files, &PathBuf::from(name), &b); @@ -389,12 +347,7 @@ fn rename(from: &Path, to: &Path, b: &Behavior) -> io::Result<()> { OverwriteMode::Force => {} }; - backup_path = match b.backup { - BackupMode::NoBackup => None, - BackupMode::SimpleBackup => Some(simple_backup_path(to, &b.suffix)), - BackupMode::NumberedBackup => Some(numbered_backup_path(to)), - BackupMode::ExistingBackup => Some(existing_backup_path(to, &b.suffix)), - }; + backup_path = backup_control::get_backup_path(b.backup, to, &b.suffix); if let Some(ref backup_path) = backup_path { rename_with_fallback(to, backup_path)?; } @@ -514,28 +467,6 @@ fn read_yes() -> bool { } } -fn simple_backup_path(path: &Path, suffix: &str) -> PathBuf { - let mut p = path.to_string_lossy().into_owned(); - p.push_str(suffix); - PathBuf::from(p) -} - -fn numbered_backup_path(path: &Path) -> PathBuf { - (1_u64..) 
- .map(|i| path.with_extension(format!("~{}~", i))) - .find(|p| !p.exists()) - .expect("cannot create backup") -} - -fn existing_backup_path(path: &Path, suffix: &str) -> PathBuf { - let test_path = path.with_extension("~1~"); - if test_path.exists() { - numbered_backup_path(path) - } else { - simple_backup_path(path, suffix) - } -} - fn is_empty_dir(path: &Path) -> bool { match fs::read_dir(path) { Ok(contents) => contents.peekable().peek().is_none(), diff --git a/tests/by-util/test_mv.rs b/tests/by-util/test_mv.rs index e0723a479..e0bdd9ef3 100644 --- a/tests/by-util/test_mv.rs +++ b/tests/by-util/test_mv.rs @@ -251,6 +251,40 @@ fn test_mv_simple_backup() { assert!(at.file_exists(&format!("{}~", file_b))); } +#[test] +fn test_mv_simple_backup_with_file_extension() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_simple_backup_file_a.txt"; + let file_b = "test_mv_simple_backup_file_b.txt"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("-b") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + +#[test] +fn test_mv_arg_backup_arg_first() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_simple_backup_file_a"; + let file_b = "test_mv_simple_backup_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup").arg(file_a).arg(file_b).succeeds(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + #[test] fn test_mv_custom_backup_suffix() { let (at, mut ucmd) = at_and_ucmd!(); @@ -293,7 +327,7 @@ fn test_mv_custom_backup_suffix_via_env() { } #[test] -fn test_mv_backup_numbering() { +fn test_mv_backup_numbered_with_t() { let (at, mut ucmd) = at_and_ucmd!(); let file_a = "test_mv_backup_numbering_file_a"; let file_b = "test_mv_backup_numbering_file_b"; @@ -311,6 +345,25 @@ fn test_mv_backup_numbering() { 
assert!(at.file_exists(&format!("{}.~1~", file_b))); } +#[test] +fn test_mv_backup_numbered() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup=numbered") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}.~1~", file_b))); +} + #[test] fn test_mv_backup_existing() { let (at, mut ucmd) = at_and_ucmd!(); @@ -330,6 +383,67 @@ fn test_mv_backup_existing() { assert!(at.file_exists(&format!("{}~", file_b))); } +#[test] +fn test_mv_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup=nil") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + +#[test] +fn test_mv_numbered_if_existing_backup_existing() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + let file_b_backup = "test_mv_backup_numbering_file_b.~1~"; + + at.touch(file_a); + at.touch(file_b); + at.touch(file_b_backup); + ucmd.arg("--backup=existing") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(file_b)); + assert!(at.file_exists(file_b_backup)); + assert!(at.file_exists(&*format!("{}.~2~", file_b))); +} + +#[test] +fn test_mv_numbered_if_existing_backup_nil() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + let file_b_backup = "test_mv_backup_numbering_file_b.~1~"; + + at.touch(file_a); + at.touch(file_b); + 
at.touch(file_b_backup); + ucmd.arg("--backup=nil") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(at.file_exists(file_b)); + assert!(at.file_exists(file_b_backup)); + assert!(at.file_exists(&*format!("{}.~2~", file_b))); +} + #[test] fn test_mv_backup_simple() { let (at, mut ucmd) = at_and_ucmd!(); @@ -349,6 +463,25 @@ fn test_mv_backup_simple() { assert!(at.file_exists(&format!("{}~", file_b))); } +#[test] +fn test_mv_backup_never() { + let (at, mut ucmd) = at_and_ucmd!(); + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; + + at.touch(file_a); + at.touch(file_b); + ucmd.arg("--backup=never") + .arg(file_a) + .arg(file_b) + .succeeds() + .no_stderr(); + + assert!(!at.file_exists(file_a)); + assert!(at.file_exists(file_b)); + assert!(at.file_exists(&format!("{}~", file_b))); +} + #[test] fn test_mv_backup_none() { let (at, mut ucmd) = at_and_ucmd!(); @@ -369,17 +502,14 @@ fn test_mv_backup_none() { } #[test] -fn test_mv_existing_backup() { +fn test_mv_backup_off() { let (at, mut ucmd) = at_and_ucmd!(); - let file_a = "test_mv_existing_backup_file_a"; - let file_b = "test_mv_existing_backup_file_b"; - let file_b_backup = "test_mv_existing_backup_file_b.~1~"; - let resulting_backup = "test_mv_existing_backup_file_b.~2~"; + let file_a = "test_mv_backup_numbering_file_a"; + let file_b = "test_mv_backup_numbering_file_b"; at.touch(file_a); at.touch(file_b); - at.touch(file_b_backup); - ucmd.arg("--backup=nil") + ucmd.arg("--backup=off") .arg(file_a) .arg(file_b) .succeeds() @@ -387,8 +517,19 @@ fn test_mv_existing_backup() { assert!(!at.file_exists(file_a)); assert!(at.file_exists(file_b)); - assert!(at.file_exists(file_b_backup)); - assert!(at.file_exists(resulting_backup)); + assert!(!at.file_exists(&format!("{}~", file_b))); +} + +#[test] +fn test_mv_backup_no_clobber_conflicting_options() { + let (_, mut ucmd) = at_and_ucmd!(); + + ucmd.arg("--backup") + .arg("--no-clobber") + .arg("file1") + 
.arg("file2") + .fails() + .stderr_is("mv: options --backup and --no-clobber are mutually exclusive\nTry 'mv --help' for more information."); } #[test] From 41bea72f23da42f0b5dded9b1955a3782cd1c063 Mon Sep 17 00:00:00 2001 From: Matt Blessed Date: Wed, 26 May 2021 18:28:17 -0400 Subject: [PATCH 144/148] cp: fix regressed issue with `--backup` and `-b` - add test for regressed issue --- src/uu/cp/src/cp.rs | 8 ++++++-- tests/by-util/test_cp.rs | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs index 7e64a288c..fab1dfec1 100644 --- a/src/uu/cp/src/cp.rs +++ b/src/uu/cp/src/cp.rs @@ -232,6 +232,7 @@ fn get_usage() -> String { static OPT_ARCHIVE: &str = "archive"; static OPT_ATTRIBUTES_ONLY: &str = "attributes-only"; static OPT_BACKUP: &str = "backup"; +static OPT_BACKUP_NO_ARG: &str = "b"; static OPT_CLI_SYMBOLIC_LINKS: &str = "cli-symbolic-links"; static OPT_CONTEXT: &str = "context"; static OPT_COPY_CONTENTS: &str = "copy-contents"; @@ -357,7 +358,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("remove each existing destination file before attempting to open it \ (contrast with --force). 
On Windows, current only works for writeable files.")) .arg(Arg::with_name(OPT_BACKUP) - .short("b") .long(OPT_BACKUP) .help("make a backup of each existing destination file") .takes_value(true) @@ -366,6 +366,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .possible_values(backup_control::BACKUP_CONTROL_VALUES) .value_name("CONTROL") ) + .arg(Arg::with_name(OPT_BACKUP_NO_ARG) + .short(OPT_BACKUP_NO_ARG) + .help("like --backup but does not accept an argument") + ) .arg(Arg::with_name(OPT_SUFFIX) .short("S") .long(OPT_SUFFIX) @@ -592,7 +596,7 @@ impl Options { || matches.is_present(OPT_ARCHIVE); let backup_mode = backup_control::determine_backup_mode( - matches.is_present(OPT_BACKUP), + matches.is_present(OPT_BACKUP_NO_ARG) || matches.is_present(OPT_BACKUP), matches.value_of(OPT_BACKUP), ); let backup_suffix = backup_control::determine_backup_suffix(matches.value_of(OPT_SUFFIX)); diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs index d41d3f6ed..d49219b04 100644 --- a/tests/by-util/test_cp.rs +++ b/tests/by-util/test_cp.rs @@ -316,6 +316,22 @@ fn test_cp_arg_backup() { ); } +#[test] +fn test_cp_arg_backup_with_other_args() { + let (at, mut ucmd) = at_and_ucmd!(); + + ucmd.arg(TEST_HELLO_WORLD_SOURCE) + .arg(TEST_HOW_ARE_YOU_SOURCE) + .arg("-vbL") + .succeeds(); + + assert_eq!(at.read(TEST_HOW_ARE_YOU_SOURCE), "Hello, World!\n"); + assert_eq!( + at.read(&*format!("{}~", TEST_HOW_ARE_YOU_SOURCE)), + "How are you?\n" + ); +} + #[test] fn test_cp_arg_backup_arg_first() { let (at, mut ucmd) = at_and_ucmd!(); From 052ee22ce019faf7a7021f81430512df9c39b901 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 27 May 2021 18:20:15 +0200 Subject: [PATCH 145/148] Bump MSRV to 1.43.1 --- .github/workflows/CICD.yml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CICD.yml b/.github/workflows/CICD.yml index 977a86915..804720bea 100644 --- a/.github/workflows/CICD.yml +++ b/.github/workflows/CICD.yml @@ 
-11,7 +11,7 @@ env: PROJECT_NAME: coreutils PROJECT_DESC: "Core universal (cross-platform) utilities" PROJECT_AUTH: "uutils" - RUST_MIN_SRV: "1.40.0" ## v1.40.0 + RUST_MIN_SRV: "1.43.1" ## v1.43.0 RUST_COV_SRV: "2020-08-01" ## (~v1.47.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel on: [push, pull_request] diff --git a/README.md b/README.md index 6b29fa854..1365bf7ce 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ to compile anywhere, and this is as good a way as any to try and learn it. ### Rust Version uutils follows Rust's release channels and is tested against stable, beta and nightly. -The current oldest supported version of the Rust compiler is `1.40.0`. +The current oldest supported version of the Rust compiler is `1.43.1`. On both Windows and Redox, only the nightly version is tested currently. From 825476f57314565e306287e72e2f5c690a749567 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 27 May 2021 20:25:24 +0200 Subject: [PATCH 146/148] Update tempfile --- Cargo.lock | 79 ++++++++++++++++++++++++++++++++++++++++++++++-------- Cargo.toml | 4 +-- 2 files changed, 69 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e1470c88..2060edfa2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -707,7 +707,18 @@ checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ "cfg-if 1.0.0", "libc", - "wasi", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.10.2+wasi-snapshot-preview1", ] [[package]] @@ -1253,14 +1264,26 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ - "getrandom", + "getrandom 0.1.16", "libc", - "rand_chacha", + "rand_chacha 0.2.2", "rand_core 0.5.1", - "rand_hc", + "rand_hc 0.2.0", "rand_pcg", ] +[[package]] +name = "rand" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +dependencies = [ + "libc", + "rand_chacha 0.3.0", + "rand_core 0.6.2", + "rand_hc 0.3.0", +] + [[package]] name = "rand_chacha" version = "0.2.2" @@ -1271,6 +1294,16 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_chacha" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.2", +] + [[package]] name = "rand_core" version = "0.3.1" @@ -1292,7 +1325,16 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "getrandom", + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +dependencies = [ + "getrandom 0.2.3", ] [[package]] @@ -1304,6 +1346,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_hc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +dependencies = [ + "rand_core 0.6.2", +] + [[package]] name = "rand_pcg" version = "0.2.1" @@ -1623,14 +1674,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.1.0" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "libc", - "rand 0.7.3", - "redox_syscall 0.1.57", + "rand 0.8.3", + "redox_syscall 0.2.8", "remove_dir_all", "winapi 0.3.9", ] @@ -2053,7 +2104,7 @@ dependencies = [ "array-init", "criterion", "rand 0.7.3", - "rand_chacha", + "rand_chacha 0.2.2", "uu_factor", ] @@ -2877,6 +2928,12 @@ version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasm-bindgen" version = "0.2.74" diff --git a/Cargo.toml b/Cargo.toml index 745393260..fdf45e484 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -342,9 +342,7 @@ pretty_assertions = "0.7.2" rand = "0.7" regex = "1.0" sha1 = { version="0.6", features=["std"] } -## tempfile 3.2 depends on recent version of rand which depends on getrandom v0.2 which has compiler errors for MinRustV v1.32.0 -## min dep for tempfile = Rustc 1.40 -tempfile = "= 3.1.0" +tempfile = "3.2.0" time = "0.1" unindent = "0.1" uucore = { version=">=0.0.8", package="uucore", path="src/uucore", features=["entries"] } From ebe6341ae37dee47ce94861960ddadc012a127bb Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 27 May 2021 22:47:03 +0200 Subject: [PATCH 147/148] chore: replace tempdir with tempfile --- Cargo.lock | 2 +- Cargo.toml | 1 - src/uu/sort/Cargo.toml | 2 +- src/uu/sort/src/ext_sort.rs | 4 ++-- tests/by-util/test_cat.rs | 3 +-- tests/by-util/test_ls.rs | 6 ++---- 6 files changed, 7 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e1470c88..547e9bc6e 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -2536,7 +2536,7 @@ dependencies = [ "rand 0.7.3", "rayon", "semver 0.9.0", - "tempdir", + "tempfile", "unicode-width", "uucore", "uucore_procs", diff --git a/Cargo.toml b/Cargo.toml index 745393260..9f499529b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -349,7 +349,6 @@ time = "0.1" unindent = "0.1" uucore = { version=">=0.0.8", package="uucore", path="src/uucore", features=["entries"] } walkdir = "2.2" -tempdir = "0.3" [target.'cfg(unix)'.dev-dependencies] rust-users = { version="0.10", package="users" } diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index 724744dc4..f06610248 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -25,7 +25,7 @@ ouroboros = "0.9.3" rand = "0.7" rayon = "1.5" semver = "0.9.0" -tempdir = "0.3.7" +tempfile = "3" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index a304bf7c0..23a55aad0 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -23,7 +23,7 @@ use std::{ use itertools::Itertools; -use tempdir::TempDir; +use tempfile::TempDir; use crate::{ chunks::{self, Chunk}, @@ -34,7 +34,7 @@ const MIN_BUFFER_SIZE: usize = 8_000; /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. 
pub fn ext_sort(files: &mut impl Iterator>, settings: &GlobalSettings) { - let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); + let tmp_dir = crash_if_err!(1, tempfile::Builder::new().prefix("uutils_sort").tempdir_in(&settings.tmp_dir)); let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1); let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1); thread::spawn({ diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index 4bb673b95..adda905b3 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -406,10 +406,9 @@ fn test_domain_socket() { use std::io::prelude::*; use std::sync::{Arc, Barrier}; use std::thread; - use tempdir::TempDir; use unix_socket::UnixListener; - let dir = TempDir::new("unix_socket").expect("failed to create dir"); + let dir = tempfile::Builder::new().prefix("unix_socket").tempdir().expect("failed to create dir"); let socket_path = dir.path().join("sock"); let listener = UnixListener::bind(&socket_path).expect("failed to create socket"); diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 2ae57ad7f..01c5ab5c4 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -19,9 +19,7 @@ use std::path::PathBuf; #[cfg(not(windows))] use std::sync::Mutex; #[cfg(not(windows))] -extern crate tempdir; -#[cfg(not(windows))] -use self::tempdir::TempDir; +extern crate tempfile; #[cfg(not(windows))] lazy_static! 
{ @@ -1087,7 +1085,7 @@ fn test_ls_indicator_style() { { use self::unix_socket::UnixListener; - let dir = TempDir::new("unix_socket").expect("failed to create dir"); + let dir = tempfile::Builder::new().prefix("unix_socket").tempdir().expect("failed to create dir"); let socket_path = dir.path().join("sock"); let _listener = UnixListener::bind(&socket_path).expect("failed to create socket"); From 835a17d79f59891458d7701daad7c664576448d7 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 27 May 2021 22:48:10 +0200 Subject: [PATCH 148/148] mktemp: use tempfile instead of custom tempdir --- Cargo.lock | 33 ---------------- src/uu/mktemp/src/mktemp.rs | 76 ++++++++++++++++++------------------ src/uu/mktemp/src/tempdir.rs | 51 ------------------------ 3 files changed, 39 insertions(+), 121 deletions(-) delete mode 100644 src/uu/mktemp/src/tempdir.rs diff --git a/Cargo.lock b/Cargo.lock index 547e9bc6e..1d4cdca93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -254,7 +254,6 @@ dependencies = [ "rand 0.7.3", "regex", "sha1", - "tempdir", "tempfile", "textwrap", "time", @@ -1221,19 +1220,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi 0.3.9", -] - [[package]] name = "rand" version = "0.5.6" @@ -1338,15 +1324,6 @@ dependencies = [ "num_cpus", ] -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - [[package]] name = "redox_syscall" version = "0.1.57" @@ -1611,16 +1588,6 @@ dependencies = [ "unicode-xid 0.2.2", ] -[[package]] -name = "tempdir" -version = "0.3.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" -dependencies = [ - "rand 0.4.6", - "remove_dir_all", -] - [[package]] name = "tempfile" version = "3.1.0" diff --git a/src/uu/mktemp/src/mktemp.rs b/src/uu/mktemp/src/mktemp.rs index 112c2fb94..d66dd3d57 100644 --- a/src/uu/mktemp/src/mktemp.rs +++ b/src/uu/mktemp/src/mktemp.rs @@ -15,14 +15,11 @@ use clap::{App, Arg}; use std::env; use std::iter; -use std::mem::forget; use std::path::{is_separator, PathBuf}; use rand::Rng; use tempfile::Builder; -mod tempdir; - static ABOUT: &str = "create a temporary file or directory."; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -214,49 +211,54 @@ pub fn dry_exec(mut tmpdir: PathBuf, prefix: &str, rand: usize, suffix: &str) -> } fn exec( - tmpdir: PathBuf, + dir: PathBuf, prefix: &str, rand: usize, suffix: &str, make_dir: bool, quiet: bool, ) -> i32 { - if make_dir { - match tempdir::new_in(&tmpdir, prefix, rand, suffix) { - Ok(ref f) => { - println!("{}", f); - return 0; - } - Err(e) => { - if !quiet { - show_error!("{}: {}", e, tmpdir.display()); + let res = if make_dir { + let tmpdir = Builder::new() + .prefix(prefix) + .rand_bytes(rand) + .suffix(suffix) + .tempdir_in(&dir); + + // `into_path` consumes the TempDir without removing it + tmpdir.map(|d| d.into_path().to_string_lossy().to_string()) + } else { + let tmpfile = Builder::new() + .prefix(prefix) + .rand_bytes(rand) + .suffix(suffix) + .tempfile_in(&dir); + + match tmpfile { + Ok(f) => { + // `keep` ensures that the file is not deleted + match f.keep() { + Ok((_, p)) => Ok(p.to_string_lossy().to_string()), + Err(e) => { + show_error!("'{}': {}", dir.display(), e); + return 1; + } } - return 1; } - } - } - let tmpfile = Builder::new() - .prefix(prefix) - .rand_bytes(rand) - .suffix(suffix) - .tempfile_in(tmpdir); - let tmpfile = match tmpfile { - Ok(f) => f, - Err(e) => { - if !quiet { - show_error!("failed to create 
tempfile: {}", e); - } - return 1; + Err(x) => Err(x) } }; - let tmpname = tmpfile.path().to_string_lossy().to_string(); - - println!("{}", tmpname); - - // CAUTION: Not to call `drop` of tmpfile, which removes the tempfile, - // I call a dangerous function `forget`. - forget(tmpfile); - - 0 + match res { + Ok(ref f) => { + println!("{}", f); + 0 + } + Err(e) => { + if !quiet { + show_error!("{}: {}", e, dir.display()); + } + 1 + } + } } diff --git a/src/uu/mktemp/src/tempdir.rs b/src/uu/mktemp/src/tempdir.rs deleted file mode 100644 index 1b6c9d7b3..000000000 --- a/src/uu/mktemp/src/tempdir.rs +++ /dev/null @@ -1,51 +0,0 @@ -// spell-checker:ignore (ToDO) tempdir tmpdir - -// Mainly taken from crate `tempdir` - -use rand::distributions::Alphanumeric; -use rand::{thread_rng, Rng}; - -use std::io::Result as IOResult; -use std::io::{Error, ErrorKind}; -use std::path::Path; - -// How many times should we (re)try finding an unused random name? It should be -// enough that an attacker will run out of luck before we run out of patience. 
-const NUM_RETRIES: u32 = 1 << 31; - -#[cfg(any(unix, target_os = "redox"))] -fn create_dir>(path: P) -> IOResult<()> { - use std::fs::DirBuilder; - use std::os::unix::fs::DirBuilderExt; - - DirBuilder::new().mode(0o700).create(path) -} - -#[cfg(windows)] -fn create_dir>(path: P) -> IOResult<()> { - ::std::fs::create_dir(path) -} - -pub fn new_in>( - tmpdir: P, - prefix: &str, - rand: usize, - suffix: &str, -) -> IOResult { - let mut rng = thread_rng(); - for _ in 0..NUM_RETRIES { - let rand_chars: String = rng.sample_iter(&Alphanumeric).take(rand).collect(); - let leaf = format!("{}{}{}", prefix, rand_chars, suffix); - let path = tmpdir.as_ref().join(&leaf); - match create_dir(&path) { - Ok(_) => return Ok(path.to_string_lossy().into_owned()), - Err(ref e) if e.kind() == ErrorKind::AlreadyExists => {} - Err(e) => return Err(e), - } - } - - Err(Error::new( - ErrorKind::AlreadyExists, - "too many temporary directories already exist", - )) -}