diff --git a/src/uu/chmod/src/chmod.rs b/src/uu/chmod/src/chmod.rs index d01f0316e..88e3403fe 100644 --- a/src/uu/chmod/src/chmod.rs +++ b/src/uu/chmod/src/chmod.rs @@ -15,6 +15,7 @@ use std::fs; use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::Path; use uucore::fs::display_permissions_unix; +use uucore::libc::mode_t; #[cfg(not(windows))] use uucore::mode; use uucore::InvalidEncodingHandling; @@ -306,7 +307,7 @@ impl Chmoder { "mode of '{}' retained as {:04o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), ); } Ok(()) @@ -319,9 +320,9 @@ impl Chmoder { "failed to change mode of file '{}' from {:o} ({}) to {:o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), mode, - display_permissions_unix(mode) + display_permissions_unix(mode as mode_t, false) ); } Err(1) @@ -331,9 +332,9 @@ impl Chmoder { "mode of '{}' changed from {:o} ({}) to {:o} ({})", file.display(), fperm, - display_permissions_unix(fperm), + display_permissions_unix(fperm as mode_t, false), mode, - display_permissions_unix(mode) + display_permissions_unix(mode as mode_t, false) ); } Ok(()) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index f24bf513e..36f0ad758 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1480,9 +1480,8 @@ fn display_item_long( let _ = write!( out, - "{}{} {}", - display_file_type(md.file_type()), - display_permissions(&md), + "{} {}", + display_permissions(&md, true), pad_left(display_symlink_count(&md), max_links), ); @@ -1668,16 +1667,6 @@ fn display_size(len: u64, config: &Config) -> String { } } -fn display_file_type(file_type: FileType) -> char { - if file_type.is_dir() { - 'd' - } else if file_type.is_symlink() { - 'l' - } else { - '-' - } -} - #[cfg(unix)] fn file_is_executable(md: &Metadata) -> bool { // Mode always returns u32, but the flags might not be, based on the platform diff --git a/src/uu/sort/src/external_sort/mod.rs b/src/uu/sort/src/external_sort/mod.rs index 662250e1d..af6902367 100644 --- a/src/uu/sort/src/external_sort/mod.rs +++ b/src/uu/sort/src/external_sort/mod.rs @@ -1,91 +1,33 @@ -use std::cmp::Ordering; -use std::collections::VecDeque; -use std::fs::{File, OpenOptions}; -use std::io::SeekFrom; -use std::io::{BufRead, BufReader, BufWriter, Seek, Write}; +use std::fs::OpenOptions; +use std::io::{BufWriter, Write}; use std::path::Path; use tempdir::TempDir; +use crate::{file_to_lines_iter, FileMerger}; + use super::{GlobalSettings, Line}; /// Iterator that provides sorted `T`s -pub struct ExtSortedIterator { - buffers: Vec>, - chunk_offsets: Vec, - max_per_chunk: usize, - chunks: usize, - tmp_dir: TempDir, - settings: GlobalSettings, - failed: bool, +pub struct ExtSortedIterator<'a> { + file_merger: FileMerger<'a>, + // Keep tmp_dir around, it is deleted when dropped. + _tmp_dir: TempDir, } -impl Iterator for ExtSortedIterator { +impl<'a> Iterator for ExtSortedIterator<'a> { type Item = Line; - - /// # Errors - /// - /// This method can fail due to issues reading intermediate sorted chunks - /// from disk fn next(&mut self) -> Option { - if self.failed { - return None; - } - // fill up any empty buffers - let mut empty = true; - for chunk_num in 0..self.chunks { - if self.buffers[chunk_num as usize].is_empty() { - let mut f = crash_if_err!( - 1, - File::open(self.tmp_dir.path().join(chunk_num.to_string())) - ); - crash_if_err!(1, f.seek(SeekFrom::Start(self.chunk_offsets[chunk_num]))); - let bytes_read = fill_buff( - &mut self.buffers[chunk_num as usize], - f, - self.max_per_chunk, - &self.settings, - ); - self.chunk_offsets[chunk_num as usize] += bytes_read as u64; - if !self.buffers[chunk_num as usize].is_empty() { - empty = false; - } - } else { - empty = false; - } - } - if empty { - return None; - } - - // find the next record to write - // check is_empty() before unwrap()ing - let mut idx = 0; - for chunk_num in 0..self.chunks as usize { - if !self.buffers[chunk_num].is_empty() - && (self.buffers[idx].is_empty() - || super::compare_by( - self.buffers[chunk_num].front().unwrap(), - self.buffers[idx].front().unwrap(), - &self.settings, - ) == Ordering::Less) - { - idx = chunk_num; - } - } - - // unwrap due to checks above - let r = self.buffers[idx].pop_front().unwrap(); - Some(r) + self.file_merger.next() } } /// Sort (based on `compare`) the `T`s provided by `unsorted` and return an /// iterator /// -/// # Errors +/// # Panics /// -/// This method can fail due to issues writing intermediate sorted chunks +/// This method can panic due to issues writing intermediate sorted chunks /// to disk. pub fn ext_sort( unsorted: impl Iterator, @@ -93,19 +35,12 @@ pub fn ext_sort( ) -> ExtSortedIterator { let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort")); - let mut iter = ExtSortedIterator { - buffers: Vec::new(), - chunk_offsets: Vec::new(), - max_per_chunk: 0, - chunks: 0, - tmp_dir, - settings: settings.clone(), - failed: false, - }; - let mut total_read = 0; let mut chunk = Vec::new(); + let mut chunks_read = 0; + let mut file_merger = FileMerger::new(settings); + // make the initial chunks on disk for seq in unsorted { let seq_size = seq.estimate_size(); @@ -113,65 +48,35 @@ pub fn ext_sort( chunk.push(seq); - if total_read + chunk.len() * std::mem::size_of::() >= settings.buffer_size { + if total_read >= settings.buffer_size && chunk.len() >= 2 { super::sort_by(&mut chunk, &settings); - write_chunk( - settings, - &iter.tmp_dir.path().join(iter.chunks.to_string()), - &mut chunk, - ); + + let file_path = tmp_dir.path().join(chunks_read.to_string()); + write_chunk(settings, &file_path, &mut chunk); chunk.clear(); total_read = 0; - iter.chunks += 1; + chunks_read += 1; + + file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap())) } } // write the last chunk if !chunk.is_empty() { super::sort_by(&mut chunk, &settings); + + let file_path = tmp_dir.path().join(chunks_read.to_string()); write_chunk( settings, - &iter.tmp_dir.path().join(iter.chunks.to_string()), + &tmp_dir.path().join(chunks_read.to_string()), &mut chunk, ); - iter.chunks += 1; + + file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap())); } - - // We manually drop here to not go over our memory limit when we allocate below. - drop(chunk); - - // initialize buffers for each chunk - // - // Having a right sized buffer for each chunk for smallish values seems silly to me? - // - // We will have to have the entire iter in memory sometime right? - // Set minimum to the size of the writer buffer, ~8K - - const MINIMUM_READBACK_BUFFER: usize = 8200; - let right_sized_buffer = settings - .buffer_size - .checked_div(iter.chunks) - .unwrap_or(settings.buffer_size); - iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER { - right_sized_buffer - } else { - MINIMUM_READBACK_BUFFER - }; - iter.buffers = vec![VecDeque::new(); iter.chunks]; - iter.chunk_offsets = vec![0; iter.chunks]; - for chunk_num in 0..iter.chunks { - let offset = fill_buff( - &mut iter.buffers[chunk_num], - crash_if_err!( - 1, - File::open(iter.tmp_dir.path().join(chunk_num.to_string())) - ), - iter.max_per_chunk, - &settings, - ); - iter.chunk_offsets[chunk_num] = offset as u64; + ExtSortedIterator { + file_merger, + _tmp_dir: tmp_dir, } - - iter } fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec) { @@ -186,29 +91,3 @@ fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec) { } crash_if_err!(1, buf_write.flush()); } - -fn fill_buff( - vec: &mut VecDeque, - file: File, - max_bytes: usize, - settings: &GlobalSettings, -) -> usize { - let mut total_read = 0; - let mut bytes_read = 0; - for line in BufReader::new(file).split(if settings.zero_terminated { - b'\0' - } else { - b'\n' - }) { - let line_s = String::from_utf8(crash_if_err!(1, line)).unwrap(); - bytes_read += line_s.len() + 1; - let deserialized = Line::new(line_s, settings); - total_read += deserialized.estimate_size(); - vec.push_back(deserialized); - if total_read > max_bytes { - break; - } - } - - bytes_read -} diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 71d912f33..02e54baf8 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -32,6 +32,7 @@ use semver::Version; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::env; +use std::ffi::OsStr; use std::fs::File; use std::hash::{Hash, Hasher}; use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; @@ -1122,10 +1123,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 { exec(files, settings) } -fn file_to_lines_iter<'a>( - file: &str, - settings: &'a GlobalSettings, -) -> Option + 'a> { +fn file_to_lines_iter( + file: impl AsRef, + settings: &'_ GlobalSettings, +) -> Option + '_> { let (reader, _) = match open(file) { Some(x) => x, None => return None, @@ -1190,7 +1191,7 @@ fn exec(files: Vec, settings: GlobalSettings) -> i32 { let mut lines = vec![]; // This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression. - for (file, _) in files.iter().map(|file| open(file)).flatten() { + for (file, _) in files.iter().map(open).flatten() { let buf_reader = BufReader::new(file); for line in buf_reader.split(if settings.zero_terminated { b'\0' @@ -1517,7 +1518,8 @@ fn print_sorted>(iter: T, settings: &GlobalSettings) { } // from cat.rs -fn open(path: &str) -> Option<(Box, bool)> { +fn open(path: impl AsRef) -> Option<(Box, bool)> { + let path = path.as_ref(); if path == "-" { let stdin = stdin(); return Some((Box::new(stdin) as Box, is_stdin_interactive())); @@ -1526,7 +1528,7 @@ fn open(path: &str) -> Option<(Box, bool)> { match File::open(Path::new(path)) { Ok(f) => Some((Box::new(f) as Box, false)), Err(e) => { - show_error!("{0}: {1}", path, e.to_string()); + show_error!("{0:?}: {1}", path, e.to_string()); None } } diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 4f80e25a3..726c9b8cd 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -13,11 +13,11 @@ extern crate uucore; mod platform; use clap::{App, Arg}; -use std::char; use std::env; use std::fs::File; -use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write}; use std::path::Path; +use std::{char, fs::remove_file}; static NAME: &str = "split"; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -213,107 +213,145 @@ struct Settings { verbose: bool, } -struct SplitControl { - current_line: String, // Don't touch - request_new_file: bool, // Splitter implementation requests new file -} - trait Splitter { - // Consume the current_line and return the consumed string - fn consume(&mut self, _: &mut SplitControl) -> String; + // Consume as much as possible from `reader` so as to saturate `writer`. + // Equivalent to finishing one of the part files. Returns the number of + // bytes that have been moved. + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> u128; } struct LineSplitter { - saved_lines_to_write: usize, - lines_to_write: usize, + lines_per_split: usize, } impl LineSplitter { fn new(settings: &Settings) -> LineSplitter { - let n = match settings.strategy_param.parse() { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of lines: {}", e), - }; LineSplitter { - saved_lines_to_write: n, - lines_to_write: n, + lines_per_split: settings + .strategy_param + .parse() + .unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)), } } } impl Splitter for LineSplitter { - fn consume(&mut self, control: &mut SplitControl) -> String { - self.lines_to_write -= 1; - if self.lines_to_write == 0 { - self.lines_to_write = self.saved_lines_to_write; - control.request_new_file = true; + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> u128 { + let mut bytes_consumed = 0u128; + let mut buffer = String::with_capacity(1024); + for _ in 0..self.lines_per_split { + let bytes_read = reader + .read_line(&mut buffer) + .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); + // If we ever read 0 bytes then we know we've hit EOF. + if bytes_read == 0 { + return bytes_consumed; + } + + writer + .write_all(buffer.as_bytes()) + .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); + // Empty out the String buffer since `read_line` appends instead of + // replaces. + buffer.clear(); + + bytes_consumed += bytes_read as u128; } - control.current_line.clone() + + bytes_consumed } } struct ByteSplitter { - saved_bytes_to_write: usize, - bytes_to_write: usize, - break_on_line_end: bool, - require_whole_line: bool, + bytes_per_split: u128, } impl ByteSplitter { fn new(settings: &Settings) -> ByteSplitter { - let mut strategy_param: Vec = settings.strategy_param.chars().collect(); - let suffix = strategy_param.pop().unwrap(); - let multiplier = match suffix { - '0'..='9' => 1usize, - 'b' => 512usize, - 'k' => 1024usize, - 'm' => 1024usize * 1024usize, - _ => crash!(1, "invalid number of bytes"), - }; - let n = if suffix.is_alphabetic() { - match strategy_param - .iter() - .cloned() - .collect::() - .parse::() - { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of bytes: {}", e), - } - } else { - match settings.strategy_param.parse::() { - Ok(a) => a, - Err(e) => crash!(1, "invalid number of bytes: {}", e), - } - }; + // These multipliers are the same as supported by GNU coreutils. + let modifiers: Vec<(&str, u128)> = vec![ + ("K", 1024u128), + ("M", 1024 * 1024), + ("G", 1024 * 1024 * 1024), + ("T", 1024 * 1024 * 1024 * 1024), + ("P", 1024 * 1024 * 1024 * 1024 * 1024), + ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("KB", 1000), + ("MB", 1000 * 1000), + ("GB", 1000 * 1000 * 1000), + ("TB", 1000 * 1000 * 1000 * 1000), + ("PB", 1000 * 1000 * 1000 * 1000 * 1000), + ("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000), + ]; + + // This sequential find is acceptable since none of the modifiers are + // suffixes of any other modifiers, a la Huffman codes. + let (suffix, multiplier) = modifiers + .iter() + .find(|(suffix, _)| settings.strategy_param.ends_with(suffix)) + .unwrap_or(&("", 1)); + + // Try to parse the actual numeral. + let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())] + .parse::() + .unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e)); + ByteSplitter { - saved_bytes_to_write: n * multiplier, - bytes_to_write: n * multiplier, - break_on_line_end: settings.strategy == "b", - require_whole_line: false, + bytes_per_split: n * multiplier, } } } impl Splitter for ByteSplitter { - fn consume(&mut self, control: &mut SplitControl) -> String { - let line = control.current_line.clone(); - let n = std::cmp::min(line.chars().count(), self.bytes_to_write); - if self.require_whole_line && n < line.chars().count() { - self.bytes_to_write = self.saved_bytes_to_write; - control.request_new_file = true; - self.require_whole_line = false; - return "".to_owned(); + fn consume( + &mut self, + reader: &mut BufReader>, + writer: &mut BufWriter>, + ) -> u128 { + // We buffer reads and writes. We proceed until `bytes_consumed` is + // equal to `self.bytes_per_split` or we reach EOF. + let mut bytes_consumed = 0u128; + const BUFFER_SIZE: usize = 1024; + let mut buffer = [0u8; BUFFER_SIZE]; + while bytes_consumed < self.bytes_per_split { + // Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min + // doesn't really work since we have to get types to match which + // can't be done in a way that keeps all conversions safe. + let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed { + BUFFER_SIZE + } else { + // This is a safe conversion since the difference must be less + // than BUFFER_SIZE in this branch. + (self.bytes_per_split - bytes_consumed) as usize + }; + let bytes_read = reader + .read(&mut buffer[0..bytes_desired]) + .unwrap_or_else(|_| crash!(1, "error reading bytes from input file")); + // If we ever read 0 bytes then we know we've hit EOF. + if bytes_read == 0 { + return bytes_consumed; + } + + writer + .write_all(&buffer[0..bytes_read]) + .unwrap_or_else(|_| crash!(1, "error writing bytes to output file")); + + bytes_consumed += bytes_read as u128; } - self.bytes_to_write -= n; - if n == 0 { - self.bytes_to_write = self.saved_bytes_to_write; - control.request_new_file = true; - } - if self.break_on_line_end && n == line.chars().count() { - self.require_whole_line = self.break_on_line_end; - } - line[..n].to_owned() + + bytes_consumed } } @@ -353,14 +391,13 @@ fn split(settings: &Settings) -> i32 { let mut reader = BufReader::new(if settings.input == "-" { Box::new(stdin()) as Box } else { - let r = match File::open(Path::new(&settings.input)) { - Ok(a) => a, - Err(_) => crash!( + let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| { + crash!( 1, "cannot open '{}' for reading: No such file or directory", settings.input - ), - }; + ) + }); Box::new(r) as Box }); @@ -370,48 +407,39 @@ fn split(settings: &Settings) -> i32 { a => crash!(1, "strategy {} not supported", a), }; - let mut control = SplitControl { - current_line: "".to_owned(), // Request new line - request_new_file: true, // Request new file - }; - - let mut writer = BufWriter::new(Box::new(stdout()) as Box); let mut fileno = 0; loop { - if control.current_line.chars().count() == 0 { - match reader.read_line(&mut control.current_line) { - Ok(0) | Err(_) => break, - _ => {} + // Get a new part file set up, and construct `writer` for it. + let mut filename = settings.prefix.clone(); + filename.push_str( + if settings.numeric_suffix { + num_prefix(fileno, settings.suffix_length) + } else { + str_prefix(fileno, settings.suffix_length) } - } - if control.request_new_file { - let mut filename = settings.prefix.clone(); - filename.push_str( - if settings.numeric_suffix { - num_prefix(fileno, settings.suffix_length) - } else { - str_prefix(fileno, settings.suffix_length) - } - .as_ref(), - ); - filename.push_str(settings.additional_suffix.as_ref()); + .as_ref(), + ); + filename.push_str(settings.additional_suffix.as_ref()); + let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); - crash_if_err!(1, writer.flush()); - fileno += 1; - writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); - control.request_new_file = false; - if settings.verbose { - println!("creating file '{}'", filename); + let bytes_consumed = splitter.consume(&mut reader, &mut writer); + writer + .flush() + .unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e)); + + // If we didn't write anything we should clean up the empty file, and + // break from the loop. + if bytes_consumed == 0 { + // The output file is only ever created if --filter isn't used. + // Complicated, I know... + if settings.filter.is_none() { + remove_file(filename) + .unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e)); } + break; } - let consumed = splitter.consume(&mut control); - crash_if_err!(1, writer.write_all(consumed.as_bytes())); - - let advance = consumed.chars().count(); - let clone = control.current_line.clone(); - let sl = clone; - control.current_line = sl[advance..sl.chars().count()].to_owned(); + fileno += 1; } 0 } diff --git a/src/uu/stat/Cargo.toml b/src/uu/stat/Cargo.toml index 96bf63ffe..c325c20db 100644 --- a/src/uu/stat/Cargo.toml +++ b/src/uu/stat/Cargo.toml @@ -17,7 +17,7 @@ path = "src/stat.rs" [dependencies] clap = "2.33" time = "0.1.40" -uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] } +uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs"] } uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" } [[bin]] diff --git a/src/uu/stat/src/fsext.rs b/src/uu/stat/src/fsext.rs index d90099892..53280790e 100644 --- a/src/uu/stat/src/fsext.rs +++ b/src/uu/stat/src/fsext.rs @@ -41,13 +41,6 @@ impl BirthTime for Metadata { } } -#[macro_export] -macro_rules! has { - ($mode:expr, $perm:expr) => { - $mode & $perm != 0 - }; -} - pub fn pretty_time(sec: i64, nsec: i64) -> String { // sec == seconds since UNIX_EPOCH // nsec == nanoseconds since (UNIX_EPOCH + sec) @@ -81,65 +74,6 @@ pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str { } } -pub fn pretty_access(mode: mode_t) -> String { - let mut result = String::with_capacity(10); - result.push(match mode & S_IFMT { - S_IFDIR => 'd', - S_IFCHR => 'c', - S_IFBLK => 'b', - S_IFREG => '-', - S_IFIFO => 'p', - S_IFLNK => 'l', - S_IFSOCK => 's', - // TODO: Other file types - _ => '?', - }); - - result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISUID as mode_t) { - if has!(mode, S_IXUSR) { - 's' - } else { - 'S' - } - } else if has!(mode, S_IXUSR) { - 'x' - } else { - '-' - }); - - result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISGID as mode_t) { - if has!(mode, S_IXGRP) { - 's' - } else { - 'S' - } - } else if has!(mode, S_IXGRP) { - 'x' - } else { - '-' - }); - - result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); - result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISVTX as mode_t) { - if has!(mode, S_IXOTH) { - 't' - } else { - 'T' - } - } else if has!(mode, S_IXOTH) { - 'x' - } else { - '-' - }); - - result -} - use std::borrow::Cow; use std::convert::{AsRef, From}; use std::ffi::CString; diff --git a/src/uu/stat/src/stat.rs b/src/uu/stat/src/stat.rs index 5216fb293..d46c54910 100644 --- a/src/uu/stat/src/stat.rs +++ b/src/uu/stat/src/stat.rs @@ -7,13 +7,13 @@ // spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE -#[macro_use] mod fsext; pub use crate::fsext::*; #[macro_use] extern crate uucore; use uucore::entries; +use uucore::fs::display_permissions; use clap::{App, Arg, ArgMatches}; use std::borrow::Cow; @@ -575,7 +575,7 @@ impl Stater { } // access rights in human readable form 'A' => { - arg = pretty_access(meta.mode() as mode_t); + arg = display_permissions(&meta, true); otype = OutputType::Str; } // number of blocks allocated (see %B) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 3b70856fa..33b2ba5ec 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -12,8 +12,10 @@ extern crate uucore; mod count_bytes; mod countable; +mod wordcount; use count_bytes::count_bytes_fast; use countable::WordCountable; +use wordcount::{TitledWordCount, WordCount}; use clap::{App, Arg, ArgMatches}; use thiserror::Error; @@ -21,9 +23,7 @@ use thiserror::Error; use std::cmp::max; use std::fs::File; use std::io::{self, Write}; -use std::ops::{Add, AddAssign}; use std::path::Path; -use std::str::from_utf8; #[derive(Error, Debug)] pub enum WcError { @@ -82,51 +82,6 @@ impl Settings { } } -#[derive(Debug, Default, Copy, Clone)] -struct WordCount { - bytes: usize, - chars: usize, - lines: usize, - words: usize, - max_line_length: usize, -} - -impl Add for WordCount { - type Output = Self; - - fn add(self, other: Self) -> Self { - Self { - bytes: self.bytes + other.bytes, - chars: self.chars + other.chars, - lines: self.lines + other.lines, - words: self.words + other.words, - max_line_length: max(self.max_line_length, other.max_line_length), - } - } -} - -impl AddAssign for WordCount { - fn add_assign(&mut self, other: Self) { - *self = *self + other - } -} - -impl WordCount { - fn with_title(self, title: &str) -> TitledWordCount { - TitledWordCount { title, count: self } - } -} - -/// This struct supplements the actual word count with a title that is displayed -/// to the user at the end of the program. -/// The reason we don't simply include title in the `WordCount` struct is that -/// it would result in unneccesary copying of `String`. -#[derive(Debug, Default, Clone)] -struct TitledWordCount<'a> { - title: &'a str, - count: WordCount, -} - static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if more than one FILE is specified."; static VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -207,18 +162,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 { } } -const CR: u8 = b'\r'; -const LF: u8 = b'\n'; -const SPACE: u8 = b' '; -const TAB: u8 = b'\t'; -const SYN: u8 = 0x16_u8; -const FF: u8 = 0x0C_u8; - -#[inline(always)] -fn is_word_separator(byte: u8) -> bool { - byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF -} - fn word_count_from_reader( mut reader: T, settings: &Settings, @@ -239,58 +182,20 @@ fn word_count_from_reader( // we do not need to decode the byte stream if we're only counting bytes/newlines let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length; - let mut line_count: usize = 0; - let mut word_count: usize = 0; - let mut byte_count: usize = 0; - let mut char_count: usize = 0; - let mut longest_line_length: usize = 0; - let mut ends_lf: bool; - - // reading from a TTY seems to raise a condition on, rather than return Some(0) like a file. - // hence the option wrapped in a result here - for line_result in reader.lines() { - let raw_line = match line_result { - Ok(l) => l, + // Sum the WordCount for each line. Show a warning for each line + // that results in an IO error when trying to read it. + let total = reader + .lines() + .filter_map(|res| match res { + Ok(line) => Some(line), Err(e) => { show_warning!("Error while reading {}: {}", path, e); - continue; + None } - }; - - // GNU 'wc' only counts lines that end in LF as lines - ends_lf = *raw_line.last().unwrap() == LF; - line_count += ends_lf as usize; - - byte_count += raw_line.len(); - - if decode_chars { - // try and convert the bytes to UTF-8 first - let current_char_count; - match from_utf8(&raw_line[..]) { - Ok(line) => { - word_count += line.split_whitespace().count(); - current_char_count = line.chars().count(); - } - Err(..) => { - word_count += raw_line.split(|&x| is_word_separator(x)).count(); - current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count() - } - } - char_count += current_char_count; - if current_char_count > longest_line_length { - // -L is a GNU 'wc' extension so same behavior on LF - longest_line_length = current_char_count - (ends_lf as usize); - } - } - } - - Ok(WordCount { - bytes: byte_count, - chars: char_count, - lines: line_count, - words: word_count, - max_line_length: longest_line_length, - }) + }) + .map(|line| WordCount::from_line(&line, decode_chars)) + .sum(); + Ok(total) } fn word_count_from_path(path: &str, settings: &Settings) -> WcResult { @@ -323,7 +228,12 @@ fn wc(files: Vec, settings: &Settings) -> Result<(), u32> { error_count += 1; WordCount::default() }); - max_width = max(max_width, word_count.bytes.to_string().len() + 1); + // Compute the number of digits needed to display the number + // of bytes in the file. Even if the settings indicate that we + // won't *display* the number of bytes, we still use the + // number of digits in the byte count as the width when + // formatting each count as a string for output. + max_width = max(max_width, word_count.bytes.to_string().len()); total_word_count += word_count; results.push(word_count.with_title(path)); } @@ -364,19 +274,40 @@ fn print_stats( min_width = 0; } + let mut is_first: bool = true; + if settings.show_lines { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.lines, min_width)?; + is_first = false; } if settings.show_words { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.words, min_width)?; + is_first = false; } if settings.show_bytes { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?; + is_first = false; } if settings.show_chars { + if !is_first { + write!(stdout_lock, " ")?; + } write!(stdout_lock, "{:1$}", result.count.chars, min_width)?; + is_first = false; } if settings.show_max_line_length { + if !is_first { + write!(stdout_lock, " ")?; + } write!( stdout_lock, "{:1$}", diff --git a/src/uu/wc/src/wordcount.rs b/src/uu/wc/src/wordcount.rs new file mode 100644 index 000000000..785e57eff --- /dev/null +++ b/src/uu/wc/src/wordcount.rs @@ -0,0 +1,131 @@ +use std::cmp::max; +use std::iter::Sum; +use std::ops::{Add, AddAssign}; +use std::str::from_utf8; + +const CR: u8 = b'\r'; +const LF: u8 = b'\n'; +const SPACE: u8 = b' '; +const TAB: u8 = b'\t'; +const SYN: u8 = 0x16_u8; +const FF: u8 = 0x0C_u8; + +#[inline(always)] +fn is_word_separator(byte: u8) -> bool { + byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF +} + +#[derive(Debug, Default, Copy, Clone)] +pub struct WordCount { + pub bytes: usize, + pub chars: usize, + pub lines: usize, + pub words: usize, + pub max_line_length: usize, +} + +impl Add for WordCount { + type Output = Self; + + fn add(self, other: Self) -> Self { + Self { + bytes: self.bytes + other.bytes, + chars: self.chars + other.chars, + lines: self.lines + other.lines, + words: self.words + other.words, + max_line_length: max(self.max_line_length, other.max_line_length), + } + } +} + +impl AddAssign for WordCount { + fn add_assign(&mut self, other: Self) { + *self = *self + other + } +} + +impl Sum for WordCount { + fn sum(iter: I) -> WordCount + where + I: Iterator, + { + iter.fold(WordCount::default(), |acc, x| acc + x) + } +} + +impl WordCount { + /// Count the characters and whitespace-separated words in the given bytes. + /// + /// `line` is a slice of bytes that will be decoded as ASCII characters. + fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) { + let word_count = line.split(|&x| is_word_separator(x)).count(); + let char_count = line.iter().filter(|c| c.is_ascii()).count(); + (word_count, char_count) + } + + /// Create a [`WordCount`] from a sequence of bytes representing a line. + /// + /// If the last byte of `line` encodes a newline character (`\n`), + /// then the [`lines`] field will be set to 1. Otherwise, it will + /// be set to 0. The [`bytes`] field is simply the length of + /// `line`. + /// + /// If `decode_chars` is `false`, the [`chars`] and [`words`] + /// fields will be set to 0. If it is `true`, this function will + /// attempt to decode the bytes first as UTF-8, and failing that, + /// as ASCII. + pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount { + // GNU 'wc' only counts lines that end in LF as lines + let lines = (*line.last().unwrap() == LF) as usize; + let bytes = line.len(); + let (words, chars) = if decode_chars { + WordCount::word_and_char_count(line) + } else { + (0, 0) + }; + // -L is a GNU 'wc' extension so same behavior on LF + let max_line_length = if chars > 0 { chars - lines } else { 0 }; + WordCount { + bytes, + chars, + lines, + words, + max_line_length, + } + } + + /// Count the UTF-8 characters and words in the given string slice. + /// + /// `s` is a string slice that is assumed to be a UTF-8 string. + fn utf8_word_and_char_count(s: &str) -> (usize, usize) { + let word_count = s.split_whitespace().count(); + let char_count = s.chars().count(); + (word_count, char_count) + } + + pub fn with_title(self, title: &str) -> TitledWordCount { + TitledWordCount { title, count: self } + } + + /// Count the characters and words in the given slice of bytes. + /// + /// `line` is a slice of bytes that will be decoded as UTF-8 + /// characters, or if that fails, as ASCII characters. + fn word_and_char_count(line: &[u8]) -> (usize, usize) { + // try and convert the bytes to UTF-8 first + match from_utf8(line) { + Ok(s) => WordCount::utf8_word_and_char_count(s), + Err(..) => WordCount::ascii_word_and_char_count(line), + } + } +} + +/// This struct supplements the actual word count with a title that is displayed +/// to the user at the end of the program. +/// The reason we don't simply include title in the `WordCount` struct is that +/// it would result in unneccesary copying of `String`. +#[derive(Debug, Default, Clone)] +pub struct TitledWordCount<'a> { + pub title: &'a str, + pub count: WordCount, +} diff --git a/src/uucore/src/lib/features/fs.rs b/src/uucore/src/lib/features/fs.rs index a72d6ea82..040c36e95 100644 --- a/src/uucore/src/lib/features/fs.rs +++ b/src/uucore/src/lib/features/fs.rs @@ -8,8 +8,9 @@ #[cfg(unix)] use libc::{ - mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, - S_IXGRP, S_IXOTH, S_IXUSR, + mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP, + S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH, + S_IXUSR, }; use std::borrow::Cow; use std::env; @@ -23,9 +24,10 @@ use std::os::unix::fs::MetadataExt; use std::path::{Component, Path, PathBuf}; #[cfg(unix)] +#[macro_export] macro_rules! has { ($mode:expr, $perm:expr) => { - $mode & ($perm as u32) != 0 + $mode & $perm != 0 }; } @@ -240,22 +242,42 @@ pub fn is_stderr_interactive() -> bool { #[cfg(not(unix))] #[allow(unused_variables)] -pub fn display_permissions(metadata: &fs::Metadata) -> String { +pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { + if display_file_type { + return String::from("----------"); + } String::from("---------") } #[cfg(unix)] -pub fn display_permissions(metadata: &fs::Metadata) -> String { +pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String { let mode: mode_t = metadata.mode() as mode_t; - display_permissions_unix(mode as u32) + display_permissions_unix(mode, display_file_type) } #[cfg(unix)] -pub fn display_permissions_unix(mode: u32) -> String { - let mut result = String::with_capacity(9); +pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String { + let mut result; + if display_file_type { + result = String::with_capacity(10); + result.push(match mode & S_IFMT { + S_IFDIR => 'd', + S_IFCHR => 'c', + S_IFBLK => 'b', + S_IFREG => '-', + S_IFIFO => 'p', + S_IFLNK => 'l', + S_IFSOCK => 's', + // TODO: Other file types + _ => '?', + }); + } else { + result = String::with_capacity(9); + } + result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' }); result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISUID) { + result.push(if has!(mode, S_ISUID as mode_t) { if has!(mode, S_IXUSR) { 's' } else { @@ -269,7 +291,7 @@ pub fn display_permissions_unix(mode: u32) -> String { result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' }); result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISGID) { + result.push(if has!(mode, S_ISGID as mode_t) { if has!(mode, S_IXGRP) { 's' } else { @@ -283,7 +305,7 @@ pub fn display_permissions_unix(mode: u32) -> String { result.push(if has!(mode, S_IROTH) { 'r' } else { '-' }); result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' }); - result.push(if has!(mode, S_ISVTX) { + result.push(if has!(mode, S_ISVTX as mode_t) { if has!(mode, S_IXOTH) { 't' } else { @@ -355,4 +377,57 @@ mod tests { ); } } + + #[cfg(unix)] + #[test] + fn test_display_permissions() { + assert_eq!( + "drwxr-xr-x", + display_permissions_unix(S_IFDIR | 0o755, true) + ); + assert_eq!( + "rwxr-xr-x", + display_permissions_unix(S_IFDIR | 0o755, false) + ); + assert_eq!( + "-rw-r--r--", + display_permissions_unix(S_IFREG | 0o644, true) + ); + assert_eq!( + "srw-r-----", + display_permissions_unix(S_IFSOCK | 0o640, true) + ); + assert_eq!( + "lrw-r-xr-x", + display_permissions_unix(S_IFLNK | 0o655, true) + ); + assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true)); + + assert_eq!( + "brwSr-xr-x", + display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true) + ); + assert_eq!( + "brwsr-xr-x", + display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true) + ); + + assert_eq!( + "prw---sr--", + display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true) + ); + assert_eq!( + "prw---Sr--", + display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true) + ); + + assert_eq!( + "c---r-xr-t", + display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true) + ); + assert_eq!( + "c---r-xr-T", + display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true) + ); + } } diff --git a/tests/by-util/test_df.rs b/tests/by-util/test_df.rs index e3b7141d1..ac3776b96 100644 --- a/tests/by-util/test_df.rs +++ b/tests/by-util/test_df.rs @@ -27,7 +27,7 @@ fn test_df_output() { stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n"); } else { new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only( - "Filesystem Size Used Available Use% Mounted on \n" + "Filesystem Size Used Available Use% Mounted on \n", ); } } diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 111f2dc90..c72bd02a6 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -53,7 +53,15 @@ fn _du_basics_subdir(s: &str) { fn _du_basics_subdir(s: &str) { assert_eq!(s, "0\tsubdir/deeper\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_basics_subdir(s: &str) { + assert_eq!(s, "8\tsubdir/deeper\n"); +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_basics_subdir(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -100,7 +108,15 @@ fn _du_soft_link(s: &str) { fn _du_soft_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_soft_link(s: &str) { + assert_eq!(s, "16\tsubdir/links\n"); +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_soft_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -141,7 +157,15 @@ fn _du_hard_link(s: &str) { fn _du_hard_link(s: &str) { assert_eq!(s, "8\tsubdir/links\n") } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_hard_link(s: &str) { + assert_eq!(s, "16\tsubdir/links\n") +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_hard_link(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { @@ -181,7 +205,15 @@ fn _du_d_flag(s: &str) { fn _du_d_flag(s: &str) { assert_eq!(s, "8\t./subdir\n8\t./\n"); } -#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))] +#[cfg(target_os = "freebsd")] +fn _du_d_flag(s: &str) { + assert_eq!(s, "28\t./subdir\n36\t./\n"); +} +#[cfg(all( + not(target_vendor = "apple"), + not(target_os = "windows"), + not(target_os = "freebsd") +))] fn _du_d_flag(s: &str) { // MS-WSL linux has altered expected output if !uucore::os::is_wsl_1() { diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index 521cbbe9a..d83de4323 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -4,11 +4,15 @@ extern crate regex; use self::rand::{thread_rng, Rng}; use self::regex::Regex; use crate::common::util::*; +use rand::SeedableRng; #[cfg(not(windows))] use std::env; -use std::fs::{read_dir, File}; use std::io::Write; use std::path::Path; +use std::{ + fs::{read_dir, File}, + io::BufWriter, +}; fn random_chars(n: usize) -> String { thread_rng() @@ -58,7 +62,7 @@ impl Glob { files.sort(); let mut data: Vec = vec![]; for name in &files { - data.extend(self.directory.read(name).into_bytes()); + data.extend(self.directory.read_bytes(name)); } data } @@ -81,20 +85,30 @@ impl RandomFile { } fn add_bytes(&mut self, bytes: usize) { - let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes }; - let mut n = bytes; - while n > chunk_size { - let _ = write!(self.inner, "{}", random_chars(chunk_size)); - n -= chunk_size; + // Note that just writing random characters isn't enough to cover all + // cases. We need truly random bytes. + let mut writer = BufWriter::new(&self.inner); + + // Seed the rng so as to avoid spurious test failures. + let mut rng = rand::rngs::StdRng::seed_from_u64(123); + let mut buffer = [0; 1024]; + let mut remaining_size = bytes; + + while remaining_size > 0 { + let to_write = std::cmp::min(remaining_size, buffer.len()); + let buf = &mut buffer[..to_write]; + rng.fill(buf); + writer.write(buf).unwrap(); + + remaining_size -= to_write; } - let _ = write!(self.inner, "{}", random_chars(n)); } /// Add n lines each of size `RandomFile::LINESIZE` fn add_lines(&mut self, lines: usize) { let mut n = lines; while n > 0 { - let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)); + writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap(); n -= 1; } } @@ -104,18 +118,18 @@ impl RandomFile { fn test_split_default() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_default"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_lines(2000); ucmd.args(&[name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 2); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_numeric_prefixed_chunks_by_bytes() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_num_prefixed_chunks_by_bytes"; - let glob = Glob::new(&at, ".", r"a\d\d$"); RandomFile::new(&at, name).add_bytes(10000); ucmd.args(&[ "-d", // --numeric-suffixes @@ -123,52 +137,89 @@ fn test_split_numeric_prefixed_chunks_by_bytes() { "1000", name, "a", ]) .succeeds(); + + let glob = Glob::new(&at, ".", r"a\d\d$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_str_prefixed_chunks_by_bytes() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_str_prefixed_chunks_by_bytes"; - let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_bytes(10000); + // Important that this is less than 1024 since that's our internal buffer + // size. Good to test that we don't overshoot. ucmd.args(&["-b", "1000", name, "b"]).succeeds(); + + let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + for filename in glob.collect() { + assert_eq!(glob.directory.metadata(&filename).len(), 1000); + } + assert_eq!(glob.collate(), at.read_bytes(name)); +} + +// This is designed to test what happens when the desired part size is not a +// multiple of the buffer size and we hopefully don't overshoot the desired part +// size. +#[test] +fn test_split_bytes_prime_part_size() { + let (at, mut ucmd) = at_and_ucmd!(); + let name = "test_split_bytes_prime_part_size"; + RandomFile::new(&at, name).add_bytes(10000); + // 1753 is prime and greater than the buffer size, 1024. + ucmd.args(&["-b", "1753", name, "b"]).succeeds(); + + let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.count(), 6); + let mut fns = glob.collect(); + // glob.collect() is not guaranteed to return in sorted order, so we sort. + fns.sort(); + for i in 0..5 { + assert_eq!(glob.directory.metadata(&fns[i]).len(), 1753); + } + assert_eq!(glob.directory.metadata(&fns[5]).len(), 1235); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_num_prefixed_chunks_by_lines() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_num_prefixed_chunks_by_lines"; - let glob = Glob::new(&at, ".", r"c\d\d$"); RandomFile::new(&at, name).add_lines(10000); ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds(); + + let glob = Glob::new(&at, ".", r"c\d\d$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_str_prefixed_chunks_by_lines() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_str_prefixed_chunks_by_lines"; - let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$"); RandomFile::new(&at, name).add_lines(10000); ucmd.args(&["-l", "1000", name, "d"]).succeeds(); + + let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$"); assert_eq!(glob.count(), 10); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } #[test] fn test_split_additional_suffix() { let (at, mut ucmd) = at_and_ucmd!(); let name = "split_additional_suffix"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$"); RandomFile::new(&at, name).add_lines(2000); ucmd.args(&["--additional-suffix", ".txt", name]).succeeds(); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$"); assert_eq!(glob.count(), 2); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + assert_eq!(glob.collate(), at.read_bytes(name)); } // note: the test_filter* tests below are unix-only @@ -182,15 +233,16 @@ fn test_filter() { // like `test_split_default()` but run a command before writing let (at, mut ucmd) = at_and_ucmd!(); let name = "filtered"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); let n_lines = 3; RandomFile::new(&at, name).add_lines(n_lines); // change all characters to 'i' ucmd.args(&["--filter=sed s/./i/g > $FILE", name]) .succeeds(); + // assert all characters are 'i' / no character is not 'i' // (assert that command succeded) + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); assert!( glob.collate().iter().find(|&&c| { // is not i @@ -209,7 +261,6 @@ fn test_filter_with_env_var_set() { // implemented like `test_split_default()` but run a command before writing let (at, mut ucmd) = at_and_ucmd!(); let name = "filtered"; - let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); let n_lines = 3; RandomFile::new(&at, name).add_lines(n_lines); @@ -217,7 +268,9 @@ fn test_filter_with_env_var_set() { env::set_var("FILE", &env_var_value); ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name]) .succeeds(); - assert_eq!(glob.collate(), at.read(name).into_bytes()); + + let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$"); + assert_eq!(glob.collate(), at.read_bytes(name)); assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value); } diff --git a/tests/by-util/test_stat.rs b/tests/by-util/test_stat.rs index 7b7e990f4..5c4e62610 100644 --- a/tests/by-util/test_stat.rs +++ b/tests/by-util/test_stat.rs @@ -9,42 +9,6 @@ pub use self::stat::*; mod test_fsext { use super::*; - #[test] - fn test_access() { - assert_eq!("drwxr-xr-x", pretty_access(S_IFDIR | 0o755)); - assert_eq!("-rw-r--r--", pretty_access(S_IFREG | 0o644)); - assert_eq!("srw-r-----", pretty_access(S_IFSOCK | 0o640)); - assert_eq!("lrw-r-xr-x", pretty_access(S_IFLNK | 0o655)); - assert_eq!("?rw-r-xr-x", pretty_access(0o655)); - - assert_eq!( - "brwSr-xr-x", - pretty_access(S_IFBLK | S_ISUID as mode_t | 0o655) - ); - assert_eq!( - "brwsr-xr-x", - pretty_access(S_IFBLK | S_ISUID as mode_t | 0o755) - ); - - assert_eq!( - "prw---sr--", - pretty_access(S_IFIFO | S_ISGID as mode_t | 0o614) - ); - assert_eq!( - "prw---Sr--", - pretty_access(S_IFIFO | S_ISGID as mode_t | 0o604) - ); - - assert_eq!( - "c---r-xr-t", - pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o055) - ); - assert_eq!( - "c---r-xr-T", - pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o054) - ); - } - #[test] fn test_file_type() { assert_eq!("block special file", pretty_filetype(S_IFBLK, 0)); diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index a16f1854e..87a86fca4 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -33,7 +33,7 @@ fn test_stdin_default() { new_ucmd!() .pipe_in_fixture("lorem_ipsum.txt") .run() - .stdout_is(" 13 109 772\n"); + .stdout_is(" 13 109 772\n"); } #[test] @@ -42,7 +42,7 @@ fn test_utf8() { .args(&["-lwmcL"]) .pipe_in_fixture("UTF_8_test.txt") .run() - .stdout_is(" 300 4969 22781 22213 79\n"); + .stdout_is(" 300 4969 22781 22213 79\n"); // GNU returns " 300 2086 22219 22781 79" // TODO: we should fix that to match GNU's behavior } @@ -71,7 +71,7 @@ fn test_stdin_all_counts() { .args(&["-c", "-m", "-l", "-L", "-w"]) .pipe_in_fixture("alice_in_wonderland.txt") .run() - .stdout_is(" 5 57 302 302 66\n"); + .stdout_is(" 5 57 302 302 66\n"); } #[test] @@ -79,7 +79,7 @@ fn test_single_default() { new_ucmd!() .arg("moby_dick.txt") .run() - .stdout_is(" 18 204 1115 moby_dick.txt\n"); + .stdout_is(" 18 204 1115 moby_dick.txt\n"); } #[test] @@ -95,7 +95,7 @@ fn test_single_all_counts() { new_ucmd!() .args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"]) .run() - .stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n"); + .stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n"); } #[test] @@ -108,64 +108,54 @@ fn test_multiple_default() { ]) .run() .stdout_is( - " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ - alice_in_wonderland.txt\n 36 370 2189 total\n", + " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ + alice_in_wonderland.txt\n 36 370 2189 total\n", ); } /// Test for an empty file. #[test] fn test_file_empty() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "emptyfile.txt"]) .run() - .stdout_is(" 0 0 0 0 0 emptyfile.txt\n"); + .stdout_is("0 0 0 0 0 emptyfile.txt\n"); } /// Test for an file containing a single non-whitespace character /// *without* a trailing newline. #[test] fn test_file_single_line_no_trailing_newline() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "notrailingnewline.txt"]) .run() - .stdout_is(" 1 1 2 2 1 notrailingnewline.txt\n"); + .stdout_is("1 1 2 2 1 notrailingnewline.txt\n"); } /// Test for a file that has 100 empty lines (that is, the contents of /// the file are the newline character repeated one hundred times). #[test] fn test_file_many_empty_lines() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "manyemptylines.txt"]) .run() - .stdout_is(" 100 0 100 100 0 manyemptylines.txt\n"); + .stdout_is("100 0 100 100 0 manyemptylines.txt\n"); } /// Test for a file that has one long line comprising only spaces. #[test] fn test_file_one_long_line_only_spaces() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "onelongemptyline.txt"]) .run() - .stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n"); + .stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n"); } /// Test for a file that has one long line comprising a single "word". #[test] fn test_file_one_long_word() { - // TODO There is a leading space in the output that should be - // removed; see issue #2173. new_ucmd!() .args(&["-clmwL", "onelongword.txt"]) .run() - .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); + .stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n"); }