
Merge branch 'master' of https://github.com/uutils/coreutils into sort-optimize-line

Michael Debertol 2021-05-08 15:15:34 +02:00
commit 1afeb55881
16 changed files with 574 additions and 565 deletions


@ -15,6 +15,7 @@ use std::fs;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use uucore::fs::display_permissions_unix;
use uucore::libc::mode_t;
#[cfg(not(windows))]
use uucore::mode;
use uucore::InvalidEncodingHandling;
@ -306,7 +307,7 @@ impl Chmoder {
"mode of '{}' retained as {:04o} ({})",
file.display(),
fperm,
display_permissions_unix(fperm),
display_permissions_unix(fperm as mode_t, false),
);
}
Ok(())
@ -319,9 +320,9 @@ impl Chmoder {
"failed to change mode of file '{}' from {:o} ({}) to {:o} ({})",
file.display(),
fperm,
display_permissions_unix(fperm),
display_permissions_unix(fperm as mode_t, false),
mode,
display_permissions_unix(mode)
display_permissions_unix(mode as mode_t, false)
);
}
Err(1)
@ -331,9 +332,9 @@ impl Chmoder {
"mode of '{}' changed from {:o} ({}) to {:o} ({})",
file.display(),
fperm,
display_permissions_unix(fperm),
display_permissions_unix(fperm as mode_t, false),
mode,
display_permissions_unix(mode)
display_permissions_unix(mode as mode_t, false)
);
}
Ok(())
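
For context, a hedged sketch (not part of the diff) of the updated two-argument helper, consistent with the unit tests added to uucore further down; it assumes a crate depending on uucore with the "fs" and "libc" features and that uucore re-exports libc, as the `use uucore::libc::mode_t` import above suggests:

use uucore::fs::display_permissions_unix;
use uucore::libc::S_IFREG;

fn main() {
    // chmod passes `false`: nine permission characters, no file-type prefix.
    assert_eq!(display_permissions_unix(0o644, false), "rw-r--r--");
    // With `true`, the file-type character is prepended (ten characters).
    assert_eq!(display_permissions_unix(S_IFREG | 0o644, true), "-rw-r--r--");
}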


@ -1480,9 +1480,8 @@ fn display_item_long(
let _ = write!(
out,
"{}{} {}",
display_file_type(md.file_type()),
display_permissions(&md),
"{} {}",
display_permissions(&md, true),
pad_left(display_symlink_count(&md), max_links),
);
@ -1668,16 +1667,6 @@ fn display_size(len: u64, config: &Config) -> String {
}
}
fn display_file_type(file_type: FileType) -> char {
if file_type.is_dir() {
'd'
} else if file_type.is_symlink() {
'l'
} else {
'-'
}
}
#[cfg(unix)]
fn file_is_executable(md: &Metadata) -> bool {
// Mode always returns u32, but the flags might not be, based on the platform
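
A small, hedged illustration (not part of the diff) of why the format string shrinks from "{}{} {}" to "{} {}": with the second argument set to `true`, display_permissions now returns the file-type character and the permission bits as one string, so ls no longer needs its own display_file_type helper:

use std::fs;
use uucore::fs::display_permissions;

fn mode_column(path: &str) -> std::io::Result<String> {
    let md = fs::metadata(path)?;
    // e.g. "drwxr-xr-x" for a directory, "-rw-r--r--" for a regular file
    Ok(display_permissions(&md, true))
}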


@ -1,91 +1,33 @@
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::fs::{File, OpenOptions};
use std::io::SeekFrom;
use std::io::{BufRead, BufReader, BufWriter, Seek, Write};
use std::fs::OpenOptions;
use std::io::{BufWriter, Write};
use std::path::Path;
use tempdir::TempDir;
use crate::{file_to_lines_iter, FileMerger};
use super::{GlobalSettings, Line};
/// Iterator that provides sorted `T`s
pub struct ExtSortedIterator {
buffers: Vec<VecDeque<Line>>,
chunk_offsets: Vec<u64>,
max_per_chunk: usize,
chunks: usize,
tmp_dir: TempDir,
settings: GlobalSettings,
failed: bool,
pub struct ExtSortedIterator<'a> {
file_merger: FileMerger<'a>,
// Keep tmp_dir around, it is deleted when dropped.
_tmp_dir: TempDir,
}
impl Iterator for ExtSortedIterator {
impl<'a> Iterator for ExtSortedIterator<'a> {
type Item = Line;
/// # Errors
///
/// This method can fail due to issues reading intermediate sorted chunks
/// from disk
fn next(&mut self) -> Option<Self::Item> {
if self.failed {
return None;
}
// fill up any empty buffers
let mut empty = true;
for chunk_num in 0..self.chunks {
if self.buffers[chunk_num as usize].is_empty() {
let mut f = crash_if_err!(
1,
File::open(self.tmp_dir.path().join(chunk_num.to_string()))
);
crash_if_err!(1, f.seek(SeekFrom::Start(self.chunk_offsets[chunk_num])));
let bytes_read = fill_buff(
&mut self.buffers[chunk_num as usize],
f,
self.max_per_chunk,
&self.settings,
);
self.chunk_offsets[chunk_num as usize] += bytes_read as u64;
if !self.buffers[chunk_num as usize].is_empty() {
empty = false;
}
} else {
empty = false;
}
}
if empty {
return None;
}
// find the next record to write
// check is_empty() before unwrap()ing
let mut idx = 0;
for chunk_num in 0..self.chunks as usize {
if !self.buffers[chunk_num].is_empty()
&& (self.buffers[idx].is_empty()
|| super::compare_by(
self.buffers[chunk_num].front().unwrap(),
self.buffers[idx].front().unwrap(),
&self.settings,
) == Ordering::Less)
{
idx = chunk_num;
}
}
// unwrap due to checks above
let r = self.buffers[idx].pop_front().unwrap();
Some(r)
self.file_merger.next()
}
}
/// Sort (based on `compare`) the `T`s provided by `unsorted` and return an
/// iterator
///
/// # Errors
/// # Panics
///
/// This method can fail due to issues writing intermediate sorted chunks
/// This method can panic due to issues writing intermediate sorted chunks
/// to disk.
pub fn ext_sort(
unsorted: impl Iterator<Item = Line>,
@ -93,19 +35,12 @@ pub fn ext_sort(
) -> ExtSortedIterator {
let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort"));
let mut iter = ExtSortedIterator {
buffers: Vec::new(),
chunk_offsets: Vec::new(),
max_per_chunk: 0,
chunks: 0,
tmp_dir,
settings: settings.clone(),
failed: false,
};
let mut total_read = 0;
let mut chunk = Vec::new();
let mut chunks_read = 0;
let mut file_merger = FileMerger::new(settings);
// make the initial chunks on disk
for seq in unsorted {
let seq_size = seq.estimate_size();
@ -113,65 +48,35 @@ pub fn ext_sort(
chunk.push(seq);
if total_read + chunk.len() * std::mem::size_of::<Line>() >= settings.buffer_size {
if total_read >= settings.buffer_size && chunk.len() >= 2 {
super::sort_by(&mut chunk, &settings);
write_chunk(
settings,
&iter.tmp_dir.path().join(iter.chunks.to_string()),
&mut chunk,
);
let file_path = tmp_dir.path().join(chunks_read.to_string());
write_chunk(settings, &file_path, &mut chunk);
chunk.clear();
total_read = 0;
iter.chunks += 1;
chunks_read += 1;
file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap()))
}
}
// write the last chunk
if !chunk.is_empty() {
super::sort_by(&mut chunk, &settings);
let file_path = tmp_dir.path().join(chunks_read.to_string());
write_chunk(
settings,
&iter.tmp_dir.path().join(iter.chunks.to_string()),
&tmp_dir.path().join(chunks_read.to_string()),
&mut chunk,
);
iter.chunks += 1;
file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap()));
}
// We manually drop here to not go over our memory limit when we allocate below.
drop(chunk);
// initialize buffers for each chunk
//
// Having a right sized buffer for each chunk for smallish values seems silly to me?
//
// We will have to have the entire iter in memory sometime right?
// Set minimum to the size of the writer buffer, ~8K
const MINIMUM_READBACK_BUFFER: usize = 8200;
let right_sized_buffer = settings
.buffer_size
.checked_div(iter.chunks)
.unwrap_or(settings.buffer_size);
iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER {
right_sized_buffer
} else {
MINIMUM_READBACK_BUFFER
};
iter.buffers = vec![VecDeque::new(); iter.chunks];
iter.chunk_offsets = vec![0; iter.chunks];
for chunk_num in 0..iter.chunks {
let offset = fill_buff(
&mut iter.buffers[chunk_num],
crash_if_err!(
1,
File::open(iter.tmp_dir.path().join(chunk_num.to_string()))
),
iter.max_per_chunk,
&settings,
);
iter.chunk_offsets[chunk_num] = offset as u64;
ExtSortedIterator {
file_merger,
_tmp_dir: tmp_dir,
}
iter
}
fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
@ -186,29 +91,3 @@ fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
}
crash_if_err!(1, buf_write.flush());
}
fn fill_buff(
vec: &mut VecDeque<Line>,
file: File,
max_bytes: usize,
settings: &GlobalSettings,
) -> usize {
let mut total_read = 0;
let mut bytes_read = 0;
for line in BufReader::new(file).split(if settings.zero_terminated {
b'\0'
} else {
b'\n'
}) {
let line_s = String::from_utf8(crash_if_err!(1, line)).unwrap();
bytes_read += line_s.len() + 1;
let deserialized = Line::new(line_s, settings);
total_read += deserialized.estimate_size();
vec.push_back(deserialized);
if total_read > max_bytes {
break;
}
}
bytes_read
}
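
A hedged usage sketch for the rewritten ext_sort above (not part of the diff). The settings parameter is elided by the hunk header and assumed here to be &GlobalSettings; print_sorted is the existing helper in sort.rs:

// Sketch only: the external sort now yields Lines through FileMerger,
// so callers drain the iterator like any in-memory sort result.
fn sort_and_print(lines: Vec<Line>, settings: &GlobalSettings) {
    let sorted = ext_sort(lines.into_iter(), settings);
    print_sorted(sorted, settings);
}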


@ -32,6 +32,7 @@ use semver::Version;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::env;
use std::ffi::OsStr;
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
@ -1122,10 +1123,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
exec(files, settings)
}
fn file_to_lines_iter<'a>(
file: &str,
settings: &'a GlobalSettings,
) -> Option<impl Iterator<Item = Line> + 'a> {
fn file_to_lines_iter(
file: impl AsRef<OsStr>,
settings: &'_ GlobalSettings,
) -> Option<impl Iterator<Item = Line> + '_> {
let (reader, _) = match open(file) {
Some(x) => x,
None => return None,
@ -1190,7 +1191,7 @@ fn exec(files: Vec<String>, settings: GlobalSettings) -> i32 {
let mut lines = vec![];
// This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression.
for (file, _) in files.iter().map(|file| open(file)).flatten() {
for (file, _) in files.iter().map(open).flatten() {
let buf_reader = BufReader::new(file);
for line in buf_reader.split(if settings.zero_terminated {
b'\0'
@ -1517,7 +1518,8 @@ fn print_sorted<T: Iterator<Item = Line>>(iter: T, settings: &GlobalSettings) {
}
// from cat.rs
fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
fn open(path: impl AsRef<OsStr>) -> Option<(Box<dyn Read>, bool)> {
let path = path.as_ref();
if path == "-" {
let stdin = stdin();
return Some((Box::new(stdin) as Box<dyn Read>, is_stdin_interactive()));
@ -1526,7 +1528,7 @@ fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
match File::open(Path::new(path)) {
Ok(f) => Some((Box::new(f) as Box<dyn Read>, false)),
Err(e) => {
show_error!("{0}: {1}", path, e.to_string());
show_error!("{0:?}: {1}", path, e.to_string());
None
}
}
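
A brief illustration (not in the diff) of what the new AsRef<OsStr> bound allows: string literals and the PathBuf values built for temporary chunk files can both be handed to open(); the file name below is purely illustrative:

fn open_both_kinds() {
    // Sketch only: &str and PathBuf both satisfy `impl AsRef<OsStr>`.
    let _from_stdin = open("-");
    let _from_chunk = open(std::path::PathBuf::from("chunk_0"));
}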


@ -13,11 +13,11 @@ extern crate uucore;
mod platform;
use clap::{App, Arg};
use std::char;
use std::env;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;
use std::{char, fs::remove_file};
static NAME: &str = "split";
static VERSION: &str = env!("CARGO_PKG_VERSION");
@ -213,107 +213,145 @@ struct Settings {
verbose: bool,
}
struct SplitControl {
current_line: String, // Don't touch
request_new_file: bool, // Splitter implementation requests new file
}
trait Splitter {
// Consume the current_line and return the consumed string
fn consume(&mut self, _: &mut SplitControl) -> String;
// Consume as much as possible from `reader` so as to saturate `writer`.
// Equivalent to finishing one of the part files. Returns the number of
// bytes that have been moved.
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> u128;
}
struct LineSplitter {
saved_lines_to_write: usize,
lines_to_write: usize,
lines_per_split: usize,
}
impl LineSplitter {
fn new(settings: &Settings) -> LineSplitter {
let n = match settings.strategy_param.parse() {
Ok(a) => a,
Err(e) => crash!(1, "invalid number of lines: {}", e),
};
LineSplitter {
saved_lines_to_write: n,
lines_to_write: n,
lines_per_split: settings
.strategy_param
.parse()
.unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)),
}
}
}
impl Splitter for LineSplitter {
fn consume(&mut self, control: &mut SplitControl) -> String {
self.lines_to_write -= 1;
if self.lines_to_write == 0 {
self.lines_to_write = self.saved_lines_to_write;
control.request_new_file = true;
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> u128 {
let mut bytes_consumed = 0u128;
let mut buffer = String::with_capacity(1024);
for _ in 0..self.lines_per_split {
let bytes_read = reader
.read_line(&mut buffer)
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
// If we ever read 0 bytes then we know we've hit EOF.
if bytes_read == 0 {
return bytes_consumed;
}
writer
.write_all(buffer.as_bytes())
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
// Empty out the String buffer since `read_line` appends instead of
// replaces.
buffer.clear();
bytes_consumed += bytes_read as u128;
}
control.current_line.clone()
bytes_consumed
}
}
struct ByteSplitter {
saved_bytes_to_write: usize,
bytes_to_write: usize,
break_on_line_end: bool,
require_whole_line: bool,
bytes_per_split: u128,
}
impl ByteSplitter {
fn new(settings: &Settings) -> ByteSplitter {
let mut strategy_param: Vec<char> = settings.strategy_param.chars().collect();
let suffix = strategy_param.pop().unwrap();
let multiplier = match suffix {
'0'..='9' => 1usize,
'b' => 512usize,
'k' => 1024usize,
'm' => 1024usize * 1024usize,
_ => crash!(1, "invalid number of bytes"),
};
let n = if suffix.is_alphabetic() {
match strategy_param
.iter()
.cloned()
.collect::<String>()
.parse::<usize>()
{
Ok(a) => a,
Err(e) => crash!(1, "invalid number of bytes: {}", e),
}
} else {
match settings.strategy_param.parse::<usize>() {
Ok(a) => a,
Err(e) => crash!(1, "invalid number of bytes: {}", e),
}
};
// These multipliers are the same as supported by GNU coreutils.
let modifiers: Vec<(&str, u128)> = vec![
("K", 1024u128),
("M", 1024 * 1024),
("G", 1024 * 1024 * 1024),
("T", 1024 * 1024 * 1024 * 1024),
("P", 1024 * 1024 * 1024 * 1024 * 1024),
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("KB", 1000),
("MB", 1000 * 1000),
("GB", 1000 * 1000 * 1000),
("TB", 1000 * 1000 * 1000 * 1000),
("PB", 1000 * 1000 * 1000 * 1000 * 1000),
("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
];
// This sequential find is acceptable since none of the modifiers are
// suffixes of any other modifiers, a la Huffman codes.
let (suffix, multiplier) = modifiers
.iter()
.find(|(suffix, _)| settings.strategy_param.ends_with(suffix))
.unwrap_or(&("", 1));
// Try to parse the actual numeral.
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
.parse::<u128>()
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
ByteSplitter {
saved_bytes_to_write: n * multiplier,
bytes_to_write: n * multiplier,
break_on_line_end: settings.strategy == "b",
require_whole_line: false,
bytes_per_split: n * multiplier,
}
}
}
impl Splitter for ByteSplitter {
fn consume(&mut self, control: &mut SplitControl) -> String {
let line = control.current_line.clone();
let n = std::cmp::min(line.chars().count(), self.bytes_to_write);
if self.require_whole_line && n < line.chars().count() {
self.bytes_to_write = self.saved_bytes_to_write;
control.request_new_file = true;
self.require_whole_line = false;
return "".to_owned();
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> u128 {
// We buffer reads and writes. We proceed until `bytes_consumed` is
// equal to `self.bytes_per_split` or we reach EOF.
let mut bytes_consumed = 0u128;
const BUFFER_SIZE: usize = 1024;
let mut buffer = [0u8; BUFFER_SIZE];
while bytes_consumed < self.bytes_per_split {
// Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min
// doesn't really work since we have to get types to match which
// can't be done in a way that keeps all conversions safe.
let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed {
BUFFER_SIZE
} else {
// This is a safe conversion since the difference must be less
// than BUFFER_SIZE in this branch.
(self.bytes_per_split - bytes_consumed) as usize
};
let bytes_read = reader
.read(&mut buffer[0..bytes_desired])
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
// If we ever read 0 bytes then we know we've hit EOF.
if bytes_read == 0 {
return bytes_consumed;
}
writer
.write_all(&buffer[0..bytes_read])
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
bytes_consumed += bytes_read as u128;
}
self.bytes_to_write -= n;
if n == 0 {
self.bytes_to_write = self.saved_bytes_to_write;
control.request_new_file = true;
}
if self.break_on_line_end && n == line.chars().count() {
self.require_whole_line = self.break_on_line_end;
}
line[..n].to_owned()
bytes_consumed
}
}
@ -353,14 +391,13 @@ fn split(settings: &Settings) -> i32 {
let mut reader = BufReader::new(if settings.input == "-" {
Box::new(stdin()) as Box<dyn Read>
} else {
let r = match File::open(Path::new(&settings.input)) {
Ok(a) => a,
Err(_) => crash!(
let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| {
crash!(
1,
"cannot open '{}' for reading: No such file or directory",
settings.input
),
};
)
});
Box::new(r) as Box<dyn Read>
});
@ -370,48 +407,39 @@ fn split(settings: &Settings) -> i32 {
a => crash!(1, "strategy {} not supported", a),
};
let mut control = SplitControl {
current_line: "".to_owned(), // Request new line
request_new_file: true, // Request new file
};
let mut writer = BufWriter::new(Box::new(stdout()) as Box<dyn Write>);
let mut fileno = 0;
loop {
if control.current_line.chars().count() == 0 {
match reader.read_line(&mut control.current_line) {
Ok(0) | Err(_) => break,
_ => {}
// Get a new part file set up, and construct `writer` for it.
let mut filename = settings.prefix.clone();
filename.push_str(
if settings.numeric_suffix {
num_prefix(fileno, settings.suffix_length)
} else {
str_prefix(fileno, settings.suffix_length)
}
}
if control.request_new_file {
let mut filename = settings.prefix.clone();
filename.push_str(
if settings.numeric_suffix {
num_prefix(fileno, settings.suffix_length)
} else {
str_prefix(fileno, settings.suffix_length)
}
.as_ref(),
);
filename.push_str(settings.additional_suffix.as_ref());
.as_ref(),
);
filename.push_str(settings.additional_suffix.as_ref());
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
crash_if_err!(1, writer.flush());
fileno += 1;
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
control.request_new_file = false;
if settings.verbose {
println!("creating file '{}'", filename);
let bytes_consumed = splitter.consume(&mut reader, &mut writer);
writer
.flush()
.unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e));
// If we didn't write anything we should clean up the empty file, and
// break from the loop.
if bytes_consumed == 0 {
// The output file is only ever created if --filter isn't used.
// Complicated, I know...
if settings.filter.is_none() {
remove_file(filename)
.unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e));
}
break;
}
let consumed = splitter.consume(&mut control);
crash_if_err!(1, writer.write_all(consumed.as_bytes()));
let advance = consumed.chars().count();
let clone = control.current_line.clone();
let sl = clone;
control.current_line = sl[advance..sl.chars().count()].to_owned();
fileno += 1;
}
0
}
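
A standalone, hedged sketch (not part of the diff) of the size-suffix parsing that ByteSplitter::new performs above, with an abbreviated multiplier table and None in place of crash!(); the function name is illustrative:

fn parse_size(strategy_param: &str) -> Option<u128> {
    // Binary suffixes ("K", "M", ...) multiply by powers of 1024,
    // decimal suffixes ("KB", "MB", ...) by powers of 1000.
    let modifiers: Vec<(&str, u128)> = vec![
        ("K", 1024),
        ("M", 1024 * 1024),
        ("KB", 1000),
        ("MB", 1000 * 1000),
    ];
    // No suffix is a suffix of another, so a linear scan finds the match.
    let (suffix, multiplier) = modifiers
        .iter()
        .find(|(suffix, _)| strategy_param.ends_with(suffix))
        .unwrap_or(&("", 1));
    let digits = &strategy_param[..strategy_param.len() - suffix.len()];
    digits.parse::<u128>().ok().map(|n| n * *multiplier)
}

fn main() {
    assert_eq!(parse_size("1753"), Some(1753));
    assert_eq!(parse_size("10K"), Some(10 * 1024));
    assert_eq!(parse_size("2MB"), Some(2_000_000));
}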


@ -17,7 +17,7 @@ path = "src/stat.rs"
[dependencies]
clap = "2.33"
time = "0.1.40"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] }
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs"] }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
[[bin]]


@ -41,13 +41,6 @@ impl BirthTime for Metadata {
}
}
#[macro_export]
macro_rules! has {
($mode:expr, $perm:expr) => {
$mode & $perm != 0
};
}
pub fn pretty_time(sec: i64, nsec: i64) -> String {
// sec == seconds since UNIX_EPOCH
// nsec == nanoseconds since (UNIX_EPOCH + sec)
@ -81,65 +74,6 @@ pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str {
}
}
pub fn pretty_access(mode: mode_t) -> String {
let mut result = String::with_capacity(10);
result.push(match mode & S_IFMT {
S_IFDIR => 'd',
S_IFCHR => 'c',
S_IFBLK => 'b',
S_IFREG => '-',
S_IFIFO => 'p',
S_IFLNK => 'l',
S_IFSOCK => 's',
// TODO: Other file types
_ => '?',
});
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
result.push(if has!(mode, S_ISUID as mode_t) {
if has!(mode, S_IXUSR) {
's'
} else {
'S'
}
} else if has!(mode, S_IXUSR) {
'x'
} else {
'-'
});
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
result.push(if has!(mode, S_ISGID as mode_t) {
if has!(mode, S_IXGRP) {
's'
} else {
'S'
}
} else if has!(mode, S_IXGRP) {
'x'
} else {
'-'
});
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
result.push(if has!(mode, S_ISVTX as mode_t) {
if has!(mode, S_IXOTH) {
't'
} else {
'T'
}
} else if has!(mode, S_IXOTH) {
'x'
} else {
'-'
});
result
}
use std::borrow::Cow;
use std::convert::{AsRef, From};
use std::ffi::CString;


@ -7,13 +7,13 @@
// spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE
#[macro_use]
mod fsext;
pub use crate::fsext::*;
#[macro_use]
extern crate uucore;
use uucore::entries;
use uucore::fs::display_permissions;
use clap::{App, Arg, ArgMatches};
use std::borrow::Cow;
@ -575,7 +575,7 @@ impl Stater {
}
// access rights in human readable form
'A' => {
arg = pretty_access(meta.mode() as mode_t);
arg = display_permissions(&meta, true);
otype = OutputType::Str;
}
// number of blocks allocated (see %B)


@ -12,8 +12,10 @@ extern crate uucore;
mod count_bytes;
mod countable;
mod wordcount;
use count_bytes::count_bytes_fast;
use countable::WordCountable;
use wordcount::{TitledWordCount, WordCount};
use clap::{App, Arg, ArgMatches};
use thiserror::Error;
@ -21,9 +23,7 @@ use thiserror::Error;
use std::cmp::max;
use std::fs::File;
use std::io::{self, Write};
use std::ops::{Add, AddAssign};
use std::path::Path;
use std::str::from_utf8;
#[derive(Error, Debug)]
pub enum WcError {
@ -82,51 +82,6 @@ impl Settings {
}
}
#[derive(Debug, Default, Copy, Clone)]
struct WordCount {
bytes: usize,
chars: usize,
lines: usize,
words: usize,
max_line_length: usize,
}
impl Add for WordCount {
type Output = Self;
fn add(self, other: Self) -> Self {
Self {
bytes: self.bytes + other.bytes,
chars: self.chars + other.chars,
lines: self.lines + other.lines,
words: self.words + other.words,
max_line_length: max(self.max_line_length, other.max_line_length),
}
}
}
impl AddAssign for WordCount {
fn add_assign(&mut self, other: Self) {
*self = *self + other
}
}
impl WordCount {
fn with_title(self, title: &str) -> TitledWordCount {
TitledWordCount { title, count: self }
}
}
/// This struct supplements the actual word count with a title that is displayed
/// to the user at the end of the program.
/// The reason we don't simply include title in the `WordCount` struct is that
/// it would result in unnecessary copying of `String`.
#[derive(Debug, Default, Clone)]
struct TitledWordCount<'a> {
title: &'a str,
count: WordCount,
}
static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if
more than one FILE is specified.";
static VERSION: &str = env!("CARGO_PKG_VERSION");
@ -207,18 +162,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
}
}
const CR: u8 = b'\r';
const LF: u8 = b'\n';
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const SYN: u8 = 0x16_u8;
const FF: u8 = 0x0C_u8;
#[inline(always)]
fn is_word_separator(byte: u8) -> bool {
byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF
}
fn word_count_from_reader<T: WordCountable>(
mut reader: T,
settings: &Settings,
@ -239,58 +182,20 @@ fn word_count_from_reader<T: WordCountable>(
// we do not need to decode the byte stream if we're only counting bytes/newlines
let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length;
let mut line_count: usize = 0;
let mut word_count: usize = 0;
let mut byte_count: usize = 0;
let mut char_count: usize = 0;
let mut longest_line_length: usize = 0;
let mut ends_lf: bool;
// reading from a TTY seems to raise a condition on, rather than return Some(0) like a file.
// hence the option wrapped in a result here
for line_result in reader.lines() {
let raw_line = match line_result {
Ok(l) => l,
// Sum the WordCount for each line. Show a warning for each line
// that results in an IO error when trying to read it.
let total = reader
.lines()
.filter_map(|res| match res {
Ok(line) => Some(line),
Err(e) => {
show_warning!("Error while reading {}: {}", path, e);
continue;
None
}
};
// GNU 'wc' only counts lines that end in LF as lines
ends_lf = *raw_line.last().unwrap() == LF;
line_count += ends_lf as usize;
byte_count += raw_line.len();
if decode_chars {
// try and convert the bytes to UTF-8 first
let current_char_count;
match from_utf8(&raw_line[..]) {
Ok(line) => {
word_count += line.split_whitespace().count();
current_char_count = line.chars().count();
}
Err(..) => {
word_count += raw_line.split(|&x| is_word_separator(x)).count();
current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
}
}
char_count += current_char_count;
if current_char_count > longest_line_length {
// -L is a GNU 'wc' extension so same behavior on LF
longest_line_length = current_char_count - (ends_lf as usize);
}
}
}
Ok(WordCount {
bytes: byte_count,
chars: char_count,
lines: line_count,
words: word_count,
max_line_length: longest_line_length,
})
})
.map(|line| WordCount::from_line(&line, decode_chars))
.sum();
Ok(total)
}
fn word_count_from_path(path: &str, settings: &Settings) -> WcResult<WordCount> {
@ -323,7 +228,12 @@ fn wc(files: Vec<String>, settings: &Settings) -> Result<(), u32> {
error_count += 1;
WordCount::default()
});
max_width = max(max_width, word_count.bytes.to_string().len() + 1);
// Compute the number of digits needed to display the number
// of bytes in the file. Even if the settings indicate that we
// won't *display* the number of bytes, we still use the
// number of digits in the byte count as the width when
// formatting each count as a string for output.
max_width = max(max_width, word_count.bytes.to_string().len());
total_word_count += word_count;
results.push(word_count.with_title(path));
}
@ -364,19 +274,40 @@ fn print_stats(
min_width = 0;
}
let mut is_first: bool = true;
if settings.show_lines {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.lines, min_width)?;
is_first = false;
}
if settings.show_words {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.words, min_width)?;
is_first = false;
}
if settings.show_bytes {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?;
is_first = false;
}
if settings.show_chars {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.chars, min_width)?;
is_first = false;
}
if settings.show_max_line_length {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(
stdout_lock,
"{:1$}",

src/uu/wc/src/wordcount.rs (new file, 131 lines)

@ -0,0 +1,131 @@
use std::cmp::max;
use std::iter::Sum;
use std::ops::{Add, AddAssign};
use std::str::from_utf8;
const CR: u8 = b'\r';
const LF: u8 = b'\n';
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const SYN: u8 = 0x16_u8;
const FF: u8 = 0x0C_u8;
#[inline(always)]
fn is_word_separator(byte: u8) -> bool {
byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF
}
#[derive(Debug, Default, Copy, Clone)]
pub struct WordCount {
pub bytes: usize,
pub chars: usize,
pub lines: usize,
pub words: usize,
pub max_line_length: usize,
}
impl Add for WordCount {
type Output = Self;
fn add(self, other: Self) -> Self {
Self {
bytes: self.bytes + other.bytes,
chars: self.chars + other.chars,
lines: self.lines + other.lines,
words: self.words + other.words,
max_line_length: max(self.max_line_length, other.max_line_length),
}
}
}
impl AddAssign for WordCount {
fn add_assign(&mut self, other: Self) {
*self = *self + other
}
}
impl Sum for WordCount {
fn sum<I>(iter: I) -> WordCount
where
I: Iterator<Item = WordCount>,
{
iter.fold(WordCount::default(), |acc, x| acc + x)
}
}
impl WordCount {
/// Count the characters and whitespace-separated words in the given bytes.
///
/// `line` is a slice of bytes that will be decoded as ASCII characters.
fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) {
let word_count = line.split(|&x| is_word_separator(x)).count();
let char_count = line.iter().filter(|c| c.is_ascii()).count();
(word_count, char_count)
}
/// Create a [`WordCount`] from a sequence of bytes representing a line.
///
/// If the last byte of `line` encodes a newline character (`\n`),
/// then the [`lines`] field will be set to 1. Otherwise, it will
/// be set to 0. The [`bytes`] field is simply the length of
/// `line`.
///
/// If `decode_chars` is `false`, the [`chars`] and [`words`]
/// fields will be set to 0. If it is `true`, this function will
/// attempt to decode the bytes first as UTF-8, and failing that,
/// as ASCII.
pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount {
// GNU 'wc' only counts lines that end in LF as lines
let lines = (*line.last().unwrap() == LF) as usize;
let bytes = line.len();
let (words, chars) = if decode_chars {
WordCount::word_and_char_count(line)
} else {
(0, 0)
};
// -L is a GNU 'wc' extension so same behavior on LF
let max_line_length = if chars > 0 { chars - lines } else { 0 };
WordCount {
bytes,
chars,
lines,
words,
max_line_length,
}
}
/// Count the UTF-8 characters and words in the given string slice.
///
/// `s` is a string slice that is assumed to be a UTF-8 string.
fn utf8_word_and_char_count(s: &str) -> (usize, usize) {
let word_count = s.split_whitespace().count();
let char_count = s.chars().count();
(word_count, char_count)
}
pub fn with_title(self, title: &str) -> TitledWordCount {
TitledWordCount { title, count: self }
}
/// Count the characters and words in the given slice of bytes.
///
/// `line` is a slice of bytes that will be decoded as UTF-8
/// characters, or if that fails, as ASCII characters.
fn word_and_char_count(line: &[u8]) -> (usize, usize) {
// try and convert the bytes to UTF-8 first
match from_utf8(line) {
Ok(s) => WordCount::utf8_word_and_char_count(s),
Err(..) => WordCount::ascii_word_and_char_count(line),
}
}
}
/// This struct supplements the actual word count with a title that is displayed
/// to the user at the end of the program.
/// The reason we don't simply include title in the `WordCount` struct is that
/// it would result in unnecessary copying of `String`.
#[derive(Debug, Default, Clone)]
pub struct TitledWordCount<'a> {
pub title: &'a str,
pub count: WordCount,
}
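
A brief usage sketch (not part of the diff), written as a unit test that could live in this module: each line becomes a WordCount via from_line, and the Sum impl folds the per-line counts into a total.

#[test]
fn sum_per_line_counts() {
    let lines: [&[u8]; 2] = [b"hello world\n", b"rust\n"];
    let total: WordCount = lines
        .iter()
        .map(|line| WordCount::from_line(line, true))
        .sum();
    // 2 lines ending in LF, 3 whitespace-separated words, 17 bytes in total.
    assert_eq!((total.lines, total.words, total.bytes), (2, 3, 17));
}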


@ -8,8 +8,9 @@
#[cfg(unix)]
use libc::{
mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR,
S_IXGRP, S_IXOTH, S_IXUSR,
mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP,
S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH,
S_IXUSR,
};
use std::borrow::Cow;
use std::env;
@ -23,9 +24,10 @@ use std::os::unix::fs::MetadataExt;
use std::path::{Component, Path, PathBuf};
#[cfg(unix)]
#[macro_export]
macro_rules! has {
($mode:expr, $perm:expr) => {
$mode & ($perm as u32) != 0
$mode & $perm != 0
};
}
@ -240,22 +242,42 @@ pub fn is_stderr_interactive() -> bool {
#[cfg(not(unix))]
#[allow(unused_variables)]
pub fn display_permissions(metadata: &fs::Metadata) -> String {
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
if display_file_type {
return String::from("----------");
}
String::from("---------")
}
#[cfg(unix)]
pub fn display_permissions(metadata: &fs::Metadata) -> String {
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
let mode: mode_t = metadata.mode() as mode_t;
display_permissions_unix(mode as u32)
display_permissions_unix(mode, display_file_type)
}
#[cfg(unix)]
pub fn display_permissions_unix(mode: u32) -> String {
let mut result = String::with_capacity(9);
pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String {
let mut result;
if display_file_type {
result = String::with_capacity(10);
result.push(match mode & S_IFMT {
S_IFDIR => 'd',
S_IFCHR => 'c',
S_IFBLK => 'b',
S_IFREG => '-',
S_IFIFO => 'p',
S_IFLNK => 'l',
S_IFSOCK => 's',
// TODO: Other file types
_ => '?',
});
} else {
result = String::with_capacity(9);
}
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
result.push(if has!(mode, S_ISUID) {
result.push(if has!(mode, S_ISUID as mode_t) {
if has!(mode, S_IXUSR) {
's'
} else {
@ -269,7 +291,7 @@ pub fn display_permissions_unix(mode: u32) -> String {
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
result.push(if has!(mode, S_ISGID) {
result.push(if has!(mode, S_ISGID as mode_t) {
if has!(mode, S_IXGRP) {
's'
} else {
@ -283,7 +305,7 @@ pub fn display_permissions_unix(mode: u32) -> String {
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
result.push(if has!(mode, S_ISVTX) {
result.push(if has!(mode, S_ISVTX as mode_t) {
if has!(mode, S_IXOTH) {
't'
} else {
@ -355,4 +377,57 @@ mod tests {
);
}
}
#[cfg(unix)]
#[test]
fn test_display_permissions() {
assert_eq!(
"drwxr-xr-x",
display_permissions_unix(S_IFDIR | 0o755, true)
);
assert_eq!(
"rwxr-xr-x",
display_permissions_unix(S_IFDIR | 0o755, false)
);
assert_eq!(
"-rw-r--r--",
display_permissions_unix(S_IFREG | 0o644, true)
);
assert_eq!(
"srw-r-----",
display_permissions_unix(S_IFSOCK | 0o640, true)
);
assert_eq!(
"lrw-r-xr-x",
display_permissions_unix(S_IFLNK | 0o655, true)
);
assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true));
assert_eq!(
"brwSr-xr-x",
display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true)
);
assert_eq!(
"brwsr-xr-x",
display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true)
);
assert_eq!(
"prw---sr--",
display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true)
);
assert_eq!(
"prw---Sr--",
display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true)
);
assert_eq!(
"c---r-xr-t",
display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true)
);
assert_eq!(
"c---r-xr-T",
display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true)
);
}
}


@ -27,7 +27,7 @@ fn test_df_output() {
stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n");
} else {
new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only(
"Filesystem Size Used Available Use% Mounted on \n"
"Filesystem Size Used Available Use% Mounted on \n",
);
}
}


@ -53,7 +53,15 @@ fn _du_basics_subdir(s: &str) {
fn _du_basics_subdir(s: &str) {
assert_eq!(s, "0\tsubdir/deeper\n");
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_basics_subdir(s: &str) {
assert_eq!(s, "8\tsubdir/deeper\n");
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_basics_subdir(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {
@ -100,7 +108,15 @@ fn _du_soft_link(s: &str) {
fn _du_soft_link(s: &str) {
assert_eq!(s, "8\tsubdir/links\n");
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_soft_link(s: &str) {
assert_eq!(s, "16\tsubdir/links\n");
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_soft_link(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {
@ -141,7 +157,15 @@ fn _du_hard_link(s: &str) {
fn _du_hard_link(s: &str) {
assert_eq!(s, "8\tsubdir/links\n")
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_hard_link(s: &str) {
assert_eq!(s, "16\tsubdir/links\n")
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_hard_link(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {
@ -181,7 +205,15 @@ fn _du_d_flag(s: &str) {
fn _du_d_flag(s: &str) {
assert_eq!(s, "8\t./subdir\n8\t./\n");
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_d_flag(s: &str) {
assert_eq!(s, "28\t./subdir\n36\t./\n");
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_d_flag(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {


@ -4,11 +4,15 @@ extern crate regex;
use self::rand::{thread_rng, Rng};
use self::regex::Regex;
use crate::common::util::*;
use rand::SeedableRng;
#[cfg(not(windows))]
use std::env;
use std::fs::{read_dir, File};
use std::io::Write;
use std::path::Path;
use std::{
fs::{read_dir, File},
io::BufWriter,
};
fn random_chars(n: usize) -> String {
thread_rng()
@ -58,7 +62,7 @@ impl Glob {
files.sort();
let mut data: Vec<u8> = vec![];
for name in &files {
data.extend(self.directory.read(name).into_bytes());
data.extend(self.directory.read_bytes(name));
}
data
}
@ -81,20 +85,30 @@ impl RandomFile {
}
fn add_bytes(&mut self, bytes: usize) {
let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes };
let mut n = bytes;
while n > chunk_size {
let _ = write!(self.inner, "{}", random_chars(chunk_size));
n -= chunk_size;
// Note that just writing random characters isn't enough to cover all
// cases. We need truly random bytes.
let mut writer = BufWriter::new(&self.inner);
// Seed the rng so as to avoid spurious test failures.
let mut rng = rand::rngs::StdRng::seed_from_u64(123);
let mut buffer = [0; 1024];
let mut remaining_size = bytes;
while remaining_size > 0 {
let to_write = std::cmp::min(remaining_size, buffer.len());
let buf = &mut buffer[..to_write];
rng.fill(buf);
writer.write(buf).unwrap();
remaining_size -= to_write;
}
let _ = write!(self.inner, "{}", random_chars(n));
}
/// Add n lines each of size `RandomFile::LINESIZE`
fn add_lines(&mut self, lines: usize) {
let mut n = lines;
while n > 0 {
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap();
n -= 1;
}
}
@ -104,18 +118,18 @@ impl RandomFile {
fn test_split_default() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_default";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
RandomFile::new(&at, name).add_lines(2000);
ucmd.args(&[name]).succeeds();
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_numeric_prefixed_chunks_by_bytes() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_num_prefixed_chunks_by_bytes";
let glob = Glob::new(&at, ".", r"a\d\d$");
RandomFile::new(&at, name).add_bytes(10000);
ucmd.args(&[
"-d", // --numeric-suffixes
@ -123,52 +137,89 @@ fn test_split_numeric_prefixed_chunks_by_bytes() {
"1000", name, "a",
])
.succeeds();
let glob = Glob::new(&at, ".", r"a\d\d$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
for filename in glob.collect() {
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
}
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_str_prefixed_chunks_by_bytes() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_str_prefixed_chunks_by_bytes";
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
RandomFile::new(&at, name).add_bytes(10000);
// Important that this is less than 1024 since that's our internal buffer
// size. Good to test that we don't overshoot.
ucmd.args(&["-b", "1000", name, "b"]).succeeds();
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
for filename in glob.collect() {
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
}
assert_eq!(glob.collate(), at.read_bytes(name));
}
// This is designed to test what happens when the desired part size is not a
// multiple of the buffer size and we hopefully don't overshoot the desired part
// size.
#[test]
fn test_split_bytes_prime_part_size() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "test_split_bytes_prime_part_size";
RandomFile::new(&at, name).add_bytes(10000);
// 1753 is prime and greater than the buffer size, 1024.
ucmd.args(&["-b", "1753", name, "b"]).succeeds();
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 6);
let mut fns = glob.collect();
// glob.collect() is not guaranteed to return in sorted order, so we sort.
fns.sort();
for i in 0..5 {
assert_eq!(glob.directory.metadata(&fns[i]).len(), 1753);
}
assert_eq!(glob.directory.metadata(&fns[5]).len(), 1235);
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_num_prefixed_chunks_by_lines() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_num_prefixed_chunks_by_lines";
let glob = Glob::new(&at, ".", r"c\d\d$");
RandomFile::new(&at, name).add_lines(10000);
ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds();
let glob = Glob::new(&at, ".", r"c\d\d$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_str_prefixed_chunks_by_lines() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_str_prefixed_chunks_by_lines";
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
RandomFile::new(&at, name).add_lines(10000);
ucmd.args(&["-l", "1000", name, "d"]).succeeds();
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_additional_suffix() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_additional_suffix";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
RandomFile::new(&at, name).add_lines(2000);
ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
// note: the test_filter* tests below are unix-only
@ -182,15 +233,16 @@ fn test_filter() {
// like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
// change all characters to 'i'
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
.succeeds();
// assert all characters are 'i' / no character is not 'i'
// (assert that command succeeded)
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert!(
glob.collate().iter().find(|&&c| {
// is not i
@ -209,7 +261,6 @@ fn test_filter_with_env_var_set() {
// implemented like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
@ -217,7 +268,9 @@ fn test_filter_with_env_var_set() {
env::set_var("FILE", &env_var_value);
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
.succeeds();
assert_eq!(glob.collate(), at.read(name).into_bytes());
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.collate(), at.read_bytes(name));
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
}


@ -9,42 +9,6 @@ pub use self::stat::*;
mod test_fsext {
use super::*;
#[test]
fn test_access() {
assert_eq!("drwxr-xr-x", pretty_access(S_IFDIR | 0o755));
assert_eq!("-rw-r--r--", pretty_access(S_IFREG | 0o644));
assert_eq!("srw-r-----", pretty_access(S_IFSOCK | 0o640));
assert_eq!("lrw-r-xr-x", pretty_access(S_IFLNK | 0o655));
assert_eq!("?rw-r-xr-x", pretty_access(0o655));
assert_eq!(
"brwSr-xr-x",
pretty_access(S_IFBLK | S_ISUID as mode_t | 0o655)
);
assert_eq!(
"brwsr-xr-x",
pretty_access(S_IFBLK | S_ISUID as mode_t | 0o755)
);
assert_eq!(
"prw---sr--",
pretty_access(S_IFIFO | S_ISGID as mode_t | 0o614)
);
assert_eq!(
"prw---Sr--",
pretty_access(S_IFIFO | S_ISGID as mode_t | 0o604)
);
assert_eq!(
"c---r-xr-t",
pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o055)
);
assert_eq!(
"c---r-xr-T",
pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o054)
);
}
#[test]
fn test_file_type() {
assert_eq!("block special file", pretty_filetype(S_IFBLK, 0));


@ -33,7 +33,7 @@ fn test_stdin_default() {
new_ucmd!()
.pipe_in_fixture("lorem_ipsum.txt")
.run()
.stdout_is(" 13 109 772\n");
.stdout_is(" 13 109 772\n");
}
#[test]
@ -42,7 +42,7 @@ fn test_utf8() {
.args(&["-lwmcL"])
.pipe_in_fixture("UTF_8_test.txt")
.run()
.stdout_is(" 300 4969 22781 22213 79\n");
.stdout_is(" 300 4969 22781 22213 79\n");
// GNU returns " 300 2086 22219 22781 79"
// TODO: we should fix that to match GNU's behavior
}
@ -71,7 +71,7 @@ fn test_stdin_all_counts() {
.args(&["-c", "-m", "-l", "-L", "-w"])
.pipe_in_fixture("alice_in_wonderland.txt")
.run()
.stdout_is(" 5 57 302 302 66\n");
.stdout_is(" 5 57 302 302 66\n");
}
#[test]
@ -79,7 +79,7 @@ fn test_single_default() {
new_ucmd!()
.arg("moby_dick.txt")
.run()
.stdout_is(" 18 204 1115 moby_dick.txt\n");
.stdout_is(" 18 204 1115 moby_dick.txt\n");
}
#[test]
@ -95,7 +95,7 @@ fn test_single_all_counts() {
new_ucmd!()
.args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"])
.run()
.stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n");
.stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n");
}
#[test]
@ -108,64 +108,54 @@ fn test_multiple_default() {
])
.run()
.stdout_is(
" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \
alice_in_wonderland.txt\n 36 370 2189 total\n",
" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \
alice_in_wonderland.txt\n 36 370 2189 total\n",
);
}
/// Test for an empty file.
#[test]
fn test_file_empty() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "emptyfile.txt"])
.run()
.stdout_is(" 0 0 0 0 0 emptyfile.txt\n");
.stdout_is("0 0 0 0 0 emptyfile.txt\n");
}
/// Test for a file containing a single non-whitespace character
/// *without* a trailing newline.
#[test]
fn test_file_single_line_no_trailing_newline() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "notrailingnewline.txt"])
.run()
.stdout_is(" 1 1 2 2 1 notrailingnewline.txt\n");
.stdout_is("1 1 2 2 1 notrailingnewline.txt\n");
}
/// Test for a file that has 100 empty lines (that is, the contents of
/// the file are the newline character repeated one hundred times).
#[test]
fn test_file_many_empty_lines() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "manyemptylines.txt"])
.run()
.stdout_is(" 100 0 100 100 0 manyemptylines.txt\n");
.stdout_is("100 0 100 100 0 manyemptylines.txt\n");
}
/// Test for a file that has one long line comprising only spaces.
#[test]
fn test_file_one_long_line_only_spaces() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "onelongemptyline.txt"])
.run()
.stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n");
.stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n");
}
/// Test for a file that has one long line comprising a single "word".
#[test]
fn test_file_one_long_word() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "onelongword.txt"])
.run()
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
}