mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
Merge branch 'master' of https://github.com/uutils/coreutils into sort-optimize-line
This commit is contained in:
commit
1afeb55881
16 changed files with 574 additions and 565 deletions
|
@ -15,6 +15,7 @@ use std::fs;
|
||||||
use std::os::unix::fs::{MetadataExt, PermissionsExt};
|
use std::os::unix::fs::{MetadataExt, PermissionsExt};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use uucore::fs::display_permissions_unix;
|
use uucore::fs::display_permissions_unix;
|
||||||
|
use uucore::libc::mode_t;
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
use uucore::mode;
|
use uucore::mode;
|
||||||
use uucore::InvalidEncodingHandling;
|
use uucore::InvalidEncodingHandling;
|
||||||
|
@ -306,7 +307,7 @@ impl Chmoder {
|
||||||
"mode of '{}' retained as {:04o} ({})",
|
"mode of '{}' retained as {:04o} ({})",
|
||||||
file.display(),
|
file.display(),
|
||||||
fperm,
|
fperm,
|
||||||
display_permissions_unix(fperm),
|
display_permissions_unix(fperm as mode_t, false),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -319,9 +320,9 @@ impl Chmoder {
|
||||||
"failed to change mode of file '{}' from {:o} ({}) to {:o} ({})",
|
"failed to change mode of file '{}' from {:o} ({}) to {:o} ({})",
|
||||||
file.display(),
|
file.display(),
|
||||||
fperm,
|
fperm,
|
||||||
display_permissions_unix(fperm),
|
display_permissions_unix(fperm as mode_t, false),
|
||||||
mode,
|
mode,
|
||||||
display_permissions_unix(mode)
|
display_permissions_unix(mode as mode_t, false)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Err(1)
|
Err(1)
|
||||||
|
@ -331,9 +332,9 @@ impl Chmoder {
|
||||||
"mode of '{}' changed from {:o} ({}) to {:o} ({})",
|
"mode of '{}' changed from {:o} ({}) to {:o} ({})",
|
||||||
file.display(),
|
file.display(),
|
||||||
fperm,
|
fperm,
|
||||||
display_permissions_unix(fperm),
|
display_permissions_unix(fperm as mode_t, false),
|
||||||
mode,
|
mode,
|
||||||
display_permissions_unix(mode)
|
display_permissions_unix(mode as mode_t, false)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -1480,9 +1480,8 @@ fn display_item_long(
|
||||||
|
|
||||||
let _ = write!(
|
let _ = write!(
|
||||||
out,
|
out,
|
||||||
"{}{} {}",
|
"{} {}",
|
||||||
display_file_type(md.file_type()),
|
display_permissions(&md, true),
|
||||||
display_permissions(&md),
|
|
||||||
pad_left(display_symlink_count(&md), max_links),
|
pad_left(display_symlink_count(&md), max_links),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1668,16 +1667,6 @@ fn display_size(len: u64, config: &Config) -> String {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn display_file_type(file_type: FileType) -> char {
|
|
||||||
if file_type.is_dir() {
|
|
||||||
'd'
|
|
||||||
} else if file_type.is_symlink() {
|
|
||||||
'l'
|
|
||||||
} else {
|
|
||||||
'-'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
fn file_is_executable(md: &Metadata) -> bool {
|
fn file_is_executable(md: &Metadata) -> bool {
|
||||||
// Mode always returns u32, but the flags might not be, based on the platform
|
// Mode always returns u32, but the flags might not be, based on the platform
|
||||||
|
|
|
@ -1,91 +1,33 @@
|
||||||
use std::cmp::Ordering;
|
use std::fs::OpenOptions;
|
||||||
use std::collections::VecDeque;
|
use std::io::{BufWriter, Write};
|
||||||
use std::fs::{File, OpenOptions};
|
|
||||||
use std::io::SeekFrom;
|
|
||||||
use std::io::{BufRead, BufReader, BufWriter, Seek, Write};
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use tempdir::TempDir;
|
use tempdir::TempDir;
|
||||||
|
|
||||||
|
use crate::{file_to_lines_iter, FileMerger};
|
||||||
|
|
||||||
use super::{GlobalSettings, Line};
|
use super::{GlobalSettings, Line};
|
||||||
|
|
||||||
/// Iterator that provides sorted `T`s
|
/// Iterator that provides sorted `T`s
|
||||||
pub struct ExtSortedIterator {
|
pub struct ExtSortedIterator<'a> {
|
||||||
buffers: Vec<VecDeque<Line>>,
|
file_merger: FileMerger<'a>,
|
||||||
chunk_offsets: Vec<u64>,
|
// Keep tmp_dir around, it is deleted when dropped.
|
||||||
max_per_chunk: usize,
|
_tmp_dir: TempDir,
|
||||||
chunks: usize,
|
|
||||||
tmp_dir: TempDir,
|
|
||||||
settings: GlobalSettings,
|
|
||||||
failed: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for ExtSortedIterator {
|
impl<'a> Iterator for ExtSortedIterator<'a> {
|
||||||
type Item = Line;
|
type Item = Line;
|
||||||
|
|
||||||
/// # Errors
|
|
||||||
///
|
|
||||||
/// This method can fail due to issues reading intermediate sorted chunks
|
|
||||||
/// from disk
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if self.failed {
|
self.file_merger.next()
|
||||||
return None;
|
|
||||||
}
|
|
||||||
// fill up any empty buffers
|
|
||||||
let mut empty = true;
|
|
||||||
for chunk_num in 0..self.chunks {
|
|
||||||
if self.buffers[chunk_num as usize].is_empty() {
|
|
||||||
let mut f = crash_if_err!(
|
|
||||||
1,
|
|
||||||
File::open(self.tmp_dir.path().join(chunk_num.to_string()))
|
|
||||||
);
|
|
||||||
crash_if_err!(1, f.seek(SeekFrom::Start(self.chunk_offsets[chunk_num])));
|
|
||||||
let bytes_read = fill_buff(
|
|
||||||
&mut self.buffers[chunk_num as usize],
|
|
||||||
f,
|
|
||||||
self.max_per_chunk,
|
|
||||||
&self.settings,
|
|
||||||
);
|
|
||||||
self.chunk_offsets[chunk_num as usize] += bytes_read as u64;
|
|
||||||
if !self.buffers[chunk_num as usize].is_empty() {
|
|
||||||
empty = false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
empty = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if empty {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
// find the next record to write
|
|
||||||
// check is_empty() before unwrap()ing
|
|
||||||
let mut idx = 0;
|
|
||||||
for chunk_num in 0..self.chunks as usize {
|
|
||||||
if !self.buffers[chunk_num].is_empty()
|
|
||||||
&& (self.buffers[idx].is_empty()
|
|
||||||
|| super::compare_by(
|
|
||||||
self.buffers[chunk_num].front().unwrap(),
|
|
||||||
self.buffers[idx].front().unwrap(),
|
|
||||||
&self.settings,
|
|
||||||
) == Ordering::Less)
|
|
||||||
{
|
|
||||||
idx = chunk_num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// unwrap due to checks above
|
|
||||||
let r = self.buffers[idx].pop_front().unwrap();
|
|
||||||
Some(r)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sort (based on `compare`) the `T`s provided by `unsorted` and return an
|
/// Sort (based on `compare`) the `T`s provided by `unsorted` and return an
|
||||||
/// iterator
|
/// iterator
|
||||||
///
|
///
|
||||||
/// # Errors
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// This method can fail due to issues writing intermediate sorted chunks
|
/// This method can panic due to issues writing intermediate sorted chunks
|
||||||
/// to disk.
|
/// to disk.
|
||||||
pub fn ext_sort(
|
pub fn ext_sort(
|
||||||
unsorted: impl Iterator<Item = Line>,
|
unsorted: impl Iterator<Item = Line>,
|
||||||
|
@ -93,19 +35,12 @@ pub fn ext_sort(
|
||||||
) -> ExtSortedIterator {
|
) -> ExtSortedIterator {
|
||||||
let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort"));
|
let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort"));
|
||||||
|
|
||||||
let mut iter = ExtSortedIterator {
|
|
||||||
buffers: Vec::new(),
|
|
||||||
chunk_offsets: Vec::new(),
|
|
||||||
max_per_chunk: 0,
|
|
||||||
chunks: 0,
|
|
||||||
tmp_dir,
|
|
||||||
settings: settings.clone(),
|
|
||||||
failed: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut total_read = 0;
|
let mut total_read = 0;
|
||||||
let mut chunk = Vec::new();
|
let mut chunk = Vec::new();
|
||||||
|
|
||||||
|
let mut chunks_read = 0;
|
||||||
|
let mut file_merger = FileMerger::new(settings);
|
||||||
|
|
||||||
// make the initial chunks on disk
|
// make the initial chunks on disk
|
||||||
for seq in unsorted {
|
for seq in unsorted {
|
||||||
let seq_size = seq.estimate_size();
|
let seq_size = seq.estimate_size();
|
||||||
|
@ -113,65 +48,35 @@ pub fn ext_sort(
|
||||||
|
|
||||||
chunk.push(seq);
|
chunk.push(seq);
|
||||||
|
|
||||||
if total_read + chunk.len() * std::mem::size_of::<Line>() >= settings.buffer_size {
|
if total_read >= settings.buffer_size && chunk.len() >= 2 {
|
||||||
super::sort_by(&mut chunk, &settings);
|
super::sort_by(&mut chunk, &settings);
|
||||||
write_chunk(
|
|
||||||
settings,
|
let file_path = tmp_dir.path().join(chunks_read.to_string());
|
||||||
&iter.tmp_dir.path().join(iter.chunks.to_string()),
|
write_chunk(settings, &file_path, &mut chunk);
|
||||||
&mut chunk,
|
|
||||||
);
|
|
||||||
chunk.clear();
|
chunk.clear();
|
||||||
total_read = 0;
|
total_read = 0;
|
||||||
iter.chunks += 1;
|
chunks_read += 1;
|
||||||
|
|
||||||
|
file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// write the last chunk
|
// write the last chunk
|
||||||
if !chunk.is_empty() {
|
if !chunk.is_empty() {
|
||||||
super::sort_by(&mut chunk, &settings);
|
super::sort_by(&mut chunk, &settings);
|
||||||
|
|
||||||
|
let file_path = tmp_dir.path().join(chunks_read.to_string());
|
||||||
write_chunk(
|
write_chunk(
|
||||||
settings,
|
settings,
|
||||||
&iter.tmp_dir.path().join(iter.chunks.to_string()),
|
&tmp_dir.path().join(chunks_read.to_string()),
|
||||||
&mut chunk,
|
&mut chunk,
|
||||||
);
|
);
|
||||||
iter.chunks += 1;
|
|
||||||
|
file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap()));
|
||||||
}
|
}
|
||||||
|
ExtSortedIterator {
|
||||||
// We manually drop here to not go over our memory limit when we allocate below.
|
file_merger,
|
||||||
drop(chunk);
|
_tmp_dir: tmp_dir,
|
||||||
|
|
||||||
// initialize buffers for each chunk
|
|
||||||
//
|
|
||||||
// Having a right sized buffer for each chunk for smallish values seems silly to me?
|
|
||||||
//
|
|
||||||
// We will have to have the entire iter in memory sometime right?
|
|
||||||
// Set minimum to the size of the writer buffer, ~8K
|
|
||||||
|
|
||||||
const MINIMUM_READBACK_BUFFER: usize = 8200;
|
|
||||||
let right_sized_buffer = settings
|
|
||||||
.buffer_size
|
|
||||||
.checked_div(iter.chunks)
|
|
||||||
.unwrap_or(settings.buffer_size);
|
|
||||||
iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER {
|
|
||||||
right_sized_buffer
|
|
||||||
} else {
|
|
||||||
MINIMUM_READBACK_BUFFER
|
|
||||||
};
|
|
||||||
iter.buffers = vec![VecDeque::new(); iter.chunks];
|
|
||||||
iter.chunk_offsets = vec![0; iter.chunks];
|
|
||||||
for chunk_num in 0..iter.chunks {
|
|
||||||
let offset = fill_buff(
|
|
||||||
&mut iter.buffers[chunk_num],
|
|
||||||
crash_if_err!(
|
|
||||||
1,
|
|
||||||
File::open(iter.tmp_dir.path().join(chunk_num.to_string()))
|
|
||||||
),
|
|
||||||
iter.max_per_chunk,
|
|
||||||
&settings,
|
|
||||||
);
|
|
||||||
iter.chunk_offsets[chunk_num] = offset as u64;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
iter
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
|
fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
|
||||||
|
@ -186,29 +91,3 @@ fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
|
||||||
}
|
}
|
||||||
crash_if_err!(1, buf_write.flush());
|
crash_if_err!(1, buf_write.flush());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fill_buff(
|
|
||||||
vec: &mut VecDeque<Line>,
|
|
||||||
file: File,
|
|
||||||
max_bytes: usize,
|
|
||||||
settings: &GlobalSettings,
|
|
||||||
) -> usize {
|
|
||||||
let mut total_read = 0;
|
|
||||||
let mut bytes_read = 0;
|
|
||||||
for line in BufReader::new(file).split(if settings.zero_terminated {
|
|
||||||
b'\0'
|
|
||||||
} else {
|
|
||||||
b'\n'
|
|
||||||
}) {
|
|
||||||
let line_s = String::from_utf8(crash_if_err!(1, line)).unwrap();
|
|
||||||
bytes_read += line_s.len() + 1;
|
|
||||||
let deserialized = Line::new(line_s, settings);
|
|
||||||
total_read += deserialized.estimate_size();
|
|
||||||
vec.push_back(deserialized);
|
|
||||||
if total_read > max_bytes {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bytes_read
|
|
||||||
}
|
|
||||||
|
|
|
@ -32,6 +32,7 @@ use semver::Version;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::collections::BinaryHeap;
|
use std::collections::BinaryHeap;
|
||||||
use std::env;
|
use std::env;
|
||||||
|
use std::ffi::OsStr;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
||||||
|
@ -1122,10 +1123,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
exec(files, settings)
|
exec(files, settings)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn file_to_lines_iter<'a>(
|
fn file_to_lines_iter(
|
||||||
file: &str,
|
file: impl AsRef<OsStr>,
|
||||||
settings: &'a GlobalSettings,
|
settings: &'_ GlobalSettings,
|
||||||
) -> Option<impl Iterator<Item = Line> + 'a> {
|
) -> Option<impl Iterator<Item = Line> + '_> {
|
||||||
let (reader, _) = match open(file) {
|
let (reader, _) = match open(file) {
|
||||||
Some(x) => x,
|
Some(x) => x,
|
||||||
None => return None,
|
None => return None,
|
||||||
|
@ -1190,7 +1191,7 @@ fn exec(files: Vec<String>, settings: GlobalSettings) -> i32 {
|
||||||
let mut lines = vec![];
|
let mut lines = vec![];
|
||||||
|
|
||||||
// This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression.
|
// This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression.
|
||||||
for (file, _) in files.iter().map(|file| open(file)).flatten() {
|
for (file, _) in files.iter().map(open).flatten() {
|
||||||
let buf_reader = BufReader::new(file);
|
let buf_reader = BufReader::new(file);
|
||||||
for line in buf_reader.split(if settings.zero_terminated {
|
for line in buf_reader.split(if settings.zero_terminated {
|
||||||
b'\0'
|
b'\0'
|
||||||
|
@ -1517,7 +1518,8 @@ fn print_sorted<T: Iterator<Item = Line>>(iter: T, settings: &GlobalSettings) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// from cat.rs
|
// from cat.rs
|
||||||
fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
|
fn open(path: impl AsRef<OsStr>) -> Option<(Box<dyn Read>, bool)> {
|
||||||
|
let path = path.as_ref();
|
||||||
if path == "-" {
|
if path == "-" {
|
||||||
let stdin = stdin();
|
let stdin = stdin();
|
||||||
return Some((Box::new(stdin) as Box<dyn Read>, is_stdin_interactive()));
|
return Some((Box::new(stdin) as Box<dyn Read>, is_stdin_interactive()));
|
||||||
|
@ -1526,7 +1528,7 @@ fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
|
||||||
match File::open(Path::new(path)) {
|
match File::open(Path::new(path)) {
|
||||||
Ok(f) => Some((Box::new(f) as Box<dyn Read>, false)),
|
Ok(f) => Some((Box::new(f) as Box<dyn Read>, false)),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
show_error!("{0}: {1}", path, e.to_string());
|
show_error!("{0:?}: {1}", path, e.to_string());
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,11 +13,11 @@ extern crate uucore;
|
||||||
mod platform;
|
mod platform;
|
||||||
|
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
use std::char;
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::{char, fs::remove_file};
|
||||||
|
|
||||||
static NAME: &str = "split";
|
static NAME: &str = "split";
|
||||||
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||||
|
@ -213,107 +213,145 @@ struct Settings {
|
||||||
verbose: bool,
|
verbose: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct SplitControl {
|
|
||||||
current_line: String, // Don't touch
|
|
||||||
request_new_file: bool, // Splitter implementation requests new file
|
|
||||||
}
|
|
||||||
|
|
||||||
trait Splitter {
|
trait Splitter {
|
||||||
// Consume the current_line and return the consumed string
|
// Consume as much as possible from `reader` so as to saturate `writer`.
|
||||||
fn consume(&mut self, _: &mut SplitControl) -> String;
|
// Equivalent to finishing one of the part files. Returns the number of
|
||||||
|
// bytes that have been moved.
|
||||||
|
fn consume(
|
||||||
|
&mut self,
|
||||||
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
|
) -> u128;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LineSplitter {
|
struct LineSplitter {
|
||||||
saved_lines_to_write: usize,
|
lines_per_split: usize,
|
||||||
lines_to_write: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LineSplitter {
|
impl LineSplitter {
|
||||||
fn new(settings: &Settings) -> LineSplitter {
|
fn new(settings: &Settings) -> LineSplitter {
|
||||||
let n = match settings.strategy_param.parse() {
|
|
||||||
Ok(a) => a,
|
|
||||||
Err(e) => crash!(1, "invalid number of lines: {}", e),
|
|
||||||
};
|
|
||||||
LineSplitter {
|
LineSplitter {
|
||||||
saved_lines_to_write: n,
|
lines_per_split: settings
|
||||||
lines_to_write: n,
|
.strategy_param
|
||||||
|
.parse()
|
||||||
|
.unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Splitter for LineSplitter {
|
impl Splitter for LineSplitter {
|
||||||
fn consume(&mut self, control: &mut SplitControl) -> String {
|
fn consume(
|
||||||
self.lines_to_write -= 1;
|
&mut self,
|
||||||
if self.lines_to_write == 0 {
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
self.lines_to_write = self.saved_lines_to_write;
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
control.request_new_file = true;
|
) -> u128 {
|
||||||
|
let mut bytes_consumed = 0u128;
|
||||||
|
let mut buffer = String::with_capacity(1024);
|
||||||
|
for _ in 0..self.lines_per_split {
|
||||||
|
let bytes_read = reader
|
||||||
|
.read_line(&mut buffer)
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
|
||||||
|
// If we ever read 0 bytes then we know we've hit EOF.
|
||||||
|
if bytes_read == 0 {
|
||||||
|
return bytes_consumed;
|
||||||
}
|
}
|
||||||
control.current_line.clone()
|
|
||||||
|
writer
|
||||||
|
.write_all(buffer.as_bytes())
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
|
||||||
|
// Empty out the String buffer since `read_line` appends instead of
|
||||||
|
// replaces.
|
||||||
|
buffer.clear();
|
||||||
|
|
||||||
|
bytes_consumed += bytes_read as u128;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes_consumed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ByteSplitter {
|
struct ByteSplitter {
|
||||||
saved_bytes_to_write: usize,
|
bytes_per_split: u128,
|
||||||
bytes_to_write: usize,
|
|
||||||
break_on_line_end: bool,
|
|
||||||
require_whole_line: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ByteSplitter {
|
impl ByteSplitter {
|
||||||
fn new(settings: &Settings) -> ByteSplitter {
|
fn new(settings: &Settings) -> ByteSplitter {
|
||||||
let mut strategy_param: Vec<char> = settings.strategy_param.chars().collect();
|
// These multipliers are the same as supported by GNU coreutils.
|
||||||
let suffix = strategy_param.pop().unwrap();
|
let modifiers: Vec<(&str, u128)> = vec![
|
||||||
let multiplier = match suffix {
|
("K", 1024u128),
|
||||||
'0'..='9' => 1usize,
|
("M", 1024 * 1024),
|
||||||
'b' => 512usize,
|
("G", 1024 * 1024 * 1024),
|
||||||
'k' => 1024usize,
|
("T", 1024 * 1024 * 1024 * 1024),
|
||||||
'm' => 1024usize * 1024usize,
|
("P", 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
_ => crash!(1, "invalid number of bytes"),
|
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
};
|
("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
let n = if suffix.is_alphabetic() {
|
("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
|
||||||
match strategy_param
|
("KB", 1000),
|
||||||
|
("MB", 1000 * 1000),
|
||||||
|
("GB", 1000 * 1000 * 1000),
|
||||||
|
("TB", 1000 * 1000 * 1000 * 1000),
|
||||||
|
("PB", 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
|
("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
|
("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
|
("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
|
||||||
|
];
|
||||||
|
|
||||||
|
// This sequential find is acceptable since none of the modifiers are
|
||||||
|
// suffixes of any other modifiers, a la Huffman codes.
|
||||||
|
let (suffix, multiplier) = modifiers
|
||||||
.iter()
|
.iter()
|
||||||
.cloned()
|
.find(|(suffix, _)| settings.strategy_param.ends_with(suffix))
|
||||||
.collect::<String>()
|
.unwrap_or(&("", 1));
|
||||||
.parse::<usize>()
|
|
||||||
{
|
// Try to parse the actual numeral.
|
||||||
Ok(a) => a,
|
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
|
||||||
Err(e) => crash!(1, "invalid number of bytes: {}", e),
|
.parse::<u128>()
|
||||||
}
|
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
|
||||||
} else {
|
|
||||||
match settings.strategy_param.parse::<usize>() {
|
|
||||||
Ok(a) => a,
|
|
||||||
Err(e) => crash!(1, "invalid number of bytes: {}", e),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
ByteSplitter {
|
ByteSplitter {
|
||||||
saved_bytes_to_write: n * multiplier,
|
bytes_per_split: n * multiplier,
|
||||||
bytes_to_write: n * multiplier,
|
|
||||||
break_on_line_end: settings.strategy == "b",
|
|
||||||
require_whole_line: false,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Splitter for ByteSplitter {
|
impl Splitter for ByteSplitter {
|
||||||
fn consume(&mut self, control: &mut SplitControl) -> String {
|
fn consume(
|
||||||
let line = control.current_line.clone();
|
&mut self,
|
||||||
let n = std::cmp::min(line.chars().count(), self.bytes_to_write);
|
reader: &mut BufReader<Box<dyn Read>>,
|
||||||
if self.require_whole_line && n < line.chars().count() {
|
writer: &mut BufWriter<Box<dyn Write>>,
|
||||||
self.bytes_to_write = self.saved_bytes_to_write;
|
) -> u128 {
|
||||||
control.request_new_file = true;
|
// We buffer reads and writes. We proceed until `bytes_consumed` is
|
||||||
self.require_whole_line = false;
|
// equal to `self.bytes_per_split` or we reach EOF.
|
||||||
return "".to_owned();
|
let mut bytes_consumed = 0u128;
|
||||||
|
const BUFFER_SIZE: usize = 1024;
|
||||||
|
let mut buffer = [0u8; BUFFER_SIZE];
|
||||||
|
while bytes_consumed < self.bytes_per_split {
|
||||||
|
// Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min
|
||||||
|
// doesn't really work since we have to get types to match which
|
||||||
|
// can't be done in a way that keeps all conversions safe.
|
||||||
|
let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed {
|
||||||
|
BUFFER_SIZE
|
||||||
|
} else {
|
||||||
|
// This is a safe conversion since the difference must be less
|
||||||
|
// than BUFFER_SIZE in this branch.
|
||||||
|
(self.bytes_per_split - bytes_consumed) as usize
|
||||||
|
};
|
||||||
|
let bytes_read = reader
|
||||||
|
.read(&mut buffer[0..bytes_desired])
|
||||||
|
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
|
||||||
|
// If we ever read 0 bytes then we know we've hit EOF.
|
||||||
|
if bytes_read == 0 {
|
||||||
|
return bytes_consumed;
|
||||||
}
|
}
|
||||||
self.bytes_to_write -= n;
|
|
||||||
if n == 0 {
|
writer
|
||||||
self.bytes_to_write = self.saved_bytes_to_write;
|
.write_all(&buffer[0..bytes_read])
|
||||||
control.request_new_file = true;
|
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
|
||||||
|
|
||||||
|
bytes_consumed += bytes_read as u128;
|
||||||
}
|
}
|
||||||
if self.break_on_line_end && n == line.chars().count() {
|
|
||||||
self.require_whole_line = self.break_on_line_end;
|
bytes_consumed
|
||||||
}
|
|
||||||
line[..n].to_owned()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -353,14 +391,13 @@ fn split(settings: &Settings) -> i32 {
|
||||||
let mut reader = BufReader::new(if settings.input == "-" {
|
let mut reader = BufReader::new(if settings.input == "-" {
|
||||||
Box::new(stdin()) as Box<dyn Read>
|
Box::new(stdin()) as Box<dyn Read>
|
||||||
} else {
|
} else {
|
||||||
let r = match File::open(Path::new(&settings.input)) {
|
let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| {
|
||||||
Ok(a) => a,
|
crash!(
|
||||||
Err(_) => crash!(
|
|
||||||
1,
|
1,
|
||||||
"cannot open '{}' for reading: No such file or directory",
|
"cannot open '{}' for reading: No such file or directory",
|
||||||
settings.input
|
settings.input
|
||||||
),
|
)
|
||||||
};
|
});
|
||||||
Box::new(r) as Box<dyn Read>
|
Box::new(r) as Box<dyn Read>
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -370,21 +407,9 @@ fn split(settings: &Settings) -> i32 {
|
||||||
a => crash!(1, "strategy {} not supported", a),
|
a => crash!(1, "strategy {} not supported", a),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut control = SplitControl {
|
|
||||||
current_line: "".to_owned(), // Request new line
|
|
||||||
request_new_file: true, // Request new file
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut writer = BufWriter::new(Box::new(stdout()) as Box<dyn Write>);
|
|
||||||
let mut fileno = 0;
|
let mut fileno = 0;
|
||||||
loop {
|
loop {
|
||||||
if control.current_line.chars().count() == 0 {
|
// Get a new part file set up, and construct `writer` for it.
|
||||||
match reader.read_line(&mut control.current_line) {
|
|
||||||
Ok(0) | Err(_) => break,
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if control.request_new_file {
|
|
||||||
let mut filename = settings.prefix.clone();
|
let mut filename = settings.prefix.clone();
|
||||||
filename.push_str(
|
filename.push_str(
|
||||||
if settings.numeric_suffix {
|
if settings.numeric_suffix {
|
||||||
|
@ -395,23 +420,26 @@ fn split(settings: &Settings) -> i32 {
|
||||||
.as_ref(),
|
.as_ref(),
|
||||||
);
|
);
|
||||||
filename.push_str(settings.additional_suffix.as_ref());
|
filename.push_str(settings.additional_suffix.as_ref());
|
||||||
|
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||||
|
|
||||||
|
let bytes_consumed = splitter.consume(&mut reader, &mut writer);
|
||||||
|
writer
|
||||||
|
.flush()
|
||||||
|
.unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e));
|
||||||
|
|
||||||
|
// If we didn't write anything we should clean up the empty file, and
|
||||||
|
// break from the loop.
|
||||||
|
if bytes_consumed == 0 {
|
||||||
|
// The output file is only ever created if --filter isn't used.
|
||||||
|
// Complicated, I know...
|
||||||
|
if settings.filter.is_none() {
|
||||||
|
remove_file(filename)
|
||||||
|
.unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
crash_if_err!(1, writer.flush());
|
|
||||||
fileno += 1;
|
fileno += 1;
|
||||||
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
|
||||||
control.request_new_file = false;
|
|
||||||
if settings.verbose {
|
|
||||||
println!("creating file '{}'", filename);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let consumed = splitter.consume(&mut control);
|
|
||||||
crash_if_err!(1, writer.write_all(consumed.as_bytes()));
|
|
||||||
|
|
||||||
let advance = consumed.chars().count();
|
|
||||||
let clone = control.current_line.clone();
|
|
||||||
let sl = clone;
|
|
||||||
control.current_line = sl[advance..sl.chars().count()].to_owned();
|
|
||||||
}
|
}
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ path = "src/stat.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = "2.33"
|
clap = "2.33"
|
||||||
time = "0.1.40"
|
time = "0.1.40"
|
||||||
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] }
|
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs"] }
|
||||||
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
|
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
|
|
|
@ -41,13 +41,6 @@ impl BirthTime for Metadata {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[macro_export]
|
|
||||||
macro_rules! has {
|
|
||||||
($mode:expr, $perm:expr) => {
|
|
||||||
$mode & $perm != 0
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn pretty_time(sec: i64, nsec: i64) -> String {
|
pub fn pretty_time(sec: i64, nsec: i64) -> String {
|
||||||
// sec == seconds since UNIX_EPOCH
|
// sec == seconds since UNIX_EPOCH
|
||||||
// nsec == nanoseconds since (UNIX_EPOCH + sec)
|
// nsec == nanoseconds since (UNIX_EPOCH + sec)
|
||||||
|
@ -81,65 +74,6 @@ pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pretty_access(mode: mode_t) -> String {
|
|
||||||
let mut result = String::with_capacity(10);
|
|
||||||
result.push(match mode & S_IFMT {
|
|
||||||
S_IFDIR => 'd',
|
|
||||||
S_IFCHR => 'c',
|
|
||||||
S_IFBLK => 'b',
|
|
||||||
S_IFREG => '-',
|
|
||||||
S_IFIFO => 'p',
|
|
||||||
S_IFLNK => 'l',
|
|
||||||
S_IFSOCK => 's',
|
|
||||||
// TODO: Other file types
|
|
||||||
_ => '?',
|
|
||||||
});
|
|
||||||
|
|
||||||
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
|
|
||||||
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
|
|
||||||
result.push(if has!(mode, S_ISUID as mode_t) {
|
|
||||||
if has!(mode, S_IXUSR) {
|
|
||||||
's'
|
|
||||||
} else {
|
|
||||||
'S'
|
|
||||||
}
|
|
||||||
} else if has!(mode, S_IXUSR) {
|
|
||||||
'x'
|
|
||||||
} else {
|
|
||||||
'-'
|
|
||||||
});
|
|
||||||
|
|
||||||
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
|
|
||||||
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
|
|
||||||
result.push(if has!(mode, S_ISGID as mode_t) {
|
|
||||||
if has!(mode, S_IXGRP) {
|
|
||||||
's'
|
|
||||||
} else {
|
|
||||||
'S'
|
|
||||||
}
|
|
||||||
} else if has!(mode, S_IXGRP) {
|
|
||||||
'x'
|
|
||||||
} else {
|
|
||||||
'-'
|
|
||||||
});
|
|
||||||
|
|
||||||
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
|
|
||||||
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
|
|
||||||
result.push(if has!(mode, S_ISVTX as mode_t) {
|
|
||||||
if has!(mode, S_IXOTH) {
|
|
||||||
't'
|
|
||||||
} else {
|
|
||||||
'T'
|
|
||||||
}
|
|
||||||
} else if has!(mode, S_IXOTH) {
|
|
||||||
'x'
|
|
||||||
} else {
|
|
||||||
'-'
|
|
||||||
});
|
|
||||||
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::convert::{AsRef, From};
|
use std::convert::{AsRef, From};
|
||||||
use std::ffi::CString;
|
use std::ffi::CString;
|
||||||
|
|
|
@ -7,13 +7,13 @@
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE
|
// spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE
|
||||||
|
|
||||||
#[macro_use]
|
|
||||||
mod fsext;
|
mod fsext;
|
||||||
pub use crate::fsext::*;
|
pub use crate::fsext::*;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate uucore;
|
extern crate uucore;
|
||||||
use uucore::entries;
|
use uucore::entries;
|
||||||
|
use uucore::fs::display_permissions;
|
||||||
|
|
||||||
use clap::{App, Arg, ArgMatches};
|
use clap::{App, Arg, ArgMatches};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
@ -575,7 +575,7 @@ impl Stater {
|
||||||
}
|
}
|
||||||
// access rights in human readable form
|
// access rights in human readable form
|
||||||
'A' => {
|
'A' => {
|
||||||
arg = pretty_access(meta.mode() as mode_t);
|
arg = display_permissions(&meta, true);
|
||||||
otype = OutputType::Str;
|
otype = OutputType::Str;
|
||||||
}
|
}
|
||||||
// number of blocks allocated (see %B)
|
// number of blocks allocated (see %B)
|
||||||
|
|
|
@ -12,8 +12,10 @@ extern crate uucore;
|
||||||
|
|
||||||
mod count_bytes;
|
mod count_bytes;
|
||||||
mod countable;
|
mod countable;
|
||||||
|
mod wordcount;
|
||||||
use count_bytes::count_bytes_fast;
|
use count_bytes::count_bytes_fast;
|
||||||
use countable::WordCountable;
|
use countable::WordCountable;
|
||||||
|
use wordcount::{TitledWordCount, WordCount};
|
||||||
|
|
||||||
use clap::{App, Arg, ArgMatches};
|
use clap::{App, Arg, ArgMatches};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
@ -21,9 +23,7 @@ use thiserror::Error;
|
||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use std::ops::{Add, AddAssign};
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::str::from_utf8;
|
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum WcError {
|
pub enum WcError {
|
||||||
|
@ -82,51 +82,6 @@ impl Settings {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default, Copy, Clone)]
|
|
||||||
struct WordCount {
|
|
||||||
bytes: usize,
|
|
||||||
chars: usize,
|
|
||||||
lines: usize,
|
|
||||||
words: usize,
|
|
||||||
max_line_length: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Add for WordCount {
|
|
||||||
type Output = Self;
|
|
||||||
|
|
||||||
fn add(self, other: Self) -> Self {
|
|
||||||
Self {
|
|
||||||
bytes: self.bytes + other.bytes,
|
|
||||||
chars: self.chars + other.chars,
|
|
||||||
lines: self.lines + other.lines,
|
|
||||||
words: self.words + other.words,
|
|
||||||
max_line_length: max(self.max_line_length, other.max_line_length),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AddAssign for WordCount {
|
|
||||||
fn add_assign(&mut self, other: Self) {
|
|
||||||
*self = *self + other
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WordCount {
|
|
||||||
fn with_title(self, title: &str) -> TitledWordCount {
|
|
||||||
TitledWordCount { title, count: self }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This struct supplements the actual word count with a title that is displayed
|
|
||||||
/// to the user at the end of the program.
|
|
||||||
/// The reason we don't simply include title in the `WordCount` struct is that
|
|
||||||
/// it would result in unnecessary copying of `String`.
|
|
||||||
#[derive(Debug, Default, Clone)]
|
|
||||||
struct TitledWordCount<'a> {
|
|
||||||
title: &'a str,
|
|
||||||
count: WordCount,
|
|
||||||
}
|
|
||||||
|
|
||||||
static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if
|
static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if
|
||||||
more than one FILE is specified.";
|
more than one FILE is specified.";
|
||||||
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
static VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||||
|
@ -207,18 +162,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const CR: u8 = b'\r';
|
|
||||||
const LF: u8 = b'\n';
|
|
||||||
const SPACE: u8 = b' ';
|
|
||||||
const TAB: u8 = b'\t';
|
|
||||||
const SYN: u8 = 0x16_u8;
|
|
||||||
const FF: u8 = 0x0C_u8;
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn is_word_separator(byte: u8) -> bool {
|
|
||||||
byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF
|
|
||||||
}
|
|
||||||
|
|
||||||
fn word_count_from_reader<T: WordCountable>(
|
fn word_count_from_reader<T: WordCountable>(
|
||||||
mut reader: T,
|
mut reader: T,
|
||||||
settings: &Settings,
|
settings: &Settings,
|
||||||
|
@ -239,58 +182,20 @@ fn word_count_from_reader<T: WordCountable>(
|
||||||
// we do not need to decode the byte stream if we're only counting bytes/newlines
|
// we do not need to decode the byte stream if we're only counting bytes/newlines
|
||||||
let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length;
|
let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length;
|
||||||
|
|
||||||
let mut line_count: usize = 0;
|
// Sum the WordCount for each line. Show a warning for each line
|
||||||
let mut word_count: usize = 0;
|
// that results in an IO error when trying to read it.
|
||||||
let mut byte_count: usize = 0;
|
let total = reader
|
||||||
let mut char_count: usize = 0;
|
.lines()
|
||||||
let mut longest_line_length: usize = 0;
|
.filter_map(|res| match res {
|
||||||
let mut ends_lf: bool;
|
Ok(line) => Some(line),
|
||||||
|
|
||||||
// reading from a TTY seems to raise a condition on, rather than return Some(0) like a file.
|
|
||||||
// hence the option wrapped in a result here
|
|
||||||
for line_result in reader.lines() {
|
|
||||||
let raw_line = match line_result {
|
|
||||||
Ok(l) => l,
|
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
show_warning!("Error while reading {}: {}", path, e);
|
show_warning!("Error while reading {}: {}", path, e);
|
||||||
continue;
|
None
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
// GNU 'wc' only counts lines that end in LF as lines
|
|
||||||
ends_lf = *raw_line.last().unwrap() == LF;
|
|
||||||
line_count += ends_lf as usize;
|
|
||||||
|
|
||||||
byte_count += raw_line.len();
|
|
||||||
|
|
||||||
if decode_chars {
|
|
||||||
// try and convert the bytes to UTF-8 first
|
|
||||||
let current_char_count;
|
|
||||||
match from_utf8(&raw_line[..]) {
|
|
||||||
Ok(line) => {
|
|
||||||
word_count += line.split_whitespace().count();
|
|
||||||
current_char_count = line.chars().count();
|
|
||||||
}
|
|
||||||
Err(..) => {
|
|
||||||
word_count += raw_line.split(|&x| is_word_separator(x)).count();
|
|
||||||
current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
char_count += current_char_count;
|
|
||||||
if current_char_count > longest_line_length {
|
|
||||||
// -L is a GNU 'wc' extension so same behavior on LF
|
|
||||||
longest_line_length = current_char_count - (ends_lf as usize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(WordCount {
|
|
||||||
bytes: byte_count,
|
|
||||||
chars: char_count,
|
|
||||||
lines: line_count,
|
|
||||||
words: word_count,
|
|
||||||
max_line_length: longest_line_length,
|
|
||||||
})
|
})
|
||||||
|
.map(|line| WordCount::from_line(&line, decode_chars))
|
||||||
|
.sum();
|
||||||
|
Ok(total)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_count_from_path(path: &str, settings: &Settings) -> WcResult<WordCount> {
|
fn word_count_from_path(path: &str, settings: &Settings) -> WcResult<WordCount> {
|
||||||
|
@ -323,7 +228,12 @@ fn wc(files: Vec<String>, settings: &Settings) -> Result<(), u32> {
|
||||||
error_count += 1;
|
error_count += 1;
|
||||||
WordCount::default()
|
WordCount::default()
|
||||||
});
|
});
|
||||||
max_width = max(max_width, word_count.bytes.to_string().len() + 1);
|
// Compute the number of digits needed to display the number
|
||||||
|
// of bytes in the file. Even if the settings indicate that we
|
||||||
|
// won't *display* the number of bytes, we still use the
|
||||||
|
// number of digits in the byte count as the width when
|
||||||
|
// formatting each count as a string for output.
|
||||||
|
max_width = max(max_width, word_count.bytes.to_string().len());
|
||||||
total_word_count += word_count;
|
total_word_count += word_count;
|
||||||
results.push(word_count.with_title(path));
|
results.push(word_count.with_title(path));
|
||||||
}
|
}
|
||||||
|
@ -364,19 +274,40 @@ fn print_stats(
|
||||||
min_width = 0;
|
min_width = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut is_first: bool = true;
|
||||||
|
|
||||||
if settings.show_lines {
|
if settings.show_lines {
|
||||||
|
if !is_first {
|
||||||
|
write!(stdout_lock, " ")?;
|
||||||
|
}
|
||||||
write!(stdout_lock, "{:1$}", result.count.lines, min_width)?;
|
write!(stdout_lock, "{:1$}", result.count.lines, min_width)?;
|
||||||
|
is_first = false;
|
||||||
}
|
}
|
||||||
if settings.show_words {
|
if settings.show_words {
|
||||||
|
if !is_first {
|
||||||
|
write!(stdout_lock, " ")?;
|
||||||
|
}
|
||||||
write!(stdout_lock, "{:1$}", result.count.words, min_width)?;
|
write!(stdout_lock, "{:1$}", result.count.words, min_width)?;
|
||||||
|
is_first = false;
|
||||||
}
|
}
|
||||||
if settings.show_bytes {
|
if settings.show_bytes {
|
||||||
|
if !is_first {
|
||||||
|
write!(stdout_lock, " ")?;
|
||||||
|
}
|
||||||
write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?;
|
write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?;
|
||||||
|
is_first = false;
|
||||||
}
|
}
|
||||||
if settings.show_chars {
|
if settings.show_chars {
|
||||||
|
if !is_first {
|
||||||
|
write!(stdout_lock, " ")?;
|
||||||
|
}
|
||||||
write!(stdout_lock, "{:1$}", result.count.chars, min_width)?;
|
write!(stdout_lock, "{:1$}", result.count.chars, min_width)?;
|
||||||
|
is_first = false;
|
||||||
}
|
}
|
||||||
if settings.show_max_line_length {
|
if settings.show_max_line_length {
|
||||||
|
if !is_first {
|
||||||
|
write!(stdout_lock, " ")?;
|
||||||
|
}
|
||||||
write!(
|
write!(
|
||||||
stdout_lock,
|
stdout_lock,
|
||||||
"{:1$}",
|
"{:1$}",
|
||||||
|
|
131
src/uu/wc/src/wordcount.rs
Normal file
131
src/uu/wc/src/wordcount.rs
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
use std::cmp::max;
|
||||||
|
use std::iter::Sum;
|
||||||
|
use std::ops::{Add, AddAssign};
|
||||||
|
use std::str::from_utf8;
|
||||||
|
|
||||||
|
const CR: u8 = b'\r';
|
||||||
|
const LF: u8 = b'\n';
|
||||||
|
const SPACE: u8 = b' ';
|
||||||
|
const TAB: u8 = b'\t';
|
||||||
|
const SYN: u8 = 0x16_u8;
|
||||||
|
const FF: u8 = 0x0C_u8;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_word_separator(byte: u8) -> bool {
|
||||||
|
byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default, Copy, Clone)]
|
||||||
|
pub struct WordCount {
|
||||||
|
pub bytes: usize,
|
||||||
|
pub chars: usize,
|
||||||
|
pub lines: usize,
|
||||||
|
pub words: usize,
|
||||||
|
pub max_line_length: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Add for WordCount {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
fn add(self, other: Self) -> Self {
|
||||||
|
Self {
|
||||||
|
bytes: self.bytes + other.bytes,
|
||||||
|
chars: self.chars + other.chars,
|
||||||
|
lines: self.lines + other.lines,
|
||||||
|
words: self.words + other.words,
|
||||||
|
max_line_length: max(self.max_line_length, other.max_line_length),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AddAssign for WordCount {
|
||||||
|
fn add_assign(&mut self, other: Self) {
|
||||||
|
*self = *self + other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Sum for WordCount {
|
||||||
|
fn sum<I>(iter: I) -> WordCount
|
||||||
|
where
|
||||||
|
I: Iterator<Item = WordCount>,
|
||||||
|
{
|
||||||
|
iter.fold(WordCount::default(), |acc, x| acc + x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WordCount {
|
||||||
|
/// Count the characters and whitespace-separated words in the given bytes.
|
||||||
|
///
|
||||||
|
/// `line` is a slice of bytes that will be decoded as ASCII characters.
|
||||||
|
fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) {
|
||||||
|
let word_count = line.split(|&x| is_word_separator(x)).count();
|
||||||
|
let char_count = line.iter().filter(|c| c.is_ascii()).count();
|
||||||
|
(word_count, char_count)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a [`WordCount`] from a sequence of bytes representing a line.
|
||||||
|
///
|
||||||
|
/// If the last byte of `line` encodes a newline character (`\n`),
|
||||||
|
/// then the [`lines`] field will be set to 1. Otherwise, it will
|
||||||
|
/// be set to 0. The [`bytes`] field is simply the length of
|
||||||
|
/// `line`.
|
||||||
|
///
|
||||||
|
/// If `decode_chars` is `false`, the [`chars`] and [`words`]
|
||||||
|
/// fields will be set to 0. If it is `true`, this function will
|
||||||
|
/// attempt to decode the bytes first as UTF-8, and failing that,
|
||||||
|
/// as ASCII.
|
||||||
|
pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount {
|
||||||
|
// GNU 'wc' only counts lines that end in LF as lines
|
||||||
|
let lines = (*line.last().unwrap() == LF) as usize;
|
||||||
|
let bytes = line.len();
|
||||||
|
let (words, chars) = if decode_chars {
|
||||||
|
WordCount::word_and_char_count(line)
|
||||||
|
} else {
|
||||||
|
(0, 0)
|
||||||
|
};
|
||||||
|
// -L is a GNU 'wc' extension so same behavior on LF
|
||||||
|
let max_line_length = if chars > 0 { chars - lines } else { 0 };
|
||||||
|
WordCount {
|
||||||
|
bytes,
|
||||||
|
chars,
|
||||||
|
lines,
|
||||||
|
words,
|
||||||
|
max_line_length,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Count the UTF-8 characters and words in the given string slice.
|
||||||
|
///
|
||||||
|
/// `s` is a string slice that is assumed to be a UTF-8 string.
|
||||||
|
fn utf8_word_and_char_count(s: &str) -> (usize, usize) {
|
||||||
|
let word_count = s.split_whitespace().count();
|
||||||
|
let char_count = s.chars().count();
|
||||||
|
(word_count, char_count)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_title(self, title: &str) -> TitledWordCount {
|
||||||
|
TitledWordCount { title, count: self }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Count the characters and words in the given slice of bytes.
|
||||||
|
///
|
||||||
|
/// `line` is a slice of bytes that will be decoded as UTF-8
|
||||||
|
/// characters, or if that fails, as ASCII characters.
|
||||||
|
fn word_and_char_count(line: &[u8]) -> (usize, usize) {
|
||||||
|
// try and convert the bytes to UTF-8 first
|
||||||
|
match from_utf8(line) {
|
||||||
|
Ok(s) => WordCount::utf8_word_and_char_count(s),
|
||||||
|
Err(..) => WordCount::ascii_word_and_char_count(line),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This struct supplements the actual word count with a title that is displayed
|
||||||
|
/// to the user at the end of the program.
|
||||||
|
/// The reason we don't simply include title in the `WordCount` struct is that
|
||||||
|
/// it would result in unnecessary copying of `String`.
|
||||||
|
#[derive(Debug, Default, Clone)]
|
||||||
|
pub struct TitledWordCount<'a> {
|
||||||
|
pub title: &'a str,
|
||||||
|
pub count: WordCount,
|
||||||
|
}
|
|
@ -8,8 +8,9 @@
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
use libc::{
|
use libc::{
|
||||||
mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR,
|
mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP,
|
||||||
S_IXGRP, S_IXOTH, S_IXUSR,
|
S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH,
|
||||||
|
S_IXUSR,
|
||||||
};
|
};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::env;
|
use std::env;
|
||||||
|
@ -23,9 +24,10 @@ use std::os::unix::fs::MetadataExt;
|
||||||
use std::path::{Component, Path, PathBuf};
|
use std::path::{Component, Path, PathBuf};
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
|
#[macro_export]
|
||||||
macro_rules! has {
|
macro_rules! has {
|
||||||
($mode:expr, $perm:expr) => {
|
($mode:expr, $perm:expr) => {
|
||||||
$mode & ($perm as u32) != 0
|
$mode & $perm != 0
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,22 +242,42 @@ pub fn is_stderr_interactive() -> bool {
|
||||||
|
|
||||||
#[cfg(not(unix))]
|
#[cfg(not(unix))]
|
||||||
#[allow(unused_variables)]
|
#[allow(unused_variables)]
|
||||||
pub fn display_permissions(metadata: &fs::Metadata) -> String {
|
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
|
||||||
|
if display_file_type {
|
||||||
|
return String::from("----------");
|
||||||
|
}
|
||||||
String::from("---------")
|
String::from("---------")
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
pub fn display_permissions(metadata: &fs::Metadata) -> String {
|
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
|
||||||
let mode: mode_t = metadata.mode() as mode_t;
|
let mode: mode_t = metadata.mode() as mode_t;
|
||||||
display_permissions_unix(mode as u32)
|
display_permissions_unix(mode, display_file_type)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
pub fn display_permissions_unix(mode: u32) -> String {
|
pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String {
|
||||||
let mut result = String::with_capacity(9);
|
let mut result;
|
||||||
|
if display_file_type {
|
||||||
|
result = String::with_capacity(10);
|
||||||
|
result.push(match mode & S_IFMT {
|
||||||
|
S_IFDIR => 'd',
|
||||||
|
S_IFCHR => 'c',
|
||||||
|
S_IFBLK => 'b',
|
||||||
|
S_IFREG => '-',
|
||||||
|
S_IFIFO => 'p',
|
||||||
|
S_IFLNK => 'l',
|
||||||
|
S_IFSOCK => 's',
|
||||||
|
// TODO: Other file types
|
||||||
|
_ => '?',
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
result = String::with_capacity(9);
|
||||||
|
}
|
||||||
|
|
||||||
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
|
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
|
||||||
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
|
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
|
||||||
result.push(if has!(mode, S_ISUID) {
|
result.push(if has!(mode, S_ISUID as mode_t) {
|
||||||
if has!(mode, S_IXUSR) {
|
if has!(mode, S_IXUSR) {
|
||||||
's'
|
's'
|
||||||
} else {
|
} else {
|
||||||
|
@ -269,7 +291,7 @@ pub fn display_permissions_unix(mode: u32) -> String {
|
||||||
|
|
||||||
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
|
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
|
||||||
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
|
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
|
||||||
result.push(if has!(mode, S_ISGID) {
|
result.push(if has!(mode, S_ISGID as mode_t) {
|
||||||
if has!(mode, S_IXGRP) {
|
if has!(mode, S_IXGRP) {
|
||||||
's'
|
's'
|
||||||
} else {
|
} else {
|
||||||
|
@ -283,7 +305,7 @@ pub fn display_permissions_unix(mode: u32) -> String {
|
||||||
|
|
||||||
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
|
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
|
||||||
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
|
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
|
||||||
result.push(if has!(mode, S_ISVTX) {
|
result.push(if has!(mode, S_ISVTX as mode_t) {
|
||||||
if has!(mode, S_IXOTH) {
|
if has!(mode, S_IXOTH) {
|
||||||
't'
|
't'
|
||||||
} else {
|
} else {
|
||||||
|
@ -355,4 +377,57 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
#[test]
|
||||||
|
fn test_display_permissions() {
|
||||||
|
assert_eq!(
|
||||||
|
"drwxr-xr-x",
|
||||||
|
display_permissions_unix(S_IFDIR | 0o755, true)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"rwxr-xr-x",
|
||||||
|
display_permissions_unix(S_IFDIR | 0o755, false)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"-rw-r--r--",
|
||||||
|
display_permissions_unix(S_IFREG | 0o644, true)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"srw-r-----",
|
||||||
|
display_permissions_unix(S_IFSOCK | 0o640, true)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"lrw-r-xr-x",
|
||||||
|
display_permissions_unix(S_IFLNK | 0o655, true)
|
||||||
|
);
|
||||||
|
assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
"brwSr-xr-x",
|
||||||
|
display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"brwsr-xr-x",
|
||||||
|
display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
"prw---sr--",
|
||||||
|
display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"prw---Sr--",
|
||||||
|
display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
"c---r-xr-t",
|
||||||
|
display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"c---r-xr-T",
|
||||||
|
display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ fn test_df_output() {
|
||||||
stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n");
|
stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n");
|
||||||
} else {
|
} else {
|
||||||
new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only(
|
new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only(
|
||||||
"Filesystem Size Used Available Use% Mounted on \n"
|
"Filesystem Size Used Available Use% Mounted on \n",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,15 @@ fn _du_basics_subdir(s: &str) {
|
||||||
fn _du_basics_subdir(s: &str) {
|
fn _du_basics_subdir(s: &str) {
|
||||||
assert_eq!(s, "0\tsubdir/deeper\n");
|
assert_eq!(s, "0\tsubdir/deeper\n");
|
||||||
}
|
}
|
||||||
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
|
#[cfg(target_os = "freebsd")]
|
||||||
|
fn _du_basics_subdir(s: &str) {
|
||||||
|
assert_eq!(s, "8\tsubdir/deeper\n");
|
||||||
|
}
|
||||||
|
#[cfg(all(
|
||||||
|
not(target_vendor = "apple"),
|
||||||
|
not(target_os = "windows"),
|
||||||
|
not(target_os = "freebsd")
|
||||||
|
))]
|
||||||
fn _du_basics_subdir(s: &str) {
|
fn _du_basics_subdir(s: &str) {
|
||||||
// MS-WSL linux has altered expected output
|
// MS-WSL linux has altered expected output
|
||||||
if !uucore::os::is_wsl_1() {
|
if !uucore::os::is_wsl_1() {
|
||||||
|
@ -100,7 +108,15 @@ fn _du_soft_link(s: &str) {
|
||||||
fn _du_soft_link(s: &str) {
|
fn _du_soft_link(s: &str) {
|
||||||
assert_eq!(s, "8\tsubdir/links\n");
|
assert_eq!(s, "8\tsubdir/links\n");
|
||||||
}
|
}
|
||||||
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
|
#[cfg(target_os = "freebsd")]
|
||||||
|
fn _du_soft_link(s: &str) {
|
||||||
|
assert_eq!(s, "16\tsubdir/links\n");
|
||||||
|
}
|
||||||
|
#[cfg(all(
|
||||||
|
not(target_vendor = "apple"),
|
||||||
|
not(target_os = "windows"),
|
||||||
|
not(target_os = "freebsd")
|
||||||
|
))]
|
||||||
fn _du_soft_link(s: &str) {
|
fn _du_soft_link(s: &str) {
|
||||||
// MS-WSL linux has altered expected output
|
// MS-WSL linux has altered expected output
|
||||||
if !uucore::os::is_wsl_1() {
|
if !uucore::os::is_wsl_1() {
|
||||||
|
@ -141,7 +157,15 @@ fn _du_hard_link(s: &str) {
|
||||||
fn _du_hard_link(s: &str) {
|
fn _du_hard_link(s: &str) {
|
||||||
assert_eq!(s, "8\tsubdir/links\n")
|
assert_eq!(s, "8\tsubdir/links\n")
|
||||||
}
|
}
|
||||||
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
|
#[cfg(target_os = "freebsd")]
|
||||||
|
fn _du_hard_link(s: &str) {
|
||||||
|
assert_eq!(s, "16\tsubdir/links\n")
|
||||||
|
}
|
||||||
|
#[cfg(all(
|
||||||
|
not(target_vendor = "apple"),
|
||||||
|
not(target_os = "windows"),
|
||||||
|
not(target_os = "freebsd")
|
||||||
|
))]
|
||||||
fn _du_hard_link(s: &str) {
|
fn _du_hard_link(s: &str) {
|
||||||
// MS-WSL linux has altered expected output
|
// MS-WSL linux has altered expected output
|
||||||
if !uucore::os::is_wsl_1() {
|
if !uucore::os::is_wsl_1() {
|
||||||
|
@ -181,7 +205,15 @@ fn _du_d_flag(s: &str) {
|
||||||
fn _du_d_flag(s: &str) {
|
fn _du_d_flag(s: &str) {
|
||||||
assert_eq!(s, "8\t./subdir\n8\t./\n");
|
assert_eq!(s, "8\t./subdir\n8\t./\n");
|
||||||
}
|
}
|
||||||
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
|
#[cfg(target_os = "freebsd")]
|
||||||
|
fn _du_d_flag(s: &str) {
|
||||||
|
assert_eq!(s, "28\t./subdir\n36\t./\n");
|
||||||
|
}
|
||||||
|
#[cfg(all(
|
||||||
|
not(target_vendor = "apple"),
|
||||||
|
not(target_os = "windows"),
|
||||||
|
not(target_os = "freebsd")
|
||||||
|
))]
|
||||||
fn _du_d_flag(s: &str) {
|
fn _du_d_flag(s: &str) {
|
||||||
// MS-WSL linux has altered expected output
|
// MS-WSL linux has altered expected output
|
||||||
if !uucore::os::is_wsl_1() {
|
if !uucore::os::is_wsl_1() {
|
||||||
|
|
|
@ -4,11 +4,15 @@ extern crate regex;
|
||||||
use self::rand::{thread_rng, Rng};
|
use self::rand::{thread_rng, Rng};
|
||||||
use self::regex::Regex;
|
use self::regex::Regex;
|
||||||
use crate::common::util::*;
|
use crate::common::util::*;
|
||||||
|
use rand::SeedableRng;
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs::{read_dir, File};
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::{
|
||||||
|
fs::{read_dir, File},
|
||||||
|
io::BufWriter,
|
||||||
|
};
|
||||||
|
|
||||||
fn random_chars(n: usize) -> String {
|
fn random_chars(n: usize) -> String {
|
||||||
thread_rng()
|
thread_rng()
|
||||||
|
@ -58,7 +62,7 @@ impl Glob {
|
||||||
files.sort();
|
files.sort();
|
||||||
let mut data: Vec<u8> = vec![];
|
let mut data: Vec<u8> = vec![];
|
||||||
for name in &files {
|
for name in &files {
|
||||||
data.extend(self.directory.read(name).into_bytes());
|
data.extend(self.directory.read_bytes(name));
|
||||||
}
|
}
|
||||||
data
|
data
|
||||||
}
|
}
|
||||||
|
@ -81,20 +85,30 @@ impl RandomFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_bytes(&mut self, bytes: usize) {
|
fn add_bytes(&mut self, bytes: usize) {
|
||||||
let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes };
|
// Note that just writing random characters isn't enough to cover all
|
||||||
let mut n = bytes;
|
// cases. We need truly random bytes.
|
||||||
while n > chunk_size {
|
let mut writer = BufWriter::new(&self.inner);
|
||||||
let _ = write!(self.inner, "{}", random_chars(chunk_size));
|
|
||||||
n -= chunk_size;
|
// Seed the rng so as to avoid spurious test failures.
|
||||||
|
let mut rng = rand::rngs::StdRng::seed_from_u64(123);
|
||||||
|
let mut buffer = [0; 1024];
|
||||||
|
let mut remaining_size = bytes;
|
||||||
|
|
||||||
|
while remaining_size > 0 {
|
||||||
|
let to_write = std::cmp::min(remaining_size, buffer.len());
|
||||||
|
let buf = &mut buffer[..to_write];
|
||||||
|
rng.fill(buf);
|
||||||
|
writer.write(buf).unwrap();
|
||||||
|
|
||||||
|
remaining_size -= to_write;
|
||||||
}
|
}
|
||||||
let _ = write!(self.inner, "{}", random_chars(n));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add n lines each of size `RandomFile::LINESIZE`
|
/// Add n lines each of size `RandomFile::LINESIZE`
|
||||||
fn add_lines(&mut self, lines: usize) {
|
fn add_lines(&mut self, lines: usize) {
|
||||||
let mut n = lines;
|
let mut n = lines;
|
||||||
while n > 0 {
|
while n > 0 {
|
||||||
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
|
writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap();
|
||||||
n -= 1;
|
n -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -104,18 +118,18 @@ impl RandomFile {
|
||||||
fn test_split_default() {
|
fn test_split_default() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_default";
|
let name = "split_default";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
|
||||||
RandomFile::new(&at, name).add_lines(2000);
|
RandomFile::new(&at, name).add_lines(2000);
|
||||||
ucmd.args(&[name]).succeeds();
|
ucmd.args(&[name]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
||||||
assert_eq!(glob.count(), 2);
|
assert_eq!(glob.count(), 2);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_numeric_prefixed_chunks_by_bytes() {
|
fn test_split_numeric_prefixed_chunks_by_bytes() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_num_prefixed_chunks_by_bytes";
|
let name = "split_num_prefixed_chunks_by_bytes";
|
||||||
let glob = Glob::new(&at, ".", r"a\d\d$");
|
|
||||||
RandomFile::new(&at, name).add_bytes(10000);
|
RandomFile::new(&at, name).add_bytes(10000);
|
||||||
ucmd.args(&[
|
ucmd.args(&[
|
||||||
"-d", // --numeric-suffixes
|
"-d", // --numeric-suffixes
|
||||||
|
@ -123,52 +137,89 @@ fn test_split_numeric_prefixed_chunks_by_bytes() {
|
||||||
"1000", name, "a",
|
"1000", name, "a",
|
||||||
])
|
])
|
||||||
.succeeds();
|
.succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"a\d\d$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
for filename in glob.collect() {
|
||||||
|
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
|
||||||
|
}
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_str_prefixed_chunks_by_bytes() {
|
fn test_split_str_prefixed_chunks_by_bytes() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_str_prefixed_chunks_by_bytes";
|
let name = "split_str_prefixed_chunks_by_bytes";
|
||||||
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
|
||||||
RandomFile::new(&at, name).add_bytes(10000);
|
RandomFile::new(&at, name).add_bytes(10000);
|
||||||
|
// Important that this is less than 1024 since that's our internal buffer
|
||||||
|
// size. Good to test that we don't overshoot.
|
||||||
ucmd.args(&["-b", "1000", name, "b"]).succeeds();
|
ucmd.args(&["-b", "1000", name, "b"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
for filename in glob.collect() {
|
||||||
|
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
|
||||||
|
}
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is designed to test what happens when the desired part size is not a
|
||||||
|
// multiple of the buffer size and we hopefully don't overshoot the desired part
|
||||||
|
// size.
|
||||||
|
#[test]
|
||||||
|
fn test_split_bytes_prime_part_size() {
|
||||||
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
|
let name = "test_split_bytes_prime_part_size";
|
||||||
|
RandomFile::new(&at, name).add_bytes(10000);
|
||||||
|
// 1753 is prime and greater than the buffer size, 1024.
|
||||||
|
ucmd.args(&["-b", "1753", name, "b"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
|
||||||
|
assert_eq!(glob.count(), 6);
|
||||||
|
let mut fns = glob.collect();
|
||||||
|
// glob.collect() is not guaranteed to return in sorted order, so we sort.
|
||||||
|
fns.sort();
|
||||||
|
for i in 0..5 {
|
||||||
|
assert_eq!(glob.directory.metadata(&fns[i]).len(), 1753);
|
||||||
|
}
|
||||||
|
assert_eq!(glob.directory.metadata(&fns[5]).len(), 1235);
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_num_prefixed_chunks_by_lines() {
|
fn test_split_num_prefixed_chunks_by_lines() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_num_prefixed_chunks_by_lines";
|
let name = "split_num_prefixed_chunks_by_lines";
|
||||||
let glob = Glob::new(&at, ".", r"c\d\d$");
|
|
||||||
RandomFile::new(&at, name).add_lines(10000);
|
RandomFile::new(&at, name).add_lines(10000);
|
||||||
ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds();
|
ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"c\d\d$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_str_prefixed_chunks_by_lines() {
|
fn test_split_str_prefixed_chunks_by_lines() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_str_prefixed_chunks_by_lines";
|
let name = "split_str_prefixed_chunks_by_lines";
|
||||||
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
|
|
||||||
RandomFile::new(&at, name).add_lines(10000);
|
RandomFile::new(&at, name).add_lines(10000);
|
||||||
ucmd.args(&["-l", "1000", name, "d"]).succeeds();
|
ucmd.args(&["-l", "1000", name, "d"]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
|
||||||
assert_eq!(glob.count(), 10);
|
assert_eq!(glob.count(), 10);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_split_additional_suffix() {
|
fn test_split_additional_suffix() {
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "split_additional_suffix";
|
let name = "split_additional_suffix";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
|
|
||||||
RandomFile::new(&at, name).add_lines(2000);
|
RandomFile::new(&at, name).add_lines(2000);
|
||||||
ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();
|
ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();
|
||||||
|
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
|
||||||
assert_eq!(glob.count(), 2);
|
assert_eq!(glob.count(), 2);
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
// note: the test_filter* tests below are unix-only
|
// note: the test_filter* tests below are unix-only
|
||||||
|
@ -182,15 +233,16 @@ fn test_filter() {
|
||||||
// like `test_split_default()` but run a command before writing
|
// like `test_split_default()` but run a command before writing
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "filtered";
|
let name = "filtered";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
|
||||||
let n_lines = 3;
|
let n_lines = 3;
|
||||||
RandomFile::new(&at, name).add_lines(n_lines);
|
RandomFile::new(&at, name).add_lines(n_lines);
|
||||||
|
|
||||||
// change all characters to 'i'
|
// change all characters to 'i'
|
||||||
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
|
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
|
||||||
.succeeds();
|
.succeeds();
|
||||||
|
|
||||||
// assert all characters are 'i' / no character is not 'i'
|
// assert all characters are 'i' / no character is not 'i'
|
||||||
// (assert that command succeeded)
|
// (assert that command succeeded)
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
||||||
assert!(
|
assert!(
|
||||||
glob.collate().iter().find(|&&c| {
|
glob.collate().iter().find(|&&c| {
|
||||||
// is not i
|
// is not i
|
||||||
|
@ -209,7 +261,6 @@ fn test_filter_with_env_var_set() {
|
||||||
// implemented like `test_split_default()` but run a command before writing
|
// implemented like `test_split_default()` but run a command before writing
|
||||||
let (at, mut ucmd) = at_and_ucmd!();
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
let name = "filtered";
|
let name = "filtered";
|
||||||
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
|
||||||
let n_lines = 3;
|
let n_lines = 3;
|
||||||
RandomFile::new(&at, name).add_lines(n_lines);
|
RandomFile::new(&at, name).add_lines(n_lines);
|
||||||
|
|
||||||
|
@ -217,7 +268,9 @@ fn test_filter_with_env_var_set() {
|
||||||
env::set_var("FILE", &env_var_value);
|
env::set_var("FILE", &env_var_value);
|
||||||
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
|
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
|
||||||
.succeeds();
|
.succeeds();
|
||||||
assert_eq!(glob.collate(), at.read(name).into_bytes());
|
|
||||||
|
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
|
||||||
|
assert_eq!(glob.collate(), at.read_bytes(name));
|
||||||
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
|
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,42 +9,6 @@ pub use self::stat::*;
|
||||||
mod test_fsext {
|
mod test_fsext {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_access() {
|
|
||||||
assert_eq!("drwxr-xr-x", pretty_access(S_IFDIR | 0o755));
|
|
||||||
assert_eq!("-rw-r--r--", pretty_access(S_IFREG | 0o644));
|
|
||||||
assert_eq!("srw-r-----", pretty_access(S_IFSOCK | 0o640));
|
|
||||||
assert_eq!("lrw-r-xr-x", pretty_access(S_IFLNK | 0o655));
|
|
||||||
assert_eq!("?rw-r-xr-x", pretty_access(0o655));
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
"brwSr-xr-x",
|
|
||||||
pretty_access(S_IFBLK | S_ISUID as mode_t | 0o655)
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
"brwsr-xr-x",
|
|
||||||
pretty_access(S_IFBLK | S_ISUID as mode_t | 0o755)
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
"prw---sr--",
|
|
||||||
pretty_access(S_IFIFO | S_ISGID as mode_t | 0o614)
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
"prw---Sr--",
|
|
||||||
pretty_access(S_IFIFO | S_ISGID as mode_t | 0o604)
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
"c---r-xr-t",
|
|
||||||
pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o055)
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
"c---r-xr-T",
|
|
||||||
pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o054)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_type() {
|
fn test_file_type() {
|
||||||
assert_eq!("block special file", pretty_filetype(S_IFBLK, 0));
|
assert_eq!("block special file", pretty_filetype(S_IFBLK, 0));
|
||||||
|
|
|
@ -116,8 +116,6 @@ fn test_multiple_default() {
|
||||||
/// Test for an empty file.
|
/// Test for an empty file.
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_empty() {
|
fn test_file_empty() {
|
||||||
// TODO There is a leading space in the output that should be
|
|
||||||
// removed; see issue #2173.
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-clmwL", "emptyfile.txt"])
|
.args(&["-clmwL", "emptyfile.txt"])
|
||||||
.run()
|
.run()
|
||||||
|
@ -128,8 +126,6 @@ fn test_file_empty() {
|
||||||
/// *without* a trailing newline.
|
/// *without* a trailing newline.
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_single_line_no_trailing_newline() {
|
fn test_file_single_line_no_trailing_newline() {
|
||||||
// TODO There is a leading space in the output that should be
|
|
||||||
// removed; see issue #2173.
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-clmwL", "notrailingnewline.txt"])
|
.args(&["-clmwL", "notrailingnewline.txt"])
|
||||||
.run()
|
.run()
|
||||||
|
@ -140,8 +136,6 @@ fn test_file_single_line_no_trailing_newline() {
|
||||||
/// the file are the newline character repeated one hundred times).
|
/// the file are the newline character repeated one hundred times).
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_many_empty_lines() {
|
fn test_file_many_empty_lines() {
|
||||||
// TODO There is a leading space in the output that should be
|
|
||||||
// removed; see issue #2173.
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-clmwL", "manyemptylines.txt"])
|
.args(&["-clmwL", "manyemptylines.txt"])
|
||||||
.run()
|
.run()
|
||||||
|
@ -151,8 +145,6 @@ fn test_file_many_empty_lines() {
|
||||||
/// Test for a file that has one long line comprising only spaces.
|
/// Test for a file that has one long line comprising only spaces.
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_one_long_line_only_spaces() {
|
fn test_file_one_long_line_only_spaces() {
|
||||||
// TODO There is a leading space in the output that should be
|
|
||||||
// removed; see issue #2173.
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-clmwL", "onelongemptyline.txt"])
|
.args(&["-clmwL", "onelongemptyline.txt"])
|
||||||
.run()
|
.run()
|
||||||
|
@ -162,8 +154,6 @@ fn test_file_one_long_line_only_spaces() {
|
||||||
/// Test for a file that has one long line comprising a single "word".
|
/// Test for a file that has one long line comprising a single "word".
|
||||||
#[test]
|
#[test]
|
||||||
fn test_file_one_long_word() {
|
fn test_file_one_long_word() {
|
||||||
// TODO There is a leading space in the output that should be
|
|
||||||
// removed; see issue #2173.
|
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-clmwL", "onelongword.txt"])
|
.args(&["-clmwL", "onelongword.txt"])
|
||||||
.run()
|
.run()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue