1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-02 14:07:46 +00:00
This commit is contained in:
electricboogie 2021-04-10 09:06:14 -05:00
commit 843be9e149
18 changed files with 1183 additions and 358 deletions

15
Cargo.lock generated
View file

@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "advapi32-sys"
version = "0.2.0"
@ -1362,6 +1364,12 @@ dependencies = [
"maybe-uninit",
]
[[package]]
name = "smallvec"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]]
name = "strsim"
version = "0.8.0"
@ -1816,7 +1824,7 @@ dependencies = [
"quickcheck",
"rand 0.7.3",
"rand_chacha",
"smallvec",
"smallvec 0.6.14",
"uucore",
"uucore_procs",
]
@ -2289,6 +2297,7 @@ dependencies = [
"rand 0.7.3",
"rayon",
"semver",
"smallvec 1.6.1",
"uucore",
"uucore_procs",
]
@ -2316,7 +2325,7 @@ dependencies = [
name = "uu_stdbuf"
version = "0.0.6"
dependencies = [
"getopts",
"clap",
"tempfile",
"uu_stdbuf_libstdbuf",
"uucore",
@ -2500,7 +2509,7 @@ dependencies = [
name = "uu_unlink"
version = "0.0.6"
dependencies = [
"getopts",
"clap",
"libc",
"uucore",
"uucore_procs",

View file

@ -23,9 +23,11 @@ use std::fs;
use std::fs::File;
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::result::Result;
const DEFAULT_MODE: u32 = 0o755;
const DEFAULT_STRIP_PROGRAM: &str = "strip";
#[allow(dead_code)]
pub struct Behavior {
@ -37,6 +39,8 @@ pub struct Behavior {
verbose: bool,
preserve_timestamps: bool,
compare: bool,
strip: bool,
strip_program: String,
}
#[derive(Clone, Eq, PartialEq)]
@ -164,17 +168,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
.help("apply access/modification times of SOURCE files to corresponding destination files")
)
.arg(
// TODO implement flag
Arg::with_name(OPT_STRIP)
.short("s")
.long(OPT_STRIP)
.help("(unimplemented) strip symbol tables")
.help("strip symbol tables (no action Windows)")
)
.arg(
// TODO implement flag
Arg::with_name(OPT_STRIP_PROGRAM)
.long(OPT_STRIP_PROGRAM)
.help("(unimplemented) program used to strip binaries")
.help("program used to strip binaries (no action Windows)")
.value_name("PROGRAM")
)
.arg(
@ -266,10 +268,6 @@ fn check_unimplemented<'a>(matches: &ArgMatches) -> Result<(), &'a str> {
Err("-b")
} else if matches.is_present(OPT_CREATED) {
Err("-D")
} else if matches.is_present(OPT_STRIP) {
Err("--strip, -s")
} else if matches.is_present(OPT_STRIP_PROGRAM) {
Err("--strip-program")
} else if matches.is_present(OPT_SUFFIX) {
Err("--suffix, -S")
} else if matches.is_present(OPT_TARGET_DIRECTORY) {
@ -339,6 +337,12 @@ fn behavior(matches: &ArgMatches) -> Result<Behavior, i32> {
verbose: matches.is_present(OPT_VERBOSE),
preserve_timestamps: matches.is_present(OPT_PRESERVE_TIMESTAMPS),
compare: matches.is_present(OPT_COMPARE),
strip: matches.is_present(OPT_STRIP),
strip_program: String::from(
matches
.value_of(OPT_STRIP_PROGRAM)
.unwrap_or(DEFAULT_STRIP_PROGRAM),
),
})
}
@ -521,6 +525,21 @@ fn copy(from: &PathBuf, to: &PathBuf, b: &Behavior) -> Result<(), ()> {
return Err(());
}
if b.strip && cfg!(not(windows)) {
match Command::new(&b.strip_program).arg(to).output() {
Ok(o) => {
if !o.status.success() {
crash!(
1,
"strip program failed: {}",
String::from_utf8(o.stderr).unwrap_or_default()
);
}
}
Err(e) => crash!(1, "strip program execution failed: {}", e),
}
}
if mode::chmod(&to, b.mode()).is_err() {
return Err(());
}

View file

@ -259,6 +259,7 @@ static AFTER_HELP: &str =
";
pub mod options {
pub const FORCE: &str = "force";
pub const FILE: &str = "file";
pub const ITERATIONS: &str = "iterations";
pub const SIZE: &str = "size";
@ -278,6 +279,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
.about(ABOUT)
.after_help(AFTER_HELP)
.usage(&usage[..])
.arg(
Arg::with_name(options::FORCE)
.long(options::FORCE)
.short("f")
.help("change permissions to allow writing if necessary"),
)
.arg(
Arg::with_name(options::ITERATIONS)
.long(options::ITERATIONS)
@ -354,8 +361,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
// TODO: implement --random-source
// TODO: implement --force
let force = matches.is_present(options::FORCE);
let remove = matches.is_present(options::REMOVE);
let size_arg = match matches.value_of(options::SIZE) {
Some(s) => Some(s.to_string()),
@ -375,7 +381,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
}
for path_str in matches.values_of(options::FILE).unwrap() {
wipe_file(&path_str, iterations, remove, size, exact, zero, verbose);
wipe_file(
&path_str, iterations, remove, size, exact, zero, verbose, force,
);
}
0
@ -439,18 +447,40 @@ fn wipe_file(
exact: bool,
zero: bool,
verbose: bool,
force: bool,
) {
// Get these potential errors out of the way first
let path: &Path = Path::new(path_str);
if !path.exists() {
println!("{}: {}: No such file or directory", NAME, path.display());
show_error!("{}: No such file or directory", path.display());
return;
}
if !path.is_file() {
println!("{}: {}: Not a file", NAME, path.display());
show_error!("{}: Not a file", path.display());
return;
}
// If force is true, set file permissions to not-readonly.
if force {
let metadata = match fs::metadata(path) {
Ok(m) => m,
Err(e) => {
show_error!("{}", e);
return;
}
};
let mut perms = metadata.permissions();
perms.set_readonly(false);
match fs::set_permissions(path, perms) {
Err(e) => {
show_error!("{}", e);
return;
}
_ => {}
}
}
// Fill up our pass sequence
let mut pass_sequence: Vec<PassType> = Vec::new();
@ -489,11 +519,13 @@ fn wipe_file(
{
let total_passes: usize = pass_sequence.len();
let mut file: File = OpenOptions::new()
.write(true)
.truncate(false)
.open(path)
.expect("Failed to open file for writing");
let mut file: File = match OpenOptions::new().write(true).truncate(false).open(path) {
Ok(f) => f,
Err(e) => {
show_error!("{}: failed to open for writing: {}", path.display(), e);
return;
}
};
// NOTE: it does not really matter what we set for total_bytes and gen_type here, so just
// use bogus values
@ -523,14 +555,23 @@ fn wipe_file(
}
}
// size is an optional argument for exactly how many bytes we want to shred
do_pass(&mut file, path, &mut generator, *pass_type, size)
.expect("File write pass failed");
match do_pass(&mut file, path, &mut generator, *pass_type, size) {
Ok(_) => {}
Err(e) => {
show_error!("{}: File write pass failed: {}", path.display(), e);
}
}
// Ignore failed writes; just keep trying
}
}
if remove {
do_remove(path, path_str, verbose).expect("Failed to remove file");
match do_remove(path, path_str, verbose) {
Ok(_) => {}
Err(e) => {
show_error!("{}: failed to remove file: {}", path.display(), e);
}
}
}
}

View file

@ -0,0 +1,33 @@
# Benchmarking sort
Most of the time when sorting is spent comparing lines. The comparison functions however differ based
on which arguments are passed to `sort`, therefore it is important to always benchmark multiple scenarios.
This is an overview of what was benchmarked, and if you make changes to `sort`, you are encouraged to check
how performance was affected for the workloads listed below. Feel free to add other workloads to the
list that we should improve / make sure not to regress.
Run `cargo build --release` before benchmarking after you make a change!
## Sorting a wordlist
- Get a wordlist, for example with [words](https://en.wikipedia.org/wiki/Words_(Unix)) on Linux. The exact wordlist
doesn't matter for performance comparisons. In this example I'm using `/usr/share/dict/american-english` as the wordlist.
- Shuffle the wordlist by running `sort -R /usr/share/dict/american-english > shuffled_wordlist.txt`.
- Benchmark sorting the wordlist with hyperfine: `hyperfine "target/release/coreutils sort shuffled_wordlist.txt -o output.txt"`.
## Sorting a wordlist with ignore_case
- Same wordlist as above
- Benchmark sorting the wordlist ignoring the case with hyperfine: `hyperfine "target/release/coreutils sort shuffled_wordlist.txt -f -o output.txt"`.
## Sorting numbers
- Generate a list of numbers: `seq 0 100000 | sort -R > shuffled_numbers.txt`.
- Benchmark numeric sorting with hyperfine: `hyperfine "target/release/coreutils sort shuffled_numbers.txt -n -o output.txt"`.
## Stdout and stdin performance
Try to run the above benchmarks by piping the input through stdin (standard input) and redirect the
output through stdout (standard output):
- Remove the input file from the arguments and add `cat [inputfile] | ` at the beginning.
- Remove `-o output.txt` and add `> output.txt` at the end.
Example: `hyperfine "target/release/coreutils sort shuffled_numbers.txt -n -o output.txt"` becomes
`hyperfine "cat shuffled_numbers.txt | target/release/coreutils sort -n > output.txt"`
- Check that performance is similar to the original benchmark.

View file

@ -21,6 +21,7 @@ clap = "2.33"
fnv = "1.0.7"
itertools = "0.8.0"
semver = "0.9.0"
smallvec = "1.6.1"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -2,10 +2,10 @@
// *
// * (c) Michael Yin <mikeyin@mikeyin.org>
// * (c) Robert Swinford <robert.swinford..AT..gmail.com>
// * (c) Michael Debertol <michael.debertol..AT..gmail.com>
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
#![allow(dead_code)]
// Although these links don't always seem to describe reality, check out the POSIX and GNU specs:
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
@ -22,6 +22,7 @@ use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use rayon::prelude::*;
use semver::Version;
use smallvec::SmallVec;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
@ -30,6 +31,7 @@ use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write};
use std::mem::replace;
use std::ops::{Range, RangeInclusive};
use std::path::Path;
use uucore::fs::is_stdin_interactive; // for Iterator::dedup()
@ -37,6 +39,16 @@ static NAME: &str = "sort";
static ABOUT: &str = "Display sorted concatenation of all FILE(s).";
static VERSION: &str = env!("CARGO_PKG_VERSION");
/// Long help text for `-k`/`--key`, describing the key syntax.
/// The option letters listed here must stay in sync with the ones accepted by `KeyPosition::parse`.
const LONG_HELP_KEYS: &str = "The key format is FIELD[.CHAR][OPTIONS][,FIELD[.CHAR]][OPTIONS].
Fields by default are separated by the first whitespace after a non-whitespace character. Use -t to specify a custom separator.
In the default case, whitespace is appended at the beginning of each field. Custom separators however are not included in fields.
FIELD and CHAR both start at 1 (i.e. they are 1-indexed). If there is no end specified after a comma, the end will be the end of the line.
If CHAR is set 0, it means the end of the field. CHAR defaults to 1 for the start position and to 0 for the end position.
Valid options are: MbdfghinRrV. They override the global options for this key.";
static OPT_HUMAN_NUMERIC_SORT: &str = "human-numeric-sort";
static OPT_MONTH_SORT: &str = "month-sort";
static OPT_NUMERIC_SORT: &str = "numeric-sort";
@ -54,6 +66,8 @@ static OPT_OUTPUT: &str = "output";
static OPT_REVERSE: &str = "reverse";
static OPT_STABLE: &str = "stable";
static OPT_UNIQUE: &str = "unique";
static OPT_KEY: &str = "key";
static OPT_SEPARATOR: &str = "field-separator";
static OPT_RANDOM: &str = "random-sort";
static OPT_ZERO_TERMINATED: &str = "zero-terminated";
static OPT_PARALLEL: &str = "parallel";
@ -63,10 +77,11 @@ static ARG_FILES: &str = "files";
static DECIMAL_PT: char = '.';
static THOUSANDS_SEP: char = ',';
static NEGATIVE: char = '-';
static POSITIVE: char = '+';
#[derive(Eq, Ord, PartialEq, PartialOrd)]
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone)]
enum SortMode {
Numeric,
HumanNumeric,
@ -76,8 +91,12 @@ enum SortMode {
Default,
}
struct Settings {
struct GlobalSettings {
mode: SortMode,
ignore_blanks: bool,
ignore_case: bool,
dictionary_order: bool,
ignore_non_printing: bool,
merge: bool,
reverse: bool,
outfile: Option<String>,
@ -86,17 +105,21 @@ struct Settings {
check: bool,
check_silent: bool,
random: bool,
compare_fn: fn(&str, &str) -> Ordering,
transform_fns: Vec<fn(&str) -> String>,
threads: String,
salt: String,
selectors: Vec<FieldSelector>,
separator: Option<char>,
threads: String,
zero_terminated: bool,
}
impl Default for Settings {
fn default() -> Settings {
Settings {
impl Default for GlobalSettings {
fn default() -> GlobalSettings {
GlobalSettings {
mode: SortMode::Default,
ignore_blanks: false,
ignore_case: false,
dictionary_order: false,
ignore_non_printing: false,
merge: false,
reverse: false,
outfile: None,
@ -105,19 +128,330 @@ impl Default for Settings {
check: false,
check_silent: false,
random: false,
compare_fn: default_compare,
transform_fns: Vec::new(),
threads: String::new(),
salt: String::new(),
selectors: vec![],
separator: None,
threads: String::new(),
zero_terminated: false,
}
}
}
/// Sorting options that apply to a single key (one `FieldSelector`).
/// A value is first copied from `GlobalSettings` (see the `From` impl) and may then be
/// adjusted by the option letters handled in `KeyPosition::parse`.
struct KeySettings {
// Comparison mode used for this key.
mode: SortMode,
// When set, `transform` trims leading whitespace from the selected text.
ignore_blanks: bool,
// When set, `transform` upper-cases the selected text.
ignore_case: bool,
// When set, `transform` passes the text through `remove_nondictionary_chars`.
dictionary_order: bool,
// When set, `transform` passes the text through `remove_nonprinting_chars`.
ignore_non_printing: bool,
// Set by the `R` key option or copied from the global settings.
random: bool,
// Set by the `r` key option or copied from the global settings.
reverse: bool,
}
impl From<&GlobalSettings> for KeySettings {
fn from(settings: &GlobalSettings) -> Self {
Self {
mode: settings.mode.clone(),
ignore_blanks: settings.ignore_blanks,
ignore_case: settings.ignore_case,
ignore_non_printing: settings.ignore_non_printing,
random: settings.random,
reverse: settings.reverse,
dictionary_order: settings.dictionary_order,
}
}
}
/// Represents the string selected by a FieldSelector.
/// One `Selection` is stored per selector for every `Line` (see `Line::new`).
enum Selection {
/// If we had to transform this selection, we have to store a new string.
String(String),
/// If there was no transformation, we can store an index into the line.
/// The range is used to slice `Line::line` directly in `get_str`.
ByIndex(Range<usize>),
}
impl Selection {
/// Gets the actual string slice represented by this Selection.
fn get_str<'a>(&'a self, line: &'a Line) -> &'a str {
match self {
Selection::String(string) => string.as_str(),
Selection::ByIndex(range) => &line.line[range.to_owned()],
}
}
}
/// Range of one field within a line, as produced by the `tokenize*` functions.
type Field = Range<usize>;
/// An input line together with the key selections computed for it by `Line::new`.
struct Line {
// The complete, unmodified text of the line.
line: String,
// The common case is not to specify fields. Let's make this fast.
// Holds one entry per selector in `GlobalSettings::selectors`, in the same order.
selections: SmallVec<[Selection; 1]>,
}
impl Line {
fn new(line: String, settings: &GlobalSettings) -> Self {
let fields = if settings
.selectors
.iter()
.any(|selector| selector.needs_tokens())
{
// Only tokenize if we will need tokens.
Some(tokenize(&line, settings.separator))
} else {
None
};
let selections = settings
.selectors
.iter()
.map(|selector| {
if let Some(range) = selector.get_selection(&line, fields.as_deref()) {
if let Some(transformed) =
transform(&line[range.to_owned()], &selector.settings)
{
Selection::String(transformed)
} else {
Selection::ByIndex(range.start().to_owned()..range.end() + 1)
}
} else {
// If there is no match, match the empty string.
Selection::ByIndex(0..0)
}
})
.collect();
Self { line, selections }
}
}
/// Applies the text transformations requested by `settings` to `line`.
/// Returns `None` when no transformation is enabled, so the caller can keep borrowing the
/// original text; otherwise returns the transformed copy.
fn transform(line: &str, settings: &KeySettings) -> Option<String> {
    // Stays `Borrowed` until the first enabled transformation produces an owned string.
    let mut text: std::borrow::Cow<str> = std::borrow::Cow::Borrowed(line);

    if settings.ignore_case {
        text = std::borrow::Cow::Owned(text.to_uppercase());
    }
    if settings.ignore_blanks {
        text = std::borrow::Cow::Owned(text.trim_start().to_string());
    }
    if settings.dictionary_order {
        text = std::borrow::Cow::Owned(remove_nondictionary_chars(&text));
    }
    if settings.ignore_non_printing {
        text = std::borrow::Cow::Owned(remove_nonprinting_chars(&text));
    }

    match text {
        std::borrow::Cow::Owned(transformed) => Some(transformed),
        std::borrow::Cow::Borrowed(_) => None,
    }
}
/// Splits `line` into fields: on `separator` when one is given, otherwise on the default
/// whitespace rule.
fn tokenize(line: &str, separator: Option<char>) -> Vec<Field> {
    match separator {
        Some(custom) => tokenize_with_separator(line, custom),
        None => tokenize_default(line),
    }
}
/// By default fields are separated by the first whitespace after non-whitespace.
/// Whitespace is included in fields at the start.
fn tokenize_default(line: &str) -> Vec<Field> {
let mut tokens = vec![0..0];
// pretend that there was whitespace in front of the line
let mut previous_was_whitespace = true;
for (idx, char) in line.char_indices() {
if char.is_whitespace() {
if !previous_was_whitespace {
tokens.last_mut().unwrap().end = idx;
tokens.push(idx..0);
}
previous_was_whitespace = true;
} else {
previous_was_whitespace = false;
}
}
tokens.last_mut().unwrap().end = line.len();
tokens
}
/// Split between separators. These separators are not included in fields.
///
/// Every occurrence of `separator` ends the current field and the next field starts right
/// after it. A separator at the very end of the line only closes the preceding field; it is
/// never folded into that field. At least one field is always returned (`0..0` for an empty line).
fn tokenize_with_separator(line: &str, separator: char) -> Vec<Field> {
    let mut tokens = Vec::new();
    let mut start = 0;
    for (idx, _) in line.match_indices(separator) {
        tokens.push(start..idx);
        // Skip the whole separator, which may be more than one byte long.
        start = idx + separator.len_utf8();
    }
    // Text after the last separator forms the final field. With no separator at all
    // (including an empty line) the whole line is the single field.
    if start < line.len() || tokens.is_empty() {
        tokens.push(start..line.len());
    }
    tokens
}
/// One end (start or end) of a key, i.e. a parsed `FIELD[.CHAR]` pair.
struct KeyPosition {
/// 1-indexed, 0 is invalid.
field: usize,
/// 1-indexed, 0 is end of field.
char: usize,
// When set, whitespace at the resolved position is skipped (see `resolve_index`).
// Initialised from the key settings and switched on by the `b` key option.
ignore_blanks: bool,
}
impl KeyPosition {
    /// Parses one half of a key (`FIELD[.CHAR][OPTIONS]`).
    ///
    /// * `key` - the text of this key half.
    /// * `default_char_index` - the CHAR value to use when `key` has no `.CHAR` part.
    /// * `settings` - per-key settings; option letters found in `key` are applied to it.
    ///   The `b` option is the exception: it only affects the returned position.
    ///
    /// Terminates the program through `crash!` on an unknown option letter, an unparsable
    /// field or character index, or a field index of 0.
    fn parse(key: &str, default_char_index: usize, settings: &mut KeySettings) -> Self {
        let mut field_and_char = key.split('.');
        let mut field = field_and_char
            .next()
            .unwrap_or_else(|| crash!(1, "invalid key `{}`", key));
        let mut char = field_and_char.next();

        // If there is a char index, we expect options to appear after it. Otherwise we expect them after the field index.
        let value_with_options = char.as_mut().unwrap_or(&mut field);

        let mut ignore_blanks = settings.ignore_blanks;
        // `find` returns a byte offset, which is what the slicing below requires. Counting
        // characters instead would slice at the wrong place (or panic) whenever a multi-byte
        // character precedes the option letters.
        if let Some(options_start) = value_with_options.find(char::is_alphabetic) {
            for option in value_with_options[options_start..].chars() {
                // valid options: MbdfghinRrV
                match option {
                    'M' => settings.mode = SortMode::Month,
                    'b' => ignore_blanks = true,
                    'd' => settings.dictionary_order = true,
                    'f' => settings.ignore_case = true,
                    'g' => settings.mode = SortMode::GeneralNumeric,
                    'h' => settings.mode = SortMode::HumanNumeric,
                    'i' => settings.ignore_non_printing = true,
                    'n' => settings.mode = SortMode::Numeric,
                    'R' => settings.random = true,
                    'r' => settings.reverse = true,
                    'V' => settings.mode = SortMode::Version,
                    c => {
                        crash!(1, "invalid option for key: `{}`", c)
                    }
                }
            }
            // Strip away option characters from the original value so we can parse it later
            *value_with_options = &value_with_options[..options_start];
        }

        let field = field
            .parse()
            .unwrap_or_else(|e| crash!(1, "failed to parse field index for key `{}`: {}", key, e));
        if field == 0 {
            crash!(1, "field index was 0");
        }
        let char = char.map_or(default_char_index, |char| {
            char.parse().unwrap_or_else(|e| {
                crash!(
                    1,
                    "failed to parse character index for key `{}`: {}",
                    key,
                    e
                )
            })
        });

        Self {
            field,
            char,
            ignore_blanks,
        }
    }
}
/// A parsed key: where the key starts, where it optionally ends, and how it is compared.
struct FieldSelector {
// Start position of the key.
from: KeyPosition,
// End position of the key; `None` selects up to the end of the line.
to: Option<KeyPosition>,
// Comparison and transformation options for this key.
settings: KeySettings,
}
impl FieldSelector {
fn needs_tokens(&self) -> bool {
self.from.field != 1 || self.from.char == 0 || self.to.is_some()
}
/// Look up the slice that corresponds to this selector for the given line.
/// If needs_fields returned false, fields may be None.
fn get_selection<'a>(
&self,
line: &'a str,
tokens: Option<&[Field]>,
) -> Option<RangeInclusive<usize>> {
enum ResolutionErr {
TooLow,
TooHigh,
}
// Get the index for this line given the KeyPosition
fn resolve_index(
line: &str,
tokens: Option<&[Field]>,
position: &KeyPosition,
) -> Result<usize, ResolutionErr> {
if tokens.map_or(false, |fields| fields.len() < position.field) {
Err(ResolutionErr::TooHigh)
} else if position.char == 0 {
let end = tokens.unwrap()[position.field - 1].end;
if end == 0 {
Err(ResolutionErr::TooLow)
} else {
Ok(end - 1)
}
} else {
let mut idx = if position.field == 1 {
// The first field always starts at 0.
// We don't need tokens for this case.
0
} else {
tokens.unwrap()[position.field - 1].start
} + position.char
- 1;
if idx >= line.len() {
Err(ResolutionErr::TooHigh)
} else {
if position.ignore_blanks {
if let Some(not_whitespace) =
line[idx..].chars().position(|c| !c.is_whitespace())
{
idx += not_whitespace;
} else {
return Err(ResolutionErr::TooHigh);
}
}
Ok(idx)
}
}
}
if let Ok(from) = resolve_index(line, tokens, &self.from) {
let to = self.to.as_ref().map(|to| resolve_index(line, tokens, &to));
match to {
Some(Ok(to)) => Some(from..=to),
// If `to` was not given or the match would be after the end of the line,
// match everything until the end of the line.
None | Some(Err(ResolutionErr::TooHigh)) => Some(from..=line.len() - 1),
// If `to` is before the start of the line, report no match.
// This can happen if the line starts with a separator.
Some(Err(ResolutionErr::TooLow)) => None,
}
} else {
None
}
}
}
struct MergeableFile<'a> {
lines: Lines<BufReader<Box<dyn Read>>>,
current_line: String,
settings: &'a Settings,
current_line: Line,
settings: &'a GlobalSettings,
}
// BinaryHeap depends on `Ord`. Note that we want to pop smallest items
@ -125,7 +459,7 @@ struct MergeableFile<'a> {
// trick it into the right order by calling reverse() here.
impl<'a> Ord for MergeableFile<'a> {
fn cmp(&self, other: &MergeableFile) -> Ordering {
compare_by(&self.current_line, &other.current_line, &self.settings).reverse()
compare_by(&self.current_line, &other.current_line, self.settings).reverse()
}
}
@ -137,7 +471,7 @@ impl<'a> PartialOrd for MergeableFile<'a> {
impl<'a> PartialEq for MergeableFile<'a> {
fn eq(&self, other: &MergeableFile) -> bool {
Ordering::Equal == compare_by(&self.current_line, &other.current_line, &self.settings)
Ordering::Equal == compare_by(&self.current_line, &other.current_line, self.settings)
}
}
@ -145,11 +479,11 @@ impl<'a> Eq for MergeableFile<'a> {}
struct FileMerger<'a> {
heap: BinaryHeap<MergeableFile<'a>>,
settings: &'a Settings,
settings: &'a GlobalSettings,
}
impl<'a> FileMerger<'a> {
fn new(settings: &'a Settings) -> FileMerger<'a> {
fn new(settings: &'a GlobalSettings) -> FileMerger<'a> {
FileMerger {
heap: BinaryHeap::new(),
settings,
@ -159,7 +493,7 @@ impl<'a> FileMerger<'a> {
if let Some(Ok(next_line)) = lines.next() {
let mergeable_file = MergeableFile {
lines,
current_line: next_line,
current_line: Line::new(next_line, &self.settings),
settings: &self.settings,
};
self.heap.push(mergeable_file);
@ -174,14 +508,17 @@ impl<'a> Iterator for FileMerger<'a> {
Some(mut current) => {
match current.lines.next() {
Some(Ok(next_line)) => {
let ret = replace(&mut current.current_line, next_line);
let ret = replace(
&mut current.current_line,
Line::new(next_line, &self.settings),
);
self.heap.push(current);
Some(ret)
Some(ret.line)
}
_ => {
// Don't put it back in the heap (it's empty/erroring)
// but its first line is still valid.
Some(current.current_line)
Some(current.current_line.line)
}
}
}
@ -205,7 +542,7 @@ With no FILE, or when FILE is -, read standard input.",
pub fn uumain(args: impl uucore::Args) -> i32 {
let args = args.collect_str();
let usage = get_usage();
let mut settings: Settings = Default::default();
let mut settings: GlobalSettings = Default::default();
let matches = App::new(executable!())
.version(VERSION)
@ -316,7 +653,21 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
.help("output only the first of an equal run"),
)
.arg(
Arg::with_name(OPT_ZERO_TERMINATED)
Arg::with_name(OPT_KEY)
.short("k")
.long(OPT_KEY)
.help("sort by a key")
.long_help(LONG_HELP_KEYS)
.multiple(true)
.takes_value(true),
)
.arg(
Arg::with_name(OPT_SEPARATOR)
.short("t")
.long(OPT_SEPARATOR)
.help("custom separator for -k")
.takes_value(true))
.arg(Arg::with_name(OPT_ZERO_TERMINATED)
.short("z")
.long(OPT_ZERO_TERMINATED)
.help("line delimiter is NUL, not newline"),
@ -350,16 +701,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
for path in &files0_from {
let (reader, _) = open(path.as_str()).expect("Could not read from file specified.");
let buf_reader = BufReader::new(reader);
for line in buf_reader.split(b'\0') {
if let Ok(n) = line {
for line in buf_reader.split(b'\0').flatten() {
files.push(
std::str::from_utf8(&n)
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input.")
.to_string(),
);
}
}
}
files
} else {
matches
@ -382,21 +731,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
SortMode::Default
};
settings.dictionary_order = matches.is_present(OPT_DICTIONARY_ORDER);
settings.ignore_non_printing = matches.is_present(OPT_IGNORE_NONPRINTING);
if matches.is_present(OPT_PARALLEL) {
// "0" is default - threads = num of cores
settings.threads = matches
.value_of(OPT_PARALLEL)
.map(String::from)
.unwrap_or("0".to_string());
.unwrap_or_else(|| "0".to_string());
env::set_var("RAYON_NUM_THREADS", &settings.threads);
}
if matches.is_present(OPT_DICTIONARY_ORDER) {
settings.transform_fns.push(remove_nondictionary_chars);
} else if matches.is_present(OPT_IGNORE_NONPRINTING) {
settings.transform_fns.push(remove_nonprinting_chars);
}
settings.zero_terminated = matches.is_present(OPT_ZERO_TERMINATED);
settings.merge = matches.is_present(OPT_MERGE);
@ -406,13 +751,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.check = true;
};
if matches.is_present(OPT_IGNORE_CASE) {
settings.transform_fns.push(|s| s.to_uppercase());
}
settings.ignore_case = matches.is_present(OPT_IGNORE_CASE);
if matches.is_present(OPT_IGNORE_BLANKS) {
settings.transform_fns.push(|s| s.trim_start().to_string());
}
settings.ignore_blanks = matches.is_present(OPT_IGNORE_BLANKS);
settings.outfile = matches.value_of(OPT_OUTPUT).map(String::from);
settings.reverse = matches.is_present(OPT_REVERSE);
@ -424,27 +765,64 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.salt = get_rand_string();
}
//let mut files = matches.free;
if files.is_empty() {
/* if no file, default to stdin */
files.push("-".to_owned());
} else if settings.check && files.len() != 1 {
crash!(1, "sort: extra operand `{}' not allowed with -c", files[1])
crash!(1, "extra operand `{}' not allowed with -c", files[1])
}
settings.compare_fn = match settings.mode {
SortMode::Numeric => numeric_compare,
SortMode::GeneralNumeric => general_numeric_compare,
SortMode::HumanNumeric => human_numeric_size_compare,
SortMode::Month => month_compare,
SortMode::Version => version_compare,
SortMode::Default => default_compare,
};
if let Some(arg) = matches.args.get(OPT_SEPARATOR) {
let separator = arg.vals[0].to_string_lossy();
let separator = separator;
if separator.len() != 1 {
crash!(1, "separator must be exactly one character long");
}
settings.separator = Some(separator.chars().next().unwrap())
}
exec(files, &mut settings)
if matches.is_present(OPT_KEY) {
for key in &matches.args[OPT_KEY].vals {
let key = key.to_string_lossy();
let mut from_to = key.split(',');
let mut key_settings = KeySettings::from(&settings);
let from = KeyPosition::parse(
from_to
.next()
.unwrap_or_else(|| crash!(1, "invalid key `{}`", key)),
1,
&mut key_settings,
);
let to = from_to
.next()
.map(|to| KeyPosition::parse(to, 0, &mut key_settings));
let field_selector = FieldSelector {
from,
to,
settings: key_settings,
};
settings.selectors.push(field_selector);
}
}
if !settings.stable || !matches.is_present(OPT_KEY) {
// add a default selector matching the whole line
let key_settings = KeySettings::from(&settings);
settings.selectors.push(FieldSelector {
from: KeyPosition {
field: 1,
char: 1,
ignore_blanks: key_settings.ignore_blanks,
},
to: None,
settings: key_settings,
});
}
exec(files, &settings)
}
fn exec(files: Vec<String>, settings: &mut Settings) -> i32 {
fn exec(files: Vec<String>, settings: &GlobalSettings) -> i32 {
let mut lines = Vec::new();
let mut file_merger = FileMerger::new(&settings);
@ -459,26 +837,27 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 {
if settings.merge {
file_merger.push_file(buf_reader.lines());
} else if settings.zero_terminated {
for line in buf_reader.split(b'\0') {
if let Ok(n) = line {
lines.push(
std::str::from_utf8(&n)
for line in buf_reader.split(b'\0').flatten() {
lines.push(Line::new(
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input.")
.to_string(),
);
}
&settings,
));
}
} else {
for line in buf_reader.lines() {
if let Ok(n) = line {
lines.push(n);
lines.push(Line::new(n, &settings));
} else {
break;
}
}
}
}
if settings.check {
return exec_check_file(lines, &settings);
return exec_check_file(&lines, &settings);
} else {
sort_by(&mut lines, &settings);
}
@ -490,29 +869,31 @@ fn exec(files: Vec<String>, settings: &mut Settings) -> i32 {
print_sorted(file_merger, &settings)
}
} else if settings.mode == SortMode::Default && settings.unique {
print_sorted(lines.iter().dedup(), &settings)
print_sorted(lines.into_iter().map(|line| line.line).dedup(), &settings)
} else if settings.mode == SortMode::Month && settings.unique {
print_sorted(
lines
.iter()
.into_iter()
.map(|line| line.line)
.dedup_by(|a, b| get_months_dedup(a) == get_months_dedup(b)),
&settings,
)
} else if settings.unique {
print_sorted(
lines
.iter()
.dedup_by(|a, b| get_num_dedup(a, &settings) == get_num_dedup(b, &settings)),
.into_iter()
.map(|line| line.line)
.dedup_by(|a, b| get_num_dedup(a, settings) == get_num_dedup(b, settings)),
&settings,
)
} else {
print_sorted(lines.iter(), &settings)
print_sorted(lines.into_iter().map(|line| line.line), &settings)
}
0
}
fn exec_check_file(unwrapped_lines: Vec<String>, settings: &Settings) -> i32 {
fn exec_check_file(unwrapped_lines: &[Line], settings: &GlobalSettings) -> i32 {
// errors yields the line before each disorder,
// plus the last line (quirk of .coalesce())
let mut errors =
@ -544,51 +925,45 @@ fn exec_check_file(unwrapped_lines: Vec<String>, settings: &Settings) -> i32 {
}
}
#[inline(always)]
fn transform(line: &str, settings: &Settings) -> String {
let mut transformed = line.to_owned();
for transform_fn in &settings.transform_fns {
transformed = transform_fn(&transformed);
}
transformed
}
#[inline(always)]
fn sort_by(lines: &mut Vec<String>, settings: &Settings) {
fn sort_by(lines: &mut Vec<Line>, settings: &GlobalSettings) {
lines.par_sort_by(|a, b| compare_by(a, b, &settings))
}
fn compare_by(a: &str, b: &str, settings: &Settings) -> Ordering {
let (a_transformed, b_transformed): (String, String);
let (a, b) = if !settings.transform_fns.is_empty() {
a_transformed = transform(&a, &settings);
b_transformed = transform(&b, &settings);
(a_transformed.as_str(), b_transformed.as_str())
fn compare_by(a: &Line, b: &Line, global_settings: &GlobalSettings) -> Ordering {
for (idx, selector) in global_settings.selectors.iter().enumerate() {
let a = a.selections[idx].get_str(a);
let b = b.selections[idx].get_str(b);
let settings = &selector.settings;
let cmp: Ordering = if settings.random {
random_shuffle(a, b, global_settings.salt.clone())
} else {
(a, b)
(match settings.mode {
SortMode::Numeric => numeric_compare,
SortMode::GeneralNumeric => general_numeric_compare,
SortMode::HumanNumeric => human_numeric_size_compare,
SortMode::Month => month_compare,
SortMode::Version => version_compare,
SortMode::Default => default_compare,
})(a, b)
};
if cmp != Ordering::Equal {
return if settings.reverse { cmp.reverse() } else { cmp };
}
}
// Call "last resort compare" if all selectors returned Equal
let cmp = if global_settings.random || global_settings.stable || global_settings.unique {
Ordering::Equal
} else {
default_compare(&a.line, &b.line)
};
// 1st Compare
let mut cmp: Ordering = if settings.random {
random_shuffle(a, b, settings.salt.clone())
if global_settings.reverse {
cmp.reverse()
} else {
(settings.compare_fn)(a, b)
};
// Call "last resort compare" on any equal
if cmp == Ordering::Equal {
if settings.random || settings.stable || settings.unique {
cmp = Ordering::Equal
} else {
cmp = default_compare(a, b)
};
};
if settings.reverse {
return cmp.reverse();
} else {
return cmp;
cmp
}
}
@ -617,8 +992,8 @@ fn leading_num_common(a: &str) -> &str {
&& !c.eq(&'e')
&& !c.eq(&'E')
// check whether first char is + or -
&& !a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE)
&& !a.chars().nth(0).unwrap_or('\0').eq(&NEGATIVE)
&& !a.chars().next().unwrap_or('\0').eq(&POSITIVE)
&& !a.chars().next().unwrap_or('\0').eq(&NEGATIVE)
{
// Strip string of non-numeric trailing chars
s = &a[..idx];
@ -640,9 +1015,9 @@ fn get_leading_num(a: &str) -> &str {
let a = leading_num_common(a);
// GNU numeric sort doesn't recognize '+' or 'e' notation so we strip trailing chars
// GNU numeric sort doesn't recognize '+' or 'e' notation so we strip
for (idx, c) in a.char_indices() {
if c.eq(&'e') || c.eq(&'E') || a.chars().nth(0).unwrap_or('\0').eq(&POSITIVE) {
if c.eq(&'e') || c.eq(&'E') || a.chars().next().unwrap_or('\0').eq(&POSITIVE) {
s = &a[..idx];
break;
}
@ -670,12 +1045,9 @@ fn get_leading_gen(a: &str) -> &str {
// Cleanup raw stripped strings
for c in p_iter.to_owned() {
let next_char_numeric = p_iter.peek().unwrap_or(&'\0').is_numeric();
// Only general numeric recognizes e notation and the '+' sign
if (c.eq(&'e') && !next_char_numeric)
|| (c.eq(&'E') && !next_char_numeric)
// Only general numeric recognizes e notation and, see block below, the '+' sign
// Only GNU (non-general) numeric recognizes thousands separators, takes only leading #
|| c.eq(&THOUSANDS_SEP)
{
if (c.eq(&'e') || c.eq(&'E')) && !next_char_numeric || c.eq(&THOUSANDS_SEP) {
result = a.split(c).next().unwrap_or("");
break;
// If positive sign and next char is not numeric, split at positive sign and keep trailing numbers
@ -724,19 +1096,17 @@ fn get_months_dedup(a: &str) -> String {
// *For all dedups/uniques except default we must compare leading numbers*
// Also note numeric compare and unique output is specifically *not* the same as a "sort | uniq"
// See: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
fn get_num_dedup<'a>(a: &'a str, settings: &&mut Settings) -> &'a str {
fn get_num_dedup<'a>(a: &'a str, settings: &GlobalSettings) -> &'a str {
// Trim and remove any leading zeros
let s = a.trim().trim_start_matches('0');
// Get first char
let c = s.chars().nth(0).unwrap_or('\0');
let c = s.chars().next().unwrap_or('\0');
// Empty lines and non-number lines are treated as the same for dedup
if s.is_empty() {
""
} else if !c.eq(&NEGATIVE) && !c.is_numeric() {
""
// Prepare lines for comparison of only the numerical leading numbers
if s.is_empty() || (!c.eq(&NEGATIVE) && !c.is_numeric()) {
""
} else {
let result = match settings.mode {
SortMode::Numeric => get_leading_num(s),
@ -944,6 +1314,7 @@ fn month_parse(line: &str) -> Month {
}
fn month_compare(a: &str, b: &str) -> Ordering {
#![allow(clippy::comparison_chain)]
let ma = month_parse(a);
let mb = month_parse(b);
@ -986,32 +1357,29 @@ fn remove_nonprinting_chars(s: &str) -> String {
.collect::<String>()
}
fn print_sorted<S, T: Iterator<Item = S>>(iter: T, settings: &Settings)
where
S: std::fmt::Display,
{
fn print_sorted<T: Iterator<Item = String>>(iter: T, settings: &GlobalSettings) {
let mut file: Box<dyn Write> = match settings.outfile {
Some(ref filename) => match File::create(Path::new(&filename)) {
Ok(f) => Box::new(BufWriter::new(f)) as Box<dyn Write>,
Err(e) => {
show_error!("sort: {0}: {1}", filename, e.to_string());
show_error!("{0}: {1}", filename, e.to_string());
panic!("Could not open output file");
}
},
None => Box::new(stdout()) as Box<dyn Write>,
None => Box::new(BufWriter::new(stdout())) as Box<dyn Write>,
};
if settings.zero_terminated {
for line in iter {
let str = format!("{}\0", line);
crash_if_err!(1, file.write_all(str.as_bytes()));
crash_if_err!(1, file.write_all(line.as_bytes()));
crash_if_err!(1, file.write_all("\0".as_bytes()));
}
} else {
for line in iter {
let str = format!("{}\n", line);
crash_if_err!(1, file.write_all(str.as_bytes()));
crash_if_err!(1, file.write_all(line.as_bytes()));
crash_if_err!(1, file.write_all("\n".as_bytes()));
}
}
crash_if_err!(1, file.flush());
}
// from cat.rs
@ -1024,7 +1392,7 @@ fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
match File::open(Path::new(path)) {
Ok(f) => Some((Box::new(f) as Box<dyn Read>, false)),
Err(e) => {
show_error!("sort: {0}: {1}", path, e.to_string());
show_error!("{0}: {1}", path, e.to_string());
None
}
}
@ -1097,4 +1465,34 @@ mod tests {
assert_eq!(Ordering::Less, version_compare(a, b));
}
#[test]
fn test_random_compare() {
    // Comparing a key with an identical key under one salt must yield Equal.
    let salt = get_rand_string();
    let (left, right) = ("9", "9");
    assert_eq!(Ordering::Equal, random_shuffle(left, right, salt));
}
#[test]
fn test_tokenize_fields() {
    // Default (no separator) tokenization: each expected range after the first
    // starts at the blanks preceding the field's text. The last range is 9..14,
    // so the line must be 14 bytes long: `x` is preceded by a run of four
    // blanks, which belongs to that final field.
    let line = "foo bar b    x";
    // Guards the fixture against having its whitespace run collapsed.
    assert_eq!(line.len(), 14);
    assert_eq!(tokenize(line, None), vec![0..3, 3..7, 7..9, 9..14]);
}
#[test]
fn test_tokenize_fields_leading_whitespace() {
    // The first expected range is 0..7, i.e. four leading blanks plus "foo",
    // and the last is 13..18, i.e. four blanks plus "x". The line is therefore
    // 18 bytes long; leading blanks are part of the first field.
    let line = "    foo bar b    x";
    // Guards the fixture against having its whitespace runs collapsed.
    assert_eq!(line.len(), 18);
    assert_eq!(tokenize(line, None), vec![0..7, 7..11, 11..13, 13..18]);
}
#[test]
fn test_tokenize_fields_custom_separator() {
    // With 'a' as the separator, the three leading 'a's delimit three empty
    // fields (0..0, 1..1, 2..2). 3..9 is " foo b", the 'a' of "bar" at index 9
    // is consumed as a separator, and 10..18 is "r b    x" (eight bytes, so the
    // run before `x` is four blanks). The ranges show blanks are not separators
    // here.
    let line = "aaa foo bar b    x";
    // Guards the fixture against having its whitespace run collapsed.
    assert_eq!(line.len(), 18);
    assert_eq!(
        tokenize(line, Some('a')),
        vec![0..0, 1..1, 2..2, 3..9, 10..18]
    );
}
}

View file

@ -15,7 +15,7 @@ edition = "2018"
path = "src/stdbuf.rs"
[dependencies]
getopts = "0.2.18"
clap = "2.33"
tempfile = "3.1"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -10,7 +10,8 @@
#[macro_use]
extern crate uucore;
use getopts::{Matches, Options};
use clap::{App, AppSettings, Arg, ArgMatches};
use std::convert::TryFrom;
use std::fs::File;
use std::io::{self, Write};
use std::os::unix::process::ExitStatusExt;
@ -19,8 +20,35 @@ use std::process::Command;
use tempfile::tempdir;
use tempfile::TempDir;
static NAME: &str = "stdbuf";
static VERSION: &str = env!("CARGO_PKG_VERSION");
static ABOUT: &str =
"Run COMMAND, with modified buffering operations for its standard streams.\n\n\
Mandatory arguments to long options are mandatory for short options too.";
static LONG_HELP: &str = "If MODE is 'L' the corresponding stream will be line buffered.\n\
This option is invalid with standard input.\n\n\
If MODE is '0' the corresponding stream will be unbuffered.\n\n\
Otherwise MODE is a number which may be followed by one of the following:\n\n\
KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\
In this case the corresponding stream will be fully buffered with the buffer size set to \
MODE bytes.\n\n\
NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does for e.g.) then \
that will override corresponding settings changed by 'stdbuf'.\n\
Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O, \
and are thus unaffected by 'stdbuf' settings.\n";
// Argument names used both to declare the clap arguments and to look their
// values up again after parsing.
mod options {
// Long and short spellings of the standard-input buffering option.
pub const INPUT: &str = "input";
pub const INPUT_SHORT: &str = "i";
// Long and short spellings of the standard-output buffering option.
pub const OUTPUT: &str = "output";
pub const OUTPUT_SHORT: &str = "o";
// Long and short spellings of the standard-error buffering option.
pub const ERROR: &str = "error";
pub const ERROR_SHORT: &str = "e";
// Name of the positional argument holding the program to run and its arguments.
pub const COMMAND: &str = "command";
}
/// Builds the usage synopsis: the executable name followed by
/// `OPTION... COMMAND`.
fn get_usage() -> String {
    format!("{exe} OPTION... COMMAND", exe = executable!())
}
const STDBUF_INJECT: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/libstdbuf.so"));
@ -36,16 +64,19 @@ struct ProgramOptions {
stderr: BufferType,
}
enum ErrMsg {
Retry,
Fatal,
impl<'a> TryFrom<&ArgMatches<'a>> for ProgramOptions {
type Error = ProgramOptionsError;
fn try_from(matches: &ArgMatches) -> Result<Self, Self::Error> {
Ok(ProgramOptions {
stdin: check_option(&matches, options::INPUT)?,
stdout: check_option(&matches, options::OUTPUT)?,
stderr: check_option(&matches, options::ERROR)?,
})
}
}
enum OkMsg {
Buffering,
Help,
Version,
}
// Error raised while turning parsed arguments into `ProgramOptions`; the
// wrapped string is the human-readable message.
struct ProgramOptionsError(String);
#[cfg(any(
target_os = "linux",
@ -73,31 +104,6 @@ fn preload_strings() -> (&'static str, &'static str) {
crash!(1, "Command not supported for this operating system!")
}
/// Prints the program name and version on one line to standard output.
fn print_version() {
    println!("{name} {version}", name = NAME, version = VERSION);
}
// Prints the full help text to standard output: name and version, the usage
// line, the option summary generated by `opts.usage(brief)`, and finally the
// explanation of the MODE values.
fn print_usage(opts: &Options) {
// Heading passed to getopts; it precedes the generated option list.
let brief = "Run COMMAND, with modified buffering operations for its standard streams\n \
Mandatory arguments to long options are mandatory for short options too.";
// Free-form description of MODE, printed after the option list.
let explanation = "If MODE is 'L' the corresponding stream will be line buffered.\n \
This option is invalid with standard input.\n\n \
If MODE is '0' the corresponding stream will be unbuffered.\n\n \
Otherwise MODE is a number which may be followed by one of the following:\n\n \
KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n \
In this case the corresponding stream will be fully buffered with the buffer size set to \
MODE bytes.\n\n \
NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does for e.g.) then \
that will override corresponding settings changed by 'stdbuf'.\n \
Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O, \
and are thus unaffected by 'stdbuf' settings.\n";
println!("{} {}", NAME, VERSION);
println!();
println!("Usage: stdbuf OPTION... COMMAND");
println!();
println!("{}\n{}", opts.usage(brief), explanation);
}
fn parse_size(size: &str) -> Option<u64> {
let ext = size.trim_start_matches(|c: char| c.is_digit(10));
let num = size.trim_end_matches(char::is_alphabetic);
@ -133,63 +139,28 @@ fn parse_size(size: &str) -> Option<u64> {
Some(buf_size * base.pow(power))
}
fn check_option(matches: &Matches, name: &str, modified: &mut bool) -> Option<BufferType> {
match matches.opt_str(name) {
Some(value) => {
*modified = true;
match &value[..] {
fn check_option(matches: &ArgMatches, name: &str) -> Result<BufferType, ProgramOptionsError> {
match matches.value_of(name) {
Some(value) => match &value[..] {
"L" => {
if name == "input" {
show_info!("line buffering stdin is meaningless");
None
if name == options::INPUT {
Err(ProgramOptionsError(format!(
"line buffering stdin is meaningless"
)))
} else {
Some(BufferType::Line)
Ok(BufferType::Line)
}
}
x => {
let size = match parse_size(x) {
Some(m) => m,
None => {
show_error!("Invalid mode {}", x);
return None;
}
None => return Err(ProgramOptionsError(format!("invalid mode {}", x))),
};
Some(BufferType::Size(size))
Ok(BufferType::Size(size))
}
},
None => Ok(BufferType::Default),
}
}
None => Some(BufferType::Default),
}
}
// Parses `args` with `optgrps` and stores the requested buffering modes in
// `options`.
//
// Returns `Ok(OkMsg::Help)` / `Ok(OkMsg::Version)` when those flags are present,
// and `Ok(OkMsg::Buffering)` when exactly one free argument remains and at least
// one buffering option was given. `Err(ErrMsg::Retry)` means this slice could not
// be parsed or did not leave exactly one free argument. `Err(ErrMsg::Fatal)`
// means a buffering option was invalid or none was supplied.
fn parse_options(
args: &[String],
options: &mut ProgramOptions,
optgrps: &Options,
) -> Result<OkMsg, ErrMsg> {
let matches = match optgrps.parse(args) {
Ok(m) => m,
// The parser's error detail is discarded; only the retry signal is returned.
Err(_) => return Err(ErrMsg::Retry),
};
// --help and --version are checked before any buffering option is examined.
if matches.opt_present("help") {
return Ok(OkMsg::Help);
}
if matches.opt_present("version") {
return Ok(OkMsg::Version);
}
// Set to true by check_option as soon as any of -i/-o/-e carries a value.
let mut modified = false;
// A `None` from check_option (invalid mode) is turned into a fatal error by `?`.
options.stdin = check_option(&matches, "input", &mut modified).ok_or(ErrMsg::Fatal)?;
options.stdout = check_option(&matches, "output", &mut modified).ok_or(ErrMsg::Fatal)?;
options.stderr = check_option(&matches, "error", &mut modified).ok_or(ErrMsg::Fatal)?;
// Succeed only when exactly one free (non-option) argument is left.
if matches.free.len() != 1 {
return Err(ErrMsg::Retry);
}
if !modified {
show_error!("you must specify a buffering mode option");
return Err(ErrMsg::Fatal);
}
Ok(OkMsg::Buffering)
}
fn set_command_env(command: &mut Command, buffer_name: &str, buffer_type: BufferType) {
@ -215,72 +186,62 @@ fn get_preload_env(tmp_dir: &mut TempDir) -> io::Result<(String, PathBuf)> {
}
pub fn uumain(args: impl uucore::Args) -> i32 {
let args = args.collect_str();
let usage = get_usage();
let mut opts = Options::new();
let matches = App::new(executable!())
.version(VERSION)
.about(ABOUT)
.usage(&usage[..])
.after_help(LONG_HELP)
.setting(AppSettings::TrailingVarArg)
.arg(
Arg::with_name(options::INPUT)
.long(options::INPUT)
.short(options::INPUT_SHORT)
.help("adjust standard input stream buffering")
.value_name("MODE")
.required_unless_one(&[options::OUTPUT, options::ERROR]),
)
.arg(
Arg::with_name(options::OUTPUT)
.long(options::OUTPUT)
.short(options::OUTPUT_SHORT)
.help("adjust standard output stream buffering")
.value_name("MODE")
.required_unless_one(&[options::INPUT, options::ERROR]),
)
.arg(
Arg::with_name(options::ERROR)
.long(options::ERROR)
.short(options::ERROR_SHORT)
.help("adjust standard error stream buffering")
.value_name("MODE")
.required_unless_one(&[options::INPUT, options::OUTPUT]),
)
.arg(
Arg::with_name(options::COMMAND)
.multiple(true)
.takes_value(true)
.hidden(true)
.required(true),
)
.get_matches_from(args);
opts.optopt(
"i",
"input",
"adjust standard input stream buffering",
"MODE",
);
opts.optopt(
"o",
"output",
"adjust standard output stream buffering",
"MODE",
);
opts.optopt(
"e",
"error",
"adjust standard error stream buffering",
"MODE",
);
opts.optflag("", "help", "display this help and exit");
opts.optflag("", "version", "output version information and exit");
let options = ProgramOptions::try_from(&matches)
.unwrap_or_else(|e| crash!(125, "{}\nTry 'stdbuf --help' for more information.", e.0));
let mut options = ProgramOptions {
stdin: BufferType::Default,
stdout: BufferType::Default,
stderr: BufferType::Default,
};
let mut command_idx: i32 = -1;
for i in 1..=args.len() {
match parse_options(&args[1..i], &mut options, &opts) {
Ok(OkMsg::Buffering) => {
command_idx = (i as i32) - 1;
break;
}
Ok(OkMsg::Help) => {
print_usage(&opts);
return 0;
}
Ok(OkMsg::Version) => {
print_version();
return 0;
}
Err(ErrMsg::Fatal) => break,
Err(ErrMsg::Retry) => continue,
}
}
if command_idx == -1 {
crash!(
125,
"Invalid options\nTry 'stdbuf --help' for more information."
);
}
let command_name = &args[command_idx as usize];
let mut command = Command::new(command_name);
let mut command_values = matches.values_of::<&str>(options::COMMAND).unwrap();
let mut command = Command::new(command_values.next().unwrap());
let command_params: Vec<&str> = command_values.collect();
let mut tmp_dir = tempdir().unwrap();
let (preload_env, libstdbuf) = return_if_err!(1, get_preload_env(&mut tmp_dir));
command
.args(&args[(command_idx as usize) + 1..])
.env(preload_env, libstdbuf);
command.env(preload_env, libstdbuf);
set_command_env(&mut command, "_STDBUF_I", options.stdin);
set_command_env(&mut command, "_STDBUF_O", options.stdout);
set_command_env(&mut command, "_STDBUF_E", options.stderr);
command.args(command_params);
let mut process = match command.spawn() {
Ok(p) => p,
Err(e) => crash!(1, "failed to execute process: {}", e),

View file

@ -15,7 +15,7 @@ edition = "2018"
path = "src/unlink.rs"
[dependencies]
getopts = "0.2.18"
clap = "2.33"
libc = "0.2.42"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -12,59 +12,53 @@
#[macro_use]
extern crate uucore;
use getopts::Options;
use clap::{App, Arg};
use libc::{lstat, stat, unlink};
use libc::{S_IFLNK, S_IFMT, S_IFREG};
use std::ffi::CString;
use std::io::{Error, ErrorKind};
static NAME: &str = "unlink";
static VERSION: &str = env!("CARGO_PKG_VERSION");
static ABOUT: &str = "Unlink the file at [FILE].";
static OPT_PATH: &str = "FILE";
/// Builds the usage synopsis: the executable name followed by
/// `[OPTION]... FILE`.
fn get_usage() -> String {
    format!("{exe} [OPTION]... FILE", exe = executable!())
}
pub fn uumain(args: impl uucore::Args) -> i32 {
let args = args.collect_str();
let mut opts = Options::new();
let usage = get_usage();
opts.optflag("h", "help", "display this help and exit");
opts.optflag("V", "version", "output version information and exit");
let matches = App::new(executable!())
.version(VERSION)
.about(ABOUT)
.usage(&usage[..])
.arg(Arg::with_name(OPT_PATH).hidden(true).multiple(true))
.get_matches_from(args);
let matches = match opts.parse(&args[1..]) {
Ok(m) => m,
Err(f) => crash!(1, "invalid options\n{}", f),
};
let paths: Vec<String> = matches
.values_of(OPT_PATH)
.map(|v| v.map(ToString::to_string).collect())
.unwrap_or_default();
if matches.opt_present("help") {
println!("{} {}", NAME, VERSION);
println!();
println!("Usage:");
println!(" {} [FILE]... [OPTION]...", NAME);
println!();
println!("{}", opts.usage("Unlink the file at [FILE]."));
return 0;
}
if matches.opt_present("version") {
println!("{} {}", NAME, VERSION);
return 0;
}
if matches.free.is_empty() {
if paths.is_empty() {
crash!(
1,
"missing operand\nTry '{0} --help' for more information.",
NAME
executable!()
);
} else if matches.free.len() > 1 {
} else if paths.len() > 1 {
crash!(
1,
"extra operand: '{1}'\nTry '{0} --help' for more information.",
NAME,
matches.free[1]
executable!(),
paths[1]
);
}
let c_string = CString::new(matches.free[0].clone()).unwrap(); // unwrap() cannot fail, the string comes from argv so it cannot contain a \0.
let c_string = CString::new(paths[0].clone()).unwrap(); // unwrap() cannot fail, the string comes from argv so it cannot contain a \0.
let st_mode = {
#[allow(deprecated)]
@ -72,12 +66,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let result = unsafe { lstat(c_string.as_ptr(), &mut buf as *mut stat) };
if result < 0 {
crash!(
1,
"Cannot stat '{}': {}",
matches.free[0],
Error::last_os_error()
);
crash!(1, "Cannot stat '{}': {}", paths[0], Error::last_os_error());
}
buf.st_mode & S_IFMT
@ -101,7 +90,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
match result {
Ok(_) => (),
Err(e) => {
crash!(1, "cannot unlink '{0}': {1}", matches.free[0], e);
crash!(1, "cannot unlink '{0}': {1}", paths[0], e);
}
}

View file

@ -2,6 +2,8 @@ use crate::common::util::*;
use filetime::FileTime;
use rust_users::*;
use std::os::unix::fs::PermissionsExt;
#[cfg(not(windows))]
use std::process::Command;
#[cfg(target_os = "linux")]
use std::thread::sleep;
@ -566,3 +568,97 @@ fn test_install_copy_then_compare_file_with_extra_mode() {
assert!(after_install_sticky != after_install_sticky_again);
}
// Destination file name the fixture binary is installed as in the strip tests.
const STRIP_TARGET_FILE: &str = "helloworld_installed";
// External tool the tests run (with `-t`) on the installed file to list its symbols.
const SYMBOL_DUMP_PROGRAM: &str = "objdump";
// Symbol name that must no longer appear in the dump after stripping.
const STRIP_SOURCE_FILE_SYMBOL: &str = "main";
/// Returns the fixture binary to install: the macOS build when compiled for
/// macOS, the Linux build on every other target.
fn strip_source_file() -> &'static str {
    match cfg!(target_os = "macos") {
        true => "helloworld_macos",
        false => "helloworld_linux",
    }
}
#[test]
#[cfg(not(windows))]
fn test_install_and_strip() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // Install with `-s`; the command must succeed without writing to stderr.
    scene
        .ucmd()
        .arg("-s")
        .arg(strip_source_file())
        .arg(STRIP_TARGET_FILE)
        .succeeds()
        .no_stderr();

    // Dump the installed file's symbols and make sure the marker symbol is gone.
    let installed = fixtures.plus(STRIP_TARGET_FILE);
    let dump = Command::new(SYMBOL_DUMP_PROGRAM)
        .arg("-t")
        .arg(installed)
        .output()
        .unwrap();
    let symbols = String::from_utf8(dump.stdout).unwrap();
    assert!(!symbols.contains(STRIP_SOURCE_FILE_SYMBOL));
}
#[test]
#[cfg(not(windows))]
fn test_install_and_strip_with_program() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // Same as the plain `-s` test, but with the strip program given explicitly.
    scene
        .ucmd()
        .arg("-s")
        .arg("--strip-program")
        .arg("/usr/bin/strip")
        .arg(strip_source_file())
        .arg(STRIP_TARGET_FILE)
        .succeeds()
        .no_stderr();

    // Dump the installed file's symbols and make sure the marker symbol is gone.
    let installed = fixtures.plus(STRIP_TARGET_FILE);
    let dump = Command::new(SYMBOL_DUMP_PROGRAM)
        .arg("-t")
        .arg(installed)
        .output()
        .unwrap();
    let symbols = String::from_utf8(dump.stdout).unwrap();
    assert!(!symbols.contains(STRIP_SOURCE_FILE_SYMBOL));
}
#[test]
#[cfg(not(windows))]
fn test_install_and_strip_with_invalid_program() {
    let scene = TestScenario::new(util_name!());

    // `/bin/date` is given as the strip program; install is expected to fail
    // and report that the strip program failed.
    let result = scene
        .ucmd()
        .arg("-s")
        .arg("--strip-program")
        .arg("/bin/date")
        .arg(strip_source_file())
        .arg(STRIP_TARGET_FILE)
        .fails();
    let err_text = result.stderr;
    assert!(err_text.contains("strip program failed"));
}
#[test]
#[cfg(not(windows))]
fn test_install_and_strip_with_non_existent_program() {
    let scene = TestScenario::new(util_name!());

    // The strip program path does not exist; install is expected to fail with
    // the "No such file or directory" message on stderr.
    let result = scene
        .ucmd()
        .arg("-s")
        .arg("--strip-program")
        .arg("/usr/bin/non_existent_program")
        .arg(strip_source_file())
        .arg(STRIP_TARGET_FILE)
        .fails();
    let err_text = result.stderr;
    assert!(err_text.contains("No such file or directory"));
}

View file

@ -1 +1,51 @@
// ToDO: add tests
use crate::common::util::*;
#[test]
fn test_shred_remove() {
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;

    let (target, bystander) = ("test_shred_remove_a", "test_shred_remove_b");

    // Both files exist before the run.
    at.touch(target);
    at.touch(bystander);

    // Shred only the target, asking for removal with `-u`.
    scene.ucmd().arg("-u").arg(target).run();

    // The target is gone; the other file is untouched.
    assert!(!at.file_exists(target));
    assert!(at.file_exists(bystander));
}
#[cfg(not(target_os = "freebsd"))]
#[test]
fn test_shred_force() {
    let scene = TestScenario::new(util_name!());
    let at = &scene.fixtures;

    let target = "test_shred_force";

    // Create the file and confirm it is there.
    at.touch(target);
    assert!(at.file_exists(target));

    // Make the file read-only.
    at.set_readonly(target);

    // `shred -u` without `-f`: dump the output for diagnosis.
    let first_attempt = scene.ucmd().arg("-u").arg(target).run();
    println!("stderr = {:?}", first_attempt.stderr);
    println!("stdout = {:?}", first_attempt.stdout);

    // The read-only file must still exist.
    assert!(at.file_exists(target));

    // `shred -u -f` is expected to remove it.
    scene.ucmd().arg("-u").arg("-f").arg(target).run();
    assert!(!at.file_exists(target));
}

View file

@ -185,10 +185,10 @@ fn test_dictionary_order2() {
fn test_non_printing_chars() {
for non_printing_chars_param in vec!["-i"] {
new_ucmd!()
.pipe_in("a👦🏻aa b\naaaa b")
.pipe_in("a👦🏻aa\naaaa")
.arg(non_printing_chars_param)
.succeeds()
.stdout_only("aaaa b\na👦🏻aa b\n");
.stdout_only("a👦🏻aa\naaaa\n");
}
}
@ -307,6 +307,166 @@ fn test_numeric_unique_ints2() {
}
}
#[test]
fn test_keys_open_ended() {
    // Key with a start position only (field 2, character 2) and no end position.
    new_ucmd!()
        .pipe_in("aa bb cc\ndd aa ff\ngg aa cc\n")
        .args(&["-k", "2.2"])
        .succeeds()
        .stdout_only("gg aa cc\ndd aa ff\naa bb cc\n");
}
#[test]
fn test_keys_closed_range() {
    // Key that starts and ends at field 2, character 2.
    new_ucmd!()
        .pipe_in("aa bb cc\ndd aa ff\ngg aa cc\n")
        .args(&["-k", "2.2,2.2"])
        .succeeds()
        .stdout_only("dd aa ff\ngg aa cc\naa bb cc\n");
}
#[test]
fn test_keys_multiple_ranges() {
    // Two keys: field 2 first, then field 3.
    new_ucmd!()
        .pipe_in("aa bb cc\ndd aa ff\ngg aa cc\n")
        .args(&["-k", "2,2", "-k", "3,3"])
        .succeeds()
        .stdout_only("gg aa cc\ndd aa ff\naa bb cc\n");
}
#[test]
fn test_keys_no_field_match() {
    // Key on field 4, which only the first input line has.
    new_ucmd!()
        .pipe_in("aa aa aa aa\naa bb cc\ndd aa ff\n")
        .args(&["-k", "4,4"])
        .succeeds()
        .stdout_only("aa bb cc\ndd aa ff\naa aa aa aa\n");
}
#[test]
fn test_keys_no_char_match() {
    // Key starting at character 2 of field 1; the line "c" has no second character.
    new_ucmd!()
        .pipe_in("aaa\nba\nc\n")
        .args(&["-k", "1.2"])
        .succeeds()
        .stdout_only("c\nba\naaa\n");
}
#[test]
fn test_keys_custom_separator() {
    // Same key as the closed-range test, with `x` given via `-t` as the field separator.
    new_ucmd!()
        .pipe_in("aaxbbxcc\nddxaaxff\nggxaaxcc\n")
        .args(&["-k", "2.2,2.2", "-t", "x"])
        .succeeds()
        .stdout_only("ddxaaxff\nggxaaxcc\naaxbbxcc\n");
}
#[test]
fn test_keys_invalid_field() {
    // A key ending in `.` has an empty character index and must be rejected.
    let bad_key = ["-k", "1."];
    new_ucmd!()
        .args(&bad_key)
        .fails()
        .stderr_only("sort: error: failed to parse character index for key `1.`: cannot parse integer from empty string");
}
#[test]
fn test_keys_invalid_field_option() {
    // `x` is not a valid per-key option letter.
    let bad_key = ["-k", "1.1x"];
    new_ucmd!()
        .args(&bad_key)
        .fails()
        .stderr_only("sort: error: invalid option for key: `x`");
}
#[test]
fn test_keys_invalid_field_zero() {
    // A field index of 0 must be rejected.
    let bad_key = ["-k", "0.1"];
    new_ucmd!()
        .args(&bad_key)
        .fails()
        .stderr_only("sort: error: field index was 0");
}
#[test]
fn test_keys_with_options() {
    // The numeric option may be attached to either key position or given
    // globally; all three spellings must produce the same order.
    let spellings: [&[&str]; 3] = [
        &["-k", "2,2n"],
        &["-k", "2n,2"],
        &["-k", "2,2", "-n"],
    ];
    let unsorted = "aa 3 cc\ndd 1 ff\ngg 2 cc\n";
    let sorted = "dd 1 ff\ngg 2 cc\naa 3 cc\n";
    for spelling in &spellings {
        new_ucmd!()
            .args(spelling)
            .pipe_in(unsorted)
            .succeeds()
            .stdout_only(sorted);
    }
}
#[test]
fn test_keys_with_options_blanks_start() {
    // The `b` option attached to the key start and the global `-b` flag must
    // produce the same order.
    let spellings: [&[&str]; 2] = [&["-k", "2b,2"], &["-k", "2,2", "-b"]];
    let unsorted = "aa 3 cc\ndd 1 ff\ngg 2 cc\n";
    let sorted = "dd 1 ff\ngg 2 cc\naa 3 cc\n";
    for spelling in &spellings {
        new_ucmd!()
            .args(spelling)
            .pipe_in(unsorted)
            .succeeds()
            .stdout_only(sorted);
    }
}
// Runs a stable sort (`-s`) on a key whose end position carries the `b` option.
// NOTE(review): the three input lines (and the three expected lines) are
// byte-identical as written, so this cannot detect a wrong order. Presumably
// they differed in the number of blanks between `a` and `b` and those runs were
// collapsed — confirm against the upstream test.
#[test]
fn test_keys_with_options_blanks_end() {
let input = "a b
a b
a b
";
new_ucmd!()
.args(&["-k", "1,2.1b", "-s"])
.pipe_in(input)
.succeeds()
.stdout_only(
"a b
a b
a b
",
);
}
// Runs a stable sort (`-s`) on the key `1,2.1` and expects the given output.
// NOTE(review): the three input lines (and the three expected lines) are
// byte-identical as written, so this cannot detect a wrong order. Presumably
// they differed in the number of blanks between `a` and `b` and those runs were
// collapsed — confirm against the upstream test.
#[test]
fn test_keys_stable() {
let input = "a b
a b
a b
";
new_ucmd!()
.args(&["-k", "1,2.1", "-s"])
.pipe_in(input)
.succeeds()
.stdout_only(
"a b
a b
a b
",
);
}
// Sorts on field 1 with `a` as the separator. Both lines begin with `a`, and
// the output is expected to equal the input unchanged.
#[test]
fn test_keys_empty_match() {
let input = "a a a a
aaaa
";
new_ucmd!()
.args(&["-k", "1,1", "-t", "a"])
.pipe_in(input)
.succeeds()
.stdout_only(input);
}
#[test]
fn test_zero_terminated() {
test_helper("zero-terminated", "-z");

View file

@ -1,13 +1,71 @@
#[cfg(not(target_os = "windows"))]
use crate::common::util::*;
#[cfg(not(target_os = "windows"))]
#[test]
fn test_stdbuf_unbuffered_stdout() {
    // Only exercised on Linux; a no-op everywhere else.
    if !cfg!(target_os = "linux") {
        return;
    }
    // This is a basic smoke test: `head` under `-o0` must echo its input.
    let text = "The quick brown fox jumps over the lazy dog.";
    new_ucmd!()
        .args(&["-o0", "head"])
        .pipe_in(text)
        .run()
        .stdout_is(text);
}
#[cfg(not(target_os = "windows"))]
#[test]
fn test_stdbuf_line_buffered_stdout() {
    // `head` under line-buffered stdout (`-oL`) must echo its input unchanged.
    let text = "The quick brown fox jumps over the lazy dog.";
    new_ucmd!()
        .args(&["-oL", "head"])
        .pipe_in(text)
        .run()
        .stdout_is(text);
}
// Running stdbuf with a command but none of -i/-o/-e must fail, and stderr must
// be exactly the argument parser's "required arguments were not provided"
// report followed by the usage line.
// NOTE(review): each `\n \` below yields a newline plus a single space;
// presumably the parser's real output indents these lines further and the
// blanks were collapsed here — confirm against actual output.
#[cfg(not(target_os = "windows"))]
#[test]
fn test_stdbuf_no_buffer_option_fails() {
new_ucmd!()
.args(&["head"])
.pipe_in("The quick brown fox jumps over the lazy dog.")
.fails()
.stderr_is(
"error: The following required arguments were not provided:\n \
--error <MODE>\n \
--input <MODE>\n \
--output <MODE>\n\n\
USAGE:\n \
stdbuf OPTION... COMMAND\n\n\
For more information try --help",
);
}
#[cfg(not(target_os = "windows"))]
#[test]
fn test_stdbuf_trailing_var_arg() {
    // The `-1` after `tail` must reach `tail` rather than be parsed by stdbuf,
    // so only the last input line is printed.
    let two_lines = "The quick brown fox\njumps over the lazy dog.";
    let last_line = "jumps over the lazy dog.";
    new_ucmd!()
        .args(&["-i", "1024", "tail", "-1"])
        .pipe_in(two_lines)
        .run()
        .stdout_is(last_line);
}
#[cfg(not(target_os = "windows"))]
#[test]
fn test_stdbuf_line_buffering_stdin_fails() {
    // Mode `L` is rejected for stdin with a dedicated message.
    let expected_err =
        "stdbuf: error: line buffering stdin is meaningless\nTry 'stdbuf --help' for more information.";
    new_ucmd!()
        .args(&["-i", "L", "head"])
        .pipe_in("The quick brown fox jumps over the lazy dog.")
        .fails()
        .stderr_is(expected_err);
}
#[cfg(not(target_os = "windows"))]
#[test]
fn test_stdbuf_invalid_mode_fails() {
    // `1024R` is not an accepted mode and must be reported as invalid.
    let expected_err =
        "stdbuf: error: invalid mode 1024R\nTry 'stdbuf --help' for more information.";
    new_ucmd!()
        .args(&["-i", "1024R", "head"])
        .pipe_in("The quick brown fox jumps over the lazy dog.")
        .fails()
        .stderr_is(expected_err);
}

View file

@ -351,6 +351,13 @@ impl AtPath {
String::from(self.minus(name).to_str().unwrap())
}
/// Marks the file `name` (resolved through `self.plus`) as read-only.
///
/// Panics if the file's metadata cannot be read or the permissions cannot be
/// written back.
pub fn set_readonly(&self, name: &str) {
    let mut perms = fs::metadata(self.plus(name)).unwrap().permissions();
    perms.set_readonly(true);
    fs::set_permissions(self.plus(name), perms).unwrap();
}
pub fn open(&self, name: &str) -> File {
log_info("open", self.plus_as_string(name));
File::open(self.plus(name)).unwrap()

3
tests/fixtures/install/helloworld.rs vendored Normal file
View file

@ -0,0 +1,3 @@
// Minimal program that prints a greeting; kept under tests/fixtures/install.
// NOTE(review): presumably the source the prebuilt helloworld_linux and
// helloworld_macos fixture binaries were built from — confirm.
fn main() {
println!("Hello World!");
}

BIN
tests/fixtures/install/helloworld_linux vendored Executable file

Binary file not shown.

BIN
tests/fixtures/install/helloworld_macos vendored Executable file

Binary file not shown.