mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
shuf: Use OS strings, don't split individual arguments, cleanup
- shuf now uses OS strings, so it can read from filenames that are invalid Unicode and it can shuffle arguments that are invalid Unicode. `uucore` now has an `OsWrite` trait to support this without platform-specific boilerplate. - shuf no longer tries to split individual command line arguments, only bulk input from a file/stdin. (This matches GNU and busybox.) - More values are parsed inside clap instead of manually, leading to better error messages and less code. - Some code has been simplified or made more idiomatic.
This commit is contained in:
parent
87ec8285c3
commit
f562543b6c
6 changed files with 250 additions and 155 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3184,7 +3184,6 @@ name = "uu_shuf"
|
||||||
version = "0.0.30"
|
version = "0.0.30"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"memchr",
|
|
||||||
"rand 0.9.0",
|
"rand 0.9.0",
|
||||||
"rand_core 0.9.3",
|
"rand_core 0.9.3",
|
||||||
"uucore",
|
"uucore",
|
||||||
|
|
|
@ -18,7 +18,6 @@ path = "src/shuf.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { workspace = true }
|
clap = { workspace = true }
|
||||||
memchr = { workspace = true }
|
|
||||||
rand = { workspace = true }
|
rand = { workspace = true }
|
||||||
rand_core = { workspace = true }
|
rand_core = { workspace = true }
|
||||||
uucore = { workspace = true }
|
uucore = { workspace = true }
|
||||||
|
|
|
@ -5,23 +5,27 @@
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) cmdline evec nonrepeating seps shufable rvec fdata
|
// spell-checker:ignore (ToDO) cmdline evec nonrepeating seps shufable rvec fdata
|
||||||
|
|
||||||
|
use clap::builder::ValueParser;
|
||||||
use clap::{Arg, ArgAction, Command};
|
use clap::{Arg, ArgAction, Command};
|
||||||
use memchr::memchr_iter;
|
use rand::prelude::SliceRandom;
|
||||||
use rand::prelude::{IndexedRandom, SliceRandom};
|
use rand::seq::IndexedRandom;
|
||||||
use rand::{Rng, RngCore};
|
use rand::{Rng, RngCore};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
use std::ffi::{OsStr, OsString};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufReader, BufWriter, Error, Read, Write};
|
use std::io::{stdin, stdout, BufWriter, Error, Read, Write};
|
||||||
use std::ops::RangeInclusive;
|
use std::ops::RangeInclusive;
|
||||||
use uucore::display::Quotable;
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::str::FromStr;
|
||||||
|
use uucore::display::{OsWrite, Quotable};
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{format_usage, help_about, help_usage};
|
||||||
|
|
||||||
mod rand_read_adapter;
|
mod rand_read_adapter;
|
||||||
|
|
||||||
enum Mode {
|
enum Mode {
|
||||||
Default(String),
|
Default(PathBuf),
|
||||||
Echo(Vec<String>),
|
Echo(Vec<OsString>),
|
||||||
InputRange(RangeInclusive<usize>),
|
InputRange(RangeInclusive<usize>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,8 +34,8 @@ static ABOUT: &str = help_about!("shuf.md");
|
||||||
|
|
||||||
struct Options {
|
struct Options {
|
||||||
head_count: usize,
|
head_count: usize,
|
||||||
output: Option<String>,
|
output: Option<PathBuf>,
|
||||||
random_source: Option<String>,
|
random_source: Option<PathBuf>,
|
||||||
repeat: bool,
|
repeat: bool,
|
||||||
sep: u8,
|
sep: u8,
|
||||||
}
|
}
|
||||||
|
@ -54,53 +58,45 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
let mode = if matches.get_flag(options::ECHO) {
|
let mode = if matches.get_flag(options::ECHO) {
|
||||||
Mode::Echo(
|
Mode::Echo(
|
||||||
matches
|
matches
|
||||||
.get_many::<String>(options::FILE_OR_ARGS)
|
.get_many(options::FILE_OR_ARGS)
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
.map(String::from)
|
.cloned()
|
||||||
.collect(),
|
.collect(),
|
||||||
)
|
)
|
||||||
} else if let Some(range) = matches.get_one::<String>(options::INPUT_RANGE) {
|
} else if let Some(range) = matches.get_one(options::INPUT_RANGE).cloned() {
|
||||||
match parse_range(range) {
|
Mode::InputRange(range)
|
||||||
Ok(m) => Mode::InputRange(m),
|
|
||||||
Err(msg) => {
|
|
||||||
return Err(USimpleError::new(1, msg));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let mut operands = matches
|
let mut operands = matches
|
||||||
.get_many::<String>(options::FILE_OR_ARGS)
|
.get_many::<OsString>(options::FILE_OR_ARGS)
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
let file = operands.next().cloned().unwrap_or("-".into());
|
let file = operands.next().cloned().unwrap_or("-".into());
|
||||||
if let Some(second_file) = operands.next() {
|
if let Some(second_file) = operands.next() {
|
||||||
return Err(UUsageError::new(
|
return Err(UUsageError::new(
|
||||||
1,
|
1,
|
||||||
format!("unexpected argument '{second_file}' found"),
|
format!("unexpected argument {} found", second_file.quote()),
|
||||||
));
|
));
|
||||||
};
|
};
|
||||||
Mode::Default(file)
|
Mode::Default(file.into())
|
||||||
};
|
};
|
||||||
|
|
||||||
let options = Options {
|
let options = Options {
|
||||||
head_count: {
|
// GNU shuf takes the lowest value passed, so we imitate that.
|
||||||
let headcounts = matches
|
// It's probably a bug or an implementation artifact though.
|
||||||
.get_many::<String>(options::HEAD_COUNT)
|
// Busybox takes the final value which is more typical: later
|
||||||
.unwrap_or_default()
|
// options override earlier options.
|
||||||
.cloned()
|
head_count: matches
|
||||||
.collect();
|
.get_many::<usize>(options::HEAD_COUNT)
|
||||||
match parse_head_count(headcounts) {
|
.unwrap_or_default()
|
||||||
Ok(val) => val,
|
.cloned()
|
||||||
Err(msg) => return Err(USimpleError::new(1, msg)),
|
.min()
|
||||||
}
|
.unwrap_or(usize::MAX),
|
||||||
},
|
output: matches.get_one(options::OUTPUT).cloned(),
|
||||||
output: matches.get_one::<String>(options::OUTPUT).map(String::from),
|
random_source: matches.get_one(options::RANDOM_SOURCE).cloned(),
|
||||||
random_source: matches
|
|
||||||
.get_one::<String>(options::RANDOM_SOURCE)
|
|
||||||
.map(String::from),
|
|
||||||
repeat: matches.get_flag(options::REPEAT),
|
repeat: matches.get_flag(options::REPEAT),
|
||||||
sep: if matches.get_flag(options::ZERO_TERMINATED) {
|
sep: if matches.get_flag(options::ZERO_TERMINATED) {
|
||||||
0x00_u8
|
b'\0'
|
||||||
} else {
|
} else {
|
||||||
0x0a_u8
|
b'\n'
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -108,7 +104,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
// Do not attempt to read the random source or the input file.
|
// Do not attempt to read the random source or the input file.
|
||||||
// However, we must touch the output file, if given:
|
// However, we must touch the output file, if given:
|
||||||
if let Some(s) = options.output {
|
if let Some(s) = options.output {
|
||||||
File::create(&s[..])
|
File::create(&s)
|
||||||
.map_err_context(|| format!("failed to open {} for writing", s.quote()))?;
|
.map_err_context(|| format!("failed to open {} for writing", s.quote()))?;
|
||||||
}
|
}
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -116,8 +112,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
|
|
||||||
match mode {
|
match mode {
|
||||||
Mode::Echo(args) => {
|
Mode::Echo(args) => {
|
||||||
let mut evec = args.iter().map(String::as_bytes).collect::<Vec<_>>();
|
let mut evec: Vec<&OsStr> = args.iter().map(AsRef::as_ref).collect();
|
||||||
find_seps(&mut evec, options.sep);
|
|
||||||
shuf_exec(&mut evec, options)?;
|
shuf_exec(&mut evec, options)?;
|
||||||
}
|
}
|
||||||
Mode::InputRange(mut range) => {
|
Mode::InputRange(mut range) => {
|
||||||
|
@ -125,9 +120,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
}
|
}
|
||||||
Mode::Default(filename) => {
|
Mode::Default(filename) => {
|
||||||
let fdata = read_input_file(&filename)?;
|
let fdata = read_input_file(&filename)?;
|
||||||
let mut fdata = vec![&fdata[..]];
|
let mut items = split_seps(&fdata, options.sep);
|
||||||
find_seps(&mut fdata, options.sep);
|
shuf_exec(&mut items, options)?;
|
||||||
shuf_exec(&mut fdata, options)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,6 +149,7 @@ pub fn uu_app() -> Command {
|
||||||
.long(options::INPUT_RANGE)
|
.long(options::INPUT_RANGE)
|
||||||
.value_name("LO-HI")
|
.value_name("LO-HI")
|
||||||
.help("treat each number LO through HI as an input line")
|
.help("treat each number LO through HI as an input line")
|
||||||
|
.value_parser(parse_range)
|
||||||
.conflicts_with(options::FILE_OR_ARGS),
|
.conflicts_with(options::FILE_OR_ARGS),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -163,7 +158,8 @@ pub fn uu_app() -> Command {
|
||||||
.long(options::HEAD_COUNT)
|
.long(options::HEAD_COUNT)
|
||||||
.value_name("COUNT")
|
.value_name("COUNT")
|
||||||
.action(clap::ArgAction::Append)
|
.action(clap::ArgAction::Append)
|
||||||
.help("output at most COUNT lines"),
|
.help("output at most COUNT lines")
|
||||||
|
.value_parser(usize::from_str),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::new(options::OUTPUT)
|
Arg::new(options::OUTPUT)
|
||||||
|
@ -171,6 +167,7 @@ pub fn uu_app() -> Command {
|
||||||
.long(options::OUTPUT)
|
.long(options::OUTPUT)
|
||||||
.value_name("FILE")
|
.value_name("FILE")
|
||||||
.help("write result to FILE instead of standard output")
|
.help("write result to FILE instead of standard output")
|
||||||
|
.value_parser(ValueParser::path_buf())
|
||||||
.value_hint(clap::ValueHint::FilePath),
|
.value_hint(clap::ValueHint::FilePath),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -178,6 +175,7 @@ pub fn uu_app() -> Command {
|
||||||
.long(options::RANDOM_SOURCE)
|
.long(options::RANDOM_SOURCE)
|
||||||
.value_name("FILE")
|
.value_name("FILE")
|
||||||
.help("get random bytes from FILE")
|
.help("get random bytes from FILE")
|
||||||
|
.value_parser(ValueParser::path_buf())
|
||||||
.value_hint(clap::ValueHint::FilePath),
|
.value_hint(clap::ValueHint::FilePath),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
|
@ -199,56 +197,33 @@ pub fn uu_app() -> Command {
|
||||||
.arg(
|
.arg(
|
||||||
Arg::new(options::FILE_OR_ARGS)
|
Arg::new(options::FILE_OR_ARGS)
|
||||||
.action(clap::ArgAction::Append)
|
.action(clap::ArgAction::Append)
|
||||||
|
.value_parser(ValueParser::os_string())
|
||||||
.value_hint(clap::ValueHint::FilePath),
|
.value_hint(clap::ValueHint::FilePath),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_input_file(filename: &str) -> UResult<Vec<u8>> {
|
fn read_input_file(filename: &Path) -> UResult<Vec<u8>> {
|
||||||
let mut file = BufReader::new(if filename == "-" {
|
if filename.as_os_str() == "-" {
|
||||||
Box::new(stdin()) as Box<dyn Read>
|
let mut data = Vec::new();
|
||||||
|
stdin()
|
||||||
|
.read_to_end(&mut data)
|
||||||
|
.map_err_context(|| "read error".into())?;
|
||||||
|
Ok(data)
|
||||||
} else {
|
} else {
|
||||||
let file = File::open(filename)
|
std::fs::read(filename).map_err_context(|| filename.maybe_quote().to_string())
|
||||||
.map_err_context(|| format!("failed to open {}", filename.quote()))?;
|
}
|
||||||
Box::new(file) as Box<dyn Read>
|
|
||||||
});
|
|
||||||
|
|
||||||
let mut data = Vec::new();
|
|
||||||
file.read_to_end(&mut data)
|
|
||||||
.map_err_context(|| format!("failed reading {}", filename.quote()))?;
|
|
||||||
|
|
||||||
Ok(data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_seps(data: &mut Vec<&[u8]>, sep: u8) {
|
fn split_seps(data: &[u8], sep: u8) -> Vec<&[u8]> {
|
||||||
// Special case: If data is empty (and does not even contain a single 'sep'
|
// A single trailing separator is ignored.
|
||||||
// to indicate the presence of the empty element), then behave as if the input contained no elements at all.
|
// If data is empty (and does not even contain a single 'sep'
|
||||||
if data.len() == 1 && data[0].is_empty() {
|
// to indicate the presence of an empty element), then behave
|
||||||
data.clear();
|
// as if the input contained no elements at all.
|
||||||
return;
|
let mut elements: Vec<&[u8]> = data.split(|&b| b == sep).collect();
|
||||||
}
|
if elements.last().is_some_and(|e| e.is_empty()) {
|
||||||
|
elements.pop();
|
||||||
// need to use for loop so we don't borrow the vector as we modify it in place
|
|
||||||
// basic idea:
|
|
||||||
// * We don't care about the order of the result. This lets us slice the slices
|
|
||||||
// without making a new vector.
|
|
||||||
// * Starting from the end of the vector, we examine each element.
|
|
||||||
// * If that element contains the separator, we remove it from the vector,
|
|
||||||
// and then sub-slice it into slices that do not contain the separator.
|
|
||||||
// * We maintain the invariant throughout that each element in the vector past
|
|
||||||
// the ith element does not have any separators remaining.
|
|
||||||
for i in (0..data.len()).rev() {
|
|
||||||
if data[i].contains(&sep) {
|
|
||||||
let this = data.swap_remove(i);
|
|
||||||
let mut p = 0;
|
|
||||||
for i in memchr_iter(sep, this) {
|
|
||||||
data.push(&this[p..i]);
|
|
||||||
p = i + 1;
|
|
||||||
}
|
|
||||||
if p < this.len() {
|
|
||||||
data.push(&this[p..]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
elements
|
||||||
}
|
}
|
||||||
|
|
||||||
trait Shufable {
|
trait Shufable {
|
||||||
|
@ -293,6 +268,24 @@ impl<'a> Shufable for Vec<&'a [u8]> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> Shufable for Vec<&'a OsStr> {
|
||||||
|
type Item = &'a OsStr;
|
||||||
|
fn is_empty(&self) -> bool {
|
||||||
|
(**self).is_empty()
|
||||||
|
}
|
||||||
|
fn choose(&self, rng: &mut WrappedRng) -> Self::Item {
|
||||||
|
(**self).choose(rng).unwrap()
|
||||||
|
}
|
||||||
|
type PartialShuffleIterator<'b> = std::iter::Copied<std::slice::Iter<'b, &'a OsStr>> where Self: 'b;
|
||||||
|
fn partial_shuffle<'b>(
|
||||||
|
&'b mut self,
|
||||||
|
rng: &'b mut WrappedRng,
|
||||||
|
amount: usize,
|
||||||
|
) -> Self::PartialShuffleIterator<'b> {
|
||||||
|
(**self).partial_shuffle(rng, amount).0.iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Shufable for RangeInclusive<usize> {
|
impl Shufable for RangeInclusive<usize> {
|
||||||
type Item = usize;
|
type Item = usize;
|
||||||
fn is_empty(&self) -> bool {
|
fn is_empty(&self) -> bool {
|
||||||
|
@ -330,7 +323,7 @@ impl<'a> NonrepeatingIterator<'a> {
|
||||||
fn new(range: RangeInclusive<usize>, rng: &'a mut WrappedRng, amount: usize) -> Self {
|
fn new(range: RangeInclusive<usize>, rng: &'a mut WrappedRng, amount: usize) -> Self {
|
||||||
let capped_amount = if range.start() > range.end() {
|
let capped_amount = if range.start() > range.end() {
|
||||||
0
|
0
|
||||||
} else if *range.start() == 0 && *range.end() == usize::MAX {
|
} else if range == (0..=usize::MAX) {
|
||||||
amount
|
amount
|
||||||
} else {
|
} else {
|
||||||
amount.min(range.end() - range.start() + 1)
|
amount.min(range.end() - range.start() + 1)
|
||||||
|
@ -404,34 +397,40 @@ fn number_set_should_list_remaining(listed_count: usize, range_size: usize) -> b
|
||||||
}
|
}
|
||||||
|
|
||||||
trait Writable {
|
trait Writable {
|
||||||
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>;
|
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Writable for &[u8] {
|
impl Writable for &[u8] {
|
||||||
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
|
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> {
|
||||||
output.write_all(self)
|
output.write_all(self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Writable for &OsStr {
|
||||||
|
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> {
|
||||||
|
output.write_all_os(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Writable for usize {
|
impl Writable for usize {
|
||||||
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
|
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> {
|
||||||
output.write_all(format!("{self}").as_bytes())
|
write!(output, "{self}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
||||||
let mut output = BufWriter::new(match opts.output {
|
let mut output = BufWriter::new(match opts.output {
|
||||||
None => Box::new(stdout()) as Box<dyn Write>,
|
None => Box::new(stdout()) as Box<dyn OsWrite>,
|
||||||
Some(s) => {
|
Some(s) => {
|
||||||
let file = File::create(&s[..])
|
let file = File::create(&s)
|
||||||
.map_err_context(|| format!("failed to open {} for writing", s.quote()))?;
|
.map_err_context(|| format!("failed to open {} for writing", s.quote()))?;
|
||||||
Box::new(file) as Box<dyn Write>
|
Box::new(file) as Box<dyn OsWrite>
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut rng = match opts.random_source {
|
let mut rng = match opts.random_source {
|
||||||
Some(r) => {
|
Some(r) => {
|
||||||
let file = File::open(&r[..])
|
let file = File::open(&r)
|
||||||
.map_err_context(|| format!("failed to open random source {}", r.quote()))?;
|
.map_err_context(|| format!("failed to open random source {}", r.quote()))?;
|
||||||
WrappedRng::RngFile(rand_read_adapter::ReadRng::new(file))
|
WrappedRng::RngFile(rand_read_adapter::ReadRng::new(file))
|
||||||
}
|
}
|
||||||
|
@ -467,33 +466,18 @@ fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
||||||
|
|
||||||
fn parse_range(input_range: &str) -> Result<RangeInclusive<usize>, String> {
|
fn parse_range(input_range: &str) -> Result<RangeInclusive<usize>, String> {
|
||||||
if let Some((from, to)) = input_range.split_once('-') {
|
if let Some((from, to)) = input_range.split_once('-') {
|
||||||
let begin = from
|
let begin = from.parse::<usize>().map_err(|e| e.to_string())?;
|
||||||
.parse::<usize>()
|
let end = to.parse::<usize>().map_err(|e| e.to_string())?;
|
||||||
.map_err(|_| format!("invalid input range: {}", from.quote()))?;
|
|
||||||
let end = to
|
|
||||||
.parse::<usize>()
|
|
||||||
.map_err(|_| format!("invalid input range: {}", to.quote()))?;
|
|
||||||
if begin <= end || begin == end + 1 {
|
if begin <= end || begin == end + 1 {
|
||||||
Ok(begin..=end)
|
Ok(begin..=end)
|
||||||
} else {
|
} else {
|
||||||
Err(format!("invalid input range: {}", input_range.quote()))
|
Err("start exceeds end".into())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Err(format!("invalid input range: {}", input_range.quote()))
|
Err("missing '-'".into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_head_count(headcounts: Vec<String>) -> Result<usize, String> {
|
|
||||||
let mut result = usize::MAX;
|
|
||||||
for count in headcounts {
|
|
||||||
match count.parse::<usize>() {
|
|
||||||
Ok(pv) => result = std::cmp::min(result, pv),
|
|
||||||
Err(_) => return Err(format!("invalid line count: {}", count.quote())),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
enum WrappedRng {
|
enum WrappedRng {
|
||||||
RngFile(rand_read_adapter::ReadRng<File>),
|
RngFile(rand_read_adapter::ReadRng<File>),
|
||||||
RngDefault(rand::rngs::ThreadRng),
|
RngDefault(rand::rngs::ThreadRng),
|
||||||
|
@ -522,6 +506,31 @@ impl RngCore for WrappedRng {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test_split_seps {
|
||||||
|
use super::split_seps;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_input() {
|
||||||
|
assert!(split_seps(b"", b'\n').is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_single_blank_line() {
|
||||||
|
assert_eq!(split_seps(b"\n", b'\n'), &[b""]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_with_trailing() {
|
||||||
|
assert_eq!(split_seps(b"a\nb\nc\n", b'\n'), &[b"a", b"b", b"c"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_without_trailing() {
|
||||||
|
assert_eq!(split_seps(b"a\nb\nc", b'\n'), &[b"a", b"b", b"c"]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
// Since the computed value is a bool, it is more readable to write the expected value out:
|
// Since the computed value is a bool, it is more readable to write the expected value out:
|
||||||
#[allow(clippy::bool_assert_comparison)]
|
#[allow(clippy::bool_assert_comparison)]
|
||||||
|
|
|
@ -25,7 +25,8 @@
|
||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::io::{self, Write as IoWrite};
|
use std::fs::File;
|
||||||
|
use std::io::{self, BufWriter, Stdout, StdoutLock, Write as IoWrite};
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
use std::os::unix::ffi::OsStrExt;
|
use std::os::unix::ffi::OsStrExt;
|
||||||
|
@ -42,33 +43,77 @@ pub use os_display::{Quotable, Quoted};
|
||||||
/// output is likely to be captured, like `pwd` and `basename`. For informational output
|
/// output is likely to be captured, like `pwd` and `basename`. For informational output
|
||||||
/// use `Quotable::quote`.
|
/// use `Quotable::quote`.
|
||||||
///
|
///
|
||||||
/// FIXME: This is lossy on Windows. It could probably be implemented using some low-level
|
/// FIXME: Invalid Unicode will produce an error on Windows. That could be fixed by
|
||||||
/// API that takes UTF-16, without going through io::Write. This is not a big priority
|
/// using low-level library calls and bypassing `io::Write`. This is not a big priority
|
||||||
/// because broken filenames are much rarer on Windows than on Unix.
|
/// because broken filenames are much rarer on Windows than on Unix.
|
||||||
pub fn println_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
|
pub fn println_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
|
||||||
let stdout = io::stdout();
|
let mut stdout = io::stdout().lock();
|
||||||
let mut stdout = stdout.lock();
|
stdout.write_all_os(text.as_ref())?;
|
||||||
#[cfg(any(unix, target_os = "wasi"))]
|
stdout.write_all(b"\n")?;
|
||||||
{
|
|
||||||
stdout.write_all(text.as_ref().as_bytes())?;
|
|
||||||
stdout.write_all(b"\n")?;
|
|
||||||
}
|
|
||||||
#[cfg(not(any(unix, target_os = "wasi")))]
|
|
||||||
{
|
|
||||||
writeln!(stdout, "{}", std::path::Path::new(text.as_ref()).display())?;
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Like `println_verbatim`, without the trailing newline.
|
/// Like `println_verbatim`, without the trailing newline.
|
||||||
pub fn print_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
|
pub fn print_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
|
||||||
let mut stdout = io::stdout();
|
io::stdout().write_all_os(text.as_ref())
|
||||||
#[cfg(any(unix, target_os = "wasi"))]
|
}
|
||||||
{
|
|
||||||
stdout.write_all(text.as_ref().as_bytes())
|
/// [`io::Write`], but for OS strings.
|
||||||
}
|
///
|
||||||
#[cfg(not(any(unix, target_os = "wasi")))]
|
/// On Unix this works straightforwardly.
|
||||||
{
|
///
|
||||||
write!(stdout, "{}", std::path::Path::new(text.as_ref()).display())
|
/// On Windows this currently returns an error if the OS string is not valid Unicode.
|
||||||
|
/// This may in the future change to allow those strings to be written to consoles.
|
||||||
|
pub trait OsWrite: io::Write {
|
||||||
|
/// Write the entire OS string into this writer.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// An error is returned if the underlying I/O operation fails.
|
||||||
|
///
|
||||||
|
/// On Windows, if the OS string is not valid Unicode, an error of kind
|
||||||
|
/// [`io::ErrorKind::InvalidData`] is returned.
|
||||||
|
fn write_all_os(&mut self, buf: &OsStr) -> io::Result<()> {
|
||||||
|
#[cfg(any(unix, target_os = "wasi"))]
|
||||||
|
{
|
||||||
|
self.write_all(buf.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(any(unix, target_os = "wasi")))]
|
||||||
|
{
|
||||||
|
// It's possible to write a better OsWrite impl for Windows consoles (e.g. Stdout)
|
||||||
|
// as those are fundamentally 16-bit. If the OS string is invalid then it can be
|
||||||
|
// encoded to 16-bit and written using raw windows_sys calls. But this is quite involved
|
||||||
|
// (see `sys/pal/windows/stdio.rs` in the stdlib) and the value-add is small.
|
||||||
|
//
|
||||||
|
// There's no way to write invalid OS strings to Windows files, as those are 8-bit.
|
||||||
|
|
||||||
|
match buf.to_str() {
|
||||||
|
Some(text) => self.write_all(text.as_bytes()),
|
||||||
|
// We could output replacement characters instead, but the
|
||||||
|
// stdlib errors when sending invalid UTF-8 to the console,
|
||||||
|
// so let's follow that.
|
||||||
|
None => Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidData,
|
||||||
|
"OS string cannot be converted to bytes",
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We do not have a blanket impl for all Write because a smarter Windows impl should
|
||||||
|
// be able to make use of AsRawHandle. Please keep this in mind when adding new impls.
|
||||||
|
impl OsWrite for File {}
|
||||||
|
impl OsWrite for Stdout {}
|
||||||
|
impl OsWrite for StdoutLock<'_> {}
|
||||||
|
// A future smarter Windows implementation can first flush the BufWriter before
|
||||||
|
// doing a raw write.
|
||||||
|
impl<W: OsWrite> OsWrite for BufWriter<W> {}
|
||||||
|
|
||||||
|
impl OsWrite for Box<dyn OsWrite> {
|
||||||
|
fn write_all_os(&mut self, buf: &OsStr) -> io::Result<()> {
|
||||||
|
let this: &mut dyn OsWrite = self;
|
||||||
|
this.write_all_os(buf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -362,6 +362,51 @@ fn test_echo_short_collapsed_zero() {
|
||||||
assert_eq!(result_seq, ["a", "b", "c"], "Output is not a permutation");
|
assert_eq!(result_seq, ["a", "b", "c"], "Output is not a permutation");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_echo_separators_in_arguments() {
|
||||||
|
// We used to split arguments themselves on newlines, but this was wrong.
|
||||||
|
// shuf should behave as though it's shuffling two arguments and therefore
|
||||||
|
// output all of them.
|
||||||
|
// (Note that arguments can't contain null bytes so we don't need to test that.)
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-e")
|
||||||
|
.arg("-n2")
|
||||||
|
.arg("a\nb")
|
||||||
|
.arg("c\nd")
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
assert_eq!(result.stdout_str().len(), 8, "Incorrect output length");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
#[test]
|
||||||
|
fn test_echo_invalid_unicode_in_arguments() {
|
||||||
|
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
|
||||||
|
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-e")
|
||||||
|
.arg(OsStr::from_bytes(b"a\xFFb"))
|
||||||
|
.arg("ok")
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
assert!(result.stdout().contains(&b'\xFF'));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(any(unix, target_os = "wasi"))]
|
||||||
|
#[cfg(not(target_os = "macos"))]
|
||||||
|
#[test]
|
||||||
|
fn test_invalid_unicode_in_filename() {
|
||||||
|
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
|
||||||
|
|
||||||
|
let (at, mut ucmd) = at_and_ucmd!();
|
||||||
|
let filename = OsStr::from_bytes(b"a\xFFb");
|
||||||
|
at.append(filename, "foo\n");
|
||||||
|
|
||||||
|
let result = ucmd.arg(filename).succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
assert_eq!(result.stdout(), b"foo\n");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_head_count() {
|
fn test_head_count() {
|
||||||
let repeat_limit = 5;
|
let repeat_limit = 5;
|
||||||
|
@ -647,23 +692,21 @@ fn test_shuf_invalid_input_range_one() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.args(&["-i", "0"])
|
.args(&["-i", "0"])
|
||||||
.fails()
|
.fails()
|
||||||
.stderr_contains("invalid input range");
|
.stderr_contains("invalid value '0' for '--input-range <LO-HI>': missing '-'");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shuf_invalid_input_range_two() {
|
fn test_shuf_invalid_input_range_two() {
|
||||||
new_ucmd!()
|
new_ucmd!().args(&["-i", "a-9"]).fails().stderr_contains(
|
||||||
.args(&["-i", "a-9"])
|
"invalid value 'a-9' for '--input-range <LO-HI>': invalid digit found in string",
|
||||||
.fails()
|
);
|
||||||
.stderr_contains("invalid input range: 'a'");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shuf_invalid_input_range_three() {
|
fn test_shuf_invalid_input_range_three() {
|
||||||
new_ucmd!()
|
new_ucmd!().args(&["-i", "0-b"]).fails().stderr_contains(
|
||||||
.args(&["-i", "0-b"])
|
"invalid value '0-b' for '--input-range <LO-HI>': invalid digit found in string",
|
||||||
.fails()
|
);
|
||||||
.stderr_contains("invalid input range: 'b'");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -702,10 +745,9 @@ fn test_shuf_three_input_files() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shuf_invalid_input_line_count() {
|
fn test_shuf_invalid_input_line_count() {
|
||||||
new_ucmd!()
|
new_ucmd!().args(&["-n", "a"]).fails().stderr_contains(
|
||||||
.args(&["-n", "a"])
|
"invalid value 'a' for '--head-count <COUNT>': invalid digit found in string",
|
||||||
.fails()
|
);
|
||||||
.stderr_contains("invalid line count: 'a'");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -772,7 +814,7 @@ fn test_range_empty_minus_one() {
|
||||||
.arg("-i5-3")
|
.arg("-i5-3")
|
||||||
.fails()
|
.fails()
|
||||||
.no_stdout()
|
.no_stdout()
|
||||||
.stderr_only("shuf: invalid input range: '5-3'\n");
|
.stderr_contains("invalid value '5-3' for '--input-range <LO-HI>': start exceeds end\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -802,5 +844,5 @@ fn test_range_repeat_empty_minus_one() {
|
||||||
.arg("-ri5-3")
|
.arg("-ri5-3")
|
||||||
.fails()
|
.fails()
|
||||||
.no_stdout()
|
.no_stdout()
|
||||||
.stderr_only("shuf: invalid input range: '5-3'\n");
|
.stderr_contains("invalid value '5-3' for '--input-range <LO-HI>': start exceeds end\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -892,7 +892,8 @@ impl AtPath {
|
||||||
.unwrap_or_else(|e| panic!("Couldn't write {name}: {e}"));
|
.unwrap_or_else(|e| panic!("Couldn't write {name}: {e}"));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn append(&self, name: &str, contents: &str) {
|
pub fn append(&self, name: impl AsRef<Path>, contents: &str) {
|
||||||
|
let name = name.as_ref();
|
||||||
log_info("write(append)", self.plus_as_string(name));
|
log_info("write(append)", self.plus_as_string(name));
|
||||||
let mut f = OpenOptions::new()
|
let mut f = OpenOptions::new()
|
||||||
.append(true)
|
.append(true)
|
||||||
|
@ -900,7 +901,7 @@ impl AtPath {
|
||||||
.open(self.plus(name))
|
.open(self.plus(name))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
f.write_all(contents.as_bytes())
|
f.write_all(contents.as_bytes())
|
||||||
.unwrap_or_else(|e| panic!("Couldn't write(append) {name}: {e}"));
|
.unwrap_or_else(|e| panic!("Couldn't write(append) {}: {e}", name.display()));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn append_bytes(&self, name: &str, contents: &[u8]) {
|
pub fn append_bytes(&self, name: &str, contents: &[u8]) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue