mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #7463 from blyxxyz/clean-shuf
Make `shuf` OsStr-compliant and bring newline handling in line with GNU
This commit is contained in:
commit
eed5c81060
6 changed files with 280 additions and 205 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3184,7 +3184,6 @@ name = "uu_shuf"
|
|||
version = "0.0.30"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"memchr",
|
||||
"rand 0.9.0",
|
||||
"rand_core 0.9.3",
|
||||
"uucore",
|
||||
|
|
|
@ -18,7 +18,6 @@ path = "src/shuf.rs"
|
|||
|
||||
[dependencies]
|
||||
clap = { workspace = true }
|
||||
memchr = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
rand_core = { workspace = true }
|
||||
uucore = { workspace = true }
|
||||
|
|
|
@ -5,23 +5,27 @@
|
|||
|
||||
// spell-checker:ignore (ToDO) cmdline evec nonrepeating seps shufable rvec fdata
|
||||
|
||||
use clap::builder::ValueParser;
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use memchr::memchr_iter;
|
||||
use rand::prelude::{IndexedRandom, SliceRandom};
|
||||
use rand::prelude::SliceRandom;
|
||||
use rand::seq::IndexedRandom;
|
||||
use rand::{Rng, RngCore};
|
||||
use std::collections::HashSet;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs::File;
|
||||
use std::io::{stdin, stdout, BufReader, BufWriter, Error, Read, Write};
|
||||
use std::io::{stdin, stdout, BufWriter, Error, Read, Write};
|
||||
use std::ops::RangeInclusive;
|
||||
use uucore::display::Quotable;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use uucore::display::{OsWrite, Quotable};
|
||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||
use uucore::{format_usage, help_about, help_usage};
|
||||
|
||||
mod rand_read_adapter;
|
||||
|
||||
enum Mode {
|
||||
Default(String),
|
||||
Echo(Vec<String>),
|
||||
Default(PathBuf),
|
||||
Echo(Vec<OsString>),
|
||||
InputRange(RangeInclusive<usize>),
|
||||
}
|
||||
|
||||
|
@ -30,8 +34,8 @@ static ABOUT: &str = help_about!("shuf.md");
|
|||
|
||||
struct Options {
|
||||
head_count: usize,
|
||||
output: Option<String>,
|
||||
random_source: Option<String>,
|
||||
output: Option<PathBuf>,
|
||||
random_source: Option<PathBuf>,
|
||||
repeat: bool,
|
||||
sep: u8,
|
||||
}
|
||||
|
@ -54,80 +58,83 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
let mode = if matches.get_flag(options::ECHO) {
|
||||
Mode::Echo(
|
||||
matches
|
||||
.get_many::<String>(options::FILE_OR_ARGS)
|
||||
.get_many(options::FILE_OR_ARGS)
|
||||
.unwrap_or_default()
|
||||
.map(String::from)
|
||||
.cloned()
|
||||
.collect(),
|
||||
)
|
||||
} else if let Some(range) = matches.get_one::<String>(options::INPUT_RANGE) {
|
||||
match parse_range(range) {
|
||||
Ok(m) => Mode::InputRange(m),
|
||||
Err(msg) => {
|
||||
return Err(USimpleError::new(1, msg));
|
||||
}
|
||||
}
|
||||
} else if let Some(range) = matches.get_one(options::INPUT_RANGE).cloned() {
|
||||
Mode::InputRange(range)
|
||||
} else {
|
||||
let mut operands = matches
|
||||
.get_many::<String>(options::FILE_OR_ARGS)
|
||||
.get_many::<OsString>(options::FILE_OR_ARGS)
|
||||
.unwrap_or_default();
|
||||
let file = operands.next().cloned().unwrap_or("-".into());
|
||||
if let Some(second_file) = operands.next() {
|
||||
return Err(UUsageError::new(
|
||||
1,
|
||||
format!("unexpected argument '{second_file}' found"),
|
||||
format!("unexpected argument {} found", second_file.quote()),
|
||||
));
|
||||
};
|
||||
Mode::Default(file)
|
||||
Mode::Default(file.into())
|
||||
};
|
||||
|
||||
let options = Options {
|
||||
head_count: {
|
||||
let headcounts = matches
|
||||
.get_many::<String>(options::HEAD_COUNT)
|
||||
// GNU shuf takes the lowest value passed, so we imitate that.
|
||||
// It's probably a bug or an implementation artifact though.
|
||||
// Busybox takes the final value which is more typical: later
|
||||
// options override earlier options.
|
||||
head_count: matches
|
||||
.get_many::<usize>(options::HEAD_COUNT)
|
||||
.unwrap_or_default()
|
||||
.cloned()
|
||||
.collect();
|
||||
match parse_head_count(headcounts) {
|
||||
Ok(val) => val,
|
||||
Err(msg) => return Err(USimpleError::new(1, msg)),
|
||||
}
|
||||
},
|
||||
output: matches.get_one::<String>(options::OUTPUT).map(String::from),
|
||||
random_source: matches
|
||||
.get_one::<String>(options::RANDOM_SOURCE)
|
||||
.map(String::from),
|
||||
.min()
|
||||
.unwrap_or(usize::MAX),
|
||||
output: matches.get_one(options::OUTPUT).cloned(),
|
||||
random_source: matches.get_one(options::RANDOM_SOURCE).cloned(),
|
||||
repeat: matches.get_flag(options::REPEAT),
|
||||
sep: if matches.get_flag(options::ZERO_TERMINATED) {
|
||||
0x00_u8
|
||||
b'\0'
|
||||
} else {
|
||||
0x0a_u8
|
||||
b'\n'
|
||||
},
|
||||
};
|
||||
|
||||
if options.head_count == 0 {
|
||||
// Do not attempt to read the random source or the input file.
|
||||
// However, we must touch the output file, if given:
|
||||
if let Some(s) = options.output {
|
||||
File::create(&s[..])
|
||||
let mut output = BufWriter::new(match options.output {
|
||||
None => Box::new(stdout()) as Box<dyn OsWrite>,
|
||||
Some(ref s) => {
|
||||
let file = File::create(s)
|
||||
.map_err_context(|| format!("failed to open {} for writing", s.quote()))?;
|
||||
Box::new(file) as Box<dyn OsWrite>
|
||||
}
|
||||
});
|
||||
|
||||
if options.head_count == 0 {
|
||||
// In this case we do want to touch the output file but we can quit immediately.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut rng = match options.random_source {
|
||||
Some(ref r) => {
|
||||
let file = File::open(r)
|
||||
.map_err_context(|| format!("failed to open random source {}", r.quote()))?;
|
||||
WrappedRng::RngFile(rand_read_adapter::ReadRng::new(file))
|
||||
}
|
||||
None => WrappedRng::RngDefault(rand::rng()),
|
||||
};
|
||||
|
||||
match mode {
|
||||
Mode::Echo(args) => {
|
||||
let mut evec = args.iter().map(String::as_bytes).collect::<Vec<_>>();
|
||||
find_seps(&mut evec, options.sep);
|
||||
shuf_exec(&mut evec, options)?;
|
||||
let mut evec: Vec<&OsStr> = args.iter().map(AsRef::as_ref).collect();
|
||||
shuf_exec(&mut evec, &options, &mut rng, &mut output)?;
|
||||
}
|
||||
Mode::InputRange(mut range) => {
|
||||
shuf_exec(&mut range, options)?;
|
||||
shuf_exec(&mut range, &options, &mut rng, &mut output)?;
|
||||
}
|
||||
Mode::Default(filename) => {
|
||||
let fdata = read_input_file(&filename)?;
|
||||
let mut fdata = vec![&fdata[..]];
|
||||
find_seps(&mut fdata, options.sep);
|
||||
shuf_exec(&mut fdata, options)?;
|
||||
let mut items = split_seps(&fdata, options.sep);
|
||||
shuf_exec(&mut items, &options, &mut rng, &mut output)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -155,6 +162,7 @@ pub fn uu_app() -> Command {
|
|||
.long(options::INPUT_RANGE)
|
||||
.value_name("LO-HI")
|
||||
.help("treat each number LO through HI as an input line")
|
||||
.value_parser(parse_range)
|
||||
.conflicts_with(options::FILE_OR_ARGS),
|
||||
)
|
||||
.arg(
|
||||
|
@ -163,7 +171,8 @@ pub fn uu_app() -> Command {
|
|||
.long(options::HEAD_COUNT)
|
||||
.value_name("COUNT")
|
||||
.action(clap::ArgAction::Append)
|
||||
.help("output at most COUNT lines"),
|
||||
.help("output at most COUNT lines")
|
||||
.value_parser(usize::from_str),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(options::OUTPUT)
|
||||
|
@ -171,6 +180,7 @@ pub fn uu_app() -> Command {
|
|||
.long(options::OUTPUT)
|
||||
.value_name("FILE")
|
||||
.help("write result to FILE instead of standard output")
|
||||
.value_parser(ValueParser::path_buf())
|
||||
.value_hint(clap::ValueHint::FilePath),
|
||||
)
|
||||
.arg(
|
||||
|
@ -178,6 +188,7 @@ pub fn uu_app() -> Command {
|
|||
.long(options::RANDOM_SOURCE)
|
||||
.value_name("FILE")
|
||||
.help("get random bytes from FILE")
|
||||
.value_parser(ValueParser::path_buf())
|
||||
.value_hint(clap::ValueHint::FilePath),
|
||||
)
|
||||
.arg(
|
||||
|
@ -199,73 +210,44 @@ pub fn uu_app() -> Command {
|
|||
.arg(
|
||||
Arg::new(options::FILE_OR_ARGS)
|
||||
.action(clap::ArgAction::Append)
|
||||
.value_parser(ValueParser::os_string())
|
||||
.value_hint(clap::ValueHint::FilePath),
|
||||
)
|
||||
}
|
||||
|
||||
fn read_input_file(filename: &str) -> UResult<Vec<u8>> {
|
||||
let mut file = BufReader::new(if filename == "-" {
|
||||
Box::new(stdin()) as Box<dyn Read>
|
||||
} else {
|
||||
let file = File::open(filename)
|
||||
.map_err_context(|| format!("failed to open {}", filename.quote()))?;
|
||||
Box::new(file) as Box<dyn Read>
|
||||
});
|
||||
|
||||
fn read_input_file(filename: &Path) -> UResult<Vec<u8>> {
|
||||
if filename.as_os_str() == "-" {
|
||||
let mut data = Vec::new();
|
||||
file.read_to_end(&mut data)
|
||||
.map_err_context(|| format!("failed reading {}", filename.quote()))?;
|
||||
|
||||
stdin()
|
||||
.read_to_end(&mut data)
|
||||
.map_err_context(|| "read error".into())?;
|
||||
Ok(data)
|
||||
} else {
|
||||
std::fs::read(filename).map_err_context(|| filename.maybe_quote().to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn find_seps(data: &mut Vec<&[u8]>, sep: u8) {
|
||||
// Special case: If data is empty (and does not even contain a single 'sep'
|
||||
// to indicate the presence of the empty element), then behave as if the input contained no elements at all.
|
||||
if data.len() == 1 && data[0].is_empty() {
|
||||
data.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// need to use for loop so we don't borrow the vector as we modify it in place
|
||||
// basic idea:
|
||||
// * We don't care about the order of the result. This lets us slice the slices
|
||||
// without making a new vector.
|
||||
// * Starting from the end of the vector, we examine each element.
|
||||
// * If that element contains the separator, we remove it from the vector,
|
||||
// and then sub-slice it into slices that do not contain the separator.
|
||||
// * We maintain the invariant throughout that each element in the vector past
|
||||
// the ith element does not have any separators remaining.
|
||||
for i in (0..data.len()).rev() {
|
||||
if data[i].contains(&sep) {
|
||||
let this = data.swap_remove(i);
|
||||
let mut p = 0;
|
||||
for i in memchr_iter(sep, this) {
|
||||
data.push(&this[p..i]);
|
||||
p = i + 1;
|
||||
}
|
||||
if p < this.len() {
|
||||
data.push(&this[p..]);
|
||||
}
|
||||
}
|
||||
fn split_seps(data: &[u8], sep: u8) -> Vec<&[u8]> {
|
||||
// A single trailing separator is ignored.
|
||||
// If data is empty (and does not even contain a single 'sep'
|
||||
// to indicate the presence of an empty element), then behave
|
||||
// as if the input contained no elements at all.
|
||||
let mut elements: Vec<&[u8]> = data.split(|&b| b == sep).collect();
|
||||
if elements.last().is_some_and(|e| e.is_empty()) {
|
||||
elements.pop();
|
||||
}
|
||||
elements
|
||||
}
|
||||
|
||||
trait Shufable {
|
||||
type Item: Writable;
|
||||
fn is_empty(&self) -> bool;
|
||||
fn choose(&self, rng: &mut WrappedRng) -> Self::Item;
|
||||
// This type shouldn't even be known. However, because we want to support
|
||||
// Rust 1.70, it is not possible to return "impl Iterator".
|
||||
// TODO: When the MSRV is raised, rewrite this to return "impl Iterator".
|
||||
type PartialShuffleIterator<'b>: Iterator<Item = Self::Item>
|
||||
where
|
||||
Self: 'b;
|
||||
fn partial_shuffle<'b>(
|
||||
&'b mut self,
|
||||
rng: &'b mut WrappedRng,
|
||||
amount: usize,
|
||||
) -> Self::PartialShuffleIterator<'b>;
|
||||
) -> impl Iterator<Item = Self::Item>;
|
||||
}
|
||||
|
||||
impl<'a> Shufable for Vec<&'a [u8]> {
|
||||
|
@ -279,20 +261,33 @@ impl<'a> Shufable for Vec<&'a [u8]> {
|
|||
// this is safe.
|
||||
(**self).choose(rng).unwrap()
|
||||
}
|
||||
type PartialShuffleIterator<'b>
|
||||
= std::iter::Copied<std::slice::Iter<'b, &'a [u8]>>
|
||||
where
|
||||
Self: 'b;
|
||||
fn partial_shuffle<'b>(
|
||||
&'b mut self,
|
||||
rng: &'b mut WrappedRng,
|
||||
amount: usize,
|
||||
) -> Self::PartialShuffleIterator<'b> {
|
||||
) -> impl Iterator<Item = Self::Item> {
|
||||
// Note: "copied()" only copies the reference, not the entire [u8].
|
||||
(**self).partial_shuffle(rng, amount).0.iter().copied()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Shufable for Vec<&'a OsStr> {
|
||||
type Item = &'a OsStr;
|
||||
fn is_empty(&self) -> bool {
|
||||
(**self).is_empty()
|
||||
}
|
||||
fn choose(&self, rng: &mut WrappedRng) -> Self::Item {
|
||||
(**self).choose(rng).unwrap()
|
||||
}
|
||||
fn partial_shuffle<'b>(
|
||||
&'b mut self,
|
||||
rng: &'b mut WrappedRng,
|
||||
amount: usize,
|
||||
) -> impl Iterator<Item = Self::Item> {
|
||||
(**self).partial_shuffle(rng, amount).0.iter().copied()
|
||||
}
|
||||
}
|
||||
|
||||
impl Shufable for RangeInclusive<usize> {
|
||||
type Item = usize;
|
||||
fn is_empty(&self) -> bool {
|
||||
|
@ -301,15 +296,11 @@ impl Shufable for RangeInclusive<usize> {
|
|||
fn choose(&self, rng: &mut WrappedRng) -> usize {
|
||||
rng.random_range(self.clone())
|
||||
}
|
||||
type PartialShuffleIterator<'b>
|
||||
= NonrepeatingIterator<'b>
|
||||
where
|
||||
Self: 'b;
|
||||
fn partial_shuffle<'b>(
|
||||
&'b mut self,
|
||||
rng: &'b mut WrappedRng,
|
||||
amount: usize,
|
||||
) -> Self::PartialShuffleIterator<'b> {
|
||||
) -> impl Iterator<Item = Self::Item> {
|
||||
NonrepeatingIterator::new(self.clone(), rng, amount)
|
||||
}
|
||||
}
|
||||
|
@ -330,7 +321,7 @@ impl<'a> NonrepeatingIterator<'a> {
|
|||
fn new(range: RangeInclusive<usize>, rng: &'a mut WrappedRng, amount: usize) -> Self {
|
||||
let capped_amount = if range.start() > range.end() {
|
||||
0
|
||||
} else if *range.start() == 0 && *range.end() == usize::MAX {
|
||||
} else if range == (0..=usize::MAX) {
|
||||
amount
|
||||
} else {
|
||||
amount.min(range.end() - range.start() + 1)
|
||||
|
@ -404,61 +395,49 @@ fn number_set_should_list_remaining(listed_count: usize, range_size: usize) -> b
|
|||
}
|
||||
|
||||
trait Writable {
|
||||
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>;
|
||||
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error>;
|
||||
}
|
||||
|
||||
impl Writable for &[u8] {
|
||||
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
|
||||
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> {
|
||||
output.write_all(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Writable for &OsStr {
|
||||
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> {
|
||||
output.write_all_os(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Writable for usize {
|
||||
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
|
||||
output.write_all(format!("{self}").as_bytes())
|
||||
fn write_all_to(&self, output: &mut impl OsWrite) -> Result<(), Error> {
|
||||
write!(output, "{self}")
|
||||
}
|
||||
}
|
||||
|
||||
fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
||||
let mut output = BufWriter::new(match opts.output {
|
||||
None => Box::new(stdout()) as Box<dyn Write>,
|
||||
Some(s) => {
|
||||
let file = File::create(&s[..])
|
||||
.map_err_context(|| format!("failed to open {} for writing", s.quote()))?;
|
||||
Box::new(file) as Box<dyn Write>
|
||||
}
|
||||
});
|
||||
|
||||
let mut rng = match opts.random_source {
|
||||
Some(r) => {
|
||||
let file = File::open(&r[..])
|
||||
.map_err_context(|| format!("failed to open random source {}", r.quote()))?;
|
||||
WrappedRng::RngFile(rand_read_adapter::ReadRng::new(file))
|
||||
}
|
||||
None => WrappedRng::RngDefault(rand::rng()),
|
||||
};
|
||||
|
||||
fn shuf_exec(
|
||||
input: &mut impl Shufable,
|
||||
opts: &Options,
|
||||
rng: &mut WrappedRng,
|
||||
output: &mut BufWriter<Box<dyn OsWrite>>,
|
||||
) -> UResult<()> {
|
||||
let ctx = || "write failed".to_string();
|
||||
if opts.repeat {
|
||||
if input.is_empty() {
|
||||
return Err(USimpleError::new(1, "no lines to repeat"));
|
||||
}
|
||||
for _ in 0..opts.head_count {
|
||||
let r = input.choose(&mut rng);
|
||||
let r = input.choose(rng);
|
||||
|
||||
r.write_all_to(&mut output)
|
||||
.map_err_context(|| "write failed".to_string())?;
|
||||
output
|
||||
.write_all(&[opts.sep])
|
||||
.map_err_context(|| "write failed".to_string())?;
|
||||
r.write_all_to(output).map_err_context(ctx)?;
|
||||
output.write_all(&[opts.sep]).map_err_context(ctx)?;
|
||||
}
|
||||
} else {
|
||||
let shuffled = input.partial_shuffle(&mut rng, opts.head_count);
|
||||
let shuffled = input.partial_shuffle(rng, opts.head_count);
|
||||
for r in shuffled {
|
||||
r.write_all_to(&mut output)
|
||||
.map_err_context(|| "write failed".to_string())?;
|
||||
output
|
||||
.write_all(&[opts.sep])
|
||||
.map_err_context(|| "write failed".to_string())?;
|
||||
r.write_all_to(output).map_err_context(ctx)?;
|
||||
output.write_all(&[opts.sep]).map_err_context(ctx)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -467,33 +446,18 @@ fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
|||
|
||||
fn parse_range(input_range: &str) -> Result<RangeInclusive<usize>, String> {
|
||||
if let Some((from, to)) = input_range.split_once('-') {
|
||||
let begin = from
|
||||
.parse::<usize>()
|
||||
.map_err(|_| format!("invalid input range: {}", from.quote()))?;
|
||||
let end = to
|
||||
.parse::<usize>()
|
||||
.map_err(|_| format!("invalid input range: {}", to.quote()))?;
|
||||
let begin = from.parse::<usize>().map_err(|e| e.to_string())?;
|
||||
let end = to.parse::<usize>().map_err(|e| e.to_string())?;
|
||||
if begin <= end || begin == end + 1 {
|
||||
Ok(begin..=end)
|
||||
} else {
|
||||
Err(format!("invalid input range: {}", input_range.quote()))
|
||||
Err("start exceeds end".into())
|
||||
}
|
||||
} else {
|
||||
Err(format!("invalid input range: {}", input_range.quote()))
|
||||
Err("missing '-'".into())
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_head_count(headcounts: Vec<String>) -> Result<usize, String> {
|
||||
let mut result = usize::MAX;
|
||||
for count in headcounts {
|
||||
match count.parse::<usize>() {
|
||||
Ok(pv) => result = std::cmp::min(result, pv),
|
||||
Err(_) => return Err(format!("invalid line count: {}", count.quote())),
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
enum WrappedRng {
|
||||
RngFile(rand_read_adapter::ReadRng<File>),
|
||||
RngDefault(rand::rngs::ThreadRng),
|
||||
|
@ -522,6 +486,31 @@ impl RngCore for WrappedRng {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_split_seps {
|
||||
use super::split_seps;
|
||||
|
||||
#[test]
|
||||
fn test_empty_input() {
|
||||
assert!(split_seps(b"", b'\n').is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_blank_line() {
|
||||
assert_eq!(split_seps(b"\n", b'\n'), &[b""]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_with_trailing() {
|
||||
assert_eq!(split_seps(b"a\nb\nc\n", b'\n'), &[b"a", b"b", b"c"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_without_trailing() {
|
||||
assert_eq!(split_seps(b"a\nb\nc", b'\n'), &[b"a", b"b", b"c"]);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
// Since the computed value is a bool, it is more readable to write the expected value out:
|
||||
#[allow(clippy::bool_assert_comparison)]
|
||||
|
|
|
@ -25,7 +25,8 @@
|
|||
//! ```
|
||||
|
||||
use std::ffi::OsStr;
|
||||
use std::io::{self, Write as IoWrite};
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufWriter, Stdout, StdoutLock, Write as IoWrite};
|
||||
|
||||
#[cfg(unix)]
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
@ -42,33 +43,77 @@ pub use os_display::{Quotable, Quoted};
|
|||
/// output is likely to be captured, like `pwd` and `basename`. For informational output
|
||||
/// use `Quotable::quote`.
|
||||
///
|
||||
/// FIXME: This is lossy on Windows. It could probably be implemented using some low-level
|
||||
/// API that takes UTF-16, without going through io::Write. This is not a big priority
|
||||
/// FIXME: Invalid Unicode will produce an error on Windows. That could be fixed by
|
||||
/// using low-level library calls and bypassing `io::Write`. This is not a big priority
|
||||
/// because broken filenames are much rarer on Windows than on Unix.
|
||||
pub fn println_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
{
|
||||
stdout.write_all(text.as_ref().as_bytes())?;
|
||||
let mut stdout = io::stdout().lock();
|
||||
stdout.write_all_os(text.as_ref())?;
|
||||
stdout.write_all(b"\n")?;
|
||||
}
|
||||
#[cfg(not(any(unix, target_os = "wasi")))]
|
||||
{
|
||||
writeln!(stdout, "{}", std::path::Path::new(text.as_ref()).display())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like `println_verbatim`, without the trailing newline.
|
||||
pub fn print_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
|
||||
let mut stdout = io::stdout();
|
||||
io::stdout().write_all_os(text.as_ref())
|
||||
}
|
||||
|
||||
/// [`io::Write`], but for OS strings.
|
||||
///
|
||||
/// On Unix this works straightforwardly.
|
||||
///
|
||||
/// On Windows this currently returns an error if the OS string is not valid Unicode.
|
||||
/// This may in the future change to allow those strings to be written to consoles.
|
||||
pub trait OsWrite: io::Write {
|
||||
/// Write the entire OS string into this writer.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// An error is returned if the underlying I/O operation fails.
|
||||
///
|
||||
/// On Windows, if the OS string is not valid Unicode, an error of kind
|
||||
/// [`io::ErrorKind::InvalidData`] is returned.
|
||||
fn write_all_os(&mut self, buf: &OsStr) -> io::Result<()> {
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
{
|
||||
stdout.write_all(text.as_ref().as_bytes())
|
||||
self.write_all(buf.as_bytes())
|
||||
}
|
||||
|
||||
#[cfg(not(any(unix, target_os = "wasi")))]
|
||||
{
|
||||
write!(stdout, "{}", std::path::Path::new(text.as_ref()).display())
|
||||
// It's possible to write a better OsWrite impl for Windows consoles (e.g. Stdout)
|
||||
// as those are fundamentally 16-bit. If the OS string is invalid then it can be
|
||||
// encoded to 16-bit and written using raw windows_sys calls. But this is quite involved
|
||||
// (see `sys/pal/windows/stdio.rs` in the stdlib) and the value-add is small.
|
||||
//
|
||||
// There's no way to write invalid OS strings to Windows files, as those are 8-bit.
|
||||
|
||||
match buf.to_str() {
|
||||
Some(text) => self.write_all(text.as_bytes()),
|
||||
// We could output replacement characters instead, but the
|
||||
// stdlib errors when sending invalid UTF-8 to the console,
|
||||
// so let's follow that.
|
||||
None => Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"OS string cannot be converted to bytes",
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We do not have a blanket impl for all Write because a smarter Windows impl should
|
||||
// be able to make use of AsRawHandle. Please keep this in mind when adding new impls.
|
||||
impl OsWrite for File {}
|
||||
impl OsWrite for Stdout {}
|
||||
impl OsWrite for StdoutLock<'_> {}
|
||||
// A future smarter Windows implementation can first flush the BufWriter before
|
||||
// doing a raw write.
|
||||
impl<W: OsWrite> OsWrite for BufWriter<W> {}
|
||||
|
||||
impl OsWrite for Box<dyn OsWrite> {
|
||||
fn write_all_os(&mut self, buf: &OsStr) -> io::Result<()> {
|
||||
let this: &mut dyn OsWrite = self;
|
||||
this.write_all_os(buf)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -362,6 +362,51 @@ fn test_echo_short_collapsed_zero() {
|
|||
assert_eq!(result_seq, ["a", "b", "c"], "Output is not a permutation");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_echo_separators_in_arguments() {
|
||||
// We used to split arguments themselves on newlines, but this was wrong.
|
||||
// shuf should behave as though it's shuffling two arguments and therefore
|
||||
// output all of them.
|
||||
// (Note that arguments can't contain null bytes so we don't need to test that.)
|
||||
let result = new_ucmd!()
|
||||
.arg("-e")
|
||||
.arg("-n2")
|
||||
.arg("a\nb")
|
||||
.arg("c\nd")
|
||||
.succeeds();
|
||||
result.no_stderr();
|
||||
assert_eq!(result.stdout_str().len(), 8, "Incorrect output length");
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[test]
|
||||
fn test_echo_invalid_unicode_in_arguments() {
|
||||
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
|
||||
|
||||
let result = new_ucmd!()
|
||||
.arg("-e")
|
||||
.arg(OsStr::from_bytes(b"a\xFFb"))
|
||||
.arg("ok")
|
||||
.succeeds();
|
||||
result.no_stderr();
|
||||
assert!(result.stdout().contains(&b'\xFF'));
|
||||
}
|
||||
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
#[test]
|
||||
fn test_invalid_unicode_in_filename() {
|
||||
use std::{ffi::OsStr, os::unix::ffi::OsStrExt};
|
||||
|
||||
let (at, mut ucmd) = at_and_ucmd!();
|
||||
let filename = OsStr::from_bytes(b"a\xFFb");
|
||||
at.append(filename, "foo\n");
|
||||
|
||||
let result = ucmd.arg(filename).succeeds();
|
||||
result.no_stderr();
|
||||
assert_eq!(result.stdout(), b"foo\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_head_count() {
|
||||
let repeat_limit = 5;
|
||||
|
@ -647,23 +692,21 @@ fn test_shuf_invalid_input_range_one() {
|
|||
new_ucmd!()
|
||||
.args(&["-i", "0"])
|
||||
.fails()
|
||||
.stderr_contains("invalid input range");
|
||||
.stderr_contains("invalid value '0' for '--input-range <LO-HI>': missing '-'");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shuf_invalid_input_range_two() {
|
||||
new_ucmd!()
|
||||
.args(&["-i", "a-9"])
|
||||
.fails()
|
||||
.stderr_contains("invalid input range: 'a'");
|
||||
new_ucmd!().args(&["-i", "a-9"]).fails().stderr_contains(
|
||||
"invalid value 'a-9' for '--input-range <LO-HI>': invalid digit found in string",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shuf_invalid_input_range_three() {
|
||||
new_ucmd!()
|
||||
.args(&["-i", "0-b"])
|
||||
.fails()
|
||||
.stderr_contains("invalid input range: 'b'");
|
||||
new_ucmd!().args(&["-i", "0-b"]).fails().stderr_contains(
|
||||
"invalid value '0-b' for '--input-range <LO-HI>': invalid digit found in string",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -702,10 +745,9 @@ fn test_shuf_three_input_files() {
|
|||
|
||||
#[test]
|
||||
fn test_shuf_invalid_input_line_count() {
|
||||
new_ucmd!()
|
||||
.args(&["-n", "a"])
|
||||
.fails()
|
||||
.stderr_contains("invalid line count: 'a'");
|
||||
new_ucmd!().args(&["-n", "a"]).fails().stderr_contains(
|
||||
"invalid value 'a' for '--head-count <COUNT>': invalid digit found in string",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -772,7 +814,7 @@ fn test_range_empty_minus_one() {
|
|||
.arg("-i5-3")
|
||||
.fails()
|
||||
.no_stdout()
|
||||
.stderr_only("shuf: invalid input range: '5-3'\n");
|
||||
.stderr_contains("invalid value '5-3' for '--input-range <LO-HI>': start exceeds end\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -802,5 +844,5 @@ fn test_range_repeat_empty_minus_one() {
|
|||
.arg("-ri5-3")
|
||||
.fails()
|
||||
.no_stdout()
|
||||
.stderr_only("shuf: invalid input range: '5-3'\n");
|
||||
.stderr_contains("invalid value '5-3' for '--input-range <LO-HI>': start exceeds end\n");
|
||||
}
|
||||
|
|
|
@ -892,7 +892,8 @@ impl AtPath {
|
|||
.unwrap_or_else(|e| panic!("Couldn't write {name}: {e}"));
|
||||
}
|
||||
|
||||
pub fn append(&self, name: &str, contents: &str) {
|
||||
pub fn append(&self, name: impl AsRef<Path>, contents: &str) {
|
||||
let name = name.as_ref();
|
||||
log_info("write(append)", self.plus_as_string(name));
|
||||
let mut f = OpenOptions::new()
|
||||
.append(true)
|
||||
|
@ -900,7 +901,7 @@ impl AtPath {
|
|||
.open(self.plus(name))
|
||||
.unwrap();
|
||||
f.write_all(contents.as_bytes())
|
||||
.unwrap_or_else(|e| panic!("Couldn't write(append) {name}: {e}"));
|
||||
.unwrap_or_else(|e| panic!("Couldn't write(append) {}: {e}", name.display()));
|
||||
}
|
||||
|
||||
pub fn append_bytes(&self, name: &str, contents: &[u8]) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue