mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
shuf: extract minimal required interface between data and core loop
This commit is contained in:
parent
5a2e0c700e
commit
989178f438
1 changed file with 65 additions and 14 deletions
|
@ -3,14 +3,14 @@
|
||||||
// For the full copyright and license information, please view the LICENSE
|
// For the full copyright and license information, please view the LICENSE
|
||||||
// file that was distributed with this source code.
|
// file that was distributed with this source code.
|
||||||
|
|
||||||
// spell-checker:ignore (ToDO) cmdline evec seps rvec fdata
|
// spell-checker:ignore (ToDO) cmdline evec seps shufable rvec fdata
|
||||||
|
|
||||||
use clap::{crate_version, Arg, ArgAction, Command};
|
use clap::{crate_version, Arg, ArgAction, Command};
|
||||||
use memchr::memchr_iter;
|
use memchr::memchr_iter;
|
||||||
use rand::prelude::SliceRandom;
|
use rand::prelude::SliceRandom;
|
||||||
use rand::RngCore;
|
use rand::RngCore;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufReader, BufWriter, Read, Write};
|
use std::io::{stdin, stdout, BufReader, BufWriter, Error, Read, Write};
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{format_usage, help_about, help_usage};
|
||||||
|
@ -116,18 +116,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
Mode::Echo(args) => {
|
Mode::Echo(args) => {
|
||||||
let mut evec = args.iter().map(String::as_bytes).collect::<Vec<_>>();
|
let mut evec = args.iter().map(String::as_bytes).collect::<Vec<_>>();
|
||||||
find_seps(&mut evec, options.sep);
|
find_seps(&mut evec, options.sep);
|
||||||
shuf_bytes(&mut evec, options)?;
|
shuf_exec(&mut evec, options)?;
|
||||||
}
|
}
|
||||||
Mode::InputRange((b, e)) => {
|
Mode::InputRange((b, e)) => {
|
||||||
let rvec = (b..e).map(|x| format!("{x}")).collect::<Vec<String>>();
|
let rvec = (b..e).map(|x| format!("{x}")).collect::<Vec<String>>();
|
||||||
let mut rvec = rvec.iter().map(String::as_bytes).collect::<Vec<&[u8]>>();
|
let mut rvec = rvec.iter().map(String::as_bytes).collect::<Vec<&[u8]>>();
|
||||||
shuf_bytes(&mut rvec, options)?;
|
shuf_exec(&mut rvec, options)?;
|
||||||
}
|
}
|
||||||
Mode::Default(filename) => {
|
Mode::Default(filename) => {
|
||||||
let fdata = read_input_file(&filename)?;
|
let fdata = read_input_file(&filename)?;
|
||||||
let mut fdata = vec![&fdata[..]];
|
let mut fdata = vec![&fdata[..]];
|
||||||
find_seps(&mut fdata, options.sep);
|
find_seps(&mut fdata, options.sep);
|
||||||
shuf_bytes(&mut fdata, options)?;
|
shuf_exec(&mut fdata, options)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,7 +251,62 @@ fn find_seps(data: &mut Vec<&[u8]>, sep: u8) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shuf_bytes(input: &mut Vec<&[u8]>, opts: Options) -> UResult<()> {
|
trait Shufable {
|
||||||
|
type Item: Writable;
|
||||||
|
fn is_empty(&self) -> bool;
|
||||||
|
fn choose(&self, rng: &mut WrappedRng) -> Self::Item;
|
||||||
|
// This type shouldn't even be known. However, because we want to support
|
||||||
|
// Rust 1.70, it is not possible to return "impl Iterator".
|
||||||
|
// TODO: When the MSRV is raised, rewrite this to return "impl Iterator".
|
||||||
|
type PartialShuffleIterator<'b>: Iterator<Item = Self::Item>
|
||||||
|
where
|
||||||
|
Self: 'b;
|
||||||
|
fn partial_shuffle<'b>(
|
||||||
|
&'b mut self,
|
||||||
|
rng: &'b mut WrappedRng,
|
||||||
|
amount: usize,
|
||||||
|
) -> Self::PartialShuffleIterator<'b>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Shufable for Vec<&'a [u8]> {
|
||||||
|
type Item = &'a [u8];
|
||||||
|
fn is_empty(&self) -> bool {
|
||||||
|
(**self).is_empty()
|
||||||
|
}
|
||||||
|
fn choose(&self, rng: &mut WrappedRng) -> Self::Item {
|
||||||
|
// Note: "copied()" only copies the reference, not the entire [u8].
|
||||||
|
// Returns None if the slice is empty. We checked this before, so
|
||||||
|
// this is safe.
|
||||||
|
(**self).choose(rng).unwrap()
|
||||||
|
}
|
||||||
|
type PartialShuffleIterator<'b> = std::iter::Copied<std::slice::Iter<'b, &'a [u8]>> where Self: 'b;
|
||||||
|
fn partial_shuffle<'b>(
|
||||||
|
&'b mut self,
|
||||||
|
rng: &'b mut WrappedRng,
|
||||||
|
amount: usize,
|
||||||
|
) -> Self::PartialShuffleIterator<'b> {
|
||||||
|
// Note: "copied()" only copies the reference, not the entire [u8].
|
||||||
|
(**self).partial_shuffle(rng, amount).0.iter().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An item that knows how to emit itself into an output stream.
///
/// This lets the shuffle loop write items without knowing how they are
/// represented. The loop writes the separator itself.
trait Writable {
    /// Writes the complete representation of `self` to `output`.
    ///
    /// # Errors
    /// Returns an I/O `Error` if the item could not be written.
    fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>;
}
|
||||||
|
|
||||||
|
impl<'a> Writable for &'a [u8] {
|
||||||
|
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
|
||||||
|
output.write_all(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Writable for usize {
|
||||||
|
fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> {
|
||||||
|
output.write_all(format!("{self}").as_bytes())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
||||||
let mut output = BufWriter::new(match opts.output {
|
let mut output = BufWriter::new(match opts.output {
|
||||||
None => Box::new(stdout()) as Box<dyn Write>,
|
None => Box::new(stdout()) as Box<dyn Write>,
|
||||||
Some(s) => {
|
Some(s) => {
|
||||||
|
@ -276,22 +331,18 @@ fn shuf_bytes(input: &mut Vec<&[u8]>, opts: Options) -> UResult<()> {
|
||||||
|
|
||||||
if opts.repeat {
|
if opts.repeat {
|
||||||
for _ in 0..opts.head_count {
|
for _ in 0..opts.head_count {
|
||||||
// Returns None is the slice is empty. We checked this before, so
|
let r = input.choose(&mut rng);
|
||||||
// this is safe.
|
|
||||||
let r = input.choose(&mut rng).unwrap();
|
|
||||||
|
|
||||||
output
|
r.write_all_to(&mut output)
|
||||||
.write_all(r)
|
|
||||||
.map_err_context(|| "write failed".to_string())?;
|
.map_err_context(|| "write failed".to_string())?;
|
||||||
output
|
output
|
||||||
.write_all(&[opts.sep])
|
.write_all(&[opts.sep])
|
||||||
.map_err_context(|| "write failed".to_string())?;
|
.map_err_context(|| "write failed".to_string())?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let (shuffled, _) = input.partial_shuffle(&mut rng, opts.head_count);
|
let shuffled = input.partial_shuffle(&mut rng, opts.head_count);
|
||||||
for r in shuffled {
|
for r in shuffled {
|
||||||
output
|
r.write_all_to(&mut output)
|
||||||
.write_all(r)
|
|
||||||
.map_err_context(|| "write failed".to_string())?;
|
.map_err_context(|| "write failed".to_string())?;
|
||||||
output
|
output
|
||||||
.write_all(&[opts.sep])
|
.write_all(&[opts.sep])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue