From 989178f438812ddbab23ef5bfd157e9ddda95b6f Mon Sep 17 00:00:00 2001 From: Ben Wiederhake Date: Fri, 16 Feb 2024 03:03:38 +0100 Subject: [PATCH] shuf: extract minimal required interface between data and core loop --- src/uu/shuf/src/shuf.rs | 79 +++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/src/uu/shuf/src/shuf.rs b/src/uu/shuf/src/shuf.rs index bab328e2e..6f33492eb 100644 --- a/src/uu/shuf/src/shuf.rs +++ b/src/uu/shuf/src/shuf.rs @@ -3,14 +3,14 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) cmdline evec seps rvec fdata +// spell-checker:ignore (ToDO) cmdline evec seps shufable rvec fdata use clap::{crate_version, Arg, ArgAction, Command}; use memchr::memchr_iter; use rand::prelude::SliceRandom; use rand::RngCore; use std::fs::File; -use std::io::{stdin, stdout, BufReader, BufWriter, Read, Write}; +use std::io::{stdin, stdout, BufReader, BufWriter, Error, Read, Write}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::{format_usage, help_about, help_usage}; @@ -116,18 +116,18 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { Mode::Echo(args) => { let mut evec = args.iter().map(String::as_bytes).collect::<Vec<_>>(); find_seps(&mut evec, options.sep); - shuf_bytes(&mut evec, options)?; + shuf_exec(&mut evec, options)?; } Mode::InputRange((b, e)) => { let rvec = (b..e).map(|x| format!("{x}")).collect::<Vec<String>>(); let mut rvec = rvec.iter().map(String::as_bytes).collect::<Vec<&[u8]>>(); - shuf_bytes(&mut rvec, options)?; + shuf_exec(&mut rvec, options)?; } Mode::Default(filename) => { let fdata = read_input_file(&filename)?; let mut fdata = vec![&fdata[..]]; find_seps(&mut fdata, options.sep); - shuf_bytes(&mut fdata, options)?; + shuf_exec(&mut fdata, options)?; } } @@ -251,7 +251,62 @@ fn find_seps(data: &mut Vec<&[u8]>, sep: u8) { } } -fn 
shuf_bytes(input: &mut Vec<&[u8]>, opts: Options) -> UResult<()> { +trait Shufable { + type Item: Writable; + fn is_empty(&self) -> bool; + fn choose(&self, rng: &mut WrappedRng) -> Self::Item; + // This type shouldn't even be known. However, because we want to support + // Rust 1.70, it is not possible to return "impl Iterator". + // TODO: When the MSRV is raised, rewrite this to return "impl Iterator". + type PartialShuffleIterator<'b>: Iterator<Item = Self::Item> + where + Self: 'b; + fn partial_shuffle<'b>( + &'b mut self, + rng: &'b mut WrappedRng, + amount: usize, + ) -> Self::PartialShuffleIterator<'b>; +} + +impl<'a> Shufable for Vec<&'a [u8]> { + type Item = &'a [u8]; + fn is_empty(&self) -> bool { + (**self).is_empty() + } + fn choose(&self, rng: &mut WrappedRng) -> Self::Item { + // Note: "copied()" only copies the reference, not the entire [u8]. + // Returns None if the slice is empty. We checked this before, so + // this is safe. + (**self).choose(rng).unwrap() + } + type PartialShuffleIterator<'b> = std::iter::Copied<std::slice::Iter<'b, &'a [u8]>> where Self: 'b; + fn partial_shuffle<'b>( + &'b mut self, + rng: &'b mut WrappedRng, + amount: usize, + ) -> Self::PartialShuffleIterator<'b> { + // Note: "copied()" only copies the reference, not the entire [u8]. 
+ (**self).partial_shuffle(rng, amount).0.iter().copied() + } +} + +trait Writable { + fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error>; +} + +impl<'a> Writable for &'a [u8] { + fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> { + output.write_all(self) + } +} + +impl Writable for usize { + fn write_all_to(&self, output: &mut impl Write) -> Result<(), Error> { + output.write_all(format!("{self}").as_bytes()) + } +} + +fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> { let mut output = BufWriter::new(match opts.output { None => Box::new(stdout()) as Box<dyn Write>, Some(s) => { @@ -276,22 +331,18 @@ fn shuf_bytes(input: &mut Vec<&[u8]>, opts: Options) -> UResult<()> { if opts.repeat { for _ in 0..opts.head_count { - // Returns None is the slice is empty. We checked this before, so - // this is safe. - let r = input.choose(&mut rng).unwrap(); + let r = input.choose(&mut rng); - output - .write_all(r) + r.write_all_to(&mut output) .map_err_context(|| "write failed".to_string())?; output .write_all(&[opts.sep]) .map_err_context(|| "write failed".to_string())?; } } else { - let (shuffled, _) = input.partial_shuffle(&mut rng, opts.head_count); + let shuffled = input.partial_shuffle(&mut rng, opts.head_count); for r in shuffled { - output - .write_all(r) + r.write_all_to(&mut output) .map_err_context(|| "write failed".to_string())?; output .write_all(&[opts.sep])