mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
Merge pull request #6014 from BenWiederhake/dev-shuf-range-off-by-one
shuf: Fix off-by-one errors in range handling
This commit is contained in:
commit
80702d5391
2 changed files with 174 additions and 24 deletions
|
@ -12,6 +12,7 @@ use rand::{Rng, RngCore};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, stdout, BufReader, BufWriter, Error, Read, Write};
|
use std::io::{stdin, stdout, BufReader, BufWriter, Error, Read, Write};
|
||||||
|
use std::ops::RangeInclusive;
|
||||||
use uucore::display::Quotable;
|
use uucore::display::Quotable;
|
||||||
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
|
||||||
use uucore::{format_usage, help_about, help_usage};
|
use uucore::{format_usage, help_about, help_usage};
|
||||||
|
@ -21,7 +22,7 @@ mod rand_read_adapter;
|
||||||
enum Mode {
|
enum Mode {
|
||||||
Default(String),
|
Default(String),
|
||||||
Echo(Vec<String>),
|
Echo(Vec<String>),
|
||||||
InputRange((usize, usize)),
|
InputRange(RangeInclusive<usize>),
|
||||||
}
|
}
|
||||||
|
|
||||||
static USAGE: &str = help_usage!("shuf.md");
|
static USAGE: &str = help_usage!("shuf.md");
|
||||||
|
@ -119,8 +120,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
find_seps(&mut evec, options.sep);
|
find_seps(&mut evec, options.sep);
|
||||||
shuf_exec(&mut evec, options)?;
|
shuf_exec(&mut evec, options)?;
|
||||||
}
|
}
|
||||||
Mode::InputRange((b, e)) => {
|
Mode::InputRange(mut range) => {
|
||||||
shuf_exec(&mut (b, e), options)?;
|
shuf_exec(&mut range, options)?;
|
||||||
}
|
}
|
||||||
Mode::Default(filename) => {
|
Mode::Default(filename) => {
|
||||||
let fdata = read_input_file(&filename)?;
|
let fdata = read_input_file(&filename)?;
|
||||||
|
@ -289,14 +290,13 @@ impl<'a> Shufable for Vec<&'a [u8]> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Shufable for (usize, usize) {
|
impl Shufable for RangeInclusive<usize> {
|
||||||
type Item = usize;
|
type Item = usize;
|
||||||
fn is_empty(&self) -> bool {
|
fn is_empty(&self) -> bool {
|
||||||
// Note: This is an inclusive range, so equality means there is 1 element.
|
self.is_empty()
|
||||||
self.0 > self.1
|
|
||||||
}
|
}
|
||||||
fn choose(&self, rng: &mut WrappedRng) -> usize {
|
fn choose(&self, rng: &mut WrappedRng) -> usize {
|
||||||
rng.gen_range(self.0..self.1)
|
rng.gen_range(self.clone())
|
||||||
}
|
}
|
||||||
type PartialShuffleIterator<'b> = NonrepeatingIterator<'b> where Self: 'b;
|
type PartialShuffleIterator<'b> = NonrepeatingIterator<'b> where Self: 'b;
|
||||||
fn partial_shuffle<'b>(
|
fn partial_shuffle<'b>(
|
||||||
|
@ -304,7 +304,7 @@ impl Shufable for (usize, usize) {
|
||||||
rng: &'b mut WrappedRng,
|
rng: &'b mut WrappedRng,
|
||||||
amount: usize,
|
amount: usize,
|
||||||
) -> Self::PartialShuffleIterator<'b> {
|
) -> Self::PartialShuffleIterator<'b> {
|
||||||
NonrepeatingIterator::new(self.0, self.1, rng, amount)
|
NonrepeatingIterator::new(self.clone(), rng, amount)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,8 +314,7 @@ enum NumberSet {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct NonrepeatingIterator<'a> {
|
struct NonrepeatingIterator<'a> {
|
||||||
begin: usize,
|
range: RangeInclusive<usize>,
|
||||||
end: usize, // exclusive
|
|
||||||
rng: &'a mut WrappedRng,
|
rng: &'a mut WrappedRng,
|
||||||
remaining_count: usize,
|
remaining_count: usize,
|
||||||
buf: NumberSet,
|
buf: NumberSet,
|
||||||
|
@ -323,19 +322,19 @@ struct NonrepeatingIterator<'a> {
|
||||||
|
|
||||||
impl<'a> NonrepeatingIterator<'a> {
|
impl<'a> NonrepeatingIterator<'a> {
|
||||||
fn new(
|
fn new(
|
||||||
begin: usize,
|
range: RangeInclusive<usize>,
|
||||||
end: usize,
|
|
||||||
rng: &'a mut WrappedRng,
|
rng: &'a mut WrappedRng,
|
||||||
amount: usize,
|
amount: usize,
|
||||||
) -> NonrepeatingIterator {
|
) -> NonrepeatingIterator {
|
||||||
let capped_amount = if begin > end {
|
let capped_amount = if range.start() > range.end() {
|
||||||
0
|
0
|
||||||
|
} else if *range.start() == 0 && *range.end() == std::usize::MAX {
|
||||||
|
amount
|
||||||
} else {
|
} else {
|
||||||
amount.min(end - begin)
|
amount.min(range.end() - range.start() + 1)
|
||||||
};
|
};
|
||||||
NonrepeatingIterator {
|
NonrepeatingIterator {
|
||||||
begin,
|
range,
|
||||||
end,
|
|
||||||
rng,
|
rng,
|
||||||
remaining_count: capped_amount,
|
remaining_count: capped_amount,
|
||||||
buf: NumberSet::AlreadyListed(HashSet::default()),
|
buf: NumberSet::AlreadyListed(HashSet::default()),
|
||||||
|
@ -343,11 +342,11 @@ impl<'a> NonrepeatingIterator<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn produce(&mut self) -> usize {
|
fn produce(&mut self) -> usize {
|
||||||
debug_assert!(self.begin <= self.end);
|
debug_assert!(self.range.start() <= self.range.end());
|
||||||
match &mut self.buf {
|
match &mut self.buf {
|
||||||
NumberSet::AlreadyListed(already_listed) => {
|
NumberSet::AlreadyListed(already_listed) => {
|
||||||
let chosen = loop {
|
let chosen = loop {
|
||||||
let guess = self.rng.gen_range(self.begin..self.end);
|
let guess = self.rng.gen_range(self.range.clone());
|
||||||
let newly_inserted = already_listed.insert(guess);
|
let newly_inserted = already_listed.insert(guess);
|
||||||
if newly_inserted {
|
if newly_inserted {
|
||||||
break guess;
|
break guess;
|
||||||
|
@ -356,9 +355,11 @@ impl<'a> NonrepeatingIterator<'a> {
|
||||||
// Once a significant fraction of the interval has already been enumerated,
|
// Once a significant fraction of the interval has already been enumerated,
|
||||||
// the number of attempts to find a number that hasn't been chosen yet increases.
|
// the number of attempts to find a number that hasn't been chosen yet increases.
|
||||||
// Therefore, we need to switch at some point from "set of already returned values" to "list of remaining values".
|
// Therefore, we need to switch at some point from "set of already returned values" to "list of remaining values".
|
||||||
let range_size = self.end - self.begin;
|
let range_size = (self.range.end() - self.range.start()).saturating_add(1);
|
||||||
if number_set_should_list_remaining(already_listed.len(), range_size) {
|
if number_set_should_list_remaining(already_listed.len(), range_size) {
|
||||||
let mut remaining = (self.begin..self.end)
|
let mut remaining = self
|
||||||
|
.range
|
||||||
|
.clone()
|
||||||
.filter(|n| !already_listed.contains(n))
|
.filter(|n| !already_listed.contains(n))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
assert!(remaining.len() >= self.remaining_count);
|
assert!(remaining.len() >= self.remaining_count);
|
||||||
|
@ -381,7 +382,7 @@ impl<'a> Iterator for NonrepeatingIterator<'a> {
|
||||||
type Item = usize;
|
type Item = usize;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<usize> {
|
fn next(&mut self) -> Option<usize> {
|
||||||
if self.begin > self.end || self.remaining_count == 0 {
|
if self.range.is_empty() || self.remaining_count == 0 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
self.remaining_count -= 1;
|
self.remaining_count -= 1;
|
||||||
|
@ -462,7 +463,7 @@ fn shuf_exec(input: &mut impl Shufable, opts: Options) -> UResult<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_range(input_range: &str) -> Result<(usize, usize), String> {
|
fn parse_range(input_range: &str) -> Result<RangeInclusive<usize>, String> {
|
||||||
if let Some((from, to)) = input_range.split_once('-') {
|
if let Some((from, to)) = input_range.split_once('-') {
|
||||||
let begin = from
|
let begin = from
|
||||||
.parse::<usize>()
|
.parse::<usize>()
|
||||||
|
@ -470,7 +471,11 @@ fn parse_range(input_range: &str) -> Result<(usize, usize), String> {
|
||||||
let end = to
|
let end = to
|
||||||
.parse::<usize>()
|
.parse::<usize>()
|
||||||
.map_err(|_| format!("invalid input range: {}", to.quote()))?;
|
.map_err(|_| format!("invalid input range: {}", to.quote()))?;
|
||||||
Ok((begin, end + 1))
|
if begin <= end || begin == end + 1 {
|
||||||
|
Ok(begin..=end)
|
||||||
|
} else {
|
||||||
|
Err(format!("invalid input range: {}", input_range.quote()))
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Err(format!("invalid input range: {}", input_range.quote()))
|
Err(format!("invalid input range: {}", input_range.quote()))
|
||||||
}
|
}
|
||||||
|
|
|
@ -138,6 +138,99 @@ fn test_very_large_range_offset() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_repeat_no_overflow_1_max() {
|
||||||
|
let upper_bound = std::usize::MAX;
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-rn1")
|
||||||
|
.arg(&format!("-i1-{upper_bound}"))
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
|
||||||
|
let result_seq: Vec<usize> = result
|
||||||
|
.stdout_str()
|
||||||
|
.split('\n')
|
||||||
|
.filter(|x| !x.is_empty())
|
||||||
|
.map(|x| x.parse().unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(result_seq.len(), 1, "Miscounted output length!");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_repeat_no_overflow_0_max_minus_1() {
|
||||||
|
let upper_bound = std::usize::MAX - 1;
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-rn1")
|
||||||
|
.arg(&format!("-i0-{upper_bound}"))
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
|
||||||
|
let result_seq: Vec<usize> = result
|
||||||
|
.stdout_str()
|
||||||
|
.split('\n')
|
||||||
|
.filter(|x| !x.is_empty())
|
||||||
|
.map(|x| x.parse().unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(result_seq.len(), 1, "Miscounted output length!");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_permute_no_overflow_1_max() {
|
||||||
|
let upper_bound = std::usize::MAX;
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-n1")
|
||||||
|
.arg(&format!("-i1-{upper_bound}"))
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
|
||||||
|
let result_seq: Vec<usize> = result
|
||||||
|
.stdout_str()
|
||||||
|
.split('\n')
|
||||||
|
.filter(|x| !x.is_empty())
|
||||||
|
.map(|x| x.parse().unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(result_seq.len(), 1, "Miscounted output length!");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_permute_no_overflow_0_max_minus_1() {
|
||||||
|
let upper_bound = std::usize::MAX - 1;
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-n1")
|
||||||
|
.arg(&format!("-i0-{upper_bound}"))
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
|
||||||
|
let result_seq: Vec<usize> = result
|
||||||
|
.stdout_str()
|
||||||
|
.split('\n')
|
||||||
|
.filter(|x| !x.is_empty())
|
||||||
|
.map(|x| x.parse().unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(result_seq.len(), 1, "Miscounted output length!");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_permute_no_overflow_0_max() {
|
||||||
|
// NOTE: This is different from GNU shuf!
|
||||||
|
// GNU shuf accepts -i0-MAX-1 and -i1-MAX, but not -i0-MAX.
|
||||||
|
// This feels like a bug in GNU shuf.
|
||||||
|
let upper_bound = std::usize::MAX;
|
||||||
|
let result = new_ucmd!()
|
||||||
|
.arg("-n1")
|
||||||
|
.arg(&format!("-i0-{upper_bound}"))
|
||||||
|
.succeeds();
|
||||||
|
result.no_stderr();
|
||||||
|
|
||||||
|
let result_seq: Vec<usize> = result
|
||||||
|
.stdout_str()
|
||||||
|
.split('\n')
|
||||||
|
.filter(|x| !x.is_empty())
|
||||||
|
.map(|x| x.parse().unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(result_seq.len(), 1, "Miscounted output length!");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_very_high_range_full() {
|
fn test_very_high_range_full() {
|
||||||
let input_seq = vec![
|
let input_seq = vec![
|
||||||
|
@ -626,7 +719,6 @@ fn test_shuf_multiple_input_line_count() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore = "known issue"]
|
|
||||||
fn test_shuf_repeat_empty_range() {
|
fn test_shuf_repeat_empty_range() {
|
||||||
new_ucmd!()
|
new_ucmd!()
|
||||||
.arg("-ri4-3")
|
.arg("-ri4-3")
|
||||||
|
@ -653,3 +745,56 @@ fn test_shuf_repeat_empty_input() {
|
||||||
.no_stdout()
|
.no_stdout()
|
||||||
.stderr_only("shuf: no lines to repeat\n");
|
.stderr_only("shuf: no lines to repeat\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_one_elem() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-i5-5")
|
||||||
|
.succeeds()
|
||||||
|
.no_stderr()
|
||||||
|
.stdout_only("5\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_empty() {
|
||||||
|
new_ucmd!().arg("-i5-4").succeeds().no_output();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_empty_minus_one() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-i5-3")
|
||||||
|
.fails()
|
||||||
|
.no_stdout()
|
||||||
|
.stderr_only("shuf: invalid input range: '5-3'\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_repeat_one_elem() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-n1")
|
||||||
|
.arg("-ri5-5")
|
||||||
|
.succeeds()
|
||||||
|
.no_stderr()
|
||||||
|
.stdout_only("5\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_repeat_empty() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-n1")
|
||||||
|
.arg("-ri5-4")
|
||||||
|
.fails()
|
||||||
|
.no_stdout()
|
||||||
|
.stderr_only("shuf: no lines to repeat\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_range_repeat_empty_minus_one() {
|
||||||
|
new_ucmd!()
|
||||||
|
.arg("-n1")
|
||||||
|
.arg("-ri5-3")
|
||||||
|
.fails()
|
||||||
|
.no_stdout()
|
||||||
|
.stderr_only("shuf: invalid input range: '5-3'\n");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue