mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
tr: calculate complement set early
Fixes #6163 and adds tests to guard against a regression. Instead of inverting the conditions to check (e.g. delete characters **not** present in set1), invert set1 itself when the complement flag (`-c`, `-C`, `--complement`) is passed. This is done by calculating set1 and then "inverting" it by subtracting it from the "full" (universe) set (0..=u8::MAX). This fixes issue 6163 because it was caused by a combination of the `-c` and `-t` flags: `-c` is the above-mentioned complement flag, and `-t`/`--truncate-set1` truncates set1 to the length of set2. What happened in issue 6163 is that `set1={b'Y'}` and `set2={b'Z'}`; when truncated, set1 stays the same and we proceed. The problem is that GNU utils does not consider set1 to be `{b'Y'}`, but the complement of `{b'Y'}`, that is `U \ {b'Y'}={0, 1, ..., b'X', b'Z', ...}`, so it is truncated to `{0}`. We can verify this by running `printf '\0' | tr -c -t Y Z`, which prints `Z` to stdout as expected. Additionally, by calculating the complement of set1 up front, we no longer need to consider the complement flag when doing the translate operation, which allows us to delete a lot of code.
This commit is contained in:
parent
3b96ff1d10
commit
3c47f27698
3 changed files with 56 additions and 136 deletions
|
@ -19,6 +19,7 @@ use std::{
|
||||||
error::Error,
|
error::Error,
|
||||||
fmt::{Debug, Display},
|
fmt::{Debug, Display},
|
||||||
io::{BufRead, Write},
|
io::{BufRead, Write},
|
||||||
|
ops::Not,
|
||||||
};
|
};
|
||||||
use uucore::error::UError;
|
use uucore::error::UError;
|
||||||
|
|
||||||
|
@ -125,6 +126,7 @@ impl Sequence {
|
||||||
pub fn solve_set_characters(
|
pub fn solve_set_characters(
|
||||||
set1_str: &[u8],
|
set1_str: &[u8],
|
||||||
set2_str: &[u8],
|
set2_str: &[u8],
|
||||||
|
complement_flag: bool,
|
||||||
truncate_set1_flag: bool,
|
truncate_set1_flag: bool,
|
||||||
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
||||||
let set1 = Self::from_str(set1_str)?;
|
let set1 = Self::from_str(set1_str)?;
|
||||||
|
@ -189,6 +191,9 @@ impl Sequence {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
|
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
|
||||||
|
if complement_flag {
|
||||||
|
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
|
||||||
|
}
|
||||||
if truncate_set1_flag {
|
if truncate_set1_flag {
|
||||||
set1_solved.truncate(set2_solved.len());
|
set1_solved.truncate(set2_solved.len());
|
||||||
}
|
}
|
||||||
|
@ -369,56 +374,28 @@ impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbo
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct DeleteOperation {
|
pub struct DeleteOperation {
|
||||||
set: Vec<u8>,
|
set: Vec<u8>,
|
||||||
complement_flag: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DeleteOperation {
|
impl DeleteOperation {
|
||||||
pub fn new(set: Vec<u8>, complement_flag: bool) -> Self {
|
pub fn new(set: Vec<u8>) -> Self {
|
||||||
Self {
|
Self { set }
|
||||||
set,
|
|
||||||
complement_flag,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SymbolTranslator for DeleteOperation {
|
impl SymbolTranslator for DeleteOperation {
|
||||||
fn translate(&mut self, current: u8) -> Option<u8> {
|
fn translate(&mut self, current: u8) -> Option<u8> {
|
||||||
let found = self.set.iter().any(|sequence| *sequence == current);
|
// keep if not present in the set
|
||||||
if self.complement_flag == found {
|
self.set.contains(&current).not().then_some(current)
|
||||||
Some(current)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct TranslateOperationComplement {
|
|
||||||
iter: u8,
|
|
||||||
set2_iter: usize,
|
|
||||||
set1: Vec<u8>,
|
|
||||||
set2: Vec<u8>,
|
|
||||||
translation_map: HashMap<u8, u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TranslateOperationComplement {
|
|
||||||
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Self {
|
|
||||||
Self {
|
|
||||||
iter: 0,
|
|
||||||
set2_iter: 0,
|
|
||||||
set1,
|
|
||||||
set2,
|
|
||||||
translation_map: HashMap::new(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct TranslateOperationStandard {
|
pub struct TranslateOperation {
|
||||||
translation_map: HashMap<u8, u8>,
|
translation_map: HashMap<u8, u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperationStandard {
|
impl TranslateOperation {
|
||||||
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
|
pub fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
|
||||||
if let Some(fallback) = set2.last().copied() {
|
if let Some(fallback) = set2.last().copied() {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
translation_map: set1
|
translation_map: set1
|
||||||
|
@ -436,86 +413,27 @@ impl TranslateOperationStandard {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum TranslateOperation {
|
|
||||||
Standard(TranslateOperationStandard),
|
|
||||||
Complement(TranslateOperationComplement),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TranslateOperation {
|
|
||||||
fn next_complement_char(iter: u8, ignore_list: &[u8]) -> (u8, u8) {
|
|
||||||
(iter..)
|
|
||||||
.filter(|c| !ignore_list.iter().any(|s| s == c))
|
|
||||||
.map(|c| (c + 1, c))
|
|
||||||
.next()
|
|
||||||
.expect("exhausted all possible characters")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TranslateOperation {
|
|
||||||
pub fn new(set1: Vec<u8>, set2: Vec<u8>, complement: bool) -> Result<Self, BadSequence> {
|
|
||||||
if complement {
|
|
||||||
Ok(Self::Complement(TranslateOperationComplement::new(
|
|
||||||
set1, set2,
|
|
||||||
)))
|
|
||||||
} else {
|
|
||||||
Ok(Self::Standard(TranslateOperationStandard::new(set1, set2)?))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SymbolTranslator for TranslateOperation {
|
impl SymbolTranslator for TranslateOperation {
|
||||||
fn translate(&mut self, current: u8) -> Option<u8> {
|
fn translate(&mut self, current: u8) -> Option<u8> {
|
||||||
match self {
|
Some(
|
||||||
Self::Standard(TranslateOperationStandard { translation_map }) => Some(
|
self.translation_map
|
||||||
translation_map
|
.get(&current)
|
||||||
.iter()
|
.copied()
|
||||||
.find_map(|(l, r)| if l.eq(&current) { Some(*r) } else { None })
|
.unwrap_or(current),
|
||||||
.unwrap_or(current),
|
)
|
||||||
),
|
|
||||||
Self::Complement(TranslateOperationComplement {
|
|
||||||
iter,
|
|
||||||
set2_iter,
|
|
||||||
set1,
|
|
||||||
set2,
|
|
||||||
translation_map,
|
|
||||||
}) => {
|
|
||||||
// First, try to see if current char is already mapped
|
|
||||||
// If so, return the mapped char
|
|
||||||
// Else, pop from set2
|
|
||||||
// If we popped something, map the next complement character to this value
|
|
||||||
// If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily
|
|
||||||
if let Some(c) = set1.iter().find(|c| c.eq(&&current)) {
|
|
||||||
Some(*c)
|
|
||||||
} else {
|
|
||||||
while translation_map.get(&current).is_none() {
|
|
||||||
if let Some(value) = set2.get(*set2_iter) {
|
|
||||||
let (next_iter, next_key) = Self::next_complement_char(*iter, &*set1);
|
|
||||||
*iter = next_iter;
|
|
||||||
*set2_iter = set2_iter.saturating_add(1);
|
|
||||||
translation_map.insert(next_key, *value);
|
|
||||||
} else {
|
|
||||||
translation_map.insert(current, *set2.last().unwrap());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(*translation_map.get(&current).unwrap())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SqueezeOperation {
|
pub struct SqueezeOperation {
|
||||||
set1: HashSet<u8>,
|
set1: HashSet<u8>,
|
||||||
complement: bool,
|
|
||||||
previous: Option<u8>,
|
previous: Option<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SqueezeOperation {
|
impl SqueezeOperation {
|
||||||
pub fn new(set1: Vec<u8>, complement: bool) -> Self {
|
pub fn new(set1: Vec<u8>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
set1: set1.into_iter().collect(),
|
set1: set1.into_iter().collect(),
|
||||||
complement,
|
|
||||||
previous: None,
|
previous: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -523,35 +441,16 @@ impl SqueezeOperation {
|
||||||
|
|
||||||
impl SymbolTranslator for SqueezeOperation {
|
impl SymbolTranslator for SqueezeOperation {
|
||||||
fn translate(&mut self, current: u8) -> Option<u8> {
|
fn translate(&mut self, current: u8) -> Option<u8> {
|
||||||
if self.complement {
|
let next = if self.set1.contains(&current) {
|
||||||
let next = if self.set1.contains(&current) {
|
match self.previous {
|
||||||
Some(current)
|
Some(v) if v == current => None,
|
||||||
} else {
|
_ => Some(current),
|
||||||
match self.previous {
|
}
|
||||||
Some(v) => {
|
|
||||||
if v.eq(&current) {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(current)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => Some(current),
|
|
||||||
}
|
|
||||||
};
|
|
||||||
self.previous = Some(current);
|
|
||||||
next
|
|
||||||
} else {
|
} else {
|
||||||
let next = if self.set1.contains(&current) {
|
Some(current)
|
||||||
match self.previous {
|
};
|
||||||
Some(v) if v == current => None,
|
self.previous = Some(current);
|
||||||
_ => Some(current),
|
next
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Some(current)
|
|
||||||
};
|
|
||||||
self.previous = Some(current);
|
|
||||||
next
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -121,26 +121,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
// '*_op' are the operations that need to be applied, in order.
|
// '*_op' are the operations that need to be applied, in order.
|
||||||
if delete_flag {
|
if delete_flag {
|
||||||
if squeeze_flag {
|
if squeeze_flag {
|
||||||
let delete_op = DeleteOperation::new(set1, complement_flag);
|
let delete_op = DeleteOperation::new(set1);
|
||||||
let squeeze_op = SqueezeOperation::new(set2, false);
|
let squeeze_op = SqueezeOperation::new(set2);
|
||||||
let op = delete_op.chain(squeeze_op);
|
let op = delete_op.chain(squeeze_op);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
} else {
|
} else {
|
||||||
let op = DeleteOperation::new(set1, complement_flag);
|
let op = DeleteOperation::new(set1);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
}
|
}
|
||||||
} else if squeeze_flag {
|
} else if squeeze_flag {
|
||||||
if sets_len < 2 {
|
if sets_len < 2 {
|
||||||
let op = SqueezeOperation::new(set1, complement_flag);
|
let op = SqueezeOperation::new(set1);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
} else {
|
} else {
|
||||||
let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
|
let translate_op = TranslateOperation::new(set1, set2.clone())?;
|
||||||
let squeeze_op = SqueezeOperation::new(set2, false);
|
let squeeze_op = SqueezeOperation::new(set2);
|
||||||
let op = translate_op.chain(squeeze_op);
|
let op = translate_op.chain(squeeze_op);
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let op = TranslateOperation::new(set1, set2, complement_flag)?;
|
let op = TranslateOperation::new(set1, set2)?;
|
||||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -1313,3 +1313,24 @@ fn check_regression_class_blank() {
|
||||||
.no_stderr()
|
.no_stderr()
|
||||||
.stdout_only("a12b");
|
.stdout_only("a12b");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check regression found in https://github.com/uutils/coreutils/issues/6163
|
||||||
|
#[test]
|
||||||
|
fn check_regression_issue_6163_no_match() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-t", "Y", "Z"])
|
||||||
|
.pipe_in("X\n")
|
||||||
|
.succeeds()
|
||||||
|
.no_stderr()
|
||||||
|
.stdout_only("X\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_regression_issue_6163_match() {
|
||||||
|
new_ucmd!()
|
||||||
|
.args(&["-c", "-t", "Y", "Z"])
|
||||||
|
.pipe_in("\0\n")
|
||||||
|
.succeeds()
|
||||||
|
.no_stderr()
|
||||||
|
.stdout_only("Z\n");
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue