1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

tr: calculate complement set early

Fixes #6163 and adds a test to verify that a regression is not caused.

Instead of inverting the conditions to check (e.g. delete characters **not** present in set1) invert
set1 when passed the complement flag (`-c`, `-C`, `--complement`). This is done by calculating set1
then "inverting" it by subtracting from the "full" (universe) set (0..=u8::MAX).

This fixes issue 6163 because it was caused by a combination of the `-c` and `-t` flag. `-c` is the
abovementioned complement flag and `-t`/`--truncate-set1` truncates set1 to the length of set2. What
happened in issue 6163 is that `set1={b'Y'}` and `set2={b'Z'}`, when truncated set1 stays the same
and we proceed. The problem is GNU utils does not consider set1 to be `{b'Y'}`, but the complement
of `{b'Y'}`, that is `U \ {b'Y'}={0, 1, ..., b'X', b'Z', ...}`, thus it is truncated to `{0}`.

We can verify this by doing: `printf '\0' | tr -c -t Y Z`, which prints `Z` to stdout as expected.

Additionally, by calculating the complement of set1 we no longer need to consider the complement
flag when doing the translate operation, this allows us to delete a lot of code.
This commit is contained in:
Jalil David Salamé Messina 2024-05-04 15:26:47 +02:00 committed by Ben Wiederhake
parent 3b96ff1d10
commit 3c47f27698
3 changed files with 56 additions and 136 deletions

View file

@ -19,6 +19,7 @@ use std::{
error::Error,
fmt::{Debug, Display},
io::{BufRead, Write},
ops::Not,
};
use uucore::error::UError;
@ -125,6 +126,7 @@ impl Sequence {
pub fn solve_set_characters(
set1_str: &[u8],
set2_str: &[u8],
complement_flag: bool,
truncate_set1_flag: bool,
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
let set1 = Self::from_str(set1_str)?;
@ -189,6 +191,9 @@ impl Sequence {
},
};
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
if complement_flag {
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
}
if truncate_set1_flag {
set1_solved.truncate(set2_solved.len());
}
@ -369,56 +374,28 @@ impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbo
#[derive(Debug)]
pub struct DeleteOperation {
set: Vec<u8>,
complement_flag: bool,
}
impl DeleteOperation {
pub fn new(set: Vec<u8>, complement_flag: bool) -> Self {
Self {
set,
complement_flag,
}
pub fn new(set: Vec<u8>) -> Self {
Self { set }
}
}
impl SymbolTranslator for DeleteOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
let found = self.set.iter().any(|sequence| *sequence == current);
if self.complement_flag == found {
Some(current)
} else {
None
}
}
}
pub struct TranslateOperationComplement {
iter: u8,
set2_iter: usize,
set1: Vec<u8>,
set2: Vec<u8>,
translation_map: HashMap<u8, u8>,
}
impl TranslateOperationComplement {
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Self {
Self {
iter: 0,
set2_iter: 0,
set1,
set2,
translation_map: HashMap::new(),
}
// keep if not present in the set
self.set.contains(&current).not().then_some(current)
}
}
#[derive(Debug)]
pub struct TranslateOperationStandard {
pub struct TranslateOperation {
translation_map: HashMap<u8, u8>,
}
impl TranslateOperationStandard {
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
impl TranslateOperation {
pub fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
if let Some(fallback) = set2.last().copied() {
Ok(Self {
translation_map: set1
@ -436,86 +413,27 @@ impl TranslateOperationStandard {
}
}
pub enum TranslateOperation {
Standard(TranslateOperationStandard),
Complement(TranslateOperationComplement),
}
impl TranslateOperation {
fn next_complement_char(iter: u8, ignore_list: &[u8]) -> (u8, u8) {
(iter..)
.filter(|c| !ignore_list.iter().any(|s| s == c))
.map(|c| (c + 1, c))
.next()
.expect("exhausted all possible characters")
}
}
impl TranslateOperation {
pub fn new(set1: Vec<u8>, set2: Vec<u8>, complement: bool) -> Result<Self, BadSequence> {
if complement {
Ok(Self::Complement(TranslateOperationComplement::new(
set1, set2,
)))
} else {
Ok(Self::Standard(TranslateOperationStandard::new(set1, set2)?))
}
}
}
impl SymbolTranslator for TranslateOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
match self {
Self::Standard(TranslateOperationStandard { translation_map }) => Some(
translation_map
.iter()
.find_map(|(l, r)| if l.eq(&current) { Some(*r) } else { None })
Some(
self.translation_map
.get(&current)
.copied()
.unwrap_or(current),
),
Self::Complement(TranslateOperationComplement {
iter,
set2_iter,
set1,
set2,
translation_map,
}) => {
// First, try to see if current char is already mapped
// If so, return the mapped char
// Else, pop from set2
// If we popped something, map the next complement character to this value
// If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily
if let Some(c) = set1.iter().find(|c| c.eq(&&current)) {
Some(*c)
} else {
while translation_map.get(&current).is_none() {
if let Some(value) = set2.get(*set2_iter) {
let (next_iter, next_key) = Self::next_complement_char(*iter, &*set1);
*iter = next_iter;
*set2_iter = set2_iter.saturating_add(1);
translation_map.insert(next_key, *value);
} else {
translation_map.insert(current, *set2.last().unwrap());
}
}
Some(*translation_map.get(&current).unwrap())
}
}
}
)
}
}
#[derive(Debug, Clone)]
pub struct SqueezeOperation {
set1: HashSet<u8>,
complement: bool,
previous: Option<u8>,
}
impl SqueezeOperation {
pub fn new(set1: Vec<u8>, complement: bool) -> Self {
pub fn new(set1: Vec<u8>) -> Self {
Self {
set1: set1.into_iter().collect(),
complement,
previous: None,
}
}
@ -523,24 +441,6 @@ impl SqueezeOperation {
impl SymbolTranslator for SqueezeOperation {
fn translate(&mut self, current: u8) -> Option<u8> {
if self.complement {
let next = if self.set1.contains(&current) {
Some(current)
} else {
match self.previous {
Some(v) => {
if v.eq(&current) {
None
} else {
Some(current)
}
}
None => Some(current),
}
};
self.previous = Some(current);
next
} else {
let next = if self.set1.contains(&current) {
match self.previous {
Some(v) if v == current => None,
@ -553,7 +453,6 @@ impl SymbolTranslator for SqueezeOperation {
next
}
}
}
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
where

View file

@ -121,26 +121,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// '*_op' are the operations that need to be applied, in order.
if delete_flag {
if squeeze_flag {
let delete_op = DeleteOperation::new(set1, complement_flag);
let squeeze_op = SqueezeOperation::new(set2, false);
let delete_op = DeleteOperation::new(set1);
let squeeze_op = SqueezeOperation::new(set2);
let op = delete_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else {
let op = DeleteOperation::new(set1, complement_flag);
let op = DeleteOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
} else if squeeze_flag {
if sets_len < 2 {
let op = SqueezeOperation::new(set1, complement_flag);
let op = SqueezeOperation::new(set1);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
} else {
let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
let squeeze_op = SqueezeOperation::new(set2, false);
let translate_op = TranslateOperation::new(set1, set2.clone())?;
let squeeze_op = SqueezeOperation::new(set2);
let op = translate_op.chain(squeeze_op);
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
} else {
let op = TranslateOperation::new(set1, set2, complement_flag)?;
let op = TranslateOperation::new(set1, set2)?;
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
}
Ok(())

View file

@ -1313,3 +1313,24 @@ fn check_regression_class_blank() {
.no_stderr()
.stdout_only("a12b");
}
// Check regression found in https://github.com/uutils/coreutils/issues/6163
#[test]
fn check_regression_issue_6163_no_match() {
new_ucmd!()
.args(&["-c", "-t", "Y", "Z"])
.pipe_in("X\n")
.succeeds()
.no_stderr()
.stdout_only("X\n");
}
#[test]
fn check_regression_issue_6163_match() {
new_ucmd!()
.args(&["-c", "-t", "Y", "Z"])
.pipe_in("\0\n")
.succeeds()
.no_stderr()
.stdout_only("Z\n");
}