mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 03:27:44 +00:00
tr: calculate complement set early
Fixes #6163 and adds a test to verify that a regression is not caused. Instead of inverting the conditions to check (e.g. delete characters **not** present in set1) invert set1 when passed the complement flag (`-c`, `-C`, `--complement`). This is done by calculating set1 then "inverting" it by subtracting from the "full" (universe) set (0..=u8::MAX). This fixes issue 6163 because it was caused by a combination of the `-c` and `-t` flag. `-c` is the abovementioned complement flag and `-t`/`--truncate-set1` truncates set1 to the length of set2. What happened in issue 6163 is that `set1={b'Y'}` and `set2={b'Z'}`, when truncated set1 stays the same and we proceed. The problem is GNU utils does not consider set1 to be `{b'Y'}`, but the complement of `{b'Y'}`, that is `U \ {b'Y'}={0, 1, ..., b'X', b'Z', ...}`, thus it is truncated to `{0}`. We can verify this by doing: `printf '\0' | tr -c -t Y Z`, which prints `Z` to stdout as expected. Additionally, by calculating the complement of set1 we no longer need to consider the complement flag when doing the translate operation, this allows us to delete a lot of code.
This commit is contained in:
parent
3b96ff1d10
commit
3c47f27698
3 changed files with 56 additions and 136 deletions
|
@ -19,6 +19,7 @@ use std::{
|
|||
error::Error,
|
||||
fmt::{Debug, Display},
|
||||
io::{BufRead, Write},
|
||||
ops::Not,
|
||||
};
|
||||
use uucore::error::UError;
|
||||
|
||||
|
@ -125,6 +126,7 @@ impl Sequence {
|
|||
pub fn solve_set_characters(
|
||||
set1_str: &[u8],
|
||||
set2_str: &[u8],
|
||||
complement_flag: bool,
|
||||
truncate_set1_flag: bool,
|
||||
) -> Result<(Vec<u8>, Vec<u8>), BadSequence> {
|
||||
let set1 = Self::from_str(set1_str)?;
|
||||
|
@ -189,6 +191,9 @@ impl Sequence {
|
|||
},
|
||||
};
|
||||
let mut set1_solved: Vec<_> = set1.iter().flat_map(Self::flatten).collect();
|
||||
if complement_flag {
|
||||
set1_solved = (0..=u8::MAX).filter(|x| !set1_solved.contains(x)).collect();
|
||||
}
|
||||
if truncate_set1_flag {
|
||||
set1_solved.truncate(set2_solved.len());
|
||||
}
|
||||
|
@ -369,56 +374,28 @@ impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbo
|
|||
#[derive(Debug)]
|
||||
pub struct DeleteOperation {
|
||||
set: Vec<u8>,
|
||||
complement_flag: bool,
|
||||
}
|
||||
|
||||
impl DeleteOperation {
|
||||
pub fn new(set: Vec<u8>, complement_flag: bool) -> Self {
|
||||
Self {
|
||||
set,
|
||||
complement_flag,
|
||||
}
|
||||
pub fn new(set: Vec<u8>) -> Self {
|
||||
Self { set }
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for DeleteOperation {
|
||||
fn translate(&mut self, current: u8) -> Option<u8> {
|
||||
let found = self.set.iter().any(|sequence| *sequence == current);
|
||||
if self.complement_flag == found {
|
||||
Some(current)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TranslateOperationComplement {
|
||||
iter: u8,
|
||||
set2_iter: usize,
|
||||
set1: Vec<u8>,
|
||||
set2: Vec<u8>,
|
||||
translation_map: HashMap<u8, u8>,
|
||||
}
|
||||
|
||||
impl TranslateOperationComplement {
|
||||
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Self {
|
||||
Self {
|
||||
iter: 0,
|
||||
set2_iter: 0,
|
||||
set1,
|
||||
set2,
|
||||
translation_map: HashMap::new(),
|
||||
}
|
||||
// keep if not present in the set
|
||||
self.set.contains(¤t).not().then_some(current)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TranslateOperationStandard {
|
||||
pub struct TranslateOperation {
|
||||
translation_map: HashMap<u8, u8>,
|
||||
}
|
||||
|
||||
impl TranslateOperationStandard {
|
||||
fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
|
||||
impl TranslateOperation {
|
||||
pub fn new(set1: Vec<u8>, set2: Vec<u8>) -> Result<Self, BadSequence> {
|
||||
if let Some(fallback) = set2.last().copied() {
|
||||
Ok(Self {
|
||||
translation_map: set1
|
||||
|
@ -436,86 +413,27 @@ impl TranslateOperationStandard {
|
|||
}
|
||||
}
|
||||
|
||||
pub enum TranslateOperation {
|
||||
Standard(TranslateOperationStandard),
|
||||
Complement(TranslateOperationComplement),
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
fn next_complement_char(iter: u8, ignore_list: &[u8]) -> (u8, u8) {
|
||||
(iter..)
|
||||
.filter(|c| !ignore_list.iter().any(|s| s == c))
|
||||
.map(|c| (c + 1, c))
|
||||
.next()
|
||||
.expect("exhausted all possible characters")
|
||||
}
|
||||
}
|
||||
|
||||
impl TranslateOperation {
|
||||
pub fn new(set1: Vec<u8>, set2: Vec<u8>, complement: bool) -> Result<Self, BadSequence> {
|
||||
if complement {
|
||||
Ok(Self::Complement(TranslateOperationComplement::new(
|
||||
set1, set2,
|
||||
)))
|
||||
} else {
|
||||
Ok(Self::Standard(TranslateOperationStandard::new(set1, set2)?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTranslator for TranslateOperation {
|
||||
fn translate(&mut self, current: u8) -> Option<u8> {
|
||||
match self {
|
||||
Self::Standard(TranslateOperationStandard { translation_map }) => Some(
|
||||
translation_map
|
||||
.iter()
|
||||
.find_map(|(l, r)| if l.eq(¤t) { Some(*r) } else { None })
|
||||
Some(
|
||||
self.translation_map
|
||||
.get(¤t)
|
||||
.copied()
|
||||
.unwrap_or(current),
|
||||
),
|
||||
Self::Complement(TranslateOperationComplement {
|
||||
iter,
|
||||
set2_iter,
|
||||
set1,
|
||||
set2,
|
||||
translation_map,
|
||||
}) => {
|
||||
// First, try to see if current char is already mapped
|
||||
// If so, return the mapped char
|
||||
// Else, pop from set2
|
||||
// If we popped something, map the next complement character to this value
|
||||
// If set2 is empty, we just map the current char directly to fallback --- to avoid looping unnecessarily
|
||||
if let Some(c) = set1.iter().find(|c| c.eq(&¤t)) {
|
||||
Some(*c)
|
||||
} else {
|
||||
while translation_map.get(¤t).is_none() {
|
||||
if let Some(value) = set2.get(*set2_iter) {
|
||||
let (next_iter, next_key) = Self::next_complement_char(*iter, &*set1);
|
||||
*iter = next_iter;
|
||||
*set2_iter = set2_iter.saturating_add(1);
|
||||
translation_map.insert(next_key, *value);
|
||||
} else {
|
||||
translation_map.insert(current, *set2.last().unwrap());
|
||||
}
|
||||
}
|
||||
Some(*translation_map.get(¤t).unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SqueezeOperation {
|
||||
set1: HashSet<u8>,
|
||||
complement: bool,
|
||||
previous: Option<u8>,
|
||||
}
|
||||
|
||||
impl SqueezeOperation {
|
||||
pub fn new(set1: Vec<u8>, complement: bool) -> Self {
|
||||
pub fn new(set1: Vec<u8>) -> Self {
|
||||
Self {
|
||||
set1: set1.into_iter().collect(),
|
||||
complement,
|
||||
previous: None,
|
||||
}
|
||||
}
|
||||
|
@ -523,24 +441,6 @@ impl SqueezeOperation {
|
|||
|
||||
impl SymbolTranslator for SqueezeOperation {
|
||||
fn translate(&mut self, current: u8) -> Option<u8> {
|
||||
if self.complement {
|
||||
let next = if self.set1.contains(¤t) {
|
||||
Some(current)
|
||||
} else {
|
||||
match self.previous {
|
||||
Some(v) => {
|
||||
if v.eq(¤t) {
|
||||
None
|
||||
} else {
|
||||
Some(current)
|
||||
}
|
||||
}
|
||||
None => Some(current),
|
||||
}
|
||||
};
|
||||
self.previous = Some(current);
|
||||
next
|
||||
} else {
|
||||
let next = if self.set1.contains(¤t) {
|
||||
match self.previous {
|
||||
Some(v) if v == current => None,
|
||||
|
@ -553,7 +453,6 @@ impl SymbolTranslator for SqueezeOperation {
|
|||
next
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||
where
|
||||
|
|
|
@ -121,26 +121,26 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
// '*_op' are the operations that need to be applied, in order.
|
||||
if delete_flag {
|
||||
if squeeze_flag {
|
||||
let delete_op = DeleteOperation::new(set1, complement_flag);
|
||||
let squeeze_op = SqueezeOperation::new(set2, false);
|
||||
let delete_op = DeleteOperation::new(set1);
|
||||
let squeeze_op = SqueezeOperation::new(set2);
|
||||
let op = delete_op.chain(squeeze_op);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let op = DeleteOperation::new(set1, complement_flag);
|
||||
let op = DeleteOperation::new(set1);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
} else if squeeze_flag {
|
||||
if sets_len < 2 {
|
||||
let op = SqueezeOperation::new(set1, complement_flag);
|
||||
let op = SqueezeOperation::new(set1);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
} else {
|
||||
let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
|
||||
let squeeze_op = SqueezeOperation::new(set2, false);
|
||||
let translate_op = TranslateOperation::new(set1, set2.clone())?;
|
||||
let squeeze_op = SqueezeOperation::new(set2);
|
||||
let op = translate_op.chain(squeeze_op);
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
} else {
|
||||
let op = TranslateOperation::new(set1, set2, complement_flag)?;
|
||||
let op = TranslateOperation::new(set1, set2)?;
|
||||
translate_input(&mut locked_stdin, &mut buffered_stdout, op);
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
@ -1313,3 +1313,24 @@ fn check_regression_class_blank() {
|
|||
.no_stderr()
|
||||
.stdout_only("a12b");
|
||||
}
|
||||
|
||||
// Check regression found in https://github.com/uutils/coreutils/issues/6163
|
||||
#[test]
|
||||
fn check_regression_issue_6163_no_match() {
|
||||
new_ucmd!()
|
||||
.args(&["-c", "-t", "Y", "Z"])
|
||||
.pipe_in("X\n")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("X\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn check_regression_issue_6163_match() {
|
||||
new_ucmd!()
|
||||
.args(&["-c", "-t", "Y", "Z"])
|
||||
.pipe_in("\0\n")
|
||||
.succeeds()
|
||||
.no_stderr()
|
||||
.stdout_only("Z\n");
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue