1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

tr: raise an error when there are two chars or more in an equivalence class

This commit is contained in:
Dorian Peron 2025-02-18 04:04:36 +01:00 committed by Dorian Peron
parent 99d4fbdd85
commit 989b6ba2a0
2 changed files with 44 additions and 11 deletions

View file

@ -8,11 +8,11 @@
use crate::unicode_table;
use nom::{
branch::alt,
bytes::complete::{tag, take, take_till},
bytes::complete::{tag, take, take_till, take_until},
character::complete::one_of,
combinator::{map, map_opt, peek, recognize, value},
multi::{many0, many_m_n},
sequence::{delimited, preceded, separated_pair},
sequence::{delimited, preceded, separated_pair, terminated},
IResult, Parser,
};
use std::{
@ -39,6 +39,7 @@ pub enum BadSequence {
Set1LongerSet2EndsInClass,
ComplementMoreThanOneUniqueInSet2,
BackwardsRange { end: u32, start: u32 },
MultipleCharInEquivalence(String),
}
impl Display for BadSequence {
@ -89,6 +90,10 @@ impl Display for BadSequence {
end_or_start_to_string(end)
)
}
Self::MultipleCharInEquivalence(s) => write!(
f,
"{s}: equivalence class operand must be a single character"
),
}
}
}
@ -492,18 +497,37 @@ impl Sequence {
}
fn parse_char_equal(input: &[u8]) -> IResult<&[u8], Result<Self, BadSequence>> {
delimited(
preceded(
tag("[="),
(
alt((
value(
Err(BadSequence::MissingEquivalentClassChar),
peek(tag("=]")),
),
map(Self::parse_backslash_or_char, |c| Ok(Self::Char(c))),
value(Err(()), peek(tag("=]"))),
map(Self::parse_backslash_or_char, Ok),
)),
tag("=]"),
map(terminated(take_until("=]"), tag("=]")), |v: &[u8]| {
if v.is_empty() {
Ok(())
} else {
Err(v)
}
}),
),
)
.parse(input)
.map(|(l, (a, b))| {
(
l,
match (a, b) {
(Err(()), _) => Err(BadSequence::MissingEquivalentClassChar),
(Ok(c), Ok(())) => Ok(Self::Char(c)),
(Ok(c), Err(v)) => Err(BadSequence::MultipleCharInEquivalence(format!(
"{}{}",
String::from_utf8_lossy(&[c]).into_owned(),
String::from_utf8_lossy(v).into_owned()
))),
},
)
})
}
}

View file

@ -1166,6 +1166,15 @@ fn check_against_gnu_tr_tests_empty_eq() {
.stderr_is("tr: missing equivalence class character '[==]'\n");
}
// Runs `tr -d '[=aa=]'` on empty input: an equivalence class holding two
// characters is expected to be rejected with a diagnostic naming the operand.
#[test]
fn check_too_many_chars_in_eq() {
new_ucmd!()
.args(&["-d", "[=aa=]"])
.pipe_in("")
.fails()
// Substring match only, so text preceding the operand on the stderr line
// is not pinned by this test.
.stderr_contains("aa: equivalence class operand must be a single character\n");
}
#[test]
fn check_against_gnu_tr_tests_empty_cc() {
// ['empty-cc', qw('[::]' x), {IN=>''}, {OUT=>''}, {EXIT=>1},