1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 03:57:44 +00:00

tr: make parsing of sets more terse

This commit is contained in:
Terts Diepraam 2022-01-19 20:52:06 +01:00
parent da728dd2b6
commit b51a6e8fe3

View file

@@ -8,9 +8,9 @@
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::tag, bytes::complete::tag,
character::complete::{anychar, one_of}, character::complete::{anychar, digit1},
combinator::{map, recognize}, combinator::{map, peek, value},
multi::{many0, many1}, multi::many0,
sequence::{delimited, preceded, separated_pair}, sequence::{delimited, preceded, separated_pair},
IResult, IResult,
}; };
@@ -24,7 +24,7 @@ use uucore::error::UError;
use crate::unicode_table; use crate::unicode_table;
#[derive(Debug)] #[derive(Debug, Clone)]
pub enum BadSequence { pub enum BadSequence {
MissingCharClassName, MissingCharClassName,
MissingEquivalentClassChar, MissingEquivalentClassChar,
@@ -220,32 +220,11 @@ impl Sequence {
impl Sequence { impl Sequence {
pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> { pub fn from_str(input: &str) -> Result<Vec<Sequence>, BadSequence> {
many0(alt(( many0(alt((
alt(( Sequence::parse_char_range,
Sequence::parse_char_range, Sequence::parse_char_star,
Sequence::parse_char_star, Sequence::parse_char_repeat,
Sequence::parse_char_repeat, Sequence::parse_class,
)), Sequence::parse_char_equal,
alt((
Sequence::parse_alnum,
Sequence::parse_alpha,
Sequence::parse_blank,
Sequence::parse_control,
Sequence::parse_digit,
Sequence::parse_graph,
Sequence::parse_lower,
Sequence::parse_print,
Sequence::parse_punct,
Sequence::parse_space,
Sequence::parse_upper,
Sequence::parse_xdigit,
Sequence::parse_char_equal,
)),
// NOTE: Specific error cases
alt((
Sequence::error_parse_char_repeat,
Sequence::error_parse_empty_bracket,
Sequence::error_parse_empty_equivalent_char,
)),
// NOTE: This must be the last one // NOTE: This must be the last one
map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))), map(Sequence::parse_backslash_or_char, |s| Ok(Sequence::Char(s))),
)))(input) )))(input)
@@ -297,102 +276,60 @@ impl Sequence {
fn parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
delimited( delimited(
tag("["), tag("["),
separated_pair( separated_pair(Sequence::parse_backslash_or_char, tag("*"), digit1),
Sequence::parse_backslash_or_char,
tag("*"),
recognize(many1(one_of("01234567"))),
),
tag("]"), tag("]"),
)(input) )(input)
.map(|(l, (c, str))| { .map(|(l, (c, str))| {
( (
l, l,
match usize::from_str_radix(str, 8) match usize::from_str_radix(str, 8) {
.expect("This should not fail because we only parse against 0-7") Ok(0) => Ok(Sequence::CharStar(c)),
{ Ok(count) => Ok(Sequence::CharRepeat(c, count)),
0 => Ok(Sequence::CharStar(c)), Err(_) => Err(BadSequence::InvalidRepeatCount(str.to_string())),
count => Ok(Sequence::CharRepeat(c, count)),
}, },
) )
}) })
} }
fn parse_alnum(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_class(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[:alnum:]")(input).map(|(l, _)| (l, Ok(Sequence::Alnum))) delimited(
} tag("[:"),
alt((
fn parse_alpha(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { map(
tag("[:alpha:]")(input).map(|(l, _)| (l, Ok(Sequence::Alpha))) alt((
} value(Sequence::Alnum, tag("alnum")),
value(Sequence::Alpha, tag("alpha")),
fn parse_blank(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { value(Sequence::Blank, tag("blank")),
tag("[:blank:]")(input).map(|(l, _)| (l, Ok(Sequence::Blank))) value(Sequence::Control, tag("cntrl")),
} value(Sequence::Digit, tag("digit")),
value(Sequence::Graph, tag("graph")),
fn parse_control(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { value(Sequence::Lower, tag("lower")),
tag("[:cntrl:]")(input).map(|(l, _)| (l, Ok(Sequence::Control))) value(Sequence::Print, tag("print")),
} value(Sequence::Punct, tag("punct")),
value(Sequence::Space, tag("space")),
fn parse_digit(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { value(Sequence::Upper, tag("upper")),
tag("[:digit:]")(input).map(|(l, _)| (l, Ok(Sequence::Digit))) value(Sequence::Xdigit, tag("xdigit")),
} )),
Ok,
fn parse_graph(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { ),
tag("[:graph:]")(input).map(|(l, _)| (l, Ok(Sequence::Graph))) value(Err(BadSequence::MissingCharClassName), tag("")),
} )),
tag(":]"),
fn parse_lower(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { )(input)
tag("[:lower:]")(input).map(|(l, _)| (l, Ok(Sequence::Lower)))
}
fn parse_print(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[:print:]")(input).map(|(l, _)| (l, Ok(Sequence::Print)))
}
fn parse_punct(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[:punct:]")(input).map(|(l, _)| (l, Ok(Sequence::Punct)))
}
fn parse_space(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[:space:]")(input).map(|(l, _)| (l, Ok(Sequence::Space)))
}
fn parse_upper(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[:upper:]")(input).map(|(l, _)| (l, Ok(Sequence::Upper)))
}
fn parse_xdigit(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[:xdigit:]")(input).map(|(l, _)| (l, Ok(Sequence::Xdigit)))
} }
fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> { fn parse_char_equal(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
delimited(tag("[="), Sequence::parse_backslash_or_char, tag("=]"))(input)
.map(|(l, c)| (l, Ok(Sequence::Char(c))))
}
}
impl Sequence {
fn error_parse_char_repeat(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
delimited( delimited(
tag("["), tag("[="),
separated_pair( alt((
Sequence::parse_backslash_or_char, value(
tag("*"), Err(BadSequence::MissingEquivalentClassChar),
recognize(many1(one_of("0123456789"))), peek(tag("=]")),
), ),
tag("]"), map(Sequence::parse_backslash_or_char, |c| Ok(Sequence::Char(c))),
)),
tag("=]"),
)(input) )(input)
.map(|(l, (_, n))| (l, Err(BadSequence::InvalidRepeatCount(n.to_string()))))
}
fn error_parse_empty_bracket(input: &str) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[::]")(input).map(|(l, _)| (l, Err(BadSequence::MissingCharClassName)))
}
fn error_parse_empty_equivalent_char(
input: &str,
) -> IResult<&str, Result<Sequence, BadSequence>> {
tag("[==]")(input).map(|(l, _)| (l, Err(BadSequence::MissingEquivalentClassChar)))
} }
} }