mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 12:37:49 +00:00
Implement some more parsers
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
b0ef508b04
commit
4b45a2287c
3 changed files with 76 additions and 22 deletions
|
@ -2,7 +2,7 @@ use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{tag, take_while1},
|
bytes::complete::{tag, take_while1},
|
||||||
character::complete::{anychar, one_of},
|
character::complete::{anychar, one_of},
|
||||||
combinator::{map_opt, recognize},
|
combinator::{map_opt, recognize, value},
|
||||||
multi::{many0, many_m_n},
|
multi::{many0, many_m_n},
|
||||||
sequence::{preceded, separated_pair, tuple},
|
sequence::{preceded, separated_pair, tuple},
|
||||||
IResult,
|
IResult,
|
||||||
|
@ -13,6 +13,8 @@ use std::{
|
||||||
io::{BufRead, Write},
|
io::{BufRead, Write},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::unicode_table;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
pub enum Sequence {
|
pub enum Sequence {
|
||||||
Char(char),
|
Char(char),
|
||||||
|
@ -66,13 +68,13 @@ impl Sequence {
|
||||||
fn parse_backslash(input: &str) -> IResult<&str, Sequence> {
|
fn parse_backslash(input: &str) -> IResult<&str, Sequence> {
|
||||||
preceded(tag("\\"), anychar)(input).map(|(l, a)| {
|
preceded(tag("\\"), anychar)(input).map(|(l, a)| {
|
||||||
let c = match a {
|
let c = match a {
|
||||||
'a' => Sequence::Char('\u{0007}'),
|
'a' => Sequence::Char(unicode_table::BEL),
|
||||||
'b' => Sequence::Char('\u{0008}'),
|
'b' => Sequence::Char(unicode_table::BS),
|
||||||
'f' => Sequence::Char('\u{000C}'),
|
'f' => Sequence::Char(unicode_table::FF),
|
||||||
'n' => Sequence::Char('\u{000A}'),
|
'n' => Sequence::Char(unicode_table::LF),
|
||||||
'r' => Sequence::Char('\u{000D}'),
|
'r' => Sequence::Char(unicode_table::CR),
|
||||||
't' => Sequence::Char('\u{0009}'),
|
't' => Sequence::Char(unicode_table::HT),
|
||||||
'v' => Sequence::Char('\u{000B}'),
|
'v' => Sequence::Char(unicode_table::VT),
|
||||||
x => Sequence::Char(x),
|
x => Sequence::Char(x),
|
||||||
};
|
};
|
||||||
(l, c)
|
(l, c)
|
||||||
|
@ -129,32 +131,55 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
fn parse_alpha(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:alpha:]")(input).map(|(l, _)| {
|
value(
|
||||||
(
|
Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()),
|
||||||
l,
|
tag("[:alpha:]"),
|
||||||
Sequence::CharRange(('A'..='Z').chain('a'..='z').collect()),
|
)(input)
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
fn parse_blank(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:blank:]")(input).map(|(_, _)| todo!())
|
value(
|
||||||
|
Sequence::CharRange(vec![unicode_table::SPACE, unicode_table::HT]),
|
||||||
|
tag("[:blank:]"),
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
fn parse_control(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:cntrl:]")(input).map(|(_, _)| todo!())
|
value(
|
||||||
|
Sequence::CharRange(
|
||||||
|
(0..=31)
|
||||||
|
.chain(std::iter::once(127))
|
||||||
|
.flat_map(char::from_u32)
|
||||||
|
.collect(),
|
||||||
|
),
|
||||||
|
tag("[:cntrl:]"),
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
fn parse_digit(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:digit:]")(input).map(|(l, _)| (l, Sequence::CharRange(('0'..='9').collect())))
|
value(Sequence::CharRange(('0'..='9').collect()), tag("[:digit:]"))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
fn parse_graph(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:graph:]")(input).map(|(_, _)| todo!())
|
value(
|
||||||
|
Sequence::CharRange(
|
||||||
|
(48..=57) // digit
|
||||||
|
.chain(65..=90) // uppercase
|
||||||
|
.chain(97..=122) // lowercase
|
||||||
|
// punctuations
|
||||||
|
.chain(33..=47)
|
||||||
|
.chain(58..=64)
|
||||||
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.flat_map(char::from_u32)
|
||||||
|
.collect(),
|
||||||
|
),
|
||||||
|
tag("[:graph:]"),
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
fn parse_lower(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:lower:]")(input).map(|(l, _)| (l, Sequence::CharRange(('a'..='z').collect())))
|
value(Sequence::CharRange(('a'..='z').collect()), tag("[:lower:]"))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
fn parse_print(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
@ -162,11 +187,31 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
fn parse_punct(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:punct:]")(input).map(|(_, _)| todo!())
|
value(
|
||||||
|
Sequence::CharRange(
|
||||||
|
(33..=47)
|
||||||
|
.chain(58..=64)
|
||||||
|
.chain(91..=96)
|
||||||
|
.chain(123..=126)
|
||||||
|
.flat_map(char::from_u32)
|
||||||
|
.collect(),
|
||||||
|
),
|
||||||
|
tag("[:punct:]"),
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
fn parse_space(input: &str) -> IResult<&str, Sequence> {
|
||||||
tag("[:space:]")(input).map(|(_, _)| todo!())
|
value(
|
||||||
|
Sequence::CharRange(vec![
|
||||||
|
unicode_table::HT,
|
||||||
|
unicode_table::LF,
|
||||||
|
unicode_table::VT,
|
||||||
|
unicode_table::FF,
|
||||||
|
unicode_table::CR,
|
||||||
|
unicode_table::SPACE,
|
||||||
|
]),
|
||||||
|
tag("[:space:]"),
|
||||||
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
fn parse_upper(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
@ -177,7 +222,7 @@ impl Sequence {
|
||||||
tag("[:xdigit:]")(input).map(|(l, _)| {
|
tag("[:xdigit:]")(input).map(|(l, _)| {
|
||||||
(
|
(
|
||||||
l,
|
l,
|
||||||
Sequence::CharRange(('0'..='9').chain('A'..='Z').chain('a'..='z').collect()),
|
Sequence::CharRange(('0'..='9').chain('A'..='F').chain('a'..='f').collect()),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,7 @@ extern crate uucore;
|
||||||
extern crate nom;
|
extern crate nom;
|
||||||
|
|
||||||
mod operation;
|
mod operation;
|
||||||
|
mod unicode_table;
|
||||||
|
|
||||||
use clap::{crate_version, App, Arg};
|
use clap::{crate_version, App, Arg};
|
||||||
use nom::AsBytes;
|
use nom::AsBytes;
|
||||||
|
|
8
src/uu/tr/src/unicode_table.rs
Normal file
8
src/uu/tr/src/unicode_table.rs
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
pub static BEL: char = '\u{0007}';
|
||||||
|
pub static BS: char = '\u{0008}';
|
||||||
|
pub static HT: char = '\u{0009}';
|
||||||
|
pub static LF: char = '\u{000A}';
|
||||||
|
pub static VT: char = '\u{000B}';
|
||||||
|
pub static FF: char = '\u{000C}';
|
||||||
|
pub static CR: char = '\u{000D}';
|
||||||
|
pub static SPACE: char = '\u{0020}';
|
Loading…
Add table
Add a link
Reference in a new issue