mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 20:17:45 +00:00
Finally fixed parsing octal in char ranges
Signed-off-by: Hanif Bin Ariffin <hanif.ariffin.4326@gmail.com>
This commit is contained in:
parent
5aeeb6cfe9
commit
0acc165720
1 changed file with 81 additions and 11 deletions
|
@ -1,12 +1,12 @@
|
||||||
use crate::unicode_table;
|
use crate::unicode_table;
|
||||||
use nom::{
|
use nom::{
|
||||||
branch::alt,
|
branch::alt,
|
||||||
bytes::complete::{tag, take_until},
|
bytes::complete::tag,
|
||||||
character::complete::{anychar, digit1, one_of},
|
character::complete::{anychar, digit1, one_of},
|
||||||
combinator::{map_opt, opt, recognize},
|
combinator::{map_opt, recognize},
|
||||||
multi::{many0, many_m_n},
|
multi::{many0, many_m_n},
|
||||||
sequence::{delimited, preceded, separated_pair, tuple},
|
sequence::{delimited, preceded, separated_pair},
|
||||||
take_until1, IResult,
|
IResult,
|
||||||
};
|
};
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
|
@ -34,6 +34,10 @@ impl Sequence {
|
||||||
pub fn parse_set_string(input: &str) -> Vec<Sequence> {
|
pub fn parse_set_string(input: &str) -> Vec<Sequence> {
|
||||||
many0(alt((
|
many0(alt((
|
||||||
alt((
|
alt((
|
||||||
|
Sequence::parse_char_range_octal_leftright,
|
||||||
|
Sequence::parse_char_range_octal_left,
|
||||||
|
Sequence::parse_char_range_octal_right,
|
||||||
|
Sequence::parse_char_range_backslash_collapse,
|
||||||
Sequence::parse_char_range,
|
Sequence::parse_char_range,
|
||||||
Sequence::parse_char_star,
|
Sequence::parse_char_star,
|
||||||
Sequence::parse_char_repeat,
|
Sequence::parse_char_repeat,
|
||||||
|
@ -114,6 +118,65 @@ impl Sequence {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_char_range_backslash_collapse(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
separated_pair(
|
||||||
|
preceded(tag("\\"), anychar),
|
||||||
|
tag("-"),
|
||||||
|
preceded(tag("\\"), anychar),
|
||||||
|
)(input)
|
||||||
|
.map(|(l, (a, b))| {
|
||||||
|
(l, {
|
||||||
|
let (start, end) = (u32::from(a), u32::from(b));
|
||||||
|
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_char_range_octal_left(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
separated_pair(
|
||||||
|
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||||
|
tag("-"),
|
||||||
|
anychar,
|
||||||
|
)(input)
|
||||||
|
.map(|(l, (a, b))| {
|
||||||
|
(l, {
|
||||||
|
let (start, end) = (u32::from_str_radix(a, 8).unwrap(), u32::from(b));
|
||||||
|
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_char_range_octal_right(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
separated_pair(
|
||||||
|
anychar,
|
||||||
|
tag("-"),
|
||||||
|
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||||
|
)(input)
|
||||||
|
.map(|(l, (a, b))| {
|
||||||
|
(l, {
|
||||||
|
let (start, end) = (u32::from(a), u32::from_str_radix(b, 8).unwrap());
|
||||||
|
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_char_range_octal_leftright(input: &str) -> IResult<&str, Sequence> {
|
||||||
|
separated_pair(
|
||||||
|
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||||
|
tag("-"),
|
||||||
|
preceded(tag("\\"), recognize(many_m_n(1, 3, one_of("01234567")))),
|
||||||
|
)(input)
|
||||||
|
.map(|(l, (a, b))| {
|
||||||
|
(l, {
|
||||||
|
let (start, end) = (
|
||||||
|
u32::from_str_radix(a, 8).unwrap(),
|
||||||
|
u32::from_str_radix(b, 8).unwrap(),
|
||||||
|
);
|
||||||
|
Sequence::CharRange(Box::new((start..=end).filter_map(std::char::from_u32)))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
fn parse_char_star(input: &str) -> IResult<&str, Sequence> {
|
||||||
delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c)))
|
delimited(tag("["), anychar, tag("*]"))(input).map(|(l, c)| (l, Sequence::CharStar(c)))
|
||||||
}
|
}
|
||||||
|
@ -261,6 +324,7 @@ pub trait SymbolTranslator {
|
||||||
fn translate(&mut self, current: char) -> Option<char>;
|
fn translate(&mut self, current: char) -> Option<char>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct DeleteOperation {
|
pub struct DeleteOperation {
|
||||||
set: Vec<char>,
|
set: Vec<char>,
|
||||||
complement_flag: bool,
|
complement_flag: bool,
|
||||||
|
@ -285,7 +349,7 @@ impl SymbolTranslator for DeleteOperation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug)]
|
||||||
pub struct TranslateOperationComplement {
|
pub struct TranslateOperationComplement {
|
||||||
iter: u32,
|
iter: u32,
|
||||||
set1: Vec<char>,
|
set1: Vec<char>,
|
||||||
|
@ -306,7 +370,7 @@ impl TranslateOperationComplement {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug)]
|
||||||
pub struct TranslateOperationStandard {
|
pub struct TranslateOperationStandard {
|
||||||
translation_map: HashMap<char, char>,
|
translation_map: HashMap<char, char>,
|
||||||
}
|
}
|
||||||
|
@ -322,15 +386,21 @@ impl TranslateOperationStandard {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug)]
|
||||||
pub enum TranslateOperation {
|
pub enum TranslateOperation {
|
||||||
Standard(TranslateOperationStandard),
|
Standard(TranslateOperationStandard),
|
||||||
Complement(TranslateOperationComplement),
|
Complement(TranslateOperationComplement),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TranslateOperation {
|
impl TranslateOperation {
|
||||||
fn next_complement_char(mut iter: u32) -> (u32, char) {
|
fn next_complement_char(mut iter: u32, ignore_list: &[char]) -> (u32, char) {
|
||||||
while char::from_u32(iter).is_none() {
|
while (char::from_u32(iter).is_none()
|
||||||
|
|| ignore_list
|
||||||
|
.iter()
|
||||||
|
.map(|c| u32::from(*c))
|
||||||
|
.any(|c| iter.eq(&c)))
|
||||||
|
&& iter.ne(&u32::MAX)
|
||||||
|
{
|
||||||
iter = iter.saturating_add(1)
|
iter = iter.saturating_add(1)
|
||||||
}
|
}
|
||||||
(iter.saturating_add(1), char::from_u32(iter).unwrap())
|
(iter.saturating_add(1), char::from_u32(iter).unwrap())
|
||||||
|
@ -392,7 +462,7 @@ impl SymbolTranslator for TranslateOperation {
|
||||||
while translation_map.get(¤t).is_none() {
|
while translation_map.get(¤t).is_none() {
|
||||||
if let Some(p) = set2.pop() {
|
if let Some(p) = set2.pop() {
|
||||||
let (next_index, next_value) =
|
let (next_index, next_value) =
|
||||||
TranslateOperation::next_complement_char(*iter);
|
TranslateOperation::next_complement_char(*iter, &*set1);
|
||||||
*iter = next_index;
|
*iter = next_index;
|
||||||
translation_map.insert(next_value, p);
|
translation_map.insert(next_value, p);
|
||||||
} else {
|
} else {
|
||||||
|
@ -466,7 +536,7 @@ impl SymbolTranslator for SqueezeOperation {
|
||||||
|
|
||||||
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
pub fn translate_input<T, R, W>(input: &mut R, output: &mut W, mut translator: T)
|
||||||
where
|
where
|
||||||
T: SymbolTranslator,
|
T: SymbolTranslator + Debug,
|
||||||
R: BufRead,
|
R: BufRead,
|
||||||
W: Write,
|
W: Write,
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue