diff --git a/src/uucore/src/lib/features/format/escape.rs b/src/uucore/src/lib/features/format/escape.rs
index b8c21741c..1e06a8176 100644
--- a/src/uucore/src/lib/features/format/escape.rs
+++ b/src/uucore/src/lib/features/format/escape.rs
@@ -1,6 +1,7 @@
 #[derive(Debug)]
 pub enum EscapedChar {
-    Char(u8),
+    Byte(u8),
+    Char(char),
     Backslash(u8),
     End,
 }
@@ -61,6 +62,28 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
     Some(ret)
 }
 
+/// Parse the hex digits of a `\uHHHH` (`digits` = 4) or `\UHHHHHHHH` (`digits` = 8) escape.
+///
+/// Consumes hex digits from the front of `input` and returns the `char` they encode.
+/// Returns `None` if fewer than `digits` hex digits are available or if the value is not
+/// a valid Unicode scalar value; digits read before the failure remain consumed.
+// TODO: This should print warnings and possibly halt execution when it fails to parse
+// TODO: If the value cannot be converted to a `char`, the input should be printed.
+fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
+    let (c, rest) = input.split_first()?;
+    let mut ret = Base::Hex.to_digit(*c)? as u32;
+    *input = &rest[..];
+
+    for _ in 1..digits {
+        let (c, rest) = input.split_first()?;
+        let n = Base::Hex.to_digit(*c)?;
+        ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32);
+        *input = &rest[..];
+    }
+
+    char::from_u32(ret)
+}
+
 pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
     if let [c, new_rest @ ..] = rest {
         // This is for the \NNN syntax for octal sequences.
@@ -68,33 +91,35 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
         // would be the \0NNN syntax.
         if let b'1'..=b'7' = c {
             if let Some(parsed) = parse_code(rest, Base::Oct) {
-                return EscapedChar::Char(parsed);
+                return EscapedChar::Byte(parsed);
             }
         }
 
         *rest = &new_rest[..];
         match c {
-            b'\\' => EscapedChar::Char(b'\\'),
-            b'a' => EscapedChar::Char(b'\x07'),
-            b'b' => EscapedChar::Char(b'\x08'),
+            b'\\' => EscapedChar::Byte(b'\\'),
+            b'a' => EscapedChar::Byte(b'\x07'),
+            b'b' => EscapedChar::Byte(b'\x08'),
             b'c' => return EscapedChar::End,
-            b'e' => EscapedChar::Char(b'\x1b'),
-            b'f' => EscapedChar::Char(b'\x0c'),
-            b'n' => EscapedChar::Char(b'\n'),
-            b'r' => EscapedChar::Char(b'\r'),
-            b't' => EscapedChar::Char(b'\t'),
-            b'v' => EscapedChar::Char(b'\x0b'),
+            b'e' => EscapedChar::Byte(b'\x1b'),
+            b'f' => EscapedChar::Byte(b'\x0c'),
+            b'n' => EscapedChar::Byte(b'\n'),
+            b'r' => EscapedChar::Byte(b'\r'),
+            b't' => EscapedChar::Byte(b'\t'),
+            b'v' => EscapedChar::Byte(b'\x0b'),
             b'x' => {
                 if let Some(c) = parse_code(rest, Base::Hex) {
-                    EscapedChar::Char(c)
+                    EscapedChar::Byte(c)
                 } else {
                     EscapedChar::Backslash(b'x')
                 }
             }
-            b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
+            b'0' => EscapedChar::Byte(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
+            b'u' => EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0')),
+            b'U' => EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0')),
             c => EscapedChar::Backslash(*c),
         }
     } else {
-        EscapedChar::Char(b'\\')
+        EscapedChar::Byte(b'\\')
     }
 }
diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs
index 47e6fa4cc..cfa9a034f 100644
--- a/src/uucore/src/lib/features/format/mod.rs
+++ b/src/uucore/src/lib/features/format/mod.rs
@@ -19,11 +19,12 @@
 
 // spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
 
-mod escape;
 mod argument;
+mod escape;
 pub mod num_format;
 mod spec;
 
+pub use argument::*;
 use spec::Spec;
 use std::{
     error::Error,
@@ -31,7 +32,6 @@ use std::{
     io::{stdout, Write},
     ops::ControlFlow,
 };
-pub use argument::*;
 
 use crate::error::UError;
 
@@ -91,9 +91,12 @@ impl FormatChar for u8 {
 impl FormatChar for EscapedChar {
     fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
         match self {
-            EscapedChar::Char(c) => {
-                writer.write(&[*c])?;
+            EscapedChar::Byte(c) => {
+                writer.write_all(&[*c])?;
             }
+            EscapedChar::Char(c) => {
+                write!(writer, "{c}")?;
+            }
             EscapedChar::Backslash(c) => {
-                writer.write(&[b'\\', *c])?;
+                writer.write_all(&[b'\\', *c])?;
             }
@@ -125,7 +128,7 @@ pub fn parse_spec_and_escape(
         [] => return None,
         [b'%', b'%', rest @ ..] => {
             current = rest;
-            Some(Ok(FormatItem::Char(EscapedChar::Char(b'%'))))
+            Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%'))))
         }
         [b'%', rest @ ..] => {
             current = rest;
@@ -141,7 +144,7 @@ pub fn parse_spec_and_escape(
         }
         [c, rest @ ..] => {
             current = rest;
-            Some(Ok(FormatItem::Char(EscapedChar::Char(*c))))
+            Some(Ok(FormatItem::Char(EscapedChar::Byte(*c))))
         }
     })
 }
@@ -179,7 +182,7 @@ fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
         }
         [c, rest @ ..] => {
             current = rest;
-            Some(EscapedChar::Char(*c))
+            Some(EscapedChar::Byte(*c))
         }
     })
 }