mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
printf: basic support for unicode escape sequences
This commit is contained in:
parent
066d8ba73d
commit
68d036c9a2
2 changed files with 44 additions and 20 deletions
|
@ -1,6 +1,7 @@
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum EscapedChar {
|
pub enum EscapedChar {
|
||||||
Char(u8),
|
Byte(u8),
|
||||||
|
Char(char),
|
||||||
Backslash(u8),
|
Backslash(u8),
|
||||||
End,
|
End,
|
||||||
}
|
}
|
||||||
|
@ -61,6 +62,24 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
|
||||||
Some(ret)
|
Some(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse `\uHHHH` and `\UHHHHHHHH`
|
||||||
|
// TODO: This should print warnings and possibly halt execution when it fails to parse
|
||||||
|
// TODO: If the character cannot be converted to u32, the input should be printed.
|
||||||
|
fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
|
||||||
|
let (c, rest) = input.split_first()?;
|
||||||
|
let mut ret = Base::Hex.to_digit(*c)? as u32;
|
||||||
|
*input = &rest[..];
|
||||||
|
|
||||||
|
for _ in 1..digits {
|
||||||
|
let (c, rest) = input.split_first()?;
|
||||||
|
let n = Base::Hex.to_digit(*c)?;
|
||||||
|
ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32);
|
||||||
|
*input = &rest[..];
|
||||||
|
}
|
||||||
|
|
||||||
|
char::from_u32(ret)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
|
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
|
||||||
if let [c, new_rest @ ..] = rest {
|
if let [c, new_rest @ ..] = rest {
|
||||||
// This is for the \NNN syntax for octal sequences.
|
// This is for the \NNN syntax for octal sequences.
|
||||||
|
@ -68,33 +87,35 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
|
||||||
// would be the \0NNN syntax.
|
// would be the \0NNN syntax.
|
||||||
if let b'1'..=b'7' = c {
|
if let b'1'..=b'7' = c {
|
||||||
if let Some(parsed) = parse_code(rest, Base::Oct) {
|
if let Some(parsed) = parse_code(rest, Base::Oct) {
|
||||||
return EscapedChar::Char(parsed);
|
return EscapedChar::Byte(parsed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*rest = &new_rest[..];
|
*rest = &new_rest[..];
|
||||||
match c {
|
match c {
|
||||||
b'\\' => EscapedChar::Char(b'\\'),
|
b'\\' => EscapedChar::Byte(b'\\'),
|
||||||
b'a' => EscapedChar::Char(b'\x07'),
|
b'a' => EscapedChar::Byte(b'\x07'),
|
||||||
b'b' => EscapedChar::Char(b'\x08'),
|
b'b' => EscapedChar::Byte(b'\x08'),
|
||||||
b'c' => return EscapedChar::End,
|
b'c' => return EscapedChar::End,
|
||||||
b'e' => EscapedChar::Char(b'\x1b'),
|
b'e' => EscapedChar::Byte(b'\x1b'),
|
||||||
b'f' => EscapedChar::Char(b'\x0c'),
|
b'f' => EscapedChar::Byte(b'\x0c'),
|
||||||
b'n' => EscapedChar::Char(b'\n'),
|
b'n' => EscapedChar::Byte(b'\n'),
|
||||||
b'r' => EscapedChar::Char(b'\r'),
|
b'r' => EscapedChar::Byte(b'\r'),
|
||||||
b't' => EscapedChar::Char(b'\t'),
|
b't' => EscapedChar::Byte(b'\t'),
|
||||||
b'v' => EscapedChar::Char(b'\x0b'),
|
b'v' => EscapedChar::Byte(b'\x0b'),
|
||||||
b'x' => {
|
b'x' => {
|
||||||
if let Some(c) = parse_code(rest, Base::Hex) {
|
if let Some(c) = parse_code(rest, Base::Hex) {
|
||||||
EscapedChar::Char(c)
|
EscapedChar::Byte(c)
|
||||||
} else {
|
} else {
|
||||||
EscapedChar::Backslash(b'x')
|
EscapedChar::Backslash(b'x')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b'0' => EscapedChar::Char(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
|
b'0' => EscapedChar::Byte(parse_code(rest, Base::Oct).unwrap_or(b'\0')),
|
||||||
|
b'u' => EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0')),
|
||||||
|
b'U' => EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0')),
|
||||||
c => EscapedChar::Backslash(*c),
|
c => EscapedChar::Backslash(*c),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
EscapedChar::Char(b'\\')
|
EscapedChar::Byte(b'\\')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,11 +19,12 @@
|
||||||
|
|
||||||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||||
|
|
||||||
mod escape;
|
|
||||||
mod argument;
|
mod argument;
|
||||||
|
mod escape;
|
||||||
pub mod num_format;
|
pub mod num_format;
|
||||||
mod spec;
|
mod spec;
|
||||||
|
|
||||||
|
pub use argument::*;
|
||||||
use spec::Spec;
|
use spec::Spec;
|
||||||
use std::{
|
use std::{
|
||||||
error::Error,
|
error::Error,
|
||||||
|
@ -31,7 +32,6 @@ use std::{
|
||||||
io::{stdout, Write},
|
io::{stdout, Write},
|
||||||
ops::ControlFlow,
|
ops::ControlFlow,
|
||||||
};
|
};
|
||||||
pub use argument::*;
|
|
||||||
|
|
||||||
use crate::error::UError;
|
use crate::error::UError;
|
||||||
|
|
||||||
|
@ -91,9 +91,12 @@ impl FormatChar for u8 {
|
||||||
impl FormatChar for EscapedChar {
|
impl FormatChar for EscapedChar {
|
||||||
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
|
fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
|
||||||
match self {
|
match self {
|
||||||
EscapedChar::Char(c) => {
|
EscapedChar::Byte(c) => {
|
||||||
writer.write(&[*c])?;
|
writer.write(&[*c])?;
|
||||||
}
|
}
|
||||||
|
EscapedChar::Char(c) => {
|
||||||
|
write!(writer, "{c}")?;
|
||||||
|
}
|
||||||
EscapedChar::Backslash(c) => {
|
EscapedChar::Backslash(c) => {
|
||||||
writer.write(&[b'\\', *c])?;
|
writer.write(&[b'\\', *c])?;
|
||||||
}
|
}
|
||||||
|
@ -125,7 +128,7 @@ pub fn parse_spec_and_escape(
|
||||||
[] => return None,
|
[] => return None,
|
||||||
[b'%', b'%', rest @ ..] => {
|
[b'%', b'%', rest @ ..] => {
|
||||||
current = rest;
|
current = rest;
|
||||||
Some(Ok(FormatItem::Char(EscapedChar::Char(b'%'))))
|
Some(Ok(FormatItem::Char(EscapedChar::Byte(b'%'))))
|
||||||
}
|
}
|
||||||
[b'%', rest @ ..] => {
|
[b'%', rest @ ..] => {
|
||||||
current = rest;
|
current = rest;
|
||||||
|
@ -141,7 +144,7 @@ pub fn parse_spec_and_escape(
|
||||||
}
|
}
|
||||||
[c, rest @ ..] => {
|
[c, rest @ ..] => {
|
||||||
current = rest;
|
current = rest;
|
||||||
Some(Ok(FormatItem::Char(EscapedChar::Char(*c))))
|
Some(Ok(FormatItem::Char(EscapedChar::Byte(*c))))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -179,7 +182,7 @@ fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
|
||||||
}
|
}
|
||||||
[c, rest @ ..] => {
|
[c, rest @ ..] => {
|
||||||
current = rest;
|
current = rest;
|
||||||
Some(EscapedChar::Char(*c))
|
Some(EscapedChar::Byte(*c))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue