From a3e68d5bbd3d063cac01f77e9870121791e24012 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 2 Aug 2023 23:57:26 +0200 Subject: [PATCH] uucore: start work on a completely new printf implementation --- src/uucore/Cargo.toml | 2 +- src/uucore/src/lib/features.rs | 6 +- src/uucore/src/lib/features/format/mod.rs | 144 +++++ src/uucore/src/lib/features/format/spec.rs | 523 ++++++++++++++++++ src/uucore/src/lib/features/memo.rs | 175 ------ src/uucore/src/lib/features/tokenize/mod.rs | 5 - .../tokenize/num_format/format_field.rs | 43 -- .../features/tokenize/num_format/formatter.rs | 59 -- .../num_format/formatters/base_conv/mod.rs | 270 --------- .../num_format/formatters/base_conv/tests.rs | 56 -- .../formatters/cninetyninehexfloatf.rs | 115 ---- .../tokenize/num_format/formatters/decf.rs | 185 ------- .../num_format/formatters/float_common.rs | 377 ------------- .../tokenize/num_format/formatters/floatf.rs | 43 -- .../tokenize/num_format/formatters/intf.rs | 282 ---------- .../tokenize/num_format/formatters/mod.rs | 9 - .../tokenize/num_format/formatters/scif.rs | 43 -- .../lib/features/tokenize/num_format/mod.rs | 4 - .../tokenize/num_format/num_format.rs | 271 --------- src/uucore/src/lib/features/tokenize/sub.rs | 452 --------------- src/uucore/src/lib/features/tokenize/token.rs | 39 -- .../lib/features/tokenize/unescaped_text.rs | 279 ---------- src/uucore/src/lib/lib.rs | 4 +- 23 files changed, 672 insertions(+), 2714 deletions(-) create mode 100644 src/uucore/src/lib/features/format/mod.rs create mode 100644 src/uucore/src/lib/features/format/spec.rs delete mode 100644 src/uucore/src/lib/features/memo.rs delete mode 100644 src/uucore/src/lib/features/tokenize/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/format_field.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatter.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/mod.rs delete mode 100644 src/uucore/src/lib/features/tokenize/num_format/num_format.rs delete mode 100644 src/uucore/src/lib/features/tokenize/sub.rs delete mode 100644 src/uucore/src/lib/features/tokenize/token.rs delete mode 100644 src/uucore/src/lib/features/tokenize/unescaped_text.rs diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index d376e807a..1c1d4c754 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -76,7 +76,7 @@ entries = ["libc"] fs = ["libc", "winapi-util", "windows-sys"] fsext = ["libc", "time", "windows-sys"] lines = [] -memo = ["itertools"] +format = ["itertools"] mode = ["libc"] perms = ["libc", "walkdir"] process = ["libc"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index f8a8d2d10..fe4839987 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -8,14 +8,12 @@ pub mod fs; pub mod fsext; #[cfg(feature = "lines")] pub mod lines; -#[cfg(feature = "memo")] -pub mod memo; +#[cfg(feature = "format")] +pub mod format; #[cfg(feature = "ringbuffer")] pub mod ringbuffer; #[cfg(feature = "sum")] pub mod sum; -#[cfg(feature = "memo")] -mod tokenize; // * (platform-specific) feature-gated modules // ** non-windows (i.e. Unix + Fuchsia) diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs new file mode 100644 index 000000000..abd92011c --- /dev/null +++ b/src/uucore/src/lib/features/format/mod.rs @@ -0,0 +1,144 @@ +//! Main entry point for our implementation of printf. +//! +//! The [`printf`] and [`sprintf`] closely match the behavior of the +//! corresponding C functions: the former renders a formatted string +//! to stdout, the latter renders to a new [`String`] object. +//! +//! In addition to the [`printf`] and [`sprintf`] functions, we expose the +//! [`Format`] struct, which represents a parsed format string. This reduces +//! the need for parsing a format string multiple times and assures that no +//! parsing errors occur during writing. +// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety + +// mod num_format; +mod spec; + +use spec::Spec; +use std::io::{stdout, Write}; + +pub enum FormatError { + SpecError, + IoError(std::io::Error), + NoMoreArguments, + InvalidArgument(FormatArgument), +} + +/// A single item to format +enum FormatItem { + /// A format specifier + Spec(Spec), + /// Some plain text + Text(Vec), + /// A single character + /// + /// Added in addition to `Text` as an optimization. + Char(u8), +} + +pub enum FormatArgument { + Char(char), + String(String), + UnsignedInt(u64), + SignedInt(i64), + Float(f64), +} + +impl FormatItem { + fn write<'a>(&self, mut writer: impl Write, args: &mut impl Iterator) -> Result<(), FormatError> { + match self { + FormatItem::Spec(spec) => spec.write(writer, args), + FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError), + FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError), + } + } +} + +fn parse_iter(fmt: &[u8]) -> impl Iterator> + '_ { + let mut rest = fmt; + std::iter::from_fn(move || { + if rest.is_empty() { + return None; + } + + match rest.iter().position(|c| *c == b'%') { + None => { + let final_text = rest; + rest = &[]; + Some(Ok(FormatItem::Text(final_text.into()))) + } + Some(0) => { + // Handle the spec + rest = &rest[1..]; + match rest.get(0) { + None => Some(Ok(FormatItem::Char(b'%'))), + Some(b'%') => { + rest = &rest[1..]; + Some(Ok(FormatItem::Char(b'%'))) + } + Some(_) => { + let spec = match Spec::parse(&mut rest) { + Some(spec) => spec, + None => return Some(Err(FormatError::SpecError)), + }; + Some(Ok(FormatItem::Spec(spec))) + } + } + } + Some(i) => { + // The `after` slice includes the % so it will be handled correctly + // in the next iteration. + let (before, after) = rest.split_at(i); + rest = after; + return Some(Ok(FormatItem::Text(before.into()))); + } + } + }) +} + +/// Write a formatted string to stdout. +/// +/// `format_string` contains the template and `args` contains the +/// arguments to render into the template. +/// +/// See also [`sprintf`], which creates a new formatted [`String`]. +/// +/// # Examples +/// +/// ```rust +/// use uucore::format::printf; +/// +/// printf("hello %s", &["world".to_string()]).unwrap(); +/// // prints "hello world" +/// ``` +pub fn printf(format_string: &[u8], arguments: impl IntoIterator) -> Result<(), FormatError> { + printf_writer(stdout(), format_string, arguments) +} + +fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIterator) -> Result<(), FormatError> { + let mut args = args.into_iter(); + for item in parse_iter(format_string) { + item?.write(&mut writer, &mut args)?; + } + Ok(()) +} + +/// Create a new formatted string. +/// +/// `format_string` contains the template and `args` contains the +/// arguments to render into the template. +/// +/// See also [`printf`], which prints to stdout. +/// +/// # Examples +/// +/// ```rust +/// use uucore::format::sprintf; +/// +/// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); +/// assert_eq!(s, "hello world".to_string()); +/// ``` +pub fn sprintf(format_string: &[u8], arguments: impl IntoIterator) -> Result, FormatError> { + let mut writer = Vec::new(); + printf_writer(&mut writer, format_string, arguments)?; + Ok(writer) +} diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs new file mode 100644 index 000000000..4319e44d9 --- /dev/null +++ b/src/uucore/src/lib/features/format/spec.rs @@ -0,0 +1,523 @@ +// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety + +use super::{FormatArgument, FormatError}; +use std::{fmt::Display, io::Write}; + +pub enum Spec { + Char { + width: Option>, + align_left: bool, + }, + String { + width: Option>, + align_left: bool, + }, + SignedInt { + width: Option>, + positive_sign: PositiveSign, + alignment: NumberAlignment, + }, + UnsignedInt { + variant: UnsignedIntVariant, + width: Option>, + alignment: NumberAlignment, + }, + Float { + variant: FloatVariant, + case: Case, + force_decimal: ForceDecimal, + width: Option>, + positive_sign: PositiveSign, + alignment: NumberAlignment, + precision: Option>, + }, +} + +#[derive(Clone, Copy)] +pub enum UnsignedIntVariant { + Decimal, + Octal(Prefix), + Hexadecimal(Case, Prefix), +} + +#[derive(Clone, Copy)] + +pub enum FloatVariant { + Decimal, + Scientific, + Shortest, + Hexadecimal, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Case { + Lowercase, + Uppercase, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Prefix { + No, + Yes, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ForceDecimal { + No, + Yes, +} + +#[derive(Clone, Copy)] +pub enum PositiveSign { + None, + Plus, + Space, +} + +#[derive(Clone, Copy)] +pub enum NumberAlignment { + Left, + RightSpace, + RightZero, +} + +/// Precision and width specified might use an asterisk to indicate that they are +/// determined by an argument. +#[derive(Clone, Copy)] +pub enum CanAsterisk { + Fixed(T), + Asterisk, +} + +/// Size of the expected type (ignored) +/// +/// We ignore this parameter entirely, but we do parse it. +/// It could be used in the future if the need arises. +enum Length { + /// signed/unsigned char ("hh") + Char, + /// signed/unsigned short int ("h") + Short, + /// signed/unsigned long int ("l") + Long, + /// signed/unsigned long long int ("ll") + LongLong, + /// intmax_t ("j") + IntMaxT, + /// size_t ("z") + SizeT, + /// ptrdiff_t ("t") + PtfDiffT, + /// long double ("L") + LongDouble, +} + +impl Spec { + pub fn parse(rest: &mut &[u8]) -> Option { + // Based on the C++ reference, the spec format looks like: + // + // %[flags][width][.precision][length]specifier + // + // However, we have already parsed the '%'. + + let mut minus = false; + let mut plus = false; + let mut space = false; + let mut hash = false; + let mut zero = false; + + while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) { + match x { + b'-' => minus = true, + b'+' => plus = true, + b' ' => space = true, + b'#' => hash = true, + b'0' => zero = true, + _ => unreachable!(), + } + *rest = &rest[1..] + } + + let width = eat_asterisk_or_number(rest); + + let precision = if let Some(b'.') = rest.get(0) { + Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0))) + } else { + None + }; + + let length = rest.get(0).and_then(|c| { + Some(match c { + b'h' => { + if let Some(b'h') = rest.get(1) { + *rest = &rest[1..]; + Length::Char + } else { + Length::Short + } + } + b'l' => { + if let Some(b'l') = rest.get(1) { + *rest = &rest[1..]; + Length::Long + } else { + Length::LongLong + } + } + b'j' => Length::IntMaxT, + b'z' => Length::SizeT, + b't' => Length::PtfDiffT, + b'L' => Length::LongDouble, + _ => return None, + }) + }); + + if length.is_some() { + *rest = &rest[1..]; + } + + Some(match rest.get(0)? { + b'c' => Spec::Char { + width, + align_left: minus, + }, + b's' => Spec::String { + width, + align_left: minus, + }, + b'd' | b'i' => Spec::SignedInt { + width, + alignment: match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }, + positive_sign: match (plus, space) { + (true, _) => PositiveSign::Plus, + (false, true) => PositiveSign::Space, + (false, false) => PositiveSign::None, + }, + }, + c @ (b'u' | b'o' | b'x' | b'X') => { + let prefix = match hash { + false => Prefix::No, + true => Prefix::Yes, + }; + let alignment = match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }; + let variant = match c { + b'u' => UnsignedIntVariant::Decimal, + b'o' => UnsignedIntVariant::Octal(prefix), + b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix), + b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix), + _ => unreachable!(), + }; + Spec::UnsignedInt { + variant, + width, + alignment, + } + } + c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float { + width, + precision, + variant: match c { + b'f' | b'F' => FloatVariant::Decimal, + b'e' | b'E' => FloatVariant::Scientific, + b'g' | b'G' => FloatVariant::Shortest, + b'a' | b'A' => FloatVariant::Hexadecimal, + _ => unreachable!(), + }, + force_decimal: match hash { + false => ForceDecimal::No, + true => ForceDecimal::Yes, + }, + case: match c.is_ascii_uppercase() { + false => Case::Lowercase, + true => Case::Uppercase, + }, + alignment: match (minus, zero) { + (true, _) => NumberAlignment::Left, + (false, true) => NumberAlignment::RightZero, + (false, false) => NumberAlignment::RightSpace, + }, + positive_sign: match (plus, space) { + (true, _) => PositiveSign::Plus, + (false, true) => PositiveSign::Space, + (false, false) => PositiveSign::None, + }, + }, + _ => return None, + }) + } + + pub fn write<'a>( + &self, + mut writer: impl Write, + mut args: impl Iterator, + ) -> Result<(), FormatError> { + match self { + &Spec::Char { width, align_left } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let arg = next_arg(&mut args)?; + match arg { + FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left), + _ => Err(FormatError::InvalidArgument(arg)), + } + } + &Spec::String { width, align_left } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let arg = next_arg(&mut args)?; + match arg { + FormatArgument::String(s) => write_padded(writer, s, width, false, align_left), + _ => Err(FormatError::InvalidArgument(arg)), + } + } + &Spec::SignedInt { + width, + positive_sign, + alignment, + } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + + let arg = next_arg(&mut args)?; + let FormatArgument::SignedInt(i) = arg else { + return Err(FormatError::InvalidArgument(arg)); + }; + + if i >= 0 { + match positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; + } + + match alignment { + NumberAlignment::Left => write!(writer, "{i: write!(writer, "{i:>width$}"), + NumberAlignment::RightZero => write!(writer, "{i:0>width$}"), + } + .map_err(FormatError::IoError) + } + &Spec::UnsignedInt { + variant, + width, + alignment, + } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + + let arg = next_arg(args)?; + let FormatArgument::SignedInt(i) = arg else { + return Err(FormatError::InvalidArgument(arg)); + }; + + let s = match variant { + UnsignedIntVariant::Decimal => format!("{i}"), + UnsignedIntVariant::Octal(Prefix::No) => format!("{i:o}"), + UnsignedIntVariant::Octal(Prefix::Yes) => format!("{i:#o}"), + UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => { + format!("{i:x}") + } + UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => { + format!("{i:#x}") + } + UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => { + format!("{i:X}") + } + UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => { + format!("{i:#X}") + } + }; + + match alignment { + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}"), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}"), + } + .map_err(FormatError::IoError) + } + &Spec::Float { + variant, + case, + force_decimal, + width, + positive_sign, + alignment, + precision, + } => { + let width = resolve_asterisk(width, &mut args)?.unwrap_or(0); + let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6); + + let arg = next_arg(args)?; + let FormatArgument::Float(f) = arg else { + return Err(FormatError::InvalidArgument(arg)); + }; + + match positive_sign { + PositiveSign::None => Ok(()), + PositiveSign::Plus => write!(writer, "+"), + PositiveSign::Space => write!(writer, " "), + } + .map_err(FormatError::IoError)?; + + let s = match variant { + FloatVariant::Decimal => format_float_decimal(f, precision, case, force_decimal), + FloatVariant::Scientific => { + format_float_scientific(f, precision, case, force_decimal) + } + FloatVariant::Shortest => format_float_shortest(f, precision, case, force_decimal), + FloatVariant::Hexadecimal => todo!(), + }; + + match alignment { + NumberAlignment::Left => write!(writer, "{s: write!(writer, "{s:>width$}"), + NumberAlignment::RightZero => write!(writer, "{s:0>width$}"), + } + .map_err(FormatError::IoError) + } + } + } +} + +fn format_float_decimal( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + if !f.is_finite() { + let mut s = format!("{f}"); + if case == Case::Lowercase { + s.make_ascii_uppercase(); + } + return s; + } + + if precision == 0 && force_decimal == ForceDecimal::Yes { + format!("{f:.0}.") + } else { + format!("{f:.*}", precision) + } +} + +fn format_float_scientific( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + // If the float is NaN, -Nan, Inf or -Inf, format like any other float + if !f.is_finite() { + let mut s = format!("{f}"); + if case == Case::Lowercase { + s.make_ascii_uppercase(); + } + return s; + } + + let exponent: i32 = f.log10().floor() as i32; + let normalized = f / 10.0_f64.powi(exponent); + + let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal { + "." + } else { + "" + }; + + let exp_char = match case { + Case::Lowercase => 'e', + Case::Uppercase => 'E', + }; + + format!( + "{normalized:.*}{additional_dot}{exp_char}{exponent:+03}", + precision + ) +} + +// TODO: This could be optimized. It's not terribly important though. +fn format_float_shortest( + f: f64, + precision: usize, + case: Case, + force_decimal: ForceDecimal, +) -> String { + let a = format_float_decimal(f, precision, case, force_decimal); + let b = format_float_scientific(f, precision, case, force_decimal); + + if a.len() > b.len() { + b + } else { + a + } +} + +fn resolve_asterisk( + option: Option>, + args: impl Iterator, +) -> Result, FormatError> { + Ok(match option { + None => None, + Some(CanAsterisk::Asterisk) => { + let arg = next_arg(args)?; + match arg { + FormatArgument::UnsignedInt(u) => match usize::try_from(u) { + Ok(u) => Some(u), + Err(_) => return Err(FormatError::InvalidArgument(arg)), + }, + _ => return Err(FormatError::InvalidArgument(arg)), + } + } + Some(CanAsterisk::Fixed(w)) => Some(w), + }) +} + +fn next_arg( + mut arguments: impl Iterator, +) -> Result { + arguments.next().ok_or(FormatError::NoMoreArguments) +} + +fn write_padded( + mut writer: impl Write, + text: impl Display, + width: usize, + pad_zero: bool, + left: bool, +) -> Result<(), FormatError> { + match (left, pad_zero) { + (false, false) => write!(writer, "{text: >width$}"), + (false, true) => write!(writer, "{text:0>width$}"), + // 0 is ignored if we pad left. + (true, _) => write!(writer, "{text: Option> { + if let Some(b'*') = rest.get(0) { + *rest = &rest[1..]; + Some(CanAsterisk::Asterisk) + } else { + eat_number(rest).map(CanAsterisk::Fixed) + } +} + +fn eat_number(rest: &mut &[u8]) -> Option { + match rest.iter().position(|b| !b.is_ascii_digit()) { + None | Some(0) => None, + Some(i) => { + // TODO: This might need to handle errors better + // For example in case of overflow. + let parsed = std::str::from_utf8(&rest[..i]).unwrap().parse().unwrap(); + *rest = &rest[i..]; + Some(parsed) + } + } +} diff --git a/src/uucore/src/lib/features/memo.rs b/src/uucore/src/lib/features/memo.rs deleted file mode 100644 index 47d04f5b8..000000000 --- a/src/uucore/src/lib/features/memo.rs +++ /dev/null @@ -1,175 +0,0 @@ -//! Main entry point for our implementation of printf. -//! -//! The [`printf`] and [`sprintf`] closely match the behavior of the -//! corresponding C functions: the former renders a formatted string -//! to stdout, the latter renders to a new [`String`] object. -use crate::display::Quotable; -use crate::error::{UResult, USimpleError}; -use crate::features::tokenize::sub::SubParser; -use crate::features::tokenize::token::Token; -use crate::features::tokenize::unescaped_text::UnescapedText; -use crate::show_warning; -use itertools::put_back_n; -use std::io::{stdout, Cursor, Write}; -use std::iter::Peekable; -use std::slice::Iter; - -/// Memo runner of printf -/// Takes a format string and arguments -/// 1. tokenize format string into tokens, consuming -/// any subst. arguments along the way. -/// 2. feeds remaining arguments into function -/// that prints tokens. -struct Memo { - tokens: Vec, -} - -fn warn_excess_args(first_arg: &str) { - show_warning!( - "ignoring excess arguments, starting with {}", - first_arg.quote() - ); -} - -impl Memo { - fn new( - writer: &mut W, - pf_string: &str, - pf_args_it: &mut Peekable>, - ) -> UResult - where - W: Write, - { - let mut pm = Self { tokens: Vec::new() }; - let mut it = put_back_n(pf_string.chars()); - let mut has_sub = false; - loop { - if let Some(x) = UnescapedText::from_it_core(writer, &mut it, false) { - pm.tokens.push(x); - } - if let Some(x) = SubParser::from_it(writer, &mut it, pf_args_it)? { - if !has_sub { - has_sub = true; - } - pm.tokens.push(x); - } - if let Some(x) = it.next() { - it.put_back(x); - } else { - break; - } - } - if !has_sub { - let mut drain = false; - if let Some(first_arg) = pf_args_it.peek() { - warn_excess_args(first_arg); - drain = true; - } - if drain { - loop { - // drain remaining args; - if pf_args_it.next().is_none() { - break; - } - } - } - } - Ok(pm) - } - fn apply(&self, writer: &mut W, pf_args_it: &mut Peekable>) - where - W: Write, - { - for tkn in &self.tokens { - tkn.write(writer, pf_args_it); - } - } - fn run_all(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()> - where - W: Write, - { - let mut arg_it = pf_args.iter().peekable(); - let pm = Self::new(writer, pf_string, &mut arg_it)?; - loop { - if arg_it.peek().is_none() { - return Ok(()); - } - pm.apply(writer, &mut arg_it); - } - } -} - -/// Write a formatted string to stdout. -/// -/// `format_string` contains the template and `args` contains the -/// arguments to render into the template. -/// -/// See also [`sprintf`], which creates a new formatted [`String`]. -/// -/// # Examples -/// -/// ```rust -/// use uucore::memo::printf; -/// -/// printf("hello %s", &["world".to_string()]).unwrap(); -/// // prints "hello world" -/// ``` -pub fn printf(format_string: &str, args: &[String]) -> UResult<()> { - let mut writer = stdout(); - Memo::run_all(&mut writer, format_string, args) -} - -/// Create a new formatted string. -/// -/// `format_string` contains the template and `args` contains the -/// arguments to render into the template. -/// -/// See also [`printf`], which prints to stdout. -/// -/// # Examples -/// -/// ```rust -/// use uucore::memo::sprintf; -/// -/// let s = sprintf("hello %s", &["world".to_string()]).unwrap(); -/// assert_eq!(s, "hello world".to_string()); -/// ``` -pub fn sprintf(format_string: &str, args: &[String]) -> UResult { - let mut writer = Cursor::new(vec![]); - Memo::run_all(&mut writer, format_string, args)?; - let buf = writer.into_inner(); - match String::from_utf8(buf) { - Ok(s) => Ok(s), - Err(e) => Err(USimpleError::new( - 1, - format!("failed to parse formatted string as UTF-8: {e}"), - )), - } -} - -#[cfg(test)] -mod tests { - - use crate::memo::sprintf; - - #[test] - fn test_sprintf_smoke() { - assert_eq!(sprintf("", &[]).unwrap(), "".to_string()); - } - - #[test] - fn test_sprintf_no_args() { - assert_eq!( - sprintf("hello world", &[]).unwrap(), - "hello world".to_string() - ); - } - - #[test] - fn test_sprintf_string() { - assert_eq!( - sprintf("hello %s", &["world".to_string()]).unwrap(), - "hello world".to_string() - ); - } -} diff --git a/src/uucore/src/lib/features/tokenize/mod.rs b/src/uucore/src/lib/features/tokenize/mod.rs deleted file mode 100644 index dfe44a0e5..000000000 --- a/src/uucore/src/lib/features/tokenize/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[allow(clippy::module_inception)] -mod num_format; -pub mod sub; -pub mod token; -pub mod unescaped_text; diff --git a/src/uucore/src/lib/features/tokenize/num_format/format_field.rs b/src/uucore/src/lib/features/tokenize/num_format/format_field.rs deleted file mode 100644 index 02998cde5..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/format_field.rs +++ /dev/null @@ -1,43 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -//! Primitives used by Sub Tokenizer -//! and num_format modules -#[derive(Clone)] -pub enum FieldType { - Strf, - Floatf, - CninetyNineHexFloatf, - Scif, - Decf, - Intf, - Charf, -} - -// #[allow(non_camel_case_types)] -// pub enum FChar { -// d, -// e, -// E, -// i, -// f, -// F, -// g, -// G, -// u, -// x, -// X, -// o -// } -// - -// a Sub Tokens' fields are stored -// as a single object so they can be more simply -// passed by ref to num_format in a Sub method -#[derive(Clone)] -pub struct FormatField<'a> { - pub min_width: Option, - pub second_field: Option, - pub field_char: &'a char, - pub field_type: &'a FieldType, - pub orig: &'a String, -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatter.rs b/src/uucore/src/lib/features/tokenize/num_format/formatter.rs deleted file mode 100644 index ed7d5a0f6..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatter.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Primitives used by num_format and sub_modules. -//! never dealt with above (e.g. Sub Tokenizer never uses these) - -use crate::{display::Quotable, show_error}; -use itertools::{put_back_n, PutBackN}; -use std::str::Chars; - -use super::format_field::FormatField; - -// contains the rough ingredients to final -// output for a number, organized together -// to allow for easy generalization of output manipulation -// (e.g. max number of digits after decimal) -#[derive(Default)] -pub struct FormatPrimitive { - pub prefix: Option, - pub pre_decimal: Option, - pub post_decimal: Option, - pub suffix: Option, -} - -#[derive(Clone, PartialEq, Eq)] -pub enum Base { - Ten = 10, - Hex = 16, - Octal = 8, -} - -// information from the beginning of a numeric argument -// the precedes the beginning of a numeric value -pub struct InitialPrefix { - pub radix_in: Base, - pub sign: i8, - pub offset: usize, -} - -pub trait Formatter { - // return a FormatPrimitive for - // particular field char(s), given the argument - // string and prefix information (sign, radix) - fn get_primitive( - &self, - field: &FormatField, - in_prefix: &InitialPrefix, - str_in: &str, - ) -> Option; - // return a string from a FormatPrimitive, - // given information about the field - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String; -} -pub fn get_it_at(offset: usize, str_in: &str) -> PutBackN { - put_back_n(str_in[offset..].chars()) -} - -// TODO: put this somewhere better -pub fn warn_incomplete_conv(pf_arg: &str) { - // important: keep println here not print - show_error!("{}: value not completely converted", pf_arg.maybe_quote()); -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs deleted file mode 100644 index 3df9f7129..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/mod.rs +++ /dev/null @@ -1,270 +0,0 @@ -// spell-checker:ignore (ToDO) arrnum arr_num mult basenum bufferval refd vals arrfloat conv intermed addl - -pub fn arrnum_int_mult(arr_num: &[u8], basenum: u8, base_ten_int_fact: u8) -> Vec { - let mut carry: u16 = 0; - let mut rem: u16; - let mut new_amount: u16; - let fact: u16 = u16::from(base_ten_int_fact); - let base: u16 = u16::from(basenum); - - let mut ret_rev: Vec = Vec::new(); - let mut it = arr_num.iter().rev(); - loop { - let i = it.next(); - match i { - Some(u) => { - new_amount = (u16::from(*u) * fact) + carry; - rem = new_amount % base; - carry = (new_amount - rem) / base; - ret_rev.push(rem as u8); - } - None => { - while carry != 0 { - rem = carry % base; - carry = (carry - rem) / base; - ret_rev.push(rem as u8); - } - break; - } - } - } - let ret: Vec = ret_rev.into_iter().rev().collect(); - ret -} - -#[allow(dead_code)] -pub struct Remainder<'a> { - pub position: usize, - pub replace: Vec, - pub arr_num: &'a Vec, -} - -#[allow(dead_code)] -pub struct DivOut<'a> { - pub quotient: u8, - pub remainder: Remainder<'a>, -} - -#[allow(dead_code)] -pub fn arrnum_int_div_step<'a>( - rem_in: &'a Remainder, - radix_in: u8, - base_ten_int_divisor: u8, - after_decimal: bool, -) -> DivOut<'a> { - let mut rem_out = Remainder { - position: rem_in.position, - replace: Vec::new(), - arr_num: rem_in.arr_num, - }; - - let mut bufferval: u16 = 0; - let base: u16 = u16::from(radix_in); - let divisor: u16 = u16::from(base_ten_int_divisor); - let mut traversed = 0; - - let mut quotient = 0; - let refd_vals = &rem_in.arr_num[rem_in.position + rem_in.replace.len()..]; - let mut it_replace = rem_in.replace.iter(); - let mut it_f = refd_vals.iter(); - loop { - let u = match it_replace.next() { - Some(u_rep) => u16::from(*u_rep), - None => match it_f.next() { - Some(u_orig) => u16::from(*u_orig), - None => { - if !after_decimal { - break; - } - 0 - } - }, - }; - traversed += 1; - bufferval += u; - if bufferval > divisor { - while bufferval >= divisor { - quotient += 1; - bufferval -= divisor; - } - rem_out.replace = if bufferval == 0 { - Vec::new() - } else { - let remainder_as_arrnum = unsigned_to_arrnum(bufferval); - base_conv_vec(&remainder_as_arrnum, 10, radix_in) - }; - rem_out.position += 1 + (traversed - rem_out.replace.len()); - break; - } else { - bufferval *= base; - } - } - DivOut { - quotient, - remainder: rem_out, - } -} -pub fn arrnum_int_add(arrnum: &[u8], basenum: u8, base_ten_int_term: u8) -> Vec { - let mut carry: u16 = u16::from(base_ten_int_term); - let mut rem: u16; - let mut new_amount: u16; - let base: u16 = u16::from(basenum); - - let mut ret_rev: Vec = Vec::new(); - let mut it = arrnum.iter().rev(); - loop { - let i = it.next(); - match i { - Some(u) => { - new_amount = u16::from(*u) + carry; - rem = new_amount % base; - carry = (new_amount - rem) / base; - ret_rev.push(rem as u8); - } - None => { - while carry != 0 { - rem = carry % base; - carry = (carry - rem) / base; - ret_rev.push(rem as u8); - } - break; - } - } - } - let ret: Vec = ret_rev.into_iter().rev().collect(); - ret -} - -pub fn base_conv_vec(src: &[u8], radix_src: u8, radix_dest: u8) -> Vec { - let mut result = vec![0]; - for i in src { - result = arrnum_int_mult(&result, radix_dest, radix_src); - result = arrnum_int_add(&result, radix_dest, *i); - } - result -} - -#[allow(dead_code)] -pub fn unsigned_to_arrnum(src: u16) -> Vec { - let mut result: Vec = Vec::new(); - let mut src_tmp: u16 = src; - while src_tmp > 0 { - result.push((src_tmp % 10) as u8); - src_tmp /= 10; - } - result.reverse(); - result -} - -// temporary needs-improvement-function -pub fn base_conv_float(src: &[u8], radix_src: u8, _radix_dest: u8) -> f64 { - // it would require a lot of addl code - // to implement this for arbitrary string input. - // until then, the below operates as an outline - // of how it would work. - let mut factor: f64 = 1_f64; - let radix_src_float: f64 = f64::from(radix_src); - let mut r: f64 = 0_f64; - for (i, u) in src.iter().enumerate() { - if i > 15 { - break; - } - factor /= radix_src_float; - r += factor * f64::from(*u); - } - r -} - -pub fn str_to_arrnum(src: &str, radix_def_src: &dyn RadixDef) -> Vec { - let mut intermed_in: Vec = Vec::new(); - for c in src.chars() { - #[allow(clippy::single_match)] - match radix_def_src.parse_char(c) { - Some(u) => { - intermed_in.push(u); - } - None => {} //todo err msg on incorrect - } - } - intermed_in -} - -pub fn arrnum_to_str(src: &[u8], radix_def_dest: &dyn RadixDef) -> String { - let mut str_out = String::new(); - for u in src.iter() { - #[allow(clippy::single_match)] - match radix_def_dest.format_u8(*u) { - Some(c) => { - str_out.push(c); - } - None => {} //todo - } - } - str_out -} - -pub fn base_conv_str( - src: &str, - radix_def_src: &dyn RadixDef, - radix_def_dest: &dyn RadixDef, -) -> String { - let intermed_in: Vec = str_to_arrnum(src, radix_def_src); - let intermed_out = base_conv_vec( - &intermed_in, - radix_def_src.get_max(), - radix_def_dest.get_max(), - ); - arrnum_to_str(&intermed_out, radix_def_dest) -} - -pub trait RadixDef { - fn get_max(&self) -> u8; - fn parse_char(&self, x: char) -> Option; - fn format_u8(&self, x: u8) -> Option; -} -pub struct RadixTen; - -const ZERO_ASC: u8 = b'0'; -const UPPER_A_ASC: u8 = b'A'; -const LOWER_A_ASC: u8 = b'a'; - -impl RadixDef for RadixTen { - fn get_max(&self) -> u8 { - 10 - } - fn parse_char(&self, c: char) -> Option { - match c { - '0'..='9' => Some(c as u8 - ZERO_ASC), - _ => None, - } - } - fn format_u8(&self, u: u8) -> Option { - match u { - 0..=9 => Some((ZERO_ASC + u) as char), - _ => None, - } - } -} -pub struct RadixHex; -impl RadixDef for RadixHex { - fn get_max(&self) -> u8 { - 16 - } - fn parse_char(&self, c: char) -> Option { - match c { - '0'..='9' => Some(c as u8 - ZERO_ASC), - 'A'..='F' => Some(c as u8 + 10 - UPPER_A_ASC), - 'a'..='f' => Some(c as u8 + 10 - LOWER_A_ASC), - _ => None, - } - } - fn format_u8(&self, u: u8) -> Option { - match u { - 0..=9 => Some((ZERO_ASC + u) as char), - 10..=15 => Some((UPPER_A_ASC + (u - 10)) as char), - _ => None, - } - } -} - -mod tests; diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs deleted file mode 100644 index 903a3faf1..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/base_conv/tests.rs +++ /dev/null @@ -1,56 +0,0 @@ -// spell-checker:ignore (ToDO) arrnum mult - -#[cfg(test)] -use super::*; - -#[test] -fn test_arrnum_int_mult() { - // (in base 10) 12 * 4 = 48 - let factor: Vec = vec![1, 2]; - let base_num = 10; - let base_ten_int_fact: u8 = 4; - let should_output: Vec = vec![4, 8]; - - let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact); - assert!(product == should_output); -} - -#[test] -fn test_arrnum_int_non_base_10() { - // (in base 3) - // 5 * 4 = 20 - let factor: Vec = vec![1, 2]; - let base_num = 3; - let base_ten_int_fact: u8 = 4; - let should_output: Vec = vec![2, 0, 2]; - - let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact); - assert!(product == should_output); -} - -#[test] -fn test_arrnum_int_div_short_circuit() { - // ( - let arrnum: Vec = vec![5, 5, 5, 5, 0]; - let base_num = 10; - let base_ten_int_divisor: u8 = 41; - let remainder_passed_in = Remainder { - position: 1, - replace: vec![1, 3], - arr_num: &arrnum, - }; - - // the "replace" should mean the number being divided - // is 1350, the first time you can get 41 to go into - // 1350, its at 135, where you can get a quotient of - // 3 and a remainder of 12; - - let quotient_should_be: u8 = 3; - let remainder_position_should_be: usize = 3; - let remainder_replace_should_be = vec![1, 2]; - - let result = arrnum_int_div_step(&remainder_passed_in, base_num, base_ten_int_divisor, false); - assert!(quotient_should_be == result.quotient); - assert!(remainder_position_should_be == result.remainder.position); - assert!(remainder_replace_should_be == result.remainder.replace); -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs deleted file mode 100644 index a5c51153e..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/cninetyninehexfloatf.rs +++ /dev/null @@ -1,115 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -//! formatter for %a %F C99 Hex-floating-point subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::base_conv; -use super::base_conv::RadixDef; -use super::float_common::{primitive_to_str_common, FloatAnalysis}; - -#[derive(Default)] -pub struct CninetyNineHexFloatf { - #[allow(dead_code)] - as_num: f64, -} -impl CninetyNineHexFloatf { - pub fn new() -> Self { - Self::default() - } -} - -impl Formatter for CninetyNineHexFloatf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - Some(second_field as usize), - None, - true, - ); - let f = get_primitive_hex( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - *field.field_char == 'A', - ); - Some(f) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} - -// c99 hex has unique requirements of all floating point subs in pretty much every part of building a primitive, from prefix and suffix to need for base conversion (in all other cases if you don't have decimal you must have decimal, here it's the other way around) - -// on the todo list is to have a trait for get_primitive that is implemented by each float formatter and can override a default. when that happens we can take the parts of get_primitive_dec specific to dec and spin them out to their own functions that can be overridden. -fn get_primitive_hex( - initial_prefix: &InitialPrefix, - _str_in: &str, - _analysis: &FloatAnalysis, - _last_dec_place: usize, - capitalized: bool, -) -> FormatPrimitive { - let prefix = Some(String::from(if initial_prefix.sign == -1 { - "-0x" - } else { - "0x" - })); - - // TODO actual conversion, make sure to get back mantissa. - // for hex to hex, it's really just a matter of moving the - // decimal point and calculating the mantissa by its initial - // position and its moves, with every position counting for - // the addition or subtraction of 4 (2**4, because 4 bits in a hex digit) - // to the exponent. - // decimal's going to be a little more complicated. correct simulation - // of glibc will require after-decimal division to a specified precision. - // the difficult part of this (arrnum_int_div_step) is already implemented. - - // the hex float name may be a bit misleading in terms of how to go about the - // conversion. The best way to do it is to just convert the float number - // directly to base 2 and then at the end translate back to hex. - let mantissa = 0; - let suffix = Some({ - let ind = if capitalized { "P" } else { "p" }; - if mantissa >= 0 { - format!("{ind}+{mantissa}") - } else { - format!("{ind}{mantissa}") - } - }); - FormatPrimitive { - prefix, - suffix, - ..Default::default() - } -} - -#[allow(dead_code)] -fn to_hex(src: &str, before_decimal: bool) -> String { - let radix_ten = base_conv::RadixTen; - let radix_hex = base_conv::RadixHex; - if before_decimal { - base_conv::base_conv_str(src, &radix_ten, &radix_hex) - } else { - let as_arrnum_ten = base_conv::str_to_arrnum(src, &radix_ten); - let s = format!( - "{}", - base_conv::base_conv_float(&as_arrnum_ten, radix_ten.get_max(), radix_hex.get_max()) - ); - if s.len() > 2 { - String::from(&s[2..]) - } else { - // zero - s - } - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs deleted file mode 100644 index 2ee53882e..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/decf.rs +++ /dev/null @@ -1,185 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -//! formatter for %g %G decimal subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; - -const SIGNIFICANT_FIGURES: usize = 6; - -// Parse a numeric string as the nearest integer with a given significance. -// This is a helper function for round(). -// Examples: -// round_to_significance("456", 1) == 500 -// round_to_significance("456", 2) == 460 -// round_to_significance("456", 9) == 456 -fn round_to_significance(input: &str, significant_figures: usize) -> u32 { - if significant_figures < input.len() { - // If the input has too many digits, use a float intermediary - // to round it before converting to an integer. Otherwise, - // converting straight to integer will truncate. - // There might be a cleaner way to do this... - let digits = &input[..significant_figures + 1]; - let float_representation = digits.parse::().unwrap(); - (float_representation / 10.0).round() as u32 - } else { - input.parse::().unwrap_or(0) - } -} - -// Removing trailing zeroes, expressing the result as an integer where -// possible. This is a helper function for round(). -fn truncate(mut format: FormatPrimitive) -> FormatPrimitive { - if let Some(ref post_dec) = format.post_decimal { - let trimmed = post_dec.trim_end_matches('0'); - - if trimmed.is_empty() { - // If there are no nonzero digits after the decimal point, - // use integer formatting by clearing post_decimal and suffix. - format.post_decimal = Some(String::new()); - if format.suffix == Some("e+00".into()) { - format.suffix = Some(String::new()); - } - } else if trimmed.len() != post_dec.len() { - // Otherwise, update the format to remove only the trailing - // zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were - // no trailing zeroes, do nothing. - format.post_decimal = Some(trimmed.to_owned()); - } - } - format -} - -// Round a format to six significant figures and remove trailing zeroes. -fn round(mut format: FormatPrimitive) -> FormatPrimitive { - let mut significant_digits_remaining = SIGNIFICANT_FIGURES; - - // First, take as many significant digits as possible from pre_decimal, - if format.pre_decimal.is_some() { - let input = format.pre_decimal.as_ref().unwrap(); - let rounded = round_to_significance(input, significant_digits_remaining); - let mut rounded_str = rounded.to_string(); - significant_digits_remaining -= rounded_str.len(); - - // If the pre_decimal has exactly enough significant digits, - // round the input to the nearest integer. If the first - // post_decimal digit is 5 or higher, round up by incrementing - // the pre_decimal number. Otherwise, use the pre_decimal as-is. - if significant_digits_remaining == 0 { - if let Some(digits) = &format.post_decimal { - if digits.chars().next().unwrap_or('0') >= '5' { - let rounded = rounded + 1; - rounded_str = rounded.to_string(); - } - } - } - format.pre_decimal = Some(rounded_str); - } - - // If no significant digits remain, or there's no post_decimal to - // round, return the rounded pre_decimal value with no post_decimal. - // Otherwise, round the post_decimal to the remaining significance. - if significant_digits_remaining == 0 { - format.post_decimal = Some(String::new()); - } else if let Some(input) = format.post_decimal { - let leading_zeroes = input.len() - input.trim_start_matches('0').len(); - let digits = &input[leading_zeroes..]; - - // In the post_decimal, leading zeroes are significant. "01.0010" - // has one significant digit in pre_decimal, and 3 from post_decimal. - let mut post_decimal_str = String::with_capacity(significant_digits_remaining); - for _ in 0..leading_zeroes { - post_decimal_str.push('0'); - } - - if leading_zeroes < significant_digits_remaining { - // After significant leading zeroes, round the remaining digits - // to any remaining significance. - let rounded = round_to_significance(digits, significant_digits_remaining); - post_decimal_str.push_str(&rounded.to_string()); - } else if leading_zeroes == significant_digits_remaining - && digits.chars().next().unwrap_or('0') >= '5' - { - // If necessary, round up the post_decimal ("1.000009" should - // round to 1.00001, instead of truncating after the last - // significant leading zero). - post_decimal_str.pop(); - post_decimal_str.push('1'); - } else { - // If the rounded post_decimal is entirely zeroes, discard - // it and use integer formatting instead. - post_decimal_str = String::new(); - } - - format.post_decimal = Some(post_decimal_str); - } - truncate(format) -} - -// Given an exponent used in scientific notation, return whether the -// number is small enough to be expressed as a decimal instead. "Small -// enough" is based only on the number's magnitude, not the length of -// any string representation. -fn should_represent_as_decimal(suffix: &Option) -> bool { - match suffix { - Some(exponent) => { - if exponent.chars().nth(1) == Some('-') { - exponent < &"e-05".into() - } else { - exponent < &"e+06".into() - } - } - None => true, - } -} - -pub struct Decf; - -impl Decf { - pub fn new() -> Self { - Self - } -} -impl Formatter for Decf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - // default to scif interpretation so as to not truncate input vals - // (that would be displayed in scif) based on relation to decimal place - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - Some(second_field as usize + 1), - None, - false, - ); - let mut f_dec = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - Some(*field.field_char == 'G'), - ); - - if should_represent_as_decimal(&f_dec.suffix) { - // Use decimal formatting instead of scientific notation - // if the input's magnitude is small. - f_dec = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - None, - ); - } - - Some(round(f_dec)) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs deleted file mode 100644 index e0a29217c..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/float_common.rs +++ /dev/null @@ -1,377 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -use super::super::format_field::FormatField; -use super::super::formatter::{ - get_it_at, warn_incomplete_conv, Base, FormatPrimitive, InitialPrefix, -}; -use super::base_conv; -use super::base_conv::RadixDef; - -// if the memory, copy, and comparison cost of chars -// becomes an issue, we can always operate in vec here -// rather than just at de_hex - -pub struct FloatAnalysis { - pub len_important: usize, - // none means no decimal point. - pub decimal_pos: Option, - pub follow: Option, -} -fn has_enough_digits( - hex_input: bool, - hex_output: bool, - string_position: usize, - starting_position: usize, - limit: usize, -) -> bool { - // -1s are for rounding - if hex_output { - if hex_input { - (string_position - 1) - starting_position >= limit - } else { - false //undecidable without converting - } - } else if hex_input { - (((string_position - 1) - starting_position) * 9) / 8 >= limit - } else { - (string_position - 1) - starting_position >= limit - } -} - -impl FloatAnalysis { - #[allow(clippy::cognitive_complexity)] - pub fn analyze( - str_in: &str, - initial_prefix: &InitialPrefix, - max_sd_opt: Option, - max_after_dec_opt: Option, - hex_output: bool, - ) -> Self { - // this fn assumes - // the input string - // has no leading spaces or 0s - let str_it = get_it_at(initial_prefix.offset, str_in); - let mut ret = Self { - len_important: 0, - decimal_pos: None, - follow: None, - }; - let hex_input = match initial_prefix.radix_in { - Base::Hex => true, - Base::Ten => false, - Base::Octal => { - panic!("this should never happen: floats should never receive octal input"); - } - }; - let mut i = 0; - let mut pos_before_first_nonzero_after_decimal: Option = None; - for c in str_it { - match c { - e @ ('0'..='9' | 'A'..='F' | 'a'..='f') => { - if !hex_input { - match e { - '0'..='9' => {} - _ => { - warn_incomplete_conv(str_in); - break; - } - } - } - if ret.decimal_pos.is_some() - && pos_before_first_nonzero_after_decimal.is_none() - && e != '0' - { - pos_before_first_nonzero_after_decimal = Some(i - 1); - } - if let Some(max_sd) = max_sd_opt { - if i == max_sd { - // follow is used in cases of %g - // where the character right after the last - // sd is considered is rounded affecting - // the previous digit in 1/2 of instances - ret.follow = Some(e); - } else if ret.decimal_pos.is_some() && i > max_sd { - break; - } - } - if let Some(max_after_dec) = max_after_dec_opt { - if let Some(p) = ret.decimal_pos { - if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) { - break; - } - } - } else if let Some(max_sd) = max_sd_opt { - if let Some(p) = pos_before_first_nonzero_after_decimal { - if has_enough_digits(hex_input, hex_output, i, p, max_sd) { - break; - } - } - } - } - '.' => { - if ret.decimal_pos.is_none() { - ret.decimal_pos = Some(i); - } else { - warn_incomplete_conv(str_in); - break; - } - } - _ => { - warn_incomplete_conv(str_in); - break; - } - }; - i += 1; - } - ret.len_important = i; - ret - } -} - -fn de_hex(src: &str, before_decimal: bool) -> String { - let radix_ten = base_conv::RadixTen; - let radix_hex = base_conv::RadixHex; - if before_decimal { - base_conv::base_conv_str(src, &radix_hex, &radix_ten) - } else { - let as_arrnum_hex = base_conv::str_to_arrnum(src, &radix_hex); - let s = format!( - "{}", - base_conv::base_conv_float(&as_arrnum_hex, radix_hex.get_max(), radix_ten.get_max()) - ); - if s.len() > 2 { - String::from(&s[2..]) - } else { - // zero - s - } - } -} - -// takes a string in, -// truncates to a position, -// bumps the last digit up one, -// and if the digit was nine -// propagate to the next, etc. -// If before the decimal and the most -// significant digit is a 9, it becomes a 1 -fn _round_str_from(in_str: &str, position: usize, before_dec: bool) -> (String, bool) { - let mut it = in_str[0..position].chars(); - let mut rev = String::new(); - let mut i = position; - let mut finished_in_dec = false; - while let Some(c) = it.next_back() { - i -= 1; - match c { - '9' => { - // If we're before the decimal - // and on the most significant digit, - // round 9 to 1, else to 0. - if before_dec && i == 0 { - rev.push('1'); - } else { - rev.push('0'); - } - } - e => { - rev.push(((e as u8) + 1) as char); - finished_in_dec = true; - break; - } - } - } - let mut fwd = String::from(&in_str[0..i]); - for ch in rev.chars().rev() { - fwd.push(ch); - } - (fwd, finished_in_dec) -} - -fn round_terminal_digit( - before_dec: String, - after_dec: String, - position: usize, -) -> (String, String, bool) { - if position < after_dec.len() { - let digit_at_pos: char; - { - digit_at_pos = after_dec[position..=position].chars().next().expect(""); - } - if let '5'..='9' = digit_at_pos { - let (new_after_dec, finished_in_dec) = _round_str_from(&after_dec, position, false); - if finished_in_dec { - return (before_dec, new_after_dec, false); - } else { - let (new_before_dec, _) = _round_str_from(&before_dec, before_dec.len(), true); - let mut dec_place_chg = false; - let mut before_dec_chars = new_before_dec.chars(); - if before_dec_chars.next() == Some('1') && before_dec_chars.all(|c| c == '0') { - // If the first digit is a one and remaining are zeros, we have - // rounded to a new decimal place, so the decimal place must be updated. - // Only update decimal place if the before decimal != 0 - dec_place_chg = before_dec != "0"; - } - return (new_before_dec, new_after_dec, dec_place_chg); - } - // TODO - } - } - (before_dec, after_dec, false) -} - -#[allow(clippy::cognitive_complexity)] -pub fn get_primitive_dec( - initial_prefix: &InitialPrefix, - str_in: &str, - analysis: &FloatAnalysis, - last_dec_place: usize, - sci_mode: Option, -) -> FormatPrimitive { - let mut f = FormatPrimitive::default(); - - // add negative sign section - if initial_prefix.sign == -1 { - f.prefix = Some(String::from("-")); - } - - // assign the digits before and after the decimal points - // to separate slices. If no digits after decimal point, - // assign 0 - let (mut first_segment_raw, second_segment_raw) = match analysis.decimal_pos { - Some(pos) => (&str_in[..pos], &str_in[pos + 1..]), - None => (str_in, "0"), - }; - if first_segment_raw.is_empty() { - first_segment_raw = "0"; - } - // convert to string, de_hexifying if input is in hex // spell-checker:disable-line - let (first_segment, second_segment) = match initial_prefix.radix_in { - Base::Hex => ( - de_hex(first_segment_raw, true), - de_hex(second_segment_raw, false), - ), - _ => ( - String::from(first_segment_raw), - String::from(second_segment_raw), - ), - }; - let (pre_dec_unrounded, post_dec_unrounded, mut mantissa) = if sci_mode.is_some() { - if first_segment.len() > 1 { - let mut post_dec = String::from(&first_segment[1..]); - post_dec.push_str(&second_segment); - ( - String::from(&first_segment[0..1]), - post_dec, - first_segment.len() as isize - 1, - ) - } else { - match first_segment - .chars() - .next() - .expect("float_common: no chars in first segment.") - { - '0' => { - let it = second_segment.chars().enumerate(); - let mut m: isize = 0; - let mut pre = String::from("0"); - let mut post = String::from("0"); - for (i, c) in it { - match c { - '0' => {} - _ => { - m = -((i as isize) + 1); - pre = String::from(&second_segment[i..=i]); - post = String::from(&second_segment[i + 1..]); - break; - } - } - } - (pre, post, m) - } - _ => (first_segment, second_segment, 0), - } - } - } else { - (first_segment, second_segment, 0) - }; - - let (pre_dec_draft, post_dec_draft, dec_place_chg) = - round_terminal_digit(pre_dec_unrounded, post_dec_unrounded, last_dec_place - 1); - f.post_decimal = Some(post_dec_draft); - if let Some(capitalized) = sci_mode { - let si_ind = if capitalized { 'E' } else { 'e' }; - // Increase the mantissa if we're adding a decimal place - if dec_place_chg { - mantissa += 1; - } - f.suffix = Some(if mantissa >= 0 { - format!("{si_ind}+{mantissa:02}") - } else { - // negative sign is considered in format!s - // leading zeroes - format!("{si_ind}{mantissa:03}") - }); - f.pre_decimal = Some(pre_dec_draft); - } else if dec_place_chg { - // We've rounded up to a new decimal place so append 0 - f.pre_decimal = Some(pre_dec_draft + "0"); - } else { - f.pre_decimal = Some(pre_dec_draft); - } - - f -} - -pub fn primitive_to_str_common(prim: &FormatPrimitive, field: &FormatField) -> String { - let mut final_str = String::new(); - if let Some(ref prefix) = prim.prefix { - final_str.push_str(prefix); - } - match prim.pre_decimal { - Some(ref pre_decimal) => { - final_str.push_str(pre_decimal); - } - None => { - panic!( - "error, format primitives provided to int, will, incidentally under correct \ - behavior, always have a pre_dec value." - ); - } - } - let decimal_places = field.second_field.unwrap_or(6); - match prim.post_decimal { - Some(ref post_decimal) => { - if !post_decimal.is_empty() && decimal_places > 0 { - final_str.push('.'); - let len_avail = post_decimal.len() as u32; - - if decimal_places >= len_avail { - // println!("dec {}, len avail {}", decimal_places, len_avail); - final_str.push_str(post_decimal); - - if *field.field_char != 'g' && *field.field_char != 'G' { - let diff = decimal_places - len_avail; - for _ in 0..diff { - final_str.push('0'); - } - } - } else { - // println!("printing to only {}", decimal_places); - final_str.push_str(&post_decimal[0..decimal_places as usize]); - } - } - } - None => { - panic!( - "error, format primitives provided to int, will, incidentally under correct \ - behavior, always have a pre_dec value." - ); - } - } - if let Some(ref suffix) = prim.suffix { - final_str.push_str(suffix); - } - - final_str -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs deleted file mode 100644 index cca2750dc..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/floatf.rs +++ /dev/null @@ -1,43 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -//! formatter for %f %F common-notation floating-point subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; - -#[derive(Default)] -pub struct Floatf; -impl Floatf { - pub fn new() -> Self { - Self - } -} -impl Formatter for Floatf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - None, - Some(second_field as usize), - false, - ); - let f = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - None, - ); - Some(f) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs deleted file mode 100644 index 0f6e78de6..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/intf.rs +++ /dev/null @@ -1,282 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety -// spell-checker:ignore (ToDO) arrnum - -//! formatter for unsigned and signed int subs -//! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64) -//! signed int: %i %d (both base ten i64) -use super::super::format_field::FormatField; -use super::super::formatter::{ - get_it_at, warn_incomplete_conv, Base, FormatPrimitive, Formatter, InitialPrefix, -}; -use std::i64; -use std::u64; - -#[derive(Default)] -pub struct Intf { - _a: u32, -} - -// see the Intf::analyze() function below -struct IntAnalysis { - check_past_max: bool, - past_max: bool, - is_zero: bool, - len_digits: u8, -} - -impl Intf { - pub fn new() -> Self { - Self::default() - } - // take a ref to argument string, and basic information - // about prefix (offset, radix, sign), and analyze string - // to gain the IntAnalysis information above - // check_past_max: true if the number *may* be above max, - // but we don't know either way. One of several reasons - // we may have to parse as int. - // past_max: true if the object is past max, false if not - // in the future we should probably combine these into an - // Option - // is_zero: true if number is zero, false otherwise - // len_digits: length of digits used to create the int - // important, for example, if we run into a non-valid character - #[allow(clippy::cognitive_complexity)] - fn analyze(str_in: &str, signed_out: bool, initial_prefix: &InitialPrefix) -> IntAnalysis { - // the maximum number of digits we could conceivably - // have before the decimal point without exceeding the - // max - let mut str_it = get_it_at(initial_prefix.offset, str_in); - let max_sd_in = if signed_out { - match initial_prefix.radix_in { - Base::Ten => 19, - Base::Octal => 21, - Base::Hex => 16, - } - } else { - match initial_prefix.radix_in { - Base::Ten => 20, - Base::Octal => 22, - Base::Hex => 16, - } - }; - let mut ret = IntAnalysis { - check_past_max: false, - past_max: false, - is_zero: false, - len_digits: 0, - }; - - // todo turn this to a while let now that we know - // no special behavior on EOI break - loop { - let c_opt = str_it.next(); - if let Some(c) = c_opt { - match c { - '0'..='9' | 'a'..='f' | 'A'..='F' => { - if ret.len_digits == 0 && c == '0' { - ret.is_zero = true; - } else if ret.is_zero { - ret.is_zero = false; - } - ret.len_digits += 1; - if ret.len_digits == max_sd_in { - if let Some(next_ch) = str_it.next() { - match next_ch { - '0'..='9' => { - ret.past_max = true; - } - _ => { - // force conversion - // to check if its above max. - // todo: spin out convert - // into fn, call it here to try - // read val, on Ok() - // save val for reuse later - // that way on same-base in and out - // we don't needlessly convert int - // to str, we can just copy it over. - ret.check_past_max = true; - str_it.put_back(next_ch); - } - } - if ret.past_max { - break; - } - } else { - ret.check_past_max = true; - } - } - } - _ => { - warn_incomplete_conv(str_in); - break; - } - } - } else { - // breaks on EOL - break; - } - } - ret - } - // get a FormatPrimitive of the maximum value for the field char - // and given sign - fn get_max(field_char: char, sign: i8) -> FormatPrimitive { - let mut fmt_primitive = FormatPrimitive::default(); - fmt_primitive.pre_decimal = Some(String::from(match field_char { - 'd' | 'i' => match sign { - 1 => "9223372036854775807", - _ => { - fmt_primitive.prefix = Some(String::from("-")); - "9223372036854775808" - } - }, - 'x' | 'X' => "ffffffffffffffff", - 'o' => "1777777777777777777777", - /* 'u' | */ _ => "18446744073709551615", - })); - fmt_primitive - } - // conv_from_segment contract: - // 1. takes - // - a string that begins with a non-zero digit, and proceeds - // with zero or more following digits until the end of the string - // - a radix to interpret those digits as - // - a char that communicates: - // whether to interpret+output the string as an i64 or u64 - // what radix to write the parsed number as. - // 2. parses it as a rust integral type - // 3. outputs FormatPrimitive with: - // - if the string falls within bounds: - // number parsed and written in the correct radix - // - if the string falls outside bounds: - // for i64 output, the int minimum or int max (depending on sign) - // for u64 output, the u64 max in the output radix - fn conv_from_segment( - segment: &str, - radix_in: Base, - field_char: char, - sign: i8, - ) -> FormatPrimitive { - match field_char { - 'i' | 'd' => match i64::from_str_radix(segment, radix_in as u32) { - Ok(i) => { - let mut fmt_prim = FormatPrimitive::default(); - if sign == -1 { - fmt_prim.prefix = Some(String::from("-")); - } - fmt_prim.pre_decimal = Some(format!("{i}")); - fmt_prim - } - Err(_) => Self::get_max(field_char, sign), - }, - _ => match u64::from_str_radix(segment, radix_in as u32) { - Ok(u) => { - let mut fmt_prim = FormatPrimitive::default(); - let u_f = if sign == -1 { u64::MAX - (u - 1) } else { u }; - fmt_prim.pre_decimal = Some(match field_char { - 'X' => format!("{u_f:X}"), - 'x' => format!("{u_f:x}"), - 'o' => format!("{u_f:o}"), - _ => format!("{u_f}"), - }); - fmt_prim - } - Err(_) => Self::get_max(field_char, sign), - }, - } - } -} -impl Formatter for Intf { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let begin = initial_prefix.offset; - - // get information about the string. see Intf::Analyze - // def above. - let convert_hints = Self::analyze( - str_in, - *field.field_char == 'i' || *field.field_char == 'd', - initial_prefix, - ); - // We always will have a format primitive to return - Some(if convert_hints.len_digits == 0 || convert_hints.is_zero { - // if non-digit or end is reached before a non-zero digit - FormatPrimitive { - pre_decimal: Some(String::from("0")), - ..Default::default() - } - } else if !convert_hints.past_max { - // if the number is or may be below the bounds limit - let radix_out = match *field.field_char { - 'd' | 'i' | 'u' => Base::Ten, - 'x' | 'X' => Base::Hex, - /* 'o' | */ _ => Base::Octal, - }; - let radix_mismatch = !radix_out.eq(&initial_prefix.radix_in); - let decrease_from_max: bool = initial_prefix.sign == -1 && *field.field_char != 'i'; - let end = begin + convert_hints.len_digits as usize; - - // convert to int if any one of these is true: - // - number of digits in int indicates it may be past max - // - we're subtracting from the max - // - we're converting the base - if convert_hints.check_past_max || decrease_from_max || radix_mismatch { - // radix of in and out is the same. - let segment = String::from(&str_in[begin..end]); - Self::conv_from_segment( - &segment, - initial_prefix.radix_in.clone(), - *field.field_char, - initial_prefix.sign, - ) - } else { - // otherwise just do a straight string copy. - let mut fmt_prim = FormatPrimitive::default(); - - // this is here and not earlier because - // zero doesn't get a sign, and conv_from_segment - // creates its format primitive separately - if initial_prefix.sign == -1 && *field.field_char == 'i' { - fmt_prim.prefix = Some(String::from("-")); - } - fmt_prim.pre_decimal = Some(String::from(&str_in[begin..end])); - fmt_prim - } - } else { - Self::get_max(*field.field_char, initial_prefix.sign) - }) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - let mut final_str: String = String::new(); - if let Some(ref prefix) = prim.prefix { - final_str.push_str(prefix); - } - // integral second fields is zero-padded minimum-width - // which gets handled before general minimum-width - match prim.pre_decimal { - Some(ref pre_decimal) => { - if let Some(min) = field.second_field { - let mut i = min; - let len = pre_decimal.len() as u32; - while i > len { - final_str.push('0'); - i -= 1; - } - } - final_str.push_str(pre_decimal); - } - None => { - panic!( - "error, format primitives provided to int, will, incidentally under \ - correct behavior, always have a pre_dec value." - ); - } - } - final_str - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs deleted file mode 100644 index e23230071..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety - -mod base_conv; -pub mod cninetyninehexfloatf; -pub mod decf; -mod float_common; -pub mod floatf; -pub mod intf; -pub mod scif; diff --git a/src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs b/src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs deleted file mode 100644 index c871dc4e5..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/formatters/scif.rs +++ /dev/null @@ -1,43 +0,0 @@ -// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety - -//! formatter for %e %E scientific notation subs -use super::super::format_field::FormatField; -use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix}; -use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis}; - -#[derive(Default)] -pub struct Scif; - -impl Scif { - pub fn new() -> Self { - Self - } -} -impl Formatter for Scif { - fn get_primitive( - &self, - field: &FormatField, - initial_prefix: &InitialPrefix, - str_in: &str, - ) -> Option { - let second_field = field.second_field.unwrap_or(6) + 1; - let analysis = FloatAnalysis::analyze( - str_in, - initial_prefix, - Some(second_field as usize + 1), - None, - false, - ); - let f = get_primitive_dec( - initial_prefix, - &str_in[initial_prefix.offset..], - &analysis, - second_field as usize, - Some(*field.field_char == 'E'), - ); - Some(f) - } - fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String { - primitive_to_str_common(prim, &field) - } -} diff --git a/src/uucore/src/lib/features/tokenize/num_format/mod.rs b/src/uucore/src/lib/features/tokenize/num_format/mod.rs deleted file mode 100644 index d40cf92de..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod format_field; -mod formatter; -mod formatters; -pub mod num_format; diff --git a/src/uucore/src/lib/features/tokenize/num_format/num_format.rs b/src/uucore/src/lib/features/tokenize/num_format/num_format.rs deleted file mode 100644 index c9b1178b6..000000000 --- a/src/uucore/src/lib/features/tokenize/num_format/num_format.rs +++ /dev/null @@ -1,271 +0,0 @@ -// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety - -//! handles creating printed output for numeric substitutions - -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -use std::env; -use std::vec::Vec; - -use crate::display::Quotable; -use crate::{show_error, show_warning}; - -use super::format_field::{FieldType, FormatField}; -use super::formatter::{Base, FormatPrimitive, Formatter, InitialPrefix}; -use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf; -use super::formatters::decf::Decf; -use super::formatters::floatf::Floatf; -use super::formatters::intf::Intf; -use super::formatters::scif::Scif; - -pub fn warn_expected_numeric(pf_arg: &str) { - // important: keep println here not print - show_error!("{}: expected a numeric value", pf_arg.maybe_quote()); -} - -// when character constant arguments have excess characters -// issue a warning when POSIXLY_CORRECT is not set -fn warn_char_constant_ign(remaining_bytes: &[u8]) { - match env::var("POSIXLY_CORRECT") { - Ok(_) => {} - Err(e) => { - if let env::VarError::NotPresent = e { - show_warning!( - "{:?}: character(s) following character \ - constant have been ignored", - remaining_bytes - ); - } - } - } -} - -// this function looks at the first few -// characters of an argument and returns a value if we can learn -// a value from that (e.g. no argument? return 0, char constant? ret value) -fn get_provided(str_in_opt: Option<&String>) -> Option { - const C_S_QUOTE: u8 = 39; - const C_D_QUOTE: u8 = 34; - match str_in_opt { - Some(str_in) => { - let mut byte_it = str_in.bytes(); - if let Some(ch) = byte_it.next() { - match ch { - C_S_QUOTE | C_D_QUOTE => { - Some(match byte_it.next() { - Some(second_byte) => { - let mut ignored: Vec = Vec::new(); - for cont in byte_it { - ignored.push(cont); - } - if !ignored.is_empty() { - warn_char_constant_ign(&ignored); - } - second_byte - } - // no byte after quote - None => { - let so_far = (ch as char).to_string(); - warn_expected_numeric(&so_far); - 0_u8 - } - }) - } - // first byte is not quote - _ => None, // no first byte - } - } else { - Some(0_u8) - } - } - None => Some(0), - } -} - -// takes a string and returns -// a sign, -// a base, -// and an offset for index after all -// initial spacing, sign, base prefix, and leading zeroes -#[allow(clippy::cognitive_complexity)] -fn get_initial_prefix(str_in: &str, field_type: &FieldType) -> InitialPrefix { - let mut str_it = str_in.chars(); - let mut ret = InitialPrefix { - radix_in: Base::Ten, - sign: 1, - offset: 0, - }; - let mut top_char = str_it.next(); - // skip spaces and ensure top_char is the first non-space char - // (or None if none exists) - while let Some(' ') = top_char { - ret.offset += 1; - top_char = str_it.next(); - } - // parse sign - match top_char { - Some('+') => { - ret.offset += 1; - top_char = str_it.next(); - } - Some('-') => { - ret.sign = -1; - ret.offset += 1; - top_char = str_it.next(); - } - _ => {} - } - // we want to exit with offset being - // the index of the first non-zero - // digit before the decimal point or - // if there is none, the zero before the - // decimal point, or, if there is none, - // the decimal point. - - // while we are determining the offset - // we will ensure as a convention - // the offset is always on the first character - // that we are yet unsure if it is the - // final offset. If the zero could be before - // a decimal point we don't move past the zero. - let mut is_hex = false; - if Some('0') == top_char { - if let Some(base) = str_it.next() { - // lead zeroes can only exist in - // octal and hex base - let mut do_clean_lead_zeroes = false; - match base { - 'x' | 'X' => { - is_hex = true; - ret.offset += 2; - ret.radix_in = Base::Hex; - do_clean_lead_zeroes = true; - } - e @ '0'..='9' => { - ret.offset += 1; - if let FieldType::Intf = *field_type { - ret.radix_in = Base::Octal; - } - if e == '0' { - do_clean_lead_zeroes = true; - } - } - _ => {} - } - if do_clean_lead_zeroes { - let mut first = true; - for ch_zero in str_it { - // see notes on offset above: - // this is why the offset for octal and decimal numbers - // that reach this branch is 1 even though - // they have already eaten the characters '00' - // this is also why when hex encounters its - // first zero it does not move its offset - // forward because it does not know for sure - // that it's current offset (of that zero) - // is not the final offset, - // whereas at that point octal knows its - // current offset is not the final offset. - match ch_zero { - '0' => { - if !(is_hex && first) { - ret.offset += 1; - } - } - // if decimal, keep last zero if one exists - // (it's possible for last zero to - // not exist at this branch if we're in hex input) - '.' => break, - // other digit, etc. - _ => { - if !(is_hex && first) { - ret.offset += 1; - } - break; - } - } - if first { - first = false; - } - } - } - } - } - ret -} - -// this is the function a Sub's print will delegate to -// if it is a numeric field, passing the field details -// and an iterator to the argument -pub fn num_format(field: &FormatField, in_str_opt: Option<&String>) -> Option { - let field_char = field.field_char; - - // num format mainly operates by further delegating to one of - // several Formatter structs depending on the field - // see formatter.rs for more details - - // to do switch to static dispatch - let formatter: Box = match *field.field_type { - FieldType::Intf => Box::new(Intf::new()), - FieldType::Floatf => Box::new(Floatf::new()), - FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()), - FieldType::Scif => Box::new(Scif::new()), - FieldType::Decf => Box::new(Decf::new()), - _ => { - panic!("asked to do num format with non-num field type"); - } - }; - let prim_opt= - // if we can get an assumed value from looking at the first - // few characters, use that value to create the FormatPrimitive - if let Some(provided_num) = get_provided(in_str_opt) { - let mut tmp = FormatPrimitive::default(); - match field_char { - 'u' | 'i' | 'd' => { - tmp.pre_decimal = Some( - format!("{provided_num}")); - }, - 'x' | 'X' => { - tmp.pre_decimal = Some( - format!("{provided_num:x}")); - }, - 'o' => { - tmp.pre_decimal = Some( - format!("{provided_num:o}")); - }, - 'e' | 'E' | 'g' | 'G' => { - let as_str = format!("{provided_num}"); - let initial_prefix = get_initial_prefix( - &as_str, - field.field_type - ); - tmp=formatter.get_primitive(field, &initial_prefix, &as_str) - .expect("err during default provided num"); - }, - _ => { - tmp.pre_decimal = Some( - format!("{provided_num}")); - tmp.post_decimal = Some(String::from("0")); - } - } - Some(tmp) - } else { - // otherwise we'll interpret the argument as a number - // using the appropriate Formatter - let in_str = in_str_opt.expect( - "please send the devs this message: - \n get_provided is failing to ret as Some(0) on no str "); - // first get information about the beginning of the - // numeric argument that would be useful for - // any formatter (int or float) - let initial_prefix = get_initial_prefix( - in_str, - field.field_type - ); - // then get the FormatPrimitive from the Formatter - formatter.get_primitive(field, &initial_prefix, in_str) - }; - // if we have a formatPrimitive, print its results - // according to the field-char appropriate Formatter - prim_opt.map(|prim| formatter.primitive_to_str(&prim, field.clone())) -} diff --git a/src/uucore/src/lib/features/tokenize/sub.rs b/src/uucore/src/lib/features/tokenize/sub.rs deleted file mode 100644 index 5bdb24dc6..000000000 --- a/src/uucore/src/lib/features/tokenize/sub.rs +++ /dev/null @@ -1,452 +0,0 @@ -// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety - -//! Sub is a token that represents a -//! segment of the format string that is a substitution -//! it is created by Sub's implementation of the Tokenizer trait -//! Subs which have numeric field chars make use of the num_format -//! submodule -use crate::error::{UError, UResult}; -use itertools::{put_back_n, PutBackN}; -use std::error::Error; -use std::fmt::Display; -use std::io::Write; -use std::iter::Peekable; -use std::process::exit; -use std::slice::Iter; -use std::str::Chars; -// use std::collections::HashSet; - -use super::num_format::format_field::{FieldType, FormatField}; -use super::num_format::num_format; -use super::token; -use super::unescaped_text::UnescapedText; - -const EXIT_ERR: i32 = 1; - -#[derive(Debug)] -pub enum SubError { - InvalidSpec(String), -} - -impl Display for SubError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - match self { - Self::InvalidSpec(s) => write!(f, "%{s}: invalid conversion specification"), - } - } -} - -impl Error for SubError {} - -impl UError for SubError {} - -fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize { - // this is a costly way to parse the - // args used for asterisk values into integers - // from various bases. Actually doing it correctly - // (going through the pipeline to intf, but returning - // the integer instead of writing it to string and then - // back) is on the refactoring TODO - let field_type = FieldType::Intf; - let field_char = 'i'; - let field_info = FormatField { - min_width: Some(0), - second_field: Some(0), - orig: &asterisk_arg.to_string(), - field_type: &field_type, - field_char: &field_char, - }; - num_format::num_format(&field_info, Some(&asterisk_arg.to_string())) - .unwrap() - .parse::() - .unwrap() -} - -pub enum CanAsterisk { - Fixed(T), - Asterisk, -} - -// Sub is a tokenizer which creates tokens -// for substitution segments of a format string -pub struct Sub { - min_width: CanAsterisk>, - second_field: CanAsterisk>, - field_char: char, - field_type: FieldType, - orig: String, - prefix_char: char, -} -impl Sub { - pub fn new( - min_width: CanAsterisk>, - second_field: CanAsterisk>, - field_char: char, - orig: String, - prefix_char: char, - ) -> Self { - // for more dry printing, field characters are grouped - // in initialization of token. - let field_type = match field_char { - 's' | 'b' => FieldType::Strf, - 'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf, - 'f' | 'F' => FieldType::Floatf, - 'a' | 'A' => FieldType::CninetyNineHexFloatf, - 'e' | 'E' => FieldType::Scif, - 'g' | 'G' => FieldType::Decf, - 'c' => FieldType::Charf, - _ => { - // should be unreachable. - println!("Invalid field type"); - exit(EXIT_ERR); - } - }; - Self { - min_width, - second_field, - field_char, - field_type, - orig, - prefix_char, - } - } -} - -#[derive(Default)] -pub(crate) struct SubParser { - min_width_tmp: Option, - min_width_is_asterisk: bool, - past_decimal: bool, - second_field_tmp: Option, - second_field_is_asterisk: bool, - specifiers_found: bool, - field_char: Option, - text_so_far: String, -} - -impl SubParser { - fn new() -> Self { - Self::default() - } - pub(crate) fn from_it( - writer: &mut W, - it: &mut PutBackN, - args: &mut Peekable>, - ) -> UResult> - where - W: Write, - { - let mut parser = Self::new(); - if parser.sub_vals_retrieved(it)? { - let t = Self::build_token(parser); - t.write(writer, args); - Ok(Some(t)) - } else { - Ok(None) - } - } - fn build_token(parser: Self) -> token::Token { - // not a self method so as to allow move of sub-parser vals. - // return new Sub struct as token - let prefix_char = match &parser.min_width_tmp { - Some(width) if width.starts_with('0') => '0', - _ => ' ', - }; - - token::Token::Sub(Sub::new( - if parser.min_width_is_asterisk { - CanAsterisk::Asterisk - } else { - CanAsterisk::Fixed( - parser - .min_width_tmp - .map(|x| x.parse::().unwrap_or(1)), - ) - }, - if parser.second_field_is_asterisk { - CanAsterisk::Asterisk - } else { - CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::().unwrap())) - }, - parser.field_char.unwrap(), - parser.text_so_far, - prefix_char, - )) - } - #[allow(clippy::cognitive_complexity)] - fn sub_vals_retrieved(&mut self, it: &mut PutBackN) -> UResult { - if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? { - return Ok(false); - } - // this fn in particular is much longer than it needs to be - // .could get a lot - // of code savings just by cleaning it up. shouldn't use a regex - // though, as we want to mimic the original behavior of printing - // the field as interpreted up until the error in the field. - - let mut legal_fields = vec![ - // 'a', 'A', //c99 hex float implementation not yet complete - 'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 's', 'u', 'x', 'X', - ]; - let mut specifiers = vec!['h', 'j', 'l', 'L', 't', 'z']; - legal_fields.sort_unstable(); - specifiers.sort_unstable(); - - // divide substitution from %([0-9]+)?(.[0-9+])?([a-zA-Z]) - // into min_width, second_field, field_char - for ch in it { - self.text_so_far.push(ch); - match ch { - '-' | '*' | '0'..='9' => { - if self.past_decimal { - // second field should never have a - // negative value - if self.second_field_is_asterisk || ch == '-' || self.specifiers_found { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - if self.second_field_tmp.is_none() { - self.second_field_tmp = Some(String::new()); - } - match self.second_field_tmp.as_mut() { - Some(x) => { - if ch == '*' && !x.is_empty() { - return Err( - SubError::InvalidSpec(self.text_so_far.clone()).into() - ); - } - if ch == '*' { - self.second_field_is_asterisk = true; - } - x.push(ch); - } - None => { - panic!("should be unreachable"); - } - } - } else { - if self.min_width_is_asterisk || self.specifiers_found { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - if self.min_width_tmp.is_none() { - self.min_width_tmp = Some(String::new()); - } - match self.min_width_tmp.as_mut() { - Some(x) => { - if (ch == '-' || ch == '*') && !x.is_empty() { - return Err( - SubError::InvalidSpec(self.text_so_far.clone()).into() - ); - } - if ch == '*' { - self.min_width_is_asterisk = true; - } - x.push(ch); - } - None => { - panic!("should be unreachable"); - } - } - } - } - '.' => { - if self.past_decimal { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } else { - self.past_decimal = true; - } - } - x if legal_fields.binary_search(&x).is_ok() => { - self.field_char = Some(ch); - self.text_so_far.push(ch); - break; - } - x if specifiers.binary_search(&x).is_ok() => { - if !self.past_decimal { - self.past_decimal = true; - } - if !self.specifiers_found { - self.specifiers_found = true; - } - } - _ => { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - } - } - if self.field_char.is_none() { - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - let field_char_retrieved = self.field_char.unwrap(); - if self.past_decimal && self.second_field_tmp.is_none() { - self.second_field_tmp = Some(String::from("0")); - } - self.validate_field_params(field_char_retrieved)?; - // if the dot is provided without a second field - // printf interprets it as 0. - if let Some(x) = self.second_field_tmp.as_mut() { - if x.is_empty() { - self.min_width_tmp = Some(String::from("0")); - } - } - - Ok(true) - } - fn successfully_eat_prefix( - it: &mut PutBackN, - text_so_far: &mut String, - ) -> UResult { - // get next two chars, - // if they're '%%' we're not tokenizing it - // else put chars back - let preface = it.next(); - let n_ch = it.next(); - if preface == Some('%') && n_ch != Some('%') { - match n_ch { - Some(x) => { - it.put_back(x); - Ok(true) - } - None => { - text_so_far.push('%'); - Err(SubError::InvalidSpec(text_so_far.clone()).into()) - } - } - } else { - if let Some(x) = n_ch { - it.put_back(x); - }; - if let Some(x) = preface { - it.put_back(x); - }; - Ok(false) - } - } - fn validate_field_params(&self, field_char: char) -> UResult<()> { - // check for illegal combinations here when possible vs - // on each application so we check less per application - // to do: move these checks to Sub::new - if (field_char == 's' && self.min_width_tmp == Some(String::from("0"))) - || (field_char == 'c' - && (self.min_width_tmp == Some(String::from("0")) || self.past_decimal)) - || (field_char == 'b' - && (self.min_width_tmp.is_some() - || self.past_decimal - || self.second_field_tmp.is_some())) - { - // invalid string substitution - // to do: include information about an invalid - // string substitution - return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()); - } - Ok(()) - } -} - -impl Sub { - #[allow(clippy::cognitive_complexity)] - pub(crate) fn write(&self, writer: &mut W, pf_args_it: &mut Peekable>) - where - W: Write, - { - let field = FormatField { - min_width: match self.min_width { - CanAsterisk::Fixed(x) => x, - CanAsterisk::Asterisk => { - match pf_args_it.next() { - // temporary, use intf.rs instead - Some(x) => Some(convert_asterisk_arg_int(x)), - None => Some(0), - } - } - }, - second_field: match self.second_field { - CanAsterisk::Fixed(x) => x, - CanAsterisk::Asterisk => { - match pf_args_it.next() { - // temporary, use intf.rs instead - Some(x) => { - let result = convert_asterisk_arg_int(x); - if result < 0 { - None - } else { - Some(result as u32) - } - } - None => Some(0), - } - } - }, - field_char: &self.field_char, - field_type: &self.field_type, - orig: &self.orig, - }; - let pf_arg = pf_args_it.next(); - - // minimum width is handled independently of actual - // field char - let pre_min_width_opt: Option = match *field.field_type { - // if %s just return arg - // if %b use UnescapedText module's unescape-fn - // if %c return first char of arg - FieldType::Strf | FieldType::Charf => { - match pf_arg { - Some(arg_string) => { - match *field.field_char { - 's' => Some(match field.second_field { - Some(max) => String::from(&arg_string[..max as usize]), - None => arg_string.clone(), - }), - 'b' => { - let mut a_it = put_back_n(arg_string.chars()); - UnescapedText::from_it_core(writer, &mut a_it, true); - None - } - // for 'c': get iter of string vals, - // get opt of first val - // and map it to opt - /* 'c' | */ - _ => arg_string.chars().next().map(|x| x.to_string()), - } - } - None => None, - } - } - _ => { - // non string/char fields are delegated to num_format - num_format::num_format(&field, pf_arg) - } - }; - if let Some(pre_min_width) = pre_min_width_opt { - // if have a string, print it, ensuring minimum width is met. - write!( - writer, - "{}", - match field.min_width { - Some(min_width) => { - let diff: isize = min_width.abs() - pre_min_width.len() as isize; - if diff > 0 { - let mut final_str = String::new(); - // definitely more efficient ways - // to do this. - let pad_before = min_width > 0; - if !pad_before { - final_str.push_str(&pre_min_width); - } - for _ in 0..diff { - final_str.push(self.prefix_char); - } - if pad_before { - final_str.push_str(&pre_min_width); - } - final_str - } else { - pre_min_width - } - } - None => pre_min_width, - } - ) - .ok(); - } - } -} diff --git a/src/uucore/src/lib/features/tokenize/token.rs b/src/uucore/src/lib/features/tokenize/token.rs deleted file mode 100644 index b522c99a4..000000000 --- a/src/uucore/src/lib/features/tokenize/token.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! Traits and enums dealing with Tokenization of printf Format String -use std::io::Write; -use std::iter::Peekable; -use std::slice::Iter; - -use crate::features::tokenize::sub::Sub; -use crate::features::tokenize::unescaped_text::UnescapedText; - -// A token object is an object that can print the expected output -// of a contiguous segment of the format string, and -// requires at most 1 argument -pub enum Token { - Sub(Sub), - UnescapedText(UnescapedText), -} - -impl Token { - pub(crate) fn write(&self, writer: &mut W, args: &mut Peekable>) - where - W: Write, - { - match self { - Self::Sub(sub) => sub.write(writer, args), - Self::UnescapedText(unescaped_text) => unescaped_text.write(writer), - } - } -} - -// A tokenizer object is an object that takes an iterator -// at a position in a format string, and sees whether -// it can return a token of a type it knows how to produce -// if so, return the token, move the iterator past the -// format string text the token represents, and if an -// argument is used move the argument iter forward one - -// creating token of a format string segment should also cause -// printing of that token's value. Essentially tokenizing -// a whole format string will print the format string and consume -// a number of arguments equal to the number of argument-using tokens diff --git a/src/uucore/src/lib/features/tokenize/unescaped_text.rs b/src/uucore/src/lib/features/tokenize/unescaped_text.rs deleted file mode 100644 index 29c657ed8..000000000 --- a/src/uucore/src/lib/features/tokenize/unescaped_text.rs +++ /dev/null @@ -1,279 +0,0 @@ -//! UnescapedText is a tokenizer impl -//! for tokenizing character literals, -//! and escaped character literals (of allowed escapes), -//! into an unescaped text byte array - -// spell-checker:ignore (ToDO) retval hexchars octals printf's bvec vals coreutil addchar eval bytecode bslice - -use itertools::PutBackN; -use std::char::from_u32; -use std::io::Write; -use std::process::exit; -use std::str::Chars; - -use super::token; - -const EXIT_OK: i32 = 0; -const EXIT_ERR: i32 = 1; - -// by default stdout only flushes -// to console when a newline is passed. -macro_rules! write_and_flush { - ($writer:expr, $($args:tt)+) => ({ - write!($writer, "{}", $($args)+).ok(); - $writer.flush().ok(); - }) -} - -fn flush_bytes(writer: &mut W, bslice: &[u8]) -where - W: Write, -{ - writer.write_all(bslice).ok(); - writer.flush().ok(); -} - -#[derive(Default)] -pub struct UnescapedText(Vec); -impl UnescapedText { - fn new() -> Self { - Self::default() - } - // take an iterator to the format string - // consume between min and max chars - // and return it as a base-X number - fn base_to_u32(min_chars: u8, max_chars: u8, base: u32, it: &mut PutBackN) -> u32 { - let mut retval: u32 = 0; - let mut found = 0; - while found < max_chars { - // if end of input break - let nc = it.next(); - match nc { - Some(digit) => { - // if end of hexchars break - match digit.to_digit(base) { - Some(d) => { - found += 1; - retval *= base; - retval += d; - } - None => { - it.put_back(digit); - break; - } - } - } - None => { - break; - } - } - } - if found < min_chars { - // only ever expected for hex - println!("missing hexadecimal number in escape"); //todo stderr - exit(EXIT_ERR); - } - retval - } - // validates against valid - // IEC 10646 vals - these values - // are pinned against the more popular - // printf so as to not disrupt when - // dropped-in as a replacement. - fn validate_iec(val: u32, eight_word: bool) { - let mut preface = 'u'; - let leading_zeros = if eight_word { - preface = 'U'; - 8 - } else { - 4 - }; - let err_msg = format!("invalid universal character name {preface}{val:0leading_zeros$x}"); - if (val < 159 && (val != 36 && val != 64 && val != 96)) || (val > 55296 && val < 57343) { - println!("{err_msg}"); //todo stderr - exit(EXIT_ERR); - } - } - // pass an iterator that succeeds an '/', - // and process the remaining character - // adding the unescaped bytes - // to the passed byte_vec - // in subs_mode change octal behavior - fn handle_escaped( - writer: &mut W, - byte_vec: &mut Vec, - it: &mut PutBackN, - subs_mode: bool, - ) where - W: Write, - { - let ch = it.next().unwrap_or('\\'); - match ch { - '0'..='9' | 'x' => { - let min_len = 1; - let mut max_len = 2; - let mut base = 16; - let ignore = false; - match ch { - 'x' => {} - e @ '0'..='9' => { - max_len = 3; - base = 8; - // in practice, gnu coreutils printf - // interprets octals without a - // leading zero in %b - // but it only skips leading zeros - // in %b mode. - // if we ever want to match gnu coreutil - // printf's docs instead of its behavior - // we'd set this to true. - // if subs_mode && e != '0' - // { ignore = true; } - if !subs_mode || e != '0' { - it.put_back(ch); - } - } - _ => {} - } - if ignore { - byte_vec.push(ch as u8); - } else { - let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8; - byte_vec.push(val); - let bvec = [val]; - flush_bytes(writer, &bvec); - } - } - e => { - // only for hex and octal - // is byte encoding specified. - // otherwise, why not leave the door open - // for other encodings unless it turns out - // a bottleneck. - let mut s = String::new(); - let ch = match e { - '\\' => '\\', - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - // bell - 'a' => '\x07', - // backspace - 'b' => '\x08', - // vertical tab - 'v' => '\x0B', - // form feed - 'f' => '\x0C', - // escape character - 'e' => '\x1B', - 'c' => exit(EXIT_OK), - 'u' | 'U' => { - let len = match e { - 'u' => 4, - /* 'U' | */ _ => 8, - }; - let val = Self::base_to_u32(len, len, 16, it); - Self::validate_iec(val, false); - if let Some(c) = from_u32(val) { - c - } else { - '-' - } - } - _ => { - s.push('\\'); - ch - } - }; - s.push(ch); - write_and_flush!(writer, &s); - byte_vec.extend(s.bytes()); - } - }; - } - - // take an iterator to a string, - // and return a wrapper around a Vec of unescaped bytes - // break on encounter of sub symbol ('%[^%]') unless called - // through %b subst. - #[allow(clippy::cognitive_complexity)] - pub fn from_it_core( - writer: &mut W, - it: &mut PutBackN, - subs_mode: bool, - ) -> Option - where - W: Write, - { - let mut addchar = false; - let mut new_text = Self::new(); - let mut tmp_str = String::new(); - { - let new_vec: &mut Vec = &mut (new_text.0); - while let Some(ch) = it.next() { - if !addchar { - addchar = true; - } - match ch { - x if x != '\\' && x != '%' => { - // lazy branch eval - // remember this fn could be called - // many times in a single exec through %b - write_and_flush!(writer, ch); - tmp_str.push(ch); - } - '\\' => { - // the literal may be a literal bytecode - // and not valid utf-8. Str only supports - // valid utf-8. - // if we find the unnecessary drain - // on non hex or octal escapes is costly - // then we can make it faster/more complex - // with as-necessary draining. - if !tmp_str.is_empty() { - new_vec.extend(tmp_str.bytes()); - tmp_str = String::new(); - } - Self::handle_escaped(writer, new_vec, it, subs_mode); - } - x if x == '%' && !subs_mode => { - if let Some(follow) = it.next() { - if follow == '%' { - write_and_flush!(writer, ch); - tmp_str.push(ch); - } else { - it.put_back(follow); - it.put_back(ch); - break; - } - } else { - it.put_back(ch); - break; - } - } - _ => { - write_and_flush!(writer, ch); - tmp_str.push(ch); - } - } - } - if !tmp_str.is_empty() { - new_vec.extend(tmp_str.bytes()); - } - } - if addchar { - Some(token::Token::UnescapedText(new_text)) - } else { - None - } - } -} -impl UnescapedText { - pub(crate) fn write(&self, writer: &mut W) - where - W: Write, - { - flush_bytes(writer, &self.0[..]); - } -} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index ca9a48d25..7f5cc99db 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -44,8 +44,8 @@ pub use crate::features::fs; pub use crate::features::fsext; #[cfg(feature = "lines")] pub use crate::features::lines; -#[cfg(feature = "memo")] -pub use crate::features::memo; +#[cfg(feature = "format")] +pub use crate::features::format; #[cfg(feature = "ringbuffer")] pub use crate::features::ringbuffer; #[cfg(feature = "sum")]