1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-31 13:07:46 +00:00

Merge pull request #5128 from tertsdiepraam/printf-rewrite

`printf` rewrite (with a lot of `seq` changes)
This commit is contained in:
Sylvestre Ledru 2023-11-28 07:52:58 +01:00 committed by GitHub
commit 14a8e8a452
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 1812 additions and 3336 deletions

View file

@ -18,7 +18,7 @@ path = "src/dd.rs"
clap = { workspace = true }
gcd = { workspace = true }
libc = { workspace = true }
uucore = { workspace = true, features = ["memo", "quoting-style"] }
uucore = { workspace = true, features = ["format", "quoting-style"] }
[target.'cfg(any(target_os = "linux"))'.dependencies]
nix = { workspace = true, features = ["fs"] }

View file

@ -13,8 +13,10 @@ use std::io::Write;
use std::sync::mpsc;
use std::time::Duration;
use uucore::error::UResult;
use uucore::memo::sprintf;
use uucore::{
error::UResult,
format::num_format::{FloatVariant, Formatter},
};
use crate::numbers::{to_magnitude_and_suffix, SuffixType};
@ -152,7 +154,14 @@ impl ProgUpdate {
let (carriage_return, newline) = if rewrite { ("\r", "") } else { ("", "\n") };
// The duration should be formatted as in `printf %g`.
let duration_str = sprintf("%g", &[duration.to_string()])?;
let mut duration_str = Vec::new();
uucore::format::num_format::Float {
variant: FloatVariant::Shortest,
..Default::default()
}
.fmt(&mut duration_str, duration)?;
// We assume that printf will output valid UTF-8
let duration_str = std::str::from_utf8(&duration_str).unwrap();
// If the number of bytes written is sufficiently large, then
// print a more concise representation of the number, like

View file

@ -16,7 +16,7 @@ path = "src/printf.rs"
[dependencies]
clap = { workspace = true }
uucore = { workspace = true, features = ["memo", "quoting-style"] }
uucore = { workspace = true, features = ["format", "quoting-style"] }
[[bin]]
name = "printf"

View file

@ -6,9 +6,12 @@
// spell-checker:ignore (change!) each's
// spell-checker:ignore (ToDO) LONGHELP FORMATSTRING templating parameterizing formatstr
use std::io::stdout;
use std::ops::ControlFlow;
use clap::{crate_version, Arg, ArgAction, Command};
use uucore::error::{UResult, UUsageError};
use uucore::memo::printf;
use uucore::format::{parse_spec_and_escape, FormatArgument};
use uucore::{format_usage, help_about, help_section, help_usage};
const VERSION: &str = "version";
@ -30,12 +33,28 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format_string = matches
.get_one::<String>(options::FORMATSTRING)
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;
let values: Vec<String> = match matches.get_many::<String>(options::ARGUMENT) {
Some(s) => s.map(|s| s.to_string()).collect(),
let values: Vec<_> = match matches.get_many::<String>(options::ARGUMENT) {
Some(s) => s.map(|s| FormatArgument::Unparsed(s.to_string())).collect(),
None => vec![],
};
printf(format_string, &values[..])?;
let mut args = values.iter().peekable();
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => return Ok(()),
};
}
while args.peek().is_some() {
for item in parse_spec_and_escape(format_string.as_ref()) {
match item?.write(stdout(), &mut args)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => return Ok(()),
};
}
}
Ok(())
}

View file

@ -20,7 +20,7 @@ bigdecimal = { workspace = true }
clap = { workspace = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
uucore = { workspace = true, features = ["memo", "quoting-style"] }
uucore = { workspace = true, features = ["format", "quoting-style"] }
[[bin]]
name = "seq"

View file

@ -25,13 +25,8 @@ use std::fmt::Display;
use std::ops::Add;
use bigdecimal::BigDecimal;
use num_bigint::BigInt;
use num_bigint::ToBigInt;
use num_traits::One;
use num_traits::Zero;
use crate::extendedbigint::ExtendedBigInt;
#[derive(Debug, Clone)]
pub enum ExtendedBigDecimal {
/// Arbitrary precision floating point number.
@ -72,53 +67,14 @@ pub enum ExtendedBigDecimal {
Nan,
}
/// Round `x` up to the smallest integer that is not less than it.
fn ceil(x: BigDecimal) -> BigInt {
    // A non-integral value is shifted up by one half before rounding to
    // zero decimal places; an integral value is used unchanged.
    let rounded = if x.is_integer() {
        x
    } else {
        (x + BigDecimal::one().half()).round(0)
    };
    // Unwrapping the Option because it always returns Some
    rounded.to_bigint().unwrap()
}
/// Round `x` down to the largest integer that is not greater than it.
fn floor(x: BigDecimal) -> BigInt {
    // A non-integral value is shifted down by one half before rounding to
    // zero decimal places; an integral value is used unchanged.
    let rounded = if x.is_integer() {
        x
    } else {
        (x - BigDecimal::one().half()).round(0)
    };
    // Unwrapping the Option because it always returns Some
    rounded.to_bigint().unwrap()
}
impl ExtendedBigDecimal {
/// The smallest integer greater than or equal to this number.
pub fn ceil(self) -> ExtendedBigInt {
match self {
Self::BigDecimal(x) => ExtendedBigInt::BigInt(ceil(x)),
other => From::from(other),
}
#[cfg(test)]
pub fn zero() -> Self {
Self::BigDecimal(0.into())
}
/// The largest integer less than or equal to this number.
pub fn floor(self) -> ExtendedBigInt {
match self {
Self::BigDecimal(x) => ExtendedBigInt::BigInt(floor(x)),
other => From::from(other),
}
}
}
impl From<ExtendedBigInt> for ExtendedBigDecimal {
fn from(big_int: ExtendedBigInt) -> Self {
match big_int {
ExtendedBigInt::BigInt(n) => Self::BigDecimal(BigDecimal::from(n)),
ExtendedBigInt::Infinity => Self::Infinity,
ExtendedBigInt::MinusInfinity => Self::MinusInfinity,
ExtendedBigInt::MinusZero => Self::MinusZero,
ExtendedBigInt::Nan => Self::Nan,
}
pub fn one() -> Self {
Self::BigDecimal(1.into())
}
}

View file

@ -1,214 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore bigint extendedbigint extendedbigdecimal
//! An arbitrary precision integer that can also represent infinity, NaN, etc.
//!
//! Usually infinity, NaN, and negative zero are only represented for
//! floating point numbers. The [`ExtendedBigInt`] enumeration provides
//! a representation of those things with the set of integers. The
//! finite values are stored as [`BigInt`] instances.
//!
//! # Examples
//!
//! Addition works for [`ExtendedBigInt`] as it does for floats. For
//! example, adding infinity to any finite value results in infinity:
//!
//! ```rust,ignore
//! let summand1 = ExtendedBigInt::BigInt(BigInt::zero());
//! let summand2 = ExtendedBigInt::Infinity;
//! assert_eq!(summand1 + summand2, ExtendedBigInt::Infinity);
//! ```
use std::cmp::Ordering;
use std::fmt::Display;
use std::ops::Add;
use num_bigint::BigInt;
use num_bigint::ToBigInt;
use num_traits::One;
use num_traits::Zero;
use crate::extendedbigdecimal::ExtendedBigDecimal;
/// An arbitrary precision integer extended with the special values that
/// are normally only available for floating point numbers.
#[derive(Debug, Clone)]
pub enum ExtendedBigInt {
/// A finite integer of arbitrary size.
BigInt(BigInt),
/// Positive infinity.
Infinity,
/// Negative infinity.
MinusInfinity,
/// Negative zero; compares unequal to `BigInt` zero under `PartialEq`.
MinusZero,
/// Not a number; never equal to anything, including itself.
Nan,
}
impl ExtendedBigInt {
/// The integer number one.
pub fn one() -> Self {
// We would like to implement `num_traits::One`, but it requires
// a multiplication implementation, and we don't want to
// implement that here.
Self::BigInt(BigInt::one())
}
}
impl From<ExtendedBigDecimal> for ExtendedBigInt {
    /// Converts a decimal into its integer counterpart.
    ///
    /// Special values map onto the variant of the same name; a finite
    /// value is converted with `to_bigint`.
    fn from(value: ExtendedBigDecimal) -> Self {
        match value {
            ExtendedBigDecimal::Nan => Self::Nan,
            ExtendedBigDecimal::MinusZero => Self::MinusZero,
            ExtendedBigDecimal::MinusInfinity => Self::MinusInfinity,
            ExtendedBigDecimal::Infinity => Self::Infinity,
            ExtendedBigDecimal::BigDecimal(finite) => {
                // TODO When can this fail?
                let as_int = finite.to_bigint().unwrap();
                Self::BigInt(as_int)
            }
        }
    }
}
impl Display for ExtendedBigInt {
    /// Formats the value by delegating to the `Display` implementation of
    /// the underlying representation, so that any width, fill, or
    /// alignment requested by the caller is handled by that implementation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::BigInt(n) => Display::fmt(n, f),
            Self::Infinity => Display::fmt(&f32::INFINITY, f),
            Self::MinusInfinity => Display::fmt(&f32::NEG_INFINITY, f),
            Self::MinusZero => Display::fmt("-0", f),
            Self::Nan => Display::fmt("nan", f),
        }
    }
}
impl Zero for ExtendedBigInt {
fn zero() -> Self {
Self::BigInt(BigInt::zero())
}
fn is_zero(&self) -> bool {
match self {
Self::BigInt(n) => n.is_zero(),
Self::MinusZero => true,
_ => false,
}
}
}
impl Add for ExtendedBigInt {
    type Output = Self;

    /// Adds two extended integers using float-like rules for the special
    /// values.
    fn add(self, other: Self) -> Self {
        match (self, other) {
            // NaN is contagious.
            (Self::Nan, _) | (_, Self::Nan) => Self::Nan,
            // Opposite infinities cancel into NaN.
            (Self::Infinity, Self::MinusInfinity) | (Self::MinusInfinity, Self::Infinity) => {
                Self::Nan
            }
            // Two finite values are added normally.
            (Self::BigInt(lhs), Self::BigInt(rhs)) => Self::BigInt(lhs + rhs),
            // Negative zero is the additive identity: the other operand wins.
            (Self::MinusZero, rhs) => rhs,
            (lhs, Self::MinusZero) => lhs,
            // Any remaining pair involving an infinity yields that infinity.
            (Self::Infinity, _) | (_, Self::Infinity) => Self::Infinity,
            (Self::MinusInfinity, _) | (_, Self::MinusInfinity) => Self::MinusInfinity,
        }
    }
}
impl PartialEq for ExtendedBigInt {
    /// Two values are equal only when they are the same non-NaN variant
    /// and, for finite values, hold the same integer.
    ///
    /// In particular NaN is not equal to itself, and negative zero is not
    /// equal to a finite zero.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::BigInt(lhs), Self::BigInt(rhs)) => lhs == rhs,
            (Self::Infinity, Self::Infinity)
            | (Self::MinusInfinity, Self::MinusInfinity)
            | (Self::MinusZero, Self::MinusZero) => true,
            _ => false,
        }
    }
}
impl PartialOrd for ExtendedBigInt {
    /// Orders values as on the extended number line, with negative zero
    /// compared as if it were zero and NaN unordered against everything.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match (self, other) {
            // NaN cannot be ordered.
            (Self::Nan, _) | (_, Self::Nan) => None,
            // Finite values, treating negative zero as zero.
            (Self::BigInt(lhs), Self::BigInt(rhs)) => lhs.partial_cmp(rhs),
            (Self::BigInt(lhs), Self::MinusZero) => lhs.partial_cmp(&BigInt::zero()),
            (Self::MinusZero, Self::BigInt(rhs)) => BigInt::zero().partial_cmp(rhs),
            // Identical special values.
            (Self::Infinity, Self::Infinity)
            | (Self::MinusInfinity, Self::MinusInfinity)
            | (Self::MinusZero, Self::MinusZero) => Some(Ordering::Equal),
            // Everything left has an infinity on exactly one side, or two
            // opposite infinities.
            (Self::Infinity, _) | (_, Self::MinusInfinity) => Some(Ordering::Greater),
            (Self::MinusInfinity, _) | (_, Self::Infinity) => Some(Ordering::Less),
        }
    }
}
#[cfg(test)]
mod tests {
    use num_bigint::BigInt;
    use num_traits::Zero;

    use crate::extendedbigint::ExtendedBigInt;

    /// A finite zero, used as the left-hand operand in the addition tests.
    fn finite_zero() -> ExtendedBigInt {
        ExtendedBigInt::BigInt(BigInt::zero())
    }

    #[test]
    fn test_addition_infinity() {
        let total = finite_zero() + ExtendedBigInt::Infinity;
        assert_eq!(total, ExtendedBigInt::Infinity);
    }

    #[test]
    fn test_addition_minus_infinity() {
        let total = finite_zero() + ExtendedBigInt::MinusInfinity;
        assert_eq!(total, ExtendedBigInt::MinusInfinity);
    }

    #[test]
    fn test_addition_nan() {
        // NaN never compares equal, so the variant is matched directly
        // instead of going through `assert_eq!`.
        match finite_zero() + ExtendedBigInt::Nan {
            ExtendedBigInt::Nan => (),
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_display() {
        assert_eq!(finite_zero().to_string(), "0");
        assert_eq!(ExtendedBigInt::MinusZero.to_string(), "-0");
        assert_eq!(ExtendedBigInt::Infinity.to_string(), "inf");
        assert_eq!(ExtendedBigInt::MinusInfinity.to_string(), "-inf");
        assert_eq!(ExtendedBigInt::Nan.to_string(), "nan");
    }
}

View file

@ -2,80 +2,10 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore extendedbigdecimal extendedbigint
//! A type to represent the possible start, increment, and end values for seq.
//!
//! The [`Number`] enumeration represents the possible values for the
//! start, increment, and end values for `seq`. These may be integers,
//! floating point numbers, negative zero, etc. A [`Number`] can be
//! parsed from a string by calling [`str::parse`].
// spell-checker:ignore extendedbigdecimal
use num_traits::Zero;
use crate::extendedbigdecimal::ExtendedBigDecimal;
use crate::extendedbigint::ExtendedBigInt;
/// An integral or floating point number.
#[derive(Debug, PartialEq)]
pub enum Number {
/// An integer, stored as an [`ExtendedBigInt`].
Int(ExtendedBigInt),
/// A floating point number, stored as an [`ExtendedBigDecimal`].
Float(ExtendedBigDecimal),
}
impl Number {
    /// Returns `true` when this number is positive or negative zero.
    ///
    /// This is an inherent method rather than an implementation of
    /// `num_traits::Zero`, because that trait also requires addition,
    /// which is intentionally not implemented for this type.
    pub fn is_zero(&self) -> bool {
        match self {
            Self::Float(decimal) => decimal.is_zero(),
            Self::Int(integer) => integer.is_zero(),
        }
    }

    /// Consumes this number and returns it as an `ExtendedBigDecimal`,
    /// converting integers and passing floats through unchanged.
    pub fn into_extended_big_decimal(self) -> ExtendedBigDecimal {
        match self {
            Self::Float(decimal) => decimal,
            Self::Int(integer) => integer.into(),
        }
    }

    /// Returns the integer number one.
    ///
    /// This is an inherent method rather than an implementation of
    /// `num_traits::One`, because that trait also requires
    /// multiplication, which is intentionally not implemented here.
    pub fn one() -> Self {
        let unit = ExtendedBigInt::one();
        Self::Int(unit)
    }

    /// Rounds this number towards `other`.
    ///
    /// An integer is returned unchanged. A float is rounded up when
    /// `other` is greater than it and rounded down otherwise.
    pub fn round_towards(self, other: &ExtendedBigInt) -> ExtendedBigInt {
        let num = match self {
            // Already an integer: nothing to round.
            Self::Int(integer) => return integer,
            Self::Float(decimal) => decimal,
        };
        let target = ExtendedBigDecimal::from(other.clone());
        if target > num {
            num.ceil()
        } else {
            // When both are equal, `num` is already integral, so
            // `floor()` simply returns that integer.
            num.floor()
        }
    }
}
/// A number with a specified number of integer and fractional digits.
///
@ -87,13 +17,17 @@ impl Number {
/// You can get an instance of this struct by calling [`str::parse`].
#[derive(Debug)]
pub struct PreciseNumber {
pub number: Number,
pub number: ExtendedBigDecimal,
pub num_integral_digits: usize,
pub num_fractional_digits: usize,
}
impl PreciseNumber {
pub fn new(number: Number, num_integral_digits: usize, num_fractional_digits: usize) -> Self {
pub fn new(
number: ExtendedBigDecimal,
num_integral_digits: usize,
num_fractional_digits: usize,
) -> Self {
Self {
number,
num_integral_digits,
@ -106,7 +40,7 @@ impl PreciseNumber {
// We would like to implement `num_traits::One`, but it requires
// a multiplication implementation, and we don't want to
// implement that here.
Self::new(Number::one(), 1, 0)
Self::new(ExtendedBigDecimal::one(), 1, 0)
}
/// Decide whether this number is zero (either positive or negative).

View file

@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore extendedbigdecimal extendedbigint bigdecimal numberparse
// spell-checker:ignore extendedbigdecimal bigdecimal numberparse
//! Parsing numbers for use in `seq`.
//!
//! This module provides an implementation of [`FromStr`] for the
@ -16,8 +16,6 @@ use num_traits::Num;
use num_traits::Zero;
use crate::extendedbigdecimal::ExtendedBigDecimal;
use crate::extendedbigint::ExtendedBigInt;
use crate::number::Number;
use crate::number::PreciseNumber;
/// An error returned when parsing a number fails.
@ -29,8 +27,8 @@ pub enum ParseNumberError {
}
/// Decide whether a given string and its parsed `BigInt` is negative zero.
fn is_minus_zero_int(s: &str, n: &BigInt) -> bool {
s.starts_with('-') && n == &BigInt::zero()
fn is_minus_zero_int(s: &str, n: &BigDecimal) -> bool {
s.starts_with('-') && n == &BigDecimal::zero()
}
/// Decide whether a given string and its parsed `BigDecimal` is negative zero.
@ -53,19 +51,19 @@ fn is_minus_zero_float(s: &str, x: &BigDecimal) -> bool {
/// assert_eq!(actual, expected);
/// ```
fn parse_no_decimal_no_exponent(s: &str) -> Result<PreciseNumber, ParseNumberError> {
match s.parse::<BigInt>() {
match s.parse::<BigDecimal>() {
Ok(n) => {
// If `s` is '-0', then `parse()` returns `BigDecimal::zero()`,
// but we need to return `ExtendedBigDecimal::MinusZero` instead.
if is_minus_zero_int(s, &n) {
Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::MinusZero),
ExtendedBigDecimal::MinusZero,
s.len(),
0,
))
} else {
Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::BigInt(n)),
ExtendedBigDecimal::BigDecimal(n),
s.len(),
0,
))
@ -79,7 +77,7 @@ fn parse_no_decimal_no_exponent(s: &str) -> Result<PreciseNumber, ParseNumberErr
"nan" | "-nan" => return Err(ParseNumberError::Nan),
_ => return Err(ParseNumberError::Float),
};
Ok(PreciseNumber::new(Number::Float(float_val), 0, 0))
Ok(PreciseNumber::new(float_val, 0, 0))
}
}
}
@ -125,13 +123,13 @@ fn parse_exponent_no_decimal(s: &str, j: usize) -> Result<PreciseNumber, ParseNu
if exponent < 0 {
if is_minus_zero_float(s, &x) {
Ok(PreciseNumber::new(
Number::Float(ExtendedBigDecimal::MinusZero),
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
} else {
Ok(PreciseNumber::new(
Number::Float(ExtendedBigDecimal::BigDecimal(x)),
ExtendedBigDecimal::BigDecimal(x),
num_integral_digits,
num_fractional_digits,
))
@ -169,13 +167,13 @@ fn parse_decimal_no_exponent(s: &str, i: usize) -> Result<PreciseNumber, ParseNu
let num_fractional_digits = s.len() - (i + 1);
if is_minus_zero_float(s, &x) {
Ok(PreciseNumber::new(
Number::Float(ExtendedBigDecimal::MinusZero),
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
} else {
Ok(PreciseNumber::new(
Number::Float(ExtendedBigDecimal::BigDecimal(x)),
ExtendedBigDecimal::BigDecimal(x),
num_integral_digits,
num_fractional_digits,
))
@ -239,7 +237,7 @@ fn parse_decimal_and_exponent(
if num_digits_between_decimal_point_and_e <= exponent {
if is_minus_zero_float(s, &val) {
Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::MinusZero),
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
@ -251,23 +249,23 @@ fn parse_decimal_and_exponent(
);
let expanded = [&s[0..i], &s[i + 1..j], &zeros].concat();
let n = expanded
.parse::<BigInt>()
.parse::<BigDecimal>()
.map_err(|_| ParseNumberError::Float)?;
Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::BigInt(n)),
ExtendedBigDecimal::BigDecimal(n),
num_integral_digits,
num_fractional_digits,
))
}
} else if is_minus_zero_float(s, &val) {
Ok(PreciseNumber::new(
Number::Float(ExtendedBigDecimal::MinusZero),
ExtendedBigDecimal::MinusZero,
num_integral_digits,
num_fractional_digits,
))
} else {
Ok(PreciseNumber::new(
Number::Float(ExtendedBigDecimal::BigDecimal(val)),
ExtendedBigDecimal::BigDecimal(val),
num_integral_digits,
num_fractional_digits,
))
@ -303,20 +301,17 @@ fn parse_hexadecimal(s: &str) -> Result<PreciseNumber, ParseNumberError> {
}
let num = BigInt::from_str_radix(s, 16).map_err(|_| ParseNumberError::Hex)?;
let num = BigDecimal::from(num);
match (is_neg, num == BigInt::zero()) {
(true, true) => Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::MinusZero),
2,
0,
)),
match (is_neg, num == BigDecimal::zero()) {
(true, true) => Ok(PreciseNumber::new(ExtendedBigDecimal::MinusZero, 2, 0)),
(true, false) => Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::BigInt(-num)),
ExtendedBigDecimal::BigDecimal(-num),
0,
0,
)),
(false, _) => Ok(PreciseNumber::new(
Number::Int(ExtendedBigInt::BigInt(num)),
ExtendedBigDecimal::BigDecimal(num),
0,
0,
)),
@ -364,19 +359,14 @@ impl FromStr for PreciseNumber {
#[cfg(test)]
mod tests {
use bigdecimal::BigDecimal;
use num_bigint::BigInt;
use num_traits::Zero;
use crate::extendedbigdecimal::ExtendedBigDecimal;
use crate::extendedbigint::ExtendedBigInt;
use crate::number::Number;
use crate::number::PreciseNumber;
use crate::numberparse::ParseNumberError;
/// Convenience function for parsing a [`PreciseNumber`] and unwrapping its number.
fn parse(s: &str) -> Number {
fn parse(s: &str) -> ExtendedBigDecimal {
s.parse::<PreciseNumber>().unwrap().number
}
@ -392,40 +382,37 @@ mod tests {
#[test]
fn test_parse_minus_zero_int() {
assert_eq!(parse("-0e0"), Number::Int(ExtendedBigInt::MinusZero));
assert_eq!(parse("-0e-0"), Number::Int(ExtendedBigInt::MinusZero));
assert_eq!(parse("-0e1"), Number::Int(ExtendedBigInt::MinusZero));
assert_eq!(parse("-0e+1"), Number::Int(ExtendedBigInt::MinusZero));
assert_eq!(parse("-0.0e1"), Number::Int(ExtendedBigInt::MinusZero));
assert_eq!(parse("-0x0"), Number::Int(ExtendedBigInt::MinusZero));
assert_eq!(parse("-0e0"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0e-0"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0e1"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0e+1"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0.0e1"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0x0"), ExtendedBigDecimal::MinusZero);
}
#[test]
fn test_parse_minus_zero_float() {
assert_eq!(parse("-0.0"), Number::Float(ExtendedBigDecimal::MinusZero));
assert_eq!(parse("-0e-1"), Number::Float(ExtendedBigDecimal::MinusZero));
assert_eq!(
parse("-0.0e-1"),
Number::Float(ExtendedBigDecimal::MinusZero)
);
assert_eq!(parse("-0.0"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0e-1"), ExtendedBigDecimal::MinusZero);
assert_eq!(parse("-0.0e-1"), ExtendedBigDecimal::MinusZero);
}
#[test]
fn test_parse_big_int() {
assert_eq!(parse("0"), Number::Int(ExtendedBigInt::zero()));
assert_eq!(parse("0.1e1"), Number::Int(ExtendedBigInt::one()));
assert_eq!(parse("0"), ExtendedBigDecimal::zero());
assert_eq!(parse("0.1e1"), ExtendedBigDecimal::one());
assert_eq!(
parse("1.0e1"),
Number::Int(ExtendedBigInt::BigInt("10".parse::<BigInt>().unwrap()))
ExtendedBigDecimal::BigDecimal("10".parse::<BigDecimal>().unwrap())
);
}
#[test]
fn test_parse_hexadecimal_big_int() {
assert_eq!(parse("0x0"), Number::Int(ExtendedBigInt::zero()));
assert_eq!(parse("0x0"), ExtendedBigDecimal::zero());
assert_eq!(
parse("0x10"),
Number::Int(ExtendedBigInt::BigInt("16".parse::<BigInt>().unwrap()))
ExtendedBigDecimal::BigDecimal("16".parse::<BigDecimal>().unwrap())
);
}
@ -433,56 +420,34 @@ mod tests {
fn test_parse_big_decimal() {
assert_eq!(
parse("0.0"),
Number::Float(ExtendedBigDecimal::BigDecimal(
"0.0".parse::<BigDecimal>().unwrap()
))
ExtendedBigDecimal::BigDecimal("0.0".parse::<BigDecimal>().unwrap())
);
assert_eq!(
parse(".0"),
Number::Float(ExtendedBigDecimal::BigDecimal(
"0.0".parse::<BigDecimal>().unwrap()
))
ExtendedBigDecimal::BigDecimal("0.0".parse::<BigDecimal>().unwrap())
);
assert_eq!(
parse("1.0"),
Number::Float(ExtendedBigDecimal::BigDecimal(
"1.0".parse::<BigDecimal>().unwrap()
))
ExtendedBigDecimal::BigDecimal("1.0".parse::<BigDecimal>().unwrap())
);
assert_eq!(
parse("10e-1"),
Number::Float(ExtendedBigDecimal::BigDecimal(
"1.0".parse::<BigDecimal>().unwrap()
))
ExtendedBigDecimal::BigDecimal("1.0".parse::<BigDecimal>().unwrap())
);
assert_eq!(
parse("-1e-3"),
Number::Float(ExtendedBigDecimal::BigDecimal(
"-0.001".parse::<BigDecimal>().unwrap()
))
ExtendedBigDecimal::BigDecimal("-0.001".parse::<BigDecimal>().unwrap())
);
}
#[test]
fn test_parse_inf() {
assert_eq!(parse("inf"), Number::Float(ExtendedBigDecimal::Infinity));
assert_eq!(
parse("infinity"),
Number::Float(ExtendedBigDecimal::Infinity)
);
assert_eq!(parse("+inf"), Number::Float(ExtendedBigDecimal::Infinity));
assert_eq!(
parse("+infinity"),
Number::Float(ExtendedBigDecimal::Infinity)
);
assert_eq!(
parse("-inf"),
Number::Float(ExtendedBigDecimal::MinusInfinity)
);
assert_eq!(
parse("-infinity"),
Number::Float(ExtendedBigDecimal::MinusInfinity)
);
assert_eq!(parse("inf"), ExtendedBigDecimal::Infinity);
assert_eq!(parse("infinity"), ExtendedBigDecimal::Infinity);
assert_eq!(parse("+inf"), ExtendedBigDecimal::Infinity);
assert_eq!(parse("+infinity"), ExtendedBigDecimal::Infinity);
assert_eq!(parse("-inf"), ExtendedBigDecimal::MinusInfinity);
assert_eq!(parse("-infinity"), ExtendedBigDecimal::MinusInfinity);
}
#[test]

View file

@ -2,28 +2,22 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (ToDO) istr chiter argptr ilen extendedbigdecimal extendedbigint numberparse
// spell-checker:ignore (ToDO) extendedbigdecimal numberparse
use std::io::{stdout, ErrorKind, Write};
use std::process::exit;
use clap::{crate_version, Arg, ArgAction, Command};
use num_traits::Zero;
use num_traits::{ToPrimitive, Zero};
use uucore::error::FromIo;
use uucore::error::UResult;
use uucore::memo::printf;
use uucore::show;
use uucore::error::{FromIo, UResult};
use uucore::format::{num_format, Format};
use uucore::{format_usage, help_about, help_usage};
mod error;
mod extendedbigdecimal;
mod extendedbigint;
mod number;
mod numberparse;
use crate::error::SeqError;
use crate::extendedbigdecimal::ExtendedBigDecimal;
use crate::extendedbigint::ExtendedBigInt;
use crate::number::Number;
use crate::number::PreciseNumber;
const ABOUT: &str = help_about!("seq.md");
@ -44,11 +38,6 @@ struct SeqOptions<'a> {
format: Option<&'a str>,
}
/// A range of integers.
///
/// The elements are (first, increment, last).
type RangeInt = (ExtendedBigInt, ExtendedBigInt, ExtendedBigInt);
/// A range of floats.
///
/// The elements are (first, increment, last).
@ -119,32 +108,22 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.num_fractional_digits
.max(increment.num_fractional_digits);
let result = match (first.number, increment.number, last.number) {
(Number::Int(first), Number::Int(increment), last) => {
let last = last.round_towards(&first);
print_seq_integers(
(first, increment, last),
&options.separator,
&options.terminator,
options.equal_width,
padding,
options.format,
)
let format = match options.format {
Some(f) => {
let f = Format::<num_format::Float>::parse(f)?;
Some(f)
}
(first, increment, last) => print_seq(
(
first.into_extended_big_decimal(),
increment.into_extended_big_decimal(),
last.into_extended_big_decimal(),
),
largest_dec,
&options.separator,
&options.terminator,
options.equal_width,
padding,
options.format,
),
None => None,
};
let result = print_seq(
(first.number, increment.number, last.number),
largest_dec,
&options.separator,
&options.terminator,
options.equal_width,
padding,
&format,
);
match result {
Ok(_) => Ok(()),
Err(err) if err.kind() == ErrorKind::BrokenPipe => Ok(()),
@ -216,28 +195,6 @@ fn write_value_float(
write!(writer, "{value_as_str}")
}
/// Write `value` to `writer`, optionally padded to `width` characters.
///
/// Without `pad` the value is written as-is. With `pad`, negative zero is
/// left-aligned and filled with `0` on the right, while every other value
/// is right-aligned with zero padding.
fn write_value_int(
    writer: &mut impl Write,
    value: &ExtendedBigInt,
    width: usize,
    pad: bool,
) -> std::io::Result<()> {
    let rendered = match (pad, value) {
        (false, _) => format!("{value}"),
        (true, ExtendedBigInt::MinusZero) => format!("{value:0<width$}"),
        (true, _) => format!("{value:>0width$}"),
    };
    write!(writer, "{rendered}")
}
// TODO `print_seq()` and `print_seq_integers()` are nearly identical,
// they could be refactored into a single more general function.
/// Floating point based code path
fn print_seq(
range: RangeFloat,
@ -246,13 +203,17 @@ fn print_seq(
terminator: &str,
pad: bool,
padding: usize,
format: Option<&str>,
format: &Option<Format<num_format::Float>>,
) -> std::io::Result<()> {
let stdout = stdout();
let mut stdout = stdout.lock();
let (first, increment, last) = range;
let mut value = first;
let padding = if pad { padding + 1 + largest_dec } else { 0 };
let padding = if pad {
padding + if largest_dec > 0 { largest_dec + 1 } else { 0 }
} else {
0
};
let mut is_first_iteration = true;
while !done_printing(&value, &increment, &last) {
if !is_first_iteration {
@ -270,13 +231,16 @@ fn print_seq(
// it as a string and ultimately writing to `stdout`. We
// shouldn't have to do so much converting back and forth via
// strings.
match format {
match &format {
Some(f) => {
let s = format!("{value}");
if let Err(x) = printf(f, &[s]) {
show!(x);
exit(1);
}
let float = match &value {
ExtendedBigDecimal::BigDecimal(bd) => bd.to_f64().unwrap(),
ExtendedBigDecimal::Infinity => f64::INFINITY,
ExtendedBigDecimal::MinusInfinity => f64::NEG_INFINITY,
ExtendedBigDecimal::MinusZero => -0.0,
ExtendedBigDecimal::Nan => f64::NAN,
};
f.fmt(&mut stdout, float)?;
}
None => write_value_float(&mut stdout, &value, padding, largest_dec)?,
}
@ -290,62 +254,3 @@ fn print_seq(
stdout.flush()?;
Ok(())
}
/// Print an integer sequence.
///
/// This function prints a sequence of integers defined by `range`,
/// which defines the first integer, increment, and last integer of the
/// range (in that order). The `separator` is written between consecutive
/// integers and `terminator` is written once at the end, but only if at
/// least one integer was printed.
///
/// The `pad` parameter indicates whether to pad numbers to the width
/// given in `padding`.
///
/// If `format` is `Some`, each value is rendered with `printf()` using
/// that template instead of the default formatting, and `pad`/`padding`
/// are not used. If `printf()` reports an error, the error is shown and
/// the process exits with status 1.
fn print_seq_integers(
range: RangeInt,
separator: &str,
terminator: &str,
pad: bool,
padding: usize,
format: Option<&str>,
) -> std::io::Result<()> {
let stdout = stdout();
let mut stdout = stdout.lock();
let (first, increment, last) = range;
let mut value = first;
let mut is_first_iteration = true;
while !done_printing(&value, &increment, &last) {
// The separator goes between values, so skip it before the first one.
if !is_first_iteration {
write!(stdout, "{separator}")?;
}
// If there was an argument `-f FORMAT`, then use that format
// template instead of the default formatting strategy.
//
// The `printf()` function takes in the template and
// the current value and writes the result to `stdout`.
//
// TODO See similar comment about formatting in `print_seq()`.
match format {
Some(f) => {
let s = format!("{value}");
if let Err(x) = printf(f, &[s]) {
show!(x);
exit(1);
}
}
None => write_value_int(&mut stdout, &value, padding, pad)?,
}
// TODO Implement augmenting addition.
value = value + increment.clone();
is_first_iteration = false;
}
// Only terminate the output if something was actually printed.
if !is_first_iteration {
write!(stdout, "{terminator}")?;
}
Ok(())
}

View file

@ -77,7 +77,7 @@ entries = ["libc"]
fs = ["dunce", "libc", "winapi-util", "windows-sys"]
fsext = ["libc", "time", "windows-sys"]
lines = []
memo = ["itertools"]
format = ["itertools"]
mode = ["libc"]
perms = ["libc", "walkdir"]
pipes = []

View file

@ -8,14 +8,14 @@
pub mod backup_control;
#[cfg(feature = "encoding")]
pub mod encoding;
#[cfg(feature = "format")]
pub mod format;
#[cfg(feature = "fs")]
pub mod fs;
#[cfg(feature = "fsext")]
pub mod fsext;
#[cfg(feature = "lines")]
pub mod lines;
#[cfg(feature = "memo")]
pub mod memo;
#[cfg(feature = "quoting-style")]
pub mod quoting_style;
#[cfg(feature = "ranges")]
@ -24,8 +24,6 @@ pub mod ranges;
pub mod ringbuffer;
#[cfg(feature = "sum")]
pub mod sum;
#[cfg(feature = "memo")]
mod tokenize;
#[cfg(feature = "update-control")]
pub mod update_control;
#[cfg(feature = "version-cmp")]

View file

@ -0,0 +1,152 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use os_display::Quotable;
use crate::{error::set_exit_code, show_warning};
/// An argument for formatting
///
/// Each of these variants is only accepted by their respective directives. For
/// example, [`FormatArgument::Char`] requires a `%c` directive.
///
/// The [`FormatArgument::Unparsed`] variant contains a string that can be
/// parsed into other types. This is used by the `printf` utility.
#[derive(Clone, Debug)]
pub enum FormatArgument {
/// An already-typed character
Char(char),
/// An already-typed string
String(String),
/// An already-typed unsigned integer
UnsignedInt(u64),
/// An already-typed signed integer
SignedInt(i64),
/// An already-typed floating point number
Float(f64),
/// Special argument that gets coerced into the other variants
Unparsed(String),
}
/// Typed access to a stream of [`FormatArgument`]s.
///
/// Every getter returns a plain value rather than a `Result`; see the
/// implementation for what is returned when an argument is missing or does
/// not fit the requested type.
pub trait ArgumentIter<'a>: Iterator<Item = &'a FormatArgument> {
/// Take the next argument as a character.
fn get_char(&mut self) -> char;
/// Take the next argument as a signed integer.
fn get_i64(&mut self) -> i64;
/// Take the next argument as an unsigned integer.
fn get_u64(&mut self) -> u64;
/// Take the next argument as a floating point number.
fn get_f64(&mut self) -> f64;
/// Take the next argument as a string slice borrowed from the argument.
fn get_str(&mut self) -> &'a str;
}
/// Blanket implementation: every iterator over [`FormatArgument`] references
/// can hand out typed arguments.
///
/// Each getter consumes exactly one item. A missing argument, or an argument
/// whose variant the getter does not accept, yields the "empty" value of the
/// requested type (`'\0'`, `0`, `0.0` or `""`). A [`FormatArgument::Unparsed`]
/// string that cannot be parsed as a number prints a warning, sets the exit
/// code to 1 and yields zero.
impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
    /// Returns the character for a `%c` directive.
    ///
    /// For an unparsed argument this is its first character; an empty string
    /// yields `'\0'`.
    fn get_char(&mut self) -> char {
        let Some(next) = self.next() else {
            return '\0';
        };
        match next {
            FormatArgument::Char(c) => *c,
            // `%c` prints the first character of its argument and ignores
            // the rest, so a longer string must not be rejected.
            FormatArgument::Unparsed(s) => s.chars().next().unwrap_or('\0'),
            _ => '\0',
        }
    }

    /// Returns an unsigned integer, accepting `0x` (hex), a leading `0`
    /// (octal), `'c` (character code) and plain decimal notation.
    fn get_u64(&mut self) -> u64 {
        let Some(next) = self.next() else {
            return 0;
        };
        match next {
            FormatArgument::UnsignedInt(n) => *n,
            FormatArgument::Unparsed(s) => {
                let opt = if let Some(hex) = s.strip_prefix("0x") {
                    u64::from_str_radix(hex, 16).ok()
                } else if s.starts_with('0') {
                    // Keep the leading zero while parsing: stripping it would
                    // turn the plain argument "0" into an empty string, which
                    // is not a number.
                    u64::from_str_radix(s, 8).ok()
                } else if let Some(quoted) = s.strip_prefix('\'') {
                    quoted.chars().next().map(|c| c as u64)
                } else {
                    s.parse().ok()
                };
                match opt {
                    Some(n) => n,
                    None => {
                        show_warning!("{}: expected a numeric value", s.quote());
                        set_exit_code(1);
                        0
                    }
                }
            }
            _ => 0,
        }
    }

    /// Returns a signed integer, accepting the same notations as
    /// [`ArgumentIter::get_u64`] plus a leading minus sign.
    fn get_i64(&mut self) -> i64 {
        let Some(next) = self.next() else {
            return 0;
        };
        match next {
            FormatArgument::SignedInt(n) => *n,
            FormatArgument::Unparsed(s) => {
                // For hex, we parse `u64` because we do not allow another
                // minus sign. We might need to do more precise parsing here.
                let opt = if let Some(hex) = s.strip_prefix("-0x") {
                    // `wrapping_neg` keeps the two's-complement result of the
                    // plain negation without the overflow panic that
                    // `-(i64::MIN)` triggers in debug builds.
                    u64::from_str_radix(hex, 16)
                        .ok()
                        .map(|x| (x as i64).wrapping_neg())
                } else if let Some(hex) = s.strip_prefix("0x") {
                    u64::from_str_radix(hex, 16).ok().map(|x| x as i64)
                } else if s.starts_with("-0") || s.starts_with('0') {
                    i64::from_str_radix(s, 8).ok()
                } else if let Some(quoted) = s.strip_prefix('\'') {
                    quoted.chars().next().map(|x| x as i64)
                } else {
                    s.parse().ok()
                };
                match opt {
                    Some(n) => n,
                    None => {
                        show_warning!("{}: expected a numeric value", s.quote());
                        set_exit_code(1);
                        0
                    }
                }
            }
            _ => 0,
        }
    }

    /// Returns a floating point number.
    ///
    /// # Panics
    ///
    /// Hexadecimal float arguments (`0x…` / `-0x…`) are not implemented yet
    /// and hit `unimplemented!`.
    fn get_f64(&mut self) -> f64 {
        let Some(next) = self.next() else {
            return 0.0;
        };
        match next {
            FormatArgument::Float(n) => *n,
            FormatArgument::Unparsed(s) => {
                let opt = if s.starts_with("0x") || s.starts_with("-0x") {
                    unimplemented!("Hexadecimal floats are unimplemented!")
                } else if let Some(quoted) = s.strip_prefix('\'') {
                    quoted.chars().next().map(|x| x as u64 as f64)
                } else {
                    s.parse().ok()
                };
                match opt {
                    Some(n) => n,
                    None => {
                        show_warning!("{}: expected a numeric value", s.quote());
                        set_exit_code(1);
                        0.0
                    }
                }
            }
            _ => 0.0,
        }
    }

    /// Returns the string of a `String` or `Unparsed` argument, `""` otherwise.
    fn get_str(&mut self) -> &'a str {
        match self.next() {
            Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s,
            _ => "",
        }
    }
}

View file

@ -0,0 +1,135 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Parsing of escape sequences
/// The result of parsing one (possibly escaped) character of a format string
#[derive(Debug)]
pub enum EscapedChar {
/// A single byte
Byte(u8),
/// A unicode character
Char(char),
/// A character prefixed with a backslash (i.e. an invalid escape sequence)
Backslash(u8),
/// Specifies that the string should stop (`\c`)
End,
}
/// Numeric base of an escape sequence; the discriminant is the radix itself.
#[repr(u8)]
#[derive(Clone, Copy)]
enum Base {
    Oct = 8,
    Hex = 16,
}

impl Base {
    /// Largest number of digits a code in this base may have.
    fn max_digits(&self) -> u8 {
        if matches!(self, Self::Oct) {
            3
        } else {
            2
        }
    }

    /// Value of the ASCII digit `c` in this base, or `None` if `c` is not a
    /// digit of this base.
    fn convert_digit(&self, c: u8) -> Option<u8> {
        // Decode as a hexadecimal digit first, then reject anything that is
        // too large for the radix (which rules out `8`, `9` and letters for
        // octal).
        let value = match c {
            b'0'..=b'9' => c - b'0',
            b'a'..=b'f' => c - b'a' + 10,
            b'A'..=b'F' => c - b'A' + 10,
            _ => return None,
        };
        let radix = *self as u8;
        (value < radix).then_some(value)
    }
}
/// Parse the numeric part of the `\xHHH` and `\0NNN` escape sequences
///
/// Reads at least one and at most `base.max_digits()` digits from the front
/// of `input` and advances `input` past them. Returns `None`, leaving `input`
/// untouched, when `input` does not start with a digit of `base`.
fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
    // The accumulator deliberately wraps: three octal digits are 9 bits, one
    // more than fits in a `u8`, and GNU just seems to wrap these values.
    // Widening to `u32` and going through `char::from_u32` would be wrong,
    // because values above `u8::MAX` would then be read as unicode.
    let radix = base as u8;
    let bytes = *input;
    let mut value = base.convert_digit(*bytes.first()?)?;
    let mut used = 1;
    for &byte in bytes.iter().take(usize::from(base.max_digits())).skip(1) {
        let Some(digit) = base.convert_digit(byte) else {
            break;
        };
        value = value.wrapping_mul(radix).wrapping_add(digit);
        used += 1;
    }
    *input = &bytes[used..];
    Some(value)
}
// spell-checker:disable-next
/// Parse `\uHHHH` and `\UHHHHHHHH`
///
/// Expects exactly `digits` hexadecimal digits at the front of `input`.
/// Digits are consumed one by one, so on failure `input` has already been
/// advanced past the digits that were read successfully.
// TODO: This should print warnings and possibly halt execution when it fails to parse
// TODO: If the character cannot be converted to u32, the input should be printed.
fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
    let radix = Base::Hex as u32;
    let mut code_point: u32 = 0;
    // At least one digit is always required.
    let mut remaining_digits = digits.max(1);
    while remaining_digits > 0 {
        let (byte, tail) = input.split_first()?;
        let digit = Base::Hex.convert_digit(*byte)?;
        code_point = code_point.wrapping_mul(radix).wrapping_add(u32::from(digit));
        *input = tail;
        remaining_digits -= 1;
    }
    char::from_u32(code_point)
}
/// Parse the escape sequence that follows a backslash
///
/// `rest` must point just past the backslash and is advanced past everything
/// that belongs to the escape sequence. An unknown escape character is
/// returned as [`EscapedChar::Backslash`]; a backslash at the very end of the
/// input is returned as a literal backslash byte.
pub fn parse_escape_code(rest: &mut &[u8]) -> EscapedChar {
    let Some((&code, tail)) = rest.split_first() else {
        return EscapedChar::Byte(b'\\');
    };

    // This is for the \NNN syntax for octal sequences.
    // Note that '0' is intentionally omitted because that
    // would be the \0NNN syntax.
    if matches!(code, b'1'..=b'7') {
        if let Some(byte) = parse_code(rest, Base::Oct) {
            return EscapedChar::Byte(byte);
        }
    }

    // From here on the escape character itself is consumed.
    *rest = tail;
    match code {
        b'c' => EscapedChar::End,
        b'x' => parse_code(rest, Base::Hex)
            .map_or(EscapedChar::Backslash(b'x'), EscapedChar::Byte),
        b'0' => EscapedChar::Byte(parse_code(rest, Base::Oct).unwrap_or_default()),
        b'u' => EscapedChar::Char(parse_unicode(rest, 4).unwrap_or_default()),
        b'U' => EscapedChar::Char(parse_unicode(rest, 8).unwrap_or_default()),
        b'\\' => EscapedChar::Byte(b'\\'),
        b'a' => EscapedChar::Byte(0x07),
        b'b' => EscapedChar::Byte(0x08),
        b'e' => EscapedChar::Byte(0x1b),
        b'f' => EscapedChar::Byte(0x0c),
        b'n' => EscapedChar::Byte(b'\n'),
        b'r' => EscapedChar::Byte(b'\r'),
        b't' => EscapedChar::Byte(b'\t'),
        b'v' => EscapedChar::Byte(0x0b),
        other => EscapedChar::Backslash(other),
    }
}

View file

@ -0,0 +1,334 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! `printf`-style formatting
//!
//! Rust has excellent formatting capabilities, but the coreutils require very
//! specific formatting that needs to work exactly like the GNU utilities.
//! Naturally, the GNU behavior is based on the C `printf` functionality.
//!
//! Additionally, we need support for escape sequences for the `printf` utility.
//!
//! The [`printf`] and [`sprintf`] functions closely match the behavior of the
//! corresponding C functions: the former renders a formatted string
//! to stdout, the latter renders to a new byte vector (`Vec<u8>`).
//!
//! There are three kinds of parsing that we might want to do:
//!
//! 1. Parse only `printf` directives (for e.g. `seq`, `dd`)
//! 2. Parse only escape sequences (for e.g. `echo`)
//! 3. Parse both `printf` specifiers and escape sequences (for e.g. `printf`)
//!
//! This module aims to combine all three use cases. An iterator parsing each
//! of these cases is provided by [`parse_escape_only`], [`parse_spec_only`]
//! and [`parse_spec_and_escape`], respectively.
//!
//! There is a special [`Format`] type, which can be used to parse a format
//! string containing exactly one directive and does not use any `*` in that
//! directive. This format can be printed in a type-safe manner without failing
//! (modulo IO errors).
mod argument;
mod escape;
pub mod num_format;
mod spec;
pub use argument::*;
use spec::Spec;
use std::{
error::Error,
fmt::Display,
io::{stdout, Write},
ops::ControlFlow,
};
use crate::error::UError;
use self::{
escape::{parse_escape_code, EscapedChar},
num_format::Formatter,
};
/// Everything that can go wrong while parsing or rendering a format string
#[derive(Debug)]
pub enum FormatError {
/// A `%` directive could not be parsed; holds the offending bytes
SpecError(Vec<u8>),
/// Writing the output failed
IoError(std::io::Error),
NoMoreArguments,
InvalidArgument(FormatArgument),
/// A single-directive [`Format`] was given more than one `%` directive
TooManySpecs,
/// A single-directive [`Format`] was given no `%` directive at all
NeedAtLeastOneSpec,
/// The directive does not match the requested formatter, or uses `*`
WrongSpecType,
}
// Marker impls: `FormatError` uses the default behavior of both error traits.
impl Error for FormatError {}
impl UError for FormatError {}
impl From<std::io::Error> for FormatError {
fn from(value: std::io::Error) -> Self {
Self::IoError(value)
}
}
/// User-facing messages for each [`FormatError`].
impl Display for FormatError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the spec error carries data in its message; every other
        // variant maps to a fixed string.
        let message = match self {
            Self::SpecError(spec) => {
                return write!(
                    f,
                    "%{}: invalid conversion specification",
                    String::from_utf8_lossy(spec)
                );
            }
            // TODO: The next two should print the spec as well
            Self::TooManySpecs => "format has too many % directives",
            Self::NeedAtLeastOneSpec => "format has no % directive",
            // TODO: Error message below needs some work
            Self::WrongSpecType => "wrong % directive type was given",
            Self::IoError(_) => "io error",
            Self::NoMoreArguments => "no more arguments",
            Self::InvalidArgument(_) => "invalid argument",
        };
        f.write_str(message)
    }
}
/// A single item to format
///
/// A parsed format string is a sequence of these: either a `%` directive or
/// one literal character of type `C`.
pub enum FormatItem<C: FormatChar> {
/// A format specifier
Spec(Spec),
/// A single character
Char(C),
}
/// A literal character of a format string that knows how to write itself
pub trait FormatChar {
/// Write this character to `writer`.
///
/// The returned [`ControlFlow`] tells the caller whether to continue or
/// to stop producing output.
fn write(&self, writer: impl Write) -> std::io::Result<ControlFlow<()>>;
}
/// A plain byte is written as-is and never stops the output.
impl FormatChar for u8 {
    fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
        writer
            .write_all(std::slice::from_ref(self))
            .map(|()| ControlFlow::Continue(()))
    }
}
/// Writes the bytes an escape sequence stands for.
///
/// [`EscapedChar::End`] writes nothing and asks the caller to stop.
impl FormatChar for EscapedChar {
    fn write(&self, mut writer: impl Write) -> std::io::Result<ControlFlow<()>> {
        match *self {
            Self::End => return Ok(ControlFlow::Break(())),
            Self::Byte(byte) => writer.write_all(&[byte])?,
            Self::Char(ch) => write!(writer, "{ch}")?,
            // An invalid escape is echoed back including its backslash.
            Self::Backslash(byte) => writer.write_all(&[b'\\', byte])?,
        }
        Ok(ControlFlow::Continue(()))
    }
}
impl<C: FormatChar> FormatItem<C> {
    /// Write this item to `writer`, pulling arguments from `args` if the
    /// item is a directive.
    ///
    /// A directive always asks the caller to continue; a literal character
    /// forwards whatever its own `write` decided.
    pub fn write<'a>(
        &self,
        writer: impl Write,
        args: &mut impl Iterator<Item = &'a FormatArgument>,
    ) -> Result<ControlFlow<()>, FormatError> {
        match self {
            Self::Char(c) => c.write(writer).map_err(FormatError::IoError),
            Self::Spec(spec) => {
                spec.write(writer, args)?;
                Ok(ControlFlow::Continue(()))
            }
        }
    }
}
/// Parse a format string containing % directives and escape sequences
///
/// Yields one item per literal byte, escape sequence or directive. `%%` is
/// yielded as a single literal `%`.
pub fn parse_spec_and_escape(
    fmt: &[u8],
) -> impl Iterator<Item = Result<FormatItem<EscapedChar>, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&byte, tail) = remaining.split_first()?;
        remaining = tail;
        let item = match byte {
            b'%' => {
                if let [b'%', after @ ..] = remaining {
                    remaining = after;
                    FormatItem::Char(EscapedChar::Byte(b'%'))
                } else {
                    match Spec::parse(&mut remaining) {
                        Ok(spec) => FormatItem::Spec(spec),
                        Err(slice) => {
                            return Some(Err(FormatError::SpecError(slice.to_vec())));
                        }
                    }
                }
            }
            b'\\' => FormatItem::Char(parse_escape_code(&mut remaining)),
            other => FormatItem::Char(EscapedChar::Byte(other)),
        };
        Some(Ok(item))
    })
}
/// Parse a format string containing % directives
///
/// Backslashes are not special here: every byte that is not part of a
/// directive is yielded literally, and `%%` is yielded as a single `%`.
pub fn parse_spec_only(
    fmt: &[u8],
) -> impl Iterator<Item = Result<FormatItem<u8>, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&byte, tail) = remaining.split_first()?;
        remaining = tail;
        if byte != b'%' {
            return Some(Ok(FormatItem::Char(byte)));
        }
        if let [b'%', after @ ..] = remaining {
            remaining = after;
            return Some(Ok(FormatItem::Char(b'%')));
        }
        Some(match Spec::parse(&mut remaining) {
            Ok(spec) => Ok(FormatItem::Spec(spec)),
            Err(slice) => Err(FormatError::SpecError(slice.to_vec())),
        })
    })
}
/// Parse a format string containing escape sequences
///
/// `%` is not special here: every byte that is not part of an escape
/// sequence is yielded as a plain [`EscapedChar::Byte`].
pub fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        let (&byte, tail) = remaining.split_first()?;
        remaining = tail;
        Some(if byte == b'\\' {
            parse_escape_code(&mut remaining)
        } else {
            EscapedChar::Byte(byte)
        })
    })
}
/// Write a formatted string to stdout.
///
/// `format_string` contains the template and `args` contains the
/// arguments to render into the template.
///
/// See also [`sprintf`], which renders into a new byte vector instead.
///
/// # Errors
///
/// Returns a [`FormatError`] if the template cannot be parsed or if writing
/// to stdout fails.
///
/// # Examples
///
/// ```rust
/// use uucore::format::{printf, FormatArgument};
///
/// printf("hello %s", &[FormatArgument::String("world".into())]).unwrap();
/// // prints "hello world"
/// ```
pub fn printf<'a>(
    format_string: impl AsRef<[u8]>,
    arguments: impl IntoIterator<Item = &'a FormatArgument>,
) -> Result<(), FormatError> {
    // Literal text is written one byte at a time, so take the stdout lock
    // once for the whole call instead of once per write.
    printf_writer(stdout().lock(), format_string, arguments)
}
/// Shared implementation of [`printf`] and [`sprintf`]: renders
/// `format_string` with `args` into `writer`, stopping at the first error.
fn printf_writer<'a>(
    mut writer: impl Write,
    format_string: impl AsRef<[u8]>,
    args: impl IntoIterator<Item = &'a FormatArgument>,
) -> Result<(), FormatError> {
    let mut remaining_args = args.into_iter();
    parse_spec_only(format_string.as_ref()).try_for_each(|item| {
        item?.write(&mut writer, &mut remaining_args)?;
        Ok(())
    })
}
/// Render a formatted string into a new byte vector.
///
/// `format_string` contains the template and `args` contains the
/// arguments to render into the template.
///
/// See also [`printf`], which prints to stdout.
///
/// # Examples
///
/// ```rust
/// use uucore::format::{sprintf, FormatArgument};
///
/// let s = sprintf("hello %s", &[FormatArgument::String("world".into())]).unwrap();
/// let s = std::str::from_utf8(&s).unwrap();
/// assert_eq!(s, "hello world");
/// ```
pub fn sprintf<'a>(
    format_string: impl AsRef<[u8]>,
    arguments: impl IntoIterator<Item = &'a FormatArgument>,
) -> Result<Vec<u8>, FormatError> {
    let mut rendered = Vec::new();
    printf_writer(&mut rendered, format_string, arguments).map(|()| rendered)
}
/// A parsed format for a single value
///
/// The kind of value is chosen by the [`Formatter`] type parameter `F`.
/// This is used by `seq`. It can be constructed with [`Format::parse`]
/// and can write a value with [`Format::fmt`].
///
/// It can only accept a single specification without any asterisk parameters.
/// If it does get more specifications, it will return an error.
pub struct Format<F: Formatter> {
// Literal bytes in front of the directive
prefix: Vec<u8>,
// Literal bytes after the directive
suffix: Vec<u8>,
// Formatter built from the single directive
formatter: F,
}
impl<F: Formatter> Format<F> {
    /// Parse a format string that contains exactly one directive.
    ///
    /// # Errors
    ///
    /// - any parse error of the format string itself,
    /// - [`FormatError::NeedAtLeastOneSpec`] if there is no directive,
    /// - whatever `F::try_from_spec` rejects,
    /// - [`FormatError::TooManySpecs`] if a second directive follows.
    pub fn parse(format_string: impl AsRef<[u8]>) -> Result<Self, FormatError> {
        let mut items = parse_spec_only(format_string.as_ref());

        // Everything up to the first directive is the literal prefix.
        let mut prefix = Vec::new();
        let spec = loop {
            match items.next() {
                None => return Err(FormatError::NeedAtLeastOneSpec),
                Some(item) => match item? {
                    FormatItem::Char(c) => prefix.push(c),
                    FormatItem::Spec(s) => break s,
                },
            }
        };
        let formatter = F::try_from_spec(spec)?;

        // Everything after it must be literal text.
        let mut suffix = Vec::new();
        for item in items {
            match item? {
                FormatItem::Char(c) => suffix.push(c),
                FormatItem::Spec(_) => return Err(FormatError::TooManySpecs),
            }
        }

        Ok(Self {
            prefix,
            suffix,
            formatter,
        })
    }

    /// Write the prefix, the formatted value `f` and the suffix to `w`.
    pub fn fmt(&self, mut w: impl Write, f: F::Input) -> std::io::Result<()> {
        w.write_all(&self.prefix)?;
        self.formatter.fmt(&mut w, f)?;
        w.write_all(&self.suffix)
    }
}

View file

@ -0,0 +1,577 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Utilities for formatting numbers in various formats
use std::io::Write;
use super::{
spec::{CanAsterisk, Spec},
FormatError,
};
/// A formatter for one kind of number
pub trait Formatter {
/// The type of value this formatter can write
type Input;
/// Write `x` to `writer` according to this formatter's settings.
fn fmt(&self, writer: impl Write, x: Self::Input) -> std::io::Result<()>;
/// Build the formatter from a parsed directive, failing if the directive
/// is not suitable for this formatter.
fn try_from_spec(s: Spec) -> Result<Self, FormatError>
where
Self: Sized;
}
/// Notation used for an unsigned integer
#[derive(Clone, Copy, Debug)]
pub enum UnsignedIntVariant {
/// Base 10 (`%u`)
Decimal,
/// Base 8 (`%o`), with or without prefix
Octal(Prefix),
/// Base 16 (`%x` / `%X`), with letter case and with or without prefix
Hexadecimal(Case, Prefix),
}
/// Notation used for a floating point number
#[derive(Clone, Copy, Debug)]
pub enum FloatVariant {
/// `%f` / `%F`
Decimal,
/// `%e` / `%E`
Scientific,
/// `%g` / `%G`
Shortest,
/// `%a` / `%A`
Hexadecimal,
}
/// Letter case of the alphabetic characters in the output
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Case {
Lowercase,
Uppercase,
}
/// Whether a base prefix is printed (the `#` flag on `%o`, `%x` and `%X`)
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Prefix {
No,
Yes,
}
/// Whether the decimal point is always printed (the `#` flag on float
/// directives)
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ForceDecimal {
No,
Yes,
}
/// What is printed in front of a non-negative number
#[derive(Clone, Copy, Debug)]
pub enum PositiveSign {
/// Nothing
None,
/// A `+` (the `+` flag)
Plus,
/// A space (the space flag)
Space,
}
/// How a number is padded to the requested width
#[derive(Clone, Copy, Debug)]
pub enum NumberAlignment {
/// Left-aligned (the `-` flag)
Left,
/// Right-aligned, padded with spaces (no flag)
RightSpace,
/// Right-aligned, padded with zeros (the `0` flag)
RightZero,
}
/// Formatter for signed integers (`%d` / `%i`)
pub struct SignedInt {
/// Minimum width of the whole output
pub width: usize,
/// Minimum width the number itself is zero-padded to
pub precision: usize,
/// What to print in front of non-negative numbers
pub positive_sign: PositiveSign,
/// How to pad up to `width`
pub alignment: NumberAlignment,
}
impl Formatter for SignedInt {
    type Input = i64;

    /// Write `x` as a decimal number.
    ///
    /// The sign is treated as part of the number: space padding goes in
    /// front of it, zero padding goes between it and the digits, and
    /// `precision` counts digits only.
    fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> {
        let sign = if x < 0 {
            "-"
        } else {
            match self.positive_sign {
                PositiveSign::None => "",
                PositiveSign::Plus => "+",
                PositiveSign::Space => " ",
            }
        };
        // `unsigned_abs` cannot overflow, not even for `i64::MIN`.
        let digits = format!("{:0>width$}", x.unsigned_abs(), width = self.precision);

        match self.alignment {
            NumberAlignment::Left => {
                let s = format!("{sign}{digits}");
                write!(writer, "{s:<width$}", width = self.width)
            }
            NumberAlignment::RightSpace => {
                let s = format!("{sign}{digits}");
                write!(writer, "{s:>width$}", width = self.width)
            }
            NumberAlignment::RightZero => {
                // The zeros go after the sign, which takes up part of the width.
                let width = self.width.saturating_sub(sign.len());
                write!(writer, "{sign}{digits:0>width$}", width = width)
            }
        }
    }

    /// Build the formatter from a `%d` / `%i` directive.
    ///
    /// # Errors
    ///
    /// Returns [`FormatError::WrongSpecType`] for any other directive and for
    /// a width or precision given as `*`.
    fn try_from_spec(s: Spec) -> Result<Self, FormatError> {
        let Spec::SignedInt {
            width,
            precision,
            positive_sign,
            alignment,
        } = s
        else {
            return Err(FormatError::WrongSpecType);
        };

        let width = match width {
            Some(CanAsterisk::Fixed(x)) => x,
            None => 0,
            Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
        };

        let precision = match precision {
            Some(CanAsterisk::Fixed(x)) => x,
            None => 0,
            Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
        };

        Ok(Self {
            width,
            precision,
            positive_sign,
            alignment,
        })
    }
}
/// Formatter for unsigned integers (`%u`, `%o`, `%x`, `%X`)
pub struct UnsignedInt {
/// Base, letter case and prefix of the output
pub variant: UnsignedIntVariant,
/// Minimum width of the whole output
pub width: usize,
/// Minimum width the number itself is padded to
pub precision: usize,
/// How to pad up to `width`
pub alignment: NumberAlignment,
}
impl Formatter for UnsignedInt {
type Input = u64;
fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> {
let mut s = match self.variant {
UnsignedIntVariant::Decimal => format!("{x}"),
UnsignedIntVariant::Octal(Prefix::No) => format!("{x:o}"),
UnsignedIntVariant::Octal(Prefix::Yes) => format!("{x:#o}"),
UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => {
format!("{x:x}")
}
UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => {
format!("{x:#x}")
}
UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => {
format!("{x:X}")
}
UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => {
format!("{x:#X}")
}
};
if self.precision > s.len() {
s = format!("{:0width$}", s, width = self.precision);
}
match self.alignment {
NumberAlignment::Left => write!(writer, "{s:<width$}", width = self.width),
NumberAlignment::RightSpace => write!(writer, "{s:>width$}", width = self.width),
NumberAlignment::RightZero => write!(writer, "{s:0>width$}", width = self.width),
}
}
fn try_from_spec(s: Spec) -> Result<Self, FormatError> {
let Spec::UnsignedInt {
variant,
width,
precision,
alignment,
} = s
else {
return Err(FormatError::WrongSpecType);
};
let width = match width {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
let precision = match precision {
Some(CanAsterisk::Fixed(x)) => x,
None => 0,
Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
};
Ok(Self {
width,
precision,
variant,
alignment,
})
}
}
/// Formatter for floating point numbers (`%f`, `%e`, `%g`, `%a` and their
/// uppercase forms)
pub struct Float {
/// Notation of the output
pub variant: FloatVariant,
/// Letter case of the output
pub case: Case,
/// Whether the decimal point is always printed
pub force_decimal: ForceDecimal,
/// Minimum width of the whole output
pub width: usize,
/// What to print in front of non-negative numbers
pub positive_sign: PositiveSign,
/// How to pad up to `width`
pub alignment: NumberAlignment,
/// Precision handed to the selected notation
pub precision: usize,
}
impl Default for Float {
fn default() -> Self {
Self {
variant: FloatVariant::Decimal,
case: Case::Lowercase,
force_decimal: ForceDecimal::No,
width: 0,
positive_sign: PositiveSign::None,
alignment: NumberAlignment::Left,
precision: 6,
}
}
}
impl Formatter for Float {
    type Input = f64;

    /// Write `x` in the notation selected by `variant`.
    ///
    /// The sign is handled here and the digits are produced from the
    /// magnitude of `x`. This keeps the sign next to the digits when padding
    /// with spaces, puts zero padding after the sign, and never hands a
    /// negative number to the digit formatting helpers.
    fn fmt(&self, mut writer: impl Write, x: Self::Input) -> std::io::Result<()> {
        let sign = if x.is_sign_positive() {
            match self.positive_sign {
                PositiveSign::None => "",
                PositiveSign::Plus => "+",
                PositiveSign::Space => " ",
            }
        } else if x.is_nan() {
            // NaN is printed without a minus sign.
            ""
        } else {
            "-"
        };

        let magnitude = x.abs();
        let body = if magnitude.is_finite() {
            match self.variant {
                FloatVariant::Decimal => {
                    format_float_decimal(magnitude, self.precision, self.force_decimal)
                }
                FloatVariant::Scientific => format_float_scientific(
                    magnitude,
                    self.precision,
                    self.case,
                    self.force_decimal,
                ),
                FloatVariant::Shortest => {
                    format_float_shortest(magnitude, self.precision, self.case, self.force_decimal)
                }
                FloatVariant::Hexadecimal => format_float_hexadecimal(
                    magnitude,
                    self.precision,
                    self.case,
                    self.force_decimal,
                ),
            }
        } else {
            format_float_non_finite(magnitude, self.case)
        };

        match self.alignment {
            NumberAlignment::Left => {
                let s = format!("{sign}{body}");
                write!(writer, "{s:<width$}", width = self.width)
            }
            NumberAlignment::RightSpace => {
                let s = format!("{sign}{body}");
                write!(writer, "{s:>width$}", width = self.width)
            }
            NumberAlignment::RightZero => {
                // The zeros go after the sign, which takes up part of the width.
                let width = self.width.saturating_sub(sign.len());
                write!(writer, "{sign}{body:0>width$}", width = width)
            }
        }
    }

    /// Build the formatter from a float directive.
    ///
    /// A directive without an explicit precision gets precision 6, the same
    /// value [`Float::default`] uses.
    ///
    /// # Errors
    ///
    /// Returns [`FormatError::WrongSpecType`] for any other directive and for
    /// a width or precision given as `*`.
    fn try_from_spec(s: Spec) -> Result<Self, FormatError>
    where
        Self: Sized,
    {
        let Spec::Float {
            variant,
            case,
            force_decimal,
            width,
            positive_sign,
            alignment,
            precision,
        } = s
        else {
            return Err(FormatError::WrongSpecType);
        };

        let width = match width {
            Some(CanAsterisk::Fixed(x)) => x,
            None => 0,
            Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
        };

        let precision = match precision {
            Some(CanAsterisk::Fixed(x)) => x,
            // An omitted precision means 6 for floats, not 0: `%f` of 1 is
            // `1.000000`.
            None => 6,
            Some(CanAsterisk::Asterisk) => return Err(FormatError::WrongSpecType),
        };

        Ok(Self {
            variant,
            case,
            force_decimal,
            width,
            positive_sign,
            alignment,
            precision,
        })
    }
}
/// Format an infinity or NaN as `inf` / `-inf` / `nan`, in the requested case.
///
/// `f` must not be finite.
fn format_float_non_finite(f: f64, case: Case) -> String {
    debug_assert!(!f.is_finite());
    let mut s = format!("{f}");
    match case {
        // Rust spells NaN in mixed case, so lowercase has to be forced too.
        Case::Lowercase => s.make_ascii_lowercase(),
        Case::Uppercase => s.make_ascii_uppercase(),
    }
    s
}
/// Format `f` in plain decimal notation with `precision` fractional digits.
///
/// With precision 0 the decimal point is only printed when `force_decimal`
/// asks for it.
fn format_float_decimal(f: f64, precision: usize, force_decimal: ForceDecimal) -> String {
    let mut rendered = format!("{f:.precision$}");
    if precision == 0 && force_decimal == ForceDecimal::Yes {
        rendered.push('.');
    }
    rendered
}
/// Format `f` in scientific notation (`d.ddde±xx`) with `precision`
/// fractional digits.
///
/// `f` must be finite. The exponent is computed from the magnitude of `f`,
/// because the logarithm of a negative number is NaN; the sign is added back
/// in front of the result.
fn format_float_scientific(
    f: f64,
    precision: usize,
    case: Case,
    force_decimal: ForceDecimal,
) -> String {
    let sign = if f.is_sign_negative() { "-" } else { "" };
    let magnitude = f.abs();

    let (normalized, exponent) = if magnitude == 0.0 {
        // Zero has no logarithm; it is simply `0e+00`.
        (0.0, 0)
    } else {
        let mut exponent: i32 = magnitude.log10().floor() as i32;
        let mut normalized = magnitude / 10.0_f64.powi(exponent);

        // If the normalized value will be rounded to a value greater than 10
        // we need to correct.
        let scale = 10_f64.powi(precision as i32);
        if (normalized * scale).round() / scale >= 10.0 {
            normalized /= 10.0;
            exponent += 1;
        }
        (normalized, exponent)
    };

    let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal {
        "."
    } else {
        ""
    };

    let exp_char = match case {
        Case::Lowercase => 'e',
        Case::Uppercase => 'E',
    };

    format!("{sign}{normalized:.precision$}{additional_dot}{exp_char}{exponent:+03}")
}
/// Format `f` like `%g`: decimal or scientific notation, whichever the
/// exponent calls for, with trailing fractional zeros removed unless
/// `force_decimal` is set.
///
/// `f` must be finite. The exponent is computed from the magnitude of `f`,
/// because the logarithm of a negative number is NaN.
fn format_float_shortest(
    f: f64,
    precision: usize,
    case: Case,
    force_decimal: ForceDecimal,
) -> String {
    // Precision here is about how many digits should be displayed
    // instead of how many digits for the fractional part, this means that if
    // we pass this to rust's format string, it's always gonna be one less.
    let precision = precision.saturating_sub(1);
    let sign = if f.is_sign_negative() { "-" } else { "" };
    let magnitude = f.abs();

    if magnitude == 0.0 {
        return match (force_decimal, precision) {
            (ForceDecimal::Yes, 0) => format!("{sign}0."),
            (ForceDecimal::Yes, _) => format!("{sign}{:.*}", precision, 0.0),
            (ForceDecimal::No, _) => format!("{sign}0"),
        };
    }

    let mut exponent = magnitude.log10().floor() as i32;
    if exponent <= -4 || exponent > precision as i32 {
        // Scientific-ish notation (with a few differences)
        let mut normalized = magnitude / 10.0_f64.powi(exponent);

        // If the normalized value will be rounded to a value greater than 10
        // we need to correct.
        let scale = 10_f64.powi(precision as i32);
        if (normalized * scale).round() / scale >= 10.0 {
            normalized /= 10.0;
            exponent += 1;
        }

        let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal {
            "."
        } else {
            ""
        };

        let mut normalized = format!("{normalized:.*}", precision);

        // Only strip when there is a fractional part to strip from.
        if force_decimal == ForceDecimal::No && precision > 0 {
            strip_zeros_and_dot(&mut normalized);
        }

        let exp_char = match case {
            Case::Lowercase => 'e',
            Case::Uppercase => 'E',
        };

        format!("{sign}{normalized}{additional_dot}{exp_char}{exponent:+03}")
    } else {
        // Decimal-ish notation with a few differences:
        // - The precision works differently and specifies the total number
        //   of digits instead of the digits in the fractional part.
        // - If we don't force the decimal, '0' and `.` are trimmed.
        let decimal_places = (precision as i32).saturating_sub(exponent) as usize;
        let mut formatted = if decimal_places == 0 && force_decimal == ForceDecimal::Yes {
            format!("{sign}{magnitude:.0}.")
        } else {
            format!("{sign}{magnitude:.*}", decimal_places)
        };

        // Without fractional digits the trailing zeros are significant
        // (`100000` must not become `1`), so there is nothing to strip.
        if force_decimal == ForceDecimal::No && decimal_places > 0 {
            strip_zeros_and_dot(&mut formatted);
        }

        formatted
    }
}
/// Format `f` in hexadecimal floating point notation (`0xh.hhhp±d`).
///
/// `f` must be finite. The mantissa is always printed with all 13
/// hexadecimal digits unless `precision` is 0, in which case it is left out
/// entirely; other precisions are not honored yet. The binary exponent is
/// printed in decimal, as C's `%a` does.
fn format_float_hexadecimal(
    f: f64,
    precision: usize,
    case: Case,
    force_decimal: ForceDecimal,
) -> String {
    let bits = f.to_bits();
    let sign = if f.is_sign_negative() { "-" } else { "" };
    // The exponent field is 11 bits wide; a wider mask would pull in the
    // sign bit.
    let exponent_bits = ((bits >> 52) & 0x7ff) as i64;
    let mantissa = bits & 0xf_ffff_ffff_ffff;

    let (first_digit, exponent) = if f == 0.0 {
        (0, 0)
    } else if exponent_bits == 0 {
        // Subnormal numbers have no implicit leading one and use the
        // smallest normal exponent.
        (0, -1022)
    } else {
        (1, exponent_bits - 1023)
    };

    let mut s = match (precision, force_decimal) {
        (0, ForceDecimal::No) => format!("{sign}0x{first_digit}p{exponent:+}"),
        (0, ForceDecimal::Yes) => format!("{sign}0x{first_digit}.p{exponent:+}"),
        _ => format!("{sign}0x{first_digit}.{mantissa:0>13x}p{exponent:+}"),
    };

    if case == Case::Uppercase {
        s.make_ascii_uppercase();
    }

    s
}
/// Remove trailing zeros of the fractional part of `s`, and the decimal
/// point itself if nothing is left behind it.
///
/// A string without a decimal point is left untouched: the trailing zeros of
/// a whole number are significant.
fn strip_zeros_and_dot(s: &mut String) {
    if !s.contains('.') {
        return;
    }
    let kept = s.trim_end_matches('0').len();
    s.truncate(kept);
    if s.ends_with('.') {
        s.pop();
    }
}
// Unit tests for the float formatting helpers. Every test calls a helper
// directly, without going through `Float`.
#[cfg(test)]
mod test {
use crate::format::num_format::{Case, ForceDecimal};
// `%f`-style output with the default precision of 6.
#[test]
fn decimal_float() {
use super::format_float_decimal;
let f = |x| format_float_decimal(x, 6, ForceDecimal::No);
assert_eq!(f(0.0), "0.000000");
assert_eq!(f(1.0), "1.000000");
assert_eq!(f(100.0), "100.000000");
assert_eq!(f(123456.789), "123456.789000");
assert_eq!(f(12.3456789), "12.345679");
assert_eq!(f(1000000.0), "1000000.000000");
assert_eq!(f(99999999.0), "99999999.000000");
// Rounding at the last printed digit.
assert_eq!(f(1.9999995), "1.999999");
assert_eq!(f(1.9999996), "2.000000");
}
// `%e`-style output with the default precision of 6.
#[test]
fn scientific_float() {
use super::format_float_scientific;
let f = |x| format_float_scientific(x, 6, Case::Lowercase, ForceDecimal::No);
assert_eq!(f(0.0), "0.000000e+00");
assert_eq!(f(1.0), "1.000000e+00");
assert_eq!(f(100.0), "1.000000e+02");
assert_eq!(f(123456.789), "1.234568e+05");
assert_eq!(f(12.3456789), "1.234568e+01");
assert_eq!(f(1000000.0), "1.000000e+06");
// Rounding up carries into the exponent.
assert_eq!(f(99999999.0), "1.000000e+08");
}
// `%.0e`, first without and then with the forced decimal point.
#[test]
fn scientific_float_zero_precision() {
use super::format_float_scientific;
let f = |x| format_float_scientific(x, 0, Case::Lowercase, ForceDecimal::No);
assert_eq!(f(0.0), "0e+00");
assert_eq!(f(1.0), "1e+00");
assert_eq!(f(100.0), "1e+02");
assert_eq!(f(123456.789), "1e+05");
assert_eq!(f(12.3456789), "1e+01");
assert_eq!(f(1000000.0), "1e+06");
assert_eq!(f(99999999.0), "1e+08");
let f = |x| format_float_scientific(x, 0, Case::Lowercase, ForceDecimal::Yes);
assert_eq!(f(0.0), "0.e+00");
assert_eq!(f(1.0), "1.e+00");
assert_eq!(f(100.0), "1.e+02");
assert_eq!(f(123456.789), "1.e+05");
assert_eq!(f(12.3456789), "1.e+01");
assert_eq!(f(1000000.0), "1.e+06");
assert_eq!(f(99999999.0), "1.e+08");
}
// `%g`-style output: 6 significant digits, trailing zeros removed.
#[test]
fn shortest_float() {
use super::format_float_shortest;
let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::No);
assert_eq!(f(0.0), "0");
assert_eq!(f(1.0), "1");
assert_eq!(f(100.0), "100");
assert_eq!(f(123456.789), "123457");
assert_eq!(f(12.3456789), "12.3457");
assert_eq!(f(1000000.0), "1e+06");
assert_eq!(f(99999999.0), "1e+08");
}
// `%#g`-style output: trailing zeros and the decimal point are kept.
#[test]
fn shortest_float_force_decimal() {
use super::format_float_shortest;
let f = |x| format_float_shortest(x, 6, Case::Lowercase, ForceDecimal::Yes);
assert_eq!(f(0.0), "0.00000");
assert_eq!(f(1.0), "1.00000");
assert_eq!(f(100.0), "100.000");
assert_eq!(f(123456.789), "123457.");
assert_eq!(f(12.3456789), "12.3457");
assert_eq!(f(1000000.0), "1.00000e+06");
assert_eq!(f(99999999.0), "1.00000e+08");
}
// `%.0g`, first without and then with the forced decimal point.
#[test]
fn shortest_float_force_decimal_zero_precision() {
use super::format_float_shortest;
let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::No);
assert_eq!(f(0.0), "0");
assert_eq!(f(1.0), "1");
assert_eq!(f(100.0), "1e+02");
assert_eq!(f(123456.789), "1e+05");
assert_eq!(f(12.3456789), "1e+01");
assert_eq!(f(1000000.0), "1e+06");
assert_eq!(f(99999999.0), "1e+08");
let f = |x| format_float_shortest(x, 0, Case::Lowercase, ForceDecimal::Yes);
assert_eq!(f(0.0), "0.");
assert_eq!(f(1.0), "1.");
assert_eq!(f(100.0), "1.e+02");
assert_eq!(f(123456.789), "1.e+05");
assert_eq!(f(12.3456789), "1.e+01");
assert_eq!(f(1000000.0), "1.e+06");
assert_eq!(f(99999999.0), "1.e+08");
}
}

View file

@ -0,0 +1,462 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) intmax ptrdiff
use crate::quoting_style::{escape_name, QuotingStyle};
use super::{
num_format::{
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
UnsignedIntVariant,
},
parse_escape_only, ArgumentIter, FormatChar, FormatError,
};
use std::{fmt::Display, io::Write, ops::ControlFlow};
/// A parsed specification for formatting a value
///
/// This might require more than one argument to resolve width or precision
/// values that are given as `*`.
#[derive(Debug)]
pub enum Spec {
/// `%c`
Char {
width: Option<CanAsterisk<usize>>,
align_left: bool,
},
/// `%s`
String {
precision: Option<CanAsterisk<usize>>,
width: Option<CanAsterisk<usize>>,
align_left: bool,
},
/// `%b`; accepts no flags, width or precision
EscapedString,
/// `%q`; accepts no flags, width or precision
QuotedString,
/// `%d` / `%i`
SignedInt {
width: Option<CanAsterisk<usize>>,
precision: Option<CanAsterisk<usize>>,
positive_sign: PositiveSign,
alignment: NumberAlignment,
},
/// `%u`, `%o`, `%x`, `%X`
UnsignedInt {
variant: UnsignedIntVariant,
width: Option<CanAsterisk<usize>>,
precision: Option<CanAsterisk<usize>>,
alignment: NumberAlignment,
},
/// `%f`, `%e`, `%g`, `%a` and their uppercase forms
Float {
variant: FloatVariant,
case: Case,
force_decimal: ForceDecimal,
width: Option<CanAsterisk<usize>>,
positive_sign: PositiveSign,
alignment: NumberAlignment,
precision: Option<CanAsterisk<usize>>,
},
}
/// Precision and width specified might use an asterisk to indicate that they are
/// determined by an argument.
#[derive(Clone, Copy, Debug)]
pub enum CanAsterisk<T> {
/// The value is written in the format string itself
Fixed(T),
/// The value is given as `*`
Asterisk,
}
/// Size of the expected type (ignored)
///
/// We ignore this parameter entirely, but we do parse it.
/// It could be used in the future if the need arises.
enum Length {
/// signed/unsigned char ("hh")
Char,
/// signed/unsigned short int ("h")
Short,
/// signed/unsigned long int ("l")
Long,
/// signed/unsigned long long int ("ll")
LongLong,
/// intmax_t ("j")
IntMaxT,
/// size_t ("z")
SizeT,
/// ptrdiff_t ("t")
// NOTE(review): the variant name looks like a typo for `PtrDiffT`; renaming
// it needs a matching change wherever the variant is constructed.
PtfDiffT,
/// long double ("L")
LongDouble,
}
impl Spec {
/// Parse one format specification from the front of `rest`.
///
/// The leading `%` must already have been consumed by the caller. On success
/// `rest` is advanced past the specification and the parsed spec is returned.
///
/// # Errors
///
/// Returns the bytes examined so far when the input ends before a conversion
/// character is found, when the conversion character is unknown, or when a
/// flag, width or precision is not accepted by the conversion. Note that
/// `rest` is advanced *before* the conversion character is validated, so it
/// has already moved past the rejected specification in the latter two
/// cases; it is left untouched only when no conversion character was found.
pub fn parse<'a>(rest: &mut &'a [u8]) -> Result<Self, &'a [u8]> {
// Based on the C++ reference, the spec format looks like:
//
// %[flags][width][.precision][length]specifier
//
// However, we have already parsed the '%'.
let mut index = 0;
let start = *rest;
let mut minus = false;
let mut plus = false;
let mut space = false;
let mut hash = false;
let mut zero = false;
// Flags may appear in any order and may be repeated.
while let Some(x) = rest.get(index) {
match x {
b'-' => minus = true,
b'+' => plus = true,
b' ' => space = true,
b'#' => hash = true,
b'0' => zero = true,
_ => break,
}
index += 1;
}
// `-` takes precedence over `0`.
let alignment = match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
};
// `+` takes precedence over ` `.
let positive_sign = match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
};
let width = eat_asterisk_or_number(rest, &mut index);
// A `.` that is not followed by a number or `*` means a precision of 0.
let precision = if let Some(b'.') = rest.get(index) {
index += 1;
Some(eat_asterisk_or_number(rest, &mut index).unwrap_or(CanAsterisk::Fixed(0)))
} else {
None
};
// We ignore the length. It's not really relevant to printf
let _ = Self::parse_length(rest, &mut index);
let Some(type_spec) = rest.get(index) else {
return Err(&start[..index]);
};
index += 1;
// From here on `rest` points past the conversion character, even if one of
// the checks below rejects the specification.
*rest = &start[index..];
Ok(match type_spec {
// GNU accepts minus, plus and space even though they are not used
b'c' => {
if hash || precision.is_some() {
return Err(&start[..index]);
}
Self::Char {
width,
align_left: minus,
}
}
b's' => {
if hash {
return Err(&start[..index]);
}
Self::String {
precision,
width,
align_left: minus,
}
}
b'b' => {
if hash || minus || plus || space || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::EscapedString
}
b'q' => {
if hash || minus || plus || space || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::QuotedString
}
b'd' | b'i' => {
if hash {
return Err(&start[..index]);
}
Self::SignedInt {
width,
precision,
alignment,
positive_sign,
}
}
c @ (b'u' | b'o' | b'x' | b'X') => {
// Normal unsigned integer cannot have a prefix
if *c == b'u' && hash {
return Err(&start[..index]);
}
let prefix = match hash {
false => Prefix::No,
true => Prefix::Yes,
};
let variant = match c {
b'u' => UnsignedIntVariant::Decimal,
b'o' => UnsignedIntVariant::Octal(prefix),
b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix),
b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix),
_ => unreachable!(),
};
Self::UnsignedInt {
variant,
precision,
width,
alignment,
}
}
c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Self::Float {
width,
precision,
variant: match c {
b'f' | b'F' => FloatVariant::Decimal,
b'e' | b'E' => FloatVariant::Scientific,
b'g' | b'G' => FloatVariant::Shortest,
b'a' | b'A' => FloatVariant::Hexadecimal,
_ => unreachable!(),
},
// For floats `#` forces the decimal point to be printed.
force_decimal: match hash {
false => ForceDecimal::No,
true => ForceDecimal::Yes,
},
// The case of the conversion character selects the output case.
case: match c.is_ascii_uppercase() {
false => Case::Lowercase,
true => Case::Uppercase,
},
alignment,
positive_sign,
},
_ => return Err(&start[..index]),
})
}
/// Consume any run of C length modifiers starting at `*index`.
///
/// Zero or more modifiers (`hh`, `h`, `l`, `ll`, `j`, `z`, `t`, `L`) are
/// accepted back to back; `*index` is advanced past all of them and the last
/// one seen is returned (`None` when there was no modifier at all).
///
/// The result is currently ignored by the caller, but the characters still
/// have to be skipped so that the conversion character is found.
///
/// TODO: This needs to be configurable: `seq` accepts only one length
/// param
fn parse_length(rest: &mut &[u8], index: &mut usize) -> Option<Length> {
    let mut length = None;
    loop {
        // Look one byte ahead so the doubled forms win over the single ones.
        let (modifier, consumed) = match (rest.get(*index), rest.get(*index + 1)) {
            (Some(b'h'), Some(b'h')) => (Length::Char, 2),
            (Some(b'h'), _) => (Length::Short, 1),
            // "ll" is `long long`, a single "l" is `long`.
            (Some(b'l'), Some(b'l')) => (Length::LongLong, 2),
            (Some(b'l'), _) => (Length::Long, 1),
            (Some(b'j'), _) => (Length::IntMaxT, 1),
            (Some(b'z'), _) => (Length::SizeT, 1),
            (Some(b't'), _) => (Length::PtfDiffT, 1),
            (Some(b'L'), _) => (Length::LongDouble, 1),
            _ => break,
        };
        *index += consumed;
        length = Some(modifier);
    }
    length
}
/// Format the next argument(s) from `args` as described by this spec and
/// write the result to `writer`.
///
/// A `*` width or precision is resolved first (width before precision), each
/// consuming one argument, and only then is the value itself fetched.
///
/// # Errors
///
/// Returns [`FormatError::IoError`] when writing to `writer` fails, and
/// propagates any error produced while expanding the escapes of a `%b`
/// argument.
pub fn write<'a>(
    &self,
    mut writer: impl Write,
    mut args: impl ArgumentIter<'a>,
) -> Result<(), FormatError> {
    match self {
        Self::Char { width, align_left } => {
            let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
            write_padded(writer, args.get_char(), width, false, *align_left)
        }
        Self::String {
            width,
            align_left,
            precision,
        } => {
            let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
            let precision = resolve_asterisk(*precision, &mut args)?;
            let s = args.get_str();
            // GNU truncates on a byte level, which can split a multi-byte
            // character. The padding below goes through Rust's string
            // formatting and can therefore only emit valid UTF-8, so instead
            // of slicing inside a code point (which panics) we back up to the
            // previous character boundary.
            // TODO: We need to not use Rust's formatting for aligning the output,
            // so that we can just write bytes to stdout like GNU does.
            let truncated = match precision {
                Some(p) if p < s.len() => {
                    let mut end = p;
                    // Offset 0 is always a boundary, so this terminates.
                    while !s.is_char_boundary(end) {
                        end -= 1;
                    }
                    &s[..end]
                }
                _ => s,
            };
            write_padded(writer, truncated, width, false, *align_left)
        }
        Self::EscapedString => {
            let s = args.get_str();
            // Expand into a buffer first so the output is written in one go.
            let mut parsed = Vec::new();
            for c in parse_escape_only(s.as_bytes()) {
                match c.write(&mut parsed)? {
                    ControlFlow::Continue(()) => {}
                    ControlFlow::Break(()) => {
                        // TODO: This should break the _entire execution_ of printf
                        break;
                    }
                };
            }
            writer.write_all(&parsed).map_err(FormatError::IoError)
        }
        Self::QuotedString => {
            let s = args.get_str();
            writer
                .write_all(
                    escape_name(
                        s.as_ref(),
                        &QuotingStyle::Shell {
                            escape: true,
                            always_quote: false,
                            show_control: false,
                        },
                    )
                    .as_bytes(),
                )
                .map_err(FormatError::IoError)
        }
        Self::SignedInt {
            width,
            precision,
            positive_sign,
            alignment,
        } => {
            let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
            let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0);
            let i = args.get_i64();
            num_format::SignedInt {
                width,
                precision,
                positive_sign: *positive_sign,
                alignment: *alignment,
            }
            .fmt(writer, i)
            .map_err(FormatError::IoError)
        }
        Self::UnsignedInt {
            variant,
            width,
            precision,
            alignment,
        } => {
            let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
            let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0);
            let i = args.get_u64();
            num_format::UnsignedInt {
                variant: *variant,
                precision,
                width,
                alignment: *alignment,
            }
            .fmt(writer, i)
            .map_err(FormatError::IoError)
        }
        Self::Float {
            variant,
            case,
            force_decimal,
            width,
            positive_sign,
            alignment,
            precision,
        } => {
            let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
            // Floats default to a precision of 6 when none is given.
            let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(6);
            let f = args.get_f64();
            num_format::Float {
                width,
                precision,
                variant: *variant,
                case: *case,
                force_decimal: *force_decimal,
                positive_sign: *positive_sign,
                alignment: *alignment,
            }
            .fmt(writer, f)
            .map_err(FormatError::IoError)
        }
    }
}
}
/// Turn a parsed width/precision into a concrete number.
///
/// A fixed value is returned as is. An asterisk pulls the next argument from
/// `args` as an unsigned integer; a value that does not fit in `usize` becomes
/// 0. `None` stays `None`, and no argument is consumed in that case.
fn resolve_asterisk<'a>(
    option: Option<CanAsterisk<usize>>,
    mut args: impl ArgumentIter<'a>,
) -> Result<Option<usize>, FormatError> {
    let resolved = option.map(|value| match value {
        CanAsterisk::Fixed(w) => w,
        CanAsterisk::Asterisk => usize::try_from(args.get_u64()).unwrap_or(0),
    });
    Ok(resolved)
}
/// Write `text` to `writer`, padded to at least `width` characters.
///
/// With `left` set the text is left-aligned and padded with spaces
/// (`pad_zero` is ignored). Otherwise it is right-aligned, padded with zeros
/// when `pad_zero` is set and with spaces when it is not.
fn write_padded(
    mut writer: impl Write,
    text: impl Display,
    width: usize,
    pad_zero: bool,
    left: bool,
) -> Result<(), FormatError> {
    let outcome = if left {
        // 0 is ignored if we pad left.
        write!(writer, "{text: <width$}")
    } else if pad_zero {
        write!(writer, "{text:0>width$}")
    } else {
        write!(writer, "{text: >width$}")
    };
    outcome.map_err(FormatError::IoError)
}
/// Parse either a `*` or a decimal number at `*index`.
///
/// `*index` is advanced past whatever was consumed. Returns `None`, leaving
/// `*index` untouched, when neither is present.
fn eat_asterisk_or_number(rest: &mut &[u8], index: &mut usize) -> Option<CanAsterisk<usize>> {
    match rest.get(*index) {
        Some(b'*') => {
            *index += 1;
            Some(CanAsterisk::Asterisk)
        }
        _ => eat_number(rest, index).map(CanAsterisk::Fixed),
    }
}
/// Parse a run of ASCII digits starting at `*index` as a `usize`.
///
/// On success `*index` is advanced past the digits. `None` is returned, and
/// `*index` is left untouched, when
/// * there is no digit at `*index`,
/// * the digits run all the way to the end of the input (a number must be
///   followed by at least one more byte), or
/// * the number does not fit in a `usize`.
///
/// In the last case the caller ends up rejecting the specification instead of
/// the process panicking on an absurdly large width or precision.
fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option<usize> {
    let tail = rest.get(*index..)?;
    let len = tail.iter().position(|b| !b.is_ascii_digit())?;
    if len == 0 {
        return None;
    }
    let digits = std::str::from_utf8(&tail[..len]).expect("ASCII digits are valid UTF-8");
    // `parse` only fails on overflow here: the slice is non-empty, all digits.
    let parsed = digits.parse().ok()?;
    *index += len;
    Some(parsed)
}

View file

@ -1,179 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Main entry point for our implementation of printf.
//!
//! The [`printf`] and [`sprintf`] closely match the behavior of the
//! corresponding C functions: the former renders a formatted string
//! to stdout, the latter renders to a new [`String`] object.
use crate::display::Quotable;
use crate::error::{UResult, USimpleError};
use crate::features::tokenize::sub::SubParser;
use crate::features::tokenize::token::Token;
use crate::features::tokenize::unescaped_text::UnescapedText;
use crate::show_warning;
use itertools::put_back_n;
use std::io::{stdout, Cursor, Write};
use std::iter::Peekable;
use std::slice::Iter;
/// Memo runner of printf.
///
/// Takes a format string and arguments, then:
/// 1. tokenizes the format string into tokens, consuming
///    any substitution arguments along the way;
/// 2. feeds the remaining arguments into the function
///    that prints the tokens.
struct Memo {
// Tokens produced from the format string, in the order they were parsed.
tokens: Vec<Token>,
}
fn warn_excess_args(first_arg: &str) {
show_warning!(
"ignoring excess arguments, starting with {}",
first_arg.quote()
);
}
impl Memo {
    /// Tokenize `pf_string` into a new `Memo`.
    ///
    /// The unescaped-text parser and the substitution parser are run
    /// alternately until the format string is exhausted; both are handed
    /// `writer`, and the substitution parser may consume arguments from
    /// `pf_args_it`. If the format string contains no substitution at all,
    /// a warning is emitted for the first leftover argument and all remaining
    /// arguments are discarded.
    fn new<W>(
        writer: &mut W,
        pf_string: &str,
        pf_args_it: &mut Peekable<Iter<String>>,
    ) -> UResult<Self>
    where
        W: Write,
    {
        let mut memo = Self { tokens: Vec::new() };
        let mut chars = put_back_n(pf_string.chars());
        let mut saw_substitution = false;
        loop {
            if let Some(text) = UnescapedText::from_it_core(writer, &mut chars, false) {
                memo.tokens.push(text);
            }
            if let Some(sub) = SubParser::from_it(writer, &mut chars, pf_args_it)? {
                saw_substitution = true;
                memo.tokens.push(sub);
            }
            // Peek: stop once the format string has been fully consumed.
            match chars.next() {
                Some(c) => {
                    chars.put_back(c);
                }
                None => break,
            }
        }
        if !saw_substitution {
            let has_excess = match pf_args_it.peek() {
                Some(first_arg) => {
                    warn_excess_args(first_arg);
                    true
                }
                None => false,
            };
            if has_excess {
                // drain remaining args
                while pf_args_it.next().is_some() {}
            }
        }
        Ok(memo)
    }

    /// Write every token once, letting each pull what it needs from `pf_args_it`.
    fn apply<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
    where
        W: Write,
    {
        for token in &self.tokens {
            token.write(writer, pf_args_it);
        }
    }

    /// Tokenize `pf_string`, then keep re-applying the tokens for as long as
    /// arguments remain in `pf_args`.
    fn run_all<W>(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()>
    where
        W: Write,
    {
        let mut remaining = pf_args.iter().peekable();
        let memo = Self::new(writer, pf_string, &mut remaining)?;
        while remaining.peek().is_some() {
            memo.apply(writer, &mut remaining);
        }
        Ok(())
    }
}
/// Render a format string with its arguments and write the result to stdout.
///
/// `format_string` is the template; `args` are the values substituted
/// into it.
///
/// Use [`sprintf`] instead to obtain the result as a [`String`].
///
/// # Examples
///
/// ```rust
/// use uucore::memo::printf;
///
/// printf("hello %s", &["world".to_string()]).unwrap();
/// // prints "hello world"
/// ```
pub fn printf(format_string: &str, args: &[String]) -> UResult<()> {
    Memo::run_all(&mut stdout(), format_string, args)
}
/// Render a format string with its arguments into a new [`String`].
///
/// `format_string` is the template; `args` are the values substituted
/// into it.
///
/// Use [`printf`] instead to write the result to stdout.
///
/// # Errors
///
/// Fails with exit code 1 when the rendered bytes are not valid UTF-8, in
/// addition to any error reported while processing the format string.
///
/// # Examples
///
/// ```rust
/// use uucore::memo::sprintf;
///
/// let s = sprintf("hello %s", &["world".to_string()]).unwrap();
/// assert_eq!(s, "hello world".to_string());
/// ```
pub fn sprintf(format_string: &str, args: &[String]) -> UResult<String> {
    let mut buffer = Cursor::new(vec![]);
    Memo::run_all(&mut buffer, format_string, args)?;
    String::from_utf8(buffer.into_inner()).map_err(|e| {
        USimpleError::new(
            1,
            format!("failed to parse formatted string as UTF-8: {e}"),
        )
    })
}
// Smoke tests for `sprintf`: empty format, plain text, and a single `%s`.
#[cfg(test)]
mod tests {
use crate::memo::sprintf;
// An empty format string renders to an empty string.
#[test]
fn test_sprintf_smoke() {
assert_eq!(sprintf("", &[]).unwrap(), "".to_string());
}
// Text without substitutions is passed through unchanged.
#[test]
fn test_sprintf_no_args() {
assert_eq!(
sprintf("hello world", &[]).unwrap(),
"hello world".to_string()
);
}
// `%s` is replaced by the corresponding argument.
#[test]
fn test_sprintf_string() {
assert_eq!(
sprintf("hello %s", &["world".to_string()]).unwrap(),
"hello world".to_string()
);
}
}

View file

@ -1,9 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
#[allow(clippy::module_inception)]
mod num_format;
pub mod sub;
pub mod token;
pub mod unescaped_text;

View file

@ -1,30 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
//! Primitives used by Sub Tokenizer
//! and num_format modules
/// The kind of substitution a format field represents.
// NOTE(review): the per-variant meanings below are inferred from the variant
// names and from the formatter modules named after them; confirm against the
// Sub tokenizer before relying on them.
#[derive(Clone)]
pub enum FieldType {
// presumably a string field
Strf,
// presumably a plain floating-point field
Floatf,
// C99 hex-floating-point field
CninetyNineHexFloatf,
// presumably a scientific-notation field
Scif,
// `%g` / `%G` field
Decf,
// presumably an integer field
Intf,
// presumably a character field
Charf,
}
// A Sub token's fields are stored
// as a single object so they can be more simply
// passed by ref to num_format in a Sub method.
#[derive(Clone)]
pub struct FormatField<'a> {
// presumably the minimum field width, if one was given — TODO confirm
pub min_width: Option<isize>,
// The second numeric field of the spec; the float formatters fall back to 6
// when it is `None`.
pub second_field: Option<u32>,
// The conversion character; formatters compare it against the uppercase
// letter (e.g. 'A', 'G') to choose capitalized output.
pub field_char: &'a char,
pub field_type: &'a FieldType,
// presumably the original text of the field — TODO confirm
pub orig: &'a String,
}

View file

@ -1,63 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Primitives used by num_format and sub_modules.
//! never dealt with above (e.g. Sub Tokenizer never uses these)
use crate::{display::Quotable, show_error};
use itertools::{put_back_n, PutBackN};
use std::str::Chars;
use super::format_field::FormatField;
// Contains the rough ingredients of the final
// output for a number, organized together
// to allow for easy generalization of output manipulation
// (e.g. max number of digits after the decimal point).
#[derive(Default)]
pub struct FormatPrimitive {
// Text placed before the digits (e.g. "0x" / "-0x" for hex floats).
pub prefix: Option<String>,
// Digits before the decimal point.
pub pre_decimal: Option<String>,
// Digits after the decimal point.
pub post_decimal: Option<String>,
// Text placed after the digits (e.g. an exponent such as "e+00").
pub suffix: Option<String>,
}
// Radix of a numeric argument; each discriminant is the radix value itself.
#[derive(Clone, PartialEq, Eq)]
pub enum Base {
Ten = 10,
Hex = 16,
Octal = 8,
}
// Information from the beginning of a numeric argument
// that precedes the beginning of the numeric value.
pub struct InitialPrefix {
// Radix the argument is written in.
pub radix_in: Base,
// Sign of the argument; -1 denotes a negative number.
pub sign: i8,
// Byte offset into the argument string at which the numeric value starts.
pub offset: usize,
}
// Two-step interface implemented by every numeric formatter: first build a
// `FormatPrimitive` from the argument text, then render it to a string.
pub trait Formatter {
// Return a FormatPrimitive for
// particular field char(s), given the argument
// string and prefix information (sign, radix).
fn get_primitive(
&self,
field: &FormatField,
in_prefix: &InitialPrefix,
str_in: &str,
) -> Option<FormatPrimitive>;
// Return a string from a FormatPrimitive,
// given information about the field.
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String;
}
// Return a put-back-capable char iterator over `str_in`, starting at byte
// `offset`. Panics if `offset` is out of range or not on a char boundary.
pub fn get_it_at(offset: usize, str_in: &str) -> PutBackN<Chars> {
    let tail = &str_in[offset..];
    put_back_n(tail.chars())
}
// TODO: put this somewhere better
pub fn warn_incomplete_conv(pf_arg: &str) {
// important: keep println here not print
show_error!("{}: value not completely converted", pf_arg.maybe_quote());
}

View file

@ -1,274 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (ToDO) arrnum arr_num mult basenum bufferval refd vals arrfloat conv intermed addl
// Multiply a number stored as big-endian digits in base `basenum` by the
// small integer `base_ten_int_fact`, returning the product as big-endian
// digits in the same base. An empty input yields an empty output.
pub fn arrnum_int_mult(arr_num: &[u8], basenum: u8, base_ten_int_fact: u8) -> Vec<u8> {
    let radix = u16::from(basenum);
    let multiplier = u16::from(base_ten_int_fact);
    let mut digits: Vec<u8> = Vec::new();
    let mut carry: u16 = 0;
    // Schoolbook multiplication, least significant digit first.
    for &digit in arr_num.iter().rev() {
        let total = u16::from(digit) * multiplier + carry;
        digits.push((total % radix) as u8);
        carry = total / radix;
    }
    // Spill whatever carry is left into additional leading digits.
    while carry != 0 {
        digits.push((carry % radix) as u8);
        carry /= radix;
    }
    digits.reverse();
    digits
}
// Running state of a digit-by-digit division (see `arrnum_int_div_step`).
#[allow(dead_code)]
pub struct Remainder<'a> {
// Index into `arr_num` at which the not-yet-divided part starts.
pub position: usize,
// Digits standing in for the first `replace.len()` digits at `position`:
// the division step reads these first and then continues with
// `arr_num[position + replace.len()..]`.
pub replace: Vec<u8>,
// The digits of the number being divided.
pub arr_num: &'a Vec<u8>,
}
// Result of one `arrnum_int_div_step` call.
#[allow(dead_code)]
pub struct DivOut<'a> {
// How many times the divisor went into the digits consumed by this step.
pub quotient: u8,
// State from which the next step can continue.
pub remainder: Remainder<'a>,
}
// Perform one step of long division of a digit array by a small integer.
//
// Digits are read from `rem_in.replace` first and then from
// `rem_in.arr_num[rem_in.position + rem_in.replace.len()..]`, accumulating a
// value in base `radix_in` until it exceeds `base_ten_int_divisor`. At that
// point the quotient is the number of times the divisor fits, the leftover is
// converted back to base `radix_in` and stored in the returned `replace`, and
// the returned `position` is moved forward.
//
// With `after_decimal` set, zeros are supplied once the digits run out;
// otherwise the step stops there with whatever quotient it has (0).
//
// NOTE(review): the accumulate-vs-divide test uses `>` while the subtraction
// loop uses `>=`, so an accumulated value exactly equal to the divisor is not
// divided in that iteration — confirm this is intended.
// NOTE(review): with `after_decimal` set and an accumulated value of 0 after
// the digits run out, the loop does not appear to terminate — confirm that
// callers never pass such input.
#[allow(dead_code)]
pub fn arrnum_int_div_step<'a>(
rem_in: &'a Remainder,
radix_in: u8,
base_ten_int_divisor: u8,
after_decimal: bool,
) -> DivOut<'a> {
let mut rem_out = Remainder {
position: rem_in.position,
replace: Vec::new(),
arr_num: rem_in.arr_num,
};
let mut bufferval: u16 = 0;
let base: u16 = u16::from(radix_in);
let divisor: u16 = u16::from(base_ten_int_divisor);
// Number of digits consumed by this step.
let mut traversed = 0;
let mut quotient = 0;
// The digits of `arr_num` that are not shadowed by `replace`.
let refd_vals = &rem_in.arr_num[rem_in.position + rem_in.replace.len()..];
let mut it_replace = rem_in.replace.iter();
let mut it_f = refd_vals.iter();
loop {
// Next digit: replacement digits first, then the original digits, then
// (only after the decimal point) an endless supply of zeros.
let u = match it_replace.next() {
Some(u_rep) => u16::from(*u_rep),
None => match it_f.next() {
Some(u_orig) => u16::from(*u_orig),
None => {
if !after_decimal {
break;
}
0
}
},
};
traversed += 1;
bufferval += u;
if bufferval > divisor {
while bufferval >= divisor {
quotient += 1;
bufferval -= divisor;
}
// Store the leftover as digits in base `radix_in`.
rem_out.replace = if bufferval == 0 {
Vec::new()
} else {
let remainder_as_arrnum = unsigned_to_arrnum(bufferval);
base_conv_vec(&remainder_as_arrnum, 10, radix_in)
};
rem_out.position += 1 + (traversed - rem_out.replace.len());
break;
} else {
// Shift left by one digit to make room for the next one.
bufferval *= base;
}
}
DivOut {
quotient,
remainder: rem_out,
}
}
// Add the small integer `base_ten_int_term` to a number stored as big-endian
// digits in base `basenum`, returning the sum as big-endian digits in the
// same base. Adding 0 to an empty input yields an empty output.
pub fn arrnum_int_add(arrnum: &[u8], basenum: u8, base_ten_int_term: u8) -> Vec<u8> {
    let radix = u16::from(basenum);
    let mut digits: Vec<u8> = Vec::new();
    // The term to add is simply treated as the initial carry.
    let mut carry = u16::from(base_ten_int_term);
    for &digit in arrnum.iter().rev() {
        let total = u16::from(digit) + carry;
        digits.push((total % radix) as u8);
        carry = total / radix;
    }
    while carry != 0 {
        digits.push((carry % radix) as u8);
        carry /= radix;
    }
    digits.reverse();
    digits
}
// Convert big-endian digits from base `radix_src` to base `radix_dest`
// (Horner's scheme: multiply the running result by the source radix, then add
// the next digit, all carried out in the destination base). An empty input
// yields `[0]`.
pub fn base_conv_vec(src: &[u8], radix_src: u8, radix_dest: u8) -> Vec<u8> {
    src.iter().fold(vec![0], |acc, &digit| {
        let shifted = arrnum_int_mult(&acc, radix_dest, radix_src);
        arrnum_int_add(&shifted, radix_dest, digit)
    })
}
// Split `src` into its decimal digits, most significant first.
// Zero is represented by an empty vector.
#[allow(dead_code)]
pub fn unsigned_to_arrnum(src: u16) -> Vec<u8> {
    if src == 0 {
        return Vec::new();
    }
    src.to_string().bytes().map(|b| b - b'0').collect()
}
// temporary needs-improvement-function
//
// Interpret `src` as the digits after the radix point of a number in base
// `radix_src` and return their value as an `f64`. Only the first 16 digits
// are taken into account; `_radix_dest` is unused.
//
// Doing this properly for arbitrary string input would take a lot of
// additional code; until then this serves as an outline of how it would work.
pub fn base_conv_float(src: &[u8], radix_src: u8, _radix_dest: u8) -> f64 {
    let radix = f64::from(radix_src);
    let mut place_value: f64 = 1_f64;
    let mut total: f64 = 0_f64;
    for &digit in src.iter().take(16) {
        place_value /= radix;
        total += place_value * f64::from(digit);
    }
    total
}
// Convert the characters of `src` into digit values using `radix_def_src`.
// Characters the radix does not recognise are silently skipped.
// TODO: report an error for unrecognised characters.
pub fn str_to_arrnum(src: &str, radix_def_src: &dyn RadixDef) -> Vec<u8> {
    src.chars()
        .filter_map(|c| radix_def_src.parse_char(c))
        .collect()
}
// Render digit values as characters using `radix_def_dest`.
// Values the radix cannot format are silently skipped.
// TODO: handle values that cannot be formatted.
pub fn arrnum_to_str(src: &[u8], radix_def_dest: &dyn RadixDef) -> String {
    src.iter()
        .filter_map(|&digit| radix_def_dest.format_u8(digit))
        .collect()
}
// Convert the textual number `src` from the radix described by
// `radix_def_src` to the radix described by `radix_def_dest`.
pub fn base_conv_str(
    src: &str,
    radix_def_src: &dyn RadixDef,
    radix_def_dest: &dyn RadixDef,
) -> String {
    let source_digits = str_to_arrnum(src, radix_def_src);
    let source_radix = radix_def_src.get_max();
    let dest_radix = radix_def_dest.get_max();
    let dest_digits = base_conv_vec(&source_digits, source_radix, dest_radix);
    arrnum_to_str(&dest_digits, radix_def_dest)
}
// Describes a radix: its size and how digits map to and from characters.
pub trait RadixDef {
// The radix itself (10 for decimal, 16 for hex).
fn get_max(&self) -> u8;
// The digit value of `x`, or `None` if `x` is not a digit in this radix.
fn parse_char(&self, x: char) -> Option<u8>;
// The character for digit value `x`, or `None` if `x` is out of range.
fn format_u8(&self, x: u8) -> Option<char>;
}
// Decimal radix.
pub struct RadixTen;

// ASCII codes used to map between digit characters and digit values.
const ZERO_ASC: u8 = b'0';
const UPPER_A_ASC: u8 = b'A';
const LOWER_A_ASC: u8 = b'a';

impl RadixDef for RadixTen {
    fn get_max(&self) -> u8 {
        10
    }

    // Only the ASCII digits '0'..='9' are accepted.
    fn parse_char(&self, c: char) -> Option<u8> {
        if c.is_ascii_digit() {
            Some(c as u8 - ZERO_ASC)
        } else {
            None
        }
    }

    fn format_u8(&self, u: u8) -> Option<char> {
        if u <= 9 {
            Some((ZERO_ASC + u) as char)
        } else {
            None
        }
    }
}
// Hexadecimal radix. Both letter cases are accepted on input; output uses
// uppercase letters.
pub struct RadixHex;

impl RadixDef for RadixHex {
    fn get_max(&self) -> u8 {
        16
    }

    fn parse_char(&self, c: char) -> Option<u8> {
        if c.is_ascii_digit() {
            Some(c as u8 - ZERO_ASC)
        } else if ('A'..='F').contains(&c) {
            Some(c as u8 + 10 - UPPER_A_ASC)
        } else if ('a'..='f').contains(&c) {
            Some(c as u8 + 10 - LOWER_A_ASC)
        } else {
            None
        }
    }

    fn format_u8(&self, u: u8) -> Option<char> {
        if u <= 9 {
            Some((ZERO_ASC + u) as char)
        } else if u <= 15 {
            Some((UPPER_A_ASC + (u - 10)) as char)
        } else {
            None
        }
    }
}
mod tests;

View file

@ -1,60 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (ToDO) arrnum mult
#[cfg(test)]
use super::*;
#[test]
fn test_arrnum_int_mult() {
    // (in base 10) 12 * 4 = 48
    let product = arrnum_int_mult(&[1, 2], 10, 4);
    assert_eq!(product, vec![4, 8]);
}
#[test]
fn test_arrnum_int_non_base_10() {
    // (in base 3) 12 is five; 5 * 4 = 20, which is 202 in base 3
    let product = arrnum_int_mult(&[1, 2], 3, 4);
    assert_eq!(product, vec![2, 0, 2]);
}
#[test]
fn test_arrnum_int_div_short_circuit() {
    let arrnum: Vec<u8> = vec![5, 5, 5, 5, 0];
    let state = Remainder {
        position: 1,
        replace: vec![1, 3],
        arr_num: &arrnum,
    };
    // The "replace" digits mean that the number being divided is 1350.
    // The first time 41 goes into a prefix of 1350 is at 135, which gives
    // a quotient of 3 and a remainder of 12.
    let result = arrnum_int_div_step(&state, 10, 41, false);
    assert_eq!(result.quotient, 3);
    assert_eq!(result.remainder.position, 3);
    assert_eq!(result.remainder.replace, vec![1, 2]);
}

View file

@ -1,119 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
//! formatter for %a %A C99 hex-floating-point subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::base_conv;
use super::base_conv::RadixDef;
use super::float_common::{primitive_to_str_common, FloatAnalysis};
// Formatter for C99 hex-floating-point substitutions.
#[derive(Default)]
pub struct CninetyNineHexFloatf {
// Currently unused (hence the `dead_code` allowance).
#[allow(dead_code)]
as_num: f64,
}
impl CninetyNineHexFloatf {
// Create a formatter with `as_num` set to its default (0.0).
pub fn new() -> Self {
Self::default()
}
}
impl Formatter for CninetyNineHexFloatf {
    // Analyse the argument and build the hex-float primitive for it.
    // One digit more than the requested precision (default 6) is considered.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let digits_wanted = (field.second_field.unwrap_or(6) + 1) as usize;
        let analysis =
            FloatAnalysis::analyze(str_in, initial_prefix, Some(digits_wanted), None, true);
        // `%A` asks for capitalized output.
        let capitalized = *field.field_char == 'A';
        let primitive = get_primitive_hex(
            initial_prefix,
            &str_in[initial_prefix.offset..],
            &analysis,
            digits_wanted,
            capitalized,
        );
        Some(primitive)
    }

    // Rendering is shared with the other float formatters.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}
// C99 hex has requirements that differ from all other floating point subs in
// pretty much every part of building a primitive, from prefix and suffix to
// the need for base conversion (in all other cases if you don't have decimal
// you must have decimal, here it's the other way around).
//
// On the todo list is to have a trait for get_primitive that is implemented
// by each float formatter and can override a default. When that happens we
// can take the parts of get_primitive_dec specific to dec and spin them out
// to their own functions that can be overridden.
//
// NOTE: this is currently a stub. Only the sign-dependent "0x" prefix and a
// fixed "p+0" / "P+0" suffix are produced; the digits themselves are left at
// their defaults and `_str_in`, `_analysis` and `_last_dec_place` are unused.
fn get_primitive_hex(
initial_prefix: &InitialPrefix,
_str_in: &str,
_analysis: &FloatAnalysis,
_last_dec_place: usize,
capitalized: bool,
) -> FormatPrimitive {
let prefix = Some(String::from(if initial_prefix.sign == -1 {
"-0x"
} else {
"0x"
}));
// TODO actual conversion, make sure to get back mantissa.
// for hex to hex, it's really just a matter of moving the
// decimal point and calculating the mantissa by its initial
// position and its moves, with every position counting for
// the addition or subtraction of 4 (2**4, because 4 bits in a hex digit)
// to the exponent.
// decimal's going to be a little more complicated. correct simulation
// of glibc will require after-decimal division to a specified precision.
// the difficult part of this (arrnum_int_div_step) is already implemented.
// the hex float name may be a bit misleading in terms of how to go about the
// conversion. The best way to do it is to just convert the float number
// directly to base 2 and then at the end translate back to hex.
// NOTE(review): this value is what gets printed after the `p`, which in C99
// hex-float notation is the binary exponent — the name `mantissa` looks like
// a misnomer.
let mantissa = 0;
let suffix = Some({
let ind = if capitalized { "P" } else { "p" };
// Non-negative values need an explicit '+'; negative ones carry their own '-'.
if mantissa >= 0 {
format!("{ind}+{mantissa}")
} else {
format!("{ind}{mantissa}")
}
});
FormatPrimitive {
prefix,
suffix,
..Default::default()
}
}
// Convert a run of decimal digits to hexadecimal text.
//
// With `before_decimal` set, `src` is treated as an integer and converted
// exactly. Otherwise `src` is treated as the digits after the decimal point:
// its value is computed as an `f64` and the text after the first two
// characters of that value's decimal rendering is returned (the rendering
// itself is returned when it is at most two characters long).
#[allow(dead_code)]
fn to_hex(src: &str, before_decimal: bool) -> String {
    let radix_ten = base_conv::RadixTen;
    let radix_hex = base_conv::RadixHex;
    if before_decimal {
        return base_conv::base_conv_str(src, &radix_ten, &radix_hex);
    }
    let fraction_digits = base_conv::str_to_arrnum(src, &radix_ten);
    let fraction = base_conv::base_conv_float(
        &fraction_digits,
        radix_ten.get_max(),
        radix_hex.get_max(),
    );
    let rendered = fraction.to_string();
    if rendered.len() > 2 {
        String::from(&rendered[2..])
    } else {
        // zero
        rendered
    }
}

View file

@ -1,189 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
//! formatter for %g %G decimal subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
const SIGNIFICANT_FIGURES: usize = 6;
// Parse the leading `significant_figures` digits of a numeric string as an
// integer, rounding on the digit that follows them. The result is NOT scaled
// back up to the magnitude of the input.
// This is a helper function for round().
// Examples:
// round_to_significance("456", 1) == 5
// round_to_significance("456", 2) == 46
// round_to_significance("456", 9) == 456
fn round_to_significance(input: &str, significant_figures: usize) -> u32 {
    if input.len() <= significant_figures {
        // Nothing to cut off; anything unparsable counts as zero.
        return input.parse::<u32>().unwrap_or(0);
    }
    // The input has too many digits. Keep one digit more than requested and
    // go through a float so that the extra digit rounds the result instead of
    // being truncated away.
    let leading = &input[..=significant_figures];
    let as_float = leading.parse::<f32>().unwrap();
    (as_float / 10.0).round() as u32
}
// Removing trailing zeroes, expressing the result as an integer where
// possible. This is a helper function for round().
fn truncate(mut format: FormatPrimitive) -> FormatPrimitive {
if let Some(ref post_dec) = format.post_decimal {
let trimmed = post_dec.trim_end_matches('0');
if trimmed.is_empty() {
// If there are no nonzero digits after the decimal point,
// use integer formatting by clearing post_decimal and suffix.
format.post_decimal = Some(String::new());
if format.suffix == Some("e+00".into()) {
format.suffix = Some(String::new());
}
} else if trimmed.len() != post_dec.len() {
// Otherwise, update the format to remove only the trailing
// zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were
// no trailing zeroes, do nothing.
format.post_decimal = Some(trimmed.to_owned());
}
}
format
}
// Round a format to six significant figures and remove trailing zeroes.
//
// Significant digits are taken from `pre_decimal` first; whatever budget is
// left over is then spent on `post_decimal`. The result is finally passed
// through truncate().
//
// NOTE(review): `significant_digits_remaining` is an unsigned counter that is
// decremented by the length of the rounded pre_decimal string. If that string
// can ever be longer than SIGNIFICANT_FIGURES (e.g. rounding "9999995" up to
// "1000000"), the subtraction would underflow — confirm that callers only
// pass short pre_decimal values.
fn round(mut format: FormatPrimitive) -> FormatPrimitive {
let mut significant_digits_remaining = SIGNIFICANT_FIGURES;
// First, take as many significant digits as possible from pre_decimal,
if format.pre_decimal.is_some() {
let input = format.pre_decimal.as_ref().unwrap();
let rounded = round_to_significance(input, significant_digits_remaining);
let mut rounded_str = rounded.to_string();
significant_digits_remaining -= rounded_str.len();
// If the pre_decimal has exactly enough significant digits,
// round the input to the nearest integer. If the first
// post_decimal digit is 5 or higher, round up by incrementing
// the pre_decimal number. Otherwise, use the pre_decimal as-is.
if significant_digits_remaining == 0 {
if let Some(digits) = &format.post_decimal {
if digits.chars().next().unwrap_or('0') >= '5' {
let rounded = rounded + 1;
rounded_str = rounded.to_string();
}
}
}
format.pre_decimal = Some(rounded_str);
}
// If no significant digits remain, or there's no post_decimal to
// round, return the rounded pre_decimal value with no post_decimal.
// Otherwise, round the post_decimal to the remaining significance.
if significant_digits_remaining == 0 {
format.post_decimal = Some(String::new());
} else if let Some(input) = format.post_decimal {
let leading_zeroes = input.len() - input.trim_start_matches('0').len();
// The digits that follow the leading zeroes.
let digits = &input[leading_zeroes..];
// In the post_decimal, leading zeroes are significant. "01.0010"
// has one significant digit in pre_decimal, and 3 from post_decimal.
let mut post_decimal_str = String::with_capacity(significant_digits_remaining);
for _ in 0..leading_zeroes {
post_decimal_str.push('0');
}
if leading_zeroes < significant_digits_remaining {
// After significant leading zeroes, round the remaining digits
// to any remaining significance.
let rounded = round_to_significance(digits, significant_digits_remaining);
post_decimal_str.push_str(&rounded.to_string());
} else if leading_zeroes == significant_digits_remaining
&& digits.chars().next().unwrap_or('0') >= '5'
{
// If necessary, round up the post_decimal ("1.000009" should
// round to 1.00001, instead of truncating after the last
// significant leading zero).
post_decimal_str.pop();
post_decimal_str.push('1');
} else {
// If the rounded post_decimal is entirely zeroes, discard
// it and use integer formatting instead.
post_decimal_str = String::new();
}
format.post_decimal = Some(post_decimal_str);
}
truncate(format)
}
// Given an exponent used in scientific notation, return whether the
// number is small enough to be expressed as a decimal instead. "Small
// enough" is based only on the number's magnitude, not the length of
// any string representation.
//
// `suffix` is expected to look like "e+05" / "e-05", or "E+05" / "E-05" for
// capitalized output. No suffix at all means decimal. An exponent is small
// enough when it is greater than -5 and less than 6.
fn should_represent_as_decimal(suffix: &Option<String>) -> bool {
    let exponent = match suffix {
        // The thresholds below are lowercase; without normalizing the case an
        // uppercase "E…" suffix would always compare as smaller ('E' < 'e')
        // and every number would be treated as decimal.
        Some(exponent) => exponent.to_ascii_lowercase(),
        None => return true,
    };
    // The two-digit, zero-padded exponent makes a plain string comparison
    // order the values correctly.
    let threshold = if exponent.chars().nth(1) == Some('-') {
        "e-05"
    } else {
        "e+06"
    };
    exponent.as_str() < threshold
}
// Formatter for `%g` / `%G` substitutions. Carries no state.
pub struct Decf;
impl Decf {
// Create the (stateless) formatter.
pub fn new() -> Self {
Self
}
}
impl Formatter for Decf {
    // Build the primitive for a `%g` / `%G` field: scientific notation unless
    // the exponent is small enough for plain decimal, then rounded to six
    // significant figures.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let last_dec_place = (field.second_field.unwrap_or(6) + 1) as usize;
        // default to scif interpretation so as to not truncate input vals
        // (that would be displayed in scif) based on relation to decimal place
        let analysis = FloatAnalysis::analyze(
            str_in,
            initial_prefix,
            Some(last_dec_place + 1),
            None,
            false,
        );
        let unprefixed = &str_in[initial_prefix.offset..];
        let scientific = get_primitive_dec(
            initial_prefix,
            unprefixed,
            &analysis,
            last_dec_place,
            Some(*field.field_char == 'G'),
        );
        let chosen = if should_represent_as_decimal(&scientific.suffix) {
            // Use decimal formatting instead of scientific notation
            // if the input's magnitude is small.
            get_primitive_dec(initial_prefix, unprefixed, &analysis, last_dec_place, None)
        } else {
            scientific
        };
        Some(round(chosen))
    }

    // Rendering is shared with the other float formatters.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}

View file

@ -1,381 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
use super::super::format_field::FormatField;
use super::super::formatter::{
get_it_at, warn_incomplete_conv, Base, FormatPrimitive, InitialPrefix,
};
use super::base_conv;
use super::base_conv::RadixDef;
// If the memory, copy, and comparison cost of chars
// becomes an issue, we can always operate in vec<u8> here
// rather than just at de_hex.
//
// Result of scanning the textual form of a float argument.
pub struct FloatAnalysis {
// presumably the number of leading characters of the input that matter for
// the conversion — TODO confirm against `analyze`
pub len_important: usize,
// none means no decimal point.
pub decimal_pos: Option<usize>,
// The digit right after the last significant digit that was considered;
// used for rounding in the %g case.
pub follow: Option<char>,
}
// Reports whether the scan has passed enough digits after
// `starting_position` to satisfy `limit`.
//
// Decimal input with hex output is never considered sufficient, because that
// cannot be decided without converting. Hex input with decimal output scales
// the digit count by 9/8 before comparing.
fn has_enough_digits(
    hex_input: bool,
    hex_output: bool,
    string_position: usize,
    starting_position: usize,
    limit: usize,
) -> bool {
    // the -1 is for rounding; evaluated lazily so the undecidable case never
    // touches the subtraction
    let digits_seen = || (string_position - 1) - starting_position;
    match (hex_input, hex_output) {
        (false, true) => false,
        (true, false) => (digits_seen() * 9) / 8 >= limit,
        _ => digits_seen() >= limit,
    }
}
impl FloatAnalysis {
// Scans the argument's characters, starting at `initial_prefix.offset`, and
// records how many were stepped over, where the decimal point is, and the
// digit sitting at index `max_sd` (see `follow`).
//
// - max_sd_opt: optional limit on significant digits
// - max_after_dec_opt: optional limit on digits after the decimal point;
//   when present it takes precedence over max_sd_opt for the early stop
// - hex_output: forwarded to `has_enough_digits`
//
// Emits `warn_incomplete_conv` and stops at the first character that cannot
// belong to the number. Panics if the prefix reports octal input.
#[allow(clippy::cognitive_complexity)]
pub fn analyze(
str_in: &str,
initial_prefix: &InitialPrefix,
max_sd_opt: Option<usize>,
max_after_dec_opt: Option<usize>,
hex_output: bool,
) -> Self {
// this fn assumes
// the input string
// has no leading spaces or 0s
let str_it = get_it_at(initial_prefix.offset, str_in);
let mut ret = Self {
len_important: 0,
decimal_pos: None,
follow: None,
};
let hex_input = match initial_prefix.radix_in {
Base::Hex => true,
Base::Ten => false,
Base::Octal => {
panic!("this should never happen: floats should never receive octal input");
}
};
// index of the character currently being examined
let mut i = 0;
// index just before the first non-zero digit seen after the decimal point
let mut pos_before_first_nonzero_after_decimal: Option<usize> = None;
for c in str_it {
match c {
e @ ('0'..='9' | 'A'..='F' | 'a'..='f') => {
// hex letters are only digits when the input is hex
if !hex_input {
match e {
'0'..='9' => {}
_ => {
warn_incomplete_conv(str_in);
break;
}
}
}
if ret.decimal_pos.is_some()
&& pos_before_first_nonzero_after_decimal.is_none()
&& e != '0'
{
pos_before_first_nonzero_after_decimal = Some(i - 1);
}
if let Some(max_sd) = max_sd_opt {
if i == max_sd {
// follow is used in cases of %g
// where the character right after the last
// sd is considered is rounded affecting
// the previous digit in 1/2 of instances
ret.follow = Some(e);
} else if ret.decimal_pos.is_some() && i > max_sd {
break;
}
}
// stop early once enough digits have been seen for the requested limit
if let Some(max_after_dec) = max_after_dec_opt {
if let Some(p) = ret.decimal_pos {
if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) {
break;
}
}
} else if let Some(max_sd) = max_sd_opt {
if let Some(p) = pos_before_first_nonzero_after_decimal {
if has_enough_digits(hex_input, hex_output, i, p, max_sd) {
break;
}
}
}
}
'.' => {
// only the first decimal point is accepted
if ret.decimal_pos.is_none() {
ret.decimal_pos = Some(i);
} else {
warn_incomplete_conv(str_in);
break;
}
}
_ => {
warn_incomplete_conv(str_in);
break;
}
};
i += 1;
}
ret.len_important = i;
ret
}
}
// Converts a run of hex digits to decimal digits.
//
// With `before_decimal` set, `src` is converted as an integer. Otherwise it
// is converted as a fraction and the first two characters of the rendered
// value are dropped (presumably the leading "0."); a rendering of two
// characters or fewer is returned unchanged.
fn de_hex(src: &str, before_decimal: bool) -> String {
    let (ten, hex) = (base_conv::RadixTen, base_conv::RadixHex);
    if before_decimal {
        return base_conv::base_conv_str(src, &hex, &ten);
    }
    let hex_digits = base_conv::str_to_arrnum(src, &hex);
    let fraction =
        base_conv::base_conv_float(&hex_digits, hex.get_max(), ten.get_max()).to_string();
    if fraction.len() > 2 {
        fraction[2..].to_string()
    } else {
        // zero
        fraction
    }
}
// Rounds a digit string up by one unit in its last kept place.
//
// Only the first `position` characters of `in_str` are kept. Walking from the
// right, every '9' becomes '0' and the first non-'9' is incremented, which
// ends the walk. When `before_dec` is set and the carry reaches the most
// significant digit while it is a '9', that digit becomes '1' instead of '0'.
//
// Returns the rounded text together with a flag telling whether the carry was
// absorbed inside this string (false means every kept digit was a '9').
fn _round_str_from(in_str: &str, position: usize, before_dec: bool) -> (String, bool) {
    let kept = &in_str[0..position];
    let mut rewritten: Vec<char> = Vec::new();
    let mut idx = position;
    let mut absorbed = false;
    for digit in kept.chars().rev() {
        idx -= 1;
        if digit == '9' {
            let replacement = if before_dec && idx == 0 { '1' } else { '0' };
            rewritten.push(replacement);
        } else {
            rewritten.push(((digit as u8) + 1) as char);
            absorbed = true;
            break;
        }
    }
    // untouched head, followed by the rewritten tail in reading order
    let mut rounded = String::from(&in_str[0..idx]);
    rounded.extend(rewritten.iter().rev());
    (rounded, absorbed)
}
// Rounds the fractional digits at `position`, carrying into the integer
// digits when needed.
//
// If the digit at `position` of `after_dec` is '5'..='9', the digits before
// it are rounded up; otherwise (or when `position` is past the end) both
// strings are returned untouched. The returned flag is true when the carry
// turned the integer part into a 1 followed only by zeros, i.e. a new decimal
// place was gained, except when the integer part was exactly "0".
fn round_terminal_digit(
    before_dec: String,
    after_dec: String,
    position: usize,
) -> (String, String, bool) {
    if position >= after_dec.len() {
        return (before_dec, after_dec, false);
    }
    let pivot = after_dec[position..=position].chars().next().expect("");
    if !matches!(pivot, '5'..='9') {
        return (before_dec, after_dec, false);
    }
    let (rounded_fraction, carry_absorbed) = _round_str_from(&after_dec, position, false);
    if carry_absorbed {
        return (before_dec, rounded_fraction, false);
    }
    // Every kept fractional digit was a 9: carry into the integer digits.
    let (rounded_integer, _) = _round_str_from(&before_dec, before_dec.len(), true);
    let mut integer_digits = rounded_integer.chars();
    let looks_like_power_of_ten =
        integer_digits.next() == Some('1') && integer_digits.all(|c| c == '0');
    // Only update decimal place if the before decimal != 0
    let dec_place_chg = looks_like_power_of_ten && before_dec != "0";
    (rounded_integer, rounded_fraction, dec_place_chg)
}
// Builds the `FormatPrimitive` (sign prefix, digits before and after the
// decimal point, optional exponent suffix) for a float argument.
//
// - str_in: the argument text; it is split at `analysis.decimal_pos`
// - analysis: scan result for the same text; only `decimal_pos` is read here
// - last_dec_place: rounding is applied at index `last_dec_place - 1` of the
//   digits after the decimal point (so it must be at least 1)
// - sci_mode: `None` for plain decimal layout; `Some(capitalized)` for
//   scientific notation, where `capitalized` selects 'E' over 'e'
#[allow(clippy::cognitive_complexity)]
pub fn get_primitive_dec(
initial_prefix: &InitialPrefix,
str_in: &str,
analysis: &FloatAnalysis,
last_dec_place: usize,
sci_mode: Option<bool>,
) -> FormatPrimitive {
let mut f = FormatPrimitive::default();
// add negative sign section
if initial_prefix.sign == -1 {
f.prefix = Some(String::from("-"));
}
// assign the digits before and after the decimal points
// to separate slices. If no digits after decimal point,
// assign 0
let (mut first_segment_raw, second_segment_raw) = match analysis.decimal_pos {
Some(pos) => (&str_in[..pos], &str_in[pos + 1..]),
None => (str_in, "0"),
};
// an input such as ".5" has nothing before the point; treat it as "0"
if first_segment_raw.is_empty() {
first_segment_raw = "0";
}
// convert to string, de_hexifying if input is in hex // spell-checker:disable-line
let (first_segment, second_segment) = match initial_prefix.radix_in {
Base::Hex => (
de_hex(first_segment_raw, true),
de_hex(second_segment_raw, false),
),
_ => (
String::from(first_segment_raw),
String::from(second_segment_raw),
),
};
// In scientific mode, normalise to a single leading digit and work out the
// exponent (`mantissa` holds the exponent value despite its name).
let (pre_dec_unrounded, post_dec_unrounded, mut mantissa) = if sci_mode.is_some() {
if first_segment.len() > 1 {
// several integer digits: all but the first move behind the point
let mut post_dec = String::from(&first_segment[1..]);
post_dec.push_str(&second_segment);
(
String::from(&first_segment[0..1]),
post_dec,
first_segment.len() as isize - 1,
)
} else {
match first_segment
.chars()
.next()
.expect("float_common: no chars in first segment.")
{
'0' => {
// value below one: the first non-zero fractional digit becomes
// the leading digit and the exponent is negative
let it = second_segment.chars().enumerate();
let mut m: isize = 0;
let mut pre = String::from("0");
let mut post = String::from("0");
for (i, c) in it {
match c {
'0' => {}
_ => {
m = -((i as isize) + 1);
pre = String::from(&second_segment[i..=i]);
post = String::from(&second_segment[i + 1..]);
break;
}
}
}
(pre, post, m)
}
_ => (first_segment, second_segment, 0),
}
}
} else {
(first_segment, second_segment, 0)
};
let (pre_dec_draft, post_dec_draft, dec_place_chg) =
round_terminal_digit(pre_dec_unrounded, post_dec_unrounded, last_dec_place - 1);
f.post_decimal = Some(post_dec_draft);
if let Some(capitalized) = sci_mode {
let si_ind = if capitalized { 'E' } else { 'e' };
// Increase the mantissa if we're adding a decimal place
if dec_place_chg {
mantissa += 1;
}
f.suffix = Some(if mantissa >= 0 {
format!("{si_ind}+{mantissa:02}")
} else {
// negative sign is considered in format!s
// leading zeroes
format!("{si_ind}{mantissa:03}")
});
f.pre_decimal = Some(pre_dec_draft);
} else if dec_place_chg {
// We've rounded up to a new decimal place so append 0
f.pre_decimal = Some(pre_dec_draft + "0");
} else {
f.pre_decimal = Some(pre_dec_draft);
}
f
}
pub fn primitive_to_str_common(prim: &FormatPrimitive, field: &FormatField) -> String {
let mut final_str = String::new();
if let Some(ref prefix) = prim.prefix {
final_str.push_str(prefix);
}
match prim.pre_decimal {
Some(ref pre_decimal) => {
final_str.push_str(pre_decimal);
}
None => {
panic!(
"error, format primitives provided to int, will, incidentally under correct \
behavior, always have a pre_dec value."
);
}
}
let decimal_places = field.second_field.unwrap_or(6);
match prim.post_decimal {
Some(ref post_decimal) => {
if !post_decimal.is_empty() && decimal_places > 0 {
final_str.push('.');
let len_avail = post_decimal.len() as u32;
if decimal_places >= len_avail {
// println!("dec {}, len avail {}", decimal_places, len_avail);
final_str.push_str(post_decimal);
if *field.field_char != 'g' && *field.field_char != 'G' {
let diff = decimal_places - len_avail;
for _ in 0..diff {
final_str.push('0');
}
}
} else {
// println!("printing to only {}", decimal_places);
final_str.push_str(&post_decimal[0..decimal_places as usize]);
}
}
}
None => {
panic!(
"error, format primitives provided to int, will, incidentally under correct \
behavior, always have a pre_dec value."
);
}
}
if let Some(ref suffix) = prim.suffix {
final_str.push_str(suffix);
}
final_str
}

View file

@ -1,47 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
//! formatter for %f %F common-notation floating-point subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
// Formatter for the %f / %F substitutions; it carries no state.
#[derive(Default)]
pub struct Floatf;
impl Floatf {
    // Creates the formatter.
    pub fn new() -> Self {
        Floatf
    }
}
impl Formatter for Floatf {
    // Builds the plain-decimal primitive for %f / %F. The precision defaults
    // to 6; one extra digit is examined so the last place can be rounded.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let last_dec_place = (field.second_field.unwrap_or(6) + 1) as usize;
        let analysis =
            FloatAnalysis::analyze(str_in, initial_prefix, None, Some(last_dec_place), false);
        let digits = &str_in[initial_prefix.offset..];
        Some(get_primitive_dec(
            initial_prefix,
            digits,
            &analysis,
            last_dec_place,
            None,
        ))
    }
    // Renders the primitive with the shared float rendering routine.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}

View file

@ -1,288 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
//! formatter for unsigned and signed int subs
//! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64)
//! signed int: %i %d (both base ten i64)
use crate::error::set_exit_code;
use crate::features::tokenize::num_format::num_format::warn_expected_numeric;
use super::super::format_field::FormatField;
use super::super::formatter::{get_it_at, Base, FormatPrimitive, Formatter, InitialPrefix};
use std::i64;
use std::u64;
// Formatter for the unsigned and signed integer substitutions
// (%d %i %u %o %x %X).
#[derive(Default)]
pub struct Intf {
// placeholder field; none of the code shown here reads it
_a: u32,
}
// Hints gathered by scanning an integer argument;
// see the Intf::analyze() function below
struct IntAnalysis {
// the number *may* be above the maximum; it has to be parsed to find out
check_past_max: bool,
// the number is known to be above the maximum
past_max: bool,
// the scanned digits amount to zero
is_zero: bool,
// how many digit characters were accepted by the scan
len_digits: u8,
}
impl Intf {
pub fn new() -> Self {
Self::default()
}
// Scans the digits of an integer argument and reports what was learned.
//
// take a ref to argument string, and basic information
// about prefix (offset, radix, sign), and analyze string
// to gain the IntAnalysis information above
// check_past_max: true if the number *may* be above max,
// but we don't know either way. One of several reasons
// we may have to parse as int.
// past_max: true if the object is past max, false if not
// in the future we should probably combine these into an
// Option<bool>
// is_zero: true if number is zero, false otherwise
// len_digits: length of digits used to create the int
// important, for example, if we run into a non-valid character
//
// On a character that is not a digit or hex letter this warns via
// `warn_expected_numeric`, sets the exit code to 1 and stops scanning.
#[allow(clippy::cognitive_complexity)]
fn analyze(str_in: &str, signed_out: bool, initial_prefix: &InitialPrefix) -> IntAnalysis {
// the maximum number of digits we could conceivably
// have before the decimal point without exceeding the
// max
let mut str_it = get_it_at(initial_prefix.offset, str_in);
let max_sd_in = if signed_out {
match initial_prefix.radix_in {
Base::Ten => 19,
Base::Octal => 21,
Base::Hex => 16,
}
} else {
match initial_prefix.radix_in {
Base::Ten => 20,
Base::Octal => 22,
Base::Hex => 16,
}
};
let mut ret = IntAnalysis {
check_past_max: false,
past_max: false,
is_zero: false,
len_digits: 0,
};
// todo turn this to a while let now that we know
// no special behavior on EOI break
loop {
let c_opt = str_it.next();
if let Some(c) = c_opt {
match c {
// NOTE(review): hex letters are accepted here whatever radix_in is;
// a later parse in that radix is what rejects them
'0'..='9' | 'a'..='f' | 'A'..='F' => {
// only a leading '0' marks the value as zero; any further digit
// clears the flag again
if ret.len_digits == 0 && c == '0' {
ret.is_zero = true;
} else if ret.is_zero {
ret.is_zero = false;
}
ret.len_digits += 1;
// at the digit limit, peek at the next character to decide
// whether the value is certainly, or only possibly, too large
if ret.len_digits == max_sd_in {
if let Some(next_ch) = str_it.next() {
match next_ch {
'0'..='9' => {
ret.past_max = true;
}
_ => {
// force conversion
// to check if its above max.
// todo: spin out convert
// into fn, call it here to try
// read val, on Ok()
// save val for reuse later
// that way on same-base in and out
// we don't needlessly convert int
// to str, we can just copy it over.
ret.check_past_max = true;
str_it.put_back(next_ch);
}
}
if ret.past_max {
break;
}
} else {
ret.check_past_max = true;
}
}
}
_ => {
warn_expected_numeric(str_in);
set_exit_code(1);
break;
}
}
} else {
// breaks on EOL
break;
}
}
ret
}
// Returns the `FormatPrimitive` holding the extreme value for the given
// field character, used when the argument is out of range.
//
// - 'd' / 'i': i64::MAX when `sign` is 1, otherwise i64::MIN (written as a
//   "-" prefix plus the magnitude)
// - 'x' / 'X': u64::MAX in hex, in the letter case the field character asks for
// - 'o': u64::MAX in octal
// - anything else ('u'): u64::MAX in decimal
fn get_max(field_char: char, sign: i8) -> FormatPrimitive {
    let mut fmt_primitive = FormatPrimitive::default();
    let digits = match field_char {
        'd' | 'i' => {
            if sign == 1 {
                "9223372036854775807"
            } else {
                fmt_primitive.prefix = Some(String::from("-"));
                "9223372036854775808"
            }
        }
        'x' => "ffffffffffffffff",
        // %X must print uppercase hex digits, also for the saturated value
        'X' => "FFFFFFFFFFFFFFFF",
        'o' => "1777777777777777777777",
        /* 'u' | */ _ => "18446744073709551615",
    };
    fmt_primitive.pre_decimal = Some(String::from(digits));
    fmt_primitive
}
// conv_from_segment contract:
// 1. takes
// - a string that begins with a non-zero digit, and proceeds
//   with zero or more following digits until the end of the string
// - a radix to interpret those digits as
// - a char that communicates:
//   whether to interpret+output the string as an i64 or u64
//   what radix to write the parsed number as.
// - the sign of the argument (-1 for negative)
// 2. parses it as a rust integral type
// 3. outputs FormatPrimitive with:
// - if the string falls within bounds:
//   number parsed and written in the correct radix; for unsigned output a
//   negative argument is written as its two's-complement value
// - if the string falls outside bounds (or does not parse in that radix):
//   for i64 output, the int minimum or int max (depending on sign)
//   for u64 output, the u64 max in the output radix
fn conv_from_segment(
    segment: &str,
    radix_in: Base,
    field_char: char,
    sign: i8,
) -> FormatPrimitive {
    let radix = radix_in as u32;
    match field_char {
        'i' | 'd' => match i64::from_str_radix(segment, radix) {
            Ok(i) => {
                let mut fmt_prim = FormatPrimitive::default();
                if sign == -1 {
                    fmt_prim.prefix = Some(String::from("-"));
                }
                fmt_prim.pre_decimal = Some(i.to_string());
                fmt_prim
            }
            Err(_) => Self::get_max(field_char, sign),
        },
        _ => match u64::from_str_radix(segment, radix) {
            Ok(u) => {
                let mut fmt_prim = FormatPrimitive::default();
                // `wrapping_neg` equals `u64::MAX - (u - 1)` for every u >= 1
                // and, unlike that expression, cannot underflow when u == 0.
                let u_f = if sign == -1 { u.wrapping_neg() } else { u };
                fmt_prim.pre_decimal = Some(match field_char {
                    'X' => format!("{u_f:X}"),
                    'x' => format!("{u_f:x}"),
                    'o' => format!("{u_f:o}"),
                    _ => u_f.to_string(),
                });
                fmt_prim
            }
            Err(_) => Self::get_max(field_char, sign),
        },
    }
}
}
impl Formatter for Intf {
// Builds the primitive for an integer substitution.
//
// Zero (or no digits at all) yields "0"; a value known to be out of range
// yields the saturated value from `get_max`; otherwise the digits are either
// converted through `conv_from_segment` or copied over verbatim.
fn get_primitive(
    &self,
    field: &FormatField,
    initial_prefix: &InitialPrefix,
    str_in: &str,
) -> Option<FormatPrimitive> {
    let field_char = *field.field_char;
    let sign = initial_prefix.sign;
    let start = initial_prefix.offset;
    // get information about the string. see Intf::analyze def above.
    let hints = Self::analyze(str_in, matches!(field_char, 'i' | 'd'), initial_prefix);
    // We always will have a format primitive to return
    if hints.len_digits == 0 || hints.is_zero {
        // non-digit or end reached before a non-zero digit
        return Some(FormatPrimitive {
            pre_decimal: Some(String::from("0")),
            ..Default::default()
        });
    }
    if hints.past_max {
        return Some(Self::get_max(field_char, sign));
    }
    // the number is or may be below the bounds limit
    let radix_out = match field_char {
        'd' | 'i' | 'u' => Base::Ten,
        'x' | 'X' => Base::Hex,
        /* 'o' | */ _ => Base::Octal,
    };
    let radix_mismatch = !radix_out.eq(&initial_prefix.radix_in);
    let decrease_from_max = sign == -1 && field_char != 'i';
    let digits = &str_in[start..start + hints.len_digits as usize];
    // convert to int if any one of these is true:
    // - number of digits in int indicates it may be past max
    // - we're subtracting from the max
    // - we're converting the base
    if hints.check_past_max || decrease_from_max || radix_mismatch {
        return Some(Self::conv_from_segment(
            digits,
            initial_prefix.radix_in.clone(),
            field_char,
            sign,
        ));
    }
    // otherwise just do a straight string copy. The sign is added here and
    // not earlier because zero doesn't get a sign, and conv_from_segment
    // creates its format primitive separately
    let mut copied = FormatPrimitive::default();
    if sign == -1 && field_char == 'i' {
        copied.prefix = Some(String::from("-"));
    }
    copied.pre_decimal = Some(String::from(digits));
    Some(copied)
}
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
let mut final_str: String = String::new();
if let Some(ref prefix) = prim.prefix {
final_str.push_str(prefix);
}
// integral second fields is zero-padded minimum-width
// which gets handled before general minimum-width
match prim.pre_decimal {
Some(ref pre_decimal) => {
if let Some(min) = field.second_field {
let mut i = min;
let len = pre_decimal.len() as u32;
while i > len {
final_str.push('0');
i -= 1;
}
}
final_str.push_str(pre_decimal);
}
None => {
panic!(
"error, format primitives provided to int, will, incidentally under \
correct behavior, always have a pre_dec value."
);
}
}
final_str
}
}

View file

@ -1,13 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
mod base_conv;
pub mod cninetyninehexfloatf;
pub mod decf;
mod float_common;
pub mod floatf;
pub mod intf;
pub mod scif;

View file

@ -1,47 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
//! formatter for %e %E scientific notation subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
// Formatter for the %e / %E substitutions; it carries no state.
#[derive(Default)]
pub struct Scif;
impl Scif {
    // Creates the formatter.
    pub fn new() -> Self {
        Scif
    }
}
impl Formatter for Scif {
    // Builds the scientific-notation primitive for %e / %E. The precision
    // defaults to 6; the exponent marker is uppercase for %E.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let last_dec_place = (field.second_field.unwrap_or(6) + 1) as usize;
        let analysis = FloatAnalysis::analyze(
            str_in,
            initial_prefix,
            Some(last_dec_place + 1),
            None,
            false,
        );
        let digits = &str_in[initial_prefix.offset..];
        let uppercase = *field.field_char == 'E';
        Some(get_primitive_dec(
            initial_prefix,
            digits,
            &analysis,
            last_dec_place,
            Some(uppercase),
        ))
    }
    // Renders the primitive with the shared float rendering routine.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}

View file

@ -1,8 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
pub mod format_field;
mod formatter;
mod formatters;
pub mod num_format;

View file

@ -1,275 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
//! handles creating printed output for numeric substitutions
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
use std::env;
use std::vec::Vec;
use crate::display::Quotable;
use crate::{show_error, show_warning};
use super::format_field::{FieldType, FormatField};
use super::formatter::{Base, FormatPrimitive, Formatter, InitialPrefix};
use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf;
use super::formatters::decf::Decf;
use super::formatters::floatf::Floatf;
use super::formatters::intf::Intf;
use super::formatters::scif::Scif;
// Reports, through `show_error!`, that `pf_arg` was expected to be a numeric
// value. The argument is passed through `maybe_quote()` for display.
pub fn warn_expected_numeric(pf_arg: &str) {
// NOTE(review): an older comment here asked to keep `println`; the message
// is emitted with `show_error!`
show_error!("{}: expected a numeric value", pf_arg.maybe_quote());
}
// Warns that the bytes following a character constant were ignored.
// The warning is only issued when POSIXLY_CORRECT is not present in the
// environment; a value that is set (or set but not valid unicode) keeps
// this silent.
fn warn_char_constant_ign(remaining_bytes: &[u8]) {
    if let Err(env::VarError::NotPresent) = env::var("POSIXLY_CORRECT") {
        show_warning!(
            "{:?}: character(s) following character \
             constant have been ignored",
            remaining_bytes
        );
    }
}
// Looks at the first bytes of an argument and returns a value when one can
// be learned from them alone:
// - no argument, or an empty argument: 0
// - a character constant (argument starting with ' or "): the byte right
//   after the quote; any further bytes trigger a warning. A quote with
//   nothing after it warns and yields 0.
// - anything else: None, meaning the argument has to be parsed as a number
fn get_provided(str_in_opt: Option<&String>) -> Option<u8> {
    const C_S_QUOTE: u8 = 39;
    const C_D_QUOTE: u8 = 34;
    let mut bytes = match str_in_opt {
        Some(str_in) => str_in.bytes(),
        None => return Some(0),
    };
    match bytes.next() {
        // no first byte
        None => Some(0_u8),
        Some(quote @ (C_S_QUOTE | C_D_QUOTE)) => match bytes.next() {
            Some(constant) => {
                let ignored: Vec<u8> = bytes.collect();
                if !ignored.is_empty() {
                    warn_char_constant_ign(&ignored);
                }
                Some(constant)
            }
            // no byte after quote
            None => {
                warn_expected_numeric(&(quote as char).to_string());
                Some(0_u8)
            }
        },
        // first byte is not quote
        Some(_) => None,
    }
}
// Reads the start of a numeric argument.
// takes a string and returns
// a sign,
// a base,
// and an offset for index after all
// initial spacing, sign, base prefix, and leading zeroes
//
// The base is hex after "0x" / "0X". After a leading '0' followed by another
// digit it is octal, but only for integer fields (`FieldType::Intf`);
// otherwise it stays base ten.
#[allow(clippy::cognitive_complexity)]
fn get_initial_prefix(str_in: &str, field_type: &FieldType) -> InitialPrefix {
let mut str_it = str_in.chars();
let mut ret = InitialPrefix {
radix_in: Base::Ten,
sign: 1,
offset: 0,
};
let mut top_char = str_it.next();
// skip spaces and ensure top_char is the first non-space char
// (or None if none exists)
while let Some(' ') = top_char {
ret.offset += 1;
top_char = str_it.next();
}
// parse sign
match top_char {
Some('+') => {
ret.offset += 1;
top_char = str_it.next();
}
Some('-') => {
ret.sign = -1;
ret.offset += 1;
top_char = str_it.next();
}
_ => {}
}
// we want to exit with offset being
// the index of the first non-zero
// digit before the decimal point or
// if there is none, the zero before the
// decimal point, or, if there is none,
// the decimal point.
// while we are determining the offset
// we will ensure as a convention
// the offset is always on the first character
// that we are yet unsure if it is the
// final offset. If the zero could be before
// a decimal point we don't move past the zero.
let mut is_hex = false;
if Some('0') == top_char {
if let Some(base) = str_it.next() {
// lead zeroes can only exist in
// octal and hex base
let mut do_clean_lead_zeroes = false;
match base {
'x' | 'X' => {
is_hex = true;
// step over both characters of the "0x" prefix
ret.offset += 2;
ret.radix_in = Base::Hex;
do_clean_lead_zeroes = true;
}
e @ '0'..='9' => {
// step over the leading zero
ret.offset += 1;
if let FieldType::Intf = *field_type {
ret.radix_in = Base::Octal;
}
if e == '0' {
do_clean_lead_zeroes = true;
}
}
_ => {}
}
if do_clean_lead_zeroes {
// true only for the first character examined by the loop below
let mut first = true;
for ch_zero in str_it {
// see notes on offset above:
// this is why the offset for octal and decimal numbers
// that reach this branch is 1 even though
// they have already eaten the characters '00'
// this is also why when hex encounters its
// first zero it does not move its offset
// forward because it does not know for sure
// that it's current offset (of that zero)
// is not the final offset,
// whereas at that point octal knows its
// current offset is not the final offset.
match ch_zero {
'0' => {
if !(is_hex && first) {
ret.offset += 1;
}
}
// if decimal, keep last zero if one exists
// (it's possible for last zero to
// not exist at this branch if we're in hex input)
'.' => break,
// other digit, etc.
_ => {
if !(is_hex && first) {
ret.offset += 1;
}
break;
}
}
if first {
first = false;
}
}
}
}
}
ret
}
// Formats one numeric substitution.
// this is the function a Sub's print will delegate to
// if it is a numeric field, passing the field details
// and the (optional) argument string
//
// Returns the formatted text, or `None` when the chosen formatter produces no
// primitive. Panics when `field.field_type` is not one of the numeric types.
pub fn num_format(field: &FormatField, in_str_opt: Option<&String>) -> Option<String> {
let field_char = field.field_char;
// num format mainly operates by further delegating to one of
// several Formatter structs depending on the field
// see formatter.rs for more details
// to do switch to static dispatch
let formatter: Box<dyn Formatter> = match *field.field_type {
FieldType::Intf => Box::new(Intf::new()),
FieldType::Floatf => Box::new(Floatf::new()),
FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()),
FieldType::Scif => Box::new(Scif::new()),
FieldType::Decf => Box::new(Decf::new()),
_ => {
panic!("asked to do num format with non-num field type");
}
};
let prim_opt=
// if we can get an assumed value from looking at the first
// few characters, use that value to create the FormatPrimitive
if let Some(provided_num) = get_provided(in_str_opt) {
let mut tmp = FormatPrimitive::default();
match field_char {
'u' | 'i' | 'd' => {
tmp.pre_decimal = Some(
format!("{provided_num}"));
},
// NOTE(review): 'X' is rendered with lowercase hex digits here
'x' | 'X' => {
tmp.pre_decimal = Some(
format!("{provided_num:x}"));
},
'o' => {
tmp.pre_decimal = Some(
format!("{provided_num:o}"));
},
// these go through the formatter so the value gets its exponent layout
'e' | 'E' | 'g' | 'G' => {
let as_str = format!("{provided_num}");
let initial_prefix = get_initial_prefix(
&as_str,
field.field_type
);
tmp=formatter.get_primitive(field, &initial_prefix, &as_str)
.expect("err during default provided num");
},
// remaining field characters: whole number with a "0" fractional part
_ => {
tmp.pre_decimal = Some(
format!("{provided_num}"));
tmp.post_decimal = Some(String::from("0"));
}
}
Some(tmp)
} else {
// otherwise we'll interpret the argument as a number
// using the appropriate Formatter
let in_str = in_str_opt.expect(
"please send the devs this message:
\n get_provided is failing to ret as Some(0) on no str ");
// first get information about the beginning of the
// numeric argument that would be useful for
// any formatter (int or float)
let initial_prefix = get_initial_prefix(
in_str,
field.field_type
);
// then get the FormatPrimitive from the Formatter
formatter.get_primitive(field, &initial_prefix, in_str)
};
// if we have a formatPrimitive, print its results
// according to the field-char appropriate Formatter
prim_opt.map(|prim| formatter.primitive_to_str(&prim, field.clone()))
}

View file

@ -1,463 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
//! Sub is a token that represents a
//! segment of the format string that is a substitution
//! it is created by Sub's implementation of the Tokenizer trait
//! Subs which have numeric field chars make use of the num_format
//! submodule
use crate::error::{UError, UResult};
use crate::quoting_style::{escape_name, QuotingStyle};
use itertools::{put_back_n, PutBackN};
use std::error::Error;
use std::fmt::Display;
use std::io::Write;
use std::iter::Peekable;
use std::process::exit;
use std::slice::Iter;
use std::str::Chars;
use super::num_format::format_field::{FieldType, FormatField};
use super::num_format::num_format;
use super::token;
use super::unescaped_text::UnescapedText;
const EXIT_ERR: i32 = 1;
// Errors raised while parsing a substitution out of the format string.
#[derive(Debug)]
pub enum SubError {
    // the conversion specification is malformed; carries the text of the
    // specification read so far (without the leading '%')
    InvalidSpec(String),
}
impl Display for SubError {
    // Writes the user-facing message for the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self::InvalidSpec(spec) = self;
        write!(f, "%{spec}: invalid conversion specification")
    }
}
impl Error for SubError {}
// Uses the `UError` trait's provided methods unchanged.
impl UError for SubError {}
// Parses the argument supplied for a `*` width or precision into an integer
// by formatting it as a `%i` field and parsing the resulting text.
//
// this is a costly way to parse the args used for asterisk values into
// integers from various bases. Actually doing it correctly (going through
// the pipeline to intf, but returning the integer instead of writing it to
// string and then back) is on the refactoring TODO
//
// Panics if formatting yields nothing or the text does not parse as `isize`.
fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize {
    let arg = asterisk_arg.to_string();
    let (field_type, field_char) = (FieldType::Intf, 'i');
    let as_int_field = FormatField {
        min_width: Some(0),
        second_field: Some(0),
        orig: &arg,
        field_type: &field_type,
        field_char: &field_char,
    };
    let rendered = num_format::num_format(&as_int_field, Some(&arg)).unwrap();
    rendered.parse::<isize>().unwrap()
}
// A width or precision as written in the format string.
pub enum CanAsterisk<T> {
// a value written directly in the format string
Fixed(T),
// a '*' was written; presumably the value is read from the argument list
// when the substitution is printed (that code is not shown here)
Asterisk,
}
// Sub is a tokenizer which creates tokens
// for substitution segments of a format string
pub struct Sub {
// minimum field width (the part before the '.')
min_width: CanAsterisk<Option<isize>>,
// the part after the '.' (precision)
second_field: CanAsterisk<Option<u32>>,
// the conversion character, e.g. 'd' or 's'
field_char: char,
// the group `field_char` belongs to; derived in `Sub::new`
field_type: FieldType,
// the text of the substitution as it was read from the format string
orig: String,
// '0' when the width text starts with '0', otherwise ' '
prefix_char: char,
}
impl Sub {
    // Creates a substitution token, deriving the field type from the
    // conversion character. An unknown conversion character prints
    // "Invalid field type" and exits the process with status 1.
    pub fn new(
        min_width: CanAsterisk<Option<isize>>,
        second_field: CanAsterisk<Option<u32>>,
        field_char: char,
        orig: String,
        prefix_char: char,
    ) -> Self {
        // for more dry printing, field characters are grouped
        // in initialization of token.
        let field_type = match field_char {
            'c' => FieldType::Charf,
            's' | 'b' | 'q' => FieldType::Strf,
            'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf,
            'e' | 'E' => FieldType::Scif,
            'f' | 'F' => FieldType::Floatf,
            'g' | 'G' => FieldType::Decf,
            'a' | 'A' => FieldType::CninetyNineHexFloatf,
            _ => {
                // should be unreachable.
                println!("Invalid field type");
                exit(EXIT_ERR);
            }
        };
        Self {
            min_width,
            second_field,
            field_char,
            field_type,
            orig,
            prefix_char,
        }
    }
}
// Scratch state used while reading one substitution from the format string.
#[derive(Default)]
pub(crate) struct SubParser {
// characters collected for the width ('-', '*' and digits before any '.')
min_width_tmp: Option<String>,
// the width was given as '*'
min_width_is_asterisk: bool,
// a '.' or a length specifier has been seen
past_decimal: bool,
// characters collected for the second field ('*' and digits after the '.')
second_field_tmp: Option<String>,
// the second field was given as '*'
second_field_is_asterisk: bool,
// a length specifier (h j l L t z) has been seen
specifiers_found: bool,
// the conversion character, once found
field_char: Option<char>,
// every character consumed so far; used in error messages
text_so_far: String,
}
impl SubParser {
// Creates a parser with every field at its `Default` value.
fn new() -> Self {
Self::default()
}
// Tries to read one substitution from `it`.
//
// When a substitution is found, its token is built, written to `writer`
// using `args`, and returned. `Ok(None)` is returned when `it` does not hold
// a substitution at this point; parse errors are propagated.
pub(crate) fn from_it<W>(
    writer: &mut W,
    it: &mut PutBackN<Chars>,
    args: &mut Peekable<Iter<String>>,
) -> UResult<Option<token::Token>>
where
    W: Write,
{
    let mut parser = Self::new();
    if !parser.sub_vals_retrieved(it)? {
        return Ok(None);
    }
    let token = Self::build_token(parser);
    token.write(writer, args);
    Ok(Some(token))
}
fn build_token(parser: Self) -> token::Token {
// not a self method so as to allow move of sub-parser vals.
// return new Sub struct as token
let prefix_char = match &parser.min_width_tmp {
Some(width) if width.starts_with('0') => '0',
_ => ' ',
};
token::Token::Sub(Sub::new(
if parser.min_width_is_asterisk {
CanAsterisk::Asterisk
} else {
CanAsterisk::Fixed(
parser
.min_width_tmp
.map(|x| x.parse::<isize>().unwrap_or(1)),
)
},
if parser.second_field_is_asterisk {
CanAsterisk::Asterisk
} else {
CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::<u32>().unwrap()))
},
parser.field_char.unwrap(),
parser.text_so_far,
prefix_char,
))
}
/// Parses one substitution of the rough shape
/// `%([0-9]+)?(.[0-9]+)?([a-zA-Z])` into minimum width, second field and
/// field character, recording every consumed character in `text_so_far`.
///
/// Returns `Ok(false)` when `it` is not positioned at a substitution,
/// `Ok(true)` once a complete substitution has been read, and
/// `SubError::InvalidSpec` (carrying the text read so far) when the
/// specification is malformed.
///
/// A regex is deliberately not used: on error we want to report the field
/// exactly as interpreted up to the offending character.
// The single match below is long but flat; splitting it would scatter the
// state machine across helpers.
#[allow(clippy::cognitive_complexity)]
fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> {
    if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? {
        return Ok(false);
    }
    for ch in it {
        self.text_so_far.push(ch);
        match ch {
            '-' | '*' | '0'..='9' => {
                if self.past_decimal {
                    // The second field can never be negative, cannot follow
                    // an asterisk, and cannot follow a length modifier.
                    if self.second_field_is_asterisk || ch == '-' || self.specifiers_found {
                        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                    }
                    let second_field = self.second_field_tmp.get_or_insert_with(String::new);
                    if ch == '*' {
                        // An asterisk must be the whole field.
                        if !second_field.is_empty() {
                            return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                        }
                        self.second_field_is_asterisk = true;
                    }
                    second_field.push(ch);
                } else {
                    if self.min_width_is_asterisk || self.specifiers_found {
                        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                    }
                    let min_width = self.min_width_tmp.get_or_insert_with(String::new);
                    // A sign or an asterisk is only legal as the first character.
                    if (ch == '-' || ch == '*') && !min_width.is_empty() {
                        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                    }
                    if ch == '*' {
                        self.min_width_is_asterisk = true;
                    }
                    min_width.push(ch);
                }
            }
            '.' => {
                if self.past_decimal {
                    return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                }
                self.past_decimal = true;
            }
            // Legal field characters. 'a' and 'A' are left out because the
            // c99 hex float implementation is not yet complete.
            'b' | 'c' | 'd' | 'e' | 'E' | 'f' | 'F' | 'g' | 'G' | 'i' | 'o' | 'q' | 's' | 'u'
            | 'x' | 'X' => {
                self.field_char = Some(ch);
                break;
            }
            // Length modifiers: they end the width/second-field section.
            'h' | 'j' | 'l' | 'L' | 't' | 'z' => {
                self.past_decimal = true;
                self.specifiers_found = true;
            }
            _ => {
                return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
            }
        }
    }
    let field_char_retrieved = match self.field_char {
        Some(field_char) => field_char,
        None => return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()),
    };
    // If the dot is provided without a second field, printf interprets the
    // second field as 0. (`past_decimal` is also set by length modifiers.)
    if self.past_decimal && self.second_field_tmp.is_none() {
        self.second_field_tmp = Some(String::from("0"));
    }
    self.validate_field_params(field_char_retrieved)?;
    Ok(true)
}
/// Checks whether `it` is positioned at the start of a substitution.
///
/// Reads the next two characters. A `%` followed by anything other than a
/// second `%` starts a substitution: the `%` stays consumed, the following
/// character is put back and `Ok(true)` is returned. A lone trailing `%`
/// is an error (after recording it in `text_so_far`). In every other case
/// (including `%%`) both characters are put back and `Ok(false)` is
/// returned.
fn successfully_eat_prefix(
    it: &mut PutBackN<Chars>,
    text_so_far: &mut String,
) -> UResult<bool> {
    let first = it.next();
    let second = it.next();
    match (first, second) {
        (Some('%'), None) => {
            text_so_far.push('%');
            Err(SubError::InvalidSpec(text_so_far.clone()).into())
        }
        (Some('%'), Some(next)) if next != '%' => {
            it.put_back(next);
            Ok(true)
        }
        (first, second) => {
            // Restore in reverse order so the iterator is left untouched.
            if let Some(c) = second {
                it.put_back(c);
            }
            if let Some(c) = first {
                it.put_back(c);
            }
            Ok(false)
        }
    }
}
/// Rejects width/precision combinations that are illegal for the given
/// field character.
///
/// Checking here, once per parsed substitution, avoids re-checking on
/// every application. (To do: move these checks to `Sub::new`, and include
/// information about the invalid string substitution in the error.)
///
/// * `s`: a minimum width of exactly `0` is invalid.
/// * `c`: a minimum width of exactly `0`, or anything past the width
///   section, is invalid.
/// * `b` / `q`: any width or second field at all is invalid.
fn validate_field_params(&self, field_char: char) -> UResult<()> {
    let width_is_zero = self.min_width_tmp.as_deref() == Some("0");
    let invalid = match field_char {
        's' => width_is_zero,
        'c' => width_is_zero || self.past_decimal,
        'b' | 'q' => {
            self.min_width_tmp.is_some() || self.past_decimal || self.second_field_tmp.is_some()
        }
        _ => false,
    };
    if invalid {
        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
    }
    Ok(())
}
}
impl Sub {
    /// Returns the longest prefix of `text` that is at most `max_bytes`
    /// bytes long and ends on a UTF-8 character boundary.
    ///
    /// Slicing directly with `&text[..max_bytes]` panics when `max_bytes`
    /// exceeds the length or falls inside a multi-byte character.
    fn truncate_to_bytes(text: &str, max_bytes: usize) -> &str {
        let mut end = max_bytes.min(text.len());
        // Index 0 is always a boundary, so this loop terminates.
        while !text.is_char_boundary(end) {
            end -= 1;
        }
        &text[..end]
    }

    /// Pads `text` with `pad` until it is `|min_width|` bytes long.
    ///
    /// A positive width pads on the left, a negative width pads on the
    /// right. Text that is already long enough is returned unchanged.
    fn pad_to_width(text: String, min_width: isize, pad: char) -> String {
        // `unsigned_abs` cannot overflow, unlike `abs` on `isize::MIN`.
        let missing = min_width.unsigned_abs().saturating_sub(text.len());
        if missing == 0 {
            return text;
        }
        let padding: String = std::iter::repeat(pad).take(missing).collect();
        if min_width > 0 {
            padding + &text
        } else {
            text + &padding
        }
    }

    /// Writes this substitution to `writer`, pulling the arguments it needs
    /// from `pf_args_it`.
    ///
    /// An asterisk width and/or second field each consume one argument
    /// first (a missing argument counts as `0`; a negative second field
    /// means "no second field"). The value argument is consumed after that.
    ///
    /// * `%s` prints the argument, truncated to the second field if given.
    /// * `%b` writes the unescaped argument directly to `writer`.
    /// * `%q` prints the argument shell-escaped.
    /// * `%c` prints the first character of the argument.
    /// * every other field is delegated to `num_format`.
    ///
    /// Write errors are ignored, since this function has no way to report
    /// them.
    // The field-type dispatch is one flat match; splitting it further would
    // not make it clearer.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn write<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
    where
        W: Write,
    {
        let field = FormatField {
            min_width: match self.min_width {
                CanAsterisk::Fixed(x) => x,
                CanAsterisk::Asterisk => {
                    match pf_args_it.next() {
                        // temporary, use intf.rs instead
                        Some(x) => Some(convert_asterisk_arg_int(x)),
                        None => Some(0),
                    }
                }
            },
            second_field: match self.second_field {
                CanAsterisk::Fixed(x) => x,
                CanAsterisk::Asterisk => {
                    match pf_args_it.next() {
                        // temporary, use intf.rs instead
                        Some(x) => {
                            let result = convert_asterisk_arg_int(x);
                            if result < 0 {
                                None
                            } else {
                                Some(result as u32)
                            }
                        }
                        None => Some(0),
                    }
                }
            },
            field_char: &self.field_char,
            field_type: &self.field_type,
            orig: &self.orig,
        };
        let pf_arg = pf_args_it.next();
        // Minimum width is handled independently of the actual field char.
        let pre_min_width_opt: Option<String> = match *field.field_type {
            FieldType::Strf | FieldType::Charf => match pf_arg {
                Some(arg_string) => match *field.field_char {
                    's' => Some(match field.second_field {
                        Some(max) => Self::truncate_to_bytes(arg_string, max as usize).to_owned(),
                        None => arg_string.clone(),
                    }),
                    'b' => {
                        // Written straight to `writer`; nothing left to pad.
                        let mut a_it = put_back_n(arg_string.chars());
                        UnescapedText::from_it_core(writer, &mut a_it, true);
                        None
                    }
                    'q' => Some(escape_name(
                        arg_string.as_ref(),
                        &QuotingStyle::Shell {
                            escape: true,
                            always_quote: false,
                            show_control: false,
                        },
                    )),
                    'c' => arg_string.chars().next().map(|x| x.to_string()),
                    _ => unreachable!(),
                },
                None => None,
            },
            _ => {
                // Non string/char fields are delegated to num_format.
                num_format::num_format(&field, pf_arg)
            }
        };
        if let Some(pre_min_width) = pre_min_width_opt {
            // We have a string: print it, ensuring the minimum width is met.
            let output = match field.min_width {
                Some(min_width) => Self::pad_to_width(pre_min_width, min_width, self.prefix_char),
                None => pre_min_width,
            };
            write!(writer, "{}", output).ok();
        }
    }
}

View file

@ -1,43 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! Traits and enums dealing with Tokenization of printf Format String
use std::io::Write;
use std::iter::Peekable;
use std::slice::Iter;
use crate::features::tokenize::sub::Sub;
use crate::features::tokenize::unescaped_text::UnescapedText;
/// A token is an object that can print the expected output
/// of a contiguous segment of the format string, and
/// requires at most 1 argument.
pub enum Token {
/// A `%` substitution.
Sub(Sub),
/// Literal text with its escape sequences already unescaped.
UnescapedText(UnescapedText),
}
impl Token {
pub(crate) fn write<W>(&self, writer: &mut W, args: &mut Peekable<Iter<String>>)
where
W: Write,
{
match self {
Self::Sub(sub) => sub.write(writer, args),
Self::UnescapedText(unescaped_text) => unescaped_text.write(writer),
}
}
}
// A tokenizer is an object that takes an iterator positioned somewhere in
// a format string and checks whether it can produce a token of a type it
// knows how to produce. If so, it returns the token, moves the iterator
// past the format string text the token represents, and, if an argument
// is used, moves the argument iterator forward by one.
// Creating a token for a format string segment also prints that token's
// value. Essentially, tokenizing a whole format string prints the format
// string and consumes a number of arguments equal to the number of
// argument-using tokens.

View file

@ -1,283 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//! UnescapedText is a tokenizer impl
//! for tokenizing character literals,
//! and escaped character literals (of allowed escapes),
//! into an unescaped text byte array
// spell-checker:ignore (ToDO) retval hexchars octals printf's bvec vals coreutil addchar eval bytecode bslice
use itertools::PutBackN;
use std::char::from_u32;
use std::io::Write;
use std::process::exit;
use std::str::Chars;
use super::token;
/// Process exit status used when the `\c` escape ends output early.
const EXIT_OK: i32 = 0;
/// Process exit status used when an escape sequence is invalid.
const EXIT_ERR: i32 = 1;
// Stdout is line-buffered by default, so text without a trailing newline
// would not reach the console; this macro writes the value with `{}` and
// flushes immediately. Errors from both steps are ignored.
macro_rules! write_and_flush {
    ($out:expr, $($value:tt)+) => {{
        write!($out, "{}", $($value)+).ok();
        $out.flush().ok();
    }};
}
/// Writes all of `bslice` to `writer` and flushes it.
///
/// This is best-effort: errors from both the write and the flush are
/// ignored.
fn flush_bytes<W: Write>(writer: &mut W, bslice: &[u8]) {
    let _ = writer.write_all(bslice);
    let _ = writer.flush();
}
/// Wrapper around a `Vec<u8>` of unescaped bytes. Raw bytes are used
/// because hex and octal escapes may not form valid UTF-8.
#[derive(Default)]
pub struct UnescapedText(Vec<u8>);
impl UnescapedText {
fn new() -> Self {
Self::default()
}
/// Reads up to `max_chars` digits of the given `base` from `it` and
/// returns their numeric value.
///
/// Reading stops at the end of input or at the first character that is not
/// a digit in `base`; that character is put back. If fewer than
/// `min_chars` digits were found (only ever expected for hex), an error
/// message is printed and the process exits with `EXIT_ERR`.
fn base_to_u32(min_chars: u8, max_chars: u8, base: u32, it: &mut PutBackN<Chars>) -> u32 {
    let mut value: u32 = 0;
    let mut digits_read = 0;
    while digits_read < max_chars {
        let candidate = match it.next() {
            Some(c) => c,
            // end of input
            None => break,
        };
        match candidate.to_digit(base) {
            Some(digit) => {
                digits_read += 1;
                value = value * base + digit;
            }
            None => {
                // end of the digit run
                it.put_back(candidate);
                break;
            }
        }
    }
    if digits_read < min_chars {
        println!("missing hexadecimal number in escape"); //todo stderr
        exit(EXIT_ERR);
    }
    value
}
/// Validates `val` as an IEC 10646 universal character name, printing an
/// error and exiting with `EXIT_ERR` when it is not allowed.
///
/// The accepted values are pinned against the more popular printf so that
/// this can be dropped in as a replacement. Rejected are:
///
/// * everything up to and including `0x9F`, except `$` (0x24), `@` (0x40)
///   and `` ` `` (0x60);
/// * the surrogate range `0xD800..=0xDFFF`, both ends included.
///
/// `eight_word` selects the `U` (eight hex digits) form of the name in the
/// error message instead of the `u` (four hex digits) form.
fn validate_iec(val: u32, eight_word: bool) {
    let is_disallowed_low = val <= 0x9F && !matches!(val, 0x24 | 0x40 | 0x60);
    let is_surrogate = (0xD800..=0xDFFF).contains(&val);
    if is_disallowed_low || is_surrogate {
        // Only build the message when it is actually needed.
        let (preface, leading_zeros) = if eight_word { ('U', 8) } else { ('u', 4) };
        println!("invalid universal character name {preface}{val:0leading_zeros$x}"); //todo stderr
        exit(EXIT_ERR);
    }
}
/// Processes one escape sequence. `it` must be positioned just after the
/// backslash; a backslash at the very end of input is treated as `\\`.
///
/// The unescaped bytes are written (and flushed) to `writer` and also
/// appended to `byte_vec`.
///
/// * `\xHH`: one or two hex digits, emitted as a single byte.
/// * `\NNN`: one to three octal digits, emitted as a single byte (value
///   modulo 256). In `subs_mode` (`%b`) a leading `0` is skipped and may be
///   followed by up to three more octal digits, or by none, in which case
///   the byte is NUL.
/// * `\uHHHH` / `\UHHHHHHHH`: a validated universal character name.
/// * `\c`: exits the process with `EXIT_OK`.
/// * the usual single-character escapes (`\n`, `\t`, ...).
/// * anything else is emitted verbatim, backslash included.
fn handle_escaped<W>(
    writer: &mut W,
    byte_vec: &mut Vec<u8>,
    it: &mut PutBackN<Chars>,
    subs_mode: bool,
) where
    W: Write,
{
    let ch = it.next().unwrap_or('\\');
    match ch {
        '0'..='9' | 'x' => {
            let (min_len, max_len, base) = if ch == 'x' {
                (1, 2, 16)
            } else if subs_mode && ch == '0' {
                // In practice gnu coreutils printf interprets octals
                // without a leading zero in %b, but only skips the leading
                // zero in %b mode. The zero is already consumed here, so
                // zero further digits is valid and yields a NUL byte.
                (0, 3, 8)
            } else {
                // The digit is part of the number itself.
                it.put_back(ch);
                (1, 3, 8)
            };
            let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8;
            byte_vec.push(val);
            flush_bytes(writer, &[val]);
        }
        e => {
            // Byte encoding is only specified for hex and octal; everything
            // else goes through `String` so other encodings stay possible.
            let mut s = String::new();
            let unescaped = match e {
                '\\' => '\\',
                '"' => '"',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                // bell
                'a' => '\x07',
                // backspace
                'b' => '\x08',
                // vertical tab
                'v' => '\x0B',
                // form feed
                'f' => '\x0C',
                // escape character
                'e' => '\x1B',
                'c' => exit(EXIT_OK),
                'u' | 'U' => {
                    let eight_word = e == 'U';
                    let len = if eight_word { 8 } else { 4 };
                    let val = Self::base_to_u32(len, len, 16, it);
                    // Pass the real form so the error names `U` vs `u`.
                    Self::validate_iec(val, eight_word);
                    from_u32(val).unwrap_or('-')
                }
                _ => {
                    // Unknown escape: keep the backslash.
                    s.push('\\');
                    e
                }
            };
            s.push(unescaped);
            write_and_flush!(writer, &s);
            byte_vec.extend(s.bytes());
        }
    };
}
/// Consumes literal text and escape sequences from `it`, writing the
/// unescaped output to `writer` as it goes and collecting the same bytes
/// into an `UnescapedText` token.
///
/// Outside `subs_mode`, parsing stops at the start of a substitution (a `%`
/// not followed by another `%`), which is put back for the caller; `%%`
/// yields a single `%`. In `subs_mode` (used for `%b`) a `%` is ordinary
/// text.
///
/// Returns `None` only when `it` yielded no characters at all.
// This fn may be called many times in a single exec through %b.
#[allow(clippy::cognitive_complexity)]
pub fn from_it_core<W>(
    writer: &mut W,
    it: &mut PutBackN<Chars>,
    subs_mode: bool,
) -> Option<token::Token>
where
    W: Write,
{
    let mut saw_char = false;
    let mut text = Self::new();
    // Plain characters are staged here and moved into the byte vector
    // before each escape, because an escape may produce bytes that are not
    // valid UTF-8 and so cannot live in a `String`.
    let mut pending = String::new();
    while let Some(ch) = it.next() {
        saw_char = true;
        match ch {
            '\\' => {
                text.0.extend(pending.bytes());
                pending.clear();
                Self::handle_escaped(writer, &mut text.0, it, subs_mode);
            }
            '%' if !subs_mode => match it.next() {
                Some('%') => {
                    write_and_flush!(writer, ch);
                    pending.push(ch);
                }
                Some(follow) => {
                    // Start of a substitution: leave it for the caller.
                    it.put_back(follow);
                    it.put_back(ch);
                    break;
                }
                None => {
                    it.put_back(ch);
                    break;
                }
            },
            _ => {
                write_and_flush!(writer, ch);
                pending.push(ch);
            }
        }
    }
    text.0.extend(pending.bytes());
    if saw_char {
        Some(token::Token::UnescapedText(text))
    } else {
        None
    }
}
}
impl UnescapedText {
    /// Writes the collected unescaped bytes to `writer` and flushes it.
    pub(crate) fn write<W>(&self, writer: &mut W)
    where
        W: Write,
    {
        flush_bytes(writer, self.0.as_slice());
    }
}

View file

@ -37,14 +37,14 @@ pub use crate::parser::shortcut_value_parser;
pub use crate::features::backup_control;
#[cfg(feature = "encoding")]
pub use crate::features::encoding;
#[cfg(feature = "format")]
pub use crate::features::format;
#[cfg(feature = "fs")]
pub use crate::features::fs;
#[cfg(feature = "fsext")]
pub use crate::features::fsext;
#[cfg(feature = "lines")]
pub use crate::features::lines;
#[cfg(feature = "memo")]
pub use crate::features::memo;
#[cfg(feature = "quoting-style")]
pub use crate::features::quoting_style;
#[cfg(feature = "ranges")]

View file

@ -224,6 +224,11 @@ fn sub_num_int_char_const_in() {
.args(&["ninety seven is %i", "'a"])
.succeeds()
.stdout_only("ninety seven is 97");
new_ucmd!()
.args(&["emoji is %i", "'🙃"])
.succeeds()
.stdout_only("emoji is 128579");
}
#[test]
@ -291,7 +296,16 @@ fn sub_num_float_e_no_round() {
}
#[test]
fn sub_num_float_round() {
fn sub_num_float_round_to_one() {
new_ucmd!()
.args(&["one is %f", "0.9999995"])
.succeeds()
.stdout_only("one is 1.000000");
}
#[test]
#[ignore = "Requires 'long double' precision floats to be used internally"]
fn sub_num_float_round_to_two() {
new_ucmd!()
.args(&["two is %f", "1.9999995"])
.succeeds()
@ -413,6 +427,7 @@ fn sub_float_dec_places() {
}
#[test]
#[ignore = "hexadecimal floats are unimplemented"]
fn sub_float_hex_in() {
new_ucmd!()
.args(&["%f", "0xF1.1F"])