diff --git a/src/uu/csplit/Cargo.toml b/src/uu/csplit/Cargo.toml index 5e2f310cb..0a3c3bc60 100644 --- a/src/uu/csplit/Cargo.toml +++ b/src/uu/csplit/Cargo.toml @@ -18,7 +18,7 @@ path = "src/csplit.rs" clap = { workspace = true } thiserror = { workspace = true } regex = { workspace = true } -uucore = { workspace = true, features = ["entries", "fs"] } +uucore = { workspace = true, features = ["entries", "fs", "format"] } [[bin]] name = "csplit" diff --git a/src/uu/csplit/src/split_name.rs b/src/uu/csplit/src/split_name.rs index 4d94b56a9..e2432f3ce 100644 --- a/src/uu/csplit/src/split_name.rs +++ b/src/uu/csplit/src/split_name.rs @@ -4,14 +4,15 @@ // file that was distributed with this source code. // spell-checker:ignore (regex) diuox -use regex::Regex; +use uucore::format::{num_format::UnsignedInt, Format, FormatError}; use crate::csplit_error::CsplitError; /// Computes the filename of a split, taking into consideration a possible user-defined suffix /// format. pub struct SplitName { - fn_split_name: Box String>, + prefix: Vec, + format: Format, } impl SplitName { @@ -36,6 +37,7 @@ impl SplitName { ) -> Result { // get the prefix let prefix = prefix_opt.unwrap_or_else(|| "xx".to_string()); + // the width for the split offset let n_digits = n_digits_opt .map(|opt| { @@ -44,120 +46,29 @@ impl SplitName { }) .transpose()? .unwrap_or(2); - // translate the custom format into a function - let fn_split_name: Box String> = match format_opt { - None => Box::new(move |n: usize| -> String { format!("{prefix}{n:0n_digits$}") }), - Some(custom) => { - let spec = - Regex::new(r"(?P%((?P[0#-])(?P\d+)?)?(?P[diuoxX]))") - .unwrap(); - let mut captures_iter = spec.captures_iter(&custom); - let custom_fn: Box String> = match captures_iter.next() { - Some(captures) => { - let all = captures.name("ALL").unwrap(); - let before = custom[0..all.start()].to_owned(); - let after = custom[all.end()..].to_owned(); - let width = match captures.name("WIDTH") { - None => 0, - Some(m) => m.as_str().parse::().unwrap(), - }; - match (captures.name("FLAG"), captures.name("TYPE")) { - (None, Some(ref t)) => match t.as_str() { - "d" | "i" | "u" => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n}{after}") - }), - "o" => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:o}{after}") - }), - "x" => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:x}{after}") - }), - "X" => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:X}{after}") - }), - _ => return Err(CsplitError::SuffixFormatIncorrect), - }, - (Some(ref f), Some(ref t)) => { - match (f.as_str(), t.as_str()) { - /* - * zero padding - */ - // decimal - ("0", "d" | "i" | "u") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:0width$}{after}") - }), - // octal - ("0", "o") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:0width$o}{after}") - }), - // lower hexadecimal - ("0", "x") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:0width$x}{after}") - }), - // upper hexadecimal - ("0", "X") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:0width$X}{after}") - }), - /* - * Alternate form - */ - // octal - ("#", "o") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:>#width$o}{after}") - }), - // lower hexadecimal - ("#", "x") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:>#width$x}{after}") - }), - // upper hexadecimal - ("#", "X") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:>#width$X}{after}") - }), - - /* - * Left adjusted - */ - // decimal - ("-", "d" | "i" | "u") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:<#width$}{after}") - }), - // octal - ("-", "o") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:<#width$o}{after}") - }), - // lower hexadecimal - ("-", "x") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:<#width$x}{after}") - }), - // upper hexadecimal - ("-", "X") => Box::new(move |n: usize| -> String { - format!("{prefix}{before}{n:<#width$X}{after}") - }), - - _ => return Err(CsplitError::SuffixFormatIncorrect), - } - } - _ => return Err(CsplitError::SuffixFormatIncorrect), - } - } - None => return Err(CsplitError::SuffixFormatIncorrect), - }; - - // there cannot be more than one format pattern - if captures_iter.next().is_some() { - return Err(CsplitError::SuffixFormatTooManyPercents); - } - custom_fn - } + let format_string = match format_opt { + Some(f) => f, + None => format!("%0{n_digits}u"), }; - Ok(Self { fn_split_name }) + let format = match Format::::parse(format_string) { + Ok(format) => Ok(format), + Err(FormatError::TooManySpecs(_)) => Err(CsplitError::SuffixFormatTooManyPercents), + Err(_) => Err(CsplitError::SuffixFormatIncorrect), + }?; + + Ok(Self { + prefix: prefix.as_bytes().to_owned(), + format, + }) } /// Returns the filename of the i-th split. pub fn get(&self, n: usize) -> String { - (self.fn_split_name)(n) + let mut v = self.prefix.clone(); + self.format.fmt(&mut v, n as u64).unwrap(); + String::from_utf8_lossy(&v).to_string() } } @@ -279,7 +190,7 @@ mod tests { #[test] fn alternate_form_octal() { let split_name = SplitName::new(None, Some(String::from("cst-%#10o-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst- 0o52-"); + assert_eq!(split_name.get(42), "xxcst- 052-"); } #[test] @@ -291,7 +202,7 @@ mod tests { #[test] fn alternate_form_upper_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%#10X-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst- 0x2A-"); + assert_eq!(split_name.get(42), "xxcst- 0X2A-"); } #[test] @@ -315,19 +226,19 @@ mod tests { #[test] fn left_adjusted_octal() { let split_name = SplitName::new(None, Some(String::from("cst-%-10o-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-0o52 -"); + assert_eq!(split_name.get(42), "xxcst-52 -"); } #[test] fn left_adjusted_lower_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%-10x-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-0x2a -"); + assert_eq!(split_name.get(42), "xxcst-2a -"); } #[test] fn left_adjusted_upper_hex() { let split_name = SplitName::new(None, Some(String::from("cst-%-10X-")), None).unwrap(); - assert_eq!(split_name.get(42), "xxcst-0x2A -"); + assert_eq!(split_name.get(42), "xxcst-2A -"); } #[test] diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 78c01cd07..b0cb8cd4d 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -80,7 +80,7 @@ fs = ["dunce", "libc", "winapi-util", "windows-sys"] fsext = ["libc", "time", "windows-sys"] fsxattr = ["xattr"] lines = [] -format = ["itertools"] +format = ["itertools", "quoting-style"] mode = ["libc"] perms = ["libc", "walkdir"] pipes = [] diff --git a/src/uucore/src/lib/features/format/num_format.rs b/src/uucore/src/lib/features/format/num_format.rs index 4c4a541c4..52551f10b 100644 --- a/src/uucore/src/lib/features/format/num_format.rs +++ b/src/uucore/src/lib/features/format/num_format.rs @@ -60,7 +60,7 @@ pub enum PositiveSign { Space, } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum NumberAlignment { Left, RightSpace, @@ -168,6 +168,24 @@ impl Formatter for UnsignedInt { } fn try_from_spec(s: Spec) -> Result { + // A signed int spec might be mapped to an unsigned int spec if no sign is specified + let s = if let Spec::SignedInt { + width, + precision, + positive_sign: PositiveSign::None, + alignment, + } = s + { + Spec::UnsignedInt { + variant: UnsignedIntVariant::Decimal, + width, + precision, + alignment, + } + } else { + s + }; + let Spec::UnsignedInt { variant, width, diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index 7c0d02367..6d342f742 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -87,6 +87,40 @@ enum Length { LongDouble, } +#[derive(Default, PartialEq, Eq)] +struct Flags { + minus: bool, + plus: bool, + space: bool, + hash: bool, + zero: bool, +} + +impl Flags { + pub fn parse(rest: &mut &[u8], index: &mut usize) -> Self { + let mut flags = Self::default(); + + while let Some(x) = rest.get(*index) { + match x { + b'-' => flags.minus = true, + b'+' => flags.plus = true, + b' ' => flags.space = true, + b'#' => flags.hash = true, + b'0' => flags.zero = true, + _ => break, + } + *index += 1; + } + + flags + } + + /// Whether any of the flags is set to true + fn any(&self) -> bool { + self != &Self::default() + } +} + impl Spec { pub fn parse<'a>(rest: &mut &'a [u8]) -> Result { // Based on the C++ reference, the spec format looks like: @@ -97,34 +131,12 @@ impl Spec { let mut index = 0; let start = *rest; - let mut minus = false; - let mut plus = false; - let mut space = false; - let mut hash = false; - let mut zero = false; + let flags = Flags::parse(rest, &mut index); - while let Some(x) = rest.get(index) { - match x { - b'-' => minus = true, - b'+' => plus = true, - b' ' => space = true, - b'#' => hash = true, - b'0' => zero = true, - _ => break, - } - index += 1; - } - - let alignment = match (minus, zero) { - (true, _) => NumberAlignment::Left, - (false, true) => NumberAlignment::RightZero, - (false, false) => NumberAlignment::RightSpace, - }; - - let positive_sign = match (plus, space) { - (true, _) => PositiveSign::Plus, - (false, true) => PositiveSign::Space, - (false, false) => PositiveSign::None, + let positive_sign = match flags { + Flags { plus: true, .. } => PositiveSign::Plus, + Flags { space: true, .. } => PositiveSign::Space, + _ => PositiveSign::None, }; let width = eat_asterisk_or_number(rest, &mut index); @@ -136,6 +148,17 @@ impl Spec { None }; + // The `0` flag is ignored if `-` is given or a precision is specified. + // So the only case for RightZero, is when `-` is not given and the + // precision is none. + let alignment = if flags.minus { + NumberAlignment::Left + } else if flags.zero && precision.is_none() { + NumberAlignment::RightZero + } else { + NumberAlignment::RightSpace + }; + // We ignore the length. It's not really relevant to printf let _ = Self::parse_length(rest, &mut index); @@ -148,38 +171,38 @@ impl Spec { Ok(match type_spec { // GNU accepts minus, plus and space even though they are not used b'c' => { - if hash || precision.is_some() { + if flags.hash || precision.is_some() { return Err(&start[..index]); } Self::Char { width, - align_left: minus, + align_left: flags.minus, } } b's' => { - if hash { + if flags.hash { return Err(&start[..index]); } Self::String { precision, width, - align_left: minus, + align_left: flags.minus, } } b'b' => { - if hash || minus || plus || space || width.is_some() || precision.is_some() { + if flags.any() || width.is_some() || precision.is_some() { return Err(&start[..index]); } Self::EscapedString } b'q' => { - if hash || minus || plus || space || width.is_some() || precision.is_some() { + if flags.any() || width.is_some() || precision.is_some() { return Err(&start[..index]); } Self::QuotedString } b'd' | b'i' => { - if hash { + if flags.hash { return Err(&start[..index]); } Self::SignedInt { @@ -191,10 +214,10 @@ impl Spec { } c @ (b'u' | b'o' | b'x' | b'X') => { // Normal unsigned integer cannot have a prefix - if *c == b'u' && hash { + if *c == b'u' && flags.hash { return Err(&start[..index]); } - let prefix = match hash { + let prefix = match flags.hash { false => Prefix::No, true => Prefix::Yes, }; @@ -222,7 +245,7 @@ impl Spec { b'a' | b'A' => FloatVariant::Hexadecimal, _ => unreachable!(), }, - force_decimal: match hash { + force_decimal: match flags.hash { false => ForceDecimal::No, true => ForceDecimal::Yes, }, diff --git a/tests/by-util/test_csplit.rs b/tests/by-util/test_csplit.rs index b83d5e0ee..fb4f4cc2a 100644 --- a/tests/by-util/test_csplit.rs +++ b/tests/by-util/test_csplit.rs @@ -1342,3 +1342,20 @@ fn test_line_num_range_with_up_to_match3() { assert_eq!(at.read("xx01"), ""); assert_eq!(at.read("xx02"), generate(10, 51)); } + +#[test] +fn precision_format() { + for f in ["%#6.3x", "%0#6.3x"] { + let (at, mut ucmd) = at_and_ucmd!(); + ucmd.args(&["numbers50.txt", "10", "--suffix-format", f]) + .succeeds() + .stdout_only("18\n123\n"); + + let count = glob(&at.plus_as_string("xx*")) + .expect("there should be splits created") + .count(); + assert_eq!(count, 2); + assert_eq!(at.read("xx 000"), generate(1, 10)); + assert_eq!(at.read("xx 0x001"), generate(10, 51)); + } +}