1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge pull request #5958 from tertsdiepraam/csplit-printf

`csplit`: use `printf` functionality from `uucore`
This commit is contained in:
Daniel Hofstetter 2024-02-16 14:39:03 +01:00 committed by GitHub
commit 420dfe8a9b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 123 additions and 154 deletions

View file

@ -18,7 +18,7 @@ path = "src/csplit.rs"
clap = { workspace = true }
thiserror = { workspace = true }
regex = { workspace = true }
uucore = { workspace = true, features = ["entries", "fs"] }
uucore = { workspace = true, features = ["entries", "fs", "format"] }
[[bin]]
name = "csplit"

View file

@ -4,14 +4,15 @@
// file that was distributed with this source code.
// spell-checker:ignore (regex) diuox
use regex::Regex;
use uucore::format::{num_format::UnsignedInt, Format, FormatError};
use crate::csplit_error::CsplitError;
/// Computes the filename of a split, taking into consideration a possible user-defined suffix
/// format.
pub struct SplitName {
fn_split_name: Box<dyn Fn(usize) -> String>,
prefix: Vec<u8>,
format: Format<UnsignedInt>,
}
impl SplitName {
@ -36,6 +37,7 @@ impl SplitName {
) -> Result<Self, CsplitError> {
// get the prefix
let prefix = prefix_opt.unwrap_or_else(|| "xx".to_string());
// the width for the split offset
let n_digits = n_digits_opt
.map(|opt| {
@ -44,120 +46,29 @@ impl SplitName {
})
.transpose()?
.unwrap_or(2);
// translate the custom format into a function
let fn_split_name: Box<dyn Fn(usize) -> String> = match format_opt {
None => Box::new(move |n: usize| -> String { format!("{prefix}{n:0n_digits$}") }),
Some(custom) => {
let spec =
Regex::new(r"(?P<ALL>%((?P<FLAG>[0#-])(?P<WIDTH>\d+)?)?(?P<TYPE>[diuoxX]))")
.unwrap();
let mut captures_iter = spec.captures_iter(&custom);
let custom_fn: Box<dyn Fn(usize) -> String> = match captures_iter.next() {
Some(captures) => {
let all = captures.name("ALL").unwrap();
let before = custom[0..all.start()].to_owned();
let after = custom[all.end()..].to_owned();
let width = match captures.name("WIDTH") {
None => 0,
Some(m) => m.as_str().parse::<usize>().unwrap(),
};
match (captures.name("FLAG"), captures.name("TYPE")) {
(None, Some(ref t)) => match t.as_str() {
"d" | "i" | "u" => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n}{after}")
}),
"o" => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:o}{after}")
}),
"x" => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:x}{after}")
}),
"X" => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:X}{after}")
}),
_ => return Err(CsplitError::SuffixFormatIncorrect),
},
(Some(ref f), Some(ref t)) => {
match (f.as_str(), t.as_str()) {
/*
* zero padding
*/
// decimal
("0", "d" | "i" | "u") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:0width$}{after}")
}),
// octal
("0", "o") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:0width$o}{after}")
}),
// lower hexadecimal
("0", "x") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:0width$x}{after}")
}),
// upper hexadecimal
("0", "X") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:0width$X}{after}")
}),
/*
* Alternate form
*/
// octal
("#", "o") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:>#width$o}{after}")
}),
// lower hexadecimal
("#", "x") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:>#width$x}{after}")
}),
// upper hexadecimal
("#", "X") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:>#width$X}{after}")
}),
/*
* Left adjusted
*/
// decimal
("-", "d" | "i" | "u") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:<#width$}{after}")
}),
// octal
("-", "o") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:<#width$o}{after}")
}),
// lower hexadecimal
("-", "x") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:<#width$x}{after}")
}),
// upper hexadecimal
("-", "X") => Box::new(move |n: usize| -> String {
format!("{prefix}{before}{n:<#width$X}{after}")
}),
_ => return Err(CsplitError::SuffixFormatIncorrect),
}
}
_ => return Err(CsplitError::SuffixFormatIncorrect),
}
}
None => return Err(CsplitError::SuffixFormatIncorrect),
};
// there cannot be more than one format pattern
if captures_iter.next().is_some() {
return Err(CsplitError::SuffixFormatTooManyPercents);
}
custom_fn
}
let format_string = match format_opt {
Some(f) => f,
None => format!("%0{n_digits}u"),
};
Ok(Self { fn_split_name })
let format = match Format::<UnsignedInt>::parse(format_string) {
Ok(format) => Ok(format),
Err(FormatError::TooManySpecs(_)) => Err(CsplitError::SuffixFormatTooManyPercents),
Err(_) => Err(CsplitError::SuffixFormatIncorrect),
}?;
Ok(Self {
prefix: prefix.as_bytes().to_owned(),
format,
})
}
/// Returns the filename of the i-th split.
pub fn get(&self, n: usize) -> String {
(self.fn_split_name)(n)
let mut v = self.prefix.clone();
self.format.fmt(&mut v, n as u64).unwrap();
String::from_utf8_lossy(&v).to_string()
}
}
@ -279,7 +190,7 @@ mod tests {
#[test]
fn alternate_form_octal() {
let split_name = SplitName::new(None, Some(String::from("cst-%#10o-")), None).unwrap();
assert_eq!(split_name.get(42), "xxcst- 0o52-");
assert_eq!(split_name.get(42), "xxcst- 052-");
}
#[test]
@ -291,7 +202,7 @@ mod tests {
#[test]
fn alternate_form_upper_hex() {
let split_name = SplitName::new(None, Some(String::from("cst-%#10X-")), None).unwrap();
assert_eq!(split_name.get(42), "xxcst- 0x2A-");
assert_eq!(split_name.get(42), "xxcst- 0X2A-");
}
#[test]
@ -315,19 +226,19 @@ mod tests {
#[test]
fn left_adjusted_octal() {
let split_name = SplitName::new(None, Some(String::from("cst-%-10o-")), None).unwrap();
assert_eq!(split_name.get(42), "xxcst-0o52 -");
assert_eq!(split_name.get(42), "xxcst-52 -");
}
#[test]
fn left_adjusted_lower_hex() {
let split_name = SplitName::new(None, Some(String::from("cst-%-10x-")), None).unwrap();
assert_eq!(split_name.get(42), "xxcst-0x2a -");
assert_eq!(split_name.get(42), "xxcst-2a -");
}
#[test]
fn left_adjusted_upper_hex() {
let split_name = SplitName::new(None, Some(String::from("cst-%-10X-")), None).unwrap();
assert_eq!(split_name.get(42), "xxcst-0x2A -");
assert_eq!(split_name.get(42), "xxcst-2A -");
}
#[test]

View file

@ -80,7 +80,7 @@ fs = ["dunce", "libc", "winapi-util", "windows-sys"]
fsext = ["libc", "time", "windows-sys"]
fsxattr = ["xattr"]
lines = []
format = ["itertools"]
format = ["itertools", "quoting-style"]
mode = ["libc"]
perms = ["libc", "walkdir"]
pipes = []

View file

@ -60,7 +60,7 @@ pub enum PositiveSign {
Space,
}
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumberAlignment {
Left,
RightSpace,
@ -168,6 +168,24 @@ impl Formatter for UnsignedInt {
}
fn try_from_spec(s: Spec) -> Result<Self, FormatError> {
// A signed int spec might be mapped to an unsigned int spec if no sign is specified
let s = if let Spec::SignedInt {
width,
precision,
positive_sign: PositiveSign::None,
alignment,
} = s
{
Spec::UnsignedInt {
variant: UnsignedIntVariant::Decimal,
width,
precision,
alignment,
}
} else {
s
};
let Spec::UnsignedInt {
variant,
width,

View file

@ -87,6 +87,40 @@ enum Length {
LongDouble,
}
/// The flag characters that may directly follow the `%` of a format
/// specification.
///
/// Each field records whether the corresponding character was seen at least
/// once; repetitions are accepted and have no additional effect.
#[derive(Default, PartialEq, Eq)]
struct Flags {
    /// `-` was given.
    minus: bool,
    /// `+` was given.
    plus: bool,
    /// ` ` (space) was given.
    space: bool,
    /// `#` was given.
    hash: bool,
    /// `0` was given.
    zero: bool,
}

impl Flags {
    /// Consumes the run of flag characters found in `rest` starting at
    /// `*index`.
    ///
    /// `*index` is advanced past every flag byte that was read and is left on
    /// the first byte that is not a flag (or at the end of the input). `rest`
    /// itself is only read, never re-sliced.
    pub fn parse(rest: &mut &[u8], index: &mut usize) -> Self {
        let mut parsed = Self::default();
        loop {
            // Select the field matching the current byte; anything else
            // (including running out of input) terminates the flag run.
            let slot = match rest.get(*index) {
                Some(b'-') => &mut parsed.minus,
                Some(b'+') => &mut parsed.plus,
                Some(b' ') => &mut parsed.space,
                Some(b'#') => &mut parsed.hash,
                Some(b'0') => &mut parsed.zero,
                _ => return parsed,
            };
            *slot = true;
            *index += 1;
        }
    }

    /// Whether any of the flags is set to true
    fn any(&self) -> bool {
        self.minus || self.plus || self.space || self.hash || self.zero
    }
}
impl Spec {
pub fn parse<'a>(rest: &mut &'a [u8]) -> Result<Self, &'a [u8]> {
// Based on the C++ reference, the spec format looks like:
@ -97,34 +131,12 @@ impl Spec {
let mut index = 0;
let start = *rest;
let mut minus = false;
let mut plus = false;
let mut space = false;
let mut hash = false;
let mut zero = false;
let flags = Flags::parse(rest, &mut index);
while let Some(x) = rest.get(index) {
match x {
b'-' => minus = true,
b'+' => plus = true,
b' ' => space = true,
b'#' => hash = true,
b'0' => zero = true,
_ => break,
}
index += 1;
}
let alignment = match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
};
let positive_sign = match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
let positive_sign = match flags {
Flags { plus: true, .. } => PositiveSign::Plus,
Flags { space: true, .. } => PositiveSign::Space,
_ => PositiveSign::None,
};
let width = eat_asterisk_or_number(rest, &mut index);
@ -136,6 +148,17 @@ impl Spec {
None
};
// The `0` flag is ignored if `-` is given or a precision is specified.
// So the only case for RightZero, is when `-` is not given and the
// precision is none.
let alignment = if flags.minus {
NumberAlignment::Left
} else if flags.zero && precision.is_none() {
NumberAlignment::RightZero
} else {
NumberAlignment::RightSpace
};
// We ignore the length. It's not really relevant to printf
let _ = Self::parse_length(rest, &mut index);
@ -148,38 +171,38 @@ impl Spec {
Ok(match type_spec {
// GNU accepts minus, plus and space even though they are not used
b'c' => {
if hash || precision.is_some() {
if flags.hash || precision.is_some() {
return Err(&start[..index]);
}
Self::Char {
width,
align_left: minus,
align_left: flags.minus,
}
}
b's' => {
if hash {
if flags.hash {
return Err(&start[..index]);
}
Self::String {
precision,
width,
align_left: minus,
align_left: flags.minus,
}
}
b'b' => {
if hash || minus || plus || space || width.is_some() || precision.is_some() {
if flags.any() || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::EscapedString
}
b'q' => {
if hash || minus || plus || space || width.is_some() || precision.is_some() {
if flags.any() || width.is_some() || precision.is_some() {
return Err(&start[..index]);
}
Self::QuotedString
}
b'd' | b'i' => {
if hash {
if flags.hash {
return Err(&start[..index]);
}
Self::SignedInt {
@ -191,10 +214,10 @@ impl Spec {
}
c @ (b'u' | b'o' | b'x' | b'X') => {
// Normal unsigned integer cannot have a prefix
if *c == b'u' && hash {
if *c == b'u' && flags.hash {
return Err(&start[..index]);
}
let prefix = match hash {
let prefix = match flags.hash {
false => Prefix::No,
true => Prefix::Yes,
};
@ -222,7 +245,7 @@ impl Spec {
b'a' | b'A' => FloatVariant::Hexadecimal,
_ => unreachable!(),
},
force_decimal: match hash {
force_decimal: match flags.hash {
false => ForceDecimal::No,
true => ForceDecimal::Yes,
},

View file

@ -1342,3 +1342,20 @@ fn test_line_num_range_with_up_to_match3() {
assert_eq!(at.read("xx01"), "");
assert_eq!(at.read("xx02"), generate(10, 51));
}
/// A `--suffix-format` carrying a precision must be honoured, and the `0`
/// flag must not change the result when a precision is present.
#[test]
fn precision_format() {
    // `%0#6.3x` must behave exactly like `%#6.3x`: the `0` flag is ignored
    // once a precision is specified.
    for f in ["%#6.3x", "%0#6.3x"] {
        let (at, mut ucmd) = at_and_ucmd!();
        ucmd.args(&["numbers50.txt", "10", "--suffix-format", f])
            .succeeds()
            .stdout_only("18\n123\n");

        let count = glob(&at.plus_as_string("xx*"))
            .expect("there should be splits created")
            .count();
        assert_eq!(count, 2);

        // Width 6 with precision 3: split 0 is rendered as "000" right-aligned
        // in six columns (printf's `#` adds no prefix to a zero value), and
        // split 1 as "0x001" right-aligned in six columns.
        assert_eq!(at.read("xx   000"), generate(1, 10));
        assert_eq!(at.read("xx 0x001"), generate(10, 51));
    }
}