mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 11:37:44 +00:00
Merge pull request #5958 from tertsdiepraam/csplit-printf
`csplit`: use `printf` functionality from `uucore`
This commit is contained in:
commit
420dfe8a9b
6 changed files with 123 additions and 154 deletions
|
@ -18,7 +18,7 @@ path = "src/csplit.rs"
|
|||
clap = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
uucore = { workspace = true, features = ["entries", "fs"] }
|
||||
uucore = { workspace = true, features = ["entries", "fs", "format"] }
|
||||
|
||||
[[bin]]
|
||||
name = "csplit"
|
||||
|
|
|
@ -4,14 +4,15 @@
|
|||
// file that was distributed with this source code.
|
||||
// spell-checker:ignore (regex) diuox
|
||||
|
||||
use regex::Regex;
|
||||
use uucore::format::{num_format::UnsignedInt, Format, FormatError};
|
||||
|
||||
use crate::csplit_error::CsplitError;
|
||||
|
||||
/// Computes the filename of a split, taking into consideration a possible user-defined suffix
|
||||
/// format.
|
||||
pub struct SplitName {
|
||||
fn_split_name: Box<dyn Fn(usize) -> String>,
|
||||
prefix: Vec<u8>,
|
||||
format: Format<UnsignedInt>,
|
||||
}
|
||||
|
||||
impl SplitName {
|
||||
|
@ -36,6 +37,7 @@ impl SplitName {
|
|||
) -> Result<Self, CsplitError> {
|
||||
// get the prefix
|
||||
let prefix = prefix_opt.unwrap_or_else(|| "xx".to_string());
|
||||
|
||||
// the width for the split offset
|
||||
let n_digits = n_digits_opt
|
||||
.map(|opt| {
|
||||
|
@ -44,120 +46,29 @@ impl SplitName {
|
|||
})
|
||||
.transpose()?
|
||||
.unwrap_or(2);
|
||||
// translate the custom format into a function
|
||||
let fn_split_name: Box<dyn Fn(usize) -> String> = match format_opt {
|
||||
None => Box::new(move |n: usize| -> String { format!("{prefix}{n:0n_digits$}") }),
|
||||
Some(custom) => {
|
||||
let spec =
|
||||
Regex::new(r"(?P<ALL>%((?P<FLAG>[0#-])(?P<WIDTH>\d+)?)?(?P<TYPE>[diuoxX]))")
|
||||
.unwrap();
|
||||
let mut captures_iter = spec.captures_iter(&custom);
|
||||
let custom_fn: Box<dyn Fn(usize) -> String> = match captures_iter.next() {
|
||||
Some(captures) => {
|
||||
let all = captures.name("ALL").unwrap();
|
||||
let before = custom[0..all.start()].to_owned();
|
||||
let after = custom[all.end()..].to_owned();
|
||||
let width = match captures.name("WIDTH") {
|
||||
None => 0,
|
||||
Some(m) => m.as_str().parse::<usize>().unwrap(),
|
||||
};
|
||||
match (captures.name("FLAG"), captures.name("TYPE")) {
|
||||
(None, Some(ref t)) => match t.as_str() {
|
||||
"d" | "i" | "u" => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n}{after}")
|
||||
}),
|
||||
"o" => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:o}{after}")
|
||||
}),
|
||||
"x" => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:x}{after}")
|
||||
}),
|
||||
"X" => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:X}{after}")
|
||||
}),
|
||||
_ => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
},
|
||||
(Some(ref f), Some(ref t)) => {
|
||||
match (f.as_str(), t.as_str()) {
|
||||
/*
|
||||
* zero padding
|
||||
*/
|
||||
// decimal
|
||||
("0", "d" | "i" | "u") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:0width$}{after}")
|
||||
}),
|
||||
// octal
|
||||
("0", "o") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:0width$o}{after}")
|
||||
}),
|
||||
// lower hexadecimal
|
||||
("0", "x") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:0width$x}{after}")
|
||||
}),
|
||||
// upper hexadecimal
|
||||
("0", "X") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:0width$X}{after}")
|
||||
}),
|
||||
|
||||
/*
|
||||
* Alternate form
|
||||
*/
|
||||
// octal
|
||||
("#", "o") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:>#width$o}{after}")
|
||||
}),
|
||||
// lower hexadecimal
|
||||
("#", "x") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:>#width$x}{after}")
|
||||
}),
|
||||
// upper hexadecimal
|
||||
("#", "X") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:>#width$X}{after}")
|
||||
}),
|
||||
|
||||
/*
|
||||
* Left adjusted
|
||||
*/
|
||||
// decimal
|
||||
("-", "d" | "i" | "u") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:<#width$}{after}")
|
||||
}),
|
||||
// octal
|
||||
("-", "o") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:<#width$o}{after}")
|
||||
}),
|
||||
// lower hexadecimal
|
||||
("-", "x") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:<#width$x}{after}")
|
||||
}),
|
||||
// upper hexadecimal
|
||||
("-", "X") => Box::new(move |n: usize| -> String {
|
||||
format!("{prefix}{before}{n:<#width$X}{after}")
|
||||
}),
|
||||
|
||||
_ => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
}
|
||||
}
|
||||
_ => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
}
|
||||
}
|
||||
None => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
let format_string = match format_opt {
|
||||
Some(f) => f,
|
||||
None => format!("%0{n_digits}u"),
|
||||
};
|
||||
|
||||
// there cannot be more than one format pattern
|
||||
if captures_iter.next().is_some() {
|
||||
return Err(CsplitError::SuffixFormatTooManyPercents);
|
||||
}
|
||||
custom_fn
|
||||
}
|
||||
};
|
||||
let format = match Format::<UnsignedInt>::parse(format_string) {
|
||||
Ok(format) => Ok(format),
|
||||
Err(FormatError::TooManySpecs(_)) => Err(CsplitError::SuffixFormatTooManyPercents),
|
||||
Err(_) => Err(CsplitError::SuffixFormatIncorrect),
|
||||
}?;
|
||||
|
||||
Ok(Self { fn_split_name })
|
||||
Ok(Self {
|
||||
prefix: prefix.as_bytes().to_owned(),
|
||||
format,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the filename of the i-th split.
|
||||
pub fn get(&self, n: usize) -> String {
|
||||
(self.fn_split_name)(n)
|
||||
let mut v = self.prefix.clone();
|
||||
self.format.fmt(&mut v, n as u64).unwrap();
|
||||
String::from_utf8_lossy(&v).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -279,7 +190,7 @@ mod tests {
|
|||
#[test]
|
||||
fn alternate_form_octal() {
|
||||
let split_name = SplitName::new(None, Some(String::from("cst-%#10o-")), None).unwrap();
|
||||
assert_eq!(split_name.get(42), "xxcst- 0o52-");
|
||||
assert_eq!(split_name.get(42), "xxcst- 052-");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -291,7 +202,7 @@ mod tests {
|
|||
#[test]
|
||||
fn alternate_form_upper_hex() {
|
||||
let split_name = SplitName::new(None, Some(String::from("cst-%#10X-")), None).unwrap();
|
||||
assert_eq!(split_name.get(42), "xxcst- 0x2A-");
|
||||
assert_eq!(split_name.get(42), "xxcst- 0X2A-");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -315,19 +226,19 @@ mod tests {
|
|||
#[test]
|
||||
fn left_adjusted_octal() {
|
||||
let split_name = SplitName::new(None, Some(String::from("cst-%-10o-")), None).unwrap();
|
||||
assert_eq!(split_name.get(42), "xxcst-0o52 -");
|
||||
assert_eq!(split_name.get(42), "xxcst-52 -");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn left_adjusted_lower_hex() {
|
||||
let split_name = SplitName::new(None, Some(String::from("cst-%-10x-")), None).unwrap();
|
||||
assert_eq!(split_name.get(42), "xxcst-0x2a -");
|
||||
assert_eq!(split_name.get(42), "xxcst-2a -");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn left_adjusted_upper_hex() {
|
||||
let split_name = SplitName::new(None, Some(String::from("cst-%-10X-")), None).unwrap();
|
||||
assert_eq!(split_name.get(42), "xxcst-0x2A -");
|
||||
assert_eq!(split_name.get(42), "xxcst-2A -");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -80,7 +80,7 @@ fs = ["dunce", "libc", "winapi-util", "windows-sys"]
|
|||
fsext = ["libc", "time", "windows-sys"]
|
||||
fsxattr = ["xattr"]
|
||||
lines = []
|
||||
format = ["itertools"]
|
||||
format = ["itertools", "quoting-style"]
|
||||
mode = ["libc"]
|
||||
perms = ["libc", "walkdir"]
|
||||
pipes = []
|
||||
|
|
|
@ -60,7 +60,7 @@ pub enum PositiveSign {
|
|||
Space,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum NumberAlignment {
|
||||
Left,
|
||||
RightSpace,
|
||||
|
@ -168,6 +168,24 @@ impl Formatter for UnsignedInt {
|
|||
}
|
||||
|
||||
fn try_from_spec(s: Spec) -> Result<Self, FormatError> {
|
||||
// A signed int spec might be mapped to an unsigned int spec if no sign is specified
|
||||
let s = if let Spec::SignedInt {
|
||||
width,
|
||||
precision,
|
||||
positive_sign: PositiveSign::None,
|
||||
alignment,
|
||||
} = s
|
||||
{
|
||||
Spec::UnsignedInt {
|
||||
variant: UnsignedIntVariant::Decimal,
|
||||
width,
|
||||
precision,
|
||||
alignment,
|
||||
}
|
||||
} else {
|
||||
s
|
||||
};
|
||||
|
||||
let Spec::UnsignedInt {
|
||||
variant,
|
||||
width,
|
||||
|
|
|
@ -87,6 +87,40 @@ enum Length {
|
|||
LongDouble,
|
||||
}
|
||||
|
||||
/// The set of flag characters seen at the start of a format spec, one boolean
/// per flag. The derived `Default` is "no flag set" (every field `false`).
#[derive(Default, PartialEq, Eq)]
|
||||
struct Flags {
|
||||
// Set by `-`; `Spec::parse` turns it into left alignment.
minus: bool,
|
||||
// Set by `+`; selects `PositiveSign::Plus` and wins over `space`.
plus: bool,
|
||||
// Set by ` `; selects `PositiveSign::Space` when `plus` is not set.
space: bool,
|
||||
// Set by `#`; requests a prefix / forced decimal point and is rejected
// for the `c`, `s`, `d`, `i` and `u` conversions.
hash: bool,
|
||||
// Set by `0`; only results in zero padding when `minus` is unset and no
// precision was given.
zero: bool,
|
||||
}
|
||||
|
||||
impl Flags {
|
||||
/// Reads the run of flag bytes (`-`, `+`, ` `, `#`, `0`) found in `rest`
/// starting at position `*index`.
///
/// `*index` is advanced past every flag byte that is consumed and is left
/// pointing at the first byte that is not a flag (or at the end of the
/// input). `rest` itself is only read, never re-sliced. A flag that occurs
/// more than once simply sets its field again.
pub fn parse(rest: &mut &[u8], index: &mut usize) -> Self {
|
||||
// Start from "no flags set".
let mut flags = Self::default();
|
||||
|
||||
// `get` returns `None` at the end of the input, which ends the loop.
while let Some(x) = rest.get(*index) {
|
||||
match x {
|
||||
b'-' => flags.minus = true,
|
||||
b'+' => flags.plus = true,
|
||||
b' ' => flags.space = true,
|
||||
b'#' => flags.hash = true,
|
||||
b'0' => flags.zero = true,
|
||||
// First non-flag byte: stop without consuming it.
_ => break,
|
||||
}
|
||||
*index += 1;
|
||||
}
|
||||
|
||||
flags
|
||||
}
|
||||
|
||||
/// Whether any of the flags is set to true
|
||||
fn any(&self) -> bool {
|
||||
// Any value that differs from the all-`false` default has a flag set.
self != &Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Spec {
|
||||
pub fn parse<'a>(rest: &mut &'a [u8]) -> Result<Self, &'a [u8]> {
|
||||
// Based on the C++ reference, the spec format looks like:
|
||||
|
@ -97,34 +131,12 @@ impl Spec {
|
|||
let mut index = 0;
|
||||
let start = *rest;
|
||||
|
||||
let mut minus = false;
|
||||
let mut plus = false;
|
||||
let mut space = false;
|
||||
let mut hash = false;
|
||||
let mut zero = false;
|
||||
let flags = Flags::parse(rest, &mut index);
|
||||
|
||||
while let Some(x) = rest.get(index) {
|
||||
match x {
|
||||
b'-' => minus = true,
|
||||
b'+' => plus = true,
|
||||
b' ' => space = true,
|
||||
b'#' => hash = true,
|
||||
b'0' => zero = true,
|
||||
_ => break,
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
|
||||
let alignment = match (minus, zero) {
|
||||
(true, _) => NumberAlignment::Left,
|
||||
(false, true) => NumberAlignment::RightZero,
|
||||
(false, false) => NumberAlignment::RightSpace,
|
||||
};
|
||||
|
||||
let positive_sign = match (plus, space) {
|
||||
(true, _) => PositiveSign::Plus,
|
||||
(false, true) => PositiveSign::Space,
|
||||
(false, false) => PositiveSign::None,
|
||||
let positive_sign = match flags {
|
||||
Flags { plus: true, .. } => PositiveSign::Plus,
|
||||
Flags { space: true, .. } => PositiveSign::Space,
|
||||
_ => PositiveSign::None,
|
||||
};
|
||||
|
||||
let width = eat_asterisk_or_number(rest, &mut index);
|
||||
|
@ -136,6 +148,17 @@ impl Spec {
|
|||
None
|
||||
};
|
||||
|
||||
// The `0` flag is ignored if `-` is given or a precision is specified.
|
||||
// So the only case for RightZero is when `-` is not given and the
|
||||
// precision is none.
|
||||
let alignment = if flags.minus {
|
||||
NumberAlignment::Left
|
||||
} else if flags.zero && precision.is_none() {
|
||||
NumberAlignment::RightZero
|
||||
} else {
|
||||
NumberAlignment::RightSpace
|
||||
};
|
||||
|
||||
// We ignore the length. It's not really relevant to printf
|
||||
let _ = Self::parse_length(rest, &mut index);
|
||||
|
||||
|
@ -148,38 +171,38 @@ impl Spec {
|
|||
Ok(match type_spec {
|
||||
// GNU accepts minus, plus and space even though they are not used
|
||||
b'c' => {
|
||||
if hash || precision.is_some() {
|
||||
if flags.hash || precision.is_some() {
|
||||
return Err(&start[..index]);
|
||||
}
|
||||
Self::Char {
|
||||
width,
|
||||
align_left: minus,
|
||||
align_left: flags.minus,
|
||||
}
|
||||
}
|
||||
b's' => {
|
||||
if hash {
|
||||
if flags.hash {
|
||||
return Err(&start[..index]);
|
||||
}
|
||||
Self::String {
|
||||
precision,
|
||||
width,
|
||||
align_left: minus,
|
||||
align_left: flags.minus,
|
||||
}
|
||||
}
|
||||
b'b' => {
|
||||
if hash || minus || plus || space || width.is_some() || precision.is_some() {
|
||||
if flags.any() || width.is_some() || precision.is_some() {
|
||||
return Err(&start[..index]);
|
||||
}
|
||||
Self::EscapedString
|
||||
}
|
||||
b'q' => {
|
||||
if hash || minus || plus || space || width.is_some() || precision.is_some() {
|
||||
if flags.any() || width.is_some() || precision.is_some() {
|
||||
return Err(&start[..index]);
|
||||
}
|
||||
Self::QuotedString
|
||||
}
|
||||
b'd' | b'i' => {
|
||||
if hash {
|
||||
if flags.hash {
|
||||
return Err(&start[..index]);
|
||||
}
|
||||
Self::SignedInt {
|
||||
|
@ -191,10 +214,10 @@ impl Spec {
|
|||
}
|
||||
c @ (b'u' | b'o' | b'x' | b'X') => {
|
||||
// Normal unsigned integer cannot have a prefix
|
||||
if *c == b'u' && hash {
|
||||
if *c == b'u' && flags.hash {
|
||||
return Err(&start[..index]);
|
||||
}
|
||||
let prefix = match hash {
|
||||
let prefix = match flags.hash {
|
||||
false => Prefix::No,
|
||||
true => Prefix::Yes,
|
||||
};
|
||||
|
@ -222,7 +245,7 @@ impl Spec {
|
|||
b'a' | b'A' => FloatVariant::Hexadecimal,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
force_decimal: match hash {
|
||||
force_decimal: match flags.hash {
|
||||
false => ForceDecimal::No,
|
||||
true => ForceDecimal::Yes,
|
||||
},
|
||||
|
|
|
@ -1342,3 +1342,20 @@ fn test_line_num_range_with_up_to_match3() {
|
|||
assert_eq!(at.read("xx01"), "");
|
||||
assert_eq!(at.read("xx02"), generate(10, 51));
|
||||
}
|
||||
|
||||
// Checks that `--suffix-format` accepts a spec with a precision, and that
// adding the `0` flag to such a spec does not change the generated file
// names: both formats in the list must produce exactly the same two splits.
#[test]
|
||||
fn precision_format() {
|
||||
for f in ["%#6.3x", "%0#6.3x"] {
|
||||
// Fresh fixture directory and command for each format under test.
let (at, mut ucmd) = at_and_ucmd!();
|
||||
// Split `numbers50.txt` at line 10 using the custom suffix format.
ucmd.args(&["numbers50.txt", "10", "--suffix-format", f])
|
||||
.succeeds()
|
||||
// NOTE(review): presumably the sizes csplit reports for the two
// splits — confirm against the fixture.
.stdout_only("18\n123\n");
|
||||
|
||||
// Exactly two output files with the default `xx` prefix must exist.
let count = glob(&at.plus_as_string("xx*"))
|
||||
.expect("there should be splits created")
|
||||
.count();
|
||||
assert_eq!(count, 2);
|
||||
// First split: suffix for index 0, holding the content before line 10.
assert_eq!(at.read("xx 000"), generate(1, 10));
|
||||
// Second split: suffix for index 1 (with the `#` hex prefix), holding
// the remainder of the file.
assert_eq!(at.read("xx 0x001"), generate(10, 51));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue