1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

Merge pull request #2981 from jfinkels/split-hex-numbers

split: add support for -x option (hex suffixes)
This commit is contained in:
Terts Diepraam 2022-02-17 23:20:58 +01:00 committed by GitHub
commit e1a611374a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 193 additions and 48 deletions

View file

@ -13,12 +13,13 @@
//!
//! ```rust,ignore
//! use crate::filenames::FilenameIterator;
//! use crate::filenames::SuffixType;
//!
//! let prefix = "chunk_".to_string();
//! let suffix = ".txt".to_string();
//! let width = 2;
//! let use_numeric_suffix = false;
//! let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
//! let suffix_type = SuffixType::Alphabetic;
//! let mut it = FilenameIterator::new(&prefix, &suffix, width, suffix_type);
//!
//! assert_eq!(it.next().unwrap(), "chunk_aa.txt");
//! assert_eq!(it.next().unwrap(), "chunk_ab.txt");
@ -28,6 +29,30 @@ use crate::number::DynamicWidthNumber;
use crate::number::FixedWidthNumber;
use crate::number::Number;
/// The format to use for suffixes in the filename for each output chunk.
///
/// Each variant maps to the radix returned by [`SuffixType::radix`].
/// The type is a plain, field-less enum, so it is cheap to copy and can
/// be compared and debug-printed directly (for example in `assert_eq!`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SuffixType {
    /// Lowercase ASCII alphabetic characters.
    Alphabetic,

    /// Decimal numbers.
    NumericDecimal,

    /// Hexadecimal numbers.
    NumericHexadecimal,
}

impl SuffixType {
    /// The radix to use when representing the suffix string as digits.
    ///
    /// Returns 26 for [`SuffixType::Alphabetic`], 10 for
    /// [`SuffixType::NumericDecimal`], and 16 for
    /// [`SuffixType::NumericHexadecimal`].
    fn radix(&self) -> u8 {
        // Exhaustive on purpose: adding a variant must force a decision
        // about its radix at compile time.
        match self {
            Self::Alphabetic => 26,
            Self::NumericDecimal => 10,
            Self::NumericHexadecimal => 16,
        }
    }
}
/// Compute filenames from a given index.
///
/// This iterator yields filenames for use with ``split``.
@ -42,8 +67,8 @@ use crate::number::Number;
/// width in characters. In that case, after the iterator yields each
/// string of that width, the iterator is exhausted.
///
/// Finally, if `use_numeric_suffix` is `true`, then numbers will be
/// used instead of lowercase ASCII alphabetic characters.
/// Finally, `suffix_type` controls which type of suffix to produce,
/// alphabetic or numeric.
///
/// # Examples
///
@ -52,28 +77,30 @@ use crate::number::Number;
///
/// ```rust,ignore
/// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
///
/// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string();
/// let width = 2;
/// let use_numeric_suffix = false;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
/// let suffix_type = SuffixType::Alphabetic;
/// let mut it = FilenameIterator::new(&prefix, &suffix, width, suffix_type);
///
/// assert_eq!(it.next().unwrap(), "chunk_aa.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ab.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ac.txt");
/// ```
///
/// For numeric filenames, set `use_numeric_suffix` to `true`:
/// For decimal numeric filenames, use `SuffixType::NumericDecimal`:
///
/// ```rust,ignore
/// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
///
/// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string();
/// let width = 2;
/// let use_numeric_suffix = true;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
/// let suffix_type = SuffixType::NumericDecimal;
/// let mut it = FilenameIterator::new(&prefix, &suffix, width, suffix_type);
///
/// assert_eq!(it.next().unwrap(), "chunk_00.txt");
/// assert_eq!(it.next().unwrap(), "chunk_01.txt");
@ -91,9 +118,9 @@ impl<'a> FilenameIterator<'a> {
prefix: &'a str,
additional_suffix: &'a str,
suffix_length: usize,
use_numeric_suffix: bool,
suffix_type: SuffixType,
) -> FilenameIterator<'a> {
let radix = if use_numeric_suffix { 10 } else { 26 };
let radix = suffix_type.radix();
let number = if suffix_length == 0 {
Number::DynamicWidth(DynamicWidthNumber::new(radix))
} else {
@ -130,39 +157,40 @@ impl<'a> Iterator for FilenameIterator<'a> {
mod tests {
use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
#[test]
fn test_filename_iterator_alphabetic_fixed_width() {
    // With a fixed width of 2 and alphabetic suffixes, the first
    // filenames count up from "aa".
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
    assert_eq!(it.next().unwrap(), "chunk_aa.txt");
    assert_eq!(it.next().unwrap(), "chunk_ab.txt");
    assert_eq!(it.next().unwrap(), "chunk_ac.txt");

    // A fresh iterator yields exactly 26 * 26 names; the last one is
    // "zz" and after that the iterator is exhausted.
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
    assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
    assert_eq!(it.next(), None);
}
#[test]
fn test_filename_iterator_numeric_fixed_width() {
    // With a fixed width of 2 and decimal suffixes, the first
    // filenames count up from "00".
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
    assert_eq!(it.next().unwrap(), "chunk_00.txt");
    assert_eq!(it.next().unwrap(), "chunk_01.txt");
    assert_eq!(it.next().unwrap(), "chunk_02.txt");

    // A fresh iterator yields exactly 10 * 10 names; the last one is
    // "99" and after that the iterator is exhausted.
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
    assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
    assert_eq!(it.next(), None);
}
#[test]
fn test_filename_iterator_alphabetic_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
@ -170,12 +198,12 @@ mod tests {
#[test]
fn test_filename_iterator_numeric_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
assert_eq!(it.next().unwrap(), "chunk_9000.txt");
assert_eq!(it.next().unwrap(), "chunk_9001.txt");

View file

@ -40,13 +40,19 @@ impl Error for Overflow {}
/// specifically for the `split` program. See the
/// [`DynamicWidthNumber`] documentation for more information.
///
/// Numbers of radix 10 are displayable and rendered as decimal
/// numbers (for example, "00" or "917"). Numbers of radix 26 are
/// displayable and rendered as lowercase ASCII alphabetic characters
/// (for example, "aa" or "zax"). Numbers of other radices cannot be
/// displayed. The display of a [`DynamicWidthNumber`] includes a
/// prefix whose length depends on the width of the number. See the
/// [`DynamicWidthNumber`] documentation for more information.
/// Numbers of radix
///
/// * 10 are displayable and rendered as decimal numbers (for example,
/// "00" or "917"),
/// * 16 are displayable and rendered as hexadecimal numbers (for example,
/// "00" or "e7f"),
/// * 26 are displayable and rendered as lowercase ASCII alphabetic
/// characters (for example, "aa" or "zax").
///
/// Numbers of other radices cannot be displayed. The display of a
/// [`DynamicWidthNumber`] includes a prefix whose length depends on
/// the width of the number. See the [`DynamicWidthNumber`]
/// documentation for more information.
///
/// The digits of a number are accessible via the [`Number::digits`]
/// method. The digits are represented as a [`Vec<u8>`] with the most
@ -169,12 +175,12 @@ impl Display for Number {
///
/// # Displaying
///
/// This number is only displayable if `radix` is 10 or `radix` is
/// 26. If `radix` is 10, then the digits are concatenated and
/// displayed as a fixed-width decimal number. If `radix` is 26, then
/// each digit is translated to the corresponding lowercase ASCII
/// alphabetic character (that is, 'a', 'b', 'c', etc.) and
/// concatenated.
/// This number is only displayable if `radix` is 10, 16, or 26. If
/// `radix` is 10 or 16, then the digits are concatenated and
/// displayed as a fixed-width decimal or hexadecimal number,
/// respectively. If `radix` is 26, then each digit is translated to
/// the corresponding lowercase ASCII alphabetic character (that is,
/// 'a', 'b', 'c', etc.) and concatenated.
#[derive(Clone)]
pub struct FixedWidthNumber {
radix: u8,
@ -228,6 +234,14 @@ impl Display for FixedWidthNumber {
let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect();
write!(f, "{}", digits)
}
16 => {
let digits: String = self
.digits
.iter()
.map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char)
.collect();
write!(f, "{}", digits)
}
26 => {
let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
write!(f, "{}", digits)
@ -264,14 +278,15 @@ impl Display for FixedWidthNumber {
///
/// # Displaying
///
/// This number is only displayable if `radix` is 10 or `radix` is
/// 26. If `radix` is 10, then the digits are concatenated and
/// displayed as a fixed-width decimal number with a prefix of `n - 2`
/// instances of the character '9', where `n` is the number of digits.
/// If `radix` is 26, then each digit is translated to the
/// corresponding lowercase ASCII alphabetic character (that is, 'a',
/// 'b', 'c', etc.) and concatenated with a prefix of `n - 2`
/// instances of the character 'z'.
/// This number is only displayable if `radix` is 10, 16, or 26. If
/// `radix` is 10 or 16, then the digits are concatenated and
/// displayed as a fixed-width decimal or hexadecimal number,
/// respectively, with a prefix of `n - 2` instances of the character
/// '9' or 'f', respectively, where `n` is the number of digits. If
/// `radix` is 26, then each digit is translated to the corresponding
/// lowercase ASCII alphabetic character (that is, 'a', 'b', 'c',
/// etc.) and concatenated with a prefix of `n - 2` instances of the
/// character 'z'.
///
/// This notion of displaying the number is specific to the `split`
/// program.
@ -349,6 +364,21 @@ impl Display for DynamicWidthNumber {
digits = digits,
)
}
16 => {
let num_fill_chars = self.digits.len() - 2;
let digits: String = self
.digits
.iter()
.map(|d| (if *d < 10 { b'0' + d } else { b'a' + (d - 10) }) as char)
.collect();
write!(
f,
"{empty:f<num_fill_chars$}{digits}",
empty = "",
num_fill_chars = num_fill_chars,
digits = digits,
)
}
26 => {
let num_fill_chars = self.digits.len() - 2;
let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
@ -424,7 +454,7 @@ mod tests {
}
#[test]
fn test_dynamic_width_number_display_numeric() {
fn test_dynamic_width_number_display_numeric_decimal() {
fn num(n: usize) -> Number {
let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10));
for _ in 0..n {
@ -444,6 +474,30 @@ mod tests {
assert_eq!(format!("{}", num(10 * 99 + 1)), "990001");
}
#[test]
fn test_dynamic_width_number_display_numeric_hexadecimal() {
    // Build a radix-16 dynamic-width number and increment it `steps`
    // times, panicking if any increment fails.
    fn num(steps: usize) -> Number {
        let mut counter = Number::DynamicWidth(DynamicWidthNumber::new(16));
        (0..steps).for_each(|_| counter.increment().unwrap());
        counter
    }

    // (number of increments, expected rendering). The rows around
    // 16 * 15 and 16 * 255 straddle the points where the displayed
    // width grows and an extra 'f' fill character is prepended.
    let cases = [
        (0, "00"),
        (15, "0f"),
        (16, "10"),
        (17, "11"),
        (18, "12"),
        (16 * 15 - 1, "ef"),
        (16 * 15, "f000"),
        (16 * 15 + 1, "f001"),
        (16 * 255 - 1, "feff"),
        (16 * 255, "ff0000"),
        (16 * 255 + 1, "ff0001"),
    ];
    for &(steps, expected) in &cases {
        assert_eq!(format!("{}", num(steps)), expected);
    }
}
#[test]
fn test_fixed_width_number_increment() {
let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2));
@ -493,7 +547,7 @@ mod tests {
}
#[test]
fn test_fixed_width_number_display_numeric() {
fn test_fixed_width_number_display_numeric_decimal() {
fn num(n: usize) -> Result<Number, Overflow> {
let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2));
for _ in 0..n {
@ -510,4 +564,23 @@ mod tests {
assert_eq!(format!("{}", num(10 * 10 - 1).unwrap()), "99");
assert!(num(10 * 10).is_err());
}
#[test]
fn test_fixed_width_number_display_numeric_hexadecimal() {
    // Build a `FixedWidthNumber::new(16, 2)` and increment it `steps`
    // times, returning the first overflow error encountered, if any.
    fn num(steps: usize) -> Result<Number, Overflow> {
        let mut counter = Number::FixedWidth(FixedWidthNumber::new(16, 2));
        (0..steps).try_for_each(|_| counter.increment())?;
        Ok(counter)
    }

    // (number of increments, expected two-character rendering).
    let cases = [
        (0, "00"),
        (15, "0f"),
        (17, "11"),
        (16 * 15 - 1, "ef"),
        (16 * 15, "f0"),
        (16 * 16 - 1, "ff"),
    ];
    for &(steps, expected) in &cases {
        assert_eq!(format!("{}", num(steps).unwrap()), expected);
    }

    // One increment past "ff" must be reported as an error rather
    // than rendered.
    assert!(num(16 * 16).is_err());
}
}

View file

@ -12,6 +12,7 @@ mod number;
mod platform;
use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::env;
use std::fmt;
@ -31,6 +32,7 @@ static OPT_ADDITIONAL_SUFFIX: &str = "additional-suffix";
static OPT_FILTER: &str = "filter";
static OPT_NUMBER: &str = "number";
static OPT_NUMERIC_SUFFIXES: &str = "numeric-suffixes";
static OPT_HEX_SUFFIXES: &str = "hex-suffixes";
static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
static OPT_VERBOSE: &str = "verbose";
@ -142,6 +144,14 @@ pub fn uu_app<'a>() -> App<'a> {
.default_value(OPT_DEFAULT_SUFFIX_LENGTH)
.help("use suffixes of length N (default 2)"),
)
.arg(
Arg::new(OPT_HEX_SUFFIXES)
.short('x')
.long(OPT_HEX_SUFFIXES)
.takes_value(true)
.default_missing_value("0")
.help("use hex suffixes starting at 0, not alphabetic"),
)
.arg(
Arg::new(OPT_VERBOSE)
.long(OPT_VERBOSE)
@ -250,13 +260,24 @@ impl Strategy {
}
}
/// Determine the suffix type requested on the command line.
///
/// The numeric-suffixes option is checked first, so it wins when both
/// it and the hex-suffixes option occur. If neither occurs, the
/// suffix type is alphabetic.
fn suffix_type_from(matches: &ArgMatches) -> SuffixType {
    // True when the named option occurred at least once.
    let given = |option: &str| matches.occurrences_of(option) > 0;

    if given(OPT_NUMERIC_SUFFIXES) {
        return SuffixType::NumericDecimal;
    }
    if given(OPT_HEX_SUFFIXES) {
        return SuffixType::NumericHexadecimal;
    }
    SuffixType::Alphabetic
}
/// Parameters that control how a file gets split.
///
/// You can convert an [`ArgMatches`] instance into a [`Settings`]
/// instance by calling [`Settings::from`].
struct Settings {
prefix: String,
numeric_suffix: bool,
suffix_type: SuffixType,
suffix_length: usize,
additional_suffix: String,
input: String,
@ -324,7 +345,7 @@ impl Settings {
suffix_length: suffix_length_str
.parse()
.map_err(|_| SettingsError::SuffixLength(suffix_length_str.to_string()))?,
numeric_suffix: matches.occurrences_of(OPT_NUMERIC_SUFFIXES) > 0,
suffix_type: suffix_type_from(matches),
additional_suffix,
verbose: matches.occurrences_of("verbose") > 0,
strategy: Strategy::from(matches).map_err(SettingsError::Strategy)?,
@ -384,7 +405,7 @@ impl<'a> ByteChunkWriter<'a> {
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
settings.suffix_type,
);
let filename = filename_iterator.next()?;
if settings.verbose {
@ -512,7 +533,7 @@ impl<'a> LineChunkWriter<'a> {
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
settings.suffix_type,
);
let filename = filename_iterator.next()?;
if settings.verbose {
@ -604,7 +625,7 @@ where
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
settings.suffix_type,
);
// Create one writer for each chunk. This will create each

View file

@ -2,7 +2,7 @@
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase fghij klmno pqrst uvwxyz fivelines twohundredfortyonebytes
extern crate rand;
extern crate regex;
@ -409,6 +409,28 @@ fn test_numeric_dynamic_suffix_length() {
assert_eq!(file_read(&at, "x9000"), "a");
}
#[test]
fn test_hex_dynamic_suffix_length() {
    let (at, mut ucmd) = at_and_ucmd!();

    // Split the input into one-byte chunks, naming the output files
    // with hexadecimal digits rather than letters.
    //
    // The input holds (16^2) - 16 + 1 = 241 bytes, which is exactly
    // one more than the 240 two-character names available, so only
    // the final chunk needs a longer filename:
    //
    // x00, x01, x02, ..., xed, xee, xef, xf000
    //
    ucmd.args(&["-x", "-b", "1", "twohundredfortyonebytes.txt"])
        .succeeds();

    // The first 240 chunks use zero-padded, two-character hex names.
    (0..240)
        .map(|i| format!("x{:02x}", i))
        .for_each(|name| assert_eq!(file_read(&at, &name), "a"));

    // The 241st chunk is the one with the widened name.
    assert_eq!(file_read(&at, "xf000"), "a");
}
#[test]
fn test_suffixes_exhausted() {
new_ucmd!()

View file

@ -0,0 +1 @@
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa