1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

split: refactor to add SuffixType enum

Refactor the code to use a `SuffixType` enumeration with two members,
`Alphabetic` and `NumericDecimal`, representing the two currently
supported ways of producing filename suffixes. This prepares the code
to more easily support other formats, like numeric hexadecimal.
This commit is contained in:
Jeffrey Finkelstein 2022-01-30 21:35:43 -05:00
parent 1167d811d5
commit 7fbd805713
2 changed files with 58 additions and 24 deletions

View file

@ -13,12 +13,13 @@
//! //!
//! ```rust,ignore //! ```rust,ignore
//! use crate::filenames::FilenameIterator; //! use crate::filenames::FilenameIterator;
//! use crate::filenames::SuffixType;
//! //!
//! let prefix = "chunk_".to_string(); //! let prefix = "chunk_".to_string();
//! let suffix = ".txt".to_string(); //! let suffix = ".txt".to_string();
//! let width = 2; //! let width = 2;
//! let use_numeric_suffix = false; //! let suffix_type = SuffixType::Alphabetic;
//! let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); //! let it = FilenameIterator::new(prefix, suffix, width, suffix_type);
//! //!
//! assert_eq!(it.next().unwrap(), "chunk_aa.txt"); //! assert_eq!(it.next().unwrap(), "chunk_aa.txt");
//! assert_eq!(it.next().unwrap(), "chunk_ab.txt"); //! assert_eq!(it.next().unwrap(), "chunk_ab.txt");
@ -28,6 +29,26 @@ use crate::number::DynamicWidthNumber;
use crate::number::FixedWidthNumber; use crate::number::FixedWidthNumber;
use crate::number::Number; use crate::number::Number;
/// The format to use for suffixes in the filename for each output chunk.
///
/// Each variant selects the set of characters from which the suffix
/// portion of a filename is built; see [`SuffixType::radix`] for the
/// number of distinct characters each one provides.
///
/// The type is a plain, field-less enumeration, so it is cheap to copy,
/// can be compared for equality, and can be printed with `{:?}` in
/// diagnostics and test assertions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SuffixType {
    /// Lowercase ASCII alphabetic characters.
    Alphabetic,

    /// Decimal numbers.
    NumericDecimal,
}
impl SuffixType {
/// The radix to use when representing the suffix string as digits.
fn radix(&self) -> u8 {
match self {
SuffixType::Alphabetic => 26,
SuffixType::NumericDecimal => 10,
}
}
}
/// Compute filenames from a given index. /// Compute filenames from a given index.
/// ///
/// This iterator yields filenames for use with ``split``. /// This iterator yields filenames for use with ``split``.
@ -42,8 +63,8 @@ use crate::number::Number;
/// width in characters. In that case, after the iterator yields each /// width in characters. In that case, after the iterator yields each
/// string of that width, the iterator is exhausted. /// string of that width, the iterator is exhausted.
/// ///
/// Finally, if `use_numeric_suffix` is `true`, then numbers will be /// Finally, `suffix_type` controls which type of suffix to produce,
/// used instead of lowercase ASCII alphabetic characters. /// alphabetic or numeric.
/// ///
/// # Examples /// # Examples
/// ///
@ -52,28 +73,30 @@ use crate::number::Number;
/// ///
/// ```rust,ignore /// ```rust,ignore
/// use crate::filenames::FilenameIterator; /// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
/// ///
/// let prefix = "chunk_".to_string(); /// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string(); /// let suffix = ".txt".to_string();
/// let width = 2; /// let width = 2;
/// let use_numeric_suffix = false; /// let suffix_type = SuffixType::Alphabetic;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); /// let it = FilenameIterator::new(prefix, suffix, width, suffix_type);
/// ///
/// assert_eq!(it.next().unwrap(), "chunk_aa.txt"); /// assert_eq!(it.next().unwrap(), "chunk_aa.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ab.txt"); /// assert_eq!(it.next().unwrap(), "chunk_ab.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ac.txt"); /// assert_eq!(it.next().unwrap(), "chunk_ac.txt");
/// ``` /// ```
/// ///
/// For numeric filenames, set `use_numeric_suffix` to `true`: /// For numeric filenames, use `SuffixType::NumericDecimal`:
/// ///
/// ```rust,ignore /// ```rust,ignore
/// use crate::filenames::FilenameIterator; /// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
/// ///
/// let prefix = "chunk_".to_string(); /// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string(); /// let suffix = ".txt".to_string();
/// let width = 2; /// let width = 2;
/// let use_numeric_suffix = true; /// let suffix_type = SuffixType::NumericDecimal;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix); /// let it = FilenameIterator::new(prefix, suffix, width, suffix_type);
/// ///
/// assert_eq!(it.next().unwrap(), "chunk_00.txt"); /// assert_eq!(it.next().unwrap(), "chunk_00.txt");
/// assert_eq!(it.next().unwrap(), "chunk_01.txt"); /// assert_eq!(it.next().unwrap(), "chunk_01.txt");
@ -91,9 +114,9 @@ impl<'a> FilenameIterator<'a> {
prefix: &'a str, prefix: &'a str,
additional_suffix: &'a str, additional_suffix: &'a str,
suffix_length: usize, suffix_length: usize,
use_numeric_suffix: bool, suffix_type: SuffixType,
) -> FilenameIterator<'a> { ) -> FilenameIterator<'a> {
let radix = if use_numeric_suffix { 10 } else { 26 }; let radix = suffix_type.radix();
let number = if suffix_length == 0 { let number = if suffix_length == 0 {
Number::DynamicWidth(DynamicWidthNumber::new(radix)) Number::DynamicWidth(DynamicWidthNumber::new(radix))
} else { } else {
@ -130,39 +153,40 @@ impl<'a> Iterator for FilenameIterator<'a> {
mod tests { mod tests {
use crate::filenames::FilenameIterator; use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
#[test] #[test]
fn test_filename_iterator_alphabetic_fixed_width() { fn test_filename_iterator_alphabetic_fixed_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
assert_eq!(it.next().unwrap(), "chunk_aa.txt"); assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(it.next().unwrap(), "chunk_ab.txt"); assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(it.next().unwrap(), "chunk_ac.txt"); assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt"); assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
assert_eq!(it.next(), None); assert_eq!(it.next(), None);
} }
#[test] #[test]
fn test_filename_iterator_numeric_fixed_width() { fn test_filename_iterator_numeric_fixed_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
assert_eq!(it.next().unwrap(), "chunk_00.txt"); assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(it.next().unwrap(), "chunk_01.txt"); assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt"); assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true); let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt"); assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
assert_eq!(it.next(), None); assert_eq!(it.next(), None);
} }
#[test] #[test]
fn test_filename_iterator_alphabetic_dynamic_width() { fn test_filename_iterator_alphabetic_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.next().unwrap(), "chunk_aa.txt"); assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(it.next().unwrap(), "chunk_ab.txt"); assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(it.next().unwrap(), "chunk_ac.txt"); assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt"); assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
assert_eq!(it.next().unwrap(), "chunk_zaaa.txt"); assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
assert_eq!(it.next().unwrap(), "chunk_zaab.txt"); assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
@ -170,12 +194,12 @@ mod tests {
#[test] #[test]
fn test_filename_iterator_numeric_dynamic_width() { fn test_filename_iterator_numeric_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.next().unwrap(), "chunk_00.txt"); assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(it.next().unwrap(), "chunk_01.txt"); assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt"); assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true); let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt"); assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
assert_eq!(it.next().unwrap(), "chunk_9000.txt"); assert_eq!(it.next().unwrap(), "chunk_9000.txt");
assert_eq!(it.next().unwrap(), "chunk_9001.txt"); assert_eq!(it.next().unwrap(), "chunk_9001.txt");

View file

@ -12,6 +12,7 @@ mod number;
mod platform; mod platform;
use crate::filenames::FilenameIterator; use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::env; use std::env;
use std::fmt; use std::fmt;
@ -240,13 +241,22 @@ impl Strategy {
} }
} }
/// Determine the filename suffix type requested on the command line.
///
/// Yields [`SuffixType::NumericDecimal`] when the `OPT_NUMERIC_SUFFIXES`
/// option occurs at least once in `matches`, and
/// [`SuffixType::Alphabetic`] otherwise.
fn suffix_type_from(matches: &ArgMatches) -> SuffixType {
    match matches.occurrences_of(OPT_NUMERIC_SUFFIXES) {
        // The option was never given: fall back to alphabetic suffixes.
        0 => SuffixType::Alphabetic,
        _ => SuffixType::NumericDecimal,
    }
}
/// Parameters that control how a file gets split. /// Parameters that control how a file gets split.
/// ///
/// You can convert an [`ArgMatches`] instance into a [`Settings`] /// You can convert an [`ArgMatches`] instance into a [`Settings`]
/// instance by calling [`Settings::from`]. /// instance by calling [`Settings::from`].
struct Settings { struct Settings {
prefix: String, prefix: String,
numeric_suffix: bool, suffix_type: SuffixType,
suffix_length: usize, suffix_length: usize,
additional_suffix: String, additional_suffix: String,
input: String, input: String,
@ -314,7 +324,7 @@ impl Settings {
suffix_length: suffix_length_str suffix_length: suffix_length_str
.parse() .parse()
.map_err(|_| SettingsError::SuffixLength(suffix_length_str.to_string()))?, .map_err(|_| SettingsError::SuffixLength(suffix_length_str.to_string()))?,
numeric_suffix: matches.occurrences_of(OPT_NUMERIC_SUFFIXES) > 0, suffix_type: suffix_type_from(matches),
additional_suffix, additional_suffix,
verbose: matches.occurrences_of("verbose") > 0, verbose: matches.occurrences_of("verbose") > 0,
strategy: Strategy::from(matches).map_err(SettingsError::Strategy)?, strategy: Strategy::from(matches).map_err(SettingsError::Strategy)?,
@ -374,7 +384,7 @@ impl<'a> ByteChunkWriter<'a> {
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.numeric_suffix, settings.suffix_type,
); );
let filename = filename_iterator.next()?; let filename = filename_iterator.next()?;
if settings.verbose { if settings.verbose {
@ -502,7 +512,7 @@ impl<'a> LineChunkWriter<'a> {
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.numeric_suffix, settings.suffix_type,
); );
let filename = filename_iterator.next()?; let filename = filename_iterator.next()?;
if settings.verbose { if settings.verbose {
@ -594,7 +604,7 @@ where
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.numeric_suffix, settings.suffix_type,
); );
// Create one writer for each chunk. This will create each // Create one writer for each chunk. This will create each