1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 19:17:43 +00:00

split: refactor to add SuffixType enum

Refactor the code to use a `SuffixType` enumeration with two members,
`Alphabetic` and `NumericDecimal`, representing the two currently
supported ways of producing filename suffixes. This prepares the code
to more easily support other formats, like numeric hexadecimal.
This commit is contained in:
Jeffrey Finkelstein 2022-01-30 21:35:43 -05:00
parent 1167d811d5
commit 7fbd805713
2 changed files with 58 additions and 24 deletions

View file

@ -13,12 +13,13 @@
//!
//! ```rust,ignore
//! use crate::filenames::FilenameIterator;
//! use crate::filenames::SuffixType;
//!
//! let prefix = "chunk_".to_string();
//! let suffix = ".txt".to_string();
//! let width = 2;
//! let use_numeric_suffix = false;
//! let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
//! let suffix_type = SuffixType::Alphabetic;
//! let mut it = FilenameIterator::new(&prefix, &suffix, width, suffix_type);
//!
//! assert_eq!(it.next().unwrap(), "chunk_aa.txt");
//! assert_eq!(it.next().unwrap(), "chunk_ab.txt");
@ -28,6 +29,26 @@ use crate::number::DynamicWidthNumber;
use crate::number::FixedWidthNumber;
use crate::number::Number;
/// The format to use for suffixes in the filename for each output chunk.
///
/// This is a field-less enum, so values are trivially copyable and can be
/// compared with `==` and printed with `{:?}` (handy in assertions and
/// diagnostics).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SuffixType {
    /// Lowercase ASCII alphabetic characters.
    Alphabetic,

    /// Decimal numbers.
    NumericDecimal,
}
impl SuffixType {
/// The radix to use when representing the suffix string as digits.
fn radix(&self) -> u8 {
match self {
SuffixType::Alphabetic => 26,
SuffixType::NumericDecimal => 10,
}
}
}
/// Compute filenames from a given index.
///
/// This iterator yields filenames for use with ``split``.
@ -42,8 +63,8 @@ use crate::number::Number;
/// width in characters. In that case, after the iterator yields each
/// string of that width, the iterator is exhausted.
///
/// Finally, if `use_numeric_suffix` is `true`, then numbers will be
/// used instead of lowercase ASCII alphabetic characters.
/// Finally, `suffix_type` controls which type of suffix to produce,
/// alphabetic or numeric.
///
/// # Examples
///
@ -52,28 +73,30 @@ use crate::number::Number;
///
/// ```rust,ignore
/// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
///
/// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string();
/// let width = 2;
/// let use_numeric_suffix = false;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
/// let suffix_type = SuffixType::Alphabetic;
/// let mut it = FilenameIterator::new(&prefix, &suffix, width, suffix_type);
///
/// assert_eq!(it.next().unwrap(), "chunk_aa.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ab.txt");
/// assert_eq!(it.next().unwrap(), "chunk_ac.txt");
/// ```
///
/// For numeric filenames, set `use_numeric_suffix` to `true`:
/// For numeric filenames, use `SuffixType::NumericDecimal`:
///
/// ```rust,ignore
/// use crate::filenames::FilenameIterator;
/// use crate::filenames::SuffixType;
///
/// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string();
/// let width = 2;
/// let use_numeric_suffix = true;
/// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
/// let suffix_type = SuffixType::NumericDecimal;
/// let mut it = FilenameIterator::new(&prefix, &suffix, width, suffix_type);
///
/// assert_eq!(it.next().unwrap(), "chunk_00.txt");
/// assert_eq!(it.next().unwrap(), "chunk_01.txt");
@ -91,9 +114,9 @@ impl<'a> FilenameIterator<'a> {
prefix: &'a str,
additional_suffix: &'a str,
suffix_length: usize,
use_numeric_suffix: bool,
suffix_type: SuffixType,
) -> FilenameIterator<'a> {
let radix = if use_numeric_suffix { 10 } else { 26 };
let radix = suffix_type.radix();
let number = if suffix_length == 0 {
Number::DynamicWidth(DynamicWidthNumber::new(radix))
} else {
@ -130,39 +153,40 @@ impl<'a> Iterator for FilenameIterator<'a> {
mod tests {
use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
#[test]
fn test_filename_iterator_alphabetic_fixed_width() {
    // The first few names of a fixed-width (2) alphabetic sequence.
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
    assert_eq!(it.next().unwrap(), "chunk_aa.txt");
    assert_eq!(it.next().unwrap(), "chunk_ab.txt");
    assert_eq!(it.next().unwrap(), "chunk_ac.txt");

    // With a fixed width of 2 there are exactly 26 * 26 names; the last
    // one is "zz" and the iterator is exhausted afterwards.
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::Alphabetic);
    assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
    assert_eq!(it.next(), None);
}
#[test]
fn test_filename_iterator_numeric_fixed_width() {
    // The first few names of a fixed-width (2) decimal sequence.
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
    assert_eq!(it.next().unwrap(), "chunk_00.txt");
    assert_eq!(it.next().unwrap(), "chunk_01.txt");
    assert_eq!(it.next().unwrap(), "chunk_02.txt");

    // With a fixed width of 2 there are exactly 10 * 10 names; the last
    // one is "99" and the iterator is exhausted afterwards.
    let mut it = FilenameIterator::new("chunk_", ".txt", 2, SuffixType::NumericDecimal);
    assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
    assert_eq!(it.next(), None);
}
#[test]
fn test_filename_iterator_alphabetic_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::Alphabetic);
assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
@ -170,12 +194,12 @@ mod tests {
#[test]
fn test_filename_iterator_numeric_dynamic_width() {
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
let mut it = FilenameIterator::new("chunk_", ".txt", 0, SuffixType::NumericDecimal);
assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
assert_eq!(it.next().unwrap(), "chunk_9000.txt");
assert_eq!(it.next().unwrap(), "chunk_9001.txt");

View file

@ -12,6 +12,7 @@ mod number;
mod platform;
use crate::filenames::FilenameIterator;
use crate::filenames::SuffixType;
use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::env;
use std::fmt;
@ -240,13 +241,22 @@ impl Strategy {
}
}
/// Determine which [`SuffixType`] the command-line arguments ask for.
///
/// Returns [`SuffixType::NumericDecimal`] when the option named by
/// `OPT_NUMERIC_SUFFIXES` occurs at least once in `matches`, and
/// [`SuffixType::Alphabetic`] otherwise.
fn suffix_type_from(matches: &ArgMatches) -> SuffixType {
    match matches.occurrences_of(OPT_NUMERIC_SUFFIXES) {
        0 => SuffixType::Alphabetic,
        _ => SuffixType::NumericDecimal,
    }
}
/// Parameters that control how a file gets split.
///
/// You can convert an [`ArgMatches`] instance into a [`Settings`]
/// instance by calling [`Settings::from`].
struct Settings {
prefix: String,
numeric_suffix: bool,
suffix_type: SuffixType,
suffix_length: usize,
additional_suffix: String,
input: String,
@ -314,7 +324,7 @@ impl Settings {
suffix_length: suffix_length_str
.parse()
.map_err(|_| SettingsError::SuffixLength(suffix_length_str.to_string()))?,
numeric_suffix: matches.occurrences_of(OPT_NUMERIC_SUFFIXES) > 0,
suffix_type: suffix_type_from(matches),
additional_suffix,
verbose: matches.occurrences_of("verbose") > 0,
strategy: Strategy::from(matches).map_err(SettingsError::Strategy)?,
@ -374,7 +384,7 @@ impl<'a> ByteChunkWriter<'a> {
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
settings.suffix_type,
);
let filename = filename_iterator.next()?;
if settings.verbose {
@ -502,7 +512,7 @@ impl<'a> LineChunkWriter<'a> {
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
settings.suffix_type,
);
let filename = filename_iterator.next()?;
if settings.verbose {
@ -594,7 +604,7 @@ where
&settings.prefix,
&settings.additional_suffix,
settings.suffix_length,
settings.numeric_suffix,
settings.suffix_type,
);
// Create one writer for each chunk. This will create each