1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

Merge pull request #2868 from jfinkels/split-filename-iterator

split: use iterator to produce filenames
This commit is contained in:
Terts Diepraam 2022-01-30 22:37:37 +01:00 committed by GitHub
commit 7b3cfcf708
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 627 additions and 463 deletions

View file

@ -2,529 +2,182 @@
// * // *
// * For the full copyright and license information, please view the LICENSE // * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code. // * file that was distributed with this source code.
// spell-checker:ignore zaaa zaab zzaaaa zzzaaaaa // spell-checker:ignore zaaa zaab
//! Compute filenames from a given index. //! Compute filenames from a given index.
//! //!
//! The [`FilenameFactory`] can be used to convert a chunk index given //! The [`FilenameIterator`] yields filenames for use with ``split``.
//! as a [`usize`] to a filename for that chunk.
//! //!
//! # Examples //! # Examples
//! //!
//! Create filenames of the form `chunk_??.txt`: //! Create filenames of the form `chunk_??.txt`:
//! //!
//! ```rust,ignore //! ```rust,ignore
//! use crate::filenames::FilenameFactory; //! use crate::filenames::FilenameIterator;
//! //!
//! let prefix = "chunk_".to_string(); //! let prefix = "chunk_".to_string();
//! let suffix = ".txt".to_string(); //! let suffix = ".txt".to_string();
//! let width = 2; //! let width = 2;
//! let use_numeric_suffix = false; //! let use_numeric_suffix = false;
//! let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); //! let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
//! //!
//! assert_eq!(factory.make(0).unwrap(), "chunk_aa.txt"); //! assert_eq!(it.next().unwrap(), "chunk_aa.txt");
//! assert_eq!(factory.make(10).unwrap(), "chunk_ak.txt"); //! assert_eq!(it.next().unwrap(), "chunk_ab.txt");
//! assert_eq!(factory.make(28).unwrap(), "chunk_bc.txt"); //! assert_eq!(it.next().unwrap(), "chunk_ac.txt");
//! ``` //! ```
use crate::number::DynamicWidthNumber;
/// Base 10 logarithm. use crate::number::FixedWidthNumber;
fn log10(n: usize) -> usize { use crate::number::Number;
(n as f64).log10() as usize
}
/// Base 26 logarithm.
fn log26(n: usize) -> usize {
(n as f64).log(26.0) as usize
}
/// Convert a radix 10 number to a radix 26 number of the given width.
///
/// `n` is the radix 10 (that is, decimal) number to transform. This
/// function returns a [`Vec`] of unsigned integers representing the
/// digits, with the most significant digit first and the least
/// significant digit last. The returned `Vec` is always of length
/// `width`.
///
/// If the number `n` is too large to represent within `width` digits,
/// then this function returns `None`.
///
/// # Examples
///
/// ```rust,ignore
/// use crate::filenames::to_radix_26;
///
/// assert_eq!(to_radix_26(20, 2), Some(vec![0, 20]));
/// assert_eq!(to_radix_26(26, 2), Some(vec![1, 0]));
/// assert_eq!(to_radix_26(30, 2), Some(vec![1, 4]));
/// ```
fn to_radix_26(mut n: usize, width: usize) -> Option<Vec<u8>> {
if width == 0 {
return None;
}
// Use the division algorithm to repeatedly compute the quotient
// and remainder of the number after division by the radix 26. The
// successive quotients are the digits in radix 26, from most
// significant to least significant.
let mut result = vec![];
for w in (0..width).rev() {
let divisor = 26_usize.pow(w as u32);
let (quotient, remainder) = (n / divisor, n % divisor);
n = remainder;
// If the quotient is equal to or greater than the radix, that
// means the number `n` requires a greater width to be able to
// represent it in radix 26.
if quotient >= 26 {
return None;
}
result.push(quotient as u8);
}
Some(result)
}
/// Convert a number between 0 and 25 into a lowercase ASCII character.
///
/// # Examples
///
/// ```rust,ignore
/// use crate::filenames::to_ascii_char;
///
/// assert_eq!(to_ascii_char(&0), Some('a'));
/// assert_eq!(to_ascii_char(&25), Some('z'));
/// assert_eq!(to_ascii_char(&26), None);
/// ```
fn to_ascii_char(n: &u8) -> Option<char> {
// TODO In Rust v1.52.0 or later, use `char::from_digit`:
// https://doc.rust-lang.org/std/primitive.char.html#method.from_digit
//
// char::from_digit(*n as u32 + 10, 36)
//
// In that call, radix 36 is used because the characters in radix
// 36 are [0-9a-z]. We want to exclude the the first ten of those
// characters, so we add 10 to the number before conversion.
//
// Until that function is available, just add `n` to `b'a'` and
// cast to `char`.
if *n < 26 {
Some((b'a' + n) as char)
} else {
None
}
}
/// Fixed width alphabetic string representation of index `i`.
///
/// If `i` is greater than or equal to the number of lowercase ASCII
/// strings that can be represented in the given `width`, then this
/// function returns `None`.
///
/// # Examples
///
/// ```rust,ignore
/// use crate::filenames::str_prefix_fixed_width;
///
/// assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), "aa");
/// assert_eq!(str_prefix_fixed_width(675, 2).as_deref(), "zz");
/// assert_eq!(str_prefix_fixed_width(676, 2), None);
/// ```
fn str_prefix_fixed_width(i: usize, width: usize) -> Option<String> {
to_radix_26(i, width)?.iter().map(to_ascii_char).collect()
}
/// Dynamically sized alphabetic string representation of index `i`.
///
/// The size of the returned string starts at two then grows by 2 if
/// `i` is sufficiently large.
///
/// # Examples
///
/// ```rust,ignore
/// use crate::filenames::str_prefix;
///
/// assert_eq!(str_prefix(0), "aa");
/// assert_eq!(str_prefix(649), "yz");
/// assert_eq!(str_prefix(650), "zaaa");
/// assert_eq!(str_prefix(651), "zaab");
/// ```
fn str_prefix(i: usize) -> Option<String> {
// This number tells us the order of magnitude of `i`, with a
// slight adjustment.
//
// We shift by 26 so that
//
// * if `i` is in the interval [0, 26^2 - 26), then `d` is 1,
// * if `i` is in the interval [26^2 - 26, 26^3 - 26), then `d` is 2,
// * if `i` is in the interval [26^3 - 26, 26^4 - 26), then `d` is 3,
//
// and so on. This will allow us to compute how many leading "z"
// characters need to appear in the string and how many characters
// to format to the right of those.
let d = log26(i + 26);
// This is the number of leading "z" characters.
//
// For values of `i` less than 26^2 - 26, the returned string is
// just the radix 26 representation of that number with a width of
// two (using the lowercase ASCII characters as the digits).
//
// * if `i` is 26^2 - 26, then the returned string is "zaa",
// * if `i` is 26^3 - 26, then the returned string is "zzaaaa",
// * if `i` is 26^4 - 26, then the returned string is "zzzaaaaa",
//
// and so on. As you can see, the number of leading "z"s there is
// linearly increasing by 1 for each order of magnitude.
let num_fill_chars = d - 1;
// This is the number of characters after the leading "z" characters.
let width = d + 1;
// This is the radix 10 number to render in radix 26, to the right
// of the leading "z"s.
let number = (i + 26) - 26_usize.pow(d as u32);
// This is the radix 26 number to render after the leading "z"s,
// collected in a `String`.
//
// For example, if `i` is 789, then `number` is 789 + 26 - 676,
// which equals 139. In radix 26 and assuming a `width` of 3, this
// number is
//
// [0, 5, 9]
//
// with the most significant digit on the left and the least
// significant digit on the right. After translating to ASCII
// lowercase letters, this becomes "afj".
let digits = str_prefix_fixed_width(number, width)?;
// `empty` is just the empty string, to be displayed with a width
// of `num_fill_chars` and with blank spaces filled with the
// character "z".
//
// `digits` is as described in the previous comment.
Some(format!(
"{empty:z<num_fill_chars$}{digits}",
empty = "",
num_fill_chars = num_fill_chars,
digits = digits
))
}
/// Fixed width numeric string representation of index `i`.
///
/// If `i` is greater than or equal to the number of numbers that can
/// be represented in the given `width`, then this function returns
/// `None`.
///
/// # Examples
///
/// ```rust,ignore
/// use crate::filenames::num_prefix_fixed_width;
///
/// assert_eq!(num_prefix_fixed_width(0, 2).as_deref(), "89");
/// assert_eq!(num_prefix_fixed_width(99, 2).as_deref(), "9000");
/// assert_eq!(num_prefix_fixed_width(100, 2), None);
/// ```
fn num_prefix_fixed_width(i: usize, width: usize) -> Option<String> {
let max = 10_usize.pow(width as u32);
if i >= max {
None
} else {
Some(format!("{i:0width$}", i = i, width = width))
}
}
/// Dynamically sized numeric string representation of index `i`.
///
/// The size of the returned string starts at two then grows by 2 if
/// `i` is sufficiently large.
///
/// # Examples
///
/// ```rust,ignore
/// use crate::filenames::num_prefix;
///
/// assert_eq!(num_prefix(89), "89");
/// assert_eq!(num_prefix(90), "9000");
/// assert_eq!(num_prefix(91), "9001");
/// ```
fn num_prefix(i: usize) -> String {
// This number tells us the order of magnitude of `i`, with a
// slight adjustment.
//
// We shift by 10 so that
//
// * if `i` is in the interval [0, 90), then `d` is 1,
// * if `i` is in the interval [90, 990), then `d` is 2,
// * if `i` is in the interval [990, 9990), then `d` is 3,
//
// and so on. This will allow us to compute how many leading "9"
// characters need to appear in the string and how many digits to
// format to the right of those.
let d = log10(i + 10);
// This is the number of leading "9" characters.
//
// For values of `i` less than 90, the returned string is just
// that number padded by a 0 to ensure the width is 2, but
//
// * if `i` is 90, then the returned string is "900",
// * if `i` is 990, then the returned string is "990000",
// * if `i` is 9990, then the returned string is "99900000",
//
// and so on. As you can see, the number of leading 9s there is
// linearly increasing by 1 for each order of magnitude.
let num_fill_chars = d - 1;
// This is the number of characters after the leading "9" characters.
let width = d + 1;
// This is the number to render after the leading "9"s.
//
// For example, if `i` is 5732, then the returned string is
// "994742". After the two "9" characters is the number 4742,
// which equals 5732 + 10 - 1000.
let number = (i + 10) - 10_usize.pow(d as u32);
// `empty` is just the empty string, to be displayed with a width
// of `num_fill_chars` and with blank spaces filled with the
// character "9".
//
// `number` is the next remaining part of the number to render;
// for small numbers we pad with 0 and enforce a minimum width.
format!(
"{empty:9<num_fill_chars$}{number:0width$}",
empty = "",
num_fill_chars = num_fill_chars,
number = number,
width = width
)
}
/// Compute filenames from a given index. /// Compute filenames from a given index.
/// ///
/// The [`FilenameFactory`] can be used to convert a chunk index given /// This iterator yields filenames for use with ``split``.
/// as a [`usize`] to a filename for that chunk.
/// ///
/// The general form of filenames produced by instances of this struct is /// The `prefix` is prepended to each filename and the
/// `additional_suffix` is appended to each filename.
/// ///
/// ```ignore /// If `suffix_length` is 0, then the variable portion of the filename
/// {prefix}{suffix}{additional_suffix} /// that identifies the current chunk will have a dynamically
/// ``` /// increasing width. If `suffix_length` is greater than zero, then
/// the variable portion of the filename will always be exactly that
/// width in characters. In that case, after the iterator yields each
/// string of that width, the iterator is exhausted.
/// ///
/// If `suffix_length` is a positive integer, then the `suffix` /// Finally, if `use_numeric_suffix` is `true`, then numbers will be
/// portion will be of exactly that length. If `suffix_length` is 0, /// used instead of lowercase ASCII alphabetic characters.
/// then the length of the `suffix` portion will grow dynamically to
/// accommodate any chunk index. In that case, the length begins at 2
/// and increases by 2 when the chunk index becomes sufficiently
/// large.
///
/// If `use_numeric_suffix` is `true`, then the `suffix` portion will
/// be nonnegative integers. If `false`, then the `suffix` will
/// comprise lowercase ASCII characters.
/// ///
/// # Examples /// # Examples
/// ///
/// Create filenames of the form `chunk_??.txt`: /// Create filenames of the form `chunk_??.txt`, where the `?`
/// characters are lowercase ASCII alphabetic characters:
/// ///
/// ```rust,ignore /// ```rust,ignore
/// use crate::filenames::FilenameFactory; /// use crate::filenames::FilenameIterator;
/// ///
/// let prefix = "chunk_".to_string(); /// let prefix = "chunk_".to_string();
/// let suffix = ".txt".to_string(); /// let suffix = ".txt".to_string();
/// let width = 2; /// let width = 2;
/// let use_numeric_suffix = false; /// let use_numeric_suffix = false;
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); /// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
/// ///
/// assert_eq!(factory.make(0).unwrap(), "chunk_aa.txt"); /// assert_eq!(it.next().unwrap(), "chunk_aa.txt");
/// assert_eq!(factory.make(10).unwrap(), "chunk_ak.txt"); /// assert_eq!(it.next().unwrap(), "chunk_ab.txt");
/// assert_eq!(factory.make(28).unwrap(), "chunk_bc.txt"); /// assert_eq!(it.next().unwrap(), "chunk_ac.txt");
/// ``` /// ```
/// ///
/// Set `suffix_length` to 0 for filename sizes that grow dynamically: /// For numeric filenames, set `use_numeric_suffix` to `true`:
/// ///
/// ```rust,ignore /// ```rust,ignore
/// use crate::filenames::FilenameFactory; /// use crate::filenames::FilenameIterator;
/// ///
/// let prefix = String::new(); /// let prefix = "chunk_".to_string();
/// let suffix = String::new(); /// let suffix = ".txt".to_string();
/// let width = 0; /// let width = 2;
/// let use_numeric_suffix = false; /// let use_numeric_suffix = true;
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix); /// let it = FilenameIterator::new(prefix, suffix, width, use_numeric_suffix);
/// ///
/// assert_eq!(factory.make(0).unwrap(), "aa"); /// assert_eq!(it.next().unwrap(), "chunk_00.txt");
/// assert_eq!(factory.make(1).unwrap(), "ab"); /// assert_eq!(it.next().unwrap(), "chunk_01.txt");
/// assert_eq!(factory.make(649).unwrap(), "yz"); /// assert_eq!(it.next().unwrap(), "chunk_02.txt");
/// assert_eq!(factory.make(650).unwrap(), "zaaa");
/// assert_eq!(factory.make(6551).unwrap(), "zaab");
/// ``` /// ```
pub struct FilenameFactory<'a> { pub struct FilenameIterator<'a> {
prefix: &'a str,
additional_suffix: &'a str, additional_suffix: &'a str,
suffix_length: usize, prefix: &'a str,
use_numeric_suffix: bool, number: Number,
first_iteration: bool,
} }
impl<'a> FilenameFactory<'a> { impl<'a> FilenameIterator<'a> {
/// Create a new instance of this struct.
///
/// For an explanation of the parameters, see the struct documentation.
pub fn new( pub fn new(
prefix: &'a str, prefix: &'a str,
additional_suffix: &'a str, additional_suffix: &'a str,
suffix_length: usize, suffix_length: usize,
use_numeric_suffix: bool, use_numeric_suffix: bool,
) -> FilenameFactory<'a> { ) -> FilenameIterator<'a> {
FilenameFactory { let radix = if use_numeric_suffix { 10 } else { 26 };
let number = if suffix_length == 0 {
Number::DynamicWidth(DynamicWidthNumber::new(radix))
} else {
Number::FixedWidth(FixedWidthNumber::new(radix, suffix_length))
};
FilenameIterator {
prefix, prefix,
additional_suffix, additional_suffix,
suffix_length, number,
use_numeric_suffix, first_iteration: true,
} }
} }
}
/// Construct the filename for the specified element of the output collection of files. impl<'a> Iterator for FilenameIterator<'a> {
/// type Item = String;
/// For an explanation of the parameters, see the struct documentation.
/// fn next(&mut self) -> Option<Self::Item> {
/// If `suffix_length` has been set to a positive integer and `i` if self.first_iteration {
/// is greater than or equal to the number of strings that can be self.first_iteration = false;
/// represented within that length, then this returns `None`. For } else {
/// example: self.number.increment().ok()?;
/// }
/// ```rust,ignore // The first and third parts are just taken directly from the
/// use crate::filenames::FilenameFactory; // struct parameters unchanged.
///
/// let prefix = "";
/// let suffix = "";
/// let width = 1;
/// let use_numeric_suffix = true;
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
///
/// assert_eq!(factory.make(10), None);
/// ```
pub fn make(&self, i: usize) -> Option<String> {
let suffix = match (self.use_numeric_suffix, self.suffix_length) {
(true, 0) => Some(num_prefix(i)),
(false, 0) => str_prefix(i),
(true, width) => num_prefix_fixed_width(i, width),
(false, width) => str_prefix_fixed_width(i, width),
}?;
Some(format!( Some(format!(
"{}{}{}", "{}{}{}",
self.prefix, suffix, self.additional_suffix self.prefix, self.number, self.additional_suffix
)) ))
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::filenames::num_prefix;
use crate::filenames::num_prefix_fixed_width; use crate::filenames::FilenameIterator;
use crate::filenames::str_prefix;
use crate::filenames::str_prefix_fixed_width;
use crate::filenames::to_ascii_char;
use crate::filenames::to_radix_26;
use crate::filenames::FilenameFactory;
#[test] #[test]
fn test_to_ascii_char() { fn test_filename_iterator_alphabetic_fixed_width() {
assert_eq!(to_ascii_char(&0), Some('a')); let mut it = FilenameIterator::new("chunk_", ".txt", 2, false);
assert_eq!(to_ascii_char(&5), Some('f')); assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(to_ascii_char(&25), Some('z')); assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(to_ascii_char(&26), None); assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false);
assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
assert_eq!(it.next(), None);
} }
#[test] #[test]
fn test_to_radix_26_exceed_width() { fn test_filename_iterator_numeric_fixed_width() {
assert_eq!(to_radix_26(1, 0), None); let mut it = FilenameIterator::new("chunk_", ".txt", 2, true);
assert_eq!(to_radix_26(26, 1), None); assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(to_radix_26(26 * 26, 2), None); assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(it.next().unwrap(), "chunk_02.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true);
assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
assert_eq!(it.next(), None);
} }
#[test] #[test]
fn test_to_radix_26_width_one() { fn test_filename_iterator_alphabetic_dynamic_width() {
assert_eq!(to_radix_26(0, 1), Some(vec![0])); let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
assert_eq!(to_radix_26(10, 1), Some(vec![10])); assert_eq!(it.next().unwrap(), "chunk_aa.txt");
assert_eq!(to_radix_26(20, 1), Some(vec![20])); assert_eq!(it.next().unwrap(), "chunk_ab.txt");
assert_eq!(to_radix_26(25, 1), Some(vec![25])); assert_eq!(it.next().unwrap(), "chunk_ac.txt");
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
} }
#[test] #[test]
fn test_to_radix_26_width_two() { fn test_filename_iterator_numeric_dynamic_width() {
assert_eq!(to_radix_26(0, 2), Some(vec![0, 0])); let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
assert_eq!(to_radix_26(10, 2), Some(vec![0, 10])); assert_eq!(it.next().unwrap(), "chunk_00.txt");
assert_eq!(to_radix_26(20, 2), Some(vec![0, 20])); assert_eq!(it.next().unwrap(), "chunk_01.txt");
assert_eq!(to_radix_26(25, 2), Some(vec![0, 25])); assert_eq!(it.next().unwrap(), "chunk_02.txt");
assert_eq!(to_radix_26(26, 2), Some(vec![1, 0])); let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
assert_eq!(to_radix_26(30, 2), Some(vec![1, 4])); assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
assert_eq!(it.next().unwrap(), "chunk_9000.txt");
assert_eq!(to_radix_26(26 * 2, 2), Some(vec![2, 0])); assert_eq!(it.next().unwrap(), "chunk_9001.txt");
assert_eq!(to_radix_26(26 * 26 - 1, 2), Some(vec![25, 25]));
}
#[test]
fn test_str_prefix_dynamic_width() {
assert_eq!(str_prefix(0).as_deref(), Some("aa"));
assert_eq!(str_prefix(1).as_deref(), Some("ab"));
assert_eq!(str_prefix(2).as_deref(), Some("ac"));
assert_eq!(str_prefix(25).as_deref(), Some("az"));
assert_eq!(str_prefix(26).as_deref(), Some("ba"));
assert_eq!(str_prefix(27).as_deref(), Some("bb"));
assert_eq!(str_prefix(28).as_deref(), Some("bc"));
assert_eq!(str_prefix(51).as_deref(), Some("bz"));
assert_eq!(str_prefix(52).as_deref(), Some("ca"));
assert_eq!(str_prefix(26 * 25 - 1).as_deref(), Some("yz"));
assert_eq!(str_prefix(26 * 25).as_deref(), Some("zaaa"));
assert_eq!(str_prefix(26 * 25 + 1).as_deref(), Some("zaab"));
}
#[test]
fn test_num_prefix_dynamic_width() {
assert_eq!(num_prefix(0), "00");
assert_eq!(num_prefix(9), "09");
assert_eq!(num_prefix(17), "17");
assert_eq!(num_prefix(89), "89");
assert_eq!(num_prefix(90), "9000");
assert_eq!(num_prefix(91), "9001");
assert_eq!(num_prefix(989), "9899");
assert_eq!(num_prefix(990), "990000");
}
#[test]
fn test_str_prefix_fixed_width() {
assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), Some("aa"));
assert_eq!(str_prefix_fixed_width(1, 2).as_deref(), Some("ab"));
assert_eq!(str_prefix_fixed_width(26, 2).as_deref(), Some("ba"));
assert_eq!(
str_prefix_fixed_width(26 * 26 - 1, 2).as_deref(),
Some("zz")
);
assert_eq!(str_prefix_fixed_width(26 * 26, 2).as_deref(), None);
}
#[test]
fn test_num_prefix_fixed_width() {
assert_eq!(num_prefix_fixed_width(0, 2).as_deref(), Some("00"));
assert_eq!(num_prefix_fixed_width(1, 2).as_deref(), Some("01"));
assert_eq!(num_prefix_fixed_width(99, 2).as_deref(), Some("99"));
assert_eq!(num_prefix_fixed_width(100, 2).as_deref(), None);
}
#[test]
fn test_alphabetic_suffix() {
let factory = FilenameFactory::new("123", "789", 3, false);
assert_eq!(factory.make(0).unwrap(), "123aaa789");
assert_eq!(factory.make(1).unwrap(), "123aab789");
assert_eq!(factory.make(28).unwrap(), "123abc789");
}
#[test]
fn test_numeric_suffix() {
let factory = FilenameFactory::new("abc", "xyz", 3, true);
assert_eq!(factory.make(0).unwrap(), "abc000xyz");
assert_eq!(factory.make(1).unwrap(), "abc001xyz");
assert_eq!(factory.make(123).unwrap(), "abc123xyz");
} }
} }

513
src/uu/split/src/number.rs Normal file
View file

@ -0,0 +1,513 @@
// * This file is part of the uutils coreutils package.
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore zaaa zaab
//! A number in arbitrary radix expressed in a positional notation.
//!
//! Use the [`Number`] enum to represent an arbitrary number in an
//! arbitrary radix. A number can be incremented and can be
//! displayed. See the [`Number`] documentation for more information.
//!
//! See the Wikipedia articles on [radix] and [positional notation]
//! for more background information on those topics.
//!
//! [radix]: https://en.wikipedia.org/wiki/Radix
//! [positional notation]: https://en.wikipedia.org/wiki/Positional_notation
use std::error::Error;
use std::fmt::{self, Display, Formatter};
/// Error signalling that a number was incremented past the largest
/// value it is able to represent.
#[derive(Debug)]
pub struct Overflow;

impl fmt::Display for Overflow {
    /// Render the error as the fixed text `Overflow`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Overflow")
    }
}

impl Error for Overflow {}
/// A number in arbitrary radix expressed in a positional notation.
///
/// Use the [`Number`] enum to represent an arbitrary number in an
/// arbitrary radix. A number can be incremented with
/// [`Number::increment`]. The [`FixedWidthNumber`] overflows when
/// attempting to increment it beyond the maximum number that can be
/// represented in the specified width. The [`DynamicWidthNumber`]
/// follows a non-standard incrementing procedure that is used
/// specifically for the `split` program. See the
/// [`DynamicWidthNumber`] documentation for more information.
///
/// Numbers of radix 10 are displayable and rendered as decimal
/// numbers (for example, "00" or "917"). Numbers of radix 26 are
/// displayable and rendered as lowercase ASCII alphabetic characters
/// (for example, "aa" or "zax"). Numbers of other radices cannot be
/// displayed. The display of a [`DynamicWidthNumber`] includes a
/// prefix whose length depends on the width of the number. See the
/// [`DynamicWidthNumber`] documentation for more information.
///
/// The digits of a number are accessible via the [`Number::digits`]
/// method. The digits are represented as a [`Vec<u8>`] with the most
/// significant digit on the left and the least significant digit on
/// the right. Each digit is a nonnegative integer less than the
/// radix. For example, if the radix is 3, then `vec![1, 0, 2]`
/// represents the decimal number 11:
///
/// ```ignore
/// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11
/// ```
///
/// For the [`DynamicWidthNumber`], the digits are not unique in the
/// sense that repeatedly incrementing the number will eventually
/// yield `vec![0, 0]`, `vec![0, 0, 0]`, `vec![0, 0, 0, 0]`, etc.
/// That's okay because each of these numbers will be displayed
/// differently and we only intend to use these numbers for display
/// purposes and not for mathematical purposes.
#[derive(Clone)]
pub enum Number {
/// A fixed-width representation of a number.
FixedWidth(FixedWidthNumber),
/// A representation of a number with a dynamically growing width.
DynamicWidth(DynamicWidthNumber),
}
impl Number {
/// The digits of this number in decreasing order of significance.
///
/// The digits are represented as a [`Vec<u8>`] with the most
/// significant digit on the left and the least significant digit
/// on the right. Each digit is a nonnegative integer less than
/// the radix. For example, if the radix is 3, then `vec![1, 0,
/// 2]` represents the decimal number 11:
///
/// ```ignore
/// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11
/// ```
///
/// For the [`DynamicWidthNumber`], the digits are not unique in the
/// sense that repeatedly incrementing the number will eventually
/// yield `vec![0, 0]`, `vec![0, 0, 0], `vec![0, 0, 0, 0]`, etc.
/// That's okay because each of these numbers will be displayed
/// differently and we only intend to use these numbers for display
/// purposes and not for mathematical purposes.
#[allow(dead_code)]
fn digits(&self) -> &Vec<u8> {
match self {
Number::FixedWidth(number) => &number.digits,
Number::DynamicWidth(number) => &number.digits,
}
}
/// Increment this number to its successor.
///
/// If incrementing this number would result in an overflow beyond
/// the maximum representable number, then return
/// [`Err(Overflow)`]. The [`FixedWidthNumber`] overflows, but
/// [`DynamicWidthNumber`] does not.
///
/// The [`DynamicWidthNumber`] follows a non-standard incrementing
/// procedure that is used specifically for the `split` program.
/// See the [`DynamicWidthNumber`] documentation for more
/// information.
///
/// # Errors
///
/// This method returns [`Err(Overflow)`] when attempting to
/// increment beyond the largest representable number.
///
/// # Examples
///
/// Overflowing:
///
/// ```rust,ignore
///
/// use crate::number::FixedWidthNumber;
/// use crate::number::Number;
/// use crate::number::Overflow;
///
/// // Radix 3, width of 1 digit.
/// let mut number = Number::FixedWidth(FixedWidthNumber::new(3, 1));
/// number.increment().unwrap(); // from 0 to 1
/// number.increment().unwrap(); // from 1 to 2
/// assert!(number.increment().is_err());
/// ```
pub fn increment(&mut self) -> Result<(), Overflow> {
match self {
Number::FixedWidth(number) => number.increment(),
Number::DynamicWidth(number) => number.increment(),
}
}
}
impl Display for Number {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Number::FixedWidth(number) => number.fmt(f),
Number::DynamicWidth(number) => number.fmt(f),
}
}
}
/// A positional notation representation of a fixed-width number.
///
/// The digits are represented as a [`Vec<u8>`] with the most
/// significant digit on the left and the least significant digit on
/// the right. Each digit is a nonnegative integer less than the
/// radix.
///
/// # Incrementing
///
/// This number starts at `vec![0; width]`, representing the number 0
/// with the specified number of digits. Incrementing this number
/// with [`Number::increment`] causes it to increase its value by 1 in
/// the usual sense. If the digits are `vec![radix - 1; width]`, then
/// an overflow would occur and the [`Number::increment`] method
/// returns an error, leaving the digits unchanged so that every
/// further attempt to increment fails in the same way.
///
/// # Displaying
///
/// This number is only displayable if `radix` is 10 or `radix` is
/// 26. If `radix` is 10, then the digits are concatenated and
/// displayed as a fixed-width decimal number. If `radix` is 26, then
/// each digit is translated to the corresponding lowercase ASCII
/// alphabetic character (that is, 'a', 'b', 'c', etc.) and
/// concatenated.
#[derive(Clone)]
pub struct FixedWidthNumber {
    /// The radix (base) in which the digits are expressed.
    radix: u8,
    /// The digits, most significant first; the length is the width.
    digits: Vec<u8>,
}

impl FixedWidthNumber {
    /// Instantiate a number of the given radix and width.
    ///
    /// All `width` digits start out as 0.
    pub fn new(radix: u8, width: usize) -> FixedWidthNumber {
        FixedWidthNumber {
            radix,
            digits: vec![0; width],
        }
    }

    /// Increment this number.
    ///
    /// This method adds one to this number.
    ///
    /// # Errors
    ///
    /// If incrementing this number would require more digits than are
    /// available with the specified width, then this method returns
    /// `Err(Overflow)` and the number keeps its current (maximum)
    /// value. It does not wrap around to zero, so a caller that
    /// retries after an overflow cannot silently restart the sequence.
    fn increment(&mut self) -> Result<(), Overflow> {
        let radix = self.radix;
        // Scanning from the least significant end, locate the first
        // digit that can absorb the increment without carrying. Doing
        // this before touching any digit is what keeps the number
        // intact when there is no such digit.
        let carry_stop = self.digits.iter().rposition(|&digit| digit + 1 != radix);
        match carry_stop {
            Some(pos) => {
                self.digits[pos] += 1;
                // Every digit to the right was at its maximum and
                // rolls over to 0, exactly as a carry would do.
                for digit in &mut self.digits[pos + 1..] {
                    *digit = 0;
                }
                Ok(())
            }
            // Every digit is already `radix - 1` (or the width is 0).
            None => Err(Overflow),
        }
    }
}

impl Display for FixedWidthNumber {
    /// Write the digits as decimal characters (radix 10) or lowercase
    /// ASCII letters (radix 26); any other radix is a formatting error.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // The character that represents the digit 0 in this radix.
        let zero = match self.radix {
            10 => b'0',
            26 => b'a',
            _ => return Err(fmt::Error),
        };
        let rendered: String = self
            .digits
            .iter()
            .map(|&digit| char::from(zero + digit))
            .collect();
        f.write_str(&rendered)
    }
}
/// A positional notation representation of a number of dynamically growing width.
///
/// The digits are represented as a [`Vec<u8>`] with the most
/// significant digit on the left and the least significant digit on
/// the right. Each digit is a nonnegative integer less than the
/// radix.
///
/// # Incrementing
///
/// This number starts at `vec![0, 0]`, representing the number 0 with
/// a width of 2 digits. Incrementing this number with
/// [`Number::increment`] causes it to increase its value by 1. When
/// incrementing the number would have caused it to change from
/// `vec![radix - 2, radix - 1]` to `vec![radix - 1, 0]`, it instead
/// increases its width by one and resets its value to 0. For example,
/// if the radix were 3, the digits were `vec![1, 2]`, and we called
/// [`Number::increment`], then the digits would become `vec![0, 0,
/// 0]`. In this way, the width grows by one each time the most
/// significant digit would have achieved its maximum value.
///
/// This notion of "incrementing" here does not match the notion of
/// incrementing the *value* of the number, it is just an abstract way
/// of updating the representation of the number in a way that is only
/// useful for the purposes of the `split` program.
///
/// # Displaying
///
/// This number is only displayable if `radix` is 10 or `radix` is
/// 26. If `radix` is 10, then the digits are concatenated and
/// displayed as a fixed-width decimal number with a prefix of `n - 2`
/// instances of the character '9', where `n` is the number of digits.
/// If `radix` is 26, then each digit is translated to the
/// corresponding lowercase ASCII alphabetic character (that is, 'a',
/// 'b', 'c', etc.) and concatenated with a prefix of `n - 2`
/// instances of the character 'z'.
///
/// This notion of displaying the number is specific to the `split`
/// program.
#[derive(Clone)]
pub struct DynamicWidthNumber {
/// The radix (base) in which the digits are expressed.
radix: u8,
/// The digits, most significant first; starts out as two zeros.
digits: Vec<u8>,
}
impl DynamicWidthNumber {
    /// Instantiate a number of the given radix, starting with width 2.
    ///
    /// This associated function returns a new instance of the struct
    /// with the given radix and a width of two digits, both 0.
    pub fn new(radix: u8) -> DynamicWidthNumber {
        DynamicWidthNumber {
            radix,
            digits: vec![0, 0],
        }
    }

    /// Set all digits to zero, keeping the current width.
    fn reset(&mut self) {
        for digit in &mut self.digits {
            *digit = 0;
        }
    }

    /// Increment this number.
    ///
    /// This method adds one to this number. The first time that the
    /// most significant digit would achieve its highest possible
    /// value (that is, `radix - 1`), then all the digits get reset to
    /// 0 and the number of digits increases by one.
    ///
    /// # Errors
    ///
    /// This method never returns an error: instead of overflowing,
    /// the number grows by one digit.
    fn increment(&mut self) -> Result<(), Overflow> {
        let radix = self.radix;

        // Walk from the least significant digit towards the most
        // significant one, propagating the carry.
        for digit in self.digits.iter_mut().rev() {
            *digit += 1;
            if *digit != radix {
                // No carry out of this digit, so no higher order
                // digit changes.
                break;
            }
            // The digit overflowed: wrap it to 0 and carry into the
            // next most significant digit.
            *digit = 0;
        }

        // If the most significant digit is at its maximum value, then
        // add another digit and reset all digits to zero.
        if self.digits[0] == radix - 1 {
            self.digits.push(0);
            self.reset();
        }
        Ok(())
    }
}
impl Display for DynamicWidthNumber {
    /// Write the `split`-specific textual form of this number.
    ///
    /// The output is `n - 2` fill characters followed by one character
    /// per digit, where `n` is the number of digits. For a radix of 10
    /// the fill character is `'9'` and digits map onto `'0'..='9'`;
    /// for a radix of 26 the fill character is `'z'` and digits map
    /// onto `'a'..='z'`. Any other radix produces [`fmt::Error`]
    /// without writing anything.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // (fill character, ASCII byte standing for the digit 0)
        let (fill, zero) = match self.radix {
            10 => ('9', b'0'),
            26 => ('z', b'a'),
            _ => return Err(fmt::Error),
        };
        // One fill character for every digit beyond the initial two.
        let num_fill_chars = self.digits.len() - 2;
        let text: String = std::iter::repeat(fill)
            .take(num_fill_chars)
            .chain(self.digits.iter().map(|&digit| char::from(zero + digit)))
            .collect();
        f.write_str(&text)
    }
}
// Unit tests for the incrementing and display rules of the fixed-width
// and dynamic-width numbers, exercised through the `Number` wrapper.
#[cfg(test)]
mod tests {
use crate::number::DynamicWidthNumber;
use crate::number::FixedWidthNumber;
use crate::number::Number;
use crate::number::Overflow;
// With radix 3 the two-digit number counts up to [1, 2]. The next
// increment would put the most significant digit at its maximum
// (2), so instead the width grows to three and all digits reset.
#[test]
fn test_dynamic_width_number_increment() {
let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3));
assert_eq!(n.digits(), &vec![0, 0]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![0, 1]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![0, 2]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![1, 0]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![1, 1]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![1, 2]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![0, 0, 0]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![0, 0, 1]);
}
// Radix 26 displays as lowercase letters. After 26 * 25 increments
// the leading letter would become 'z', so the width grows and the
// output is prefixed with a single 'z'.
#[test]
fn test_dynamic_width_number_display_alphabetic() {
// Build a dynamic-width radix-26 number by incrementing `n` times from zero.
fn num(n: usize) -> Number {
let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26));
for _ in 0..n {
number.increment().unwrap()
}
number
}
assert_eq!(format!("{}", num(0)), "aa");
assert_eq!(format!("{}", num(1)), "ab");
assert_eq!(format!("{}", num(2)), "ac");
assert_eq!(format!("{}", num(25)), "az");
assert_eq!(format!("{}", num(26)), "ba");
assert_eq!(format!("{}", num(27)), "bb");
assert_eq!(format!("{}", num(28)), "bc");
assert_eq!(format!("{}", num(26 + 25)), "bz");
assert_eq!(format!("{}", num(26 + 26)), "ca");
assert_eq!(format!("{}", num(26 * 25 - 1)), "yz");
assert_eq!(format!("{}", num(26 * 25)), "zaaa");
assert_eq!(format!("{}", num(26 * 25 + 1)), "zaab");
}
// Radix 10 displays as decimal digits with a '9' prefix per extra
// digit. The width first grows after 10 * 9 increments and grows
// again after 10 * 99 increments in total.
#[test]
fn test_dynamic_width_number_display_numeric() {
// Build a dynamic-width radix-10 number by incrementing `n` times from zero.
fn num(n: usize) -> Number {
let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10));
for _ in 0..n {
number.increment().unwrap()
}
number
}
assert_eq!(format!("{}", num(0)), "00");
assert_eq!(format!("{}", num(9)), "09");
assert_eq!(format!("{}", num(17)), "17");
assert_eq!(format!("{}", num(10 * 9 - 1)), "89");
assert_eq!(format!("{}", num(10 * 9)), "9000");
assert_eq!(format!("{}", num(10 * 9 + 1)), "9001");
assert_eq!(format!("{}", num(10 * 99 - 1)), "9899");
assert_eq!(format!("{}", num(10 * 99)), "990000");
assert_eq!(format!("{}", num(10 * 99 + 1)), "990001");
}
// A fixed-width radix-3 number with two digits takes the values
// [0, 0] through [2, 2]; incrementing past [2, 2] is an error.
#[test]
fn test_fixed_width_number_increment() {
let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2));
assert_eq!(n.digits(), &vec![0, 0]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![0, 1]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![0, 2]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![1, 0]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![1, 1]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![1, 2]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![2, 0]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![2, 1]);
n.increment().unwrap();
assert_eq!(n.digits(), &vec![2, 2]);
assert!(n.increment().is_err());
}
// Unlike the dynamic-width case, a fixed-width radix-26 number keeps
// two letters all the way up to "zz" and then reports overflow.
#[test]
fn test_fixed_width_number_display_alphabetic() {
// Build a fixed-width radix-26 number by incrementing `n` times
// from zero, propagating the overflow error if one occurs.
fn num(n: usize) -> Result<Number, Overflow> {
let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2));
for _ in 0..n {
number.increment()?;
}
Ok(number)
}
assert_eq!(format!("{}", num(0).unwrap()), "aa");
assert_eq!(format!("{}", num(1).unwrap()), "ab");
assert_eq!(format!("{}", num(2).unwrap()), "ac");
assert_eq!(format!("{}", num(25).unwrap()), "az");
assert_eq!(format!("{}", num(26).unwrap()), "ba");
assert_eq!(format!("{}", num(27).unwrap()), "bb");
assert_eq!(format!("{}", num(28).unwrap()), "bc");
assert_eq!(format!("{}", num(26 + 25).unwrap()), "bz");
assert_eq!(format!("{}", num(26 + 26).unwrap()), "ca");
assert_eq!(format!("{}", num(26 * 25 - 1).unwrap()), "yz");
assert_eq!(format!("{}", num(26 * 25).unwrap()), "za");
assert_eq!(format!("{}", num(26 * 26 - 1).unwrap()), "zz");
assert!(num(26 * 26).is_err());
}
// A fixed-width radix-10 number keeps two decimal digits up to
// "99" and then reports overflow.
#[test]
fn test_fixed_width_number_display_numeric() {
// Build a fixed-width radix-10 number by incrementing `n` times
// from zero, propagating the overflow error if one occurs.
fn num(n: usize) -> Result<Number, Overflow> {
let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2));
for _ in 0..n {
number.increment()?;
}
Ok(number)
}
assert_eq!(format!("{}", num(0).unwrap()), "00");
assert_eq!(format!("{}", num(9).unwrap()), "09");
assert_eq!(format!("{}", num(17).unwrap()), "17");
assert_eq!(format!("{}", num(10 * 9 - 1).unwrap()), "89");
assert_eq!(format!("{}", num(10 * 9).unwrap()), "90");
assert_eq!(format!("{}", num(10 * 10 - 1).unwrap()), "99");
assert!(num(10 * 10).is_err());
}
}

View file

@ -8,9 +8,10 @@
// spell-checker:ignore (ToDO) PREFIXaa // spell-checker:ignore (ToDO) PREFIXaa
mod filenames; mod filenames;
mod number;
mod platform; mod platform;
use crate::filenames::FilenameFactory; use crate::filenames::FilenameIterator;
use clap::{crate_version, App, AppSettings, Arg, ArgMatches}; use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::env; use std::env;
@ -384,7 +385,7 @@ where
let chunk_size = (num_bytes / (num_chunks as u64)) as usize; let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
// This object is responsible for creating the filename for each chunk. // This object is responsible for creating the filename for each chunk.
let filename_factory = FilenameFactory::new( let mut filename_iterator = FilenameIterator::new(
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
@ -394,9 +395,9 @@ where
// Create one writer for each chunk. This will create each // Create one writer for each chunk. This will create each
// of the underlying files (if not in `--filter` mode). // of the underlying files (if not in `--filter` mode).
let mut writers = vec![]; let mut writers = vec![];
for i in 0..num_chunks { for _ in 0..num_chunks {
let filename = filename_factory let filename = filename_iterator
.make(i) .next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
writers.push(writer); writers.push(writer);
@ -462,17 +463,16 @@ fn split(settings: &Settings) -> UResult<()> {
}; };
// This object is responsible for creating the filename for each chunk. // This object is responsible for creating the filename for each chunk.
let filename_factory = FilenameFactory::new( let mut filename_iterator = FilenameIterator::new(
&settings.prefix, &settings.prefix,
&settings.additional_suffix, &settings.additional_suffix,
settings.suffix_length, settings.suffix_length,
settings.numeric_suffix, settings.numeric_suffix,
); );
let mut fileno = 0;
loop { loop {
// Get a new part file set up, and construct `writer` for it. // Get a new part file set up, and construct `writer` for it.
let filename = filename_factory let filename = filename_iterator
.make(fileno) .next()
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?; .ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str()); let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
@ -509,8 +509,6 @@ fn split(settings: &Settings) -> UResult<()> {
if settings.verbose { if settings.verbose {
println!("creating file {}", filename.quote()); println!("creating file {}", filename.quote());
} }
fileno += 1;
} }
Ok(()) Ok(())
} }