mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-27 19:17:43 +00:00
Merge pull request #2868 from jfinkels/split-filename-iterator
split: use iterator to produce filenames
This commit is contained in:
commit
7b3cfcf708
3 changed files with 627 additions and 463 deletions
|
@ -2,529 +2,182 @@
|
||||||
// *
|
// *
|
||||||
// * For the full copyright and license information, please view the LICENSE
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
// * file that was distributed with this source code.
|
// * file that was distributed with this source code.
|
||||||
// spell-checker:ignore zaaa zaab zzaaaa zzzaaaaa
|
// spell-checker:ignore zaaa zaab
|
||||||
//! Compute filenames from a given index.
|
//! Compute filenames from a given index.
|
||||||
//!
|
//!
|
||||||
//! The [`FilenameFactory`] can be used to convert a chunk index given
|
//! The [`FilenameIterator`] yields filenames for use with ``split``.
|
||||||
//! as a [`usize`] to a filename for that chunk.
|
|
||||||
//!
|
//!
|
||||||
//! # Examples
|
//! # Examples
|
||||||
//!
|
//!
|
||||||
//! Create filenames of the form `chunk_??.txt`:
|
//! Create filenames of the form `chunk_??.txt`:
|
||||||
//!
|
//!
|
||||||
//! ```rust,ignore
|
//! ```rust,ignore
|
||||||
//! use crate::filenames::FilenameFactory;
|
//! use crate::filenames::FilenameIterator;
|
||||||
//!
|
//!
|
||||||
//! let prefix = "chunk_".to_string();
|
//! let prefix = "chunk_".to_string();
|
||||||
//! let suffix = ".txt".to_string();
|
//! let suffix = ".txt".to_string();
|
||||||
//! let width = 2;
|
//! let width = 2;
|
||||||
//! let use_numeric_suffix = false;
|
//! let use_numeric_suffix = false;
|
||||||
//! let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
|
//! let mut it = FilenameIterator::new(&prefix, &suffix, width, use_numeric_suffix);
|
||||||
//!
|
//!
|
||||||
//! assert_eq!(factory.make(0).unwrap(), "chunk_aa.txt");
|
//! assert_eq!(it.next().unwrap(), "chunk_aa.txt");
|
||||||
//! assert_eq!(factory.make(10).unwrap(), "chunk_ak.txt");
|
//! assert_eq!(it.next().unwrap(), "chunk_ab.txt");
|
||||||
//! assert_eq!(factory.make(28).unwrap(), "chunk_bc.txt");
|
//! assert_eq!(it.next().unwrap(), "chunk_ac.txt");
|
||||||
//! ```
|
//! ```
|
||||||
|
use crate::number::DynamicWidthNumber;
|
||||||
/// Base 10 logarithm.
|
use crate::number::FixedWidthNumber;
|
||||||
fn log10(n: usize) -> usize {
|
use crate::number::Number;
|
||||||
(n as f64).log10() as usize
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Base 26 logarithm.
|
|
||||||
fn log26(n: usize) -> usize {
|
|
||||||
(n as f64).log(26.0) as usize
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert a radix 10 number to a radix 26 number of the given width.
|
|
||||||
///
|
|
||||||
/// `n` is the radix 10 (that is, decimal) number to transform. This
|
|
||||||
/// function returns a [`Vec`] of unsigned integers representing the
|
|
||||||
/// digits, with the most significant digit first and the least
|
|
||||||
/// significant digit last. The returned `Vec` is always of length
|
|
||||||
/// `width`.
|
|
||||||
///
|
|
||||||
/// If the number `n` is too large to represent within `width` digits,
|
|
||||||
/// then this function returns `None`.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// use crate::filenames::to_radix_26;
|
|
||||||
///
|
|
||||||
/// assert_eq!(to_radix_26(20, 2), Some(vec![0, 20]));
|
|
||||||
/// assert_eq!(to_radix_26(26, 2), Some(vec![1, 0]));
|
|
||||||
/// assert_eq!(to_radix_26(30, 2), Some(vec![1, 4]));
|
|
||||||
/// ```
|
|
||||||
fn to_radix_26(mut n: usize, width: usize) -> Option<Vec<u8>> {
|
|
||||||
if width == 0 {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
// Use the division algorithm to repeatedly compute the quotient
|
|
||||||
// and remainder of the number after division by the radix 26. The
|
|
||||||
// successive quotients are the digits in radix 26, from most
|
|
||||||
// significant to least significant.
|
|
||||||
let mut result = vec![];
|
|
||||||
for w in (0..width).rev() {
|
|
||||||
let divisor = 26_usize.pow(w as u32);
|
|
||||||
let (quotient, remainder) = (n / divisor, n % divisor);
|
|
||||||
n = remainder;
|
|
||||||
// If the quotient is equal to or greater than the radix, that
|
|
||||||
// means the number `n` requires a greater width to be able to
|
|
||||||
// represent it in radix 26.
|
|
||||||
if quotient >= 26 {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
result.push(quotient as u8);
|
|
||||||
}
|
|
||||||
Some(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convert a number between 0 and 25 into a lowercase ASCII character.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// use crate::filenames::to_ascii_char;
|
|
||||||
///
|
|
||||||
/// assert_eq!(to_ascii_char(&0), Some('a'));
|
|
||||||
/// assert_eq!(to_ascii_char(&25), Some('z'));
|
|
||||||
/// assert_eq!(to_ascii_char(&26), None);
|
|
||||||
/// ```
|
|
||||||
fn to_ascii_char(n: &u8) -> Option<char> {
|
|
||||||
// TODO In Rust v1.52.0 or later, use `char::from_digit`:
|
|
||||||
// https://doc.rust-lang.org/std/primitive.char.html#method.from_digit
|
|
||||||
//
|
|
||||||
// char::from_digit(*n as u32 + 10, 36)
|
|
||||||
//
|
|
||||||
// In that call, radix 36 is used because the characters in radix
|
|
||||||
// 36 are [0-9a-z]. We want to exclude the first ten of those
|
|
||||||
// characters, so we add 10 to the number before conversion.
|
|
||||||
//
|
|
||||||
// Until that function is available, just add `n` to `b'a'` and
|
|
||||||
// cast to `char`.
|
|
||||||
if *n < 26 {
|
|
||||||
Some((b'a' + n) as char)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Fixed width alphabetic string representation of index `i`.
|
|
||||||
///
|
|
||||||
/// If `i` is greater than or equal to the number of lowercase ASCII
|
|
||||||
/// strings that can be represented in the given `width`, then this
|
|
||||||
/// function returns `None`.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// use crate::filenames::str_prefix_fixed_width;
|
|
||||||
///
|
|
||||||
/// assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), Some("aa"));
|
|
||||||
/// assert_eq!(str_prefix_fixed_width(675, 2).as_deref(), Some("zz"));
|
|
||||||
/// assert_eq!(str_prefix_fixed_width(676, 2), None);
|
|
||||||
/// ```
|
|
||||||
fn str_prefix_fixed_width(i: usize, width: usize) -> Option<String> {
|
|
||||||
to_radix_26(i, width)?.iter().map(to_ascii_char).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Dynamically sized alphabetic string representation of index `i`.
|
|
||||||
///
|
|
||||||
/// The size of the returned string starts at two then grows by 2 if
|
|
||||||
/// `i` is sufficiently large.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// use crate::filenames::str_prefix;
|
|
||||||
///
|
|
||||||
/// assert_eq!(str_prefix(0), "aa");
|
|
||||||
/// assert_eq!(str_prefix(649), "yz");
|
|
||||||
/// assert_eq!(str_prefix(650), "zaaa");
|
|
||||||
/// assert_eq!(str_prefix(651), "zaab");
|
|
||||||
/// ```
|
|
||||||
fn str_prefix(i: usize) -> Option<String> {
|
|
||||||
// This number tells us the order of magnitude of `i`, with a
|
|
||||||
// slight adjustment.
|
|
||||||
//
|
|
||||||
// We shift by 26 so that
|
|
||||||
//
|
|
||||||
// * if `i` is in the interval [0, 26^2 - 26), then `d` is 1,
|
|
||||||
// * if `i` is in the interval [26^2 - 26, 26^3 - 26), then `d` is 2,
|
|
||||||
// * if `i` is in the interval [26^3 - 26, 26^4 - 26), then `d` is 3,
|
|
||||||
//
|
|
||||||
// and so on. This will allow us to compute how many leading "z"
|
|
||||||
// characters need to appear in the string and how many characters
|
|
||||||
// to format to the right of those.
|
|
||||||
let d = log26(i + 26);
|
|
||||||
|
|
||||||
// This is the number of leading "z" characters.
|
|
||||||
//
|
|
||||||
// For values of `i` less than 26^2 - 26, the returned string is
|
|
||||||
// just the radix 26 representation of that number with a width of
|
|
||||||
// two (using the lowercase ASCII characters as the digits).
|
|
||||||
//
|
|
||||||
// * if `i` is 26^2 - 26, then the returned string is "zaaa",
|
|
||||||
// * if `i` is 26^3 - 26, then the returned string is "zzaaaa",
|
|
||||||
// * if `i` is 26^4 - 26, then the returned string is "zzzaaaaa",
|
|
||||||
//
|
|
||||||
// and so on. As you can see, the number of leading "z"s there is
|
|
||||||
// linearly increasing by 1 for each order of magnitude.
|
|
||||||
let num_fill_chars = d - 1;
|
|
||||||
|
|
||||||
// This is the number of characters after the leading "z" characters.
|
|
||||||
let width = d + 1;
|
|
||||||
|
|
||||||
// This is the radix 10 number to render in radix 26, to the right
|
|
||||||
// of the leading "z"s.
|
|
||||||
let number = (i + 26) - 26_usize.pow(d as u32);
|
|
||||||
|
|
||||||
// This is the radix 26 number to render after the leading "z"s,
|
|
||||||
// collected in a `String`.
|
|
||||||
//
|
|
||||||
// For example, if `i` is 789, then `number` is 789 + 26 - 676,
|
|
||||||
// which equals 139. In radix 26 and assuming a `width` of 3, this
|
|
||||||
// number is
|
|
||||||
//
|
|
||||||
// [0, 5, 9]
|
|
||||||
//
|
|
||||||
// with the most significant digit on the left and the least
|
|
||||||
// significant digit on the right. After translating to ASCII
|
|
||||||
// lowercase letters, this becomes "afj".
|
|
||||||
let digits = str_prefix_fixed_width(number, width)?;
|
|
||||||
|
|
||||||
// `empty` is just the empty string, to be displayed with a width
|
|
||||||
// of `num_fill_chars` and with blank spaces filled with the
|
|
||||||
// character "z".
|
|
||||||
//
|
|
||||||
// `digits` is as described in the previous comment.
|
|
||||||
Some(format!(
|
|
||||||
"{empty:z<num_fill_chars$}{digits}",
|
|
||||||
empty = "",
|
|
||||||
num_fill_chars = num_fill_chars,
|
|
||||||
digits = digits
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Fixed width numeric string representation of index `i`.
|
|
||||||
///
|
|
||||||
/// If `i` is greater than or equal to the number of numbers that can
|
|
||||||
/// be represented in the given `width`, then this function returns
|
|
||||||
/// `None`.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// use crate::filenames::num_prefix_fixed_width;
|
|
||||||
///
|
|
||||||
/// assert_eq!(num_prefix_fixed_width(0, 2).as_deref(), Some("00"));
|
|
||||||
/// assert_eq!(num_prefix_fixed_width(99, 2).as_deref(), Some("99"));
|
|
||||||
/// assert_eq!(num_prefix_fixed_width(100, 2), None);
|
|
||||||
/// ```
|
|
||||||
fn num_prefix_fixed_width(i: usize, width: usize) -> Option<String> {
|
|
||||||
let max = 10_usize.pow(width as u32);
|
|
||||||
if i >= max {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(format!("{i:0width$}", i = i, width = width))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Dynamically sized numeric string representation of index `i`.
|
|
||||||
///
|
|
||||||
/// The size of the returned string starts at two then grows by 2 if
|
|
||||||
/// `i` is sufficiently large.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```rust,ignore
|
|
||||||
/// use crate::filenames::num_prefix;
|
|
||||||
///
|
|
||||||
/// assert_eq!(num_prefix(89), "89");
|
|
||||||
/// assert_eq!(num_prefix(90), "9000");
|
|
||||||
/// assert_eq!(num_prefix(91), "9001");
|
|
||||||
/// ```
|
|
||||||
fn num_prefix(i: usize) -> String {
|
|
||||||
// This number tells us the order of magnitude of `i`, with a
|
|
||||||
// slight adjustment.
|
|
||||||
//
|
|
||||||
// We shift by 10 so that
|
|
||||||
//
|
|
||||||
// * if `i` is in the interval [0, 90), then `d` is 1,
|
|
||||||
// * if `i` is in the interval [90, 990), then `d` is 2,
|
|
||||||
// * if `i` is in the interval [990, 9990), then `d` is 3,
|
|
||||||
//
|
|
||||||
// and so on. This will allow us to compute how many leading "9"
|
|
||||||
// characters need to appear in the string and how many digits to
|
|
||||||
// format to the right of those.
|
|
||||||
let d = log10(i + 10);
|
|
||||||
|
|
||||||
// This is the number of leading "9" characters.
|
|
||||||
//
|
|
||||||
// For values of `i` less than 90, the returned string is just
|
|
||||||
// that number padded by a 0 to ensure the width is 2, but
|
|
||||||
//
|
|
||||||
// * if `i` is 90, then the returned string is "9000",
|
|
||||||
// * if `i` is 990, then the returned string is "990000",
|
|
||||||
// * if `i` is 9990, then the returned string is "99900000",
|
|
||||||
//
|
|
||||||
// and so on. As you can see, the number of leading 9s there is
|
|
||||||
// linearly increasing by 1 for each order of magnitude.
|
|
||||||
let num_fill_chars = d - 1;
|
|
||||||
|
|
||||||
// This is the number of characters after the leading "9" characters.
|
|
||||||
let width = d + 1;
|
|
||||||
|
|
||||||
// This is the number to render after the leading "9"s.
|
|
||||||
//
|
|
||||||
// For example, if `i` is 5732, then the returned string is
|
|
||||||
// "994742". After the two "9" characters is the number 4742,
|
|
||||||
// which equals 5732 + 10 - 1000.
|
|
||||||
let number = (i + 10) - 10_usize.pow(d as u32);
|
|
||||||
|
|
||||||
// `empty` is just the empty string, to be displayed with a width
|
|
||||||
// of `num_fill_chars` and with blank spaces filled with the
|
|
||||||
// character "9".
|
|
||||||
//
|
|
||||||
// `number` is the next remaining part of the number to render;
|
|
||||||
// for small numbers we pad with 0 and enforce a minimum width.
|
|
||||||
format!(
|
|
||||||
"{empty:9<num_fill_chars$}{number:0width$}",
|
|
||||||
empty = "",
|
|
||||||
num_fill_chars = num_fill_chars,
|
|
||||||
number = number,
|
|
||||||
width = width
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compute filenames from a given index.
|
/// Compute filenames from a given index.
|
||||||
///
|
///
|
||||||
/// The [`FilenameFactory`] can be used to convert a chunk index given
|
/// This iterator yields filenames for use with ``split``.
|
||||||
/// as a [`usize`] to a filename for that chunk.
|
|
||||||
///
|
///
|
||||||
/// The general form of filenames produced by instances of this struct is
|
/// The `prefix` is prepended to each filename and the
|
||||||
|
/// `additional_suffix` is appended to each filename.
|
||||||
///
|
///
|
||||||
/// ```ignore
|
/// If `suffix_length` is 0, then the variable portion of the filename
|
||||||
/// {prefix}{suffix}{additional_suffix}
|
/// that identifies the current chunk will have a dynamically
|
||||||
/// ```
|
/// increasing width. If `suffix_length` is greater than zero, then
|
||||||
|
/// the variable portion of the filename will always be exactly that
|
||||||
|
/// width in characters. In that case, after the iterator yields each
|
||||||
|
/// string of that width, the iterator is exhausted.
|
||||||
///
|
///
|
||||||
/// If `suffix_length` is a positive integer, then the `suffix`
|
/// Finally, if `use_numeric_suffix` is `true`, then numbers will be
|
||||||
/// portion will be of exactly that length. If `suffix_length` is 0,
|
/// used instead of lowercase ASCII alphabetic characters.
|
||||||
/// then the length of the `suffix` portion will grow dynamically to
|
|
||||||
/// accommodate any chunk index. In that case, the length begins at 2
|
|
||||||
/// and increases by 2 when the chunk index becomes sufficiently
|
|
||||||
/// large.
|
|
||||||
///
|
|
||||||
/// If `use_numeric_suffix` is `true`, then the `suffix` portion will
|
|
||||||
/// be nonnegative integers. If `false`, then the `suffix` will
|
|
||||||
/// comprise lowercase ASCII characters.
|
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// Create filenames of the form `chunk_??.txt`:
|
/// Create filenames of the form `chunk_??.txt`, where the `?`
|
||||||
|
/// characters are lowercase ASCII alphabetic characters:
|
||||||
///
|
///
|
||||||
/// ```rust,ignore
|
/// ```rust,ignore
|
||||||
/// use crate::filenames::FilenameFactory;
|
/// use crate::filenames::FilenameIterator;
|
||||||
///
|
///
|
||||||
/// let prefix = "chunk_".to_string();
|
/// let prefix = "chunk_".to_string();
|
||||||
/// let suffix = ".txt".to_string();
|
/// let suffix = ".txt".to_string();
|
||||||
/// let width = 2;
|
/// let width = 2;
|
||||||
/// let use_numeric_suffix = false;
|
/// let use_numeric_suffix = false;
|
||||||
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
|
/// let mut it = FilenameIterator::new(&prefix, &suffix, width, use_numeric_suffix);
|
||||||
///
|
///
|
||||||
/// assert_eq!(factory.make(0).unwrap(), "chunk_aa.txt");
|
/// assert_eq!(it.next().unwrap(), "chunk_aa.txt");
|
||||||
/// assert_eq!(factory.make(10).unwrap(), "chunk_ak.txt");
|
/// assert_eq!(it.next().unwrap(), "chunk_ab.txt");
|
||||||
/// assert_eq!(factory.make(28).unwrap(), "chunk_bc.txt");
|
/// assert_eq!(it.next().unwrap(), "chunk_ac.txt");
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// Set `suffix_length` to 0 for filename sizes that grow dynamically:
|
/// For numeric filenames, set `use_numeric_suffix` to `true`:
|
||||||
///
|
///
|
||||||
/// ```rust,ignore
|
/// ```rust,ignore
|
||||||
/// use crate::filenames::FilenameFactory;
|
/// use crate::filenames::FilenameIterator;
|
||||||
///
|
///
|
||||||
/// let prefix = String::new();
|
/// let prefix = "chunk_".to_string();
|
||||||
/// let suffix = String::new();
|
/// let suffix = ".txt".to_string();
|
||||||
/// let width = 0;
|
/// let width = 2;
|
||||||
/// let use_numeric_suffix = false;
|
/// let use_numeric_suffix = true;
|
||||||
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
|
/// let mut it = FilenameIterator::new(&prefix, &suffix, width, use_numeric_suffix);
|
||||||
///
|
///
|
||||||
/// assert_eq!(factory.make(0).unwrap(), "aa");
|
/// assert_eq!(it.next().unwrap(), "chunk_00.txt");
|
||||||
/// assert_eq!(factory.make(1).unwrap(), "ab");
|
/// assert_eq!(it.next().unwrap(), "chunk_01.txt");
|
||||||
/// assert_eq!(factory.make(649).unwrap(), "yz");
|
/// assert_eq!(it.next().unwrap(), "chunk_02.txt");
|
||||||
/// assert_eq!(factory.make(650).unwrap(), "zaaa");
|
|
||||||
/// assert_eq!(factory.make(651).unwrap(), "zaab");
|
|
||||||
/// ```
|
/// ```
|
||||||
pub struct FilenameFactory<'a> {
|
pub struct FilenameIterator<'a> {
|
||||||
prefix: &'a str,
|
|
||||||
additional_suffix: &'a str,
|
additional_suffix: &'a str,
|
||||||
suffix_length: usize,
|
prefix: &'a str,
|
||||||
use_numeric_suffix: bool,
|
number: Number,
|
||||||
|
first_iteration: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FilenameFactory<'a> {
|
impl<'a> FilenameIterator<'a> {
|
||||||
/// Create a new instance of this struct.
|
|
||||||
///
|
|
||||||
/// For an explanation of the parameters, see the struct documentation.
|
|
||||||
pub fn new(
|
pub fn new(
|
||||||
prefix: &'a str,
|
prefix: &'a str,
|
||||||
additional_suffix: &'a str,
|
additional_suffix: &'a str,
|
||||||
suffix_length: usize,
|
suffix_length: usize,
|
||||||
use_numeric_suffix: bool,
|
use_numeric_suffix: bool,
|
||||||
) -> FilenameFactory<'a> {
|
) -> FilenameIterator<'a> {
|
||||||
FilenameFactory {
|
let radix = if use_numeric_suffix { 10 } else { 26 };
|
||||||
|
let number = if suffix_length == 0 {
|
||||||
|
Number::DynamicWidth(DynamicWidthNumber::new(radix))
|
||||||
|
} else {
|
||||||
|
Number::FixedWidth(FixedWidthNumber::new(radix, suffix_length))
|
||||||
|
};
|
||||||
|
FilenameIterator {
|
||||||
prefix,
|
prefix,
|
||||||
additional_suffix,
|
additional_suffix,
|
||||||
suffix_length,
|
number,
|
||||||
use_numeric_suffix,
|
first_iteration: true,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Construct the filename for the specified element of the output collection of files.
|
impl<'a> Iterator for FilenameIterator<'a> {
|
||||||
///
|
type Item = String;
|
||||||
/// For an explanation of the parameters, see the struct documentation.
|
|
||||||
///
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
/// If `suffix_length` has been set to a positive integer and `i`
|
if self.first_iteration {
|
||||||
/// is greater than or equal to the number of strings that can be
|
self.first_iteration = false;
|
||||||
/// represented within that length, then this returns `None`. For
|
} else {
|
||||||
/// example:
|
self.number.increment().ok()?;
|
||||||
///
|
}
|
||||||
/// ```rust,ignore
|
// The first and third parts are just taken directly from the
|
||||||
/// use crate::filenames::FilenameFactory;
|
// struct parameters unchanged.
|
||||||
///
|
|
||||||
/// let prefix = "";
|
|
||||||
/// let suffix = "";
|
|
||||||
/// let width = 1;
|
|
||||||
/// let use_numeric_suffix = true;
|
|
||||||
/// let factory = FilenameFactory::new(prefix, suffix, width, use_numeric_suffix);
|
|
||||||
///
|
|
||||||
/// assert_eq!(factory.make(10), None);
|
|
||||||
/// ```
|
|
||||||
pub fn make(&self, i: usize) -> Option<String> {
|
|
||||||
let suffix = match (self.use_numeric_suffix, self.suffix_length) {
|
|
||||||
(true, 0) => Some(num_prefix(i)),
|
|
||||||
(false, 0) => str_prefix(i),
|
|
||||||
(true, width) => num_prefix_fixed_width(i, width),
|
|
||||||
(false, width) => str_prefix_fixed_width(i, width),
|
|
||||||
}?;
|
|
||||||
Some(format!(
|
Some(format!(
|
||||||
"{}{}{}",
|
"{}{}{}",
|
||||||
self.prefix, suffix, self.additional_suffix
|
self.prefix, self.number, self.additional_suffix
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::filenames::num_prefix;
|
|
||||||
use crate::filenames::num_prefix_fixed_width;
|
use crate::filenames::FilenameIterator;
|
||||||
use crate::filenames::str_prefix;
|
|
||||||
use crate::filenames::str_prefix_fixed_width;
|
|
||||||
use crate::filenames::to_ascii_char;
|
|
||||||
use crate::filenames::to_radix_26;
|
|
||||||
use crate::filenames::FilenameFactory;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_ascii_char() {
|
fn test_filename_iterator_alphabetic_fixed_width() {
|
||||||
assert_eq!(to_ascii_char(&0), Some('a'));
|
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false);
|
||||||
assert_eq!(to_ascii_char(&5), Some('f'));
|
assert_eq!(it.next().unwrap(), "chunk_aa.txt");
|
||||||
assert_eq!(to_ascii_char(&25), Some('z'));
|
assert_eq!(it.next().unwrap(), "chunk_ab.txt");
|
||||||
assert_eq!(to_ascii_char(&26), None);
|
assert_eq!(it.next().unwrap(), "chunk_ac.txt");
|
||||||
|
|
||||||
|
let mut it = FilenameIterator::new("chunk_", ".txt", 2, false);
|
||||||
|
assert_eq!(it.nth(26 * 26 - 1).unwrap(), "chunk_zz.txt");
|
||||||
|
assert_eq!(it.next(), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_radix_26_exceed_width() {
|
fn test_filename_iterator_numeric_fixed_width() {
|
||||||
assert_eq!(to_radix_26(1, 0), None);
|
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true);
|
||||||
assert_eq!(to_radix_26(26, 1), None);
|
assert_eq!(it.next().unwrap(), "chunk_00.txt");
|
||||||
assert_eq!(to_radix_26(26 * 26, 2), None);
|
assert_eq!(it.next().unwrap(), "chunk_01.txt");
|
||||||
|
assert_eq!(it.next().unwrap(), "chunk_02.txt");
|
||||||
|
|
||||||
|
let mut it = FilenameIterator::new("chunk_", ".txt", 2, true);
|
||||||
|
assert_eq!(it.nth(10 * 10 - 1).unwrap(), "chunk_99.txt");
|
||||||
|
assert_eq!(it.next(), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_radix_26_width_one() {
|
fn test_filename_iterator_alphabetic_dynamic_width() {
|
||||||
assert_eq!(to_radix_26(0, 1), Some(vec![0]));
|
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
|
||||||
assert_eq!(to_radix_26(10, 1), Some(vec![10]));
|
assert_eq!(it.next().unwrap(), "chunk_aa.txt");
|
||||||
assert_eq!(to_radix_26(20, 1), Some(vec![20]));
|
assert_eq!(it.next().unwrap(), "chunk_ab.txt");
|
||||||
assert_eq!(to_radix_26(25, 1), Some(vec![25]));
|
assert_eq!(it.next().unwrap(), "chunk_ac.txt");
|
||||||
|
|
||||||
|
let mut it = FilenameIterator::new("chunk_", ".txt", 0, false);
|
||||||
|
assert_eq!(it.nth(26 * 25 - 1).unwrap(), "chunk_yz.txt");
|
||||||
|
assert_eq!(it.next().unwrap(), "chunk_zaaa.txt");
|
||||||
|
assert_eq!(it.next().unwrap(), "chunk_zaab.txt");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_to_radix_26_width_two() {
|
fn test_filename_iterator_numeric_dynamic_width() {
|
||||||
assert_eq!(to_radix_26(0, 2), Some(vec![0, 0]));
|
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
|
||||||
assert_eq!(to_radix_26(10, 2), Some(vec![0, 10]));
|
assert_eq!(it.next().unwrap(), "chunk_00.txt");
|
||||||
assert_eq!(to_radix_26(20, 2), Some(vec![0, 20]));
|
assert_eq!(it.next().unwrap(), "chunk_01.txt");
|
||||||
assert_eq!(to_radix_26(25, 2), Some(vec![0, 25]));
|
assert_eq!(it.next().unwrap(), "chunk_02.txt");
|
||||||
|
|
||||||
assert_eq!(to_radix_26(26, 2), Some(vec![1, 0]));
|
let mut it = FilenameIterator::new("chunk_", ".txt", 0, true);
|
||||||
assert_eq!(to_radix_26(30, 2), Some(vec![1, 4]));
|
assert_eq!(it.nth(10 * 9 - 1).unwrap(), "chunk_89.txt");
|
||||||
|
assert_eq!(it.next().unwrap(), "chunk_9000.txt");
|
||||||
assert_eq!(to_radix_26(26 * 2, 2), Some(vec![2, 0]));
|
assert_eq!(it.next().unwrap(), "chunk_9001.txt");
|
||||||
assert_eq!(to_radix_26(26 * 26 - 1, 2), Some(vec![25, 25]));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_str_prefix_dynamic_width() {
|
|
||||||
assert_eq!(str_prefix(0).as_deref(), Some("aa"));
|
|
||||||
assert_eq!(str_prefix(1).as_deref(), Some("ab"));
|
|
||||||
assert_eq!(str_prefix(2).as_deref(), Some("ac"));
|
|
||||||
assert_eq!(str_prefix(25).as_deref(), Some("az"));
|
|
||||||
|
|
||||||
assert_eq!(str_prefix(26).as_deref(), Some("ba"));
|
|
||||||
assert_eq!(str_prefix(27).as_deref(), Some("bb"));
|
|
||||||
assert_eq!(str_prefix(28).as_deref(), Some("bc"));
|
|
||||||
assert_eq!(str_prefix(51).as_deref(), Some("bz"));
|
|
||||||
|
|
||||||
assert_eq!(str_prefix(52).as_deref(), Some("ca"));
|
|
||||||
|
|
||||||
assert_eq!(str_prefix(26 * 25 - 1).as_deref(), Some("yz"));
|
|
||||||
assert_eq!(str_prefix(26 * 25).as_deref(), Some("zaaa"));
|
|
||||||
assert_eq!(str_prefix(26 * 25 + 1).as_deref(), Some("zaab"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_num_prefix_dynamic_width() {
|
|
||||||
assert_eq!(num_prefix(0), "00");
|
|
||||||
assert_eq!(num_prefix(9), "09");
|
|
||||||
assert_eq!(num_prefix(17), "17");
|
|
||||||
assert_eq!(num_prefix(89), "89");
|
|
||||||
assert_eq!(num_prefix(90), "9000");
|
|
||||||
assert_eq!(num_prefix(91), "9001");
|
|
||||||
assert_eq!(num_prefix(989), "9899");
|
|
||||||
assert_eq!(num_prefix(990), "990000");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_str_prefix_fixed_width() {
|
|
||||||
assert_eq!(str_prefix_fixed_width(0, 2).as_deref(), Some("aa"));
|
|
||||||
assert_eq!(str_prefix_fixed_width(1, 2).as_deref(), Some("ab"));
|
|
||||||
assert_eq!(str_prefix_fixed_width(26, 2).as_deref(), Some("ba"));
|
|
||||||
assert_eq!(
|
|
||||||
str_prefix_fixed_width(26 * 26 - 1, 2).as_deref(),
|
|
||||||
Some("zz")
|
|
||||||
);
|
|
||||||
assert_eq!(str_prefix_fixed_width(26 * 26, 2).as_deref(), None);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_num_prefix_fixed_width() {
|
|
||||||
assert_eq!(num_prefix_fixed_width(0, 2).as_deref(), Some("00"));
|
|
||||||
assert_eq!(num_prefix_fixed_width(1, 2).as_deref(), Some("01"));
|
|
||||||
assert_eq!(num_prefix_fixed_width(99, 2).as_deref(), Some("99"));
|
|
||||||
assert_eq!(num_prefix_fixed_width(100, 2).as_deref(), None);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_alphabetic_suffix() {
|
|
||||||
let factory = FilenameFactory::new("123", "789", 3, false);
|
|
||||||
assert_eq!(factory.make(0).unwrap(), "123aaa789");
|
|
||||||
assert_eq!(factory.make(1).unwrap(), "123aab789");
|
|
||||||
assert_eq!(factory.make(28).unwrap(), "123abc789");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_numeric_suffix() {
|
|
||||||
let factory = FilenameFactory::new("abc", "xyz", 3, true);
|
|
||||||
assert_eq!(factory.make(0).unwrap(), "abc000xyz");
|
|
||||||
assert_eq!(factory.make(1).unwrap(), "abc001xyz");
|
|
||||||
assert_eq!(factory.make(123).unwrap(), "abc123xyz");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
513
src/uu/split/src/number.rs
Normal file
513
src/uu/split/src/number.rs
Normal file
|
@ -0,0 +1,513 @@
|
||||||
|
// * This file is part of the uutils coreutils package.
|
||||||
|
// *
|
||||||
|
// * For the full copyright and license information, please view the LICENSE
|
||||||
|
// * file that was distributed with this source code.
|
||||||
|
// spell-checker:ignore zaaa zaab
|
||||||
|
//! A number in arbitrary radix expressed in a positional notation.
|
||||||
|
//!
|
||||||
|
//! Use the [`Number`] enum to represent an arbitrary number in an
|
||||||
|
//! arbitrary radix. A number can be incremented and can be
|
||||||
|
//! displayed. See the [`Number`] documentation for more information.
|
||||||
|
//!
|
||||||
|
//! See the Wikipedia articles on [radix] and [positional notation]
|
||||||
|
//! for more background information on those topics.
|
||||||
|
//!
|
||||||
|
//! [radix]: https://en.wikipedia.org/wiki/Radix
|
||||||
|
//! [positional notation]: https://en.wikipedia.org/wiki/Positional_notation
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt::{self, Display, Formatter};
|
||||||
|
|
||||||
|
/// An overflow due to incrementing a number beyond its representable limit.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Overflow;
|
||||||
|
|
||||||
|
impl fmt::Display for Overflow {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "Overflow")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for Overflow {}
|
||||||
|
|
||||||
|
/// A number in arbitrary radix expressed in a positional notation.
|
||||||
|
///
|
||||||
|
/// Use the [`Number`] enum to represent an arbitrary number in an
|
||||||
|
/// arbitrary radix. A number can be incremented with
|
||||||
|
/// [`Number::increment`]. The [`FixedWidthNumber`] overflows when
|
||||||
|
/// attempting to increment it beyond the maximum number that can be
|
||||||
|
/// represented in the specified width. The [`DynamicWidthNumber`]
|
||||||
|
/// follows a non-standard incrementing procedure that is used
|
||||||
|
/// specifically for the `split` program. See the
|
||||||
|
/// [`DynamicWidthNumber`] documentation for more information.
|
||||||
|
///
|
||||||
|
/// Numbers of radix 10 are displayable and rendered as decimal
|
||||||
|
/// numbers (for example, "00" or "917"). Numbers of radix 26 are
|
||||||
|
/// displayable and rendered as lowercase ASCII alphabetic characters
|
||||||
|
/// (for example, "aa" or "zax"). Numbers of other radices cannot be
|
||||||
|
/// displayed. The display of a [`DynamicWidthNumber`] includes a
|
||||||
|
/// prefix whose length depends on the width of the number. See the
|
||||||
|
/// [`DynamicWidthNumber`] documentation for more information.
|
||||||
|
///
|
||||||
|
/// The digits of a number are accessible via the [`Number::digits`]
|
||||||
|
/// method. The digits are represented as a [`Vec<u8>`] with the most
|
||||||
|
/// significant digit on the left and the least significant digit on
|
||||||
|
/// the right. Each digit is a nonnegative integer less than the
|
||||||
|
/// radix. For example, if the radix is 3, then `vec![1, 0, 2]`
|
||||||
|
/// represents the decimal number 11:
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// For the [`DynamicWidthNumber`], the digits are not unique in the
|
||||||
|
/// sense that repeatedly incrementing the number will eventually
|
||||||
|
/// yield `vec![0, 0]`, `vec![0, 0, 0], `vec![0, 0, 0, 0]`, etc.
|
||||||
|
/// That's okay because each of these numbers will be displayed
|
||||||
|
/// differently and we only intend to use these numbers for display
|
||||||
|
/// purposes and not for mathematical purposes.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum Number {
|
||||||
|
/// A fixed-width representation of a number.
|
||||||
|
FixedWidth(FixedWidthNumber),
|
||||||
|
|
||||||
|
/// A representation of a number with a dynamically growing width.
|
||||||
|
DynamicWidth(DynamicWidthNumber),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Number {
|
||||||
|
/// The digits of this number in decreasing order of significance.
|
||||||
|
///
|
||||||
|
/// The digits are represented as a [`Vec<u8>`] with the most
|
||||||
|
/// significant digit on the left and the least significant digit
|
||||||
|
/// on the right. Each digit is a nonnegative integer less than
|
||||||
|
/// the radix. For example, if the radix is 3, then `vec![1, 0,
|
||||||
|
/// 2]` represents the decimal number 11:
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// 1 * 3^2 + 0 * 3^1 + 2 * 3^0 = 9 + 0 + 2 = 11
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// For the [`DynamicWidthNumber`], the digits are not unique in the
|
||||||
|
/// sense that repeatedly incrementing the number will eventually
|
||||||
|
/// yield `vec![0, 0]`, `vec![0, 0, 0], `vec![0, 0, 0, 0]`, etc.
|
||||||
|
/// That's okay because each of these numbers will be displayed
|
||||||
|
/// differently and we only intend to use these numbers for display
|
||||||
|
/// purposes and not for mathematical purposes.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn digits(&self) -> &Vec<u8> {
|
||||||
|
match self {
|
||||||
|
Number::FixedWidth(number) => &number.digits,
|
||||||
|
Number::DynamicWidth(number) => &number.digits,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increment this number to its successor.
|
||||||
|
///
|
||||||
|
/// If incrementing this number would result in an overflow beyond
|
||||||
|
/// the maximum representable number, then return
|
||||||
|
/// [`Err(Overflow)`]. The [`FixedWidthNumber`] overflows, but
|
||||||
|
/// [`DynamicWidthNumber`] does not.
|
||||||
|
///
|
||||||
|
/// The [`DynamicWidthNumber`] follows a non-standard incrementing
|
||||||
|
/// procedure that is used specifically for the `split` program.
|
||||||
|
/// See the [`DynamicWidthNumber`] documentation for more
|
||||||
|
/// information.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// This method returns [`Err(Overflow)`] when attempting to
|
||||||
|
/// increment beyond the largest representable number.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Overflowing:
|
||||||
|
///
|
||||||
|
/// ```rust,ignore
|
||||||
|
///
|
||||||
|
/// use crate::number::FixedWidthNumber;
|
||||||
|
/// use crate::number::Number;
|
||||||
|
/// use crate::number::Overflow;
|
||||||
|
///
|
||||||
|
/// // Radix 3, width of 1 digit.
|
||||||
|
/// let mut number = Number::FixedWidth(FixedWidthNumber::new(3, 1));
|
||||||
|
/// number.increment().unwrap(); // from 0 to 1
|
||||||
|
/// number.increment().unwrap(); // from 1 to 2
|
||||||
|
/// assert!(number.increment().is_err());
|
||||||
|
/// ```
|
||||||
|
pub fn increment(&mut self) -> Result<(), Overflow> {
|
||||||
|
match self {
|
||||||
|
Number::FixedWidth(number) => number.increment(),
|
||||||
|
Number::DynamicWidth(number) => number.increment(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for Number {
|
||||||
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Number::FixedWidth(number) => number.fmt(f),
|
||||||
|
Number::DynamicWidth(number) => number.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A number with a fixed count of digits, stored in positional notation.
///
/// The digits live in a [`Vec<u8>`] ordered from most significant
/// (left) to least significant (right). Every digit is a nonnegative
/// integer smaller than the radix.
///
/// # Incrementing
///
/// A new number is `vec![0; width]`, that is, the value 0 written with
/// the specified number of digits. Each call to [`Number::increment`]
/// adds 1 in the usual sense. Once the digits are
/// `vec![radix - 1; width]`, a further increment would overflow, so
/// [`Number::increment`] returns an error.
///
/// # Displaying
///
/// Only radix 10 and radix 26 can be displayed. With radix 10 the
/// digits are concatenated into a fixed-width decimal number. With
/// radix 26 each digit is mapped to the corresponding lowercase ASCII
/// letter ('a', 'b', 'c', and so on) and the letters are concatenated.
#[derive(Clone)]
pub struct FixedWidthNumber {
    /// Radix of the number; every digit is smaller than this value.
    radix: u8,
    /// Digits of the number, most significant digit first.
    digits: Vec<u8>,
}
|
||||||
|
|
||||||
|
impl FixedWidthNumber {
|
||||||
|
/// Instantiate a number of the given radix and width.
|
||||||
|
pub fn new(radix: u8, width: usize) -> FixedWidthNumber {
|
||||||
|
FixedWidthNumber {
|
||||||
|
radix,
|
||||||
|
digits: vec![0; width],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increment this number.
|
||||||
|
///
|
||||||
|
/// This method adds one to this number. If incrementing this
|
||||||
|
/// number would require more digits than are available with the
|
||||||
|
/// specified width, then this method returns [`Err(Overflow)`].
|
||||||
|
fn increment(&mut self) -> Result<(), Overflow> {
|
||||||
|
for i in (0..self.digits.len()).rev() {
|
||||||
|
// Increment the current digit.
|
||||||
|
self.digits[i] += 1;
|
||||||
|
|
||||||
|
// If the digit overflows, then set it to 0 and continue
|
||||||
|
// to the next iteration to increment the next most
|
||||||
|
// significant digit. Otherwise, terminate the loop, since
|
||||||
|
// there will be no further changes to any higher order
|
||||||
|
// digits.
|
||||||
|
if self.digits[i] == self.radix {
|
||||||
|
self.digits[i] = 0;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return an error on overflow, which is signified by all zeros.
|
||||||
|
if self.digits == vec![0; self.digits.len()] {
|
||||||
|
Err(Overflow)
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for FixedWidthNumber {
|
||||||
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||||
|
match self.radix {
|
||||||
|
10 => {
|
||||||
|
let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect();
|
||||||
|
write!(f, "{}", digits)
|
||||||
|
}
|
||||||
|
26 => {
|
||||||
|
let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
|
||||||
|
write!(f, "{}", digits)
|
||||||
|
}
|
||||||
|
_ => Err(fmt::Error),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A number whose count of digits grows, stored in positional notation.
///
/// The digits live in a [`Vec<u8>`] ordered from most significant
/// (left) to least significant (right). Every digit is a nonnegative
/// integer smaller than the radix.
///
/// # Incrementing
///
/// A new number is `vec![0, 0]`: the value 0 written with two digits.
/// Each call to [`Number::increment`] adds 1, with one exception.
/// Where an ordinary increment would take the digits from
/// `vec![radix - 2, radix - 1]` to `vec![radix - 1, 0]`, the number
/// instead gains one digit and every digit becomes 0. With radix 3 and
/// digits `vec![1, 2]`, for example, incrementing yields
/// `vec![0, 0, 0]`. The width therefore grows by one each time the
/// most significant digit would have reached its maximum value.
///
/// "Incrementing" in this sense does not increment the *value* of the
/// number. It is only an abstract way of updating the representation
/// that is useful for the `split` program.
///
/// # Displaying
///
/// Only radix 10 and radix 26 can be displayed. Let `n` be the number
/// of digits. With radix 10, the output is `n - 2` copies of the
/// character '9' followed by the digits as a fixed-width decimal
/// number. With radix 26, the output is `n - 2` copies of the
/// character 'z' followed by the digits, each mapped to the
/// corresponding lowercase ASCII letter ('a', 'b', 'c', and so on).
///
/// This way of displaying the number is specific to the `split`
/// program.
#[derive(Clone)]
pub struct DynamicWidthNumber {
    /// Radix of the number; every digit is smaller than this value.
    radix: u8,
    /// Digits of the number, most significant digit first.
    digits: Vec<u8>,
}
|
||||||
|
|
||||||
|
impl DynamicWidthNumber {
|
||||||
|
/// Instantiate a number of the given radix, starting with width 2.
|
||||||
|
///
|
||||||
|
/// This associated function returns a new instance of the struct
|
||||||
|
/// with the given radix and a width of two digits, both 0.
|
||||||
|
pub fn new(radix: u8) -> DynamicWidthNumber {
|
||||||
|
DynamicWidthNumber {
|
||||||
|
radix,
|
||||||
|
digits: vec![0, 0],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set all digits to zero.
|
||||||
|
fn reset(&mut self) {
|
||||||
|
for i in 0..self.digits.len() {
|
||||||
|
self.digits[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increment this number.
|
||||||
|
///
|
||||||
|
/// This method adds one to this number. The first time that the
|
||||||
|
/// most significant digit would achieve its highest possible
|
||||||
|
/// value (that is, `radix - 1`), then all the digits get reset to
|
||||||
|
/// 0 and the number of digits increases by one.
|
||||||
|
///
|
||||||
|
/// This method never returns an error.
|
||||||
|
fn increment(&mut self) -> Result<(), Overflow> {
|
||||||
|
for i in (0..self.digits.len()).rev() {
|
||||||
|
// Increment the current digit.
|
||||||
|
self.digits[i] += 1;
|
||||||
|
|
||||||
|
// If the digit overflows, then set it to 0 and continue
|
||||||
|
// to the next iteration to increment the next most
|
||||||
|
// significant digit. Otherwise, terminate the loop, since
|
||||||
|
// there will be no further changes to any higher order
|
||||||
|
// digits.
|
||||||
|
if self.digits[i] == self.radix {
|
||||||
|
self.digits[i] = 0;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the most significant digit is at its maximum value, then
|
||||||
|
// add another digit and reset all digits zero.
|
||||||
|
if self.digits[0] == self.radix - 1 {
|
||||||
|
self.digits.push(0);
|
||||||
|
self.reset();
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for DynamicWidthNumber {
|
||||||
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||||
|
match self.radix {
|
||||||
|
10 => {
|
||||||
|
let num_fill_chars = self.digits.len() - 2;
|
||||||
|
let digits: String = self.digits.iter().map(|d| (b'0' + d) as char).collect();
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{empty:9<num_fill_chars$}{digits}",
|
||||||
|
empty = "",
|
||||||
|
num_fill_chars = num_fill_chars,
|
||||||
|
digits = digits,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
26 => {
|
||||||
|
let num_fill_chars = self.digits.len() - 2;
|
||||||
|
let digits: String = self.digits.iter().map(|d| (b'a' + d) as char).collect();
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{empty:z<num_fill_chars$}{digits}",
|
||||||
|
empty = "",
|
||||||
|
num_fill_chars = num_fill_chars,
|
||||||
|
digits = digits,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
_ => Err(fmt::Error),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::number::DynamicWidthNumber;
    use crate::number::FixedWidthNumber;
    use crate::number::Number;
    use crate::number::Overflow;

    #[test]
    fn test_dynamic_width_number_increment() {
        let mut n = Number::DynamicWidth(DynamicWidthNumber::new(3));
        assert_eq!(n.digits(), &vec![0, 0]);

        // Digits expected after each successive increment. After
        // [1, 2] the number widens to three digits instead of
        // reaching [2, 0].
        let successors: Vec<Vec<u8>> = vec![
            vec![0, 1],
            vec![0, 2],
            vec![1, 0],
            vec![1, 1],
            vec![1, 2],
            vec![0, 0, 0],
            vec![0, 0, 1],
        ];
        for expected in &successors {
            n.increment().unwrap();
            assert_eq!(n.digits(), expected);
        }
    }

    #[test]
    fn test_dynamic_width_number_display_alphabetic() {
        /// A radix 26 number that has been incremented `n` times.
        fn num(n: usize) -> Number {
            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(26));
            (0..n).for_each(|_| number.increment().unwrap());
            number
        }

        let cases = [
            (0, "aa"),
            (1, "ab"),
            (2, "ac"),
            (25, "az"),
            (26, "ba"),
            (27, "bb"),
            (28, "bc"),
            (26 + 25, "bz"),
            (26 + 26, "ca"),
            (26 * 25 - 1, "yz"),
            (26 * 25, "zaaa"),
            (26 * 25 + 1, "zaab"),
        ];
        for &(count, expected) in cases.iter() {
            assert_eq!(num(count).to_string(), expected, "after {} increments", count);
        }
    }

    #[test]
    fn test_dynamic_width_number_display_numeric() {
        /// A radix 10 number that has been incremented `n` times.
        fn num(n: usize) -> Number {
            let mut number = Number::DynamicWidth(DynamicWidthNumber::new(10));
            (0..n).for_each(|_| number.increment().unwrap());
            number
        }

        let cases = [
            (0, "00"),
            (9, "09"),
            (17, "17"),
            (10 * 9 - 1, "89"),
            (10 * 9, "9000"),
            (10 * 9 + 1, "9001"),
            (10 * 99 - 1, "9899"),
            (10 * 99, "990000"),
            (10 * 99 + 1, "990001"),
        ];
        for &(count, expected) in cases.iter() {
            assert_eq!(num(count).to_string(), expected, "after {} increments", count);
        }
    }

    #[test]
    fn test_fixed_width_number_increment() {
        let mut n = Number::FixedWidth(FixedWidthNumber::new(3, 2));
        assert_eq!(n.digits(), &vec![0, 0]);

        // Digits expected after each successive increment.
        let successors: Vec<Vec<u8>> = vec![
            vec![0, 1],
            vec![0, 2],
            vec![1, 0],
            vec![1, 1],
            vec![1, 2],
            vec![2, 0],
            vec![2, 1],
            vec![2, 2],
        ];
        for expected in &successors {
            n.increment().unwrap();
            assert_eq!(n.digits(), expected);
        }

        // [2, 2] is the largest two-digit number in radix 3.
        assert!(n.increment().is_err());
    }

    #[test]
    fn test_fixed_width_number_display_alphabetic() {
        /// A radix 26, width 2 number incremented `n` times, or the
        /// overflow error hit along the way.
        fn num(n: usize) -> Result<Number, Overflow> {
            let mut number = Number::FixedWidth(FixedWidthNumber::new(26, 2));
            (0..n).try_for_each(|_| number.increment())?;
            Ok(number)
        }

        let cases = [
            (0, "aa"),
            (1, "ab"),
            (2, "ac"),
            (25, "az"),
            (26, "ba"),
            (27, "bb"),
            (28, "bc"),
            (26 + 25, "bz"),
            (26 + 26, "ca"),
            (26 * 25 - 1, "yz"),
            (26 * 25, "za"),
            (26 * 26 - 1, "zz"),
        ];
        for &(count, expected) in cases.iter() {
            assert_eq!(
                num(count).unwrap().to_string(),
                expected,
                "after {} increments",
                count
            );
        }
        assert!(num(26 * 26).is_err());
    }

    #[test]
    fn test_fixed_width_number_display_numeric() {
        /// A radix 10, width 2 number incremented `n` times, or the
        /// overflow error hit along the way.
        fn num(n: usize) -> Result<Number, Overflow> {
            let mut number = Number::FixedWidth(FixedWidthNumber::new(10, 2));
            (0..n).try_for_each(|_| number.increment())?;
            Ok(number)
        }

        let cases = [
            (0, "00"),
            (9, "09"),
            (17, "17"),
            (10 * 9 - 1, "89"),
            (10 * 9, "90"),
            (10 * 10 - 1, "99"),
        ];
        for &(count, expected) in cases.iter() {
            assert_eq!(
                num(count).unwrap().to_string(),
                expected,
                "after {} increments",
                count
            );
        }
        assert!(num(10 * 10).is_err());
    }
}
|
|
@ -8,9 +8,10 @@
|
||||||
// spell-checker:ignore (ToDO) PREFIXaa
|
// spell-checker:ignore (ToDO) PREFIXaa
|
||||||
|
|
||||||
mod filenames;
|
mod filenames;
|
||||||
|
mod number;
|
||||||
mod platform;
|
mod platform;
|
||||||
|
|
||||||
use crate::filenames::FilenameFactory;
|
use crate::filenames::FilenameIterator;
|
||||||
use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
|
use clap::{crate_version, App, AppSettings, Arg, ArgMatches};
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
use std::env;
|
use std::env;
|
||||||
|
@ -384,7 +385,7 @@ where
|
||||||
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
|
let chunk_size = (num_bytes / (num_chunks as u64)) as usize;
|
||||||
|
|
||||||
// This object is responsible for creating the filename for each chunk.
|
// This object is responsible for creating the filename for each chunk.
|
||||||
let filename_factory = FilenameFactory::new(
|
let mut filename_iterator = FilenameIterator::new(
|
||||||
&settings.prefix,
|
&settings.prefix,
|
||||||
&settings.additional_suffix,
|
&settings.additional_suffix,
|
||||||
settings.suffix_length,
|
settings.suffix_length,
|
||||||
|
@ -394,9 +395,9 @@ where
|
||||||
// Create one writer for each chunk. This will create each
|
// Create one writer for each chunk. This will create each
|
||||||
// of the underlying files (if not in `--filter` mode).
|
// of the underlying files (if not in `--filter` mode).
|
||||||
let mut writers = vec![];
|
let mut writers = vec![];
|
||||||
for i in 0..num_chunks {
|
for _ in 0..num_chunks {
|
||||||
let filename = filename_factory
|
let filename = filename_iterator
|
||||||
.make(i)
|
.next()
|
||||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||||
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
let writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||||
writers.push(writer);
|
writers.push(writer);
|
||||||
|
@ -462,17 +463,16 @@ fn split(settings: &Settings) -> UResult<()> {
|
||||||
};
|
};
|
||||||
|
|
||||||
// This object is responsible for creating the filename for each chunk.
|
// This object is responsible for creating the filename for each chunk.
|
||||||
let filename_factory = FilenameFactory::new(
|
let mut filename_iterator = FilenameIterator::new(
|
||||||
&settings.prefix,
|
&settings.prefix,
|
||||||
&settings.additional_suffix,
|
&settings.additional_suffix,
|
||||||
settings.suffix_length,
|
settings.suffix_length,
|
||||||
settings.numeric_suffix,
|
settings.numeric_suffix,
|
||||||
);
|
);
|
||||||
let mut fileno = 0;
|
|
||||||
loop {
|
loop {
|
||||||
// Get a new part file set up, and construct `writer` for it.
|
// Get a new part file set up, and construct `writer` for it.
|
||||||
let filename = filename_factory
|
let filename = filename_iterator
|
||||||
.make(fileno)
|
.next()
|
||||||
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
.ok_or_else(|| USimpleError::new(1, "output file suffixes exhausted"))?;
|
||||||
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
|
||||||
|
|
||||||
|
@ -509,8 +509,6 @@ fn split(settings: &Settings) -> UResult<()> {
|
||||||
if settings.verbose {
|
if settings.verbose {
|
||||||
println!("creating file {}", filename.quote());
|
println!("creating file {}", filename.quote());
|
||||||
}
|
}
|
||||||
|
|
||||||
fileno += 1;
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue