1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 19:47:45 +00:00

split: correct filename creation algorithm

Fix two issues with the filename creation algorithm. First, this
corrects the behavior of the `-a` option. This commit ensures a
failure occurs when the number of chunks exceeds the number of
filenames representable with the specified fixed width:

    $ printf "%0.sa" {1..11} | split -d -b 1 -a 1
    split: output file suffixes exhausted

Second, this corrects the behavior of the default behavior when `-a`
is not specified on the command line. Previously, it was always
settings the filenames to have length 2 suffixes. This commit corrects
the behavior to follow the algorithm implied by GNU split, where the
filename lengths grow dynamically by two characters once the number of
chunks grows sufficiently large:

    $ printf "%0.sa" {1..91} | ./target/debug/coreutils split -d -b 1 \
    >   && ls x* | tail
    x81
    x82
    x83
    x84
    x85
    x86
    x87
    x88
    x89
    x9000
This commit is contained in:
Jeffrey Finkelstein 2022-01-01 13:47:11 -05:00
parent e5d6b7a1cf
commit cfe5a0d82c
6 changed files with 618 additions and 48 deletions

View file

@ -2,7 +2,7 @@
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore xzaaa sixhundredfiftyonebytes ninetyonebytes asciilowercase
extern crate rand;
extern crate regex;
@ -16,7 +16,7 @@ use std::io::Write;
use std::path::Path;
use std::{
fs::{read_dir, File},
io::BufWriter,
io::{BufWriter, Read},
};
fn random_chars(n: usize) -> String {
@ -340,3 +340,71 @@ fn test_split_invalid_bytes_size() {
}
}
}
fn file_read(at: &AtPath, filename: &str) -> String {
let mut s = String::new();
at.open(filename).read_to_string(&mut s).unwrap();
s
}
// TODO Use char::from_digit() in Rust v1.51.0 or later.
fn char_from_digit(n: usize) -> char {
(b'a' + n as u8) as char
}
/// Test for the default suffix length behavior: dynamically increasing size.
#[test]
fn test_alphabetic_dynamic_suffix_length() {
let (at, mut ucmd) = at_and_ucmd!();
// Split into chunks of one byte each.
//
// The input file has (26^2) - 26 + 1 = 651 bytes. This is just
// enough to force `split` to dynamically increase the length of
// the filename for the very last chunk.
//
// We expect the output files to be named
//
// xaa, xab, xac, ..., xyx, xyy, xyz, xzaaa
//
ucmd.args(&["-b", "1", "sixhundredfiftyonebytes.txt"])
.succeeds();
for i in 0..25 {
for j in 0..26 {
let filename = format!("x{}{}", char_from_digit(i), char_from_digit(j),);
let contents = file_read(&at, &filename);
assert_eq!(contents, "a");
}
}
assert_eq!(file_read(&at, "xzaaa"), "a");
}
/// Test for the default suffix length behavior: dynamically increasing size.
#[test]
fn test_numeric_dynamic_suffix_length() {
let (at, mut ucmd) = at_and_ucmd!();
// Split into chunks of one byte each, use numbers instead of
// letters as file suffixes.
//
// The input file has (10^2) - 10 + 1 = 91 bytes. This is just
// enough to force `split` to dynamically increase the length of
// the filename for the very last chunk.
//
// x00, x01, x02, ..., x87, x88, x89, x9000
//
ucmd.args(&["-d", "-b", "1", "ninetyonebytes.txt"])
.succeeds();
for i in 0..90 {
let filename = format!("x{:02}", i);
let contents = file_read(&at, &filename);
assert_eq!(contents, "a");
}
assert_eq!(file_read(&at, "x9000"), "a");
}
#[test]
fn test_suffixes_exhausted() {
new_ucmd!()
.args(&["-b", "1", "-a", "1", "asciilowercase.txt"])
.fails()
.stderr_only("split: output file suffixes exhausted");
}