Add initial tests for split.
I created random data to test several cases. I verified that the data is split into the correct number of files and can also be reassembled into the original file.
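In outline, every test follows the same round-trip pattern with the Glob and RandomFile helpers defined in the new test/split.rs: write random data, run ./split on it, count the output chunks, and check that the chunks concatenate back to the original. A condensed paraphrase of test_split_default from the file below:

    let name = "split_default";
    let glob = Glob::new(".", r"x[:alpha:][:alpha:]$");   // intended to match split's default alphabetic suffixes ("xaa", "xab", ...)
    RandomFile::new(name).add_lines(2000);                // 2000 random lines -> two 1000-line chunks by default
    assert!(Command::new(PROGNAME).args(&[name]).status().unwrap().success());
    assert_eq!(glob.count(), 2);                          // split produced the expected number of files
    assert_eq!(glob.collate(), get_file_contents(name));  // reassembled chunks equal the original data
    glob.remove_all();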
parent 39de3f7b71
commit 234c81311f

3 changed files with 161 additions and 0 deletions
Makefile (+1 line)

@@ -169,6 +169,7 @@ TEST_PROGS := \
   paste \
   seq \
   sort \
+  split \
   test \
   tr \
   truncate \
src/split/deps.mk (new file, +1 line)

@@ -0,0 +1 @@
+DEPLIBS += rand regex
test/split.rs (new file, +159 lines)

@@ -0,0 +1,159 @@
extern crate libc;
extern crate rand;
extern crate regex;

use std::fs::{File, read_dir, remove_file};
use std::io::{Read, Write};
use std::path::Path;
use std::process::Command;
use rand::{Rng, thread_rng};
use regex::Regex;

static PROGNAME: &'static str = "./split";

fn random_chars(n: usize) -> String {
    thread_rng().gen_ascii_chars().take(n).collect::<String>()
}

fn get_file_contents(name: &str) -> Vec<u8> {
    let mut f = File::open(Path::new(name)).unwrap();
    let mut contents: Vec<u8> = vec!();
    let _ = f.read_to_end(&mut contents);
    contents
}

struct Glob {
    directory: String,
    regex: Regex
}

impl Glob {
    fn new(directory: &str, regex: &str) -> Glob {
        Glob {
            directory: directory.to_string(),
            regex: Regex::new(regex).unwrap()
        }
    }

    fn count(&self) -> usize {
        self.collect().len()
    }

    fn collect(&self) -> Vec<String> {
        read_dir(Path::new(&self.directory)).unwrap().filter_map(|entry| {
            let path = entry.unwrap().path();
            let name = path.as_path().to_str().unwrap_or("");
            if self.regex.is_match(name) { Some(name.to_string()) } else { None }
        }).collect()
    }

    fn collate(&self) -> Vec<u8> {
        let mut files = self.collect();
        files.sort();
        let mut data: Vec<u8> = vec!();
        for name in files.iter() {
            data.extend(get_file_contents(name));
        }
        data
    }

    fn remove_all(&self) {
        for name in self.collect().iter() {
            let _ = remove_file(name);
        }
    }
}

struct RandomFile {
    inner: File
}

impl RandomFile {
    fn new(name: &str) -> RandomFile {
        RandomFile { inner: File::create(Path::new(name)).unwrap() }
    }

    fn add_bytes(&mut self, bytes: usize) {
        let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes };
        let mut n = bytes;
        while n > chunk_size {
            let _ = write!(self.inner, "{}", random_chars(chunk_size));
            n -= chunk_size;
        }
        let _ = write!(self.inner, "{}", random_chars(n));
    }

    fn add_lines(&mut self, lines: usize) {
        let line_size: usize = 32;
        let mut n = lines;
        while n > 0 {
            let _ = writeln!(self.inner, "{}", random_chars(line_size));
            n -= 1;
        }
    }
}

#[test]
fn test_split_default() {
    let name = "split_default";
    let glob = Glob::new(".", r"x[:alpha:][:alpha:]$");
    RandomFile::new(name).add_lines(2000);
    if !Command::new(PROGNAME).args(&[name]).status().unwrap().success() {
        panic!();
    }
    assert_eq!(glob.count(), 2);
    assert_eq!(glob.collate(), get_file_contents(name));
    glob.remove_all();
}

#[test]
fn test_split_num_prefixed_chunks_by_bytes() {
    let name = "split_num_prefixed_chunks_by_bytes";
    let glob = Glob::new(".", r"x\d\d$");
    RandomFile::new(name).add_bytes(10000);
    if !Command::new(PROGNAME).args(&["-d", "-b", "1000", name]).status().unwrap().success() {
        panic!();
    }
    assert_eq!(glob.count(), 10);
    assert_eq!(glob.collate(), get_file_contents(name));
    glob.remove_all();
}

#[test]
fn test_split_str_prefixed_chunks_by_bytes() {
    let name = "split_str_prefixed_chunks_by_bytes";
    let glob = Glob::new(".", r"x[:alpha:][:alpha:]$");
    RandomFile::new(name).add_bytes(10000);
    if !Command::new(PROGNAME).args(&["-b", "1000", name]).status().unwrap().success() {
        panic!();
    }
    assert_eq!(glob.count(), 10);
    assert_eq!(glob.collate(), get_file_contents(name));
    glob.remove_all();
}

#[test]
fn test_split_num_prefixed_chunks_by_lines() {
    let name = "split_num_prefixed_chunks_by_lines";
    let glob = Glob::new(".", r"x\d\d$");
    RandomFile::new(name).add_lines(10000);
    if !Command::new(PROGNAME).args(&["-d", "-l", "1000", name]).status().unwrap().success() {
        panic!();
    }
    assert_eq!(glob.count(), 10);
    assert_eq!(glob.collate(), get_file_contents(name));
    glob.remove_all();
}

#[test]
fn test_split_str_prefixed_chunks_by_lines() {
    let name = "split_str_prefixed_chunks_by_lines";
    let glob = Glob::new(".", r"x[:alpha:][:alpha:]$");
    RandomFile::new(name).add_lines(10000);
    if !Command::new(PROGNAME).args(&["-l", "1000", name]).status().unwrap().success() {
        panic!();
    }
    assert_eq!(glob.count(), 10);
    assert_eq!(glob.collate(), get_file_contents(name));
    glob.remove_all();
}
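The Glob and RandomFile helpers keep each case to a few lines, so further cases are cheap to add. A hypothetical extension (not part of this commit) exercising an uneven byte split, where 10000 bytes at 1024 bytes per chunk should give nine full chunks plus a 784-byte remainder, ten files in total:

    #[test]
    fn test_split_uneven_chunks_by_bytes() {
        let name = "split_uneven_chunks_by_bytes";
        let glob = Glob::new(".", r"x[:alpha:][:alpha:]$");
        RandomFile::new(name).add_bytes(10000);
        if !Command::new(PROGNAME).args(&["-b", "1024", name]).status().unwrap().success() {
            panic!();
        }
        assert_eq!(glob.count(), 10);                          // 9 x 1024 + 1 x 784 = 10000 bytes
        assert_eq!(glob.collate(), get_file_contents(name));   // remainder chunk must still round-trip
        glob.remove_all();
    }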