
Merge branch 'master' of https://github.com/uutils/coreutils into sort-optimize-line

Michael Debertol 2021-05-08 15:15:34 +02:00
commit 1afeb55881
16 changed files with 574 additions and 565 deletions


@ -15,6 +15,7 @@ use std::fs;
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::Path;
use uucore::fs::display_permissions_unix;
use uucore::libc::mode_t;
#[cfg(not(windows))]
use uucore::mode;
use uucore::InvalidEncodingHandling;
@ -306,7 +307,7 @@ impl Chmoder {
"mode of '{}' retained as {:04o} ({})",
file.display(),
fperm,
display_permissions_unix(fperm),
display_permissions_unix(fperm as mode_t, false),
);
}
Ok(())
@ -319,9 +320,9 @@ impl Chmoder {
"failed to change mode of file '{}' from {:o} ({}) to {:o} ({})",
file.display(),
fperm,
display_permissions_unix(fperm),
display_permissions_unix(fperm as mode_t, false),
mode,
display_permissions_unix(mode)
display_permissions_unix(mode as mode_t, false)
);
}
Err(1)
@ -331,9 +332,9 @@ impl Chmoder {
"mode of '{}' changed from {:o} ({}) to {:o} ({})",
file.display(),
fperm,
display_permissions_unix(fperm),
display_permissions_unix(fperm as mode_t, false),
mode,
display_permissions_unix(mode)
display_permissions_unix(mode as mode_t, false)
);
}
Ok(())
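
For context, a hedged sketch (not part of the diff) of the updated two-argument helper, consistent with the unit tests added to uucore further down; it assumes a crate depending on uucore with the "fs" and "libc" features and that uucore re-exports libc, as the `use uucore::libc::mode_t` import above suggests:

use uucore::fs::display_permissions_unix;
use uucore::libc::S_IFREG;

fn main() {
    // chmod passes `false`: nine permission characters, no file-type prefix.
    assert_eq!(display_permissions_unix(0o644, false), "rw-r--r--");
    // With `true`, the file-type character is prepended (ten characters).
    assert_eq!(display_permissions_unix(S_IFREG | 0o644, true), "-rw-r--r--");
}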


@ -1480,9 +1480,8 @@ fn display_item_long(
let _ = write!(
out,
"{}{} {}",
display_file_type(md.file_type()),
display_permissions(&md),
"{} {}",
display_permissions(&md, true),
pad_left(display_symlink_count(&md), max_links),
);
@ -1668,16 +1667,6 @@ fn display_size(len: u64, config: &Config) -> String {
}
}
fn display_file_type(file_type: FileType) -> char {
if file_type.is_dir() {
'd'
} else if file_type.is_symlink() {
'l'
} else {
'-'
}
}
#[cfg(unix)]
fn file_is_executable(md: &Metadata) -> bool {
// Mode always returns u32, but the flags might not be, based on the platform
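
A small, hedged illustration (not part of the diff) of why the format string shrinks from "{}{} {}" to "{} {}": with the second argument set to `true`, display_permissions now returns the file-type character and the permission bits as one string, so ls no longer needs its own display_file_type helper:

use std::fs;
use uucore::fs::display_permissions;

fn mode_column(path: &str) -> std::io::Result<String> {
    let md = fs::metadata(path)?;
    // e.g. "drwxr-xr-x" for a directory, "-rw-r--r--" for a regular file
    Ok(display_permissions(&md, true))
}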


@ -1,91 +1,33 @@
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::fs::{File, OpenOptions};
use std::io::SeekFrom;
use std::io::{BufRead, BufReader, BufWriter, Seek, Write};
use std::fs::OpenOptions;
use std::io::{BufWriter, Write};
use std::path::Path;
use tempdir::TempDir;
use crate::{file_to_lines_iter, FileMerger};
use super::{GlobalSettings, Line};
/// Iterator that provides sorted `T`s
pub struct ExtSortedIterator {
buffers: Vec<VecDeque<Line>>,
chunk_offsets: Vec<u64>,
max_per_chunk: usize,
chunks: usize,
tmp_dir: TempDir,
settings: GlobalSettings,
failed: bool,
pub struct ExtSortedIterator<'a> {
file_merger: FileMerger<'a>,
// Keep tmp_dir around, it is deleted when dropped.
_tmp_dir: TempDir,
}
impl Iterator for ExtSortedIterator {
impl<'a> Iterator for ExtSortedIterator<'a> {
type Item = Line;
/// # Errors
///
/// This method can fail due to issues reading intermediate sorted chunks
/// from disk
fn next(&mut self) -> Option<Self::Item> {
if self.failed {
return None;
}
// fill up any empty buffers
let mut empty = true;
for chunk_num in 0..self.chunks {
if self.buffers[chunk_num as usize].is_empty() {
let mut f = crash_if_err!(
1,
File::open(self.tmp_dir.path().join(chunk_num.to_string()))
);
crash_if_err!(1, f.seek(SeekFrom::Start(self.chunk_offsets[chunk_num])));
let bytes_read = fill_buff(
&mut self.buffers[chunk_num as usize],
f,
self.max_per_chunk,
&self.settings,
);
self.chunk_offsets[chunk_num as usize] += bytes_read as u64;
if !self.buffers[chunk_num as usize].is_empty() {
empty = false;
}
} else {
empty = false;
}
}
if empty {
return None;
}
// find the next record to write
// check is_empty() before unwrap()ing
let mut idx = 0;
for chunk_num in 0..self.chunks as usize {
if !self.buffers[chunk_num].is_empty()
&& (self.buffers[idx].is_empty()
|| super::compare_by(
self.buffers[chunk_num].front().unwrap(),
self.buffers[idx].front().unwrap(),
&self.settings,
) == Ordering::Less)
{
idx = chunk_num;
}
}
// unwrap due to checks above
let r = self.buffers[idx].pop_front().unwrap();
Some(r)
self.file_merger.next()
}
}
/// Sort (based on `compare`) the `T`s provided by `unsorted` and return an
/// iterator
///
/// # Errors
/// # Panics
///
/// This method can fail due to issues writing intermediate sorted chunks
/// This method can panic due to issues writing intermediate sorted chunks
/// to disk.
pub fn ext_sort(
unsorted: impl Iterator<Item = Line>,
@ -93,19 +35,12 @@ pub fn ext_sort(
) -> ExtSortedIterator {
let tmp_dir = crash_if_err!(1, TempDir::new_in(&settings.tmp_dir, "uutils_sort"));
let mut iter = ExtSortedIterator {
buffers: Vec::new(),
chunk_offsets: Vec::new(),
max_per_chunk: 0,
chunks: 0,
tmp_dir,
settings: settings.clone(),
failed: false,
};
let mut total_read = 0;
let mut chunk = Vec::new();
let mut chunks_read = 0;
let mut file_merger = FileMerger::new(settings);
// make the initial chunks on disk
for seq in unsorted {
let seq_size = seq.estimate_size();
@ -113,65 +48,35 @@ pub fn ext_sort(
chunk.push(seq);
if total_read + chunk.len() * std::mem::size_of::<Line>() >= settings.buffer_size {
if total_read >= settings.buffer_size && chunk.len() >= 2 {
super::sort_by(&mut chunk, &settings);
write_chunk(
settings,
&iter.tmp_dir.path().join(iter.chunks.to_string()),
&mut chunk,
);
let file_path = tmp_dir.path().join(chunks_read.to_string());
write_chunk(settings, &file_path, &mut chunk);
chunk.clear();
total_read = 0;
iter.chunks += 1;
chunks_read += 1;
file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap()))
}
}
// write the last chunk
if !chunk.is_empty() {
super::sort_by(&mut chunk, &settings);
let file_path = tmp_dir.path().join(chunks_read.to_string());
write_chunk(
settings,
&iter.tmp_dir.path().join(iter.chunks.to_string()),
&tmp_dir.path().join(chunks_read.to_string()),
&mut chunk,
);
iter.chunks += 1;
file_merger.push_file(Box::new(file_to_lines_iter(file_path, settings).unwrap()));
}
// We manually drop here to not go over our memory limit when we allocate below.
drop(chunk);
// initialize buffers for each chunk
//
// Having a right sized buffer for each chunk for smallish values seems silly to me?
//
// We will have to have the entire iter in memory sometime right?
// Set minimum to the size of the writer buffer, ~8K
const MINIMUM_READBACK_BUFFER: usize = 8200;
let right_sized_buffer = settings
.buffer_size
.checked_div(iter.chunks)
.unwrap_or(settings.buffer_size);
iter.max_per_chunk = if right_sized_buffer > MINIMUM_READBACK_BUFFER {
right_sized_buffer
} else {
MINIMUM_READBACK_BUFFER
};
iter.buffers = vec![VecDeque::new(); iter.chunks];
iter.chunk_offsets = vec![0; iter.chunks];
for chunk_num in 0..iter.chunks {
let offset = fill_buff(
&mut iter.buffers[chunk_num],
crash_if_err!(
1,
File::open(iter.tmp_dir.path().join(chunk_num.to_string()))
),
iter.max_per_chunk,
&settings,
);
iter.chunk_offsets[chunk_num] = offset as u64;
ExtSortedIterator {
file_merger,
_tmp_dir: tmp_dir,
}
iter
}
fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
@ -186,29 +91,3 @@ fn write_chunk(settings: &GlobalSettings, file: &Path, chunk: &mut Vec<Line>) {
}
crash_if_err!(1, buf_write.flush());
}
fn fill_buff(
vec: &mut VecDeque<Line>,
file: File,
max_bytes: usize,
settings: &GlobalSettings,
) -> usize {
let mut total_read = 0;
let mut bytes_read = 0;
for line in BufReader::new(file).split(if settings.zero_terminated {
b'\0'
} else {
b'\n'
}) {
let line_s = String::from_utf8(crash_if_err!(1, line)).unwrap();
bytes_read += line_s.len() + 1;
let deserialized = Line::new(line_s, settings);
total_read += deserialized.estimate_size();
vec.push_back(deserialized);
if total_read > max_bytes {
break;
}
}
bytes_read
}
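
A hedged usage sketch for the rewritten ext_sort above (not part of the diff). The settings parameter is elided by the hunk header and assumed here to be &GlobalSettings; print_sorted is the existing helper in sort.rs:

// Sketch only: the external sort now yields Lines through FileMerger,
// so callers drain the iterator like any in-memory sort result.
fn sort_and_print(lines: Vec<Line>, settings: &GlobalSettings) {
    let sorted = ext_sort(lines.into_iter(), settings);
    print_sorted(sorted, settings);
}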


@ -32,6 +32,7 @@ use semver::Version;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::env;
use std::ffi::OsStr;
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
@ -1122,10 +1123,10 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
exec(files, settings)
}
fn file_to_lines_iter<'a>(
file: &str,
settings: &'a GlobalSettings,
) -> Option<impl Iterator<Item = Line> + 'a> {
fn file_to_lines_iter(
file: impl AsRef<OsStr>,
settings: &'_ GlobalSettings,
) -> Option<impl Iterator<Item = Line> + '_> {
let (reader, _) = match open(file) {
Some(x) => x,
None => return None,
@ -1190,7 +1191,7 @@ fn exec(files: Vec<String>, settings: GlobalSettings) -> i32 {
let mut lines = vec![];
// This is duplicated from fn file_to_lines_iter, but using that function directly results in a performance regression.
for (file, _) in files.iter().map(|file| open(file)).flatten() {
for (file, _) in files.iter().map(open).flatten() {
let buf_reader = BufReader::new(file);
for line in buf_reader.split(if settings.zero_terminated {
b'\0'
@ -1517,7 +1518,8 @@ fn print_sorted<T: Iterator<Item = Line>>(iter: T, settings: &GlobalSettings) {
}
// from cat.rs
fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
fn open(path: impl AsRef<OsStr>) -> Option<(Box<dyn Read>, bool)> {
let path = path.as_ref();
if path == "-" {
let stdin = stdin();
return Some((Box::new(stdin) as Box<dyn Read>, is_stdin_interactive()));
@ -1526,7 +1528,7 @@ fn open(path: &str) -> Option<(Box<dyn Read>, bool)> {
match File::open(Path::new(path)) {
Ok(f) => Some((Box::new(f) as Box<dyn Read>, false)),
Err(e) => {
show_error!("{0}: {1}", path, e.to_string());
show_error!("{0:?}: {1}", path, e.to_string());
None
}
}
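
A brief illustration (not in the diff) of what the new AsRef<OsStr> bound allows: string literals and the PathBuf values built for temporary chunk files can both be handed to open(); the file name below is purely illustrative:

fn open_both_kinds() {
    // Sketch only: &str and PathBuf both satisfy `impl AsRef<OsStr>`.
    let _from_stdin = open("-");
    let _from_chunk = open(std::path::PathBuf::from("chunk_0"));
}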


@ -13,11 +13,11 @@ extern crate uucore;
mod platform;
use clap::{App, Arg};
use std::char;
use std::env;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::io::{stdin, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;
use std::{char, fs::remove_file};
static NAME: &str = "split";
static VERSION: &str = env!("CARGO_PKG_VERSION");
@ -213,107 +213,145 @@ struct Settings {
verbose: bool,
}
struct SplitControl {
current_line: String, // Don't touch
request_new_file: bool, // Splitter implementation requests new file
}
trait Splitter {
// Consume the current_line and return the consumed string
fn consume(&mut self, _: &mut SplitControl) -> String;
// Consume as much as possible from `reader` so as to saturate `writer`.
// Equivalent to finishing one of the part files. Returns the number of
// bytes that have been moved.
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> u128;
}
struct LineSplitter {
saved_lines_to_write: usize,
lines_to_write: usize,
lines_per_split: usize,
}
impl LineSplitter {
fn new(settings: &Settings) -> LineSplitter {
let n = match settings.strategy_param.parse() {
Ok(a) => a,
Err(e) => crash!(1, "invalid number of lines: {}", e),
};
LineSplitter {
saved_lines_to_write: n,
lines_to_write: n,
lines_per_split: settings
.strategy_param
.parse()
.unwrap_or_else(|e| crash!(1, "invalid number of lines: {}", e)),
}
}
}
impl Splitter for LineSplitter {
fn consume(&mut self, control: &mut SplitControl) -> String {
self.lines_to_write -= 1;
if self.lines_to_write == 0 {
self.lines_to_write = self.saved_lines_to_write;
control.request_new_file = true;
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> u128 {
let mut bytes_consumed = 0u128;
let mut buffer = String::with_capacity(1024);
for _ in 0..self.lines_per_split {
let bytes_read = reader
.read_line(&mut buffer)
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
// If we ever read 0 bytes then we know we've hit EOF.
if bytes_read == 0 {
return bytes_consumed;
}
writer
.write_all(buffer.as_bytes())
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
// Empty out the String buffer since `read_line` appends instead of
// replaces.
buffer.clear();
bytes_consumed += bytes_read as u128;
}
control.current_line.clone()
bytes_consumed
}
}
struct ByteSplitter {
saved_bytes_to_write: usize,
bytes_to_write: usize,
break_on_line_end: bool,
require_whole_line: bool,
bytes_per_split: u128,
}
impl ByteSplitter {
fn new(settings: &Settings) -> ByteSplitter {
let mut strategy_param: Vec<char> = settings.strategy_param.chars().collect();
let suffix = strategy_param.pop().unwrap();
let multiplier = match suffix {
'0'..='9' => 1usize,
'b' => 512usize,
'k' => 1024usize,
'm' => 1024usize * 1024usize,
_ => crash!(1, "invalid number of bytes"),
};
let n = if suffix.is_alphabetic() {
match strategy_param
.iter()
.cloned()
.collect::<String>()
.parse::<usize>()
{
Ok(a) => a,
Err(e) => crash!(1, "invalid number of bytes: {}", e),
}
} else {
match settings.strategy_param.parse::<usize>() {
Ok(a) => a,
Err(e) => crash!(1, "invalid number of bytes: {}", e),
}
};
// These multipliers are the same as supported by GNU coreutils.
let modifiers: Vec<(&str, u128)> = vec![
("K", 1024u128),
("M", 1024 * 1024),
("G", 1024 * 1024 * 1024),
("T", 1024 * 1024 * 1024 * 1024),
("P", 1024 * 1024 * 1024 * 1024 * 1024),
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("Z", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("Y", 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("KB", 1000),
("MB", 1000 * 1000),
("GB", 1000 * 1000 * 1000),
("TB", 1000 * 1000 * 1000 * 1000),
("PB", 1000 * 1000 * 1000 * 1000 * 1000),
("EB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
("ZB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
("YB", 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000),
];
// This sequential find is acceptable since none of the modifiers are
// suffixes of any other modifiers, a la Huffman codes.
let (suffix, multiplier) = modifiers
.iter()
.find(|(suffix, _)| settings.strategy_param.ends_with(suffix))
.unwrap_or(&("", 1));
// Try to parse the actual numeral.
let n = &settings.strategy_param[0..(settings.strategy_param.len() - suffix.len())]
.parse::<u128>()
.unwrap_or_else(|e| crash!(1, "invalid number of bytes: {}", e));
ByteSplitter {
saved_bytes_to_write: n * multiplier,
bytes_to_write: n * multiplier,
break_on_line_end: settings.strategy == "b",
require_whole_line: false,
bytes_per_split: n * multiplier,
}
}
}
impl Splitter for ByteSplitter {
fn consume(&mut self, control: &mut SplitControl) -> String {
let line = control.current_line.clone();
let n = std::cmp::min(line.chars().count(), self.bytes_to_write);
if self.require_whole_line && n < line.chars().count() {
self.bytes_to_write = self.saved_bytes_to_write;
control.request_new_file = true;
self.require_whole_line = false;
return "".to_owned();
fn consume(
&mut self,
reader: &mut BufReader<Box<dyn Read>>,
writer: &mut BufWriter<Box<dyn Write>>,
) -> u128 {
// We buffer reads and writes. We proceed until `bytes_consumed` is
// equal to `self.bytes_per_split` or we reach EOF.
let mut bytes_consumed = 0u128;
const BUFFER_SIZE: usize = 1024;
let mut buffer = [0u8; BUFFER_SIZE];
while bytes_consumed < self.bytes_per_split {
// Don't overshoot `self.bytes_per_split`! Note: Using std::cmp::min
// doesn't really work since we have to get types to match which
// can't be done in a way that keeps all conversions safe.
let bytes_desired = if (BUFFER_SIZE as u128) <= self.bytes_per_split - bytes_consumed {
BUFFER_SIZE
} else {
// This is a safe conversion since the difference must be less
// than BUFFER_SIZE in this branch.
(self.bytes_per_split - bytes_consumed) as usize
};
let bytes_read = reader
.read(&mut buffer[0..bytes_desired])
.unwrap_or_else(|_| crash!(1, "error reading bytes from input file"));
// If we ever read 0 bytes then we know we've hit EOF.
if bytes_read == 0 {
return bytes_consumed;
}
writer
.write_all(&buffer[0..bytes_read])
.unwrap_or_else(|_| crash!(1, "error writing bytes to output file"));
bytes_consumed += bytes_read as u128;
}
self.bytes_to_write -= n;
if n == 0 {
self.bytes_to_write = self.saved_bytes_to_write;
control.request_new_file = true;
}
if self.break_on_line_end && n == line.chars().count() {
self.require_whole_line = self.break_on_line_end;
}
line[..n].to_owned()
bytes_consumed
}
}
@ -353,14 +391,13 @@ fn split(settings: &Settings) -> i32 {
let mut reader = BufReader::new(if settings.input == "-" {
Box::new(stdin()) as Box<dyn Read>
} else {
let r = match File::open(Path::new(&settings.input)) {
Ok(a) => a,
Err(_) => crash!(
let r = File::open(Path::new(&settings.input)).unwrap_or_else(|_| {
crash!(
1,
"cannot open '{}' for reading: No such file or directory",
settings.input
),
};
)
});
Box::new(r) as Box<dyn Read>
});
@ -370,48 +407,39 @@ fn split(settings: &Settings) -> i32 {
a => crash!(1, "strategy {} not supported", a),
};
let mut control = SplitControl {
current_line: "".to_owned(), // Request new line
request_new_file: true, // Request new file
};
let mut writer = BufWriter::new(Box::new(stdout()) as Box<dyn Write>);
let mut fileno = 0;
loop {
if control.current_line.chars().count() == 0 {
match reader.read_line(&mut control.current_line) {
Ok(0) | Err(_) => break,
_ => {}
// Get a new part file set up, and construct `writer` for it.
let mut filename = settings.prefix.clone();
filename.push_str(
if settings.numeric_suffix {
num_prefix(fileno, settings.suffix_length)
} else {
str_prefix(fileno, settings.suffix_length)
}
}
if control.request_new_file {
let mut filename = settings.prefix.clone();
filename.push_str(
if settings.numeric_suffix {
num_prefix(fileno, settings.suffix_length)
} else {
str_prefix(fileno, settings.suffix_length)
}
.as_ref(),
);
filename.push_str(settings.additional_suffix.as_ref());
.as_ref(),
);
filename.push_str(settings.additional_suffix.as_ref());
let mut writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
crash_if_err!(1, writer.flush());
fileno += 1;
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
control.request_new_file = false;
if settings.verbose {
println!("creating file '{}'", filename);
let bytes_consumed = splitter.consume(&mut reader, &mut writer);
writer
.flush()
.unwrap_or_else(|e| crash!(1, "error flushing to output file: {}", e));
// If we didn't write anything we should clean up the empty file, and
// break from the loop.
if bytes_consumed == 0 {
// The output file is only ever created if --filter isn't used.
// Complicated, I know...
if settings.filter.is_none() {
remove_file(filename)
.unwrap_or_else(|e| crash!(1, "error removing empty file: {}", e));
}
break;
}
let consumed = splitter.consume(&mut control);
crash_if_err!(1, writer.write_all(consumed.as_bytes()));
let advance = consumed.chars().count();
let clone = control.current_line.clone();
let sl = clone;
control.current_line = sl[advance..sl.chars().count()].to_owned();
fileno += 1;
}
0
}
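
A standalone, hedged sketch (not part of the diff) of the size-suffix parsing that ByteSplitter::new performs above, with an abbreviated multiplier table and None in place of crash!(); the function name is illustrative:

fn parse_size(strategy_param: &str) -> Option<u128> {
    // Binary suffixes ("K", "M", ...) multiply by powers of 1024,
    // decimal suffixes ("KB", "MB", ...) by powers of 1000.
    let modifiers: Vec<(&str, u128)> = vec![
        ("K", 1024),
        ("M", 1024 * 1024),
        ("KB", 1000),
        ("MB", 1000 * 1000),
    ];
    // No suffix is a suffix of another, so a linear scan finds the match.
    let (suffix, multiplier) = modifiers
        .iter()
        .find(|(suffix, _)| strategy_param.ends_with(suffix))
        .unwrap_or(&("", 1));
    let digits = &strategy_param[..strategy_param.len() - suffix.len()];
    digits.parse::<u128>().ok().map(|n| n * *multiplier)
}

fn main() {
    assert_eq!(parse_size("1753"), Some(1753));
    assert_eq!(parse_size("10K"), Some(10 * 1024));
    assert_eq!(parse_size("2MB"), Some(2_000_000));
}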


@ -17,7 +17,7 @@ path = "src/stat.rs"
[dependencies]
clap = "2.33"
time = "0.1.40"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc"] }
uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["entries", "libc", "fs"] }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
[[bin]]


@ -41,13 +41,6 @@ impl BirthTime for Metadata {
}
}
#[macro_export]
macro_rules! has {
($mode:expr, $perm:expr) => {
$mode & $perm != 0
};
}
pub fn pretty_time(sec: i64, nsec: i64) -> String {
// sec == seconds since UNIX_EPOCH
// nsec == nanoseconds since (UNIX_EPOCH + sec)
@ -81,65 +74,6 @@ pub fn pretty_filetype<'a>(mode: mode_t, size: u64) -> &'a str {
}
}
pub fn pretty_access(mode: mode_t) -> String {
let mut result = String::with_capacity(10);
result.push(match mode & S_IFMT {
S_IFDIR => 'd',
S_IFCHR => 'c',
S_IFBLK => 'b',
S_IFREG => '-',
S_IFIFO => 'p',
S_IFLNK => 'l',
S_IFSOCK => 's',
// TODO: Other file types
_ => '?',
});
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
result.push(if has!(mode, S_ISUID as mode_t) {
if has!(mode, S_IXUSR) {
's'
} else {
'S'
}
} else if has!(mode, S_IXUSR) {
'x'
} else {
'-'
});
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
result.push(if has!(mode, S_ISGID as mode_t) {
if has!(mode, S_IXGRP) {
's'
} else {
'S'
}
} else if has!(mode, S_IXGRP) {
'x'
} else {
'-'
});
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
result.push(if has!(mode, S_ISVTX as mode_t) {
if has!(mode, S_IXOTH) {
't'
} else {
'T'
}
} else if has!(mode, S_IXOTH) {
'x'
} else {
'-'
});
result
}
use std::borrow::Cow;
use std::convert::{AsRef, From};
use std::ffi::CString;


@ -7,13 +7,13 @@
// spell-checker:ignore (ToDO) mtab fsext showfs otype fmtstr prec ftype blocksize nlink rdev fnodes fsid namelen blksize inodes fstype iosize statfs gnulib NBLOCKSIZE
#[macro_use]
mod fsext;
pub use crate::fsext::*;
#[macro_use]
extern crate uucore;
use uucore::entries;
use uucore::fs::display_permissions;
use clap::{App, Arg, ArgMatches};
use std::borrow::Cow;
@ -575,7 +575,7 @@ impl Stater {
}
// access rights in human readable form
'A' => {
arg = pretty_access(meta.mode() as mode_t);
arg = display_permissions(&meta, true);
otype = OutputType::Str;
}
// number of blocks allocated (see %B)


@ -12,8 +12,10 @@ extern crate uucore;
mod count_bytes;
mod countable;
mod wordcount;
use count_bytes::count_bytes_fast;
use countable::WordCountable;
use wordcount::{TitledWordCount, WordCount};
use clap::{App, Arg, ArgMatches};
use thiserror::Error;
@ -21,9 +23,7 @@ use thiserror::Error;
use std::cmp::max;
use std::fs::File;
use std::io::{self, Write};
use std::ops::{Add, AddAssign};
use std::path::Path;
use std::str::from_utf8;
#[derive(Error, Debug)]
pub enum WcError {
@ -82,51 +82,6 @@ impl Settings {
}
}
#[derive(Debug, Default, Copy, Clone)]
struct WordCount {
bytes: usize,
chars: usize,
lines: usize,
words: usize,
max_line_length: usize,
}
impl Add for WordCount {
type Output = Self;
fn add(self, other: Self) -> Self {
Self {
bytes: self.bytes + other.bytes,
chars: self.chars + other.chars,
lines: self.lines + other.lines,
words: self.words + other.words,
max_line_length: max(self.max_line_length, other.max_line_length),
}
}
}
impl AddAssign for WordCount {
fn add_assign(&mut self, other: Self) {
*self = *self + other
}
}
impl WordCount {
fn with_title(self, title: &str) -> TitledWordCount {
TitledWordCount { title, count: self }
}
}
/// This struct supplements the actual word count with a title that is displayed
/// to the user at the end of the program.
/// The reason we don't simply include title in the `WordCount` struct is that
/// it would result in unnecessary copying of `String`.
#[derive(Debug, Default, Clone)]
struct TitledWordCount<'a> {
title: &'a str,
count: WordCount,
}
static ABOUT: &str = "Display newline, word, and byte counts for each FILE, and a total line if
more than one FILE is specified.";
static VERSION: &str = env!("CARGO_PKG_VERSION");
@ -207,18 +162,6 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
}
}
const CR: u8 = b'\r';
const LF: u8 = b'\n';
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const SYN: u8 = 0x16_u8;
const FF: u8 = 0x0C_u8;
#[inline(always)]
fn is_word_separator(byte: u8) -> bool {
byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF
}
fn word_count_from_reader<T: WordCountable>(
mut reader: T,
settings: &Settings,
@ -239,58 +182,20 @@ fn word_count_from_reader<T: WordCountable>(
// we do not need to decode the byte stream if we're only counting bytes/newlines
let decode_chars = settings.show_chars || settings.show_words || settings.show_max_line_length;
let mut line_count: usize = 0;
let mut word_count: usize = 0;
let mut byte_count: usize = 0;
let mut char_count: usize = 0;
let mut longest_line_length: usize = 0;
let mut ends_lf: bool;
// reading from a TTY seems to raise a condition on, rather than return Some(0) like a file.
// hence the option wrapped in a result here
for line_result in reader.lines() {
let raw_line = match line_result {
Ok(l) => l,
// Sum the WordCount for each line. Show a warning for each line
// that results in an IO error when trying to read it.
let total = reader
.lines()
.filter_map(|res| match res {
Ok(line) => Some(line),
Err(e) => {
show_warning!("Error while reading {}: {}", path, e);
continue;
None
}
};
// GNU 'wc' only counts lines that end in LF as lines
ends_lf = *raw_line.last().unwrap() == LF;
line_count += ends_lf as usize;
byte_count += raw_line.len();
if decode_chars {
// try and convert the bytes to UTF-8 first
let current_char_count;
match from_utf8(&raw_line[..]) {
Ok(line) => {
word_count += line.split_whitespace().count();
current_char_count = line.chars().count();
}
Err(..) => {
word_count += raw_line.split(|&x| is_word_separator(x)).count();
current_char_count = raw_line.iter().filter(|c| c.is_ascii()).count()
}
}
char_count += current_char_count;
if current_char_count > longest_line_length {
// -L is a GNU 'wc' extension so same behavior on LF
longest_line_length = current_char_count - (ends_lf as usize);
}
}
}
Ok(WordCount {
bytes: byte_count,
chars: char_count,
lines: line_count,
words: word_count,
max_line_length: longest_line_length,
})
})
.map(|line| WordCount::from_line(&line, decode_chars))
.sum();
Ok(total)
}
fn word_count_from_path(path: &str, settings: &Settings) -> WcResult<WordCount> {
@ -323,7 +228,12 @@ fn wc(files: Vec<String>, settings: &Settings) -> Result<(), u32> {
error_count += 1;
WordCount::default()
});
max_width = max(max_width, word_count.bytes.to_string().len() + 1);
// Compute the number of digits needed to display the number
// of bytes in the file. Even if the settings indicate that we
// won't *display* the number of bytes, we still use the
// number of digits in the byte count as the width when
// formatting each count as a string for output.
max_width = max(max_width, word_count.bytes.to_string().len());
total_word_count += word_count;
results.push(word_count.with_title(path));
}
@ -364,19 +274,40 @@ fn print_stats(
min_width = 0;
}
let mut is_first: bool = true;
if settings.show_lines {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.lines, min_width)?;
is_first = false;
}
if settings.show_words {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.words, min_width)?;
is_first = false;
}
if settings.show_bytes {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.bytes, min_width)?;
is_first = false;
}
if settings.show_chars {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(stdout_lock, "{:1$}", result.count.chars, min_width)?;
is_first = false;
}
if settings.show_max_line_length {
if !is_first {
write!(stdout_lock, " ")?;
}
write!(
stdout_lock,
"{:1$}",

src/uu/wc/src/wordcount.rs (new file, 131 lines)

@ -0,0 +1,131 @@
use std::cmp::max;
use std::iter::Sum;
use std::ops::{Add, AddAssign};
use std::str::from_utf8;
const CR: u8 = b'\r';
const LF: u8 = b'\n';
const SPACE: u8 = b' ';
const TAB: u8 = b'\t';
const SYN: u8 = 0x16_u8;
const FF: u8 = 0x0C_u8;
#[inline(always)]
fn is_word_separator(byte: u8) -> bool {
byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF
}
#[derive(Debug, Default, Copy, Clone)]
pub struct WordCount {
pub bytes: usize,
pub chars: usize,
pub lines: usize,
pub words: usize,
pub max_line_length: usize,
}
impl Add for WordCount {
type Output = Self;
fn add(self, other: Self) -> Self {
Self {
bytes: self.bytes + other.bytes,
chars: self.chars + other.chars,
lines: self.lines + other.lines,
words: self.words + other.words,
max_line_length: max(self.max_line_length, other.max_line_length),
}
}
}
impl AddAssign for WordCount {
fn add_assign(&mut self, other: Self) {
*self = *self + other
}
}
impl Sum for WordCount {
fn sum<I>(iter: I) -> WordCount
where
I: Iterator<Item = WordCount>,
{
iter.fold(WordCount::default(), |acc, x| acc + x)
}
}
impl WordCount {
/// Count the characters and whitespace-separated words in the given bytes.
///
/// `line` is a slice of bytes that will be decoded as ASCII characters.
fn ascii_word_and_char_count(line: &[u8]) -> (usize, usize) {
let word_count = line.split(|&x| is_word_separator(x)).count();
let char_count = line.iter().filter(|c| c.is_ascii()).count();
(word_count, char_count)
}
/// Create a [`WordCount`] from a sequence of bytes representing a line.
///
/// If the last byte of `line` encodes a newline character (`\n`),
/// then the [`lines`] field will be set to 1. Otherwise, it will
/// be set to 0. The [`bytes`] field is simply the length of
/// `line`.
///
/// If `decode_chars` is `false`, the [`chars`] and [`words`]
/// fields will be set to 0. If it is `true`, this function will
/// attempt to decode the bytes first as UTF-8, and failing that,
/// as ASCII.
pub fn from_line(line: &[u8], decode_chars: bool) -> WordCount {
// GNU 'wc' only counts lines that end in LF as lines
let lines = (*line.last().unwrap() == LF) as usize;
let bytes = line.len();
let (words, chars) = if decode_chars {
WordCount::word_and_char_count(line)
} else {
(0, 0)
};
// -L is a GNU 'wc' extension so same behavior on LF
let max_line_length = if chars > 0 { chars - lines } else { 0 };
WordCount {
bytes,
chars,
lines,
words,
max_line_length,
}
}
/// Count the UTF-8 characters and words in the given string slice.
///
/// `s` is a string slice that is assumed to be a UTF-8 string.
fn utf8_word_and_char_count(s: &str) -> (usize, usize) {
let word_count = s.split_whitespace().count();
let char_count = s.chars().count();
(word_count, char_count)
}
pub fn with_title(self, title: &str) -> TitledWordCount {
TitledWordCount { title, count: self }
}
/// Count the characters and words in the given slice of bytes.
///
/// `line` is a slice of bytes that will be decoded as UTF-8
/// characters, or if that fails, as ASCII characters.
fn word_and_char_count(line: &[u8]) -> (usize, usize) {
// try and convert the bytes to UTF-8 first
match from_utf8(line) {
Ok(s) => WordCount::utf8_word_and_char_count(s),
Err(..) => WordCount::ascii_word_and_char_count(line),
}
}
}
/// This struct supplements the actual word count with a title that is displayed
/// to the user at the end of the program.
/// The reason we don't simply include title in the `WordCount` struct is that
/// it would result in unnecessary copying of `String`.
#[derive(Debug, Default, Clone)]
pub struct TitledWordCount<'a> {
pub title: &'a str,
pub count: WordCount,
}
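
A brief usage sketch (not part of the diff), written as a unit test that could live in this module: each line becomes a WordCount via from_line, and the Sum impl folds the per-line counts into a total.

#[test]
fn sum_per_line_counts() {
    let lines: [&[u8]; 2] = [b"hello world\n", b"rust\n"];
    let total: WordCount = lines
        .iter()
        .map(|line| WordCount::from_line(line, true))
        .sum();
    // 2 lines ending in LF, 3 whitespace-separated words, 17 bytes in total.
    assert_eq!((total.lines, total.words, total.bytes), (2, 3, 17));
}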


@ -8,8 +8,9 @@
#[cfg(unix)]
use libc::{
mode_t, S_IRGRP, S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR,
S_IXGRP, S_IXOTH, S_IXUSR,
mode_t, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, S_IRGRP,
S_IROTH, S_IRUSR, S_ISGID, S_ISUID, S_ISVTX, S_IWGRP, S_IWOTH, S_IWUSR, S_IXGRP, S_IXOTH,
S_IXUSR,
};
use std::borrow::Cow;
use std::env;
@ -23,9 +24,10 @@ use std::os::unix::fs::MetadataExt;
use std::path::{Component, Path, PathBuf};
#[cfg(unix)]
#[macro_export]
macro_rules! has {
($mode:expr, $perm:expr) => {
$mode & ($perm as u32) != 0
$mode & $perm != 0
};
}
@ -240,22 +242,42 @@ pub fn is_stderr_interactive() -> bool {
#[cfg(not(unix))]
#[allow(unused_variables)]
pub fn display_permissions(metadata: &fs::Metadata) -> String {
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
if display_file_type {
return String::from("----------");
}
String::from("---------")
}
#[cfg(unix)]
pub fn display_permissions(metadata: &fs::Metadata) -> String {
pub fn display_permissions(metadata: &fs::Metadata, display_file_type: bool) -> String {
let mode: mode_t = metadata.mode() as mode_t;
display_permissions_unix(mode as u32)
display_permissions_unix(mode, display_file_type)
}
#[cfg(unix)]
pub fn display_permissions_unix(mode: u32) -> String {
let mut result = String::with_capacity(9);
pub fn display_permissions_unix(mode: mode_t, display_file_type: bool) -> String {
let mut result;
if display_file_type {
result = String::with_capacity(10);
result.push(match mode & S_IFMT {
S_IFDIR => 'd',
S_IFCHR => 'c',
S_IFBLK => 'b',
S_IFREG => '-',
S_IFIFO => 'p',
S_IFLNK => 'l',
S_IFSOCK => 's',
// TODO: Other file types
_ => '?',
});
} else {
result = String::with_capacity(9);
}
result.push(if has!(mode, S_IRUSR) { 'r' } else { '-' });
result.push(if has!(mode, S_IWUSR) { 'w' } else { '-' });
result.push(if has!(mode, S_ISUID) {
result.push(if has!(mode, S_ISUID as mode_t) {
if has!(mode, S_IXUSR) {
's'
} else {
@ -269,7 +291,7 @@ pub fn display_permissions_unix(mode: u32) -> String {
result.push(if has!(mode, S_IRGRP) { 'r' } else { '-' });
result.push(if has!(mode, S_IWGRP) { 'w' } else { '-' });
result.push(if has!(mode, S_ISGID) {
result.push(if has!(mode, S_ISGID as mode_t) {
if has!(mode, S_IXGRP) {
's'
} else {
@ -283,7 +305,7 @@ pub fn display_permissions_unix(mode: u32) -> String {
result.push(if has!(mode, S_IROTH) { 'r' } else { '-' });
result.push(if has!(mode, S_IWOTH) { 'w' } else { '-' });
result.push(if has!(mode, S_ISVTX) {
result.push(if has!(mode, S_ISVTX as mode_t) {
if has!(mode, S_IXOTH) {
't'
} else {
@ -355,4 +377,57 @@ mod tests {
);
}
}
#[cfg(unix)]
#[test]
fn test_display_permissions() {
assert_eq!(
"drwxr-xr-x",
display_permissions_unix(S_IFDIR | 0o755, true)
);
assert_eq!(
"rwxr-xr-x",
display_permissions_unix(S_IFDIR | 0o755, false)
);
assert_eq!(
"-rw-r--r--",
display_permissions_unix(S_IFREG | 0o644, true)
);
assert_eq!(
"srw-r-----",
display_permissions_unix(S_IFSOCK | 0o640, true)
);
assert_eq!(
"lrw-r-xr-x",
display_permissions_unix(S_IFLNK | 0o655, true)
);
assert_eq!("?rw-r-xr-x", display_permissions_unix(0o655, true));
assert_eq!(
"brwSr-xr-x",
display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o655, true)
);
assert_eq!(
"brwsr-xr-x",
display_permissions_unix(S_IFBLK | S_ISUID as mode_t | 0o755, true)
);
assert_eq!(
"prw---sr--",
display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o614, true)
);
assert_eq!(
"prw---Sr--",
display_permissions_unix(S_IFIFO | S_ISGID as mode_t | 0o604, true)
);
assert_eq!(
"c---r-xr-t",
display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o055, true)
);
assert_eq!(
"c---r-xr-T",
display_permissions_unix(S_IFCHR | S_ISVTX as mode_t | 0o054, true)
);
}
}


@ -27,7 +27,7 @@ fn test_df_output() {
stdout_only("Filesystem Size Used Available Capacity Use% Mounted on \n");
} else {
new_ucmd!().arg("-H").arg("-total").succeeds().stdout_only(
"Filesystem Size Used Available Use% Mounted on \n"
"Filesystem Size Used Available Use% Mounted on \n",
);
}
}


@ -53,7 +53,15 @@ fn _du_basics_subdir(s: &str) {
fn _du_basics_subdir(s: &str) {
assert_eq!(s, "0\tsubdir/deeper\n");
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_basics_subdir(s: &str) {
assert_eq!(s, "8\tsubdir/deeper\n");
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_basics_subdir(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {
@ -100,7 +108,15 @@ fn _du_soft_link(s: &str) {
fn _du_soft_link(s: &str) {
assert_eq!(s, "8\tsubdir/links\n");
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_soft_link(s: &str) {
assert_eq!(s, "16\tsubdir/links\n");
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_soft_link(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {
@ -141,7 +157,15 @@ fn _du_hard_link(s: &str) {
fn _du_hard_link(s: &str) {
assert_eq!(s, "8\tsubdir/links\n")
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_hard_link(s: &str) {
assert_eq!(s, "16\tsubdir/links\n")
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_hard_link(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {
@ -181,7 +205,15 @@ fn _du_d_flag(s: &str) {
fn _du_d_flag(s: &str) {
assert_eq!(s, "8\t./subdir\n8\t./\n");
}
#[cfg(all(not(target_vendor = "apple"), not(target_os = "windows")))]
#[cfg(target_os = "freebsd")]
fn _du_d_flag(s: &str) {
assert_eq!(s, "28\t./subdir\n36\t./\n");
}
#[cfg(all(
not(target_vendor = "apple"),
not(target_os = "windows"),
not(target_os = "freebsd")
))]
fn _du_d_flag(s: &str) {
// MS-WSL linux has altered expected output
if !uucore::os::is_wsl_1() {


@ -4,11 +4,15 @@ extern crate regex;
use self::rand::{thread_rng, Rng};
use self::regex::Regex;
use crate::common::util::*;
use rand::SeedableRng;
#[cfg(not(windows))]
use std::env;
use std::fs::{read_dir, File};
use std::io::Write;
use std::path::Path;
use std::{
fs::{read_dir, File},
io::BufWriter,
};
fn random_chars(n: usize) -> String {
thread_rng()
@ -58,7 +62,7 @@ impl Glob {
files.sort();
let mut data: Vec<u8> = vec![];
for name in &files {
data.extend(self.directory.read(name).into_bytes());
data.extend(self.directory.read_bytes(name));
}
data
}
@ -81,20 +85,30 @@ impl RandomFile {
}
fn add_bytes(&mut self, bytes: usize) {
let chunk_size: usize = if bytes >= 1024 { 1024 } else { bytes };
let mut n = bytes;
while n > chunk_size {
let _ = write!(self.inner, "{}", random_chars(chunk_size));
n -= chunk_size;
// Note that just writing random characters isn't enough to cover all
// cases. We need truly random bytes.
let mut writer = BufWriter::new(&self.inner);
// Seed the rng so as to avoid spurious test failures.
let mut rng = rand::rngs::StdRng::seed_from_u64(123);
let mut buffer = [0; 1024];
let mut remaining_size = bytes;
while remaining_size > 0 {
let to_write = std::cmp::min(remaining_size, buffer.len());
let buf = &mut buffer[..to_write];
rng.fill(buf);
writer.write(buf).unwrap();
remaining_size -= to_write;
}
let _ = write!(self.inner, "{}", random_chars(n));
}
/// Add n lines each of size `RandomFile::LINESIZE`
fn add_lines(&mut self, lines: usize) {
let mut n = lines;
while n > 0 {
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE)).unwrap();
n -= 1;
}
}
@ -104,18 +118,18 @@ impl RandomFile {
fn test_split_default() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_default";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
RandomFile::new(&at, name).add_lines(2000);
ucmd.args(&[name]).succeeds();
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_numeric_prefixed_chunks_by_bytes() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_num_prefixed_chunks_by_bytes";
let glob = Glob::new(&at, ".", r"a\d\d$");
RandomFile::new(&at, name).add_bytes(10000);
ucmd.args(&[
"-d", // --numeric-suffixes
@ -123,52 +137,89 @@ fn test_split_numeric_prefixed_chunks_by_bytes() {
"1000", name, "a",
])
.succeeds();
let glob = Glob::new(&at, ".", r"a\d\d$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
for filename in glob.collect() {
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
}
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_str_prefixed_chunks_by_bytes() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_str_prefixed_chunks_by_bytes";
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
RandomFile::new(&at, name).add_bytes(10000);
// Important that this is less than 1024 since that's our internal buffer
// size. Good to test that we don't overshoot.
ucmd.args(&["-b", "1000", name, "b"]).succeeds();
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
for filename in glob.collect() {
assert_eq!(glob.directory.metadata(&filename).len(), 1000);
}
assert_eq!(glob.collate(), at.read_bytes(name));
}
// This is designed to test what happens when the desired part size is not a
// multiple of the buffer size and we hopefully don't overshoot the desired part
// size.
#[test]
fn test_split_bytes_prime_part_size() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "test_split_bytes_prime_part_size";
RandomFile::new(&at, name).add_bytes(10000);
// 1753 is prime and greater than the buffer size, 1024.
ucmd.args(&["-b", "1753", name, "b"]).succeeds();
let glob = Glob::new(&at, ".", r"b[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 6);
let mut fns = glob.collect();
// glob.collect() is not guaranteed to return in sorted order, so we sort.
fns.sort();
for i in 0..5 {
assert_eq!(glob.directory.metadata(&fns[i]).len(), 1753);
}
assert_eq!(glob.directory.metadata(&fns[5]).len(), 1235);
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_num_prefixed_chunks_by_lines() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_num_prefixed_chunks_by_lines";
let glob = Glob::new(&at, ".", r"c\d\d$");
RandomFile::new(&at, name).add_lines(10000);
ucmd.args(&["-d", "-l", "1000", name, "c"]).succeeds();
let glob = Glob::new(&at, ".", r"c\d\d$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_str_prefixed_chunks_by_lines() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_str_prefixed_chunks_by_lines";
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
RandomFile::new(&at, name).add_lines(10000);
ucmd.args(&["-l", "1000", name, "d"]).succeeds();
let glob = Glob::new(&at, ".", r"d[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.count(), 10);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
#[test]
fn test_split_additional_suffix() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "split_additional_suffix";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
RandomFile::new(&at, name).add_lines(2000);
ucmd.args(&["--additional-suffix", ".txt", name]).succeeds();
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]].txt$");
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert_eq!(glob.collate(), at.read_bytes(name));
}
// note: the test_filter* tests below are unix-only
@ -182,15 +233,16 @@ fn test_filter() {
// like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
// change all characters to 'i'
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
.succeeds();
// assert all characters are 'i' / no character is not 'i'
// (assert that command succeeded)
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert!(
glob.collate().iter().find(|&&c| {
// is not i
@ -209,7 +261,6 @@ fn test_filter_with_env_var_set() {
// implemented like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);
@ -217,7 +268,9 @@ fn test_filter_with_env_var_set() {
env::set_var("FILE", &env_var_value);
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
.succeeds();
assert_eq!(glob.collate(), at.read(name).into_bytes());
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
assert_eq!(glob.collate(), at.read_bytes(name));
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
}


@ -9,42 +9,6 @@ pub use self::stat::*;
mod test_fsext {
use super::*;
#[test]
fn test_access() {
assert_eq!("drwxr-xr-x", pretty_access(S_IFDIR | 0o755));
assert_eq!("-rw-r--r--", pretty_access(S_IFREG | 0o644));
assert_eq!("srw-r-----", pretty_access(S_IFSOCK | 0o640));
assert_eq!("lrw-r-xr-x", pretty_access(S_IFLNK | 0o655));
assert_eq!("?rw-r-xr-x", pretty_access(0o655));
assert_eq!(
"brwSr-xr-x",
pretty_access(S_IFBLK | S_ISUID as mode_t | 0o655)
);
assert_eq!(
"brwsr-xr-x",
pretty_access(S_IFBLK | S_ISUID as mode_t | 0o755)
);
assert_eq!(
"prw---sr--",
pretty_access(S_IFIFO | S_ISGID as mode_t | 0o614)
);
assert_eq!(
"prw---Sr--",
pretty_access(S_IFIFO | S_ISGID as mode_t | 0o604)
);
assert_eq!(
"c---r-xr-t",
pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o055)
);
assert_eq!(
"c---r-xr-T",
pretty_access(S_IFCHR | S_ISVTX as mode_t | 0o054)
);
}
#[test]
fn test_file_type() {
assert_eq!("block special file", pretty_filetype(S_IFBLK, 0));


@ -33,7 +33,7 @@ fn test_stdin_default() {
new_ucmd!()
.pipe_in_fixture("lorem_ipsum.txt")
.run()
.stdout_is(" 13 109 772\n");
.stdout_is(" 13 109 772\n");
}
#[test]
@ -42,7 +42,7 @@ fn test_utf8() {
.args(&["-lwmcL"])
.pipe_in_fixture("UTF_8_test.txt")
.run()
.stdout_is(" 300 4969 22781 22213 79\n");
.stdout_is(" 300 4969 22781 22213 79\n");
// GNU returns " 300 2086 22219 22781 79"
// TODO: we should fix that to match GNU's behavior
}
@ -71,7 +71,7 @@ fn test_stdin_all_counts() {
.args(&["-c", "-m", "-l", "-L", "-w"])
.pipe_in_fixture("alice_in_wonderland.txt")
.run()
.stdout_is(" 5 57 302 302 66\n");
.stdout_is(" 5 57 302 302 66\n");
}
#[test]
@ -79,7 +79,7 @@ fn test_single_default() {
new_ucmd!()
.arg("moby_dick.txt")
.run()
.stdout_is(" 18 204 1115 moby_dick.txt\n");
.stdout_is(" 18 204 1115 moby_dick.txt\n");
}
#[test]
@ -95,7 +95,7 @@ fn test_single_all_counts() {
new_ucmd!()
.args(&["-c", "-l", "-L", "-m", "-w", "alice_in_wonderland.txt"])
.run()
.stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n");
.stdout_is(" 5 57 302 302 66 alice_in_wonderland.txt\n");
}
#[test]
@ -108,64 +108,54 @@ fn test_multiple_default() {
])
.run()
.stdout_is(
" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \
alice_in_wonderland.txt\n 36 370 2189 total\n",
" 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \
alice_in_wonderland.txt\n 36 370 2189 total\n",
);
}
/// Test for an empty file.
#[test]
fn test_file_empty() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "emptyfile.txt"])
.run()
.stdout_is(" 0 0 0 0 0 emptyfile.txt\n");
.stdout_is("0 0 0 0 0 emptyfile.txt\n");
}
/// Test for a file containing a single non-whitespace character
/// *without* a trailing newline.
#[test]
fn test_file_single_line_no_trailing_newline() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "notrailingnewline.txt"])
.run()
.stdout_is(" 1 1 2 2 1 notrailingnewline.txt\n");
.stdout_is("1 1 2 2 1 notrailingnewline.txt\n");
}
/// Test for a file that has 100 empty lines (that is, the contents of
/// the file are the newline character repeated one hundred times).
#[test]
fn test_file_many_empty_lines() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "manyemptylines.txt"])
.run()
.stdout_is(" 100 0 100 100 0 manyemptylines.txt\n");
.stdout_is("100 0 100 100 0 manyemptylines.txt\n");
}
/// Test for a file that has one long line comprising only spaces.
#[test]
fn test_file_one_long_line_only_spaces() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "onelongemptyline.txt"])
.run()
.stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n");
.stdout_is(" 1 0 10001 10001 10000 onelongemptyline.txt\n");
}
/// Test for a file that has one long line comprising a single "word".
#[test]
fn test_file_one_long_word() {
// TODO There is a leading space in the output that should be
// removed; see issue #2173.
new_ucmd!()
.args(&["-clmwL", "onelongword.txt"])
.run()
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
.stdout_is(" 1 1 10001 10001 10000 onelongword.txt\n");
}