diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs
index 27ab319d0..8cf95b82a 100644
--- a/src/uu/fold/src/fold.rs
+++ b/src/uu/fold/src/fold.rs
@@ -79,7 +79,6 @@ fn handle_obsolete(args: &[String]) -> (Vec<String>, Option<String>) {
     (args.to_vec(), None)
 }
 
-#[inline]
 fn fold(filenames: Vec<String>, bytes: bool, spaces: bool, width: usize) {
     for filename in &filenames {
         let filename: &str = &filename;
@@ -92,123 +91,173 @@ fn fold(filenames: Vec<String>, bytes: bool, spaces: bool, width: usize) {
             file_buf = safe_unwrap!(File::open(Path::new(filename)));
             &mut file_buf as &mut dyn Read
         });
-        fold_file(buffer, bytes, spaces, width);
+
+        if bytes {
+            fold_file_bytewise(buffer, spaces, width);
+        } else {
+            fold_file(buffer, spaces, width);
+        }
     }
 }
 
-#[inline]
-fn fold_file<T: Read>(file: BufReader<T>, bytes: bool, spaces: bool, width: usize) {
-    for line_result in file.lines() {
-        let mut line = safe_unwrap!(line_result);
+/// Fold `file` to fit `width` (number of columns), counting every byte as
+/// one column.
+///
+/// This function handles folding for the `-b`/`--bytes` option, counting
+/// tab, backspace, and carriage return as occupying one column, identically
+/// to all other bytes in the stream. Read errors go to `safe_unwrap!`.
+///
+/// If `spaces` is `true`, attempt to break lines at whitespace boundaries.
+fn fold_file_bytewise<T: Read>(mut file: BufReader<T>, spaces: bool, width: usize) {
+    let mut line = String::new();
 
-        if line.is_empty() {
+    loop {
+        if safe_unwrap!(file.read_line(&mut line)) == 0 {
+            break;
+        }
+
+        if line == "\n" {
             println!();
-        } else if bytes {
-            let len = line.len();
-            let mut i = 0;
-            while i < len {
-                let width = if len - i >= width { width } else { len - i };
-                let slice = {
-                    let slice = &line[i..i + width];
-                    if spaces && i + width < len {
-                        match slice.rfind(char::is_whitespace) {
-                            Some(m) => &slice[..=m],
-                            None => slice,
-                        }
-                    } else {
-                        slice
+            line.clear();
+            continue;
+        }
+
+        let len = line.len();
+        let mut i = 0;
+
+        while i < len {
+            let width = width.min(len - i);
+            let slice = {
+                // NOTE: byte-offset slicing panics if a fold point lands
+                // inside a multi-byte UTF-8 character.
+                let slice = &line[i..i + width];
+                if spaces && i + width < len {
+                    match slice.rfind(char::is_whitespace) {
+                        Some(m) => &slice[..=m],
+                        None => slice,
                     }
-                };
-                print!("{}", slice);
-                i += slice.len();
+                } else {
+                    slice
+                }
+            };
+
+            // Don't duplicate trailing newlines: if the slice is "\n", the
+            // previous iteration folded just before the end of the line and
+            // has already printed this newline.
+            if slice == "\n" {
+                break;
             }
-        } else {
-            let mut len = line.chars().count();
-            let newline = line.ends_with('\n');
-            if newline {
-                if len == 1 {
-                    println!();
+
+            i += slice.len();
+
+            if i >= len {
+                print!("{}", slice);
+            } else {
+                println!("{}", slice);
+            }
+        }
+
+        line.clear();
+    }
+}
+
+/// Fold `file` to fit `width` (number of columns).
+///
+/// By default `fold` treats tab, backspace, and carriage return specially:
+/// tab characters count as 8 columns, backspace decreases the
+/// column count, and carriage return resets the column count to 0.
+///
+/// If `spaces` is `true`, attempt to break lines at whitespace boundaries.
+/// Read errors are passed to `safe_unwrap!` instead of being ignored.
+// NOTE(review): guards `emit_output!`'s state resets; confirm it is still required.
+#[allow(unused_assignments)]
+fn fold_file<T: Read>(mut file: BufReader<T>, spaces: bool, width: usize) {
+    let mut line = String::new();
+    let mut output = String::new();
+    let mut col_count = 0;
+    // byte offset in `output` just past the last whitespace seen, if any
+    let mut last_space = None;
+
+    /// Print the output line followed by a newline and reset the column count.
+    ///
+    /// If a whitespace break position is recorded in `last_space`, print the
+    /// output line up to and including that whitespace character and keep the
+    /// remaining characters as the start of the next line.
+    macro_rules! emit_output {
+        () => {
+            let consume = last_space.unwrap_or_else(|| output.len());
+
+            println!("{}", &output[..consume]);
+            output.replace_range(..consume, "");
+
+            // no tabs remain in `output`, so every char counts as 1 column
+            col_count = output.chars().count();
+
+            last_space = None;
+        };
+    }
+
+    loop {
+        if safe_unwrap!(file.read_line(&mut line)) == 0 {
+            break;
+        }
+
+        for ch in line.chars() {
+            if ch == '\n' {
+                // make sure to _not_ split output at whitespace, since we
+                // know the entire output will fit
+                last_space = None;
+                emit_output!();
+                break;
+            }
+
+            if col_count >= width {
+                emit_output!();
+            }
+
+            match ch {
+                '\t' => {
+                    if col_count + 8 > width && !output.is_empty() {
+                        emit_output!();
+                    }
+                    col_count += 8;
+                    if spaces {
+                        // a tab is one byte, so the break falls right after it
+                        last_space = Some(output.len() + 1);
+                    }
+                }
+                '\x08' => {
+                    // FIXME: does not match GNU's handling of backspace
+                    if col_count > 0 {
+                        col_count -= 1;
+                        output.pop();
+                        last_space = last_space.filter(|&end| end <= output.len());
+                    }
                     continue;
                 }
-                len -= 1;
-                line.truncate(len);
-            }
-            let mut output = String::new();
-            let mut count = 0;
-            for (i, ch) in line.chars().enumerate() {
-                if count >= width {
-                    let (val, ncount) = {
-                        let slice = &output[..];
-                        let (out, val, ncount) = if spaces && i + 1 < len {
-                            match rfind_whitespace(slice) {
-                                Some(m) => {
-                                    let routput = &slice[m + 1..slice.chars().count()];
-                                    let ncount = routput.chars().fold(0, |out, ch: char| {
-                                        out + match ch {
-                                            '\t' => 8,
-                                            '\x08' => {
-                                                if out > 0 {
-                                                    !0
-                                                } else {
-                                                    0
-                                                }
-                                            }
-                                            '\r' => return 0,
-                                            _ => 1,
-                                        }
-                                    });
-                                    (&slice[0..=m], routput, ncount)
-                                }
-                                None => (slice, "", 0),
-                            }
-                        } else {
-                            (slice, "", 0)
-                        };
-                        println!("{}", out);
-                        (val.to_owned(), ncount)
-                    };
-                    output = val;
-                    count = ncount;
+                '\r' => {
+                    // FIXME: does not match GNU's handling of carriage return
+                    output.clear();
+                    col_count = 0;
+                    last_space = None;
+                    continue;
                 }
-                match ch {
-                    '\t' => {
-                        count += 8;
-                        if count > width {
-                            println!("{}", output);
-                            output.truncate(0);
-                            count = 8;
-                        }
-                    }
-                    '\x08' => {
-                        if count > 0 {
-                            count -= 1;
-                            let len = output.len() - 1;
-                            output.truncate(len);
-                        }
-                        continue;
-                    }
-                    '\r' => {
-                        output.truncate(0);
-                        count = 0;
-                        continue;
-                    }
-                    _ => count += 1,
-                };
-                output.push(ch);
-            }
-            if count > 0 {
-                println!("{}", output);
-            }
-        }
-    }
-}
+                _ if spaces && ch.is_whitespace() => {
+                    last_space = Some(output.len() + ch.len_utf8());
+                    col_count += 1
+                }
+                _ => col_count += 1,
+            };
 
-#[inline]
-fn rfind_whitespace(slice: &str) -> Option<usize> {
-    for (i, ch) in slice.chars().rev().enumerate() {
-        if ch.is_whitespace() {
-            return Some(slice.chars().count() - (i + 1));
+            // tab and ordinary characters are kept; the other arms `continue`
+            output.push(ch);
         }
+
+        if col_count > 0 {
+            print!("{}", output);
+            output.clear();
+        }
+
+        line.clear();
     }
-    None
 }
diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs
index 64d77cd2b..52e630e5b 100644
--- a/tests/by-util/test_fold.rs
+++ b/tests/by-util/test_fold.rs
@@ -32,6 +32,24 @@ fn test_default_wrap_with_newlines() {
         .stdout_is_fixture("lorem_ipsum_new_line_80_column.expected");
 }
 
+#[test]
+fn test_should_preserve_empty_line_without_final_newline() {
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in("12\n\n34") // empty middle line, no trailing newline
+        .succeeds()
+        .stdout_is("12\n\n34"); // expected output is identical to the input
+}
+
+#[test]
+fn test_should_preserve_empty_line_and_final_newline() {
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in("12\n\n34\n") // empty middle line plus a trailing newline
+        .succeeds()
+        .stdout_is("12\n\n34\n"); // expected output is identical to the input
+}
+
 #[test]
 fn test_should_preserve_empty_lines() {
     new_ucmd!().pipe_in("\n").succeeds().stdout_is("\n");
@@ -57,3 +75,262 @@ fn test_word_boundary_split_should_preserve_empty_lines() {
         .succeeds()
         .stdout_is("0\n1\n\n2\n\n\n");
 }
+
+#[test]
+fn test_should_not_add_newline_when_line_less_than_fold() {
+    new_ucmd!().pipe_in("1234").succeeds().stdout_is("1234"); // no newline is appended
+}
+
+#[test]
+fn test_should_not_add_newline_when_line_longer_than_fold() {
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in("1234") // four characters, no trailing newline
+        .succeeds()
+        .stdout_is("12\n34"); // newline only at the fold point, none at the end
+}
+
+#[test]
+fn test_should_not_add_newline_when_line_equal_to_fold() {
+    new_ucmd!()
+        .arg("-w1")
+        .pipe_in(" ") // a single space, no trailing newline
+        .succeeds()
+        .stdout_is(" "); // expected output is identical to the input
+}
+
+#[test]
+fn test_should_preserve_final_newline_when_line_less_than_fold() {
+    new_ucmd!().pipe_in("1234\n").succeeds().stdout_is("1234\n"); // newline is kept
+}
+
+#[test]
+fn test_should_preserve_final_newline_when_line_longer_than_fold() {
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in("1234\n") // four characters followed by a newline
+        .succeeds()
+        .stdout_is("12\n34\n"); // one fold newline plus the original final newline
+}
+
+#[test]
+fn test_should_preserve_final_newline_when_line_equal_to_fold() {
+    new_ucmd!()
+        .arg("-w2")
+        .pipe_in("1\n") // NOTE(review): "1" alone is shorter than 2; confirm intended input
+        .succeeds()
+        .stdout_is("1\n"); // expected output is identical to the input
+}
+
+#[test]
+fn test_single_tab_should_not_add_extra_newline() {
+    new_ucmd!()
+        .arg("-w1")
+        .pipe_in("\t") // a lone tab, no trailing newline
+        .succeeds()
+        .stdout_is("\t"); // the tab is emitted as-is with nothing added
+}
+
+#[test]
+fn test_tab_counts_as_8_columns() {
+    new_ucmd!()
+        .arg("-w8")
+        .pipe_in("\t1") // tab followed by one character
+        .succeeds()
+        .stdout_is("\t\n1"); // the tab fills the line, so "1" moves to the next one
+}
+
+#[test]
+fn test_fold_at_word_boundary() {
+    new_ucmd!()
+        .args(&["-w4", "-s"])
+        .pipe_in("one two") // two words, no trailing newline
+        .succeeds()
+        .stdout_is("one \ntwo"); // the space stays on the first line
+}
+
+#[test]
+fn test_fold_at_leading_word_boundary() {
+    new_ucmd!()
+        .args(&["-w3", "-s"])
+        .pipe_in(" aaa") // leading space followed by a three-character word
+        .succeeds()
+        .stdout_is(" \naaa"); // the fold happens right after the leading space
+}
+
+#[test]
+fn test_fold_at_word_boundary_preserve_final_newline() {
+    new_ucmd!()
+        .args(&["-w4", "-s"])
+        .pipe_in("one two\n") // two words followed by a newline
+        .succeeds()
+        .stdout_is("one \ntwo\n"); // fold after the space, final newline kept
+}
+
+#[test]
+fn test_fold_at_tab_as_word_boundary() {
+    new_ucmd!()
+        .args(&["-w10", "-s"])
+        .pipe_in("a\tbbb\n") // a tab separates the two words
+        .succeeds()
+        .stdout_is("a\t\nbbb\n"); // the fold happens right after the tab
+}
+
+#[test]
+fn test_fold_at_word_boundary_only_whitespace() {
+    new_ucmd!()
+        .args(&["-w2", "-s"])
+        .pipe_in("    ") // four spaces, no trailing newline
+        .succeeds()
+        .stdout_is("  \n  "); // two spaces per line, nothing appended at the end
+}
+
+#[test]
+fn test_fold_at_word_boundary_only_whitespace_preserve_final_newline() {
+    new_ucmd!()
+        .args(&["-w2", "-s"])
+        .pipe_in("    \n") // four spaces followed by a newline
+        .succeeds()
+        .stdout_is("  \n  \n"); // two spaces per line, final newline kept
+}
+
+//
+// bytewise tests
+
+#[test]
+fn test_bytewise_should_preserve_empty_line_without_final_newline() {
+    new_ucmd!()
+        .args(&["-w2", "-b"])
+        .pipe_in("123\n\n45") // long first line, empty line, no trailing newline
+        .succeeds()
+        .stdout_is("12\n3\n\n45"); // first line folded, empty line kept, nothing appended
+}
+
+#[test]
+fn test_bytewise_should_preserve_empty_line_and_final_newline() {
+    new_ucmd!()
+        .args(&["-w2", "-b"])
+        .pipe_in("12\n\n34\n") // empty middle line plus a trailing newline
+        .succeeds()
+        .stdout_is("12\n\n34\n"); // expected output is identical to the input
+}
+
+#[test]
+fn test_bytewise_should_preserve_empty_lines() {
+    new_ucmd!()
+        .arg("-b")
+        .pipe_in("\n") // a single empty line
+        .succeeds()
+        .stdout_is("\n");
+
+    new_ucmd!()
+        .args(&["-w1", "-b"])
+        .pipe_in("0\n1\n\n2\n\n\n") // one-byte lines mixed with empty lines
+        .succeeds()
+        .stdout_is("0\n1\n\n2\n\n\n"); // expected output is identical to the input
+}
+
+#[test]
+fn test_bytewise_word_boundary_split_should_preserve_empty_lines() {
+    new_ucmd!()
+        .args(&["-s", "-b"])
+        .pipe_in("\n") // a single empty line
+        .succeeds()
+        .stdout_is("\n");
+
+    new_ucmd!()
+        .args(&["-w1", "-s", "-b"])
+        .pipe_in("0\n1\n\n2\n\n\n") // one-byte lines mixed with empty lines
+        .succeeds()
+        .stdout_is("0\n1\n\n2\n\n\n"); // expected output is identical to the input
+}
+
+#[test]
+fn test_bytewise_should_not_add_newline_when_line_less_than_fold() {
+    new_ucmd!()
+        .arg("-b")
+        .pipe_in("1234") // short line, no trailing newline
+        .succeeds()
+        .stdout_is("1234"); // no newline is appended
+}
+
+#[test]
+fn test_bytewise_should_not_add_newline_when_line_longer_than_fold() {
+    new_ucmd!()
+        .args(&["-w2", "-b"])
+        .pipe_in("1234") // four bytes, no trailing newline
+        .succeeds()
+        .stdout_is("12\n34"); // newline only at the fold point, none at the end
+}
+
+#[test]
+fn test_bytewise_should_not_add_newline_when_line_equal_to_fold() {
+    new_ucmd!()
+        .args(&["-w1", "-b"])
+        .pipe_in(" ") // a single one-byte space, no trailing newline
+        .succeeds()
+        .stdout_is(" "); // expected output is identical to the input
+}
+
+#[test]
+fn test_bytewise_should_preserve_final_newline_when_line_less_than_fold() {
+    new_ucmd!()
+        .arg("-b")
+        .pipe_in("1234\n") // short line followed by a newline
+        .succeeds()
+        .stdout_is("1234\n"); // expected output is identical to the input
+}
+
+#[test]
+fn test_bytewise_should_preserve_final_newline_when_line_longer_than_fold() {
+    new_ucmd!()
+        .args(&["-w2", "-b"])
+        .pipe_in("1234\n") // four bytes followed by a newline
+        .succeeds()
+        .stdout_is("12\n34\n"); // one fold newline plus the original final newline
+}
+
+#[test]
+fn test_bytewise_should_preserve_final_newline_when_line_equal_to_fold() {
+    new_ucmd!()
+        .args(&["-w2", "-b"])
+        .pipe_in("1\n") // two bytes including the newline
+        .succeeds()
+        .stdout_is("1\n"); // expected output is identical to the input
+}
+
+#[test]
+fn test_bytewise_single_tab_should_not_add_extra_newline() {
+    new_ucmd!()
+        .args(&["-w1", "-b"])
+        .pipe_in("\t") // a lone tab, no trailing newline
+        .succeeds()
+        .stdout_is("\t"); // expected output is identical to the input
+}
+
+#[test]
+fn test_tab_counts_as_one_byte() {
+    new_ucmd!()
+        .args(&["-w2", "-b"])
+        .pipe_in("1\t2\n") // the tab sits between two one-byte characters
+        .succeeds()
+        .stdout_is("1\t\n2\n"); // "1" and the tab share a line, so the tab counted once
+}
+
+#[test]
+fn test_bytewise_fold_at_word_boundary_only_whitespace() {
+    new_ucmd!()
+        .args(&["-w2", "-s", "-b"])
+        .pipe_in("    ") // four spaces, no trailing newline
+        .succeeds()
+        .stdout_is("  \n  "); // two spaces per line, nothing appended at the end
+}
+
+#[test]
+fn test_bytewise_fold_at_word_boundary_only_whitespace_preserve_final_newline() {
+    new_ucmd!()
+        .args(&["-w2", "-s", "-b"])
+        .pipe_in("    \n") // four spaces followed by a newline
+        .succeeds()
+        .stdout_is("  \n  \n"); // two spaces per line, final newline kept
+}