From 233a7789634aa0c5f4fb2fc69e01411ed23e42c7 Mon Sep 17 00:00:00 2001
From: Michael Debertol <michael.debertol@gmail.com>
Date: Sun, 27 Jun 2021 12:52:46 +0200
Subject: [PATCH] sort/ls: implement version cmp matching GNU spec

This reimplements version_cmp, which is used in sort and ls to sort
according to versions.
However, it is not bug-for-bug identical with GNU's implementation.
I reported a bug with GNU here:
https://lists.gnu.org/archive/html/bug-coreutils/2021-06/msg00045.html
This implementation does not contain the bugs regarding the handling of
file extensions and null bytes.
---
 Cargo.lock                                    |  16 -
 src/uu/ls/src/ls.rs                           |   7 +-
 src/uu/ls/src/version_cmp.rs                  | 306 ---------------
 src/uu/sort/Cargo.toml                        |   1 -
 src/uu/sort/src/sort.rs                       |  31 +-
 src/uucore/src/lib/lib.rs                     |   1 +
 src/uucore/src/lib/mods.rs                    |   1 +
 src/uucore/src/lib/mods/version_cmp.rs        | 361 ++++++++++++++++++
 tests/by-util/test_sort.rs                    |   6 +-
 .../sort/version-empty-lines.expected         |   4 +
 .../sort/version-empty-lines.expected.debug   |  45 +++
 tests/fixtures/sort/version-empty-lines.txt   |   4 +
 12 files changed, 423 insertions(+), 360 deletions(-)
 delete mode 100644 src/uu/ls/src/version_cmp.rs
 create mode 100644 src/uucore/src/lib/mods/version_cmp.rs
 create mode 100644 tests/fixtures/sort/version-empty-lines.expected.debug
diff --git a/Cargo.lock b/Cargo.lock
index 51424332d..e2dd7d5f3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1434,21 +1434,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
 
-[[package]]
-name = "semver"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
-dependencies = [
- "semver-parser",
-]
-
-[[package]]
-name = "semver-parser"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
-
 [[package]]
 name = "sha1"
 version = "0.6.0"
@@ -2488,7 +2473,6 @@ dependencies = [
  "ouroboros",
  "rand 0.7.3",
  "rayon",
- "semver",
  "tempfile",
  "unicode-width",
  "uucore",
diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs
index 6ca3f4bbe..b866253a9 100644
--- a/src/uu/ls/src/ls.rs
+++ b/src/uu/ls/src/ls.rs
@@ -14,7 +14,6 @@ extern crate uucore;
 extern crate lazy_static;
 
 mod quoting_style;
-mod version_cmp;
 
 use clap::{crate_version, App, Arg};
 use globset::{self, Glob, GlobSet, GlobSetBuilder};
@@ -44,6 +43,7 @@ use term_grid::{Cell, Direction, Filling, Grid, GridOptions};
 use unicode_width::UnicodeWidthStr;
 #[cfg(unix)]
 use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
+use uucore::{fs::display_permissions, version_cmp::version_cmp};
 
 static ABOUT: &str = "
  By default, ls will list the files and contents of any directories on
@@ -1256,7 +1256,8 @@ fn sort_entries(entries: &mut Vec<PathData>, config: &Config) {
         }
         // The default sort in GNU ls is case insensitive
         Sort::Name => entries.sort_by(|a, b| a.display_name.cmp(&b.display_name)),
-        Sort::Version => entries.sort_by(|a, b| version_cmp::version_cmp(&a.p_buf, &b.p_buf)),
+        Sort::Version => entries
+            .sort_by(|a, b| version_cmp(&a.p_buf.to_string_lossy(), &b.p_buf.to_string_lossy())),
         Sort::Extension => entries.sort_by(|a, b| {
             a.p_buf
                 .extension()
@@ -1467,8 +1468,6 @@ fn display_grid(
     }
 }
 
-use uucore::fs::display_permissions;
-
 fn display_item_long(
     item: &PathData,
     max_links: usize,
diff --git a/src/uu/ls/src/version_cmp.rs b/src/uu/ls/src/version_cmp.rs
deleted file mode 100644
index e3f7e29e3..000000000
--- a/src/uu/ls/src/version_cmp.rs
+++ /dev/null
@@ -1,306 +0,0 @@
-use std::cmp::Ordering;
-use std::path::Path;
-
-/// Compare paths in a way that matches the GNU version sort, meaning that
-/// numbers get sorted in a natural way.
-pub(crate) fn version_cmp(a: &Path, b: &Path) -> Ordering {
-    let a_string = a.to_string_lossy();
-    let b_string = b.to_string_lossy();
-    let mut a = a_string.chars().peekable();
-    let mut b = b_string.chars().peekable();
-
-    // The order determined from the number of leading zeroes.
-    // This is used if the filenames are equivalent up to leading zeroes.
-    let mut leading_zeroes = Ordering::Equal;
-
-    loop {
-        match (a.next(), b.next()) {
-            // If the characters are both numerical. We collect the rest of the number
-            // and parse them to u64's and compare them.
-            (Some(a_char @ '0'..='9'), Some(b_char @ '0'..='9')) => {
-                let mut a_leading_zeroes = 0;
-                if a_char == '0' {
-                    a_leading_zeroes = 1;
-                    while let Some('0') = a.peek() {
-                        a_leading_zeroes += 1;
-                        a.next();
-                    }
-                }
-
-                let mut b_leading_zeroes = 0;
-                if b_char == '0' {
-                    b_leading_zeroes = 1;
-                    while let Some('0') = b.peek() {
-                        b_leading_zeroes += 1;
-                        b.next();
-                    }
-                }
-                // The first different number of leading zeros determines the order
-                // so if it's already been determined by a previous number, we leave
-                // it as that ordering.
-                // It's b.cmp(&a), because the *largest* number of leading zeros
-                // should go first
-                if leading_zeroes == Ordering::Equal {
-                    leading_zeroes = b_leading_zeroes.cmp(&a_leading_zeroes);
-                }
-
-                let mut a_str = String::new();
-                let mut b_str = String::new();
-                if a_char != '0' {
-                    a_str.push(a_char);
-                }
-                if b_char != '0' {
-                    b_str.push(b_char);
-                }
-
-                // Unwrapping here is fine because we only call next if peek returns
-                // Some(_), so next should also return Some(_).
-                while let Some('0'..='9') = a.peek() {
-                    a_str.push(a.next().unwrap());
-                }
-
-                while let Some('0'..='9') = b.peek() {
-                    b_str.push(b.next().unwrap());
-                }
-
-                // Since the leading zeroes are stripped, the length can be
-                // used to compare the numbers.
-                match a_str.len().cmp(&b_str.len()) {
-                    Ordering::Equal => {}
-                    x => return x,
-                }
-
-                // At this point, leading zeroes are stripped and the lengths
-                // are equal, meaning that the strings can be compared using
-                // the standard compare function.
-                match a_str.cmp(&b_str) {
-                    Ordering::Equal => {}
-                    x => return x,
-                }
-            }
-            // If there are two characters we just compare the characters
-            (Some(a_char), Some(b_char)) => match a_char.cmp(&b_char) {
-                Ordering::Equal => {}
-                x => return x,
-            },
-            // Otherwise, we compare the options (because None < Some(_))
-            (a_opt, b_opt) => match a_opt.cmp(&b_opt) {
-                // If they are completely equal except for leading zeroes, we use the leading zeroes.
-                Ordering::Equal => return leading_zeroes,
-                x => return x,
-            },
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::version_cmp::version_cmp;
-    use std::cmp::Ordering;
-    use std::path::PathBuf;
-    #[test]
-    fn test_version_cmp() {
-        // Identical strings
-        assert_eq!(
-            version_cmp(&PathBuf::from("hello"), &PathBuf::from("hello")),
-            Ordering::Equal
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("file12"), &PathBuf::from("file12")),
-            Ordering::Equal
-        );
-
-        assert_eq!(
-            version_cmp(
-                &PathBuf::from("file12-suffix"),
-                &PathBuf::from("file12-suffix")
-            ),
-            Ordering::Equal
-        );
-
-        assert_eq!(
-            version_cmp(
-                &PathBuf::from("file12-suffix24"),
-                &PathBuf::from("file12-suffix24")
-            ),
-            Ordering::Equal
-        );
-
-        // Shortened names
-        assert_eq!(
-            version_cmp(&PathBuf::from("world"), &PathBuf::from("wo")),
-            Ordering::Greater,
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("hello10wo"), &PathBuf::from("hello10world")),
-            Ordering::Less,
-        );
-
-        // Simple names
-        assert_eq!(
-            version_cmp(&PathBuf::from("world"), &PathBuf::from("hello")),
-            Ordering::Greater,
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("hello"), &PathBuf::from("world")),
-            Ordering::Less
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("apple"), &PathBuf::from("ant")),
-            Ordering::Greater
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("ant"), &PathBuf::from("apple")),
-            Ordering::Less
-        );
-
-        // Uppercase letters
-        assert_eq!(
-            version_cmp(&PathBuf::from("Beef"), &PathBuf::from("apple")),
-            Ordering::Less,
-            "Uppercase letters are sorted before all lowercase letters"
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("Apple"), &PathBuf::from("apple")),
-            Ordering::Less
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("apple"), &PathBuf::from("aPple")),
-            Ordering::Greater
-        );
-
-        // Numbers
-        assert_eq!(
-            version_cmp(&PathBuf::from("100"), &PathBuf::from("20")),
-            Ordering::Greater,
-            "Greater numbers are greater even if they start with a smaller digit",
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("20"), &PathBuf::from("20")),
-            Ordering::Equal,
-            "Equal numbers are equal"
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("15"), &PathBuf::from("200")),
-            Ordering::Less,
-            "Small numbers are smaller"
-        );
-
-        // Comparing numbers with other characters
-        assert_eq!(
-            version_cmp(&PathBuf::from("1000"), &PathBuf::from("apple")),
-            Ordering::Less,
-            "Numbers are sorted before other characters"
-        );
-
-        assert_eq!(
-            // spell-checker:disable-next-line
-            version_cmp(&PathBuf::from("file1000"), &PathBuf::from("fileapple")),
-            Ordering::Less,
-            "Numbers in the middle of the name are sorted before other characters"
-        );
-
-        // Leading zeroes
-        assert_eq!(
-            version_cmp(&PathBuf::from("012"), &PathBuf::from("12")),
-            Ordering::Less,
-            "A single leading zero can make a difference"
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("000800"), &PathBuf::from("0000800")),
-            Ordering::Greater,
-            "Leading number of zeroes is used even if both non-zero number of zeros"
-        );
-
-        // Numbers and other characters combined
-        assert_eq!(
-            version_cmp(&PathBuf::from("ab10"), &PathBuf::from("aa11")),
-            Ordering::Greater
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10"), &PathBuf::from("aa11")),
-            Ordering::Less,
-            "Numbers after other characters are handled correctly."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa2"), &PathBuf::from("aa100")),
-            Ordering::Less,
-            "Numbers after alphabetical characters are handled correctly."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10bb"), &PathBuf::from("aa11aa")),
-            Ordering::Less,
-            "Number is used even if alphabetical characters after it differ."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa11aa1")),
-            Ordering::Less,
-            "Second number is ignored if the first number differs."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa10aa1")),
-            Ordering::Greater,
-            "Second number is used if the rest is equal."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa00010aa1")),
-            Ordering::Greater,
-            "Second number is used if the rest is equal up to leading zeroes of the first number."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa010aa022")),
-            Ordering::Greater,
-            "The leading zeroes of the first number has priority."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa10aa022")),
-            Ordering::Less,
-            "The leading zeroes of other numbers than the first are used."
-        );
-
-        assert_eq!(
-            version_cmp(&PathBuf::from("file-1.4"), &PathBuf::from("file-1.13")),
-            Ordering::Less,
-            "Periods are handled as normal text, not as a decimal point."
-        );
-
-        // Greater than u64::Max
-        // u64 == 18446744073709551615 so this should be plenty:
-        //        20000000000000000000000
-        assert_eq!(
-            version_cmp(
-                &PathBuf::from("aa2000000000000000000000bb"),
-                &PathBuf::from("aa002000000000000000000001bb")
-            ),
-            Ordering::Less,
-            "Numbers larger than u64::MAX are handled correctly without crashing"
-        );
-
-        assert_eq!(
-        version_cmp(
-            &PathBuf::from("aa2000000000000000000000bb"),
-            &PathBuf::from("aa002000000000000000000000bb")
-        ),
-        Ordering::Greater,
-        "Leading zeroes for numbers larger than u64::MAX are handled correctly without crashing"
-    );
-    }
-}
diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml
index f06610248..d454179b0 100644
--- a/src/uu/sort/Cargo.toml
+++ b/src/uu/sort/Cargo.toml
@@ -24,7 +24,6 @@ memchr = "2.4.0"
 ouroboros = "0.9.3"
 rand = "0.7"
 rayon = "1.5"
-semver = "0.9.0"
 tempfile = "3"
 unicode-width = "0.1.8"
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] }
diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs
index fb0241945..821def70d 100644
--- a/src/uu/sort/src/sort.rs
+++ b/src/uu/sort/src/sort.rs
@@ -32,7 +32,6 @@ use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoPa
 use rand::distributions::Alphanumeric;
 use rand::{thread_rng, Rng};
 use rayon::prelude::*;
-use semver::Version;
 use std::cmp::Ordering;
 use std::env;
 use std::ffi::OsStr;
@@ -44,6 +43,7 @@ use std::path::Path;
 use std::path::PathBuf;
 use unicode_width::UnicodeWidthStr;
 use uucore::parse_size::{parse_size, ParseSizeError};
+use uucore::version_cmp::version_cmp;
 use uucore::InvalidEncodingHandling;
 
 const NAME: &str = "sort";
@@ -1410,7 +1410,7 @@ fn compare_by<'a>(
                 general_numeric_compare(a_float, b_float)
             }
             SortMode::Month => month_compare(a_str, b_str),
-            SortMode::Version => version_compare(a_str, b_str),
+            SortMode::Version => version_cmp(a_str, b_str),
             SortMode::Default => custom_str_cmp(
                 a_str,
                 b_str,
@@ -1615,31 +1615,6 @@ fn month_compare(a: &str, b: &str) -> Ordering {
     }
 }
 
-fn version_parse(a: &str) -> Version {
-    let result = Version::parse(a);
-
-    match result {
-        Ok(vers_a) => vers_a,
-        // Non-version lines parse to 0.0.0
-        Err(_e) => Version::parse("0.0.0").unwrap(),
-    }
-}
-
-fn version_compare(a: &str, b: &str) -> Ordering {
-    #![allow(clippy::comparison_chain)]
-    let ver_a = version_parse(a);
-    let ver_b = version_parse(b);
-
-    // Version::cmp is not implemented; implement comparison directly
-    if ver_a > ver_b {
-        Ordering::Greater
-    } else if ver_a < ver_b {
-        Ordering::Less
-    } else {
-        Ordering::Equal
-    }
-}
-
 fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(iter: T, settings: &GlobalSettings) {
     let mut writer = settings.out_writer();
     for line in iter {
@@ -1712,7 +1687,7 @@ mod tests {
         let a = "1.2.3-alpha2";
         let b = "1.4.0";
 
-        assert_eq!(Ordering::Less, version_compare(a, b));
+        assert_eq!(Ordering::Less, version_cmp(a, b));
     }
 
     #[test]
diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs
index bf2e5b1bb..ac505e42f 100644
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@@ -30,6 +30,7 @@ pub use crate::mods::coreopts;
 pub use crate::mods::os;
 pub use crate::mods::panic;
 pub use crate::mods::ranges;
+pub use crate::mods::version_cmp;
 
 // * string parsing modules
 pub use crate::parser::parse_size;
diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs
index 2689361a0..f81d05ed9 100644
--- a/src/uucore/src/lib/mods.rs
+++ b/src/uucore/src/lib/mods.rs
@@ -5,3 +5,4 @@ pub mod coreopts;
 pub mod os;
 pub mod panic;
 pub mod ranges;
+pub mod version_cmp;
diff --git a/src/uucore/src/lib/mods/version_cmp.rs b/src/uucore/src/lib/mods/version_cmp.rs
new file mode 100644
index 000000000..99b8c8b40
--- /dev/null
+++ b/src/uucore/src/lib/mods/version_cmp.rs
@@ -0,0 +1,361 @@
+use std::cmp::Ordering;
+
+/// Compares the non-digit parts of a version.
+/// Special cases: ~ are before everything else, even ends ("a~" < "a")
+/// Letters are before non-letters
+fn version_non_digit_cmp(a: &str, b: &str) -> Ordering {
+    let mut a_chars = a.chars();
+    let mut b_chars = b.chars();
+    loop {
+        match (a_chars.next(), b_chars.next()) {
+            (Some(c1), Some(c2)) if c1 == c2 => {}
+            (None, None) => return Ordering::Equal,
+            (_, Some('~')) => return Ordering::Greater,
+            (Some('~'), _) => return Ordering::Less,
+            (None, Some(_)) => return Ordering::Less,
+            (Some(_), None) => return Ordering::Greater,
+            (Some(c1), Some(c2)) if c1.is_ascii_alphabetic() && !c2.is_ascii_alphabetic() => {
+                return Ordering::Less
+            }
+            (Some(c1), Some(c2)) if !c1.is_ascii_alphabetic() && c2.is_ascii_alphabetic() => {
+                return Ordering::Greater
+            }
+            (Some(c1), Some(c2)) => return c1.cmp(&c2),
+        }
+    }
+}
+
+/// Remove file endings matching the regex (\.[A-Za-z~][A-Za-z0-9~]*)*$
+fn remove_file_ending(a: &str) -> &str {
+    let mut ending_start = None;
+    let mut prev_was_dot = false;
+    for (idx, char) in a.char_indices() {
+        if char == '.' {
+            if ending_start.is_none() || prev_was_dot {
+                ending_start = Some(idx);
+            }
+            prev_was_dot = true;
+        } else if prev_was_dot {
+            prev_was_dot = false;
+            if !char.is_ascii_alphabetic() && char != '~' {
+                ending_start = None;
+            }
+        } else if !char.is_ascii_alphanumeric() && char != '~' {
+            ending_start = None;
+        }
+    }
+    if prev_was_dot {
+        ending_start = None;
+    }
+    if let Some(ending_start) = ending_start {
+        &a[..ending_start]
+    } else {
+        a
+    }
+}
+
+pub fn version_cmp(mut a: &str, mut b: &str) -> Ordering {
+    let str_cmp = a.cmp(b);
+    if str_cmp == Ordering::Equal {
+        return str_cmp;
+    }
+
+    // Special cases:
+    // 1. Empty strings
+    match (a.is_empty(), b.is_empty()) {
+        (true, false) => return Ordering::Less,
+        (false, true) => return Ordering::Greater,
+        (true, true) => unreachable!(),
+        (false, false) => {}
+    }
+    // 2. Dots
+    match (a == ".", b == ".") {
+        (true, false) => return Ordering::Less,
+        (false, true) => return Ordering::Greater,
+        (true, true) => unreachable!(),
+        (false, false) => {}
+    }
+    // 3. Two Dots
+    match (a == "..", b == "..") {
+        (true, false) => return Ordering::Less,
+        (false, true) => return Ordering::Greater,
+        (true, true) => unreachable!(),
+        (false, false) => {}
+    }
+    // 4. Strings starting with a dot
+    match (a.starts_with('.'), b.starts_with('.')) {
+        (true, false) => return Ordering::Less,
+        (false, true) => return Ordering::Greater,
+        (true, true) => {
+            // Strip the leading dot for later comparisons
+            a = &a[1..];
+            b = &b[1..];
+        }
+        _ => {}
+    }
+
+    // Try to strip file extensions
+    let (mut a, mut b) = match (remove_file_ending(a), remove_file_ending(b)) {
+        (a_stripped, b_stripped) if a_stripped == b_stripped => {
+            // If both would be the same after stripping file extensions, don't strip them.
+            (a, b)
+        }
+        stripped => stripped,
+    };
+
+    // 1. Compare leading non-numerical part
+    // 2. Compare leading numerical part
+    // 3. Repeat
+    loop {
+        let a_numerical_start = a.find(|c: char| c.is_ascii_digit()).unwrap_or(a.len());
+        let b_numerical_start = b.find(|c: char| c.is_ascii_digit()).unwrap_or(b.len());
+
+        let a_str = &a[..a_numerical_start];
+        let b_str = &b[..b_numerical_start];
+
+        match version_non_digit_cmp(a_str, b_str) {
+            Ordering::Equal => {}
+            ord => return ord,
+        }
+
+        a = &a[a_numerical_start..];
+        b = &b[a_numerical_start..];
+
+        let a_numerical_end = a.find(|c: char| !c.is_ascii_digit()).unwrap_or(a.len());
+        let b_numerical_end = b.find(|c: char| !c.is_ascii_digit()).unwrap_or(b.len());
+
+        let a_str = a[..a_numerical_end].trim_start_matches('0');
+        let b_str = b[..b_numerical_end].trim_start_matches('0');
+
+        match a_str.len().cmp(&b_str.len()) {
+            Ordering::Equal => {}
+            ord => return ord,
+        }
+
+        match a_str.cmp(b_str) {
+            Ordering::Equal => {}
+            ord => return ord,
+        }
+
+        a = &a[a_numerical_end..];
+        b = &b[b_numerical_end..];
+
+        if a.is_empty() && b.is_empty() {
+            // Default to the lexical comparison.
+            return str_cmp;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::version_cmp::version_cmp;
+    use std::cmp::Ordering;
+    #[test]
+    fn test_version_cmp() {
+        // Identical strings
+        assert_eq!(version_cmp("hello", "hello"), Ordering::Equal);
+
+        assert_eq!(version_cmp("file12", "file12"), Ordering::Equal);
+
+        assert_eq!(
+            version_cmp("file12-suffix", "file12-suffix"),
+            Ordering::Equal
+        );
+
+        assert_eq!(
+            version_cmp("file12-suffix24", "file12-suffix24"),
+            Ordering::Equal
+        );
+
+        // Shortened names
+        assert_eq!(version_cmp("world", "wo"), Ordering::Greater,);
+
+        assert_eq!(version_cmp("hello10wo", "hello10world"), Ordering::Less,);
+
+        // Simple names
+        assert_eq!(version_cmp("world", "hello"), Ordering::Greater,);
+
+        assert_eq!(version_cmp("hello", "world"), Ordering::Less);
+
+        assert_eq!(version_cmp("apple", "ant"), Ordering::Greater);
+
+        assert_eq!(version_cmp("ant", "apple"), Ordering::Less);
+
+        // Uppercase letters
+        assert_eq!(
+            version_cmp("Beef", "apple"),
+            Ordering::Less,
+            "Uppercase letters are sorted before all lowercase letters"
+        );
+
+        assert_eq!(version_cmp("Apple", "apple"), Ordering::Less);
+
+        assert_eq!(version_cmp("apple", "aPple"), Ordering::Greater);
+
+        // Numbers
+        assert_eq!(
+            version_cmp("100", "20"),
+            Ordering::Greater,
+            "Greater numbers are greater even if they start with a smaller digit",
+        );
+
+        assert_eq!(
+            version_cmp("20", "20"),
+            Ordering::Equal,
+            "Equal numbers are equal"
+        );
+
+        assert_eq!(
+            version_cmp("15", "200"),
+            Ordering::Less,
+            "Small numbers are smaller"
+        );
+
+        // Comparing numbers with other characters
+        assert_eq!(
+            version_cmp("1000", "apple"),
+            Ordering::Less,
+            "Numbers are sorted before other characters"
+        );
+
+        assert_eq!(
+            // spell-checker:disable-next-line
+            version_cmp("file1000", "fileapple"),
+            Ordering::Less,
+            "Numbers in the middle of the name are sorted before other characters"
+        );
+
+        // Leading zeroes
+        assert_eq!(
+            version_cmp("012", "12"),
+            Ordering::Less,
+            "A single leading zero can make a difference"
+        );
+
+        assert_eq!(
+            version_cmp("000800", "0000800"),
+            Ordering::Greater,
+            "Leading number of zeroes is used even if both non-zero number of zeros"
+        );
+
+        // Numbers and other characters combined
+        assert_eq!(version_cmp("ab10", "aa11"), Ordering::Greater);
+
+        assert_eq!(
+            version_cmp("aa10", "aa11"),
+            Ordering::Less,
+            "Numbers after other characters are handled correctly."
+        );
+
+        assert_eq!(
+            version_cmp("aa2", "aa100"),
+            Ordering::Less,
+            "Numbers after alphabetical characters are handled correctly."
+        );
+
+        assert_eq!(
+            version_cmp("aa10bb", "aa11aa"),
+            Ordering::Less,
+            "Number is used even if alphabetical characters after it differ."
+        );
+
+        assert_eq!(
+            version_cmp("aa10aa0010", "aa11aa1"),
+            Ordering::Less,
+            "Second number is ignored if the first number differs."
+        );
+
+        assert_eq!(
+            version_cmp("aa10aa0010", "aa10aa1"),
+            Ordering::Greater,
+            "Second number is used if the rest is equal."
+        );
+
+        assert_eq!(
+            version_cmp("aa10aa0010", "aa00010aa1"),
+            Ordering::Greater,
+            "Second number is used if the rest is equal up to leading zeroes of the first number."
+        );
+
+        assert_eq!(
+            version_cmp("aa10aa0022", "aa010aa022"),
+            Ordering::Greater,
+            "The leading zeroes of the first number has priority."
+        );
+
+        assert_eq!(
+            version_cmp("aa10aa0022", "aa10aa022"),
+            Ordering::Less,
+            "The leading zeroes of other numbers than the first are used."
+        );
+
+        assert_eq!(
+            version_cmp("file-1.4", "file-1.13"),
+            Ordering::Less,
+            "Periods are handled as normal text, not as a decimal point."
+        );
+
+        // Greater than u64::Max
+        // u64 == 18446744073709551615 so this should be plenty:
+        //        20000000000000000000000
+        assert_eq!(
+            version_cmp("aa2000000000000000000000bb", "aa002000000000000000000001bb"),
+            Ordering::Less,
+            "Numbers larger than u64::MAX are handled correctly without crashing"
+        );
+
+        assert_eq!(
+            version_cmp("aa2000000000000000000000bb", "aa002000000000000000000000bb"),
+            Ordering::Greater,
+            "Leading zeroes for numbers larger than u64::MAX are \
+            handled correctly without crashing"
+        );
+
+        assert_eq!(
+            version_cmp("  a", "a"),
+            Ordering::Greater,
+            "Whitespace is after letters because letters are before non-letters"
+        );
+
+        assert_eq!(
+            version_cmp("a~", "ab"),
+            Ordering::Less,
+            "A tilde is before other letters"
+        );
+
+        assert_eq!(
+            version_cmp("a~", "a"),
+            Ordering::Less,
+            "A tilde is before the line end"
+        );
+        assert_eq!(
+            version_cmp("~", ""),
+            Ordering::Greater,
+            "A tilde is after the empty string"
+        );
+        assert_eq!(
+            version_cmp(".f", ".1"),
+            Ordering::Greater,
+            "if both start with a dot it is ignored for the comparison"
+        );
+
+        // The following tests are incompatible with GNU as of 2021/06.
+        // I think that's because of a bug in GNU, reported as https://lists.gnu.org/archive/html/bug-coreutils/2021-06/msg00045.html
+        assert_eq!(
+            version_cmp("a..a", "a.+"),
+            Ordering::Less,
+            ".a is stripped before the comparison"
+        );
+        assert_eq!(
+            version_cmp("a.", "a+"),
+            Ordering::Greater,
+            ". is not stripped before the comparison"
+        );
+        assert_eq!(
+            version_cmp("a\0a", "a"),
+            Ordering::Greater,
+            "NULL bytes are handled comparison"
+        );
+    }
+}
diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs
index 3e841f630..2c99c1536 100644
--- a/tests/by-util/test_sort.rs
+++ b/tests/by-util/test_sort.rs
@@ -127,11 +127,7 @@ fn test_months_whitespace() {
 
 #[test]
 fn test_version_empty_lines() {
-    new_ucmd!()
-        .arg("-V")
-        .arg("version-empty-lines.txt")
-        .succeeds()
-        .stdout_is("\n\n\n\n\n\n\n1.2.3-alpha\n1.2.3-alpha2\n\t\t\t1.12.4\n11.2.3\n");
+    test_helper("version-empty-lines", &["-V", "--version-sort"]);
 }
 
 #[test]
diff --git a/tests/fixtures/sort/version-empty-lines.expected b/tests/fixtures/sort/version-empty-lines.expected
index c496c0ff5..69a648966 100644
--- a/tests/fixtures/sort/version-empty-lines.expected
+++ b/tests/fixtures/sort/version-empty-lines.expected
@@ -8,4 +8,8 @@
 1.2.3-alpha
 1.2.3-alpha2
 11.2.3
+bar2
+bar2.0.0
+foo0.1
+foo1.0
 			1.12.4
diff --git a/tests/fixtures/sort/version-empty-lines.expected.debug b/tests/fixtures/sort/version-empty-lines.expected.debug
new file mode 100644
index 000000000..d3f2aaceb
--- /dev/null
+++ b/tests/fixtures/sort/version-empty-lines.expected.debug
@@ -0,0 +1,45 @@
+
+^ no match for key
+^ no match for key
+
+^ no match for key
+^ no match for key
+
+^ no match for key
+^ no match for key
+
+^ no match for key
+^ no match for key
+
+^ no match for key
+^ no match for key
+
+^ no match for key
+^ no match for key
+
+^ no match for key
+^ no match for key
+1.2.3-alpha
+___________
+___________
+1.2.3-alpha2
+____________
+____________
+11.2.3
+______
+______
+bar2
+____
+____
+bar2.0.0
+________
+________
+foo0.1
+______
+______
+foo1.0
+______
+______
+>>>1.12.4
+_________
+_________
diff --git a/tests/fixtures/sort/version-empty-lines.txt b/tests/fixtures/sort/version-empty-lines.txt
index 9b6b89788..fef474259 100644
--- a/tests/fixtures/sort/version-empty-lines.txt
+++ b/tests/fixtures/sort/version-empty-lines.txt
@@ -9,3 +9,7 @@
 
 
 			1.12.4
+foo1.0
+foo0.1
+bar2.0.0
+bar2
\ No newline at end of file