From 233a7789634aa0c5f4fb2fc69e01411ed23e42c7 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sun, 27 Jun 2021 12:52:46 +0200 Subject: [PATCH] sort/ls: implement version cmp matching GNU spec This reimplements version_cmp, which is used in sort and ls to sort according to versions. However, it is not bug-for-bug identical with GNU's implementation. I reported a bug with GNU here: https://lists.gnu.org/archive/html/bug-coreutils/2021-06/msg00045.html This implementation does not contain the bugs regarding the handling of file extensions and null bytes. --- Cargo.lock | 16 - src/uu/ls/src/ls.rs | 7 +- src/uu/ls/src/version_cmp.rs | 306 --------------- src/uu/sort/Cargo.toml | 1 - src/uu/sort/src/sort.rs | 31 +- src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/mods.rs | 1 + src/uucore/src/lib/mods/version_cmp.rs | 361 ++++++++++++++++++ tests/by-util/test_sort.rs | 6 +- .../sort/version-empty-lines.expected | 4 + .../sort/version-empty-lines.expected.debug | 45 +++ tests/fixtures/sort/version-empty-lines.txt | 4 + 12 files changed, 423 insertions(+), 360 deletions(-) delete mode 100644 src/uu/ls/src/version_cmp.rs create mode 100644 src/uucore/src/lib/mods/version_cmp.rs create mode 100644 tests/fixtures/sort/version-empty-lines.expected.debug diff --git a/Cargo.lock b/Cargo.lock index 51424332d..e2dd7d5f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1434,21 +1434,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "sha1" version = "0.6.0" @@ -2488,7 +2473,6 @@ dependencies = [ "ouroboros", "rand 0.7.3", "rayon", - "semver", "tempfile", "unicode-width", "uucore", diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 6ca3f4bbe..b866253a9 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -14,7 +14,6 @@ extern crate uucore; extern crate lazy_static; mod quoting_style; -mod version_cmp; use clap::{crate_version, App, Arg}; use globset::{self, Glob, GlobSet, GlobSetBuilder}; @@ -44,6 +43,7 @@ use term_grid::{Cell, Direction, Filling, Grid, GridOptions}; use unicode_width::UnicodeWidthStr; #[cfg(unix)] use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR}; +use uucore::{fs::display_permissions, version_cmp::version_cmp}; static ABOUT: &str = " By default, ls will list the files and contents of any directories on @@ -1256,7 +1256,8 @@ fn sort_entries(entries: &mut Vec, config: &Config) { } // The default sort in GNU ls is case insensitive Sort::Name => entries.sort_by(|a, b| a.display_name.cmp(&b.display_name)), - Sort::Version => entries.sort_by(|a, b| version_cmp::version_cmp(&a.p_buf, &b.p_buf)), + Sort::Version => entries + .sort_by(|a, b| version_cmp(&a.p_buf.to_string_lossy(), &b.p_buf.to_string_lossy())), Sort::Extension => entries.sort_by(|a, b| { a.p_buf .extension() @@ -1467,8 +1468,6 @@ fn display_grid( } } -use uucore::fs::display_permissions; - fn display_item_long( item: &PathData, max_links: usize, diff --git a/src/uu/ls/src/version_cmp.rs b/src/uu/ls/src/version_cmp.rs deleted file mode 100644 index e3f7e29e3..000000000 --- a/src/uu/ls/src/version_cmp.rs +++ /dev/null @@ -1,306 +0,0 @@ -use std::cmp::Ordering; -use std::path::Path; - -/// Compare paths in a way that matches the GNU version sort, meaning that -/// numbers get sorted in a natural way. -pub(crate) fn version_cmp(a: &Path, b: &Path) -> Ordering { - let a_string = a.to_string_lossy(); - let b_string = b.to_string_lossy(); - let mut a = a_string.chars().peekable(); - let mut b = b_string.chars().peekable(); - - // The order determined from the number of leading zeroes. - // This is used if the filenames are equivalent up to leading zeroes. - let mut leading_zeroes = Ordering::Equal; - - loop { - match (a.next(), b.next()) { - // If the characters are both numerical. We collect the rest of the number - // and parse them to u64's and compare them. - (Some(a_char @ '0'..='9'), Some(b_char @ '0'..='9')) => { - let mut a_leading_zeroes = 0; - if a_char == '0' { - a_leading_zeroes = 1; - while let Some('0') = a.peek() { - a_leading_zeroes += 1; - a.next(); - } - } - - let mut b_leading_zeroes = 0; - if b_char == '0' { - b_leading_zeroes = 1; - while let Some('0') = b.peek() { - b_leading_zeroes += 1; - b.next(); - } - } - // The first different number of leading zeros determines the order - // so if it's already been determined by a previous number, we leave - // it as that ordering. - // It's b.cmp(&a), because the *largest* number of leading zeros - // should go first - if leading_zeroes == Ordering::Equal { - leading_zeroes = b_leading_zeroes.cmp(&a_leading_zeroes); - } - - let mut a_str = String::new(); - let mut b_str = String::new(); - if a_char != '0' { - a_str.push(a_char); - } - if b_char != '0' { - b_str.push(b_char); - } - - // Unwrapping here is fine because we only call next if peek returns - // Some(_), so next should also return Some(_). - while let Some('0'..='9') = a.peek() { - a_str.push(a.next().unwrap()); - } - - while let Some('0'..='9') = b.peek() { - b_str.push(b.next().unwrap()); - } - - // Since the leading zeroes are stripped, the length can be - // used to compare the numbers. - match a_str.len().cmp(&b_str.len()) { - Ordering::Equal => {} - x => return x, - } - - // At this point, leading zeroes are stripped and the lengths - // are equal, meaning that the strings can be compared using - // the standard compare function. - match a_str.cmp(&b_str) { - Ordering::Equal => {} - x => return x, - } - } - // If there are two characters we just compare the characters - (Some(a_char), Some(b_char)) => match a_char.cmp(&b_char) { - Ordering::Equal => {} - x => return x, - }, - // Otherwise, we compare the options (because None < Some(_)) - (a_opt, b_opt) => match a_opt.cmp(&b_opt) { - // If they are completely equal except for leading zeroes, we use the leading zeroes. - Ordering::Equal => return leading_zeroes, - x => return x, - }, - } - } -} - -#[cfg(test)] -mod tests { - use crate::version_cmp::version_cmp; - use std::cmp::Ordering; - use std::path::PathBuf; - #[test] - fn test_version_cmp() { - // Identical strings - assert_eq!( - version_cmp(&PathBuf::from("hello"), &PathBuf::from("hello")), - Ordering::Equal - ); - - assert_eq!( - version_cmp(&PathBuf::from("file12"), &PathBuf::from("file12")), - Ordering::Equal - ); - - assert_eq!( - version_cmp( - &PathBuf::from("file12-suffix"), - &PathBuf::from("file12-suffix") - ), - Ordering::Equal - ); - - assert_eq!( - version_cmp( - &PathBuf::from("file12-suffix24"), - &PathBuf::from("file12-suffix24") - ), - Ordering::Equal - ); - - // Shortened names - assert_eq!( - version_cmp(&PathBuf::from("world"), &PathBuf::from("wo")), - Ordering::Greater, - ); - - assert_eq!( - version_cmp(&PathBuf::from("hello10wo"), &PathBuf::from("hello10world")), - Ordering::Less, - ); - - // Simple names - assert_eq!( - version_cmp(&PathBuf::from("world"), &PathBuf::from("hello")), - Ordering::Greater, - ); - - assert_eq!( - version_cmp(&PathBuf::from("hello"), &PathBuf::from("world")), - Ordering::Less - ); - - assert_eq!( - version_cmp(&PathBuf::from("apple"), &PathBuf::from("ant")), - Ordering::Greater - ); - - assert_eq!( - version_cmp(&PathBuf::from("ant"), &PathBuf::from("apple")), - Ordering::Less - ); - - // Uppercase letters - assert_eq!( - version_cmp(&PathBuf::from("Beef"), &PathBuf::from("apple")), - Ordering::Less, - "Uppercase letters are sorted before all lowercase letters" - ); - - assert_eq!( - version_cmp(&PathBuf::from("Apple"), &PathBuf::from("apple")), - Ordering::Less - ); - - assert_eq!( - version_cmp(&PathBuf::from("apple"), &PathBuf::from("aPple")), - Ordering::Greater - ); - - // Numbers - assert_eq!( - version_cmp(&PathBuf::from("100"), &PathBuf::from("20")), - Ordering::Greater, - "Greater numbers are greater even if they start with a smaller digit", - ); - - assert_eq!( - version_cmp(&PathBuf::from("20"), &PathBuf::from("20")), - Ordering::Equal, - "Equal numbers are equal" - ); - - assert_eq!( - version_cmp(&PathBuf::from("15"), &PathBuf::from("200")), - Ordering::Less, - "Small numbers are smaller" - ); - - // Comparing numbers with other characters - assert_eq!( - version_cmp(&PathBuf::from("1000"), &PathBuf::from("apple")), - Ordering::Less, - "Numbers are sorted before other characters" - ); - - assert_eq!( - // spell-checker:disable-next-line - version_cmp(&PathBuf::from("file1000"), &PathBuf::from("fileapple")), - Ordering::Less, - "Numbers in the middle of the name are sorted before other characters" - ); - - // Leading zeroes - assert_eq!( - version_cmp(&PathBuf::from("012"), &PathBuf::from("12")), - Ordering::Less, - "A single leading zero can make a difference" - ); - - assert_eq!( - version_cmp(&PathBuf::from("000800"), &PathBuf::from("0000800")), - Ordering::Greater, - "Leading number of zeroes is used even if both non-zero number of zeros" - ); - - // Numbers and other characters combined - assert_eq!( - version_cmp(&PathBuf::from("ab10"), &PathBuf::from("aa11")), - Ordering::Greater - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10"), &PathBuf::from("aa11")), - Ordering::Less, - "Numbers after other characters are handled correctly." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa2"), &PathBuf::from("aa100")), - Ordering::Less, - "Numbers after alphabetical characters are handled correctly." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10bb"), &PathBuf::from("aa11aa")), - Ordering::Less, - "Number is used even if alphabetical characters after it differ." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa11aa1")), - Ordering::Less, - "Second number is ignored if the first number differs." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa10aa1")), - Ordering::Greater, - "Second number is used if the rest is equal." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa00010aa1")), - Ordering::Greater, - "Second number is used if the rest is equal up to leading zeroes of the first number." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa010aa022")), - Ordering::Greater, - "The leading zeroes of the first number has priority." - ); - - assert_eq!( - version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa10aa022")), - Ordering::Less, - "The leading zeroes of other numbers than the first are used." - ); - - assert_eq!( - version_cmp(&PathBuf::from("file-1.4"), &PathBuf::from("file-1.13")), - Ordering::Less, - "Periods are handled as normal text, not as a decimal point." - ); - - // Greater than u64::Max - // u64 == 18446744073709551615 so this should be plenty: - // 20000000000000000000000 - assert_eq!( - version_cmp( - &PathBuf::from("aa2000000000000000000000bb"), - &PathBuf::from("aa002000000000000000000001bb") - ), - Ordering::Less, - "Numbers larger than u64::MAX are handled correctly without crashing" - ); - - assert_eq!( - version_cmp( - &PathBuf::from("aa2000000000000000000000bb"), - &PathBuf::from("aa002000000000000000000000bb") - ), - Ordering::Greater, - "Leading zeroes for numbers larger than u64::MAX are handled correctly without crashing" - ); - } -} diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index f06610248..d454179b0 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -24,7 +24,6 @@ memchr = "2.4.0" ouroboros = "0.9.3" rand = "0.7" rayon = "1.5" -semver = "0.9.0" tempfile = "3" unicode-width = "0.1.8" uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index fb0241945..821def70d 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -32,7 +32,6 @@ use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoPa use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use rayon::prelude::*; -use semver::Version; use std::cmp::Ordering; use std::env; use std::ffi::OsStr; @@ -44,6 +43,7 @@ use std::path::Path; use std::path::PathBuf; use unicode_width::UnicodeWidthStr; use uucore::parse_size::{parse_size, ParseSizeError}; +use uucore::version_cmp::version_cmp; use uucore::InvalidEncodingHandling; const NAME: &str = "sort"; @@ -1410,7 +1410,7 @@ fn compare_by<'a>( general_numeric_compare(a_float, b_float) } SortMode::Month => month_compare(a_str, b_str), - SortMode::Version => version_compare(a_str, b_str), + SortMode::Version => version_cmp(a_str, b_str), SortMode::Default => custom_str_cmp( a_str, b_str, @@ -1615,31 +1615,6 @@ fn month_compare(a: &str, b: &str) -> Ordering { } } -fn version_parse(a: &str) -> Version { - let result = Version::parse(a); - - match result { - Ok(vers_a) => vers_a, - // Non-version lines parse to 0.0.0 - Err(_e) => Version::parse("0.0.0").unwrap(), - } -} - -fn version_compare(a: &str, b: &str) -> Ordering { - #![allow(clippy::comparison_chain)] - let ver_a = version_parse(a); - let ver_b = version_parse(b); - - // Version::cmp is not implemented; implement comparison directly - if ver_a > ver_b { - Ordering::Greater - } else if ver_a < ver_b { - Ordering::Less - } else { - Ordering::Equal - } -} - fn print_sorted<'a, T: Iterator>>(iter: T, settings: &GlobalSettings) { let mut writer = settings.out_writer(); for line in iter { @@ -1712,7 +1687,7 @@ mod tests { let a = "1.2.3-alpha2"; let b = "1.4.0"; - assert_eq!(Ordering::Less, version_compare(a, b)); + assert_eq!(Ordering::Less, version_cmp(a, b)); } #[test] diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index bf2e5b1bb..ac505e42f 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -30,6 +30,7 @@ pub use crate::mods::coreopts; pub use crate::mods::os; pub use crate::mods::panic; pub use crate::mods::ranges; +pub use crate::mods::version_cmp; // * string parsing modules pub use crate::parser::parse_size; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index 2689361a0..f81d05ed9 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -5,3 +5,4 @@ pub mod coreopts; pub mod os; pub mod panic; pub mod ranges; +pub mod version_cmp; diff --git a/src/uucore/src/lib/mods/version_cmp.rs b/src/uucore/src/lib/mods/version_cmp.rs new file mode 100644 index 000000000..99b8c8b40 --- /dev/null +++ b/src/uucore/src/lib/mods/version_cmp.rs @@ -0,0 +1,361 @@ +use std::cmp::Ordering; + +/// Compares the non-digit parts of a version. +/// Special cases: ~ are before everything else, even ends ("a~" < "a") +/// Letters are before non-letters +fn version_non_digit_cmp(a: &str, b: &str) -> Ordering { + let mut a_chars = a.chars(); + let mut b_chars = b.chars(); + loop { + match (a_chars.next(), b_chars.next()) { + (Some(c1), Some(c2)) if c1 == c2 => {} + (None, None) => return Ordering::Equal, + (_, Some('~')) => return Ordering::Greater, + (Some('~'), _) => return Ordering::Less, + (None, Some(_)) => return Ordering::Less, + (Some(_), None) => return Ordering::Greater, + (Some(c1), Some(c2)) if c1.is_ascii_alphabetic() && !c2.is_ascii_alphabetic() => { + return Ordering::Less + } + (Some(c1), Some(c2)) if !c1.is_ascii_alphabetic() && c2.is_ascii_alphabetic() => { + return Ordering::Greater + } + (Some(c1), Some(c2)) => return c1.cmp(&c2), + } + } +} + +/// Remove file endings matching the regex (\.[A-Za-z~][A-Za-z0-9~]*)*$ +fn remove_file_ending(a: &str) -> &str { + let mut ending_start = None; + let mut prev_was_dot = false; + for (idx, char) in a.char_indices() { + if char == '.' { + if ending_start.is_none() || prev_was_dot { + ending_start = Some(idx); + } + prev_was_dot = true; + } else if prev_was_dot { + prev_was_dot = false; + if !char.is_ascii_alphabetic() && char != '~' { + ending_start = None; + } + } else if !char.is_ascii_alphanumeric() && char != '~' { + ending_start = None; + } + } + if prev_was_dot { + ending_start = None; + } + if let Some(ending_start) = ending_start { + &a[..ending_start] + } else { + a + } +} + +pub fn version_cmp(mut a: &str, mut b: &str) -> Ordering { + let str_cmp = a.cmp(b); + if str_cmp == Ordering::Equal { + return str_cmp; + } + + // Special cases: + // 1. Empty strings + match (a.is_empty(), b.is_empty()) { + (true, false) => return Ordering::Less, + (false, true) => return Ordering::Greater, + (true, true) => unreachable!(), + (false, false) => {} + } + // 2. Dots + match (a == ".", b == ".") { + (true, false) => return Ordering::Less, + (false, true) => return Ordering::Greater, + (true, true) => unreachable!(), + (false, false) => {} + } + // 3. Two Dots + match (a == "..", b == "..") { + (true, false) => return Ordering::Less, + (false, true) => return Ordering::Greater, + (true, true) => unreachable!(), + (false, false) => {} + } + // 4. Strings starting with a dot + match (a.starts_with('.'), b.starts_with('.')) { + (true, false) => return Ordering::Less, + (false, true) => return Ordering::Greater, + (true, true) => { + // Strip the leading dot for later comparisons + a = &a[1..]; + b = &b[1..]; + } + _ => {} + } + + // Try to strip file extensions + let (mut a, mut b) = match (remove_file_ending(a), remove_file_ending(b)) { + (a_stripped, b_stripped) if a_stripped == b_stripped => { + // If both would be the same after stripping file extensions, don't strip them. + (a, b) + } + stripped => stripped, + }; + + // 1. Compare leading non-numerical part + // 2. Compare leading numerical part + // 3. Repeat + loop { + let a_numerical_start = a.find(|c: char| c.is_ascii_digit()).unwrap_or(a.len()); + let b_numerical_start = b.find(|c: char| c.is_ascii_digit()).unwrap_or(b.len()); + + let a_str = &a[..a_numerical_start]; + let b_str = &b[..b_numerical_start]; + + match version_non_digit_cmp(a_str, b_str) { + Ordering::Equal => {} + ord => return ord, + } + + a = &a[a_numerical_start..]; + b = &b[a_numerical_start..]; + + let a_numerical_end = a.find(|c: char| !c.is_ascii_digit()).unwrap_or(a.len()); + let b_numerical_end = b.find(|c: char| !c.is_ascii_digit()).unwrap_or(b.len()); + + let a_str = a[..a_numerical_end].trim_start_matches('0'); + let b_str = b[..b_numerical_end].trim_start_matches('0'); + + match a_str.len().cmp(&b_str.len()) { + Ordering::Equal => {} + ord => return ord, + } + + match a_str.cmp(b_str) { + Ordering::Equal => {} + ord => return ord, + } + + a = &a[a_numerical_end..]; + b = &b[b_numerical_end..]; + + if a.is_empty() && b.is_empty() { + // Default to the lexical comparison. + return str_cmp; + } + } +} + +#[cfg(test)] +mod tests { + use crate::version_cmp::version_cmp; + use std::cmp::Ordering; + #[test] + fn test_version_cmp() { + // Identical strings + assert_eq!(version_cmp("hello", "hello"), Ordering::Equal); + + assert_eq!(version_cmp("file12", "file12"), Ordering::Equal); + + assert_eq!( + version_cmp("file12-suffix", "file12-suffix"), + Ordering::Equal + ); + + assert_eq!( + version_cmp("file12-suffix24", "file12-suffix24"), + Ordering::Equal + ); + + // Shortened names + assert_eq!(version_cmp("world", "wo"), Ordering::Greater,); + + assert_eq!(version_cmp("hello10wo", "hello10world"), Ordering::Less,); + + // Simple names + assert_eq!(version_cmp("world", "hello"), Ordering::Greater,); + + assert_eq!(version_cmp("hello", "world"), Ordering::Less); + + assert_eq!(version_cmp("apple", "ant"), Ordering::Greater); + + assert_eq!(version_cmp("ant", "apple"), Ordering::Less); + + // Uppercase letters + assert_eq!( + version_cmp("Beef", "apple"), + Ordering::Less, + "Uppercase letters are sorted before all lowercase letters" + ); + + assert_eq!(version_cmp("Apple", "apple"), Ordering::Less); + + assert_eq!(version_cmp("apple", "aPple"), Ordering::Greater); + + // Numbers + assert_eq!( + version_cmp("100", "20"), + Ordering::Greater, + "Greater numbers are greater even if they start with a smaller digit", + ); + + assert_eq!( + version_cmp("20", "20"), + Ordering::Equal, + "Equal numbers are equal" + ); + + assert_eq!( + version_cmp("15", "200"), + Ordering::Less, + "Small numbers are smaller" + ); + + // Comparing numbers with other characters + assert_eq!( + version_cmp("1000", "apple"), + Ordering::Less, + "Numbers are sorted before other characters" + ); + + assert_eq!( + // spell-checker:disable-next-line + version_cmp("file1000", "fileapple"), + Ordering::Less, + "Numbers in the middle of the name are sorted before other characters" + ); + + // Leading zeroes + assert_eq!( + version_cmp("012", "12"), + Ordering::Less, + "A single leading zero can make a difference" + ); + + assert_eq!( + version_cmp("000800", "0000800"), + Ordering::Greater, + "Leading number of zeroes is used even if both non-zero number of zeros" + ); + + // Numbers and other characters combined + assert_eq!(version_cmp("ab10", "aa11"), Ordering::Greater); + + assert_eq!( + version_cmp("aa10", "aa11"), + Ordering::Less, + "Numbers after other characters are handled correctly." + ); + + assert_eq!( + version_cmp("aa2", "aa100"), + Ordering::Less, + "Numbers after alphabetical characters are handled correctly." + ); + + assert_eq!( + version_cmp("aa10bb", "aa11aa"), + Ordering::Less, + "Number is used even if alphabetical characters after it differ." + ); + + assert_eq!( + version_cmp("aa10aa0010", "aa11aa1"), + Ordering::Less, + "Second number is ignored if the first number differs." + ); + + assert_eq!( + version_cmp("aa10aa0010", "aa10aa1"), + Ordering::Greater, + "Second number is used if the rest is equal." + ); + + assert_eq!( + version_cmp("aa10aa0010", "aa00010aa1"), + Ordering::Greater, + "Second number is used if the rest is equal up to leading zeroes of the first number." + ); + + assert_eq!( + version_cmp("aa10aa0022", "aa010aa022"), + Ordering::Greater, + "The leading zeroes of the first number has priority." + ); + + assert_eq!( + version_cmp("aa10aa0022", "aa10aa022"), + Ordering::Less, + "The leading zeroes of other numbers than the first are used." + ); + + assert_eq!( + version_cmp("file-1.4", "file-1.13"), + Ordering::Less, + "Periods are handled as normal text, not as a decimal point." + ); + + // Greater than u64::Max + // u64 == 18446744073709551615 so this should be plenty: + // 20000000000000000000000 + assert_eq!( + version_cmp("aa2000000000000000000000bb", "aa002000000000000000000001bb"), + Ordering::Less, + "Numbers larger than u64::MAX are handled correctly without crashing" + ); + + assert_eq!( + version_cmp("aa2000000000000000000000bb", "aa002000000000000000000000bb"), + Ordering::Greater, + "Leading zeroes for numbers larger than u64::MAX are \ + handled correctly without crashing" + ); + + assert_eq!( + version_cmp(" a", "a"), + Ordering::Greater, + "Whitespace is after letters because letters are before non-letters" + ); + + assert_eq!( + version_cmp("a~", "ab"), + Ordering::Less, + "A tilde is before other letters" + ); + + assert_eq!( + version_cmp("a~", "a"), + Ordering::Less, + "A tilde is before the line end" + ); + assert_eq!( + version_cmp("~", ""), + Ordering::Greater, + "A tilde is after the empty string" + ); + assert_eq!( + version_cmp(".f", ".1"), + Ordering::Greater, + "if both start with a dot it is ignored for the comparison" + ); + + // The following tests are incompatible with GNU as of 2021/06. + // I think that's because of a bug in GNU, reported as https://lists.gnu.org/archive/html/bug-coreutils/2021-06/msg00045.html + assert_eq!( + version_cmp("a..a", "a.+"), + Ordering::Less, + ".a is stripped before the comparison" + ); + assert_eq!( + version_cmp("a.", "a+"), + Ordering::Greater, + ". is not stripped before the comparison" + ); + assert_eq!( + version_cmp("a\0a", "a"), + Ordering::Greater, + "NULL bytes are handled comparison" + ); + } +} diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index 3e841f630..2c99c1536 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -127,11 +127,7 @@ fn test_months_whitespace() { #[test] fn test_version_empty_lines() { - new_ucmd!() - .arg("-V") - .arg("version-empty-lines.txt") - .succeeds() - .stdout_is("\n\n\n\n\n\n\n1.2.3-alpha\n1.2.3-alpha2\n\t\t\t1.12.4\n11.2.3\n"); + test_helper("version-empty-lines", &["-V", "--version-sort"]); } #[test] diff --git a/tests/fixtures/sort/version-empty-lines.expected b/tests/fixtures/sort/version-empty-lines.expected index c496c0ff5..69a648966 100644 --- a/tests/fixtures/sort/version-empty-lines.expected +++ b/tests/fixtures/sort/version-empty-lines.expected @@ -8,4 +8,8 @@ 1.2.3-alpha 1.2.3-alpha2 11.2.3 +bar2 +bar2.0.0 +foo0.1 +foo1.0 1.12.4 diff --git a/tests/fixtures/sort/version-empty-lines.expected.debug b/tests/fixtures/sort/version-empty-lines.expected.debug new file mode 100644 index 000000000..d3f2aaceb --- /dev/null +++ b/tests/fixtures/sort/version-empty-lines.expected.debug @@ -0,0 +1,45 @@ + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key + +^ no match for key +^ no match for key +1.2.3-alpha +___________ +___________ +1.2.3-alpha2 +____________ +____________ +11.2.3 +______ +______ +bar2 +____ +____ +bar2.0.0 +________ +________ +foo0.1 +______ +______ +foo1.0 +______ +______ +>>>1.12.4 +_________ +_________ diff --git a/tests/fixtures/sort/version-empty-lines.txt b/tests/fixtures/sort/version-empty-lines.txt index 9b6b89788..fef474259 100644 --- a/tests/fixtures/sort/version-empty-lines.txt +++ b/tests/fixtures/sort/version-empty-lines.txt @@ -9,3 +9,7 @@ 1.12.4 +foo1.0 +foo0.1 +bar2.0.0 +bar2 \ No newline at end of file