diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 455b4f7b6..8714a0fa1 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -13,6 +13,8 @@ extern crate lazy_static; #[macro_use] extern crate uucore; +mod version_cmp; + use clap::{App, Arg}; use number_prefix::NumberPrefix; #[cfg(unix)] @@ -91,6 +93,7 @@ pub mod options { pub static SIZE: &str = "S"; pub static TIME: &str = "t"; pub static NONE: &str = "U"; + pub static VERSION: &str = "v"; } pub mod time { pub static ACCESS: &str = "u"; @@ -132,6 +135,7 @@ enum Sort { Name, Size, Time, + Version, } enum SizeFormat { @@ -270,6 +274,7 @@ impl Config { "name" => Sort::Name, "time" => Sort::Time, "size" => Sort::Size, + "version" => Sort::Version, // below should never happen as clap already restricts the values. _ => unreachable!("Invalid field for --sort"), } @@ -279,6 +284,8 @@ impl Config { Sort::Size } else if options.is_present(options::sort::NONE) { Sort::None + } else if options.is_present(options::sort::VERSION) { + Sort::Version } else { Sort::Name }; @@ -507,13 +514,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 { .help("Sort by : name, none (-U), time (-t) or size (-S)") .value_name("field") .takes_value(true) - .possible_values(&["name", "none", "time", "size"]) + .possible_values(&["name", "none", "time", "size", "version"]) .require_equals(true) .overrides_with_all(&[ options::SORT, options::sort::SIZE, options::sort::TIME, options::sort::NONE, + options::sort::VERSION, ]) ) .arg( @@ -525,6 +533,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { options::sort::SIZE, options::sort::TIME, options::sort::NONE, + options::sort::VERSION, ]) ) .arg( @@ -536,6 +545,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 { options::sort::SIZE, options::sort::TIME, options::sort::NONE, + options::sort::VERSION, + ]) + ) + .arg( + Arg::with_name(options::sort::VERSION) + .short(options::sort::VERSION) + .help("Natural sort of (version) numbers in the filenames.") + .overrides_with_all(&[ + options::SORT, + options::sort::SIZE, + options::sort::TIME, + options::sort::NONE, + options::sort::VERSION, ]) ) .arg( @@ -549,6 +571,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 { options::sort::SIZE, options::sort::TIME, options::sort::NONE, + options::sort::VERSION, ]) ) @@ -747,6 +770,7 @@ fn sort_entries(entries: &mut Vec, config: &Config) { .sort_by_key(|k| Reverse(get_metadata(k, config).map(|md| md.len()).unwrap_or(0))), // The default sort in GNU ls is case insensitive Sort::Name => entries.sort_by_key(|k| k.to_string_lossy().to_lowercase()), + Sort::Version => entries.sort_by(version_cmp::version_cmp), Sort::None => {} } diff --git a/src/uu/ls/src/version_cmp.rs b/src/uu/ls/src/version_cmp.rs new file mode 100644 index 000000000..3cd5989f1 --- /dev/null +++ b/src/uu/ls/src/version_cmp.rs @@ -0,0 +1,304 @@ +use std::{cmp::Ordering, path::PathBuf}; + +/// Compare pathbufs in a way that matches the GNU version sort, meaning that +/// numbers get sorted in a natural way. +pub(crate) fn version_cmp(a: &PathBuf, b: &PathBuf) -> Ordering { + let a_string = a.to_string_lossy(); + let b_string = b.to_string_lossy(); + let mut a = a_string.chars().peekable(); + let mut b = b_string.chars().peekable(); + + // The order determined from the number of leading zeroes. + // This is used if the filenames are equivalent up to leading zeroes. + let mut leading_zeroes = Ordering::Equal; + + loop { + match (a.next(), b.next()) { + // If the characters are both numerical. We collect the rest of the number + // and parse them to u64's and compare them. + (Some(a_char @ '0'..='9'), Some(b_char @ '0'..='9')) => { + let mut a_leading_zeroes = 0; + if a_char == '0' { + a_leading_zeroes = 1; + while let Some('0') = a.peek() { + a_leading_zeroes += 1; + a.next(); + } + } + + let mut b_leading_zeroes = 0; + if b_char == '0' { + b_leading_zeroes = 1; + while let Some('0') = b.peek() { + b_leading_zeroes += 1; + b.next(); + } + } + // The first different number of leading zeros determines the order + // so if it's already been determined by a previous number, we leave + // it as that ordering. + // It's b.cmp(&a), because the *largest* number of leading zeros + // should go first + if leading_zeroes == Ordering::Equal { + leading_zeroes = b_leading_zeroes.cmp(&a_leading_zeroes); + } + + let mut a_str = String::new(); + let mut b_str = String::new(); + if a_char != '0' { + a_str.push(a_char); + } + if b_char != '0' { + b_str.push(b_char); + } + + // Unwrapping here is fine because we only call next if peek returns + // Some(_), so next should also return Some(_). + while let Some('0'..='9') = a.peek() { + a_str.push(a.next().unwrap()); + } + + while let Some('0'..='9') = b.peek() { + b_str.push(b.next().unwrap()); + } + + // Since the leading zeroes are stripped, the length can be + // used to compare the numbers. + match a_str.len().cmp(&b_str.len()) { + Ordering::Equal => {} + x => return x, + } + + // At this point, leading zeroes are stripped and the lengths + // are equal, meaning that the strings can be compared using + // the standard compare function. + match a_str.cmp(&b_str) { + Ordering::Equal => {} + x => return x, + } + } + // If there are two characters we just compare the characters + (Some(a_char), Some(b_char)) => match a_char.cmp(&b_char) { + Ordering::Equal => {} + x => return x, + }, + // Otherise, we compare the options (because None < Some(_)) + (a_opt, b_opt) => match a_opt.cmp(&b_opt) { + // If they are completely equal except for leading zeroes, we use the leading zeroes. + Ordering::Equal => return leading_zeroes, + x => return x, + }, + } + } +} + +#[cfg(test)] +mod tests { + use crate::version_cmp::version_cmp; + use std::cmp::Ordering; + use std::path::PathBuf; + #[test] + fn test_version_cmp() { + // Identical strings + assert_eq!( + version_cmp(&PathBuf::from("hello"), &PathBuf::from("hello")), + Ordering::Equal + ); + + assert_eq!( + version_cmp(&PathBuf::from("file12"), &PathBuf::from("file12")), + Ordering::Equal + ); + + assert_eq!( + version_cmp( + &PathBuf::from("file12-suffix"), + &PathBuf::from("file12-suffix") + ), + Ordering::Equal + ); + + assert_eq!( + version_cmp( + &PathBuf::from("file12-suffix24"), + &PathBuf::from("file12-suffix24") + ), + Ordering::Equal + ); + + // Shortened names + assert_eq!( + version_cmp(&PathBuf::from("world"), &PathBuf::from("wo")), + Ordering::Greater, + ); + + assert_eq!( + version_cmp(&PathBuf::from("hello10wo"), &PathBuf::from("hello10world")), + Ordering::Less, + ); + + // Simple names + assert_eq!( + version_cmp(&PathBuf::from("world"), &PathBuf::from("hello")), + Ordering::Greater, + ); + + assert_eq!( + version_cmp(&PathBuf::from("hello"), &PathBuf::from("world")), + Ordering::Less + ); + + assert_eq!( + version_cmp(&PathBuf::from("apple"), &PathBuf::from("ant")), + Ordering::Greater + ); + + assert_eq!( + version_cmp(&PathBuf::from("ant"), &PathBuf::from("apple")), + Ordering::Less + ); + + // Uppercase letters + assert_eq!( + version_cmp(&PathBuf::from("Beef"), &PathBuf::from("apple")), + Ordering::Less, + "Uppercase letters are sorted before all lowercase letters" + ); + + assert_eq!( + version_cmp(&PathBuf::from("Apple"), &PathBuf::from("apple")), + Ordering::Less + ); + + assert_eq!( + version_cmp(&PathBuf::from("apple"), &PathBuf::from("aPple")), + Ordering::Greater + ); + + // Numbers + assert_eq!( + version_cmp(&PathBuf::from("100"), &PathBuf::from("20")), + Ordering::Greater, + "Greater numbers are greater even if they start with a smaller digit", + ); + + assert_eq!( + version_cmp(&PathBuf::from("20"), &PathBuf::from("20")), + Ordering::Equal, + "Equal numbers are equal" + ); + + assert_eq!( + version_cmp(&PathBuf::from("15"), &PathBuf::from("200")), + Ordering::Less, + "Small numbers are smaller" + ); + + // Comparing numbers with other characters + assert_eq!( + version_cmp(&PathBuf::from("1000"), &PathBuf::from("apple")), + Ordering::Less, + "Numbers are sorted before other characters" + ); + + assert_eq!( + version_cmp(&PathBuf::from("file1000"), &PathBuf::from("fileapple")), + Ordering::Less, + "Numbers in the middle of the name are sorted before other characters" + ); + + // Leading zeroes + assert_eq!( + version_cmp(&PathBuf::from("012"), &PathBuf::from("12")), + Ordering::Less, + "A single leading zero can make a difference" + ); + + assert_eq!( + version_cmp(&PathBuf::from("000800"), &PathBuf::from("0000800")), + Ordering::Greater, + "Leading number of zeroes is used even if both non-zero number of zeros" + ); + + // Numbers and other characters combined + assert_eq!( + version_cmp(&PathBuf::from("ab10"), &PathBuf::from("aa11")), + Ordering::Greater + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10"), &PathBuf::from("aa11")), + Ordering::Less, + "Numbers after other characters are handled correctly." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa2"), &PathBuf::from("aa100")), + Ordering::Less, + "Numbers after alphabetical characters are handled correctly." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10bb"), &PathBuf::from("aa11aa")), + Ordering::Less, + "Number is used even if alphabetical characters after it differ." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa11aa1")), + Ordering::Less, + "Second number is ignored if the first number differs." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa10aa1")), + Ordering::Greater, + "Second number is used if the rest is equal." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa00010aa1")), + Ordering::Greater, + "Second number is used if the rest is equal up to leading zeroes of the first number." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa010aa022")), + Ordering::Greater, + "The leading zeroes of the first number has priority." + ); + + assert_eq!( + version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa10aa022")), + Ordering::Less, + "The leading zeroes of other numbers than the first are used." + ); + + assert_eq!( + version_cmp(&PathBuf::from("file-1.4"), &PathBuf::from("file-1.13")), + Ordering::Less, + "Periods are handled as normal text, not as a decimal point." + ); + + // Greater than u64::Max + // u64 == 18446744073709551615 so this should be plenty: + // 20000000000000000000000 + assert_eq!( + version_cmp( + &PathBuf::from("aa2000000000000000000000bb"), + &PathBuf::from("aa002000000000000000000001bb") + ), + Ordering::Less, + "Numbers larger than u64::MAX are handled correctly without crashing" + ); + + assert_eq!( + version_cmp( + &PathBuf::from("aa2000000000000000000000bb"), + &PathBuf::from("aa002000000000000000000000bb") + ), + Ordering::Greater, + "Leading zeroes for numbers larger than u64::MAX are handled correctly without crashing" + ); + } +} diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index ecd288735..091d47234 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -526,7 +526,6 @@ fn test_ls_order_time() { at.metadata("test-2").permissions(), ) .unwrap(); - let second_access = at.open("test-2").metadata().unwrap().accessed().unwrap(); let result = scene.ucmd().arg("-al").run(); println!("stderr = {:?}", result.stderr); @@ -870,3 +869,81 @@ fn test_ls_hidden_windows() { assert!(result.success); assert!(result.stdout.contains(file)); } + +#[test] +fn test_ls_version_sort() { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + for filename in &[ + "a2", + "b1", + "b20", + "a1.4", + "a1.40", + "b3", + "b11", + "b20b", + "b20a", + "a100", + "a1.13", + "aa", + "a1", + "aaa", + "a1.00000040", + "abab", + "ab", + "a01.40", + "a001.001", + "a01.0000001", + "a01.001", + "a001.01", + ] { + at.touch(filename); + } + + let mut expected = vec![ + "a1", + "a001.001", + "a001.01", + "a01.0000001", + "a01.001", + "a1.4", + "a1.13", + "a01.40", + "a1.00000040", + "a1.40", + "a2", + "a100", + "aa", + "aaa", + "ab", + "abab", + "b1", + "b3", + "b11", + "b20", + "b20a", + "b20b", + "", // because of '\n' at the end of the output + ]; + + let result = scene.ucmd().arg("-1v").run(); + println!("stderr = {:?}", result.stderr); + println!("stdout = {:?}", result.stdout); + + assert_eq!(result.stdout.split('\n').collect::>(), expected); + + let result = scene.ucmd().arg("-1").arg("--sort=version").run(); + println!("stderr = {:?}", result.stderr); + println!("stdout = {:?}", result.stdout); + + assert_eq!(result.stdout.split('\n').collect::>(), expected); + + let result = scene.ucmd().arg("-a1v").run(); + println!("stderr = {:?}", result.stderr); + println!("stdout = {:?}", result.stdout); + + expected.insert(0, ".."); + expected.insert(0, "."); + assert_eq!(result.stdout.split('\n').collect::>(), expected,) +}