1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

ls: version sort (#1898)

This commit is contained in:
Terts Diepraam 2021-03-25 20:24:53 +01:00 committed by GitHub
parent 52997b63fb
commit 23b70001a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 407 additions and 2 deletions

View file

@ -13,6 +13,8 @@ extern crate lazy_static;
#[macro_use]
extern crate uucore;
mod version_cmp;
use clap::{App, Arg};
use number_prefix::NumberPrefix;
#[cfg(unix)]
@ -91,6 +93,7 @@ pub mod options {
pub static SIZE: &str = "S";
pub static TIME: &str = "t";
pub static NONE: &str = "U";
pub static VERSION: &str = "v";
}
pub mod time {
pub static ACCESS: &str = "u";
@ -132,6 +135,7 @@ enum Sort {
Name,
Size,
Time,
Version,
}
enum SizeFormat {
@ -270,6 +274,7 @@ impl Config {
"name" => Sort::Name,
"time" => Sort::Time,
"size" => Sort::Size,
"version" => Sort::Version,
// below should never happen as clap already restricts the values.
_ => unreachable!("Invalid field for --sort"),
}
@ -279,6 +284,8 @@ impl Config {
Sort::Size
} else if options.is_present(options::sort::NONE) {
Sort::None
} else if options.is_present(options::sort::VERSION) {
Sort::Version
} else {
Sort::Name
};
@ -507,13 +514,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
.help("Sort by <field>: name, none (-U), time (-t) or size (-S)")
.value_name("field")
.takes_value(true)
.possible_values(&["name", "none", "time", "size"])
.possible_values(&["name", "none", "time", "size", "version"])
.require_equals(true)
.overrides_with_all(&[
options::SORT,
options::sort::SIZE,
options::sort::TIME,
options::sort::NONE,
options::sort::VERSION,
])
)
.arg(
@ -525,6 +533,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
options::sort::SIZE,
options::sort::TIME,
options::sort::NONE,
options::sort::VERSION,
])
)
.arg(
@ -536,6 +545,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
options::sort::SIZE,
options::sort::TIME,
options::sort::NONE,
options::sort::VERSION,
])
)
.arg(
Arg::with_name(options::sort::VERSION)
.short(options::sort::VERSION)
.help("Natural sort of (version) numbers in the filenames.")
.overrides_with_all(&[
options::SORT,
options::sort::SIZE,
options::sort::TIME,
options::sort::NONE,
options::sort::VERSION,
])
)
.arg(
@ -549,6 +571,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
options::sort::SIZE,
options::sort::TIME,
options::sort::NONE,
options::sort::VERSION,
])
)
@ -747,6 +770,7 @@ fn sort_entries(entries: &mut Vec<PathBuf>, config: &Config) {
.sort_by_key(|k| Reverse(get_metadata(k, config).map(|md| md.len()).unwrap_or(0))),
// The default sort in GNU ls is case insensitive
Sort::Name => entries.sort_by_key(|k| k.to_string_lossy().to_lowercase()),
Sort::Version => entries.sort_by(version_cmp::version_cmp),
Sort::None => {}
}

View file

@ -0,0 +1,304 @@
use std::{cmp::Ordering, path::PathBuf};
/// Compare pathbufs in a way that matches the GNU version sort, meaning that
/// numbers get sorted in a natural way.
pub(crate) fn version_cmp(a: &PathBuf, b: &PathBuf) -> Ordering {
let a_string = a.to_string_lossy();
let b_string = b.to_string_lossy();
let mut a = a_string.chars().peekable();
let mut b = b_string.chars().peekable();
// The order determined from the number of leading zeroes.
// This is used if the filenames are equivalent up to leading zeroes.
let mut leading_zeroes = Ordering::Equal;
loop {
match (a.next(), b.next()) {
// If the characters are both numerical. We collect the rest of the number
// and parse them to u64's and compare them.
(Some(a_char @ '0'..='9'), Some(b_char @ '0'..='9')) => {
let mut a_leading_zeroes = 0;
if a_char == '0' {
a_leading_zeroes = 1;
while let Some('0') = a.peek() {
a_leading_zeroes += 1;
a.next();
}
}
let mut b_leading_zeroes = 0;
if b_char == '0' {
b_leading_zeroes = 1;
while let Some('0') = b.peek() {
b_leading_zeroes += 1;
b.next();
}
}
// The first different number of leading zeros determines the order
// so if it's already been determined by a previous number, we leave
// it as that ordering.
// It's b.cmp(&a), because the *largest* number of leading zeros
// should go first
if leading_zeroes == Ordering::Equal {
leading_zeroes = b_leading_zeroes.cmp(&a_leading_zeroes);
}
let mut a_str = String::new();
let mut b_str = String::new();
if a_char != '0' {
a_str.push(a_char);
}
if b_char != '0' {
b_str.push(b_char);
}
// Unwrapping here is fine because we only call next if peek returns
// Some(_), so next should also return Some(_).
while let Some('0'..='9') = a.peek() {
a_str.push(a.next().unwrap());
}
while let Some('0'..='9') = b.peek() {
b_str.push(b.next().unwrap());
}
// Since the leading zeroes are stripped, the length can be
// used to compare the numbers.
match a_str.len().cmp(&b_str.len()) {
Ordering::Equal => {}
x => return x,
}
// At this point, leading zeroes are stripped and the lengths
// are equal, meaning that the strings can be compared using
// the standard compare function.
match a_str.cmp(&b_str) {
Ordering::Equal => {}
x => return x,
}
}
// If there are two characters we just compare the characters
(Some(a_char), Some(b_char)) => match a_char.cmp(&b_char) {
Ordering::Equal => {}
x => return x,
},
// Otherise, we compare the options (because None < Some(_))
(a_opt, b_opt) => match a_opt.cmp(&b_opt) {
// If they are completely equal except for leading zeroes, we use the leading zeroes.
Ordering::Equal => return leading_zeroes,
x => return x,
},
}
}
}
#[cfg(test)]
mod tests {
use crate::version_cmp::version_cmp;
use std::cmp::Ordering;
use std::path::PathBuf;
#[test]
fn test_version_cmp() {
// Identical strings
assert_eq!(
version_cmp(&PathBuf::from("hello"), &PathBuf::from("hello")),
Ordering::Equal
);
assert_eq!(
version_cmp(&PathBuf::from("file12"), &PathBuf::from("file12")),
Ordering::Equal
);
assert_eq!(
version_cmp(
&PathBuf::from("file12-suffix"),
&PathBuf::from("file12-suffix")
),
Ordering::Equal
);
assert_eq!(
version_cmp(
&PathBuf::from("file12-suffix24"),
&PathBuf::from("file12-suffix24")
),
Ordering::Equal
);
// Shortened names
assert_eq!(
version_cmp(&PathBuf::from("world"), &PathBuf::from("wo")),
Ordering::Greater,
);
assert_eq!(
version_cmp(&PathBuf::from("hello10wo"), &PathBuf::from("hello10world")),
Ordering::Less,
);
// Simple names
assert_eq!(
version_cmp(&PathBuf::from("world"), &PathBuf::from("hello")),
Ordering::Greater,
);
assert_eq!(
version_cmp(&PathBuf::from("hello"), &PathBuf::from("world")),
Ordering::Less
);
assert_eq!(
version_cmp(&PathBuf::from("apple"), &PathBuf::from("ant")),
Ordering::Greater
);
assert_eq!(
version_cmp(&PathBuf::from("ant"), &PathBuf::from("apple")),
Ordering::Less
);
// Uppercase letters
assert_eq!(
version_cmp(&PathBuf::from("Beef"), &PathBuf::from("apple")),
Ordering::Less,
"Uppercase letters are sorted before all lowercase letters"
);
assert_eq!(
version_cmp(&PathBuf::from("Apple"), &PathBuf::from("apple")),
Ordering::Less
);
assert_eq!(
version_cmp(&PathBuf::from("apple"), &PathBuf::from("aPple")),
Ordering::Greater
);
// Numbers
assert_eq!(
version_cmp(&PathBuf::from("100"), &PathBuf::from("20")),
Ordering::Greater,
"Greater numbers are greater even if they start with a smaller digit",
);
assert_eq!(
version_cmp(&PathBuf::from("20"), &PathBuf::from("20")),
Ordering::Equal,
"Equal numbers are equal"
);
assert_eq!(
version_cmp(&PathBuf::from("15"), &PathBuf::from("200")),
Ordering::Less,
"Small numbers are smaller"
);
// Comparing numbers with other characters
assert_eq!(
version_cmp(&PathBuf::from("1000"), &PathBuf::from("apple")),
Ordering::Less,
"Numbers are sorted before other characters"
);
assert_eq!(
version_cmp(&PathBuf::from("file1000"), &PathBuf::from("fileapple")),
Ordering::Less,
"Numbers in the middle of the name are sorted before other characters"
);
// Leading zeroes
assert_eq!(
version_cmp(&PathBuf::from("012"), &PathBuf::from("12")),
Ordering::Less,
"A single leading zero can make a difference"
);
assert_eq!(
version_cmp(&PathBuf::from("000800"), &PathBuf::from("0000800")),
Ordering::Greater,
"Leading number of zeroes is used even if both non-zero number of zeros"
);
// Numbers and other characters combined
assert_eq!(
version_cmp(&PathBuf::from("ab10"), &PathBuf::from("aa11")),
Ordering::Greater
);
assert_eq!(
version_cmp(&PathBuf::from("aa10"), &PathBuf::from("aa11")),
Ordering::Less,
"Numbers after other characters are handled correctly."
);
assert_eq!(
version_cmp(&PathBuf::from("aa2"), &PathBuf::from("aa100")),
Ordering::Less,
"Numbers after alphabetical characters are handled correctly."
);
assert_eq!(
version_cmp(&PathBuf::from("aa10bb"), &PathBuf::from("aa11aa")),
Ordering::Less,
"Number is used even if alphabetical characters after it differ."
);
assert_eq!(
version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa11aa1")),
Ordering::Less,
"Second number is ignored if the first number differs."
);
assert_eq!(
version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa10aa1")),
Ordering::Greater,
"Second number is used if the rest is equal."
);
assert_eq!(
version_cmp(&PathBuf::from("aa10aa0010"), &PathBuf::from("aa00010aa1")),
Ordering::Greater,
"Second number is used if the rest is equal up to leading zeroes of the first number."
);
assert_eq!(
version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa010aa022")),
Ordering::Greater,
"The leading zeroes of the first number has priority."
);
assert_eq!(
version_cmp(&PathBuf::from("aa10aa0022"), &PathBuf::from("aa10aa022")),
Ordering::Less,
"The leading zeroes of other numbers than the first are used."
);
assert_eq!(
version_cmp(&PathBuf::from("file-1.4"), &PathBuf::from("file-1.13")),
Ordering::Less,
"Periods are handled as normal text, not as a decimal point."
);
// Greater than u64::Max
// u64 == 18446744073709551615 so this should be plenty:
// 20000000000000000000000
assert_eq!(
version_cmp(
&PathBuf::from("aa2000000000000000000000bb"),
&PathBuf::from("aa002000000000000000000001bb")
),
Ordering::Less,
"Numbers larger than u64::MAX are handled correctly without crashing"
);
assert_eq!(
version_cmp(
&PathBuf::from("aa2000000000000000000000bb"),
&PathBuf::from("aa002000000000000000000000bb")
),
Ordering::Greater,
"Leading zeroes for numbers larger than u64::MAX are handled correctly without crashing"
);
}
}

View file

@ -526,7 +526,6 @@ fn test_ls_order_time() {
at.metadata("test-2").permissions(),
)
.unwrap();
let second_access = at.open("test-2").metadata().unwrap().accessed().unwrap();
let result = scene.ucmd().arg("-al").run();
println!("stderr = {:?}", result.stderr);
@ -870,3 +869,81 @@ fn test_ls_hidden_windows() {
assert!(result.success);
assert!(result.stdout.contains(file));
}
#[test]
fn test_ls_version_sort() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
for filename in &[
"a2",
"b1",
"b20",
"a1.4",
"a1.40",
"b3",
"b11",
"b20b",
"b20a",
"a100",
"a1.13",
"aa",
"a1",
"aaa",
"a1.00000040",
"abab",
"ab",
"a01.40",
"a001.001",
"a01.0000001",
"a01.001",
"a001.01",
] {
at.touch(filename);
}
let mut expected = vec![
"a1",
"a001.001",
"a001.01",
"a01.0000001",
"a01.001",
"a1.4",
"a1.13",
"a01.40",
"a1.00000040",
"a1.40",
"a2",
"a100",
"aa",
"aaa",
"ab",
"abab",
"b1",
"b3",
"b11",
"b20",
"b20a",
"b20b",
"", // because of '\n' at the end of the output
];
let result = scene.ucmd().arg("-1v").run();
println!("stderr = {:?}", result.stderr);
println!("stdout = {:?}", result.stdout);
assert_eq!(result.stdout.split('\n').collect::<Vec<_>>(), expected);
let result = scene.ucmd().arg("-1").arg("--sort=version").run();
println!("stderr = {:?}", result.stderr);
println!("stdout = {:?}", result.stdout);
assert_eq!(result.stdout.split('\n').collect::<Vec<_>>(), expected);
let result = scene.ucmd().arg("-a1v").run();
println!("stderr = {:?}", result.stderr);
println!("stdout = {:?}", result.stdout);
expected.insert(0, "..");
expected.insert(0, ".");
assert_eq!(result.stdout.split('\n').collect::<Vec<_>>(), expected,)
}