mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-30 12:37:49 +00:00
Merge pull request #3754 from ackerleytng/main
Add `parse_glob` module and update `du` to use `parse_glob`
This commit is contained in:
commit
8692301ec7
7 changed files with 224 additions and 104 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -3102,6 +3102,7 @@ dependencies = [
|
||||||
"data-encoding-macro",
|
"data-encoding-macro",
|
||||||
"dns-lookup",
|
"dns-lookup",
|
||||||
"dunce",
|
"dunce",
|
||||||
|
"glob",
|
||||||
"itertools",
|
"itertools",
|
||||||
"libc",
|
"libc",
|
||||||
"nix",
|
"nix",
|
||||||
|
|
|
@ -37,6 +37,7 @@ use uucore::display::{print_verbatim, Quotable};
|
||||||
use uucore::error::FromIo;
|
use uucore::error::FromIo;
|
||||||
use uucore::error::{UError, UResult};
|
use uucore::error::{UError, UResult};
|
||||||
use uucore::format_usage;
|
use uucore::format_usage;
|
||||||
|
use uucore::parse_glob;
|
||||||
use uucore::parse_size::{parse_size, ParseSizeError};
|
use uucore::parse_size::{parse_size, ParseSizeError};
|
||||||
use uucore::InvalidEncodingHandling;
|
use uucore::InvalidEncodingHandling;
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
|
@ -488,55 +489,28 @@ fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {
|
||||||
|
|
||||||
// Given the --exclude-from and/or --exclude arguments, returns the globset lists
|
// Given the --exclude-from and/or --exclude arguments, returns the globset lists
|
||||||
// to ignore the files
|
// to ignore the files
|
||||||
fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
|
fn build_exclude_patterns(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
|
||||||
let mut excludes_from = if matches.contains_id(options::EXCLUDE_FROM) {
|
let exclude_from_iterator = matches
|
||||||
match matches.values_of(options::EXCLUDE_FROM) {
|
.values_of(options::EXCLUDE_FROM)
|
||||||
Some(all_files) => {
|
.unwrap_or_default()
|
||||||
let mut exclusion = Vec::<String>::new();
|
.flat_map(|f| file_as_vec(&f));
|
||||||
// Read the exclude lists from all the files
|
|
||||||
// and add them into a vector of string
|
|
||||||
let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
|
|
||||||
for f in files {
|
|
||||||
exclusion.extend(file_as_vec(&f));
|
|
||||||
}
|
|
||||||
exclusion
|
|
||||||
}
|
|
||||||
None => Vec::<String>::new(),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Vec::<String>::new()
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut excludes = if matches.contains_id(options::EXCLUDE) {
|
let excludes_iterator = matches
|
||||||
match matches.values_of(options::EXCLUDE) {
|
.values_of(options::EXCLUDE)
|
||||||
Some(v) => {
|
.unwrap_or_default()
|
||||||
// Read the various arguments
|
.map(|v| v.to_owned());
|
||||||
v.clone().map(|v| v.to_owned()).collect()
|
|
||||||
}
|
|
||||||
None => Vec::<String>::new(),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Vec::<String>::new()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Merge the two lines
|
let mut exclude_patterns = Vec::new();
|
||||||
excludes.append(&mut excludes_from);
|
for f in excludes_iterator.chain(exclude_from_iterator) {
|
||||||
if !&excludes.is_empty() {
|
if matches.is_present(options::VERBOSE) {
|
||||||
let mut builder = Vec::new();
|
println!("adding {:?} to the exclude list ", &f);
|
||||||
// Create the `Vec` of excludes
|
}
|
||||||
for f in excludes {
|
match parse_glob::from_str(&f) {
|
||||||
if matches.contains_id(options::VERBOSE) {
|
Ok(glob) => exclude_patterns.push(glob),
|
||||||
println!("adding {:?} to the exclude list ", &f);
|
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
|
||||||
}
|
|
||||||
match Pattern::new(&f) {
|
|
||||||
Ok(glob) => builder.push(glob),
|
|
||||||
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
Ok(builder)
|
|
||||||
} else {
|
|
||||||
Ok(Vec::new())
|
|
||||||
}
|
}
|
||||||
|
Ok(exclude_patterns)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[uucore::main]
|
#[uucore::main]
|
||||||
|
@ -615,85 +589,84 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||||
"\n"
|
"\n"
|
||||||
};
|
};
|
||||||
|
|
||||||
let excludes = get_glob_ignore(&matches)?;
|
let excludes = build_exclude_patterns(&matches)?;
|
||||||
|
|
||||||
let mut grand_total = 0;
|
let mut grand_total = 0;
|
||||||
'loop_file: for path_string in files {
|
'loop_file: for path_string in files {
|
||||||
// Skip if we don't want to ignore anything
|
// Skip if we don't want to ignore anything
|
||||||
if !&excludes.is_empty() {
|
if !&excludes.is_empty() {
|
||||||
for pattern in &excludes {
|
for pattern in &excludes {
|
||||||
{
|
if pattern.matches(path_string) {
|
||||||
if pattern.matches(path_string) {
|
// if the directory is ignored, leave early
|
||||||
// if the directory is ignored, leave early
|
if options.verbose {
|
||||||
if options.verbose {
|
println!("{} ignored", path_string.quote());
|
||||||
println!("{} ignored", path_string.quote());
|
|
||||||
}
|
|
||||||
continue 'loop_file;
|
|
||||||
}
|
}
|
||||||
|
continue 'loop_file;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let path = PathBuf::from(&path_string);
|
let path = PathBuf::from(&path_string);
|
||||||
match Stat::new(path, &options) {
|
// Check existence of path provided in argument
|
||||||
Ok(stat) => {
|
if let Ok(stat) = Stat::new(path, &options) {
|
||||||
let mut inodes: HashSet<FileInfo> = HashSet::new();
|
// Kick off the computation of disk usage from the initial path
|
||||||
if let Some(inode) = stat.inode {
|
let mut inodes: HashSet<FileInfo> = HashSet::new();
|
||||||
inodes.insert(inode);
|
if let Some(inode) = stat.inode {
|
||||||
|
inodes.insert(inode);
|
||||||
|
}
|
||||||
|
let iter = du(stat, &options, 0, &mut inodes, &excludes);
|
||||||
|
|
||||||
|
// Sum up all the returned `Stat`s and display results
|
||||||
|
let (_, len) = iter.size_hint();
|
||||||
|
let len = len.unwrap();
|
||||||
|
for (index, stat) in iter.enumerate() {
|
||||||
|
let size = choose_size(&matches, &stat);
|
||||||
|
|
||||||
|
if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
let iter = du(stat, &options, 0, &mut inodes, &excludes);
|
|
||||||
let (_, len) = iter.size_hint();
|
|
||||||
let len = len.unwrap();
|
|
||||||
for (index, stat) in iter.enumerate() {
|
|
||||||
let size = choose_size(&matches, &stat);
|
|
||||||
|
|
||||||
if threshold.map_or(false, |threshold| threshold.should_exclude(size)) {
|
if matches.is_present(options::TIME) {
|
||||||
continue;
|
let tm = {
|
||||||
}
|
let secs = {
|
||||||
|
match matches.value_of(options::TIME) {
|
||||||
if matches.contains_id(options::TIME) {
|
Some(s) => match s {
|
||||||
let tm = {
|
"ctime" | "status" => stat.modified,
|
||||||
let secs = {
|
"access" | "atime" | "use" => stat.accessed,
|
||||||
match matches.value_of(options::TIME) {
|
"birth" | "creation" => stat
|
||||||
Some(s) => match s {
|
.created
|
||||||
"ctime" | "status" => stat.modified,
|
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
|
||||||
"access" | "atime" | "use" => stat.accessed,
|
// below should never happen as clap already restricts the values.
|
||||||
"birth" | "creation" => stat
|
_ => unreachable!("Invalid field for --time"),
|
||||||
.created
|
},
|
||||||
.ok_or_else(|| DuError::InvalidTimeArg(s.into()))?,
|
None => stat.modified,
|
||||||
// below should never happen as clap already restricts the values.
|
}
|
||||||
_ => unreachable!("Invalid field for --time"),
|
|
||||||
},
|
|
||||||
None => stat.modified,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
|
|
||||||
};
|
};
|
||||||
if !summarize || index == len - 1 {
|
DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs(secs))
|
||||||
let time_str = tm.format(time_format_str).to_string();
|
};
|
||||||
print!("{}\t{}\t", convert_size(size), time_str);
|
if !summarize || index == len - 1 {
|
||||||
print_verbatim(stat.path).unwrap();
|
let time_str = tm.format(time_format_str).to_string();
|
||||||
print!("{}", line_separator);
|
print!("{}\t{}\t", convert_size(size), time_str);
|
||||||
}
|
|
||||||
} else if !summarize || index == len - 1 {
|
|
||||||
print!("{}\t", convert_size(size));
|
|
||||||
print_verbatim(stat.path).unwrap();
|
print_verbatim(stat.path).unwrap();
|
||||||
print!("{}", line_separator);
|
print!("{}", line_separator);
|
||||||
}
|
}
|
||||||
if options.total && index == (len - 1) {
|
} else if !summarize || index == len - 1 {
|
||||||
// The last element will be the total size of the the path under
|
print!("{}\t", convert_size(size));
|
||||||
// path_string. We add it to the grand total.
|
print_verbatim(stat.path).unwrap();
|
||||||
grand_total += size;
|
print!("{}", line_separator);
|
||||||
}
|
}
|
||||||
|
if options.total && index == (len - 1) {
|
||||||
|
// The last element will be the total size of the path under
|
||||||
|
// path_string. We add it to the grand total.
|
||||||
|
grand_total += size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(_) => {
|
} else {
|
||||||
show_error!(
|
show_error!(
|
||||||
"{}: {}",
|
"{}: {}",
|
||||||
path_string.maybe_quote(),
|
path_string.maybe_quote(),
|
||||||
"No such file or directory"
|
"No such file or directory"
|
||||||
);
|
);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ clap = "3.2"
|
||||||
dns-lookup = { version="1.0.5", optional=true }
|
dns-lookup = { version="1.0.5", optional=true }
|
||||||
dunce = "1.0.0"
|
dunce = "1.0.0"
|
||||||
wild = "2.0"
|
wild = "2.0"
|
||||||
|
glob = "0.3.0"
|
||||||
# * optional
|
# * optional
|
||||||
itertools = { version="0.10.0", optional=true }
|
itertools = { version="0.10.0", optional=true }
|
||||||
thiserror = { version="1.0", optional=true }
|
thiserror = { version="1.0", optional=true }
|
||||||
|
|
|
@ -29,6 +29,7 @@ pub use crate::mods::ranges;
|
||||||
pub use crate::mods::version_cmp;
|
pub use crate::mods::version_cmp;
|
||||||
|
|
||||||
// * string parsing modules
|
// * string parsing modules
|
||||||
|
pub use crate::parser::parse_glob;
|
||||||
pub use crate::parser::parse_size;
|
pub use crate::parser::parse_size;
|
||||||
pub use crate::parser::parse_time;
|
pub use crate::parser::parse_time;
|
||||||
|
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
|
pub mod parse_glob;
|
||||||
pub mod parse_size;
|
pub mod parse_size;
|
||||||
pub mod parse_time;
|
pub mod parse_time;
|
||||||
|
|
109
src/uucore/src/lib/parser/parse_glob.rs
Normal file
109
src/uucore/src/lib/parser/parse_glob.rs
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
//! Parsing a glob Pattern from a string.
|
||||||
|
//!
|
||||||
|
//! Use the [`from_str`] function to parse a [`Pattern`] from a string.
|
||||||
|
|
||||||
|
// cSpell:words fnmatch
|
||||||
|
|
||||||
|
use glob::{Pattern, PatternError};
|
||||||
|
|
||||||
|
/// Rewrite caret-negated character sets (`[^...]`) as bang-negated ones (`[!...]`).
///
/// Scans `glob` left to right. Wherever a `[` is immediately followed by `^`
/// and a closing `]` exists at least one character after the `^`, the `^` is
/// replaced with `!` and scanning resumes just past that `]`. Everything else,
/// including sets that never close, is returned unchanged.
///
/// All positions are counted in `char`s, not bytes, so input containing
/// multi-byte characters is handled without indexing out of bounds.
fn fix_negation(glob: &str) -> String {
    let mut chars: Vec<char> = glob.chars().collect();

    let mut i = 0;
    // The shortest negated set is four characters long: `[`, `^`, one member
    // and `]`. Once fewer than four characters remain nothing can match.
    while i + 4 <= chars.len() {
        if chars[i] == '[' && chars[i + 1] == '^' {
            // Start the search at `i + 3` so that the first character after
            // the `^` is always treated as a set member, even if it is `]`.
            if let Some(j) = chars[i + 3..].iter().position(|&c| c == ']') {
                chars[i + 1] = '!';
                // Skip past the closing bracket found at index `i + 3 + j`.
                i += j + 4;
                continue;
            }
        }

        i += 1;
    }

    chars.into_iter().collect()
}
|
||||||
|
|
||||||
|
/// Parse a glob Pattern from a string.
|
||||||
|
///
|
||||||
|
/// This function amends the input string to replace any caret or circumflex
|
||||||
|
/// character (^) used to negate a set of characters with an exclamation mark
|
||||||
|
/// (!), which adapts rust's glob matching to function the way the GNU utils'
|
||||||
|
/// fnmatch does.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use std::time::Duration;
|
||||||
|
/// use uucore::parse_glob::from_str;
|
||||||
|
/// assert!(!from_str("[^abc]").unwrap().matches("a"));
|
||||||
|
/// assert!(from_str("[^abc]").unwrap().matches("x"));
|
||||||
|
/// ```
|
||||||
|
pub fn from_str(glob: &str) -> Result<Pattern, PatternError> {
|
||||||
|
Pattern::new(&fix_negation(glob))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `fix_negation` on each input and compares it with the expected output.
    fn check_fix_negation(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(fix_negation(input), expected);
        }
    }

    #[test]
    fn test_from_str() {
        let parsed = from_str("[^abc]").unwrap();
        let expected = Pattern::new("[!abc]").unwrap();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_fix_negation() {
        check_fix_negation(&[
            // Plain negated set
            ("[^abc]", "[!abc]"),
            // Negated set following a non-negated one in a longer glob
            ("foo[abc] bar[^def]", "foo[abc] bar[!def]"),
            // Several negated sets in one glob
            ("foo[^abc]bar[^def]", "foo[!abc]bar[!def]"),
            // Negation of the single character ]
            ("[^]]", "[!]]"),
            // Negation of the single character ^
            ("[^^]", "[!^]"),
            // Negation of the space character
            ("[^ ]", "[! ]"),
            // Brackets as members of the negated set
            ("[^][]", "[!][]"),
            ("[^[]]", "[![]]"),
            // Negated set preceded by bracket-heavy text
            ("[[]] [^a]", "[[]] [!a]"),
            ("[[] [^a]", "[[] [!a]"),
            ("[]] [^a]", "[]] [!a]"),
        ]);
    }

    #[test]
    fn test_fix_negation_should_not_amend() {
        check_fix_negation(&[
            ("abc", "abc"),
            // Set that matches either [ or ^
            ("[[^]", "[[^]"),
            // Set that matches either space or ^
            ("[ ^]", "[ ^]"),
            // Sets that match [, space or ^
            ("[[ ^]", "[[ ^]"),
            ("[ [^]", "[ [^]"),
            // Invalid globs (according to rust's glob implementation) stay as they are
            ("[^]", "[^]"),
            ("[^", "[^"),
            ("[][^]", "[][^]"),
        ]);
    }
}
|
|
@ -747,6 +747,40 @@ fn test_du_exclude_mix() {
|
||||||
assert!(result.stdout_str().contains("xcwww"));
|
assert!(result.stdout_str().contains("xcwww"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
// Not run on Windows: the exclude patterns below use `/` as the separator,
// and covering `\` as well would make the test more complex.
#[cfg(not(target_os = "windows"))]
fn test_du_complex_exclude_patterns() {
    let ts = TestScenario::new(util_name!());
    let at = &ts.fixtures;

    for &dir in &[
        "azerty/xcwww/azeaze",
        "azerty/xcwww/qzerty",
        "azerty/xcwww/amazing",
    ] {
        at.mkdir_all(dir);
    }

    // Negation in a glob should work with both ^ and !, so the same
    // expectations are checked for each spelling.
    for &exclude in &["--exclude=azerty/*/[^q]*", "--exclude=azerty/*/[!q]*"] {
        let result = ts.ucmd().arg(exclude).arg("azerty").succeeds();
        let stdout = result.stdout_str();

        assert!(!stdout.contains("amazing"));
        assert!(stdout.contains("qzerty"));
        assert!(!stdout.contains("azeaze"));
        assert!(stdout.contains("xcwww"));
    }
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_du_exclude_several_components() {
|
fn test_du_exclude_several_components() {
|
||||||
let ts = TestScenario::new(util_name!());
|
let ts = TestScenario::new(util_name!());
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue