mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-28 19:47:45 +00:00
Optimize recursive ls (#2083)
* ls: Remove allocations by eliminating collect/clones * ls: Introduce PathData structure - PathData will hold Path related metadata / strings that are required frequently in subsequent functions - All data is precomputed and cached and subsequent functions just use cached data * ls: Cache more data related to paths - Cache filename and sort by filename instead of full path - Cache uid->usr and gid->grp mappings https://github.com/uutils/coreutils/pull/2099/files * ls: Add BENCHMARKING.md * ls: Document PathData structure * tests/ls: Add testcase for error paths with width option * ls: Fix unused import warning cached will be only used for unix currently as current use of caching gid/uid mappings is only relevant on unix * ls: Suggest checking syscall count in BENCHMARKING.md * ls: Remove mentions of sort in BENCHMARKING.md * ls: Remove dependency on cached Implement caching using HashMap and lazy_static * ls: Fix MSRV error related to map_or Rust 1.40 did not support map_or for result types
This commit is contained in:
parent
b756b987a4
commit
8554cdf35b
3 changed files with 181 additions and 73 deletions
34
src/uu/ls/BENCHMARKING.md
Normal file
34
src/uu/ls/BENCHMARKING.md
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# Benchmarking ls
|
||||||
|
|
||||||
|
ls majorly involves fetching a lot of details (depending upon what details are requested, eg. time/date, inode details, etc) for each path using system calls. Ideally, any system call should be done only once for each of the paths - not adhering to this principle leads to a lot of system call overhead multiplying and bubbling up, especially for recursive ls, therefore it is important to always benchmark multiple scenarios.
|
||||||
|
This is an overwiew over what was benchmarked, and if you make changes to `ls`, you are encouraged to check
|
||||||
|
how performance was affected for the workloads listed below. Feel free to add other workloads to the
|
||||||
|
list that we should improve / make sure not to regress.
|
||||||
|
|
||||||
|
Run `cargo build --release` before benchmarking after you make a change!
|
||||||
|
|
||||||
|
## Simple recursive ls
|
||||||
|
|
||||||
|
- Get a large tree, for example linux kernel source tree.
|
||||||
|
- Benchmark simple recursive ls with hyperfine: `hyperfine --warmup 2 "target/release/coreutils ls -R tree > /dev/null"`.
|
||||||
|
|
||||||
|
## Recursive ls with all and long options
|
||||||
|
|
||||||
|
- Same tree as above
|
||||||
|
- Benchmark recursive ls with -al -R options with hyperfine: `hyperfine --warmup 2 "target/release/coreutils ls -al -R tree > /dev/null"`.
|
||||||
|
|
||||||
|
## Comparing with GNU ls
|
||||||
|
|
||||||
|
Hyperfine accepts multiple commands to run and will compare them. To compare performance with GNU ls
|
||||||
|
duplicate the string you passed to hyperfine but remove the `target/release/coreutils` bit from it.
|
||||||
|
|
||||||
|
Example: `hyperfine --warmup 2 "target/release/coreutils ls -al -R tree > /dev/null"` becomes
|
||||||
|
`hyperfine --warmup 2 "target/release/coreutils ls -al -R tree > /dev/null" "ls -al -R tree > /dev/null"`
|
||||||
|
(This assumes GNU ls is installed as `ls`)
|
||||||
|
|
||||||
|
This can also be used to compare with version of ls built before your changes to ensure your change does not regress this
|
||||||
|
|
||||||
|
## Checking system call count
|
||||||
|
|
||||||
|
- Another thing to look at would be system calls count using strace (on linux) or equivalent on other operating systems.
|
||||||
|
- Example: `strace -c target/release/coreutils ls -al -R tree`
|
|
@ -1038,12 +1038,43 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||||
list(locs, Config::from(matches))
|
list(locs, Config::from(matches))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents a Path along with it's associated data
|
||||||
|
/// Any data that will be reused several times makes sense to be added to this structure
|
||||||
|
/// Caching data here helps eliminate redundant syscalls to fetch same information
|
||||||
|
struct PathData {
|
||||||
|
// Result<MetaData> got from symlink_metadata() or metadata() based on config
|
||||||
|
md: std::io::Result<Metadata>,
|
||||||
|
// String formed from get_lossy_string() for the path
|
||||||
|
lossy_string: String,
|
||||||
|
// Name of the file - will be empty for . or ..
|
||||||
|
file_name: String,
|
||||||
|
// PathBuf that all above data corresponds to
|
||||||
|
p_buf: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PathData {
|
||||||
|
fn new(p_buf: PathBuf, config: &Config, command_line: bool) -> Self {
|
||||||
|
let md = get_metadata(&p_buf, config, command_line);
|
||||||
|
let lossy_string = p_buf.to_string_lossy().into_owned();
|
||||||
|
let name = p_buf
|
||||||
|
.file_name()
|
||||||
|
.map_or(String::new(), |s| s.to_string_lossy().into_owned());
|
||||||
|
Self {
|
||||||
|
md,
|
||||||
|
lossy_string,
|
||||||
|
file_name: name,
|
||||||
|
p_buf,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn list(locs: Vec<String>, config: Config) -> i32 {
|
fn list(locs: Vec<String>, config: Config) -> i32 {
|
||||||
let number_of_locs = locs.len();
|
let number_of_locs = locs.len();
|
||||||
|
|
||||||
let mut files = Vec::<PathBuf>::new();
|
let mut files = Vec::<PathData>::new();
|
||||||
let mut dirs = Vec::<PathBuf>::new();
|
let mut dirs = Vec::<PathData>::new();
|
||||||
let mut has_failed = false;
|
let mut has_failed = false;
|
||||||
|
|
||||||
for loc in locs {
|
for loc in locs {
|
||||||
let p = PathBuf::from(&loc);
|
let p = PathBuf::from(&loc);
|
||||||
if !p.exists() {
|
if !p.exists() {
|
||||||
|
@ -1054,36 +1085,28 @@ fn list(locs: Vec<String>, config: Config) -> i32 {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let show_dir_contents = if !config.directory {
|
let path_data = PathData::new(p, &config, true);
|
||||||
match config.dereference {
|
|
||||||
Dereference::None => {
|
let show_dir_contents = if let Ok(md) = path_data.md.as_ref() {
|
||||||
if let Ok(md) = p.symlink_metadata() {
|
!config.directory && md.is_dir()
|
||||||
md.is_dir()
|
|
||||||
} else {
|
|
||||||
show_error!("'{}': {}", &loc, "No such file or directory");
|
|
||||||
has_failed = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => p.is_dir(),
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
|
has_failed = true;
|
||||||
false
|
false
|
||||||
};
|
};
|
||||||
|
|
||||||
if show_dir_contents {
|
if show_dir_contents {
|
||||||
dirs.push(p);
|
dirs.push(path_data);
|
||||||
} else {
|
} else {
|
||||||
files.push(p);
|
files.push(path_data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sort_entries(&mut files, &config);
|
sort_entries(&mut files, &config);
|
||||||
display_items(&files, None, &config, true);
|
display_items(&files, None, &config);
|
||||||
|
|
||||||
sort_entries(&mut dirs, &config);
|
sort_entries(&mut dirs, &config);
|
||||||
for dir in dirs {
|
for dir in dirs {
|
||||||
if number_of_locs > 1 {
|
if number_of_locs > 1 {
|
||||||
println!("\n{}:", dir.to_string_lossy());
|
println!("\n{}:", dir.lossy_string);
|
||||||
}
|
}
|
||||||
enter_directory(&dir, &config);
|
enter_directory(&dir, &config);
|
||||||
}
|
}
|
||||||
|
@ -1094,22 +1117,22 @@ fn list(locs: Vec<String>, config: Config) -> i32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sort_entries(entries: &mut Vec<PathBuf>, config: &Config) {
|
fn sort_entries(entries: &mut Vec<PathData>, config: &Config) {
|
||||||
match config.sort {
|
match config.sort {
|
||||||
Sort::Time => entries.sort_by_key(|k| {
|
Sort::Time => entries.sort_by_key(|k| {
|
||||||
Reverse(
|
Reverse(
|
||||||
get_metadata(k, false)
|
k.md.as_ref()
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|md| get_system_time(&md, config))
|
.and_then(|md| get_system_time(&md, config))
|
||||||
.unwrap_or(UNIX_EPOCH),
|
.unwrap_or(UNIX_EPOCH),
|
||||||
)
|
)
|
||||||
}),
|
}),
|
||||||
Sort::Size => {
|
Sort::Size => {
|
||||||
entries.sort_by_key(|k| Reverse(get_metadata(k, false).map(|md| md.len()).unwrap_or(0)))
|
entries.sort_by_key(|k| Reverse(k.md.as_ref().map(|md| md.len()).unwrap_or(0)))
|
||||||
}
|
}
|
||||||
// The default sort in GNU ls is case insensitive
|
// The default sort in GNU ls is case insensitive
|
||||||
Sort::Name => entries.sort_by_key(|k| k.to_string_lossy().to_lowercase()),
|
Sort::Name => entries.sort_by_key(|k| k.file_name.to_lowercase()),
|
||||||
Sort::Version => entries.sort_by(|a, b| version_cmp::version_cmp(a, b)),
|
Sort::Version => entries.sort_by(|k, j| version_cmp::version_cmp(&k.p_buf, &j.p_buf)),
|
||||||
Sort::None => {}
|
Sort::None => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1143,32 +1166,57 @@ fn should_display(entry: &DirEntry, config: &Config) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enter_directory(dir: &Path, config: &Config) {
|
fn enter_directory(dir: &PathData, config: &Config) {
|
||||||
let mut entries: Vec<_> = safe_unwrap!(fs::read_dir(dir).and_then(Iterator::collect));
|
let mut entries: Vec<_> = if config.files == Files::All {
|
||||||
|
vec![
|
||||||
entries.retain(|e| should_display(e, config));
|
PathData::new(dir.p_buf.join("."), config, false),
|
||||||
|
PathData::new(dir.p_buf.join(".."), config, false),
|
||||||
let mut entries: Vec<_> = entries.iter().map(DirEntry::path).collect();
|
]
|
||||||
sort_entries(&mut entries, config);
|
|
||||||
|
|
||||||
if config.files == Files::All {
|
|
||||||
let mut display_entries = entries.clone();
|
|
||||||
display_entries.insert(0, dir.join(".."));
|
|
||||||
display_entries.insert(0, dir.join("."));
|
|
||||||
display_items(&display_entries, Some(dir), config, false);
|
|
||||||
} else {
|
} else {
|
||||||
display_items(&entries, Some(dir), config, false);
|
vec![]
|
||||||
}
|
};
|
||||||
|
|
||||||
|
let mut temp: Vec<_> = safe_unwrap!(fs::read_dir(&dir.p_buf))
|
||||||
|
.map(|res| safe_unwrap!(res))
|
||||||
|
.filter(|e| should_display(e, config))
|
||||||
|
.map(|e| PathData::new(DirEntry::path(&e), config, false))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
sort_entries(&mut temp, config);
|
||||||
|
|
||||||
|
entries.append(&mut temp);
|
||||||
|
|
||||||
|
display_items(&entries, Some(&dir.p_buf), config);
|
||||||
|
|
||||||
if config.recursive {
|
if config.recursive {
|
||||||
for e in entries.iter().filter(|p| p.is_dir()) {
|
for e in entries
|
||||||
println!("\n{}:", e.to_string_lossy());
|
.iter()
|
||||||
|
.skip(if config.files == Files::All { 2 } else { 0 })
|
||||||
|
.filter(|p| p.md.as_ref().map(|md| md.is_dir()).unwrap_or(false))
|
||||||
|
{
|
||||||
|
println!("\n{}:", e.lossy_string);
|
||||||
enter_directory(&e, config);
|
enter_directory(&e, config);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_metadata(entry: &Path, dereference: bool) -> std::io::Result<Metadata> {
|
fn get_metadata(entry: &Path, config: &Config, command_line: bool) -> std::io::Result<Metadata> {
|
||||||
|
let dereference = match &config.dereference {
|
||||||
|
Dereference::All => true,
|
||||||
|
Dereference::Args => command_line,
|
||||||
|
Dereference::DirArgs => {
|
||||||
|
if command_line {
|
||||||
|
if let Ok(md) = entry.metadata() {
|
||||||
|
md.is_dir()
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Dereference::None => false,
|
||||||
|
};
|
||||||
if dereference {
|
if dereference {
|
||||||
entry.metadata().or_else(|_| entry.symlink_metadata())
|
entry.metadata().or_else(|_| entry.symlink_metadata())
|
||||||
} else {
|
} else {
|
||||||
|
@ -1176,8 +1224,8 @@ fn get_metadata(entry: &Path, dereference: bool) -> std::io::Result<Metadata> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn display_dir_entry_size(entry: &Path, config: &Config) -> (usize, usize) {
|
fn display_dir_entry_size(entry: &PathData, config: &Config) -> (usize, usize) {
|
||||||
if let Ok(md) = get_metadata(entry, false) {
|
if let Ok(md) = entry.md.as_ref() {
|
||||||
(
|
(
|
||||||
display_symlink_count(&md).len(),
|
display_symlink_count(&md).len(),
|
||||||
display_file_size(&md, config).len(),
|
display_file_size(&md, config).len(),
|
||||||
|
@ -1191,7 +1239,7 @@ fn pad_left(string: String, count: usize) -> String {
|
||||||
format!("{:>width$}", string, width = count)
|
format!("{:>width$}", string, width = count)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn display_items(items: &[PathBuf], strip: Option<&Path>, config: &Config, command_line: bool) {
|
fn display_items(items: &[PathData], strip: Option<&Path>, config: &Config) {
|
||||||
if config.format == Format::Long {
|
if config.format == Format::Long {
|
||||||
let (mut max_links, mut max_size) = (1, 1);
|
let (mut max_links, mut max_size) = (1, 1);
|
||||||
for item in items {
|
for item in items {
|
||||||
|
@ -1200,18 +1248,18 @@ fn display_items(items: &[PathBuf], strip: Option<&Path>, config: &Config, comma
|
||||||
max_size = size.max(max_size);
|
max_size = size.max(max_size);
|
||||||
}
|
}
|
||||||
for item in items {
|
for item in items {
|
||||||
display_item_long(item, strip, max_links, max_size, config, command_line);
|
display_item_long(item, strip, max_links, max_size, config);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let names = items.iter().filter_map(|i| {
|
let names = items.iter().filter_map(|i| {
|
||||||
let md = get_metadata(i, false);
|
let md = i.md.as_ref();
|
||||||
match md {
|
match md {
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let filename = get_file_name(i, strip);
|
let filename = get_file_name(&i.p_buf, strip);
|
||||||
show_error!("'{}': {}", filename, e);
|
show_error!("'{}': {}", filename, e);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
Ok(md) => Some(display_file_name(&i, strip, &md, config)),
|
Ok(md) => Some(display_file_name(&i.p_buf, strip, &md, config)),
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1271,33 +1319,15 @@ fn display_grid(names: impl Iterator<Item = Cell>, width: u16, direction: Direct
|
||||||
use uucore::fs::display_permissions;
|
use uucore::fs::display_permissions;
|
||||||
|
|
||||||
fn display_item_long(
|
fn display_item_long(
|
||||||
item: &Path,
|
item: &PathData,
|
||||||
strip: Option<&Path>,
|
strip: Option<&Path>,
|
||||||
max_links: usize,
|
max_links: usize,
|
||||||
max_size: usize,
|
max_size: usize,
|
||||||
config: &Config,
|
config: &Config,
|
||||||
command_line: bool,
|
|
||||||
) {
|
) {
|
||||||
let dereference = match &config.dereference {
|
let md = match &item.md {
|
||||||
Dereference::All => true,
|
|
||||||
Dereference::Args => command_line,
|
|
||||||
Dereference::DirArgs => {
|
|
||||||
if command_line {
|
|
||||||
if let Ok(md) = item.metadata() {
|
|
||||||
md.is_dir()
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Dereference::None => false,
|
|
||||||
};
|
|
||||||
|
|
||||||
let md = match get_metadata(item, dereference) {
|
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
let filename = get_file_name(&item, strip);
|
let filename = get_file_name(&item.p_buf, strip);
|
||||||
show_error!("{}: {}", filename, e);
|
show_error!("{}: {}", filename, e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1336,7 +1366,7 @@ fn display_item_long(
|
||||||
" {} {} {}",
|
" {} {} {}",
|
||||||
pad_left(display_file_size(&md, config), max_size),
|
pad_left(display_file_size(&md, config), max_size),
|
||||||
display_date(&md, config),
|
display_date(&md, config),
|
||||||
display_file_name(&item, strip, &md, config).contents,
|
display_file_name(&item.p_buf, strip, &md, config).contents,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1348,14 +1378,50 @@ fn get_inode(metadata: &Metadata) -> String {
|
||||||
// Currently getpwuid is `linux` target only. If it's broken out into
|
// Currently getpwuid is `linux` target only. If it's broken out into
|
||||||
// a posix-compliant attribute this can be updated...
|
// a posix-compliant attribute this can be updated...
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
|
use std::sync::Mutex;
|
||||||
|
#[cfg(unix)]
|
||||||
use uucore::entries;
|
use uucore::entries;
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn cached_uid2usr(uid: u32) -> String {
|
||||||
|
lazy_static! {
|
||||||
|
static ref UID_CACHE: Mutex<HashMap<u32, String>> = Mutex::new(HashMap::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut uid_cache = UID_CACHE.lock().unwrap();
|
||||||
|
match uid_cache.get(&uid) {
|
||||||
|
Some(usr) => usr.clone(),
|
||||||
|
None => {
|
||||||
|
let usr = entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string());
|
||||||
|
uid_cache.insert(uid, usr.clone());
|
||||||
|
usr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
fn display_uname(metadata: &Metadata, config: &Config) -> String {
|
fn display_uname(metadata: &Metadata, config: &Config) -> String {
|
||||||
if config.long.numeric_uid_gid {
|
if config.long.numeric_uid_gid {
|
||||||
metadata.uid().to_string()
|
metadata.uid().to_string()
|
||||||
} else {
|
} else {
|
||||||
entries::uid2usr(metadata.uid()).unwrap_or_else(|_| metadata.uid().to_string())
|
cached_uid2usr(metadata.uid())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn cached_gid2grp(gid: u32) -> String {
|
||||||
|
lazy_static! {
|
||||||
|
static ref GID_CACHE: Mutex<HashMap<u32, String>> = Mutex::new(HashMap::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut gid_cache = GID_CACHE.lock().unwrap();
|
||||||
|
match gid_cache.get(&gid) {
|
||||||
|
Some(grp) => grp.clone(),
|
||||||
|
None => {
|
||||||
|
let grp = entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string());
|
||||||
|
gid_cache.insert(gid, grp.clone());
|
||||||
|
grp
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1364,7 +1430,7 @@ fn display_group(metadata: &Metadata, config: &Config) -> String {
|
||||||
if config.long.numeric_uid_gid {
|
if config.long.numeric_uid_gid {
|
||||||
metadata.gid().to_string()
|
metadata.gid().to_string()
|
||||||
} else {
|
} else {
|
||||||
entries::gid2grp(metadata.gid()).unwrap_or_else(|_| metadata.gid().to_string())
|
cached_gid2grp(metadata.gid())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -103,6 +103,14 @@ fn test_ls_width() {
|
||||||
.succeeds()
|
.succeeds()
|
||||||
.stdout_only("test-width-1\ntest-width-2\ntest-width-3\ntest-width-4\n");
|
.stdout_only("test-width-1\ntest-width-2\ntest-width-3\ntest-width-4\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for option in &["-w 1a", "-w=1a", "--width=1a", "--width 1a"] {
|
||||||
|
scene
|
||||||
|
.ucmd()
|
||||||
|
.args(&option.split(" ").collect::<Vec<_>>())
|
||||||
|
.fails()
|
||||||
|
.stderr_only("ls: error: invalid line width: ‘1a’");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue