1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

Merge pull request #7801 from drinkcat/ls-opt-1

ls: Print optimizations
This commit is contained in:
Sylvestre Ledru 2025-04-21 10:17:55 +02:00 committed by GitHub
commit b5e0e2342b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 119 additions and 109 deletions

View file

@ -1,6 +1,13 @@
# Benchmarking ls # Benchmarking ls
ls majorly involves fetching a lot of details (depending upon what details are requested, eg. time/date, inode details, etc) for each path using system calls. Ideally, any system call should be done only once for each of the paths - not adhering to this principle leads to a lot of system call overhead multiplying and bubbling up, especially for recursive ls, therefore it is important to always benchmark multiple scenarios. ls majorly involves fetching a lot of details (depending upon what details are requested, eg. time/date, inode details, etc) for each path using system calls. Ideally, any system call should be done only once for each of the paths - not adhering to this principle leads to a lot of system call overhead multiplying and bubbling up, especially for recursive ls, therefore it is important to always benchmark multiple scenarios.
ls _also_ prints a lot of information, so optimizing formatting operations is also critical:
- Try to avoid using `format` unless required.
- Try to avoid repeated string copies unless necessary.
- If a temporary buffer is required, try to allocate a reasonable capacity to start with to avoid repeated reallocations.
- Some values might be expensive to compute (e.g. current line width), but are only required in some cases. Consider delaying computations, e.g. by wrapping the evaluation in a LazyCell.
This is an overview over what was benchmarked, and if you make changes to `ls`, you are encouraged to check This is an overview over what was benchmarked, and if you make changes to `ls`, you are encouraged to check
how performance was affected for the workloads listed below. Feel free to add other workloads to the how performance was affected for the workloads listed below. Feel free to add other workloads to the
list that we should improve / make sure not to regress. list that we should improve / make sure not to regress.

View file

@ -5,9 +5,10 @@
// spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly // spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly
use std::iter;
#[cfg(windows)] #[cfg(windows)]
use std::os::windows::fs::MetadataExt; use std::os::windows::fs::MetadataExt;
use std::{cell::OnceCell, num::IntErrorKind}; use std::{cell::LazyCell, cell::OnceCell, num::IntErrorKind};
use std::{ use std::{
cmp::Reverse, cmp::Reverse,
ffi::{OsStr, OsString}, ffi::{OsStr, OsString},
@ -2420,12 +2421,33 @@ fn display_dir_entry_size(
} }
} }
fn pad_left(string: &str, count: usize) -> String { // A simple, performant, ExtendPad trait to add a string to a Vec<u8>, padding with spaces
format!("{string:>count$}") // on the left or right, without making additional copies, or using formatting functions.
trait ExtendPad {
fn extend_pad_left(&mut self, string: &str, count: usize);
fn extend_pad_right(&mut self, string: &str, count: usize);
} }
fn pad_right(string: &str, count: usize) -> String { impl ExtendPad for Vec<u8> {
format!("{string:<count$}") fn extend_pad_left(&mut self, string: &str, count: usize) {
if string.len() < count {
self.extend(iter::repeat_n(b' ', count - string.len()));
}
self.extend(string.as_bytes());
}
fn extend_pad_right(&mut self, string: &str, count: usize) {
self.extend(string.as_bytes());
if string.len() < count {
self.extend(iter::repeat_n(b' ', count - string.len()));
}
}
}
// TODO: Consider converting callers to use ExtendPad instead, as it avoids
// additional copies.
fn pad_left(string: &str, count: usize) -> String {
format!("{string:>count$}")
} }
fn return_total( fn return_total(
@ -2555,8 +2577,15 @@ fn display_items(
// whether text will wrap or not, because when format is grid or // whether text will wrap or not, because when format is grid or
// column ls will try to place the item name in a new line if it // column ls will try to place the item name in a new line if it
// wraps. // wraps.
let cell = let cell = display_item_name(
display_item_name(i, config, prefix_context, more_info, out, style_manager, 0); i,
config,
prefix_context,
more_info,
out,
style_manager,
LazyCell::new(Box::new(|| 0)),
);
names_vec.push(cell); names_vec.push(cell);
} }
@ -2760,11 +2789,11 @@ fn display_item_long(
style_manager: &mut Option<StyleManager>, style_manager: &mut Option<StyleManager>,
quoted: bool, quoted: bool,
) -> UResult<()> { ) -> UResult<()> {
let mut output_display: Vec<u8> = vec![]; let mut output_display: Vec<u8> = Vec::with_capacity(128);
// apply normal color to non filename outputs // apply normal color to non filename outputs
if let Some(style_manager) = style_manager { if let Some(style_manager) = style_manager {
write!(output_display, "{}", style_manager.apply_normal()).unwrap(); output_display.extend(style_manager.apply_normal().as_bytes());
} }
if config.dired { if config.dired {
output_display.extend(b" "); output_display.extend(b" ");
@ -2775,93 +2804,71 @@ fn display_item_long(
let is_acl_set = false; let is_acl_set = false;
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
let is_acl_set = has_acl(item.display_name.as_os_str()); let is_acl_set = has_acl(item.display_name.as_os_str());
write!( output_display.extend(display_permissions(md, true).as_bytes());
output_display, if item.security_context.len() > 1 {
"{}{} {}", // GNU `ls` uses a "." character to indicate a file with a security context,
display_permissions(md, true), // but not other alternate access method.
if item.security_context.len() > 1 { output_display.extend(b".");
// GNU `ls` uses a "." character to indicate a file with a security context, } else if is_acl_set {
// but not other alternate access method. output_display.extend(b"+");
"." }
} else if is_acl_set { output_display.extend(b" ");
"+" output_display.extend_pad_left(&display_symlink_count(md), padding.link_count);
} else {
""
},
pad_left(&display_symlink_count(md), padding.link_count)
)
.unwrap();
if config.long.owner { if config.long.owner {
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_right(&display_uname(md, config), padding.uname);
" {}",
pad_right(&display_uname(md, config), padding.uname)
)
.unwrap();
} }
if config.long.group { if config.long.group {
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_right(&display_group(md, config), padding.group);
" {}",
pad_right(&display_group(md, config), padding.group)
)
.unwrap();
} }
if config.context { if config.context {
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_right(&item.security_context, padding.context);
" {}",
pad_right(&item.security_context, padding.context)
)
.unwrap();
} }
// Author is only different from owner on GNU/Hurd, so we reuse // Author is only different from owner on GNU/Hurd, so we reuse
// the owner, since GNU/Hurd is not currently supported by Rust. // the owner, since GNU/Hurd is not currently supported by Rust.
if config.long.author { if config.long.author {
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_right(&display_uname(md, config), padding.uname);
" {}",
pad_right(&display_uname(md, config), padding.uname)
)
.unwrap();
} }
match display_len_or_rdev(md, config) { match display_len_or_rdev(md, config) {
SizeOrDeviceId::Size(size) => { SizeOrDeviceId::Size(size) => {
write!(output_display, " {}", pad_left(&size, padding.size)).unwrap(); output_display.extend(b" ");
output_display.extend_pad_left(&size, padding.size);
} }
SizeOrDeviceId::Device(major, minor) => { SizeOrDeviceId::Device(major, minor) => {
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_left(
" {}, {}", &major,
pad_left( #[cfg(not(unix))]
&major, 0usize,
#[cfg(not(unix))] #[cfg(unix)]
0usize, padding.major.max(
#[cfg(unix)] padding
padding.major.max( .size
padding .saturating_sub(padding.minor.saturating_add(2usize)),
.size
.saturating_sub(padding.minor.saturating_add(2usize))
),
), ),
pad_left( );
&minor, output_display.extend(b", ");
#[cfg(not(unix))] output_display.extend_pad_left(
0usize, &minor,
#[cfg(unix)] #[cfg(not(unix))]
padding.minor, 0usize,
), #[cfg(unix)]
) padding.minor,
.unwrap(); );
} }
}; };
write!(output_display, " {} ", display_date(md, config)).unwrap(); output_display.extend(b" ");
output_display.extend(display_date(md, config).as_bytes());
output_display.extend(b" ");
let item_name = display_item_name( let item_name = display_item_name(
item, item,
@ -2870,7 +2877,9 @@ fn display_item_long(
String::new(), String::new(),
out, out,
style_manager, style_manager,
ansi_width(&String::from_utf8_lossy(&output_display)), LazyCell::new(Box::new(|| {
ansi_width(&String::from_utf8_lossy(&output_display))
})),
); );
let displayed_item = if quoted && !os_str_starts_with(&item_name, b"'") { let displayed_item = if quoted && !os_str_starts_with(&item_name, b"'") {
@ -2890,7 +2899,7 @@ fn display_item_long(
dired::update_positions(dired, start, end); dired::update_positions(dired, start, end);
} }
write_os_str(&mut output_display, &displayed_item)?; write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?; output_display.extend(config.line_ending.to_string().as_bytes());
} else { } else {
#[cfg(unix)] #[cfg(unix)]
let leading_char = { let leading_char = {
@ -2925,42 +2934,36 @@ fn display_item_long(
} }
}; };
write!( output_display.extend(leading_char.as_bytes());
output_display, output_display.extend(b"?????????");
"{}{} {}", if item.security_context.len() > 1 {
format_args!("{leading_char}?????????"), // GNU `ls` uses a "." character to indicate a file with a security context,
if item.security_context.len() > 1 { // but not other alternate access method.
// GNU `ls` uses a "." character to indicate a file with a security context, output_display.extend(b".");
// but not other alternate access method. }
"." output_display.extend(b" ");
} else { output_display.extend_pad_left("?", padding.link_count);
""
},
pad_left("?", padding.link_count)
)
.unwrap();
if config.long.owner { if config.long.owner {
write!(output_display, " {}", pad_right("?", padding.uname)).unwrap(); output_display.extend(b" ");
output_display.extend_pad_right("?", padding.uname);
} }
if config.long.group { if config.long.group {
write!(output_display, " {}", pad_right("?", padding.group)).unwrap(); output_display.extend(b" ");
output_display.extend_pad_right("?", padding.group);
} }
if config.context { if config.context {
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_right(&item.security_context, padding.context);
" {}",
pad_right(&item.security_context, padding.context)
)
.unwrap();
} }
// Author is only different from owner on GNU/Hurd, so we reuse // Author is only different from owner on GNU/Hurd, so we reuse
// the owner, since GNU/Hurd is not currently supported by Rust. // the owner, since GNU/Hurd is not currently supported by Rust.
if config.long.author { if config.long.author {
write!(output_display, " {}", pad_right("?", padding.uname)).unwrap(); output_display.extend(b" ");
output_display.extend_pad_right("?", padding.uname);
} }
let displayed_item = display_item_name( let displayed_item = display_item_name(
@ -2970,17 +2973,17 @@ fn display_item_long(
String::new(), String::new(),
out, out,
style_manager, style_manager,
ansi_width(&String::from_utf8_lossy(&output_display)), LazyCell::new(Box::new(|| {
ansi_width(&String::from_utf8_lossy(&output_display))
})),
); );
let date_len = 12; let date_len = 12;
write!( output_display.extend(b" ");
output_display, output_display.extend_pad_left("?", padding.size);
" {} {} ", output_display.extend(b" ");
pad_left("?", padding.size), output_display.extend_pad_left("?", date_len);
pad_left("?", date_len), output_display.extend(b" ");
)
.unwrap();
if config.dired { if config.dired {
dired::calculate_and_update_positions( dired::calculate_and_update_positions(
@ -2990,7 +2993,7 @@ fn display_item_long(
); );
} }
write_os_str(&mut output_display, &displayed_item)?; write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?; output_display.extend(config.line_ending.to_string().as_bytes());
} }
out.write_all(&output_display)?; out.write_all(&output_display)?;
@ -3206,13 +3209,13 @@ fn display_item_name(
more_info: String, more_info: String,
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
style_manager: &mut Option<StyleManager>, style_manager: &mut Option<StyleManager>,
current_column: usize, current_column: LazyCell<usize, Box<dyn FnOnce() -> usize + '_>>,
) -> OsString { ) -> OsString {
// This is our return value. We start by `&path.display_name` and modify it along the way. // This is our return value. We start by `&path.display_name` and modify it along the way.
let mut name = escape_name(&path.display_name, &config.quoting_style); let mut name = escape_name(&path.display_name, &config.quoting_style);
let is_wrap = let is_wrap =
|namelen: usize| config.width != 0 && current_column + namelen > config.width.into(); |namelen: usize| config.width != 0 && *current_column + namelen > config.width.into();
if config.hyperlink { if config.hyperlink {
name = create_hyperlink(&name, path); name = create_hyperlink(&name, path);