1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge pull request #7801 from drinkcat/ls-opt-1

ls: Print optimizations
This commit is contained in:
Sylvestre Ledru 2025-04-21 10:17:55 +02:00 committed by GitHub
commit b5e0e2342b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 119 additions and 109 deletions

View file

@ -1,6 +1,13 @@
# Benchmarking ls
ls majorly involves fetching a lot of details (depending upon what details are requested, eg. time/date, inode details, etc) for each path using system calls. Ideally, any system call should be done only once for each of the paths - not adhering to this principle leads to a lot of system call overhead multiplying and bubbling up, especially for recursive ls, therefore it is important to always benchmark multiple scenarios.
ls _also_ prints a lot of information, so optimizing formatting operations is also critical:
- Try to avoid using `format` unless required.
- Try to avoid repeated string copies unless necessary.
- If a temporary buffer is required, try to allocate a reasonable capacity to start with to avoid repeated reallocations.
- Some values might be expensive to compute (e.g. current line width), but are only required in some cases. Consider delaying computations, e.g. by wrapping the evaluation in a LazyCell.
This is an overview over what was benchmarked, and if you make changes to `ls`, you are encouraged to check
how performance was affected for the workloads listed below. Feel free to add other workloads to the
list that we should improve / make sure not to regress.

View file

@ -5,9 +5,10 @@
// spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly
use std::iter;
#[cfg(windows)]
use std::os::windows::fs::MetadataExt;
use std::{cell::OnceCell, num::IntErrorKind};
use std::{cell::LazyCell, cell::OnceCell, num::IntErrorKind};
use std::{
cmp::Reverse,
ffi::{OsStr, OsString},
@ -2420,12 +2421,33 @@ fn display_dir_entry_size(
}
}
fn pad_left(string: &str, count: usize) -> String {
format!("{string:>count$}")
// A simple, performant, ExtendPad trait to add a string to a Vec<u8>, padding with spaces
// on the left or right, without making additional copies, or using formatting functions.
trait ExtendPad {
fn extend_pad_left(&mut self, string: &str, count: usize);
fn extend_pad_right(&mut self, string: &str, count: usize);
}
fn pad_right(string: &str, count: usize) -> String {
format!("{string:<count$}")
impl ExtendPad for Vec<u8> {
fn extend_pad_left(&mut self, string: &str, count: usize) {
if string.len() < count {
self.extend(iter::repeat_n(b' ', count - string.len()));
}
self.extend(string.as_bytes());
}
fn extend_pad_right(&mut self, string: &str, count: usize) {
self.extend(string.as_bytes());
if string.len() < count {
self.extend(iter::repeat_n(b' ', count - string.len()));
}
}
}
// TODO: Consider converting callers to use ExtendPad instead, as it avoids
// additional copies.
fn pad_left(string: &str, count: usize) -> String {
format!("{string:>count$}")
}
fn return_total(
@ -2555,8 +2577,15 @@ fn display_items(
// whether text will wrap or not, because when format is grid or
// column ls will try to place the item name in a new line if it
// wraps.
let cell =
display_item_name(i, config, prefix_context, more_info, out, style_manager, 0);
let cell = display_item_name(
i,
config,
prefix_context,
more_info,
out,
style_manager,
LazyCell::new(Box::new(|| 0)),
);
names_vec.push(cell);
}
@ -2760,11 +2789,11 @@ fn display_item_long(
style_manager: &mut Option<StyleManager>,
quoted: bool,
) -> UResult<()> {
let mut output_display: Vec<u8> = vec![];
let mut output_display: Vec<u8> = Vec::with_capacity(128);
// apply normal color to non filename outputs
if let Some(style_manager) = style_manager {
write!(output_display, "{}", style_manager.apply_normal()).unwrap();
output_display.extend(style_manager.apply_normal().as_bytes());
}
if config.dired {
output_display.extend(b" ");
@ -2775,93 +2804,71 @@ fn display_item_long(
let is_acl_set = false;
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
let is_acl_set = has_acl(item.display_name.as_os_str());
write!(
output_display,
"{}{} {}",
display_permissions(md, true),
if item.security_context.len() > 1 {
// GNU `ls` uses a "." character to indicate a file with a security context,
// but not other alternate access method.
"."
} else if is_acl_set {
"+"
} else {
""
},
pad_left(&display_symlink_count(md), padding.link_count)
)
.unwrap();
output_display.extend(display_permissions(md, true).as_bytes());
if item.security_context.len() > 1 {
// GNU `ls` uses a "." character to indicate a file with a security context,
// but not other alternate access method.
output_display.extend(b".");
} else if is_acl_set {
output_display.extend(b"+");
}
output_display.extend(b" ");
output_display.extend_pad_left(&display_symlink_count(md), padding.link_count);
if config.long.owner {
write!(
output_display,
" {}",
pad_right(&display_uname(md, config), padding.uname)
)
.unwrap();
output_display.extend(b" ");
output_display.extend_pad_right(&display_uname(md, config), padding.uname);
}
if config.long.group {
write!(
output_display,
" {}",
pad_right(&display_group(md, config), padding.group)
)
.unwrap();
output_display.extend(b" ");
output_display.extend_pad_right(&display_group(md, config), padding.group);
}
if config.context {
write!(
output_display,
" {}",
pad_right(&item.security_context, padding.context)
)
.unwrap();
output_display.extend(b" ");
output_display.extend_pad_right(&item.security_context, padding.context);
}
// Author is only different from owner on GNU/Hurd, so we reuse
// the owner, since GNU/Hurd is not currently supported by Rust.
if config.long.author {
write!(
output_display,
" {}",
pad_right(&display_uname(md, config), padding.uname)
)
.unwrap();
output_display.extend(b" ");
output_display.extend_pad_right(&display_uname(md, config), padding.uname);
}
match display_len_or_rdev(md, config) {
SizeOrDeviceId::Size(size) => {
write!(output_display, " {}", pad_left(&size, padding.size)).unwrap();
output_display.extend(b" ");
output_display.extend_pad_left(&size, padding.size);
}
SizeOrDeviceId::Device(major, minor) => {
write!(
output_display,
" {}, {}",
pad_left(
&major,
#[cfg(not(unix))]
0usize,
#[cfg(unix)]
padding.major.max(
padding
.size
.saturating_sub(padding.minor.saturating_add(2usize))
),
output_display.extend(b" ");
output_display.extend_pad_left(
&major,
#[cfg(not(unix))]
0usize,
#[cfg(unix)]
padding.major.max(
padding
.size
.saturating_sub(padding.minor.saturating_add(2usize)),
),
pad_left(
&minor,
#[cfg(not(unix))]
0usize,
#[cfg(unix)]
padding.minor,
),
)
.unwrap();
);
output_display.extend(b", ");
output_display.extend_pad_left(
&minor,
#[cfg(not(unix))]
0usize,
#[cfg(unix)]
padding.minor,
);
}
};
write!(output_display, " {} ", display_date(md, config)).unwrap();
output_display.extend(b" ");
output_display.extend(display_date(md, config).as_bytes());
output_display.extend(b" ");
let item_name = display_item_name(
item,
@ -2870,7 +2877,9 @@ fn display_item_long(
String::new(),
out,
style_manager,
ansi_width(&String::from_utf8_lossy(&output_display)),
LazyCell::new(Box::new(|| {
ansi_width(&String::from_utf8_lossy(&output_display))
})),
);
let displayed_item = if quoted && !os_str_starts_with(&item_name, b"'") {
@ -2890,7 +2899,7 @@ fn display_item_long(
dired::update_positions(dired, start, end);
}
write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?;
output_display.extend(config.line_ending.to_string().as_bytes());
} else {
#[cfg(unix)]
let leading_char = {
@ -2925,42 +2934,36 @@ fn display_item_long(
}
};
write!(
output_display,
"{}{} {}",
format_args!("{leading_char}?????????"),
if item.security_context.len() > 1 {
// GNU `ls` uses a "." character to indicate a file with a security context,
// but not other alternate access method.
"."
} else {
""
},
pad_left("?", padding.link_count)
)
.unwrap();
output_display.extend(leading_char.as_bytes());
output_display.extend(b"?????????");
if item.security_context.len() > 1 {
// GNU `ls` uses a "." character to indicate a file with a security context,
// but not other alternate access method.
output_display.extend(b".");
}
output_display.extend(b" ");
output_display.extend_pad_left("?", padding.link_count);
if config.long.owner {
write!(output_display, " {}", pad_right("?", padding.uname)).unwrap();
output_display.extend(b" ");
output_display.extend_pad_right("?", padding.uname);
}
if config.long.group {
write!(output_display, " {}", pad_right("?", padding.group)).unwrap();
output_display.extend(b" ");
output_display.extend_pad_right("?", padding.group);
}
if config.context {
write!(
output_display,
" {}",
pad_right(&item.security_context, padding.context)
)
.unwrap();
output_display.extend(b" ");
output_display.extend_pad_right(&item.security_context, padding.context);
}
// Author is only different from owner on GNU/Hurd, so we reuse
// the owner, since GNU/Hurd is not currently supported by Rust.
if config.long.author {
write!(output_display, " {}", pad_right("?", padding.uname)).unwrap();
output_display.extend(b" ");
output_display.extend_pad_right("?", padding.uname);
}
let displayed_item = display_item_name(
@ -2970,17 +2973,17 @@ fn display_item_long(
String::new(),
out,
style_manager,
ansi_width(&String::from_utf8_lossy(&output_display)),
LazyCell::new(Box::new(|| {
ansi_width(&String::from_utf8_lossy(&output_display))
})),
);
let date_len = 12;
write!(
output_display,
" {} {} ",
pad_left("?", padding.size),
pad_left("?", date_len),
)
.unwrap();
output_display.extend(b" ");
output_display.extend_pad_left("?", padding.size);
output_display.extend(b" ");
output_display.extend_pad_left("?", date_len);
output_display.extend(b" ");
if config.dired {
dired::calculate_and_update_positions(
@ -2990,7 +2993,7 @@ fn display_item_long(
);
}
write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?;
output_display.extend(config.line_ending.to_string().as_bytes());
}
out.write_all(&output_display)?;
@ -3206,13 +3209,13 @@ fn display_item_name(
more_info: String,
out: &mut BufWriter<Stdout>,
style_manager: &mut Option<StyleManager>,
current_column: usize,
current_column: LazyCell<usize, Box<dyn FnOnce() -> usize + '_>>,
) -> OsString {
// This is our return value. We start by `&path.display_name` and modify it along the way.
let mut name = escape_name(&path.display_name, &config.quoting_style);
let is_wrap =
|namelen: usize| config.width != 0 && current_column + namelen > config.width.into();
|namelen: usize| config.width != 0 && *current_column + namelen > config.width.into();
if config.hyperlink {
name = create_hyperlink(&name, path);