From 615e684c5deab36ebbaf771bc6e76aba0517034b Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Sun, 20 Apr 2025 17:19:36 +0200 Subject: [PATCH 1/4] ls: Create a ListState struct to maintain state We put the out writer and style manager in there, for now. Reduces the number of parameters to pass around, and we'll add more useful things in there. Little to no performance difference. --- src/uu/ls/src/ls.rs | 196 ++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 21f5d55fd..815af0327 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -2047,15 +2047,24 @@ fn show_dir_name( write!(out, ":") } +// A struct to encapsulate state that is passed around from `list` functions. +struct ListState<'a> { + out: BufWriter, + style_manager: Option>, +} + #[allow(clippy::cognitive_complexity)] pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { let mut files = Vec::::new(); let mut dirs = Vec::::new(); - let mut out = BufWriter::new(stdout()); let mut dired = DiredOutput::default(); - let mut style_manager = config.color.as_ref().map(StyleManager::new); let initial_locs_len = locs.len(); + let mut state = ListState { + out: BufWriter::new(stdout()), + style_manager: config.color.as_ref().map(StyleManager::new), + }; + for loc in locs { let path_data = PathData::new(PathBuf::from(loc), None, None, config, true); @@ -2065,11 +2074,11 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { // Proper GNU handling is don't show if dereferenced symlink DNE // but only for the base dir, for a child dir show, and print ?s // in long format - if path_data.get_metadata(&mut out).is_none() { + if path_data.get_metadata(&mut state.out).is_none() { continue; } - let show_dir_contents = match path_data.file_type(&mut out) { + let show_dir_contents = match path_data.file_type(&mut state.out) { Some(ft) => !config.directory && ft.is_dir(), None => { set_exit_code(1); @@ -2084,19 +2093,19 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { } } - sort_entries(&mut files, config, &mut out); - sort_entries(&mut dirs, config, &mut out); + sort_entries(&mut files, config, &mut state.out); + sort_entries(&mut dirs, config, &mut state.out); - if let Some(style_manager) = style_manager.as_mut() { + if let Some(style_manager) = state.style_manager.as_mut() { // ls will try to write a reset before anything is written if normal // color is given if style_manager.get_normal_style().is_some() { let to_write = style_manager.reset(true); - write!(out, "{to_write}")?; + write!(state.out, "{to_write}")?; } } - display_items(&files, config, &mut out, &mut dired, &mut style_manager)?; + display_items(&files, config, &mut state, &mut dired)?; for (pos, path_data) in dirs.iter().enumerate() { // Do read_dir call here to match GNU semantics by printing @@ -2104,7 +2113,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { let read_dir = match fs::read_dir(&path_data.p_buf) { Err(err) => { // flush stdout buffer before the error to preserve formatting and order - out.flush()?; + state.out.flush()?; show!(LsError::IOErrorContext( path_data.p_buf.clone(), err, @@ -2119,10 +2128,10 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { if initial_locs_len > 1 || config.recursive { if pos.eq(&0usize) && files.is_empty() { if config.dired { - dired::indent(&mut out)?; + dired::indent(&mut state.out)?; } - show_dir_name(path_data, &mut out, config)?; - writeln!(out)?; + show_dir_name(path_data, &mut state.out, config)?; + writeln!(state.out)?; if config.dired { // First directory displayed let dir_len = path_data.display_name.len(); @@ -2132,9 +2141,9 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { dired::add_dir_name(&mut dired, dir_len); } } else { - writeln!(out)?; - show_dir_name(path_data, &mut out, config)?; - writeln!(out)?; + writeln!(state.out)?; + show_dir_name(path_data, &mut state.out, config)?; + writeln!(state.out)?; } } let mut listed_ancestors = HashSet::new(); @@ -2146,14 +2155,13 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { path_data, read_dir, config, - &mut out, + &mut state, &mut listed_ancestors, &mut dired, - &mut style_manager, )?; } if config.dired && !config.hyperlink { - dired::print_dired_output(config, &dired, &mut out)?; + dired::print_dired_output(config, &dired, &mut state.out)?; } Ok(()) } @@ -2266,10 +2274,9 @@ fn enter_directory( path_data: &PathData, read_dir: ReadDir, config: &Config, - out: &mut BufWriter, + state: &mut ListState, listed_ancestors: &mut HashSet, dired: &mut DiredOutput, - style_manager: &mut Option, ) -> UResult<()> { // Create vec of entries with initial dot files let mut entries: Vec = if config.files == Files::All { @@ -2298,7 +2305,7 @@ fn enter_directory( let dir_entry = match raw_entry { Ok(path) => path, Err(err) => { - out.flush()?; + state.out.flush()?; show!(LsError::IOError(err)); continue; } @@ -2311,18 +2318,18 @@ fn enter_directory( }; } - sort_entries(&mut entries, config, out); + sort_entries(&mut entries, config, &mut state.out); // Print total after any error display if config.format == Format::Long || config.alloc_size { - let total = return_total(&entries, config, out)?; - write!(out, "{}", total.as_str())?; + let total = return_total(&entries, config, &mut state.out)?; + write!(state.out, "{}", total.as_str())?; if config.dired { dired::add_total(dired, total.len()); } } - display_items(&entries, config, out, dired, style_manager)?; + display_items(&entries, config, state, dired)?; if config.recursive { for e in entries @@ -2335,7 +2342,7 @@ fn enter_directory( { match fs::read_dir(&e.p_buf) { Err(err) => { - out.flush()?; + state.out.flush()?; show!(LsError::IOErrorContext( e.p_buf.clone(), err, @@ -2349,34 +2356,26 @@ fn enter_directory( { // when listing several directories in recursive mode, we show // "dirname:" at the beginning of the file list - writeln!(out)?; + writeln!(state.out)?; if config.dired { // We already injected the first dir // Continue with the others // 2 = \n + \n dired.padding = 2; - dired::indent(out)?; + dired::indent(&mut state.out)?; let dir_name_size = e.p_buf.to_string_lossy().len(); dired::calculate_subdired(dired, dir_name_size); // inject dir name dired::add_dir_name(dired, dir_name_size); } - show_dir_name(e, out, config)?; - writeln!(out)?; - enter_directory( - e, - rd, - config, - out, - listed_ancestors, - dired, - style_manager, - )?; + show_dir_name(e, &mut state.out, config)?; + writeln!(state.out)?; + enter_directory(e, rd, config, state, listed_ancestors, dired)?; listed_ancestors .remove(&FileInformation::from_path(&e.p_buf, e.must_dereference)?); } else { - out.flush()?; + state.out.flush()?; show!(LsError::AlreadyListedError(e.p_buf.clone())); } } @@ -2511,9 +2510,8 @@ fn display_additional_leading_info( fn display_items( items: &[PathData], config: &Config, - out: &mut BufWriter, + state: &mut ListState, dired: &mut DiredOutput, - style_manager: &mut Option, ) -> UResult<()> { // `-Z`, `--context`: // Display the SELinux security context or '?' if none is found. When used with the `-l` @@ -2525,31 +2523,31 @@ fn display_items( }); if config.format == Format::Long { - let padding_collection = calculate_padding_collection(items, config, out); + let padding_collection = calculate_padding_collection(items, config, &mut state.out); for item in items { #[cfg(unix)] if config.inode || config.alloc_size { - let more_info = - display_additional_leading_info(item, &padding_collection, config, out)?; + let more_info = display_additional_leading_info( + item, + &padding_collection, + config, + &mut state.out, + )?; - write!(out, "{more_info}")?; + write!(state.out, "{more_info}")?; } #[cfg(not(unix))] if config.alloc_size { - let more_info = - display_additional_leading_info(item, &padding_collection, config, out)?; - write!(out, "{more_info}")?; + let more_info = display_additional_leading_info( + item, + &padding_collection, + config, + &mut state.out, + )?; + write!(state.out, "{more_info}")?; } - display_item_long( - item, - &padding_collection, - config, - out, - dired, - style_manager, - quoted, - )?; + display_item_long(item, &padding_collection, config, state, dired, quoted)?; } } else { let mut longest_context_len = 1; @@ -2563,16 +2561,16 @@ fn display_items( None }; - let padding = calculate_padding_collection(items, config, out); + let padding = calculate_padding_collection(items, config, &mut state.out); // we need to apply normal color to non filename output - if let Some(style_manager) = style_manager { - write!(out, "{}", style_manager.apply_normal())?; + if let Some(style_manager) = &mut state.style_manager { + write!(state.out, "{}", style_manager.apply_normal())?; } let mut names_vec = Vec::new(); for i in items { - let more_info = display_additional_leading_info(i, &padding, config, out)?; + let more_info = display_additional_leading_info(i, &padding, config, &mut state.out)?; // it's okay to set current column to zero which is used to decide // whether text will wrap or not, because when format is grid or // column ls will try to place the item name in a new line if it @@ -2582,8 +2580,7 @@ fn display_items( config, prefix_context, more_info, - out, - style_manager, + state, LazyCell::new(Box::new(|| 0)), ); @@ -2598,7 +2595,7 @@ fn display_items( names, config.width, Direction::TopToBottom, - out, + &mut state.out, quoted, config.tab_size, )?; @@ -2608,7 +2605,7 @@ fn display_items( names, config.width, Direction::LeftToRight, - out, + &mut state.out, quoted, config.tab_size, )?; @@ -2616,7 +2613,7 @@ fn display_items( Format::Commas => { let mut current_col = 0; if let Some(name) = names.next() { - write_os_str(out, &name)?; + write_os_str(&mut state.out, &name)?; current_col = ansi_width(&name.to_string_lossy()) as u16 + 2; } for name in names { @@ -2624,23 +2621,23 @@ fn display_items( // If the width is 0 we print one single line if config.width != 0 && current_col + name_width + 1 > config.width { current_col = name_width + 2; - writeln!(out, ",")?; + writeln!(state.out, ",")?; } else { current_col += name_width + 2; - write!(out, ", ")?; + write!(state.out, ", ")?; } - write_os_str(out, &name)?; + write_os_str(&mut state.out, &name)?; } // Current col is never zero again if names have been printed. // So we print a newline. if current_col > 0 { - write!(out, "{}", config.line_ending)?; + write!(state.out, "{}", config.line_ending)?; } } _ => { for name in names { - write_os_str(out, &name)?; - write!(out, "{}", config.line_ending)?; + write_os_str(&mut state.out, &name)?; + write!(state.out, "{}", config.line_ending)?; } } }; @@ -2751,7 +2748,7 @@ fn display_grid( Ok(()) } -/// This writes to the BufWriter out a single string of the output of `ls -l`. +/// This writes to the BufWriter state.out a single string of the output of `ls -l`. /// /// It writes the following keys, in order: /// * `inode` ([`get_inode`], config-optional) @@ -2784,21 +2781,20 @@ fn display_item_long( item: &PathData, padding: &PaddingCollection, config: &Config, - out: &mut BufWriter, + state: &mut ListState, dired: &mut DiredOutput, - style_manager: &mut Option, quoted: bool, ) -> UResult<()> { let mut output_display: Vec = Vec::with_capacity(128); // apply normal color to non filename outputs - if let Some(style_manager) = style_manager { + if let Some(style_manager) = &mut state.style_manager { output_display.extend(style_manager.apply_normal().as_bytes()); } if config.dired { output_display.extend(b" "); } - if let Some(md) = item.get_metadata(out) { + if let Some(md) = item.get_metadata(&mut state.out) { #[cfg(any(not(unix), target_os = "android", target_os = "macos"))] // TODO: See how Mac should work here let is_acl_set = false; @@ -2875,8 +2871,7 @@ fn display_item_long( config, None, String::new(), - out, - style_manager, + state, LazyCell::new(Box::new(|| { ansi_width(&String::from_utf8_lossy(&output_display)) })), @@ -2971,8 +2966,7 @@ fn display_item_long( config, None, String::new(), - out, - style_manager, + state, LazyCell::new(Box::new(|| { ansi_width(&String::from_utf8_lossy(&output_display)) })), @@ -2995,7 +2989,7 @@ fn display_item_long( write_os_str(&mut output_display, &displayed_item)?; output_display.extend(config.line_ending.to_string().as_bytes()); } - out.write_all(&output_display)?; + state.out.write_all(&output_display)?; Ok(()) } @@ -3005,7 +2999,7 @@ fn get_inode(metadata: &Metadata) -> String { format!("{}", metadata.ino()) } -// Currently getpwuid is `linux` target only. If it's broken out into +// Currently getpwuid is `linux` target only. If it's broken state.out into // a posix-compliant attribute this can be updated... #[cfg(unix)] use std::sync::LazyLock; @@ -3207,8 +3201,7 @@ fn display_item_name( config: &Config, prefix_context: Option, more_info: String, - out: &mut BufWriter, - style_manager: &mut Option, + state: &mut ListState, current_column: LazyCell usize + '_>>, ) -> OsString { // This is our return value. We start by `&path.display_name` and modify it along the way. @@ -3221,9 +3214,16 @@ fn display_item_name( name = create_hyperlink(&name, path); } - if let Some(style_manager) = style_manager { + if let Some(style_manager) = &mut state.style_manager { let len = name.len(); - name = color_name(name, path, style_manager, out, None, is_wrap(len)); + name = color_name( + name, + path, + style_manager, + &mut state.out, + None, + is_wrap(len), + ); } if config.format != Format::Long && !more_info.is_empty() { @@ -3233,7 +3233,7 @@ fn display_item_name( } if config.indicator_style != IndicatorStyle::None { - let sym = classify_file(path, out); + let sym = classify_file(path, &mut state.out); let char_opt = match config.indicator_style { IndicatorStyle::Classify => sym, @@ -3260,8 +3260,8 @@ fn display_item_name( } if config.format == Format::Long - && path.file_type(out).is_some() - && path.file_type(out).unwrap().is_symlink() + && path.file_type(&mut state.out).is_some() + && path.file_type(&mut state.out).unwrap().is_symlink() && !path.must_dereference { match path.p_buf.read_link() { @@ -3271,7 +3271,7 @@ fn display_item_name( // We might as well color the symlink output after the arrow. // This makes extra system calls, but provides important information that // people run `ls -l --color` are very interested in. - if let Some(style_manager) = style_manager { + if let Some(style_manager) = &mut state.style_manager { // We get the absolute path to be able to construct PathData with valid Metadata. // This is because relative symlinks will fail to get_metadata. let mut absolute_target = target.clone(); @@ -3287,7 +3287,7 @@ fn display_item_name( // Because we use an absolute path, we can assume this is guaranteed to exist. // Otherwise, we use path.md(), which will guarantee we color to the same // color of non-existent symlinks according to style_for_path_with_metadata. - if path.get_metadata(out).is_none() + if path.get_metadata(&mut state.out).is_none() && get_metadata_with_deref_opt( target_data.p_buf.as_path(), target_data.must_dereference, @@ -3300,7 +3300,7 @@ fn display_item_name( escape_name(target.as_os_str(), &config.quoting_style), path, style_manager, - out, + &mut state.out, Some(&target_data), is_wrap(name.len()), )); @@ -3502,7 +3502,7 @@ fn calculate_padding_collection( fn calculate_padding_collection( items: &[PathData], config: &Config, - out: &mut BufWriter, + state: &mut ListState, ) -> PaddingCollection { let mut padding_collections = PaddingCollection { link_count: 1, @@ -3515,7 +3515,7 @@ fn calculate_padding_collection( for item in items { if config.alloc_size { - if let Some(md) = item.get_metadata(out) { + if let Some(md) = item.get_metadata(&mut state.out) { let block_size_len = display_size(get_block_size(md, config), config).len(); padding_collections.block_size = block_size_len.max(padding_collections.block_size); } @@ -3523,7 +3523,7 @@ fn calculate_padding_collection( let context_len = item.security_context.len(); let (link_count_len, uname_len, group_len, size_len, _major_len, _minor_len) = - display_dir_entry_size(item, config, out); + display_dir_entry_size(item, config, state); padding_collections.link_count = link_count_len.max(padding_collections.link_count); padding_collections.uname = uname_len.max(padding_collections.uname); padding_collections.group = group_len.max(padding_collections.group); From 1890467bd7559257344562a01b47be47e9b0ac2e Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Sun, 20 Apr 2025 17:30:55 +0200 Subject: [PATCH 2/4] ls: ListState: Add uid/gid cache to the structure Easier to reason about than the LazyLock/Mutex encapsulated static variables. Performance difference is not measurable, but this drops uneeded Mutex lock/unlock that were seen in samply output. --- src/uu/ls/src/ls.rs | 86 +++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 50 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 815af0327..1bf233ebc 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -2051,6 +2051,8 @@ fn show_dir_name( struct ListState<'a> { out: BufWriter, style_manager: Option>, + uid_cache: HashMap, + gid_cache: HashMap, } #[allow(clippy::cognitive_complexity)] @@ -2063,6 +2065,8 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { let mut state = ListState { out: BufWriter::new(stdout()), style_manager: config.color.as_ref().map(StyleManager::new), + uid_cache: HashMap::new(), + gid_cache: HashMap::new(), }; for loc in locs { @@ -2397,10 +2401,10 @@ fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Resu fn display_dir_entry_size( entry: &PathData, config: &Config, - out: &mut BufWriter, + state: &mut ListState, ) -> (usize, usize, usize, usize, usize, usize) { // TODO: Cache/memorize the display_* results so we don't have to recalculate them. - if let Some(md) = entry.get_metadata(out) { + if let Some(md) = entry.get_metadata(&mut state.out) { let (size_len, major_len, minor_len) = match display_len_or_rdev(md, config) { SizeOrDeviceId::Device(major, minor) => { (major.len() + minor.len() + 2usize, major.len(), minor.len()) @@ -2409,8 +2413,8 @@ fn display_dir_entry_size( }; ( display_symlink_count(md).len(), - display_uname(md, config).len(), - display_group(md, config).len(), + display_uname(md, config, state).len(), + display_group(md, config, state).len(), size_len, major_len, minor_len, @@ -2523,7 +2527,7 @@ fn display_items( }); if config.format == Format::Long { - let padding_collection = calculate_padding_collection(items, config, &mut state.out); + let padding_collection = calculate_padding_collection(items, config, state); for item in items { #[cfg(unix)] @@ -2561,7 +2565,7 @@ fn display_items( None }; - let padding = calculate_padding_collection(items, config, &mut state.out); + let padding = calculate_padding_collection(items, config, state); // we need to apply normal color to non filename output if let Some(style_manager) = &mut state.style_manager { @@ -2813,12 +2817,12 @@ fn display_item_long( if config.long.owner { output_display.extend(b" "); - output_display.extend_pad_right(&display_uname(md, config), padding.uname); + output_display.extend_pad_right(&display_uname(md, config, state), padding.uname); } if config.long.group { output_display.extend(b" "); - output_display.extend_pad_right(&display_group(md, config), padding.group); + output_display.extend_pad_right(&display_group(md, config, state), padding.group); } if config.context { @@ -2830,7 +2834,7 @@ fn display_item_long( // the owner, since GNU/Hurd is not currently supported by Rust. if config.long.author { output_display.extend(b" "); - output_display.extend_pad_right(&display_uname(md, config), padding.uname); + output_display.extend_pad_right(&display_uname(md, config, state), padding.uname); } match display_len_or_rdev(md, config) { @@ -3002,67 +3006,49 @@ fn get_inode(metadata: &Metadata) -> String { // Currently getpwuid is `linux` target only. If it's broken state.out into // a posix-compliant attribute this can be updated... #[cfg(unix)] -use std::sync::LazyLock; -#[cfg(unix)] -use std::sync::Mutex; -#[cfg(unix)] use uucore::entries; use uucore::fs::FileInformation; #[cfg(unix)] -fn cached_uid2usr(uid: u32) -> String { - static UID_CACHE: LazyLock>> = - LazyLock::new(|| Mutex::new(HashMap::new())); - - let mut uid_cache = UID_CACHE.lock().unwrap(); - uid_cache - .entry(uid) - .or_insert_with(|| entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string())) - .clone() -} - -#[cfg(unix)] -fn display_uname(metadata: &Metadata, config: &Config) -> String { +fn display_uname(metadata: &Metadata, config: &Config, state: &mut ListState) -> String { + let uid = metadata.uid(); if config.long.numeric_uid_gid { - metadata.uid().to_string() + uid.to_string() } else { - cached_uid2usr(metadata.uid()) + state + .uid_cache + .entry(uid) + .or_insert_with(|| entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string())) + .clone() } } #[cfg(all(unix, not(target_os = "redox")))] -fn cached_gid2grp(gid: u32) -> String { - static GID_CACHE: LazyLock>> = - LazyLock::new(|| Mutex::new(HashMap::new())); - - let mut gid_cache = GID_CACHE.lock().unwrap(); - gid_cache - .entry(gid) - .or_insert_with(|| entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string())) - .clone() -} - -#[cfg(all(unix, not(target_os = "redox")))] -fn display_group(metadata: &Metadata, config: &Config) -> String { +fn display_group(metadata: &Metadata, config: &Config, state: &mut ListState) -> String { + let gid = metadata.gid(); if config.long.numeric_uid_gid { - metadata.gid().to_string() + gid.to_string() } else { - cached_gid2grp(metadata.gid()) + state + .gid_cache + .entry(gid) + .or_insert_with(|| entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string())) + .clone() } } #[cfg(target_os = "redox")] -fn display_group(metadata: &Metadata, _config: &Config) -> String { +fn display_group(metadata: &Metadata, _config: &Config, _state: &mut ListState) -> String { metadata.gid().to_string() } #[cfg(not(unix))] -fn display_uname(_metadata: &Metadata, _config: &Config) -> String { +fn display_uname(_metadata: &Metadata, _config: &Config, _state: &mut ListState) -> String { "somebody".to_string() } #[cfg(not(unix))] -fn display_group(_metadata: &Metadata, _config: &Config) -> String { +fn display_group(_metadata: &Metadata, _config: &Config, _state: &mut ListState) -> String { "somegroup".to_string() } @@ -3439,7 +3425,7 @@ fn get_security_context(config: &Config, p_buf: &Path, must_dereference: bool) - fn calculate_padding_collection( items: &[PathData], config: &Config, - out: &mut BufWriter, + state: &mut ListState, ) -> PaddingCollection { let mut padding_collections = PaddingCollection { inode: 1, @@ -3456,7 +3442,7 @@ fn calculate_padding_collection( for item in items { #[cfg(unix)] if config.inode { - let inode_len = if let Some(md) = item.get_metadata(out) { + let inode_len = if let Some(md) = item.get_metadata(&mut state.out) { display_inode(md).len() } else { continue; @@ -3465,7 +3451,7 @@ fn calculate_padding_collection( } if config.alloc_size { - if let Some(md) = item.get_metadata(out) { + if let Some(md) = item.get_metadata(&mut state.out) { let block_size_len = display_size(get_block_size(md, config), config).len(); padding_collections.block_size = block_size_len.max(padding_collections.block_size); } @@ -3474,7 +3460,7 @@ fn calculate_padding_collection( if config.format == Format::Long { let context_len = item.security_context.len(); let (link_count_len, uname_len, group_len, size_len, major_len, minor_len) = - display_dir_entry_size(item, config, out); + display_dir_entry_size(item, config, state); padding_collections.link_count = link_count_len.max(padding_collections.link_count); padding_collections.uname = uname_len.max(padding_collections.uname); padding_collections.group = group_len.max(padding_collections.group); From b833deb8d19369b185d07b6d8bddbe66160e87c8 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Sun, 20 Apr 2025 18:07:21 +0200 Subject: [PATCH 3/4] ls: display_uname/group: Return a reference Cache even numerical strings (numeric_uid_gid) in the HashMap, this makes very little difference performance wise. However, this allows us to return a reference to a String instead of making a clone. Saves about 2-3% on `ls -lR /var/lib .git` (and `ls -lRn`). Also, add a note that HashMap might not be the most optimal choice. --- src/uu/ls/src/ls.rs | 69 +++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 1bf233ebc..d3135bc82 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly +// spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly nohash use std::iter; #[cfg(windows)] @@ -2051,7 +2051,14 @@ fn show_dir_name( struct ListState<'a> { out: BufWriter, style_manager: Option>, + // TODO: More benchmarking with different use cases is required here. + // From experiments, BTreeMap may be faster than HashMap, especially as the + // number of users/groups is very limited. It seems like nohash::IntMap + // performance was equivalent to BTreeMap. + // It's possible a simple vector linear(binary?) search implementation would be even faster. + #[cfg(unix)] uid_cache: HashMap, + #[cfg(unix)] gid_cache: HashMap, } @@ -2065,7 +2072,9 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { let mut state = ListState { out: BufWriter::new(stdout()), style_manager: config.color.as_ref().map(StyleManager::new), + #[cfg(unix)] uid_cache: HashMap::new(), + #[cfg(unix)] gid_cache: HashMap::new(), }; @@ -2817,12 +2826,12 @@ fn display_item_long( if config.long.owner { output_display.extend(b" "); - output_display.extend_pad_right(&display_uname(md, config, state), padding.uname); + output_display.extend_pad_right(display_uname(md, config, state), padding.uname); } if config.long.group { output_display.extend(b" "); - output_display.extend_pad_right(&display_group(md, config, state), padding.group); + output_display.extend_pad_right(display_group(md, config, state), padding.group); } if config.context { @@ -2834,7 +2843,7 @@ fn display_item_long( // the owner, since GNU/Hurd is not currently supported by Rust. if config.long.author { output_display.extend(b" "); - output_display.extend_pad_right(&display_uname(md, config, state), padding.uname); + output_display.extend_pad_right(display_uname(md, config, state), padding.uname); } match display_len_or_rdev(md, config) { @@ -3010,46 +3019,38 @@ use uucore::entries; use uucore::fs::FileInformation; #[cfg(unix)] -fn display_uname(metadata: &Metadata, config: &Config, state: &mut ListState) -> String { +fn display_uname<'a>(metadata: &Metadata, config: &Config, state: &'a mut ListState) -> &'a String { let uid = metadata.uid(); - if config.long.numeric_uid_gid { - uid.to_string() - } else { - state - .uid_cache - .entry(uid) - .or_insert_with(|| entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string())) - .clone() - } + + state.uid_cache.entry(uid).or_insert_with(|| { + if config.long.numeric_uid_gid { + uid.to_string() + } else { + entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()) + } + }) } -#[cfg(all(unix, not(target_os = "redox")))] -fn display_group(metadata: &Metadata, config: &Config, state: &mut ListState) -> String { +#[cfg(unix)] +fn display_group<'a>(metadata: &Metadata, config: &Config, state: &'a mut ListState) -> &'a String { let gid = metadata.gid(); - if config.long.numeric_uid_gid { - gid.to_string() - } else { - state - .gid_cache - .entry(gid) - .or_insert_with(|| entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string())) - .clone() - } -} - -#[cfg(target_os = "redox")] -fn display_group(metadata: &Metadata, _config: &Config, _state: &mut ListState) -> String { - metadata.gid().to_string() + state.gid_cache.entry(gid).or_insert_with(|| { + if cfg!(target_os = "redox") || config.long.numeric_uid_gid { + gid.to_string() + } else { + entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()) + } + }) } #[cfg(not(unix))] -fn display_uname(_metadata: &Metadata, _config: &Config, _state: &mut ListState) -> String { - "somebody".to_string() +fn display_uname(_metadata: &Metadata, _config: &Config, _state: &mut ListState) -> &'static str { + "somebody" } #[cfg(not(unix))] -fn display_group(_metadata: &Metadata, _config: &Config, _state: &mut ListState) -> String { - "somegroup".to_string() +fn display_group(_metadata: &Metadata, _config: &Config, _state: &mut ListState) -> &'static str { + "somegroup" } // The implementations for get_time are separated because some options, such From fc6b896c271eed5654418acc267eb21377c55690 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Sat, 19 Apr 2025 23:02:40 +0200 Subject: [PATCH 4/4] ls: Optimize time formatting Instead of recreating the formatter over and over again, keep it pre-parsed in a variable in TimeStyler class. Also, avoid calling `now` over and over again, that's also slow. Improves performance by about 6%. --- src/uu/ls/src/ls.rs | 88 +++++++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 26 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index d3135bc82..267f6f1c6 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -27,6 +27,7 @@ use std::{ use std::{collections::HashSet, io::IsTerminal}; use ansi_width::ansi_width; +use chrono::format::{Item, StrftimeItems}; use chrono::{DateTime, Local, TimeDelta}; use clap::{ Arg, ArgAction, Command, @@ -273,32 +274,64 @@ enum TimeStyle { Format(String), } -/// Whether the given date is considered recent (i.e., in the last 6 months). -fn is_recent(time: DateTime) -> bool { - // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. - time + TimeDelta::try_seconds(31_556_952 / 2).unwrap() > Local::now() +/// A struct/impl used to format a file DateTime, precomputing the format for performance reasons. +struct TimeStyler { + // default format, always specified. + default: Vec>, + // format for a recent time, only specified it is is different from the default + recent: Option>>, + // If `recent` is set, cache the threshold time when we switch from recent to default format. + recent_time_threshold: Option>, } -impl TimeStyle { - /// Format the given time according to this time format style. - fn format(&self, time: DateTime) -> String { - let recent = is_recent(time); - match (self, recent) { - (Self::FullIso, _) => time.format("%Y-%m-%d %H:%M:%S.%f %z").to_string(), - (Self::LongIso, _) => time.format("%Y-%m-%d %H:%M").to_string(), - (Self::Iso, true) => time.format("%m-%d %H:%M").to_string(), - (Self::Iso, false) => time.format("%Y-%m-%d ").to_string(), - // spell-checker:ignore (word) datetime - //In this version of chrono translating can be done - //The function is chrono::datetime::DateTime::format_localized - //However it's currently still hard to get the current pure-rust-locale - //So it's not yet implemented - (Self::Locale, true) => time.format("%b %e %H:%M").to_string(), - (Self::Locale, false) => time.format("%b %e %Y").to_string(), - (Self::Format(fmt), _) => time - .format(custom_tz_fmt::custom_time_format(fmt).as_str()) - .to_string(), +impl TimeStyler { + /// Create a TimeStyler based on a TimeStyle specification. + fn new(style: &TimeStyle) -> TimeStyler { + let default: Vec> = match style { + TimeStyle::FullIso => StrftimeItems::new("%Y-%m-%d %H:%M:%S.%f %z").parse(), + TimeStyle::LongIso => StrftimeItems::new("%Y-%m-%d %H:%M").parse(), + TimeStyle::Iso => StrftimeItems::new("%Y-%m-%d ").parse(), + // In this version of chrono translating can be done + // The function is chrono::datetime::DateTime::format_localized + // However it's currently still hard to get the current pure-rust-locale + // So it's not yet implemented + TimeStyle::Locale => StrftimeItems::new("%b %e %Y").parse(), + TimeStyle::Format(fmt) => { + // TODO (#7802): Replace with new_lenient + StrftimeItems::new(custom_tz_fmt::custom_time_format(fmt).as_str()).parse_to_owned() + } } + .unwrap(); + let recent = match style { + TimeStyle::Iso => Some(StrftimeItems::new("%m-%d %H:%M")), + // See comment above about locale + TimeStyle::Locale => Some(StrftimeItems::new("%b %e %H:%M")), + _ => None, + } + .map(|x| x.collect()); + let recent_time_threshold = if recent.is_some() { + // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. + Some(Local::now() - TimeDelta::try_seconds(31_556_952 / 2).unwrap()) + } else { + None + }; + + TimeStyler { + default, + recent, + recent_time_threshold, + } + } + + /// Format a DateTime, using `recent` format if available, and the DateTime + /// is recent enough. + fn format(&self, time: DateTime) -> String { + if self.recent.is_none() || time <= self.recent_time_threshold.unwrap() { + time.format_with_items(self.default.iter()) + } else { + time.format_with_items(self.recent.as_ref().unwrap().iter()) + } + .to_string() } } @@ -2060,6 +2093,8 @@ struct ListState<'a> { uid_cache: HashMap, #[cfg(unix)] gid_cache: HashMap, + + time_styler: TimeStyler, } #[allow(clippy::cognitive_complexity)] @@ -2076,6 +2111,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { uid_cache: HashMap::new(), #[cfg(unix)] gid_cache: HashMap::new(), + time_styler: TimeStyler::new(&config.time_style), }; for loc in locs { @@ -2876,7 +2912,7 @@ fn display_item_long( }; output_display.extend(b" "); - output_display.extend(display_date(md, config).as_bytes()); + output_display.extend(display_date(md, config, state).as_bytes()); output_display.extend(b" "); let item_name = display_item_name( @@ -3080,9 +3116,9 @@ fn get_time(md: &Metadata, config: &Config) -> Option> { Some(time.into()) } -fn display_date(metadata: &Metadata, config: &Config) -> String { +fn display_date(metadata: &Metadata, config: &Config, state: &mut ListState) -> String { match get_time(metadata, config) { - Some(time) => config.time_style.format(time), + Some(time) => state.time_styler.format(time), None => "???".into(), } }