1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

ls: add better support for non-UTF-8 bytes

This commit is contained in:
Justin Tracey 2024-12-27 11:10:48 -05:00
parent edf8be5e08
commit b0837a0ca5
No known key found for this signature in database
GPG key ID: 62B84F5ABDDDCE54
4 changed files with 131 additions and 69 deletions

View file

@ -5,6 +5,7 @@
use super::get_metadata_with_deref_opt; use super::get_metadata_with_deref_opt;
use super::PathData; use super::PathData;
use lscolors::{Indicator, LsColors, Style}; use lscolors::{Indicator, LsColors, Style};
use std::ffi::OsString;
use std::fs::{DirEntry, Metadata}; use std::fs::{DirEntry, Metadata};
use std::io::{BufWriter, Stdout}; use std::io::{BufWriter, Stdout};
@ -31,9 +32,9 @@ impl<'a> StyleManager<'a> {
pub(crate) fn apply_style( pub(crate) fn apply_style(
&mut self, &mut self,
new_style: Option<&Style>, new_style: Option<&Style>,
name: &str, name: OsString,
wrap: bool, wrap: bool,
) -> String { ) -> OsString {
let mut style_code = String::new(); let mut style_code = String::new();
let mut force_suffix_reset: bool = false; let mut force_suffix_reset: bool = false;
@ -50,14 +51,14 @@ impl<'a> StyleManager<'a> {
// normal style is to be printed we could skip printing new color // normal style is to be printed we could skip printing new color
// codes // codes
if !self.is_current_style(new_style) { if !self.is_current_style(new_style) {
style_code.push_str(&self.reset(!self.initial_reset_is_done)); style_code.push_str(self.reset(!self.initial_reset_is_done));
style_code.push_str(&self.get_style_code(new_style)); style_code.push_str(&self.get_style_code(new_style));
} }
} }
// if new style is None and current style is Normal we should reset it // if new style is None and current style is Normal we should reset it
else if matches!(self.get_normal_style().copied(), Some(norm_style) if self.is_current_style(&norm_style)) else if matches!(self.get_normal_style().copied(), Some(norm_style) if self.is_current_style(&norm_style))
{ {
style_code.push_str(&self.reset(false)); style_code.push_str(self.reset(false));
// even though this is an unnecessary reset for gnu compatibility we allow it here // even though this is an unnecessary reset for gnu compatibility we allow it here
force_suffix_reset = true; force_suffix_reset = true;
} }
@ -69,16 +70,17 @@ impl<'a> StyleManager<'a> {
// till the end of line // till the end of line
let clear_to_eol = if wrap { "\x1b[K" } else { "" }; let clear_to_eol = if wrap { "\x1b[K" } else { "" };
format!( let mut ret: OsString = style_code.into();
"{style_code}{name}{}{clear_to_eol}", ret.push(name);
self.reset(force_suffix_reset), ret.push(self.reset(force_suffix_reset));
) ret.push(clear_to_eol);
ret
} }
/// Resets the current style and returns the default ANSI reset code to /// Resets the current style and returns the default ANSI reset code to
/// reset all text formatting attributes. If `force` is true, the reset is /// reset all text formatting attributes. If `force` is true, the reset is
/// done even if the reset has been applied before. /// done even if the reset has been applied before.
pub(crate) fn reset(&mut self, force: bool) -> String { pub(crate) fn reset(&mut self, force: bool) -> &str {
// todo: // todo:
// We need to use style from `Indicator::Reset` but as of now ls colors // We need to use style from `Indicator::Reset` but as of now ls colors
// uses a fallback mechanism and because of that if `Indicator::Reset` // uses a fallback mechanism and because of that if `Indicator::Reset`
@ -87,9 +89,9 @@ impl<'a> StyleManager<'a> {
if self.current_style.is_some() || force { if self.current_style.is_some() || force {
self.initial_reset_is_done = true; self.initial_reset_is_done = true;
self.current_style = None; self.current_style = None;
return "\x1b[0m".to_string(); return "\x1b[0m";
} }
String::new() ""
} }
pub(crate) fn get_style_code(&mut self, new_style: &Style) -> String { pub(crate) fn get_style_code(&mut self, new_style: &Style) -> String {
@ -124,9 +126,9 @@ impl<'a> StyleManager<'a> {
&mut self, &mut self,
path: &PathData, path: &PathData,
md_option: Option<&Metadata>, md_option: Option<&Metadata>,
name: &str, name: OsString,
wrap: bool, wrap: bool,
) -> String { ) -> OsString {
let style = self let style = self
.colors .colors
.style_for_path_with_metadata(&path.p_buf, md_option); .style_for_path_with_metadata(&path.p_buf, md_option);
@ -136,9 +138,9 @@ impl<'a> StyleManager<'a> {
pub(crate) fn apply_style_based_on_dir_entry( pub(crate) fn apply_style_based_on_dir_entry(
&mut self, &mut self,
dir_entry: &DirEntry, dir_entry: &DirEntry,
name: &str, name: OsString,
wrap: bool, wrap: bool,
) -> String { ) -> OsString {
let style = self.colors.style_for(dir_entry); let style = self.colors.style_for(dir_entry);
self.apply_style(style, name, wrap) self.apply_style(style, name, wrap)
} }
@ -149,13 +151,13 @@ impl<'a> StyleManager<'a> {
/// unnecessary calls to stat() /// unnecessary calls to stat()
/// and manages the symlink errors /// and manages the symlink errors
pub(crate) fn color_name( pub(crate) fn color_name(
name: &str, name: OsString,
path: &PathData, path: &PathData,
style_manager: &mut StyleManager, style_manager: &mut StyleManager,
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
target_symlink: Option<&PathData>, target_symlink: Option<&PathData>,
wrap: bool, wrap: bool,
) -> String { ) -> OsString {
// Check if the file has capabilities // Check if the file has capabilities
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
{ {

View file

@ -183,8 +183,7 @@ pub fn update_positions(dired: &mut DiredOutput, start: usize, end: usize) {
/// we don't use clap here because we need to know if the argument is present /// we don't use clap here because we need to know if the argument is present
/// as it can be overridden by --hyperlink /// as it can be overridden by --hyperlink
pub fn is_dired_arg_present() -> bool { pub fn is_dired_arg_present() -> bool {
let args: Vec<String> = std::env::args().collect(); std::env::args_os().any(|x| x == "--dired" || x == "-D")
args.iter().any(|x| x == "--dired" || x == "-D")
} }
#[cfg(test)] #[cfg(test)]

View file

@ -55,12 +55,13 @@ use uucore::libc::{dev_t, major, minor};
#[cfg(unix)] #[cfg(unix)]
use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR}; use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
use uucore::line_ending::LineEnding; use uucore::line_ending::LineEnding;
use uucore::quoting_style::{self, QuotingStyle}; use uucore::quoting_style::{self, escape_name, QuotingStyle};
use uucore::{ use uucore::{
display::Quotable, display::Quotable,
error::{set_exit_code, UError, UResult}, error::{set_exit_code, UError, UResult},
format_usage, format_usage,
fs::display_permissions, fs::display_permissions,
os_str_as_bytes_lossy,
parse_size::parse_size_u64, parse_size::parse_size_u64,
shortcut_value_parser::ShortcutValueParser, shortcut_value_parser::ShortcutValueParser,
version_cmp::version_cmp, version_cmp::version_cmp,
@ -2047,12 +2048,13 @@ impl PathData {
/// dir1: <- This as well /// dir1: <- This as well
/// file11 /// file11
/// ``` /// ```
fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) { fn show_dir_name(
// FIXME: replace this with appropriate behavior for literal unprintable bytes path_data: &PathData,
out: &mut BufWriter<Stdout>,
config: &Config,
) -> std::io::Result<()> {
let escaped_name = let escaped_name =
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style) quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
.to_string_lossy()
.to_string();
let name = if config.hyperlink && !config.dired { let name = if config.hyperlink && !config.dired {
create_hyperlink(&escaped_name, path_data) create_hyperlink(&escaped_name, path_data)
@ -2060,7 +2062,8 @@ fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Con
escaped_name escaped_name
}; };
write!(out, "{name}:").unwrap(); write_os_str(out, &name)?;
write!(out, ":")
} }
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
@ -2137,7 +2140,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
if config.dired { if config.dired {
dired::indent(&mut out)?; dired::indent(&mut out)?;
} }
show_dir_name(path_data, &mut out, config); show_dir_name(path_data, &mut out, config)?;
writeln!(out)?; writeln!(out)?;
if config.dired { if config.dired {
// First directory displayed // First directory displayed
@ -2149,7 +2152,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
} }
} else { } else {
writeln!(out)?; writeln!(out)?;
show_dir_name(path_data, &mut out, config); show_dir_name(path_data, &mut out, config)?;
writeln!(out)?; writeln!(out)?;
} }
} }
@ -2266,7 +2269,11 @@ fn should_display(entry: &DirEntry, config: &Config) -> bool {
case_sensitive: true, case_sensitive: true,
}; };
let file_name = entry.file_name(); let file_name = entry.file_name();
// If the decoding fails, still show an incorrect rendering // If the decoding fails, still match best we can
// FIXME: use OsStrings or Paths once we have a glob crate that supports it:
// https://github.com/rust-lang/glob/issues/23
// https://github.com/rust-lang/glob/issues/78
// https://github.com/BurntSushi/ripgrep/issues/1250
let file_name = match file_name.to_str() { let file_name = match file_name.to_str() {
Some(s) => s.to_string(), Some(s) => s.to_string(),
None => file_name.to_string_lossy().into_owned(), None => file_name.to_string_lossy().into_owned(),
@ -2378,7 +2385,7 @@ fn enter_directory(
dired::add_dir_name(dired, dir_name_size); dired::add_dir_name(dired, dir_name_size);
} }
show_dir_name(e, out, config); show_dir_name(e, out, config)?;
writeln!(out)?; writeln!(out)?;
enter_directory( enter_directory(
e, e,
@ -2518,7 +2525,7 @@ fn display_items(
let quoted = items.iter().any(|item| { let quoted = items.iter().any(|item| {
let name = escape_name(&item.display_name, &config.quoting_style); let name = escape_name(&item.display_name, &config.quoting_style);
name.starts_with('\'') os_str_starts_with(&name, b"'")
}); });
if config.format == Format::Long { if config.format == Format::Long {
@ -2592,19 +2599,20 @@ fn display_items(
Format::Commas => { Format::Commas => {
let mut current_col = 0; let mut current_col = 0;
if let Some(name) = names.next() { if let Some(name) = names.next() {
write!(out, "{name}")?; write_os_str(out, &name)?;
current_col = ansi_width(&name) as u16 + 2; current_col = ansi_width(&name.to_string_lossy()) as u16 + 2;
} }
for name in names { for name in names {
let name_width = ansi_width(&name) as u16; let name_width = ansi_width(&name.to_string_lossy()) as u16;
// If the width is 0 we print one single line // If the width is 0 we print one single line
if config.width != 0 && current_col + name_width + 1 > config.width { if config.width != 0 && current_col + name_width + 1 > config.width {
current_col = name_width + 2; current_col = name_width + 2;
write!(out, ",\n{name}")?; writeln!(out, ",")?;
} else { } else {
current_col += name_width + 2; current_col += name_width + 2;
write!(out, ", {name}")?; write!(out, ", ")?;
} }
write_os_str(out, &name)?;
} }
// Current col is never zero again if names have been printed. // Current col is never zero again if names have been printed.
// So we print a newline. // So we print a newline.
@ -2614,7 +2622,8 @@ fn display_items(
} }
_ => { _ => {
for name in names { for name in names {
write!(out, "{}{}", name, config.line_ending)?; write_os_str(out, &name)?;
write!(out, "{}", config.line_ending)?;
} }
} }
}; };
@ -2648,7 +2657,7 @@ fn get_block_size(md: &Metadata, config: &Config) -> u64 {
} }
fn display_grid( fn display_grid(
names: impl Iterator<Item = String>, names: impl Iterator<Item = OsString>,
width: u16, width: u16,
direction: Direction, direction: Direction,
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
@ -2662,13 +2671,13 @@ fn display_grid(
write!(out, " ")?; write!(out, " ")?;
} }
printed_something = true; printed_something = true;
write!(out, "{name}")?; write_os_str(out, &name)?;
} }
if printed_something { if printed_something {
writeln!(out)?; writeln!(out)?;
} }
} else { } else {
let names: Vec<String> = if quoted { let names: Vec<_> = if quoted {
// In case some names are quoted, GNU adds a space before each // In case some names are quoted, GNU adds a space before each
// entry that does not start with a quote to make it prettier // entry that does not start with a quote to make it prettier
// on multiline. // on multiline.
@ -2683,10 +2692,12 @@ fn display_grid(
// ``` // ```
names names
.map(|n| { .map(|n| {
if n.starts_with('\'') || n.starts_with('"') { if os_str_starts_with(&n, b"'") || os_str_starts_with(&n, b"\"") {
n n
} else { } else {
format!(" {n}") let mut ret: OsString = " ".into();
ret.push(n);
ret
} }
}) })
.collect() .collect()
@ -2694,6 +2705,12 @@ fn display_grid(
names.collect() names.collect()
}; };
// FIXME: the Grid crate only supports &str, so can't display raw bytes
let names: Vec<_> = names
.into_iter()
.map(|s| s.to_string_lossy().into_owned())
.collect();
// Determine whether to use tabs for separation based on whether any entry ends with '/'. // Determine whether to use tabs for separation based on whether any entry ends with '/'.
// If any entry ends with '/', it indicates that the -F flag is likely used to classify directories. // If any entry ends with '/', it indicates that the -F flag is likely used to classify directories.
let use_tabs = names.iter().any(|name| name.ends_with('/')); let use_tabs = names.iter().any(|name| name.ends_with('/'));
@ -2755,14 +2772,14 @@ fn display_item_long(
style_manager: &mut Option<StyleManager>, style_manager: &mut Option<StyleManager>,
quoted: bool, quoted: bool,
) -> UResult<()> { ) -> UResult<()> {
let mut output_display: String = String::new(); let mut output_display: Vec<u8> = vec![];
// apply normal color to non filename outputs // apply normal color to non filename outputs
if let Some(style_manager) = style_manager { if let Some(style_manager) = style_manager {
write!(output_display, "{}", style_manager.apply_normal()).unwrap(); write!(output_display, "{}", style_manager.apply_normal()).unwrap();
} }
if config.dired { if config.dired {
output_display += " "; output_display.extend(b" ");
} }
if let Some(md) = item.get_metadata(out) { if let Some(md) = item.get_metadata(out) {
#[cfg(any(not(unix), target_os = "android", target_os = "macos"))] #[cfg(any(not(unix), target_os = "android", target_os = "macos"))]
@ -2869,11 +2886,13 @@ fn display_item_long(
String::new(), String::new(),
out, out,
style_manager, style_manager,
ansi_width(&output_display), ansi_width(&String::from_utf8_lossy(&output_display)),
); );
let displayed_item = if quoted && !item_name.starts_with('\'') { let displayed_item = if quoted && !os_str_starts_with(&item_name, b"'") {
format!(" {item_name}") let mut ret: OsString = " ".into();
ret.push(item_name);
ret
} else { } else {
item_name item_name
}; };
@ -2886,7 +2905,8 @@ fn display_item_long(
); );
dired::update_positions(dired, start, end); dired::update_positions(dired, start, end);
} }
write!(output_display, "{}{}", displayed_item, config.line_ending).unwrap(); write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?;
} else { } else {
#[cfg(unix)] #[cfg(unix)]
let leading_char = { let leading_char = {
@ -2966,7 +2986,7 @@ fn display_item_long(
String::new(), String::new(),
out, out,
style_manager, style_manager,
ansi_width(&output_display), ansi_width(&String::from_utf8_lossy(&output_display)),
); );
let date_len = 12; let date_len = 12;
@ -2982,12 +3002,13 @@ fn display_item_long(
dired::calculate_and_update_positions( dired::calculate_and_update_positions(
dired, dired,
output_display.len(), output_display.len(),
displayed_item.trim().len(), displayed_item.to_string_lossy().trim().len(),
); );
} }
write!(output_display, "{}{}", displayed_item, config.line_ending).unwrap(); write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?;
} }
write!(out, "{output_display}")?; out.write_all(&output_display)?;
Ok(()) Ok(())
} }
@ -3228,7 +3249,7 @@ fn display_item_name(
out: &mut BufWriter<Stdout>, out: &mut BufWriter<Stdout>,
style_manager: &mut Option<StyleManager>, style_manager: &mut Option<StyleManager>,
current_column: usize, current_column: usize,
) -> String { ) -> OsString {
// This is our return value. We start by `&path.display_name` and modify it along the way. // This is our return value. We start by `&path.display_name` and modify it along the way.
let mut name = escape_name(&path.display_name, &config.quoting_style); let mut name = escape_name(&path.display_name, &config.quoting_style);
@ -3240,11 +3261,14 @@ fn display_item_name(
} }
if let Some(style_manager) = style_manager { if let Some(style_manager) = style_manager {
name = color_name(&name, path, style_manager, out, None, is_wrap(name.len())); let len = name.len();
name = color_name(name, path, style_manager, out, None, is_wrap(len));
} }
if config.format != Format::Long && !more_info.is_empty() { if config.format != Format::Long && !more_info.is_empty() {
name = more_info + &name; let old_name = name;
name = more_info.into();
name.push(&old_name);
} }
if config.indicator_style != IndicatorStyle::None { if config.indicator_style != IndicatorStyle::None {
@ -3270,7 +3294,7 @@ fn display_item_name(
}; };
if let Some(c) = char_opt { if let Some(c) = char_opt {
name.push(c); name.push(OsStr::new(&c.to_string()));
} }
} }
@ -3281,7 +3305,7 @@ fn display_item_name(
{ {
match path.p_buf.read_link() { match path.p_buf.read_link() {
Ok(target) => { Ok(target) => {
name.push_str(" -> "); name.push(" -> ");
// We might as well color the symlink output after the arrow. // We might as well color the symlink output after the arrow.
// This makes extra system calls, but provides important information that // This makes extra system calls, but provides important information that
@ -3309,10 +3333,10 @@ fn display_item_name(
) )
.is_err() .is_err()
{ {
name.push_str(&path.p_buf.read_link().unwrap().to_string_lossy()); name.push(path.p_buf.read_link().unwrap());
} else { } else {
name.push_str(&color_name( name.push(color_name(
&escape_name(target.as_os_str(), &config.quoting_style), escape_name(target.as_os_str(), &config.quoting_style),
path, path,
style_manager, style_manager,
out, out,
@ -3323,7 +3347,7 @@ fn display_item_name(
} else { } else {
// If no coloring is required, we just use target as is. // If no coloring is required, we just use target as is.
// Apply the right quoting // Apply the right quoting
name.push_str(&escape_name(target.as_os_str(), &config.quoting_style)); name.push(escape_name(target.as_os_str(), &config.quoting_style));
} }
} }
Err(err) => { Err(err) => {
@ -3341,14 +3365,16 @@ fn display_item_name(
} else { } else {
pad_left(&path.security_context, pad_count) pad_left(&path.security_context, pad_count)
}; };
name = format!("{security_context} {name}"); let old_name = name;
name = format!("{security_context} ").into();
name.push(old_name);
} }
} }
name name
} }
fn create_hyperlink(name: &str, path: &PathData) -> String { fn create_hyperlink(name: &OsStr, path: &PathData) -> OsString {
let hostname = hostname::get().unwrap_or_else(|_| OsString::from("")); let hostname = hostname::get().unwrap_or_else(|_| OsString::from(""));
let hostname = hostname.to_string_lossy(); let hostname = hostname.to_string_lossy();
@ -3373,7 +3399,10 @@ fn create_hyperlink(name: &str, path: &PathData) -> String {
.collect(); .collect();
// \x1b = ESC, \x07 = BEL // \x1b = ESC, \x07 = BEL
format!("\x1b]8;;file://{hostname}{absolute_path}\x07{name}\x1b]8;;\x07") let mut ret: OsString = format!("\x1b]8;;file://{hostname}{absolute_path}\x07").into();
ret.push(name);
ret.push("\x1b]8;;\x07");
ret
} }
#[cfg(not(unix))] #[cfg(not(unix))]
@ -3546,9 +3575,10 @@ fn calculate_padding_collection(
padding_collections padding_collections
} }
// FIXME: replace this with appropriate behavior for literal unprintable bytes fn os_str_starts_with(haystack: &OsStr, needle: &[u8]) -> bool {
fn escape_name(name: &OsStr, style: &QuotingStyle) -> String { os_str_as_bytes_lossy(haystack).starts_with(needle)
quoting_style::escape_name(name, style) }
.to_string_lossy()
.to_string() fn write_os_str<W: Write>(writer: &mut W, string: &OsStr) -> std::io::Result<()> {
writer.write_all(&os_str_as_bytes_lossy(string))
} }

View file

@ -5563,3 +5563,34 @@ fn test_ls_capabilities() {
.stdout_contains("\x1b[30;41mcap_pos") // spell-checker:disable-line .stdout_contains("\x1b[30;41mcap_pos") // spell-checker:disable-line
.stdout_does_not_contain("0;41mtest/dir/cap_neg"); // spell-checker:disable-line .stdout_does_not_contain("0;41mtest/dir/cap_neg"); // spell-checker:disable-line
} }
#[cfg(feature = "test_risky_names")]
#[test]
fn test_non_unicode_names() {
// more extensive unit tests for correct escaping etc. are in the quoting_style module
let scene = TestScenario::new(util_name!());
let target_file = uucore::os_str_from_bytes(b"some-dir1/\xC0.file")
.expect("Only unix platforms can test non-unicode names");
let target_dir = uucore::os_str_from_bytes(b"some-dir1/\xC0.dir")
.expect("Only unix platforms can test non-unicode names");
let at = &scene.fixtures;
at.mkdir("some-dir1");
at.touch(target_file);
at.mkdir(target_dir);
scene
.ucmd()
.arg("--quoting-style=shell-escape")
.arg("some-dir1")
.succeeds()
.stdout_contains("''$'\\300''.dir'")
.stdout_contains("''$'\\300''.file'");
scene
.ucmd()
.arg("--quoting-style=literal")
.arg("--show-control-chars")
.arg("some-dir1")
.succeeds()
.stdout_is_bytes(b"\xC0.dir\n\xC0.file\n");
}