1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

ls: add better support for non-UTF-8 bytes

This commit is contained in:
Justin Tracey 2024-12-27 11:10:48 -05:00
parent edf8be5e08
commit b0837a0ca5
No known key found for this signature in database
GPG key ID: 62B84F5ABDDDCE54
4 changed files with 131 additions and 69 deletions

View file

@ -5,6 +5,7 @@
use super::get_metadata_with_deref_opt;
use super::PathData;
use lscolors::{Indicator, LsColors, Style};
use std::ffi::OsString;
use std::fs::{DirEntry, Metadata};
use std::io::{BufWriter, Stdout};
@ -31,9 +32,9 @@ impl<'a> StyleManager<'a> {
pub(crate) fn apply_style(
&mut self,
new_style: Option<&Style>,
name: &str,
name: OsString,
wrap: bool,
) -> String {
) -> OsString {
let mut style_code = String::new();
let mut force_suffix_reset: bool = false;
@ -50,14 +51,14 @@ impl<'a> StyleManager<'a> {
// normal style is to be printed we could skip printing new color
// codes
if !self.is_current_style(new_style) {
style_code.push_str(&self.reset(!self.initial_reset_is_done));
style_code.push_str(self.reset(!self.initial_reset_is_done));
style_code.push_str(&self.get_style_code(new_style));
}
}
// if new style is None and current style is Normal we should reset it
else if matches!(self.get_normal_style().copied(), Some(norm_style) if self.is_current_style(&norm_style))
{
style_code.push_str(&self.reset(false));
style_code.push_str(self.reset(false));
// even though this is an unnecessary reset for gnu compatibility we allow it here
force_suffix_reset = true;
}
@ -69,16 +70,17 @@ impl<'a> StyleManager<'a> {
// till the end of line
let clear_to_eol = if wrap { "\x1b[K" } else { "" };
format!(
"{style_code}{name}{}{clear_to_eol}",
self.reset(force_suffix_reset),
)
let mut ret: OsString = style_code.into();
ret.push(name);
ret.push(self.reset(force_suffix_reset));
ret.push(clear_to_eol);
ret
}
/// Resets the current style and returns the default ANSI reset code to
/// reset all text formatting attributes. If `force` is true, the reset is
/// done even if the reset has been applied before.
pub(crate) fn reset(&mut self, force: bool) -> String {
pub(crate) fn reset(&mut self, force: bool) -> &str {
// todo:
// We need to use style from `Indicator::Reset` but as of now ls colors
// uses a fallback mechanism and because of that if `Indicator::Reset`
@ -87,9 +89,9 @@ impl<'a> StyleManager<'a> {
if self.current_style.is_some() || force {
self.initial_reset_is_done = true;
self.current_style = None;
return "\x1b[0m".to_string();
return "\x1b[0m";
}
String::new()
""
}
pub(crate) fn get_style_code(&mut self, new_style: &Style) -> String {
@ -124,9 +126,9 @@ impl<'a> StyleManager<'a> {
&mut self,
path: &PathData,
md_option: Option<&Metadata>,
name: &str,
name: OsString,
wrap: bool,
) -> String {
) -> OsString {
let style = self
.colors
.style_for_path_with_metadata(&path.p_buf, md_option);
@ -136,9 +138,9 @@ impl<'a> StyleManager<'a> {
pub(crate) fn apply_style_based_on_dir_entry(
&mut self,
dir_entry: &DirEntry,
name: &str,
name: OsString,
wrap: bool,
) -> String {
) -> OsString {
let style = self.colors.style_for(dir_entry);
self.apply_style(style, name, wrap)
}
@ -149,13 +151,13 @@ impl<'a> StyleManager<'a> {
/// unnecessary calls to stat()
/// and manages the symlink errors
pub(crate) fn color_name(
name: &str,
name: OsString,
path: &PathData,
style_manager: &mut StyleManager,
out: &mut BufWriter<Stdout>,
target_symlink: Option<&PathData>,
wrap: bool,
) -> String {
) -> OsString {
// Check if the file has capabilities
#[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))]
{

View file

@ -183,8 +183,7 @@ pub fn update_positions(dired: &mut DiredOutput, start: usize, end: usize) {
/// we don't use clap here because we need to know if the argument is present
/// as it can be overridden by --hyperlink
pub fn is_dired_arg_present() -> bool {
let args: Vec<String> = std::env::args().collect();
args.iter().any(|x| x == "--dired" || x == "-D")
std::env::args_os().any(|x| x == "--dired" || x == "-D")
}
#[cfg(test)]

View file

@ -55,12 +55,13 @@ use uucore::libc::{dev_t, major, minor};
#[cfg(unix)]
use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
use uucore::line_ending::LineEnding;
use uucore::quoting_style::{self, QuotingStyle};
use uucore::quoting_style::{self, escape_name, QuotingStyle};
use uucore::{
display::Quotable,
error::{set_exit_code, UError, UResult},
format_usage,
fs::display_permissions,
os_str_as_bytes_lossy,
parse_size::parse_size_u64,
shortcut_value_parser::ShortcutValueParser,
version_cmp::version_cmp,
@ -2047,12 +2048,13 @@ impl PathData {
/// dir1: <- This as well
/// file11
/// ```
fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Config) {
// FIXME: replace this with appropriate behavior for literal unprintable bytes
fn show_dir_name(
path_data: &PathData,
out: &mut BufWriter<Stdout>,
config: &Config,
) -> std::io::Result<()> {
let escaped_name =
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style)
.to_string_lossy()
.to_string();
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
let name = if config.hyperlink && !config.dired {
create_hyperlink(&escaped_name, path_data)
@ -2060,7 +2062,8 @@ fn show_dir_name(path_data: &PathData, out: &mut BufWriter<Stdout>, config: &Con
escaped_name
};
write!(out, "{name}:").unwrap();
write_os_str(out, &name)?;
write!(out, ":")
}
#[allow(clippy::cognitive_complexity)]
@ -2137,7 +2140,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
if config.dired {
dired::indent(&mut out)?;
}
show_dir_name(path_data, &mut out, config);
show_dir_name(path_data, &mut out, config)?;
writeln!(out)?;
if config.dired {
// First directory displayed
@ -2149,7 +2152,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
}
} else {
writeln!(out)?;
show_dir_name(path_data, &mut out, config);
show_dir_name(path_data, &mut out, config)?;
writeln!(out)?;
}
}
@ -2266,7 +2269,11 @@ fn should_display(entry: &DirEntry, config: &Config) -> bool {
case_sensitive: true,
};
let file_name = entry.file_name();
// If the decoding fails, still show an incorrect rendering
// If the decoding fails, still match best we can
// FIXME: use OsStrings or Paths once we have a glob crate that supports it:
// https://github.com/rust-lang/glob/issues/23
// https://github.com/rust-lang/glob/issues/78
// https://github.com/BurntSushi/ripgrep/issues/1250
let file_name = match file_name.to_str() {
Some(s) => s.to_string(),
None => file_name.to_string_lossy().into_owned(),
@ -2378,7 +2385,7 @@ fn enter_directory(
dired::add_dir_name(dired, dir_name_size);
}
show_dir_name(e, out, config);
show_dir_name(e, out, config)?;
writeln!(out)?;
enter_directory(
e,
@ -2518,7 +2525,7 @@ fn display_items(
let quoted = items.iter().any(|item| {
let name = escape_name(&item.display_name, &config.quoting_style);
name.starts_with('\'')
os_str_starts_with(&name, b"'")
});
if config.format == Format::Long {
@ -2592,19 +2599,20 @@ fn display_items(
Format::Commas => {
let mut current_col = 0;
if let Some(name) = names.next() {
write!(out, "{name}")?;
current_col = ansi_width(&name) as u16 + 2;
write_os_str(out, &name)?;
current_col = ansi_width(&name.to_string_lossy()) as u16 + 2;
}
for name in names {
let name_width = ansi_width(&name) as u16;
let name_width = ansi_width(&name.to_string_lossy()) as u16;
// If the width is 0 we print one single line
if config.width != 0 && current_col + name_width + 1 > config.width {
current_col = name_width + 2;
write!(out, ",\n{name}")?;
writeln!(out, ",")?;
} else {
current_col += name_width + 2;
write!(out, ", {name}")?;
write!(out, ", ")?;
}
write_os_str(out, &name)?;
}
// Current col is never zero again if names have been printed.
// So we print a newline.
@ -2614,7 +2622,8 @@ fn display_items(
}
_ => {
for name in names {
write!(out, "{}{}", name, config.line_ending)?;
write_os_str(out, &name)?;
write!(out, "{}", config.line_ending)?;
}
}
};
@ -2648,7 +2657,7 @@ fn get_block_size(md: &Metadata, config: &Config) -> u64 {
}
fn display_grid(
names: impl Iterator<Item = String>,
names: impl Iterator<Item = OsString>,
width: u16,
direction: Direction,
out: &mut BufWriter<Stdout>,
@ -2662,13 +2671,13 @@ fn display_grid(
write!(out, " ")?;
}
printed_something = true;
write!(out, "{name}")?;
write_os_str(out, &name)?;
}
if printed_something {
writeln!(out)?;
}
} else {
let names: Vec<String> = if quoted {
let names: Vec<_> = if quoted {
// In case some names are quoted, GNU adds a space before each
// entry that does not start with a quote to make it prettier
// on multiline.
@ -2683,10 +2692,12 @@ fn display_grid(
// ```
names
.map(|n| {
if n.starts_with('\'') || n.starts_with('"') {
if os_str_starts_with(&n, b"'") || os_str_starts_with(&n, b"\"") {
n
} else {
format!(" {n}")
let mut ret: OsString = " ".into();
ret.push(n);
ret
}
})
.collect()
@ -2694,6 +2705,12 @@ fn display_grid(
names.collect()
};
// FIXME: the Grid crate only supports &str, so can't display raw bytes
let names: Vec<_> = names
.into_iter()
.map(|s| s.to_string_lossy().into_owned())
.collect();
// Determine whether to use tabs for separation based on whether any entry ends with '/'.
// If any entry ends with '/', it indicates that the -F flag is likely used to classify directories.
let use_tabs = names.iter().any(|name| name.ends_with('/'));
@ -2755,14 +2772,14 @@ fn display_item_long(
style_manager: &mut Option<StyleManager>,
quoted: bool,
) -> UResult<()> {
let mut output_display: String = String::new();
let mut output_display: Vec<u8> = vec![];
// apply normal color to non filename outputs
if let Some(style_manager) = style_manager {
write!(output_display, "{}", style_manager.apply_normal()).unwrap();
}
if config.dired {
output_display += " ";
output_display.extend(b" ");
}
if let Some(md) = item.get_metadata(out) {
#[cfg(any(not(unix), target_os = "android", target_os = "macos"))]
@ -2869,11 +2886,13 @@ fn display_item_long(
String::new(),
out,
style_manager,
ansi_width(&output_display),
ansi_width(&String::from_utf8_lossy(&output_display)),
);
let displayed_item = if quoted && !item_name.starts_with('\'') {
format!(" {item_name}")
let displayed_item = if quoted && !os_str_starts_with(&item_name, b"'") {
let mut ret: OsString = " ".into();
ret.push(item_name);
ret
} else {
item_name
};
@ -2886,7 +2905,8 @@ fn display_item_long(
);
dired::update_positions(dired, start, end);
}
write!(output_display, "{}{}", displayed_item, config.line_ending).unwrap();
write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?;
} else {
#[cfg(unix)]
let leading_char = {
@ -2966,7 +2986,7 @@ fn display_item_long(
String::new(),
out,
style_manager,
ansi_width(&output_display),
ansi_width(&String::from_utf8_lossy(&output_display)),
);
let date_len = 12;
@ -2982,12 +3002,13 @@ fn display_item_long(
dired::calculate_and_update_positions(
dired,
output_display.len(),
displayed_item.trim().len(),
displayed_item.to_string_lossy().trim().len(),
);
}
write!(output_display, "{}{}", displayed_item, config.line_ending).unwrap();
write_os_str(&mut output_display, &displayed_item)?;
write!(output_display, "{}", config.line_ending)?;
}
write!(out, "{output_display}")?;
out.write_all(&output_display)?;
Ok(())
}
@ -3228,7 +3249,7 @@ fn display_item_name(
out: &mut BufWriter<Stdout>,
style_manager: &mut Option<StyleManager>,
current_column: usize,
) -> String {
) -> OsString {
// This is our return value. We start by `&path.display_name` and modify it along the way.
let mut name = escape_name(&path.display_name, &config.quoting_style);
@ -3240,11 +3261,14 @@ fn display_item_name(
}
if let Some(style_manager) = style_manager {
name = color_name(&name, path, style_manager, out, None, is_wrap(name.len()));
let len = name.len();
name = color_name(name, path, style_manager, out, None, is_wrap(len));
}
if config.format != Format::Long && !more_info.is_empty() {
name = more_info + &name;
let old_name = name;
name = more_info.into();
name.push(&old_name);
}
if config.indicator_style != IndicatorStyle::None {
@ -3270,7 +3294,7 @@ fn display_item_name(
};
if let Some(c) = char_opt {
name.push(c);
name.push(OsStr::new(&c.to_string()));
}
}
@ -3281,7 +3305,7 @@ fn display_item_name(
{
match path.p_buf.read_link() {
Ok(target) => {
name.push_str(" -> ");
name.push(" -> ");
// We might as well color the symlink output after the arrow.
// This makes extra system calls, but provides important information that
@ -3309,10 +3333,10 @@ fn display_item_name(
)
.is_err()
{
name.push_str(&path.p_buf.read_link().unwrap().to_string_lossy());
name.push(path.p_buf.read_link().unwrap());
} else {
name.push_str(&color_name(
&escape_name(target.as_os_str(), &config.quoting_style),
name.push(color_name(
escape_name(target.as_os_str(), &config.quoting_style),
path,
style_manager,
out,
@ -3323,7 +3347,7 @@ fn display_item_name(
} else {
// If no coloring is required, we just use target as is.
// Apply the right quoting
name.push_str(&escape_name(target.as_os_str(), &config.quoting_style));
name.push(escape_name(target.as_os_str(), &config.quoting_style));
}
}
Err(err) => {
@ -3341,14 +3365,16 @@ fn display_item_name(
} else {
pad_left(&path.security_context, pad_count)
};
name = format!("{security_context} {name}");
let old_name = name;
name = format!("{security_context} ").into();
name.push(old_name);
}
}
name
}
fn create_hyperlink(name: &str, path: &PathData) -> String {
fn create_hyperlink(name: &OsStr, path: &PathData) -> OsString {
let hostname = hostname::get().unwrap_or_else(|_| OsString::from(""));
let hostname = hostname.to_string_lossy();
@ -3373,7 +3399,10 @@ fn create_hyperlink(name: &str, path: &PathData) -> String {
.collect();
// \x1b = ESC, \x07 = BEL
format!("\x1b]8;;file://{hostname}{absolute_path}\x07{name}\x1b]8;;\x07")
let mut ret: OsString = format!("\x1b]8;;file://{hostname}{absolute_path}\x07").into();
ret.push(name);
ret.push("\x1b]8;;\x07");
ret
}
#[cfg(not(unix))]
@ -3546,9 +3575,10 @@ fn calculate_padding_collection(
padding_collections
}
// FIXME: replace this with appropriate behavior for literal unprintable bytes
fn escape_name(name: &OsStr, style: &QuotingStyle) -> String {
quoting_style::escape_name(name, style)
.to_string_lossy()
.to_string()
fn os_str_starts_with(haystack: &OsStr, needle: &[u8]) -> bool {
os_str_as_bytes_lossy(haystack).starts_with(needle)
}
fn write_os_str<W: Write>(writer: &mut W, string: &OsStr) -> std::io::Result<()> {
writer.write_all(&os_str_as_bytes_lossy(string))
}

View file

@ -5563,3 +5563,34 @@ fn test_ls_capabilities() {
.stdout_contains("\x1b[30;41mcap_pos") // spell-checker:disable-line
.stdout_does_not_contain("0;41mtest/dir/cap_neg"); // spell-checker:disable-line
}
#[cfg(feature = "test_risky_names")]
#[test]
fn test_non_unicode_names() {
// more extensive unit tests for correct escaping etc. are in the quoting_style module
let scene = TestScenario::new(util_name!());
let target_file = uucore::os_str_from_bytes(b"some-dir1/\xC0.file")
.expect("Only unix platforms can test non-unicode names");
let target_dir = uucore::os_str_from_bytes(b"some-dir1/\xC0.dir")
.expect("Only unix platforms can test non-unicode names");
let at = &scene.fixtures;
at.mkdir("some-dir1");
at.touch(target_file);
at.mkdir(target_dir);
scene
.ucmd()
.arg("--quoting-style=shell-escape")
.arg("some-dir1")
.succeeds()
.stdout_contains("''$'\\300''.dir'")
.stdout_contains("''$'\\300''.file'");
scene
.ucmd()
.arg("--quoting-style=literal")
.arg("--show-control-chars")
.arg("some-dir1")
.succeeds()
.stdout_is_bytes(b"\xC0.dir\n\xC0.file\n");
}