1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

quoting_style: introduce locale-aware quoting style

This commit is contained in:
Dorian Peron 2025-06-14 13:48:22 +02:00
parent bb30eb513e
commit deeaec3d4a
6 changed files with 65 additions and 24 deletions

View file

@ -61,7 +61,9 @@ use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
use uucore::libc::{dev_t, major, minor}; use uucore::libc::{dev_t, major, minor};
use uucore::line_ending::LineEnding; use uucore::line_ending::LineEnding;
use uucore::locale::{get_message, get_message_with_args}; use uucore::locale::{get_message, get_message_with_args};
use uucore::quoting_style::{self, QuotingStyle, escape_name}; use uucore::quoting_style::{
self, QuotingStyle, locale_aware_escape_dir_name, locale_aware_escape_name,
};
use uucore::{ use uucore::{
display::Quotable, display::Quotable,
error::{UError, UResult, set_exit_code}, error::{UError, UResult, set_exit_code},
@ -2008,7 +2010,7 @@ fn show_dir_name(
config: &Config, config: &Config,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let escaped_name = let escaped_name =
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style); locale_aware_escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
let name = if config.hyperlink && !config.dired { let name = if config.hyperlink && !config.dired {
create_hyperlink(&escaped_name, path_data) create_hyperlink(&escaped_name, path_data)
@ -2509,7 +2511,7 @@ fn display_items(
// option, print the security context to the left of the size column. // option, print the security context to the left of the size column.
let quoted = items.iter().any(|item| { let quoted = items.iter().any(|item| {
let name = escape_name(&item.display_name, &config.quoting_style); let name = locale_aware_escape_name(&item.display_name, &config.quoting_style);
os_str_starts_with(&name, b"'") os_str_starts_with(&name, b"'")
}); });
@ -3152,7 +3154,7 @@ fn classify_file(path: &PathData, out: &mut BufWriter<Stdout>) -> Option<char> {
/// Takes a [`PathData`] struct and returns a cell with a name ready for displaying. /// Takes a [`PathData`] struct and returns a cell with a name ready for displaying.
/// ///
/// This function relies on the following parameters in the provided `&Config`: /// This function relies on the following parameters in the provided `&Config`:
/// * `config.quoting_style` to decide how we will escape `name` using [`escape_name`]. /// * `config.quoting_style` to decide how we will escape `name` using [`locale_aware_escape_name`].
/// * `config.inode` decides whether to display inode numbers beside names using [`get_inode`]. /// * `config.inode` decides whether to display inode numbers beside names using [`get_inode`].
/// * `config.color` decides whether it's going to color `name` using [`color_name`]. /// * `config.color` decides whether it's going to color `name` using [`color_name`].
/// * `config.indicator_style` to append specific characters to `name` using [`classify_file`]. /// * `config.indicator_style` to append specific characters to `name` using [`classify_file`].
@ -3173,7 +3175,7 @@ fn display_item_name(
current_column: LazyCell<usize, Box<dyn FnOnce() -> usize + '_>>, current_column: LazyCell<usize, Box<dyn FnOnce() -> usize + '_>>,
) -> OsString { ) -> OsString {
// This is our return value. We start by `&path.display_name` and modify it along the way. // This is our return value. We start by `&path.display_name` and modify it along the way.
let mut name = escape_name(&path.display_name, &config.quoting_style); let mut name = locale_aware_escape_name(&path.display_name, &config.quoting_style);
let is_wrap = let is_wrap =
|namelen: usize| config.width != 0 && *current_column + namelen > config.width.into(); |namelen: usize| config.width != 0 && *current_column + namelen > config.width.into();
@ -3265,7 +3267,7 @@ fn display_item_name(
name.push(path.p_buf.read_link().unwrap()); name.push(path.p_buf.read_link().unwrap());
} else { } else {
name.push(color_name( name.push(color_name(
escape_name(target.as_os_str(), &config.quoting_style), locale_aware_escape_name(target.as_os_str(), &config.quoting_style),
path, path,
style_manager, style_manager,
&mut state.out, &mut state.out,
@ -3276,7 +3278,10 @@ fn display_item_name(
} else { } else {
// If no coloring is required, we just use target as is. // If no coloring is required, we just use target as is.
// Apply the right quoting // Apply the right quoting
name.push(escape_name(target.as_os_str(), &config.quoting_style)); name.push(locale_aware_escape_name(
target.as_os_str(),
&config.quoting_style,
));
} }
} }
Err(err) => { Err(err) => {

View file

@ -259,7 +259,9 @@ impl<'a> Input<'a> {
Self::Path(path) => { Self::Path(path) => {
let path = path.as_os_str(); let path = path.as_os_str();
if path.to_string_lossy().contains('\n') { if path.to_string_lossy().contains('\n') {
Some(Cow::Owned(quoting_style::escape_name(path, QS_ESCAPE))) Some(Cow::Owned(quoting_style::locale_aware_escape_name(
path, QS_ESCAPE,
)))
} else { } else {
Some(Cow::Borrowed(path)) Some(Cow::Borrowed(path))
} }
@ -759,7 +761,7 @@ fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterIt
"wc-error-cannot-open-for-reading", "wc-error-cannot-open-for-reading",
HashMap::from([( HashMap::from([(
"path".to_string(), "path".to_string(),
quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE) quoting_style::locale_aware_escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
.into_string() .into_string()
.expect("All escaped names with the escaping option return valid strings."), .expect("All escaped names with the escaping option return valid strings."),
)]), )]),
@ -812,7 +814,7 @@ fn files0_iter<'a>(
} }
fn escape_name_wrapper(name: &OsStr) -> String { fn escape_name_wrapper(name: &OsStr) -> String {
quoting_style::escape_name(name, QS_ESCAPE) quoting_style::locale_aware_escape_name(name, QS_ESCAPE)
.into_string() .into_string()
.expect("All escaped names with the escaping option return valid strings.") .expect("All escaped names with the escaping option return valid strings.")
} }

View file

@ -8,7 +8,7 @@ use crate::format::spec::ArgumentLocation;
use crate::{ use crate::{
error::set_exit_code, error::set_exit_code,
parser::num_parser::{ExtendedParser, ExtendedParserError}, parser::num_parser::{ExtendedParser, ExtendedParserError},
quoting_style::{Quotes, QuotingStyle, escape_name}, quoting_style::{Quotes, QuotingStyle, locale_aware_escape_name},
show_error, show_warning, show_error, show_warning,
}; };
use os_display::Quotable; use os_display::Quotable;
@ -153,7 +153,7 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
Ok(v) => v, Ok(v) => v,
Err(e) => { Err(e) => {
set_exit_code(1); set_exit_code(1);
let input = escape_name( let input = locale_aware_escape_name(
OsStr::new(input), OsStr::new(input),
&QuotingStyle::C { &QuotingStyle::C {
quotes: Quotes::None, quotes: Quotes::None,

View file

@ -5,7 +5,7 @@
// spell-checker:ignore (vars) intmax ptrdiff padlen // spell-checker:ignore (vars) intmax ptrdiff padlen
use crate::quoting_style::{QuotingStyle, escape_name}; use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
use super::{ use super::{
ExtendedBigDecimal, FormatChar, FormatError, OctalParsing, ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
@ -402,7 +402,7 @@ impl Spec {
writer.write_all(&parsed).map_err(FormatError::IoError) writer.write_all(&parsed).map_err(FormatError::IoError)
} }
Self::QuotedString { position } => { Self::QuotedString { position } => {
let s = escape_name( let s = locale_aware_escape_name(
args.next_string(position).as_ref(), args.next_string(position).as_ref(),
&QuotingStyle::Shell { &QuotingStyle::Shell {
escape: true, escape: true,

View file

@ -8,6 +8,7 @@
use std::ffi::{OsStr, OsString}; use std::ffi::{OsStr, OsString};
use std::fmt; use std::fmt;
use crate::i18n::{self, UEncoding};
use crate::quoting_style::c_quoter::CQuoter; use crate::quoting_style::c_quoter::CQuoter;
use crate::quoting_style::literal_quoter::LiteralQuoter; use crate::quoting_style::literal_quoter::LiteralQuoter;
use crate::quoting_style::shell_quoter::{EscapedShellQuoter, NonEscapedShellQuoter}; use crate::quoting_style::shell_quoter::{EscapedShellQuoter, NonEscapedShellQuoter};
@ -89,7 +90,12 @@ pub enum Quotes {
/// ///
/// This inner function provides an additional flag `dirname` which /// This inner function provides an additional flag `dirname` which
/// is meant for ls' directory name display. /// is meant for ls' directory name display.
fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8> { fn escape_name_inner(
name: &[u8],
style: &QuotingStyle,
dirname: bool,
encoding: UEncoding,
) -> Vec<u8> {
// Early handle Literal with show_control style // Early handle Literal with show_control style
if let QuotingStyle::Literal { show_control: true } = style { if let QuotingStyle::Literal { show_control: true } = style {
return name.to_owned(); return name.to_owned();
@ -121,30 +127,53 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8
)), )),
}; };
match encoding {
UEncoding::Ascii => {
for b in name {
if b.is_ascii() {
quoter.push_char(*b as char);
} else {
quoter.push_invalid(&[*b]);
}
}
}
UEncoding::Utf8 => {
for chunk in name.utf8_chunks() { for chunk in name.utf8_chunks() {
quoter.push_str(chunk.valid()); quoter.push_str(chunk.valid());
quoter.push_invalid(chunk.invalid()); quoter.push_invalid(chunk.invalid());
} }
}
}
quoter.finalize() quoter.finalize()
} }
/// Escape a filename with respect to the given style. /// Escape a filename with respect to the given style.
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString { pub fn escape_name(name: &OsStr, style: &QuotingStyle, encoding: UEncoding) -> OsString {
let name = crate::os_str_as_bytes_lossy(name); let name = crate::os_str_as_bytes_lossy(name);
crate::os_string_from_vec(escape_name_inner(&name, style, false)) crate::os_string_from_vec(escape_name_inner(&name, style, false, encoding))
.expect("all byte sequences should be valid for platform, or already replaced in name") .expect("all byte sequences should be valid for platform, or already replaced in name")
} }
/// Retrieve the encoding from the locale and pass it to `escape_name`.
pub fn locale_aware_escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
escape_name(name, style, i18n::get_locale_encoding())
}
/// Escape a directory name with respect to the given style. /// Escape a directory name with respect to the given style.
/// This is mainly meant to be used for ls' directory name printing and is not /// This is mainly meant to be used for ls' directory name printing and is not
/// likely to be used elsewhere. /// likely to be used elsewhere.
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle, encoding: UEncoding) -> OsString {
let name = crate::os_str_as_bytes_lossy(dir_name); let name = crate::os_str_as_bytes_lossy(dir_name);
crate::os_string_from_vec(escape_name_inner(&name, style, true)) crate::os_string_from_vec(escape_name_inner(&name, style, true, encoding))
.expect("all byte sequences should be valid for platform, or already replaced in name") .expect("all byte sequences should be valid for platform, or already replaced in name")
} }
/// Retrieve the encoding from the locale and pass it to `escape_dir_name`.
pub fn locale_aware_escape_dir_name(name: &OsStr, style: &QuotingStyle) -> OsString {
escape_dir_name(name, style, i18n::get_locale_encoding())
}
impl fmt::Display for QuotingStyle { impl fmt::Display for QuotingStyle {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self { match *self {
@ -183,7 +212,10 @@ impl fmt::Display for Quotes {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::quoting_style::{Quotes, QuotingStyle, escape_name_inner}; use crate::{
i18n::UEncoding,
quoting_style::{Quotes, QuotingStyle, escape_name_inner},
};
// spell-checker:ignore (tests/words) one\'two one'two // spell-checker:ignore (tests/words) one\'two one'two
@ -235,7 +267,7 @@ mod tests {
fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> { fn check_names_inner<T>(name: &[u8], map: &[(T, &str)]) -> Vec<Vec<u8>> {
map.iter() map.iter()
.map(|(_, style)| escape_name_inner(name, &get_style(style), false)) .map(|(_, style)| escape_name_inner(name, &get_style(style), false, UEncoding::Utf8))
.collect() .collect()
} }

2
test/sums Normal file
View file

@ -0,0 +1,2 @@
SHA256 (funkyÿname) = 29953405eaa3dcc41c37d1621d55b6a47eee93e05613e439e73295029740b10c
SHA256 (funkyÿ) = 29953405eaa3dcc41c37d1621d55b6a47eee93e05613e439e73295029740b10c