diff --git a/Cargo.lock b/Cargo.lock index 74423c091..1e5cf14b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 4 [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" @@ -55,9 +55,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -70,33 +70,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.8" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", "once_cell_polyfill", @@ -268,9 +268,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "bytecount" @@ -292,9 +292,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.25" +version = "1.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" dependencies = [ "shlex", ] @@ -310,9 +310,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "cfg_aliases" @@ -403,9 +403,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "clap_mangen" @@ -419,9 +419,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "compare" @@ -992,9 +992,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ "crc32fast", "libz-rs-sys", @@ -1156,7 +1156,7 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", ] [[package]] @@ -1195,9 +1195,9 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "hashbrown" -version = "0.15.3" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ "allocator-api2", "equivalent", @@ -1251,6 +1251,140 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collator" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ad4c6a556938dfd31f75a8c54141079e8821dc697ffb799cfe0f0fa11f2edc" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locale", + "icu_locale_core", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collator_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d880b8e680799eabd90c054e1b95526cd48db16c95269f3c89fb3117e1ac92c5" + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ae5921528335e91da1b6c695dbf1ec37df5ac13faa3f91e5640be93aa2fbefd" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_locale_data", + "icu_provider", + "potential_utf", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locale_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fdef0c124749d06a743c69e938350816554eb63ac979166590e2b4ee4252765" + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + +[[package]] +name = "icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + [[package]] name = "indexmap" version = "2.9.0" @@ -1258,7 +1392,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.3", + "hashbrown 0.15.4", ] [[package]] @@ -1467,9 +1601,9 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a" +checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" dependencies = [ "zlib-rs", ] @@ -1480,6 +1614,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "litemap" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + [[package]] name = "litrs" version = "0.4.1" @@ -1508,7 +1648,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.3", + "hashbrown 0.15.4", ] [[package]] @@ -1554,9 +1694,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", ] @@ -1569,7 +1709,7 @@ checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.59.0", ] @@ -1905,9 +2045,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "portable-atomic-util" @@ -1918,6 +2058,16 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "serde", + "zerovec", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -1945,9 +2095,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.33" +version = "0.2.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dee91521343f4c5c6a63edd65e54f31f5c92fe8978c40a4282f8372194c6a7d" +checksum = "6837b9e10d61f45f987d50808f83d1ee3d206c66acf650c3e4ae2e1f6ddedf55" dependencies = [ "proc-macro2", "syn", @@ -2073,9 +2223,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.12" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" dependencies = [ "bitflags 2.9.1", ] @@ -2426,6 +2576,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.1" @@ -2434,15 +2590,26 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.101" +version = "2.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tap" version = "1.0.1" @@ -2588,15 +2755,15 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" [[package]] name = "toml_edit" -version = "0.22.26" +version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", "toml_datetime", @@ -2684,6 +2851,18 @@ version = "0.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -3777,6 +3956,8 @@ dependencies = [ "fluent-syntax", "glob", "hex", + "icu_collator", + "icu_locale", "itertools 0.14.0", "libc", "md-5", @@ -3879,9 +4060,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" @@ -4047,9 +4228,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-result" @@ -4292,9 +4473,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" +checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" dependencies = [ "memchr", ] @@ -4308,6 +4489,12 @@ dependencies = [ "bitflags 2.9.1", ] +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + [[package]] name = "wyz" version = "0.5.1" @@ -4333,6 +4520,30 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "z85" version = "3.0.6" @@ -4385,6 +4596,32 @@ name = "zerofrom" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] [[package]] name = "zerovec" @@ -4392,7 +4629,20 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ + "yoke", "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -4411,9 +4661,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8" +checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" [[package]] name = "zopfli" diff --git a/Cargo.toml b/Cargo.toml index e77a8c7a1..0bb8432d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ # coreutils (uutils) # * see the repository LICENSE, README, and CONTRIBUTING files for more information -# spell-checker:ignore (libs) bigdecimal datetime serde bincode gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl +# spell-checker:ignore (libs) bigdecimal datetime serde bincode gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl unnested [package] name = "coreutils" @@ -313,6 +313,8 @@ gcd = "2.3" glob = "0.3.1" half = "2.4.1" hostname = "0.4" +icu_collator = "2.0.0" +icu_locale = "2.0.0" indicatif = "0.17.8" itertools = "0.14.0" jiff = { version = "0.2.10", default-features = false, features = [ diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 2c7d9199d..d29ec288e 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -28,9 +28,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -43,33 +43,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.8" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", "once_cell_polyfill", @@ -174,21 +174,21 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "bytecount" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "cc" -version = "1.2.23" +version = "1.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" dependencies = [ "jobserver", "libc", @@ -197,9 +197,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "cfg_aliases" @@ -221,18 +221,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.38" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.38" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -243,15 +243,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "compare" @@ -483,7 +483,7 @@ dependencies = [ "fluent-syntax", "intl-memoizer", "intl_pluralrules", - "rustc-hash 2.1.1", + "rustc-hash", "self_cell", "smallvec", "unic-langid", @@ -532,7 +532,7 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", ] [[package]] @@ -589,6 +589,140 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collator" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ad4c6a556938dfd31f75a8c54141079e8821dc697ffb799cfe0f0fa11f2edc" +dependencies = [ + "displaydoc", + "icu_collator_data", + "icu_collections", + "icu_locale", + "icu_locale_core", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerovec", +] + +[[package]] +name = "icu_collator_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d880b8e680799eabd90c054e1b95526cd48db16c95269f3c89fb3117e1ac92c5" + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ae5921528335e91da1b6c695dbf1ec37df5ac13faa3f91e5640be93aa2fbefd" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_locale_data", + "icu_provider", + "potential_utf", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locale_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fdef0c124749d06a743c69e938350816554eb63ac979166590e2b4ee4252765" + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + +[[package]] +name = "icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + [[package]] name = "intl-memoizer" version = "0.5.3" @@ -625,9 +759,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27e77966151130221b079bcec80f1f34a9e414fa489d99152a201c07fd2182bc" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -640,9 +774,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97265751f8a9a4228476f2fc17874a9e7e70e96b893368e42619880fe143b48a" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", @@ -721,6 +855,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "litemap" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + [[package]] name = "log" version = "0.4.27" @@ -739,9 +879,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "nix" @@ -870,9 +1010,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "portable-atomic-util" @@ -883,6 +1023,16 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "serde", + "zerovec", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1005,12 +1155,6 @@ dependencies = [ "trim-in-place", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -1032,9 +1176,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "self_cell" @@ -1117,9 +1261,15 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "strsim" @@ -1129,15 +1279,26 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.101" +version = "2.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tempfile" version = "3.20.0" @@ -1208,11 +1369,11 @@ checksum = "343e926fc669bc8cde4fa3129ab681c63671bae288b1f1081ceee6d9d37904fc" [[package]] name = "type-map" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb68604048ff8fa93347f02441e4487594adc20bb8a084f9e564d2b827a0a9f" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" dependencies = [ - "rustc-hash 1.1.0", + "rustc-hash", ] [[package]] @@ -1247,9 +1408,21 @@ checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-width" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "utf8parse" @@ -1287,7 +1460,7 @@ dependencies = [ "libc", "parse_datetime", "uucore", - "windows-sys 0.60.1", + "windows-sys 0.60.2", ] [[package]] @@ -1423,6 +1596,8 @@ dependencies = [ "fluent-syntax", "glob", "hex", + "icu_collator", + "icu_locale", "itertools", "libc", "md-5", @@ -1440,7 +1615,7 @@ dependencies = [ "uucore_procs", "wild", "winapi-util", - "windows-sys 0.60.1", + "windows-sys 0.60.2", "z85", ] @@ -1500,9 +1675,9 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" @@ -1626,9 +1801,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-result" @@ -1659,11 +1834,11 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.60.1" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b36e9ed89376c545e20cbf5a13c306b49106b21b9d1d4f9cb9a1cb6b1e9ee06a" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.1", + "windows-targets 0.53.2", ] [[package]] @@ -1684,9 +1859,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.1" +version = "0.53.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30357ec391cde730f8fbfcdc29adc47518b06504528df977ab5af02ef23fdee9" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" dependencies = [ "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", @@ -1803,6 +1978,36 @@ dependencies = [ "bitflags", ] +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "z85" version = "3.0.6" @@ -1834,6 +2039,32 @@ name = "zerofrom" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] [[package]] name = "zerovec" @@ -1841,5 +2072,18 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ + "yoke", "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 41cd92010..9a9e35e00 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -61,7 +61,9 @@ use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR}; use uucore::libc::{dev_t, major, minor}; use uucore::line_ending::LineEnding; use uucore::locale::{get_message, get_message_with_args}; -use uucore::quoting_style::{self, QuotingStyle, escape_name}; +use uucore::quoting_style::{ + self, QuotingStyle, locale_aware_escape_dir_name, locale_aware_escape_name, +}; use uucore::{ display::Quotable, error::{UError, UResult, set_exit_code}, @@ -2008,7 +2010,7 @@ fn show_dir_name( config: &Config, ) -> std::io::Result<()> { let escaped_name = - quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style); + locale_aware_escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style); let name = if config.hyperlink && !config.dired { create_hyperlink(&escaped_name, path_data) @@ -2509,7 +2511,7 @@ fn display_items( // option, print the security context to the left of the size column. let quoted = items.iter().any(|item| { - let name = escape_name(&item.display_name, &config.quoting_style); + let name = locale_aware_escape_name(&item.display_name, &config.quoting_style); os_str_starts_with(&name, b"'") }); @@ -3152,7 +3154,7 @@ fn classify_file(path: &PathData, out: &mut BufWriter) -> Option { /// Takes a [`PathData`] struct and returns a cell with a name ready for displaying. /// /// This function relies on the following parameters in the provided `&Config`: -/// * `config.quoting_style` to decide how we will escape `name` using [`escape_name`]. +/// * `config.quoting_style` to decide how we will escape `name` using [`locale_aware_escape_name`]. /// * `config.inode` decides whether to display inode numbers beside names using [`get_inode`]. /// * `config.color` decides whether it's going to color `name` using [`color_name`]. /// * `config.indicator_style` to append specific characters to `name` using [`classify_file`]. @@ -3173,7 +3175,7 @@ fn display_item_name( current_column: LazyCell usize + '_>>, ) -> OsString { // This is our return value. We start by `&path.display_name` and modify it along the way. - let mut name = escape_name(&path.display_name, &config.quoting_style); + let mut name = locale_aware_escape_name(&path.display_name, &config.quoting_style); let is_wrap = |namelen: usize| config.width != 0 && *current_column + namelen > config.width.into(); @@ -3265,7 +3267,7 @@ fn display_item_name( name.push(path.p_buf.read_link().unwrap()); } else { name.push(color_name( - escape_name(target.as_os_str(), &config.quoting_style), + locale_aware_escape_name(target.as_os_str(), &config.quoting_style), path, style_manager, &mut state.out, @@ -3276,7 +3278,10 @@ fn display_item_name( } else { // If no coloring is required, we just use target as is. // Apply the right quoting - name.push(escape_name(target.as_os_str(), &config.quoting_style)); + name.push(locale_aware_escape_name( + target.as_os_str(), + &config.quoting_style, + )); } } Err(err) => { diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 65a9c6fe1..c00b09bbf 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -259,7 +259,9 @@ impl<'a> Input<'a> { Self::Path(path) => { let path = path.as_os_str(); if path.to_string_lossy().contains('\n') { - Some(Cow::Owned(quoting_style::escape_name(path, QS_ESCAPE))) + Some(Cow::Owned(quoting_style::locale_aware_escape_name( + path, QS_ESCAPE, + ))) } else { Some(Cow::Borrowed(path)) } @@ -759,7 +761,7 @@ fn files0_iter_file<'a>(path: &Path) -> UResult( } fn escape_name_wrapper(name: &OsStr) -> String { - quoting_style::escape_name(name, QS_ESCAPE) + quoting_style::locale_aware_escape_name(name, QS_ESCAPE) .into_string() .expect("All escaped names with the escaping option return valid strings.") } diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 7101cca70..5dc64b2b8 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -27,6 +27,10 @@ dns-lookup = { workspace = true, optional = true } dunce = { version = "1.0.4", optional = true } wild = "2.2.1" glob = { workspace = true, optional = true } +icu_collator = { workspace = true, optional = true, features = [ + "compiled_data", +] } +icu_locale = { workspace = true, optional = true, features = ["compiled_data"] } itertools = { workspace = true, optional = true } time = { workspace = true, optional = true, features = [ "formatting", @@ -106,6 +110,7 @@ format = [ "num-traits", "quoting-style", ] +i18n = ["icu_collator", "icu_locale"] mode = ["libc"] perms = ["entries", "libc", "walkdir"] buf-copy = [] @@ -113,7 +118,7 @@ parser = ["extendedbigdecimal", "glob", "num-traits"] pipes = [] process = ["libc"] proc-info = ["tty", "walkdir"] -quoting-style = [] +quoting-style = ["i18n"] ranges = [] ringbuffer = [] selinux = ["dep:selinux"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index 44db53071..fcc97b0f0 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -26,6 +26,8 @@ pub mod format; pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; +#[cfg(feature = "i18n")] +pub mod i18n; #[cfg(feature = "lines")] pub mod lines; #[cfg(feature = "parser")] diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index f3edbae55..349527db7 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -8,7 +8,7 @@ use crate::format::spec::ArgumentLocation; use crate::{ error::set_exit_code, parser::num_parser::{ExtendedParser, ExtendedParserError}, - quoting_style::{Quotes, QuotingStyle, escape_name}, + quoting_style::{Quotes, QuotingStyle, locale_aware_escape_name}, show_error, show_warning, }; use os_display::Quotable; @@ -153,7 +153,7 @@ fn extract_value(p: Result>, input: &s Ok(v) => v, Err(e) => { set_exit_code(1); - let input = escape_name( + let input = locale_aware_escape_name( OsStr::new(input), &QuotingStyle::C { quotes: Quotes::None, diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index d22626590..3cffc08bc 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -5,7 +5,7 @@ // spell-checker:ignore (vars) intmax ptrdiff padlen -use crate::quoting_style::{QuotingStyle, escape_name}; +use crate::quoting_style::{QuotingStyle, locale_aware_escape_name}; use super::{ ExtendedBigDecimal, FormatChar, FormatError, OctalParsing, @@ -402,7 +402,7 @@ impl Spec { writer.write_all(&parsed).map_err(FormatError::IoError) } Self::QuotedString { position } => { - let s = escape_name( + let s = locale_aware_escape_name( args.next_string(position).as_ref(), &QuotingStyle::Shell { escape: true, diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs new file mode 100644 index 000000000..5a7cf8ea3 --- /dev/null +++ b/src/uucore/src/lib/features/i18n/mod.rs @@ -0,0 +1,62 @@ +use std::sync::OnceLock; + +use icu_locale::{Locale, locale}; + +/// The encoding specified by the locale, if specified +/// Currently only supports ASCII and UTF-8 for the sake of simplicity. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum UEncoding { + Ascii, + Utf8, +} + +const DEFAULT_LOCALE: Locale = locale!("en-US-posix"); + +/// Deduce the locale from the current environment +fn get_collating_locale() -> &'static (Locale, UEncoding) { + static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new(); + + COLLATING_LOCALE.get_or_init(|| { + // Look at 3 environment variables in the following order + // + // 1. LC_ALL + // 2. LC_COLLATE + // 3. LANG + // + // Or fallback on Posix locale, with ASCII encoding. + + let locale_var = std::env::var("LC_ALL") + .or_else(|_| std::env::var("LC_COLLATE")) + .or_else(|_| std::env::var("LANG")); + + if let Ok(locale_var_str) = locale_var { + let mut split = locale_var_str.split(&['.', '@']); + + if let Some(simple) = split.next() { + let bcp47 = simple.replace("_", "-"); + let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE); + + // If locale parsing failed, parse the encoding part of the + // locale. Treat the special case of the given locale being "C" + // which becomes the default locale. + let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C") + && split.next() == Some("UTF-8") + { + UEncoding::Utf8 + } else { + UEncoding::Ascii + }; + return (locale, encoding); + } else { + return (DEFAULT_LOCALE, UEncoding::Ascii); + }; + } + // Default POSIX locale representing LC_ALL=C + (DEFAULT_LOCALE, UEncoding::Ascii) + }) +} + +/// Return the encoding deduced from the locale environment variable. +pub fn get_locale_encoding() -> UEncoding { + get_collating_locale().1 +} diff --git a/src/uucore/src/lib/features/quoting_style/c_quoter.rs b/src/uucore/src/lib/features/quoting_style/c_quoter.rs new file mode 100644 index 000000000..47a215719 --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/c_quoter.rs @@ -0,0 +1,57 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use super::{EscapedChar, Quoter, Quotes}; + +pub(super) struct CQuoter { + /// The type of quotes to use. + quotes: Quotes, + + dirname: bool, + + buffer: Vec, +} + +impl CQuoter { + pub fn new(quotes: Quotes, dirname: bool, size_hint: usize) -> Self { + let mut buffer = Vec::with_capacity(size_hint); + match quotes { + Quotes::None => (), + Quotes::Single => buffer.push(b'\''), + Quotes::Double => buffer.push(b'"'), + } + + Self { + quotes, + dirname, + buffer, + } + } +} + +impl Quoter for CQuoter { + fn push_char(&mut self, input: char) { + let escaped: String = EscapedChar::new_c(input, self.quotes, self.dirname) + .hide_control() + .collect(); + self.buffer.extend_from_slice(escaped.as_bytes()); + } + + fn push_invalid(&mut self, input: &[u8]) { + for b in input { + let escaped: String = EscapedChar::new_octal(*b).hide_control().collect(); + self.buffer.extend_from_slice(escaped.as_bytes()); + } + } + + fn finalize(mut self: Box) -> Vec { + match self.quotes { + Quotes::None => (), + Quotes::Single => self.buffer.push(b'\''), + Quotes::Double => self.buffer.push(b'"'), + } + self.buffer + } +} diff --git a/src/uucore/src/lib/features/quoting_style/escaped_char.rs b/src/uucore/src/lib/features/quoting_style/escaped_char.rs new file mode 100644 index 000000000..e9a14ca73 --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/escaped_char.rs @@ -0,0 +1,201 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::char::from_digit; + +use super::Quotes; + +// PR#6559 : Remove `]{}` from special shell chars. +const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; + +// This implementation is heavily inspired by the std::char::EscapeDefault implementation +// in the Rust standard library. This custom implementation is needed because the +// characters \a, \b, \e, \f & \v are not recognized by Rust. +pub struct EscapedChar { + pub state: EscapeState, +} + +pub enum EscapeState { + Done, + Char(char), + Backslash(char), + ForceQuote(char), + Octal(EscapeOctal), +} + +/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte. +/// Only supports characters up to 2 bytes long in UTF-8. +pub struct EscapeOctal { + c: [u8; 2], + state: EscapeOctalState, + idx: u8, +} + +enum EscapeOctalState { + Done, + FirstBackslash, + FirstValue, + LastBackslash, + LastValue, +} + +fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 { + (byte >> (idx * 3)) & 0o7 +} + +impl Iterator for EscapeOctal { + type Item = char; + + fn next(&mut self) -> Option { + match self.state { + EscapeOctalState::Done => None, + EscapeOctalState::FirstBackslash => { + self.state = EscapeOctalState::FirstValue; + Some('\\') + } + EscapeOctalState::LastBackslash => { + self.state = EscapeOctalState::LastValue; + Some('\\') + } + EscapeOctalState::FirstValue => { + let octal_digit = byte_to_octal_digit(self.c[0], self.idx); + if self.idx == 0 { + self.state = EscapeOctalState::LastBackslash; + self.idx = 2; + } else { + self.idx -= 1; + } + Some(from_digit(octal_digit.into(), 8).unwrap()) + } + EscapeOctalState::LastValue => { + let octal_digit = byte_to_octal_digit(self.c[1], self.idx); + if self.idx == 0 { + self.state = EscapeOctalState::Done; + } else { + self.idx -= 1; + } + Some(from_digit(octal_digit.into(), 8).unwrap()) + } + } + } +} + +impl EscapeOctal { + fn from_char(c: char) -> Self { + if c.len_utf8() == 1 { + return Self::from_byte(c as u8); + } + + let mut buf = [0; 2]; + let _s = c.encode_utf8(&mut buf); + Self { + c: buf, + idx: 2, + state: EscapeOctalState::FirstBackslash, + } + } + + fn from_byte(b: u8) -> Self { + Self { + c: [0, b], + idx: 2, + state: EscapeOctalState::LastBackslash, + } + } +} + +impl EscapedChar { + pub fn new_literal(c: char) -> Self { + Self { + state: EscapeState::Char(c), + } + } + + pub fn new_octal(b: u8) -> Self { + Self { + state: EscapeState::Octal(EscapeOctal::from_byte(b)), + } + } + + pub fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { + use EscapeState::*; + let init_state = match c { + '\x07' => Backslash('a'), + '\x08' => Backslash('b'), + '\t' => Backslash('t'), + '\n' => Backslash('n'), + '\x0B' => Backslash('v'), + '\x0C' => Backslash('f'), + '\r' => Backslash('r'), + '\\' => Backslash('\\'), + '\'' => match quotes { + Quotes::Single => Backslash('\''), + _ => Char('\''), + }, + '"' => match quotes { + Quotes::Double => Backslash('"'), + _ => Char('"'), + }, + ' ' if !dirname => match quotes { + Quotes::None => Backslash(' '), + _ => Char(' '), + }, + ':' if dirname => Backslash(':'), + _ if c.is_control() => Octal(EscapeOctal::from_char(c)), + _ => Char(c), + }; + Self { state: init_state } + } + + pub fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self { + use EscapeState::*; + let init_state = match c { + _ if !escape && c.is_control() => Char(c), + '\x07' => Backslash('a'), + '\x08' => Backslash('b'), + '\t' => Backslash('t'), + '\n' => Backslash('n'), + '\x0B' => Backslash('v'), + '\x0C' => Backslash('f'), + '\r' => Backslash('r'), + '\'' => match quotes { + Quotes::Single => Backslash('\''), + _ => Char('\''), + }, + _ if c.is_control() => Octal(EscapeOctal::from_char(c)), + _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), + _ => Char(c), + }; + Self { state: init_state } + } + + pub fn hide_control(self) -> Self { + match self.state { + EscapeState::Char(c) if c.is_control() => Self { + state: EscapeState::Char('?'), + }, + _ => self, + } + } +} + +impl Iterator for EscapedChar { + type Item = char; + + fn next(&mut self) -> Option { + match self.state { + EscapeState::Backslash(c) => { + self.state = EscapeState::Char(c); + Some('\\') + } + EscapeState::Char(c) | EscapeState::ForceQuote(c) => { + self.state = EscapeState::Done; + Some(c) + } + EscapeState::Done => None, + EscapeState::Octal(ref mut iter) => iter.next(), + } + } +} diff --git a/src/uucore/src/lib/features/quoting_style/literal_quoter.rs b/src/uucore/src/lib/features/quoting_style/literal_quoter.rs new file mode 100644 index 000000000..555bbf890 --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/literal_quoter.rs @@ -0,0 +1,31 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use super::{EscapedChar, Quoter}; + +pub(super) struct LiteralQuoter(Vec); + +impl LiteralQuoter { + pub fn new(size_hint: usize) -> Self { + Self(Vec::with_capacity(size_hint)) + } +} + +impl Quoter for LiteralQuoter { + fn push_char(&mut self, input: char) { + let escaped = EscapedChar::new_literal(input) + .hide_control() + .collect::(); + self.0.extend(escaped.as_bytes()); + } + + fn push_invalid(&mut self, input: &[u8]) { + self.0.extend(std::iter::repeat_n(b'?', input.len())); + } + + fn finalize(self: Box) -> Vec { + self.0 + } +} diff --git a/src/uucore/src/lib/features/quoting_style.rs b/src/uucore/src/lib/features/quoting_style/mod.rs similarity index 67% rename from src/uucore/src/lib/features/quoting_style.rs rename to src/uucore/src/lib/features/quoting_style/mod.rs index d9dcd078b..e5edc8fe9 100644 --- a/src/uucore/src/lib/features/quoting_style.rs +++ b/src/uucore/src/lib/features/quoting_style/mod.rs @@ -5,15 +5,20 @@ //! Set of functions for escaping names according to different quoting styles. -use std::char::from_digit; use std::ffi::{OsStr, OsString}; use std::fmt; -// These are characters with special meaning in the shell (e.g. bash). -// The first const contains characters that only have a special meaning when they appear at the beginning of a name. -const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; -// PR#6559 : Remove `]{}` from special shell chars. -const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! "; +use crate::i18n::{self, UEncoding}; +use crate::quoting_style::c_quoter::CQuoter; +use crate::quoting_style::literal_quoter::LiteralQuoter; +use crate::quoting_style::shell_quoter::{EscapedShellQuoter, NonEscapedShellQuoter}; + +mod escaped_char; +pub use escaped_char::{EscapeState, EscapedChar}; + +mod c_quoter; +mod literal_quoter; +mod shell_quoter; /// The quoting style to use when escaping a name. #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -47,6 +52,26 @@ pub enum QuotingStyle { }, } +/// Common interface of quoting mechanisms. +trait Quoter { + /// Push a valid character. + fn push_char(&mut self, input: char); + + /// Push a sequence of valid characters. + fn push_str(&mut self, input: &str) { + for c in input.chars() { + self.push_char(c); + } + } + + /// Push a continuous slice of invalid data wrt the encoding used to + /// decode the stream. + fn push_invalid(&mut self, input: &[u8]); + + /// Apply post-processing on the constructed buffer and return it. + fn finalize(self: Box) -> Vec; +} + /// The type of quotes to use when escaping a name as a C string. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Quotes { @@ -61,419 +86,94 @@ pub enum Quotes { // TODO: Locale } -// This implementation is heavily inspired by the std::char::EscapeDefault implementation -// in the Rust standard library. This custom implementation is needed because the -// characters \a, \b, \e, \f & \v are not recognized by Rust. -struct EscapedChar { - state: EscapeState, -} - -enum EscapeState { - Done, - Char(char), - Backslash(char), - ForceQuote(char), - Octal(EscapeOctal), -} - -/// Bytes we need to present as escaped octal, in the form of `\nnn` per byte. -/// Only supports characters up to 2 bytes long in UTF-8. -struct EscapeOctal { - c: [u8; 2], - state: EscapeOctalState, - idx: u8, -} - -enum EscapeOctalState { - Done, - FirstBackslash, - FirstValue, - LastBackslash, - LastValue, -} - -fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 { - (byte >> (idx * 3)) & 0o7 -} - -impl Iterator for EscapeOctal { - type Item = char; - - fn next(&mut self) -> Option { - match self.state { - EscapeOctalState::Done => None, - EscapeOctalState::FirstBackslash => { - self.state = EscapeOctalState::FirstValue; - Some('\\') - } - EscapeOctalState::LastBackslash => { - self.state = EscapeOctalState::LastValue; - Some('\\') - } - EscapeOctalState::FirstValue => { - let octal_digit = byte_to_octal_digit(self.c[0], self.idx); - if self.idx == 0 { - self.state = EscapeOctalState::LastBackslash; - self.idx = 2; - } else { - self.idx -= 1; - } - Some(from_digit(octal_digit.into(), 8).unwrap()) - } - EscapeOctalState::LastValue => { - let octal_digit = byte_to_octal_digit(self.c[1], self.idx); - if self.idx == 0 { - self.state = EscapeOctalState::Done; - } else { - self.idx -= 1; - } - Some(from_digit(octal_digit.into(), 8).unwrap()) - } - } - } -} - -impl EscapeOctal { - fn from_char(c: char) -> Self { - if c.len_utf8() == 1 { - return Self::from_byte(c as u8); - } - - let mut buf = [0; 2]; - let _s = c.encode_utf8(&mut buf); - Self { - c: buf, - idx: 2, - state: EscapeOctalState::FirstBackslash, - } - } - - fn from_byte(b: u8) -> Self { - Self { - c: [0, b], - idx: 2, - state: EscapeOctalState::LastBackslash, - } - } -} - -impl EscapedChar { - fn new_literal(c: char) -> Self { - Self { - state: EscapeState::Char(c), - } - } - - fn new_octal(b: u8) -> Self { - Self { - state: EscapeState::Octal(EscapeOctal::from_byte(b)), - } - } - - fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self { - use EscapeState::*; - let init_state = match c { - '\x07' => Backslash('a'), - '\x08' => Backslash('b'), - '\t' => Backslash('t'), - '\n' => Backslash('n'), - '\x0B' => Backslash('v'), - '\x0C' => Backslash('f'), - '\r' => Backslash('r'), - '\\' => Backslash('\\'), - '\'' => match quotes { - Quotes::Single => Backslash('\''), - _ => Char('\''), - }, - '"' => match quotes { - Quotes::Double => Backslash('"'), - _ => Char('"'), - }, - ' ' if !dirname => match quotes { - Quotes::None => Backslash(' '), - _ => Char(' '), - }, - ':' if dirname => Backslash(':'), - _ if c.is_control() => Octal(EscapeOctal::from_char(c)), - _ => Char(c), - }; - Self { state: init_state } - } - - fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self { - use EscapeState::*; - let init_state = match c { - _ if !escape && c.is_control() => Char(c), - '\x07' => Backslash('a'), - '\x08' => Backslash('b'), - '\t' => Backslash('t'), - '\n' => Backslash('n'), - '\x0B' => Backslash('v'), - '\x0C' => Backslash('f'), - '\r' => Backslash('r'), - '\'' => match quotes { - Quotes::Single => Backslash('\''), - _ => Char('\''), - }, - _ if c.is_control() => Octal(EscapeOctal::from_char(c)), - _ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c), - _ => Char(c), - }; - Self { state: init_state } - } - - fn hide_control(self) -> Self { - match self.state { - EscapeState::Char(c) if c.is_control() => Self { - state: EscapeState::Char('?'), - }, - _ => self, - } - } -} - -impl Iterator for EscapedChar { - type Item = char; - - fn next(&mut self) -> Option { - match self.state { - EscapeState::Backslash(c) => { - self.state = EscapeState::Char(c); - Some('\\') - } - EscapeState::Char(c) | EscapeState::ForceQuote(c) => { - self.state = EscapeState::Done; - Some(c) - } - EscapeState::Done => None, - EscapeState::Octal(ref mut iter) => iter.next(), - } - } -} - -/// Check whether `bytes` starts with any byte in `pattern`. -fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool { - !bytes.is_empty() && pattern.contains(&bytes[0]) -} - -fn shell_without_escape(name: &[u8], quotes: Quotes, show_control_chars: bool) -> (Vec, bool) { - let mut must_quote = false; - let mut escaped_str = Vec::with_capacity(name.len()); - let mut utf8_buf = vec![0; 4]; - - for s in name.utf8_chunks() { - for c in s.valid().chars() { - let escaped = { - let ec = EscapedChar::new_shell(c, false, quotes); - if show_control_chars { - ec - } else { - ec.hide_control() - } - }; - - match escaped.state { - EscapeState::Backslash('\'') => escaped_str.extend_from_slice(b"'\\''"), - EscapeState::ForceQuote(x) => { - must_quote = true; - escaped_str.extend_from_slice(x.encode_utf8(&mut utf8_buf).as_bytes()); - } - _ => { - for c in escaped { - escaped_str.extend_from_slice(c.encode_utf8(&mut utf8_buf).as_bytes()); - } - } - } - } - - if show_control_chars { - escaped_str.extend_from_slice(s.invalid()); - } else { - escaped_str.resize(escaped_str.len() + s.invalid().len(), b'?'); - } - } - - must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); - (escaped_str, must_quote) -} - -fn shell_with_escape(name: &[u8], quotes: Quotes) -> (Vec, bool) { - // We need to keep track of whether we are in a dollar expression - // because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n' - let mut in_dollar = false; - let mut must_quote = false; - let mut escaped_str = String::with_capacity(name.len()); - - for s in name.utf8_chunks() { - for c in s.valid().chars() { - let escaped = EscapedChar::new_shell(c, true, quotes); - match escaped.state { - EscapeState::Char(x) => { - if in_dollar { - escaped_str.push_str("''"); - in_dollar = false; - } - escaped_str.push(x); - } - EscapeState::ForceQuote(x) => { - if in_dollar { - escaped_str.push_str("''"); - in_dollar = false; - } - must_quote = true; - escaped_str.push(x); - } - // Single quotes are not put in dollar expressions, but are escaped - // if the string also contains double quotes. In that case, they must - // be handled separately. - EscapeState::Backslash('\'') => { - must_quote = true; - in_dollar = false; - escaped_str.push_str("'\\''"); - } - _ => { - if !in_dollar { - escaped_str.push_str("'$'"); - in_dollar = true; - } - must_quote = true; - for char in escaped { - escaped_str.push(char); - } - } - } - } - if !s.invalid().is_empty() { - if !in_dollar { - escaped_str.push_str("'$'"); - in_dollar = true; - } - must_quote = true; - let escaped_bytes: String = s - .invalid() - .iter() - .flat_map(|b| EscapedChar::new_octal(*b)) - .collect(); - escaped_str.push_str(&escaped_bytes); - } - } - must_quote = must_quote || bytes_start_with(name, SPECIAL_SHELL_CHARS_START); - (escaped_str.into(), must_quote) -} - -/// Return a set of characters that implies quoting of the word in -/// shell-quoting mode. -fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] { - const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r="; - // the ':' colon character only induce quoting in the - // context of ls displaying a directory name before listing its content. - // (e.g. with the recursive flag -R) - let start_index = if is_dirname { 0 } else { 1 }; - &ESCAPED_CHARS[start_index..] -} - /// Escape a name according to the given quoting style. /// /// This inner function provides an additional flag `dirname` which /// is meant for ls' directory name display. -fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec { - match style { - QuotingStyle::Literal { show_control } => { - if *show_control { - name.to_owned() - } else { - name.utf8_chunks() - .map(|s| { - let valid: String = s - .valid() - .chars() - .flat_map(|c| EscapedChar::new_literal(c).hide_control()) - .collect(); - let invalid = "?".repeat(s.invalid().len()); - valid + &invalid - }) - .collect::() - .into() - } - } - QuotingStyle::C { quotes } => { - let escaped_str: String = name - .utf8_chunks() - .flat_map(|s| { - let valid = s - .valid() - .chars() - .flat_map(|c| EscapedChar::new_c(c, *quotes, dirname)); - let invalid = s.invalid().iter().flat_map(|b| EscapedChar::new_octal(*b)); - valid.chain(invalid) - }) - .collect::(); +fn escape_name_inner( + name: &[u8], + style: &QuotingStyle, + dirname: bool, + encoding: UEncoding, +) -> Vec { + // Early handle Literal with show_control style + if let QuotingStyle::Literal { show_control: true } = style { + return name.to_owned(); + } - match quotes { - Quotes::Single => format!("'{escaped_str}'"), - Quotes::Double => format!("\"{escaped_str}\""), - Quotes::None => escaped_str, - } - .into() - } + let mut quoter: Box = match style { + QuotingStyle::Literal { .. } => Box::new(LiteralQuoter::new(name.len())), + QuotingStyle::C { quotes } => Box::new(CQuoter::new(*quotes, dirname, name.len())), QuotingStyle::Shell { - escape, + escape: true, + always_quote, + .. + } => Box::new(EscapedShellQuoter::new( + name, + *always_quote, + dirname, + name.len(), + )), + QuotingStyle::Shell { + escape: false, always_quote, show_control, - } => { - let (quotes, must_quote) = if name - .iter() - .any(|c| shell_escaped_char_set(dirname).contains(c)) - { - (Quotes::Single, true) - } else if name.contains(&b'\'') { - (Quotes::Double, true) - } else if *always_quote || name.is_empty() { - (Quotes::Single, true) - } else { - (Quotes::Single, false) - }; + } => Box::new(NonEscapedShellQuoter::new( + name, + *show_control, + *always_quote, + dirname, + name.len(), + )), + }; - let (escaped_str, contains_quote_chars) = if *escape { - shell_with_escape(name, quotes) - } else { - shell_without_escape(name, quotes, *show_control) - }; - - if must_quote | contains_quote_chars && quotes != Quotes::None { - let mut quoted_str = Vec::::with_capacity(escaped_str.len() + 2); - let quote = if quotes == Quotes::Single { - b'\'' + match encoding { + UEncoding::Ascii => { + for b in name { + if b.is_ascii() { + quoter.push_char(*b as char); } else { - b'"' - }; - quoted_str.push(quote); - quoted_str.extend(escaped_str); - quoted_str.push(quote); - quoted_str - } else { - escaped_str + quoter.push_invalid(&[*b]); + } + } + } + UEncoding::Utf8 => { + for chunk in name.utf8_chunks() { + quoter.push_str(chunk.valid()); + quoter.push_invalid(chunk.invalid()); } } } + + quoter.finalize() } /// Escape a filename with respect to the given style. -pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString { +pub fn escape_name(name: &OsStr, style: &QuotingStyle, encoding: UEncoding) -> OsString { let name = crate::os_str_as_bytes_lossy(name); - crate::os_string_from_vec(escape_name_inner(&name, style, false)) + crate::os_string_from_vec(escape_name_inner(&name, style, false, encoding)) .expect("all byte sequences should be valid for platform, or already replaced in name") } +/// Retrieve the encoding from the locale and pass it to `escape_name`. +pub fn locale_aware_escape_name(name: &OsStr, style: &QuotingStyle) -> OsString { + escape_name(name, style, i18n::get_locale_encoding()) +} + /// Escape a directory name with respect to the given style. /// This is mainly meant to be used for ls' directory name printing and is not /// likely to be used elsewhere. -pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString { +pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle, encoding: UEncoding) -> OsString { let name = crate::os_str_as_bytes_lossy(dir_name); - crate::os_string_from_vec(escape_name_inner(&name, style, true)) + crate::os_string_from_vec(escape_name_inner(&name, style, true, encoding)) .expect("all byte sequences should be valid for platform, or already replaced in name") } +/// Retrieve the encoding from the locale and pass it to `escape_dir_name`. +pub fn locale_aware_escape_dir_name(name: &OsStr, style: &QuotingStyle) -> OsString { + escape_dir_name(name, style, i18n::get_locale_encoding()) +} + impl fmt::Display for QuotingStyle { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { @@ -512,7 +212,10 @@ impl fmt::Display for Quotes { #[cfg(test)] mod tests { - use crate::quoting_style::{Quotes, QuotingStyle, escape_name_inner}; + use crate::{ + i18n::UEncoding, + quoting_style::{Quotes, QuotingStyle, escape_name_inner}, + }; // spell-checker:ignore (tests/words) one\'two one'two @@ -562,18 +265,18 @@ mod tests { } } - fn check_names_inner(name: &[u8], map: &[(T, &str)]) -> Vec> { + fn check_names_inner(encoding: UEncoding, name: &[u8], map: &[(T, &str)]) -> Vec> { map.iter() - .map(|(_, style)| escape_name_inner(name, &get_style(style), false)) + .map(|(_, style)| escape_name_inner(name, &get_style(style), false, encoding)) .collect() } - fn check_names(name: &str, map: &[(&str, &str)]) { + fn check_names_encoding(encoding: UEncoding, name: &str, map: &[(&str, &str)]) { assert_eq!( map.iter() .map(|(correct, _)| *correct) .collect::>(), - check_names_inner(name.as_bytes(), map) + check_names_inner(encoding, name.as_bytes(), map) .iter() .map(|bytes| std::str::from_utf8(bytes) .expect("valid str goes in, valid str comes out")) @@ -581,18 +284,28 @@ mod tests { ); } - fn check_names_raw(name: &[u8], map: &[(&[u8], &str)]) { + fn check_names_both(name: &str, map: &[(&str, &str)]) { + check_names_encoding(UEncoding::Utf8, name, map); + check_names_encoding(UEncoding::Ascii, name, map); + } + + fn check_names_encoding_raw(encoding: UEncoding, name: &[u8], map: &[(&[u8], &str)]) { assert_eq!( map.iter() .map(|(correct, _)| *correct) .collect::>(), - check_names_inner(name, map) + check_names_inner(encoding, name, map) ); } + fn check_names_raw_both(name: &[u8], map: &[(&[u8], &str)]) { + check_names_encoding_raw(UEncoding::Utf8, name, map); + check_names_encoding_raw(UEncoding::Ascii, name, map); + } + #[test] fn test_simple_names() { - check_names( + check_names_both( "one_two", &[ ("one_two", "literal"), @@ -611,7 +324,7 @@ mod tests { #[test] fn test_empty_string() { - check_names( + check_names_both( "", &[ ("", "literal"), @@ -630,7 +343,7 @@ mod tests { #[test] fn test_spaces() { - check_names( + check_names_both( "one two", &[ ("one two", "literal"), @@ -646,7 +359,7 @@ mod tests { ], ); - check_names( + check_names_both( " one", &[ (" one", "literal"), @@ -666,7 +379,7 @@ mod tests { #[test] fn test_quotes() { // One double quote - check_names( + check_names_both( "one\"two", &[ ("one\"two", "literal"), @@ -683,7 +396,7 @@ mod tests { ); // One single quote - check_names( + check_names_both( "one'two", &[ ("one'two", "literal"), @@ -700,7 +413,7 @@ mod tests { ); // One single quote and one double quote - check_names( + check_names_both( "one'two\"three", &[ ("one'two\"three", "literal"), @@ -717,7 +430,7 @@ mod tests { ); // Consecutive quotes - check_names( + check_names_both( "one''two\"\"three", &[ ("one''two\"\"three", "literal"), @@ -737,7 +450,7 @@ mod tests { #[test] fn test_control_chars() { // A simple newline - check_names( + check_names_both( "one\ntwo", &[ ("one?two", "literal"), @@ -754,7 +467,7 @@ mod tests { ); // A control character followed by a special shell character - check_names( + check_names_both( "one\n&two", &[ ("one?&two", "literal"), @@ -772,7 +485,7 @@ mod tests { // The first 16 ASCII control characters. NUL is also included, even though it is of // no importance for file names. - check_names( + check_names_both( "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", &[ ("????????????????", "literal"), @@ -810,7 +523,7 @@ mod tests { ); // The last 16 ASCII control characters. - check_names( + check_names_both( "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", &[ ("????????????????", "literal"), @@ -848,7 +561,7 @@ mod tests { ); // DEL - check_names( + check_names_both( "\x7F", &[ ("?", "literal"), @@ -866,10 +579,9 @@ mod tests { // The first 16 Unicode control characters. let test_str = std::str::from_utf8(b"\xC2\x80\xC2\x81\xC2\x82\xC2\x83\xC2\x84\xC2\x85\xC2\x86\xC2\x87\xC2\x88\xC2\x89\xC2\x8A\xC2\x8B\xC2\x8C\xC2\x8D\xC2\x8E\xC2\x8F").unwrap(); - check_names( + check_names_both( test_str, &[ - ("????????????????", "literal"), (test_str, "literal-show"), ( "\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217", @@ -879,9 +591,7 @@ mod tests { "\"\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217\"", "c", ), - ("????????????????", "shell"), (test_str, "shell-show"), - ("'????????????????'", "shell-always"), (&format!("'{test_str}'"), "shell-always-show"), ( "''$'\\302\\200\\302\\201\\302\\202\\302\\203\\302\\204\\302\\205\\302\\206\\302\\207\\302\\210\\302\\211\\302\\212\\302\\213\\302\\214\\302\\215\\302\\216\\302\\217'", @@ -893,13 +603,31 @@ mod tests { ), ], ); - - // The last 16 Unicode control characters. - let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap(); - check_names( + // Different expected output for UTF-8 and ASCII in these cases. + check_names_encoding( + UEncoding::Utf8, test_str, &[ ("????????????????", "literal"), + ("????????????????", "shell"), + ("'????????????????'", "shell-always"), + ], + ); + check_names_encoding( + UEncoding::Ascii, + test_str, + &[ + ("????????????????????????????????", "literal"), + ("????????????????????????????????", "shell"), + ("'????????????????????????????????'", "shell-always"), + ], + ); + + // The last 16 Unicode control characters. + let test_str = std::str::from_utf8(b"\xC2\x90\xC2\x91\xC2\x92\xC2\x93\xC2\x94\xC2\x95\xC2\x96\xC2\x97\xC2\x98\xC2\x99\xC2\x9A\xC2\x9B\xC2\x9C\xC2\x9D\xC2\x9E\xC2\x9F").unwrap(); + check_names_both( + test_str, + &[ (test_str, "literal-show"), ( "\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237", @@ -909,9 +637,7 @@ mod tests { "\"\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237\"", "c", ), - ("????????????????", "shell"), (test_str, "shell-show"), - ("'????????????????'", "shell-always"), (&format!("'{test_str}'"), "shell-always-show"), ( "''$'\\302\\220\\302\\221\\302\\222\\302\\223\\302\\224\\302\\225\\302\\226\\302\\227\\302\\230\\302\\231\\302\\232\\302\\233\\302\\234\\302\\235\\302\\236\\302\\237'", @@ -923,6 +649,25 @@ mod tests { ), ], ); + // Different expected output for UTF-8 and ASCII in these cases. + check_names_encoding( + UEncoding::Utf8, + test_str, + &[ + ("????????????????", "literal"), + ("????????????????", "shell"), + ("'????????????????'", "shell-always"), + ], + ); + check_names_encoding( + UEncoding::Ascii, + test_str, + &[ + ("????????????????????????????????", "literal"), + ("????????????????????????????????", "shell"), + ("'????????????????????????????????'", "shell-always"), + ], + ); } #[test] @@ -935,7 +680,7 @@ mod tests { let invalid = b'\xC0'; // a single byte value invalid outside of additional context in UTF-8 - check_names_raw( + check_names_raw_both( &[continuation], &[ (b"?", "literal"), @@ -953,24 +698,45 @@ mod tests { // ...but the byte becomes valid with appropriate context // (this is just the § character in UTF-8, written as bytes) - check_names_raw( - &[first2byte, continuation], + let input = &[first2byte, continuation]; + check_names_raw_both( + input, + &[ + (b"\xC2\xA7", "literal-show"), + (b"\xC2\xA7", "shell-show"), + (b"'\xC2\xA7'", "shell-always-show"), + ], + ); + // Different expected output for UTF-8 and ASCII in these cases. + check_names_encoding_raw( + UEncoding::Utf8, + input, &[ (b"\xC2\xA7", "literal"), - (b"\xC2\xA7", "literal-show"), (b"\xC2\xA7", "escape"), (b"\"\xC2\xA7\"", "c"), (b"\xC2\xA7", "shell"), - (b"\xC2\xA7", "shell-show"), (b"'\xC2\xA7'", "shell-always"), - (b"'\xC2\xA7'", "shell-always-show"), (b"\xC2\xA7", "shell-escape"), (b"'\xC2\xA7'", "shell-escape-always"), ], ); + check_names_encoding_raw( + UEncoding::Ascii, + input, + &[ + (b"??", "literal"), + (b"\\302\\247", "escape"), + (b"\"\\302\\247\"", "c"), + (b"??", "shell"), + (b"'??'", "shell-always"), + (b"''$'\\302\\247'", "shell-escape"), + (b"''$'\\302\\247'", "shell-escape-always"), + ], + ); // mixed with valid characters - check_names_raw( + check_names_raw_both( &[continuation, ascii], &[ (b"?_", "literal"), @@ -985,7 +751,7 @@ mod tests { (b"''$'\\247''_'", "shell-escape-always"), ], ); - check_names_raw( + check_names_raw_both( &[ascii, continuation], &[ (b"_?", "literal"), @@ -1000,7 +766,7 @@ mod tests { (b"'_'$'\\247'", "shell-escape-always"), ], ); - check_names_raw( + check_names_raw_both( &[ascii, continuation, ascii], &[ (b"_?_", "literal"), @@ -1015,7 +781,7 @@ mod tests { (b"'_'$'\\247''_'", "shell-escape-always"), ], ); - check_names_raw( + check_names_raw_both( &[continuation, ascii, continuation], &[ (b"?_?", "literal"), @@ -1032,7 +798,7 @@ mod tests { ); // contiguous invalid bytes - check_names_raw( + check_names_raw_both( &[ ascii, invalid, @@ -1086,7 +852,7 @@ mod tests { ); // invalid multi-byte sequences that start valid - check_names_raw( + check_names_raw_both( &[first2byte, ascii], &[ (b"?_", "literal"), @@ -1101,11 +867,15 @@ mod tests { (b"''$'\\302''_'", "shell-escape-always"), ], ); - check_names_raw( - &[first2byte, first2byte, continuation], + + let input = &[first2byte, first2byte, continuation]; + check_names_raw_both(input, &[(b"\xC2\xC2\xA7", "literal-show")]); + // Different expected output for UTF-8 and ASCII in these cases. + check_names_encoding_raw( + UEncoding::Utf8, + input, &[ (b"?\xC2\xA7", "literal"), - (b"\xC2\xC2\xA7", "literal-show"), (b"\\302\xC2\xA7", "escape"), (b"\"\\302\xC2\xA7\"", "c"), (b"?\xC2\xA7", "shell"), @@ -1116,7 +886,23 @@ mod tests { (b"''$'\\302''\xC2\xA7'", "shell-escape-always"), ], ); - check_names_raw( + check_names_encoding_raw( + UEncoding::Ascii, + input, + &[ + (b"???", "literal"), + (b"\\302\\302\\247", "escape"), + (b"\"\\302\\302\\247\"", "c"), + (b"???", "shell"), + (b"\xC2\xC2\xA7", "shell-show"), + (b"'???'", "shell-always"), + (b"'\xC2\xC2\xA7'", "shell-always-show"), + (b"''$'\\302\\302\\247'", "shell-escape"), + (b"''$'\\302\\302\\247'", "shell-escape-always"), + ], + ); + + check_names_raw_both( &[first3byte, continuation, ascii], &[ (b"??_", "literal"), @@ -1131,7 +917,7 @@ mod tests { (b"''$'\\340\\247''_'", "shell-escape-always"), ], ); - check_names_raw( + check_names_raw_both( &[first4byte, continuation, continuation, ascii], &[ (b"???_", "literal"), @@ -1153,7 +939,7 @@ mod tests { // A question mark must force quotes in shell and shell-always, unless // it is in place of a control character (that case is already covered // in other tests) - check_names( + check_names_both( "one?two", &[ ("one?two", "literal"), @@ -1173,7 +959,7 @@ mod tests { #[test] fn test_backslash() { // Escaped in C-style, but not in Shell-style escaping - check_names( + check_names_both( "one\\two", &[ ("one\\two", "literal"), @@ -1190,32 +976,32 @@ mod tests { #[test] fn test_tilde_and_hash() { - check_names("~", &[("'~'", "shell"), ("'~'", "shell-escape")]); - check_names( + check_names_both("~", &[("'~'", "shell"), ("'~'", "shell-escape")]); + check_names_both( "~name", &[("'~name'", "shell"), ("'~name'", "shell-escape")], ); - check_names( + check_names_both( "some~name", &[("some~name", "shell"), ("some~name", "shell-escape")], ); - check_names("name~", &[("name~", "shell"), ("name~", "shell-escape")]); + check_names_both("name~", &[("name~", "shell"), ("name~", "shell-escape")]); - check_names("#", &[("'#'", "shell"), ("'#'", "shell-escape")]); - check_names( + check_names_both("#", &[("'#'", "shell"), ("'#'", "shell-escape")]); + check_names_both( "#name", &[("'#name'", "shell"), ("'#name'", "shell-escape")], ); - check_names( + check_names_both( "some#name", &[("some#name", "shell"), ("some#name", "shell-escape")], ); - check_names("name#", &[("name#", "shell"), ("name#", "shell-escape")]); + check_names_both("name#", &[("name#", "shell"), ("name#", "shell-escape")]); } #[test] fn test_special_chars_in_double_quotes() { - check_names( + check_names_both( "can'$t", &[ ("'can'\\''$t'", "shell"), @@ -1225,7 +1011,7 @@ mod tests { ], ); - check_names( + check_names_both( "can'`t", &[ ("'can'\\''`t'", "shell"), @@ -1235,7 +1021,7 @@ mod tests { ], ); - check_names( + check_names_both( "can'\\t", &[ ("'can'\\''\\t'", "shell"), diff --git a/src/uucore/src/lib/features/quoting_style/shell_quoter.rs b/src/uucore/src/lib/features/quoting_style/shell_quoter.rs new file mode 100644 index 000000000..d05dda52f --- /dev/null +++ b/src/uucore/src/lib/features/quoting_style/shell_quoter.rs @@ -0,0 +1,241 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use super::{EscapeState, EscapedChar, Quoter, Quotes}; + +// These are characters with special meaning in the shell (e.g. bash). The +// first const contains characters that only have a special meaning when they +// appear at the beginning of a name. +const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#"; + +// Escaped and NonEscaped shell quoting strategies are very different. +// Therefore, we are using separate Quoter structures for each of them. + +pub(super) struct NonEscapedShellQuoter<'a> { + // INIT + /// Original name. + reference: &'a [u8], + + /// The quotes to be used if necessary + quotes: Quotes, + + /// Whether to show control and non-unicode characters, or replace them + /// with `?`. + show_control: bool, + + // INTERNAL STATE + /// Whether the name should be quoted. + must_quote: bool, + + buffer: Vec, +} + +impl<'a> NonEscapedShellQuoter<'a> { + pub fn new( + reference: &'a [u8], + show_control: bool, + always_quote: bool, + dirname: bool, + size_hint: usize, + ) -> Self { + let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote); + Self { + reference, + quotes, + show_control, + must_quote, + buffer: Vec::with_capacity(size_hint), + } + } +} + +impl<'a> Quoter for NonEscapedShellQuoter<'a> { + fn push_char(&mut self, input: char) { + let escaped = EscapedChar::new_shell(input, false, self.quotes); + + let escaped = if self.show_control { + escaped + } else { + escaped.hide_control() + }; + + match escaped.state { + EscapeState::Backslash('\'') => self.buffer.extend(b"'\\''"), + EscapeState::ForceQuote(x) => { + self.must_quote = true; + self.buffer.extend(x.to_string().as_bytes()); + } + _ => { + self.buffer.extend(escaped.collect::().as_bytes()); + } + } + } + + fn push_invalid(&mut self, input: &[u8]) { + if self.show_control { + self.buffer.extend(input); + } else { + self.buffer.extend(std::iter::repeat_n(b'?', input.len())); + } + } + + fn finalize(self: Box) -> Vec { + finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes) + } +} + +// We need to keep track of whether we are in a dollar expression +// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n' +pub(super) struct EscapedShellQuoter<'a> { + // INIT + /// Original name. + reference: &'a [u8], + + /// The quotes to be used if necessary + quotes: Quotes, + + // INTERNAL STATE + /// Whether the name should be quoted. + must_quote: bool, + + /// Whether we are currently in a dollar escaped environment. + in_dollar: bool, + + buffer: Vec, +} + +impl<'a> EscapedShellQuoter<'a> { + pub fn new(reference: &'a [u8], always_quote: bool, dirname: bool, size_hint: usize) -> Self { + let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote); + Self { + reference, + quotes, + must_quote, + in_dollar: false, + buffer: Vec::with_capacity(size_hint), + } + } + + fn enter_dollar(&mut self) { + if !self.in_dollar { + self.buffer.extend(b"'$'"); + self.in_dollar = true; + } + } + + fn exit_dollar(&mut self) { + if self.in_dollar { + self.buffer.extend(b"''"); + self.in_dollar = false; + } + } +} + +impl<'a> Quoter for EscapedShellQuoter<'a> { + fn push_char(&mut self, input: char) { + let escaped = EscapedChar::new_shell(input, true, self.quotes); + match escaped.state { + EscapeState::Char(x) => { + self.exit_dollar(); + self.buffer.extend(x.to_string().as_bytes()); + } + EscapeState::ForceQuote(x) => { + self.exit_dollar(); + self.must_quote = true; + self.buffer.extend(x.to_string().as_bytes()); + } + // Single quotes are not put in dollar expressions, but are escaped + // if the string also contains double quotes. In that case, they + // must be handled separately. + EscapeState::Backslash('\'') => { + self.must_quote = true; + self.in_dollar = false; + self.buffer.extend(b"'\\''"); + } + _ => { + self.enter_dollar(); + self.must_quote = true; + self.buffer.extend(escaped.collect::().as_bytes()); + } + } + } + + fn push_invalid(&mut self, input: &[u8]) { + // Early return on empty inputs. + if input.is_empty() { + return; + } + + self.enter_dollar(); + self.must_quote = true; + self.buffer.extend( + input + .iter() + .flat_map(|b| EscapedChar::new_octal(*b)) + .collect::() + .as_bytes(), + ); + } + + fn finalize(self: Box) -> Vec { + finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes) + } +} + +/// Deduce the initial quoting status from the provided information +fn initial_quoting(input: &[u8], dirname: bool, always_quote: bool) -> (Quotes, bool) { + if input + .iter() + .any(|c| shell_escaped_char_set(dirname).contains(c)) + { + (Quotes::Single, true) + } else if input.contains(&b'\'') { + (Quotes::Double, true) + } else if always_quote || input.is_empty() { + (Quotes::Single, true) + } else { + (Quotes::Single, false) + } +} + +/// Check whether `bytes` starts with any byte in `pattern`. +fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool { + !bytes.is_empty() && pattern.contains(&bytes[0]) +} + +/// Return a set of characters that implies quoting of the word in +/// shell-quoting mode. +fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] { + const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r="; + // the ':' colon character only induce quoting in the + // context of ls displaying a directory name before listing its content. + // (e.g. with the recursive flag -R) + let start_index = if is_dirname { 0 } else { 1 }; + &ESCAPED_CHARS[start_index..] +} + +fn finalize_shell_quoter( + buffer: Vec, + reference: &[u8], + must_quote: bool, + quotes: Quotes, +) -> Vec { + let contains_quote_chars = must_quote || bytes_start_with(reference, SPECIAL_SHELL_CHARS_START); + + if must_quote | contains_quote_chars && quotes != Quotes::None { + let mut quoted = Vec::::with_capacity(buffer.len() + 2); + let quote = if quotes == Quotes::Single { + b'\'' + } else { + b'"' + }; + quoted.push(quote); + quoted.extend(buffer); + quoted.push(quote); + quoted + } else { + buffer + } +} diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 469c52444..6a137b787 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -51,6 +51,8 @@ pub use crate::features::fast_inc; pub use crate::features::format; #[cfg(feature = "fs")] pub use crate::features::fs; +#[cfg(feature = "i18n")] +pub use crate::features::i18n; #[cfg(feature = "lines")] pub use crate::features::lines; #[cfg(feature = "parser")] diff --git a/test/sums b/test/sums new file mode 100644 index 000000000..9a3546341 --- /dev/null +++ b/test/sums @@ -0,0 +1,2 @@ +SHA256 (funkyname) = 29953405eaa3dcc41c37d1621d55b6a47eee93e05613e439e73295029740b10c +SHA256 (funky) = 29953405eaa3dcc41c37d1621d55b6a47eee93e05613e439e73295029740b10c diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index a0da1400b..f47ebb5b6 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -2690,6 +2690,71 @@ mod quoting { &[], ); } + + #[cfg(not(any(target_vendor = "apple", target_os = "windows", target_os = "openbsd")))] + #[test] + /// This test creates files with an UTF-8 encoded name and verify that it + /// gets escaped depending on the used locale. + fn test_locale_aware_quoting() { + let cases: &[(&[u8], _, _, &[&str])] = &[ + ( + "😁".as_bytes(), // == b"\xF0\x9F\x98\x81" + "''$'\\360\\237\\230\\201'\n", // ASCII sees 4 bytes + "😁\n", // UTF-8 sees an emoji + &["--quoting-style=shell-escape"], + ), + ( + "€".as_bytes(), // == b"\xE2\x82\xAC" + "''$'\\342\\202\\254'\n", // ASCII sees 3 bytes + "€\n", // UTF-8 still only 2 + &["--quoting-style=shell-escape"], + ), + ( + b"\xC2\x80\xC2\x81", // 2 first Unicode control characters + "????\n", // ASCII sees 4 bytes + "??\n", // UTF-8 sees only 2 + &["--quoting-style=literal", "--hide-control-char"], + ), + ( + b"\xC2\xC2\x81", + "???\n", // ASCII sees 3 bytes + "??\n", // UTF-8 still only 2 + &["--quoting-style=literal", "--hide-control-char"], + ), + ( + b"\xC2\x81\xC2", + "???\n", // ASCII sees 3 bytes + "??\n", // UTF-8 still only 2 + &["--quoting-style=literal", "--hide-control-char"], + ), + ]; + + for (filename, ascii_ref, utf_8_ref, args) in cases { + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + + let filename = uucore::os_str_from_bytes(filename) + .expect("Filename is valid Unicode supported on Linux"); + + at.touch(filename); + + // When the locale does not handle UTF-8 encoding, escaping is done. + scene + .ucmd() + .env("LC_ALL", "C") // Non UTF-8 locale + .args(args) + .succeeds() + .stdout_only(ascii_ref); + + // When the locale has UTF-8 support, the symbol is shown as-is. + scene + .ucmd() + .env("LC_ALL", "en_US.UTF-8") // UTF-8 locale + .args(args) + .succeeds() + .stdout_only(utf_8_ref); + } + } } #[test]