// Patch summary (preamble text placed before the first `diff --git` header).
//
// Files touched: Userland/Shell/Shell.cpp and Userland/Shell/Shell.h.
//
// * `bool Shell::is_special(char)` is replaced by
//   `Shell::special_character_escape_mode(u32)`, which returns a new
//   `SpecialCharacterEscapeMode` enum { Untouched, Escaped, QuotedAsEscape, QuotedAsHex }.
//   In the hunks shown: the listed punctuation, space and the newly added ';' map to
//   Escaped; '\n', '\t' and '\r' map to QuotedAsEscape; in the default case an ASCII
//   code point that is not printable maps to QuotedAsHex and everything else to
//   Untouched. The case labels between '$' and '{' fall outside the hunks.
//
// * `Shell::escape_token` now emits, per character: the character itself (Untouched),
//   a backslash followed by the character (Escaped), one of the literal sequences
//   "\n" / "\t" / "\r" including the double quotes (QuotedAsEscape), or a quoted
//   "\xhh" / "\uhhhhhhhh" sequence (QuotedAsHex). It iterates over a Utf8View of the
//   token when `view.validate()` succeeds and over the String itself otherwise.
//
// * `find_offset_into_node` (Shell.h) mirrors those output lengths: for each iterated
//   element `offset` grows by 1 (Untouched), 2 (Escaped), 4 (QuotedAsEscape), or
//   6 / 12 (QuotedAsHex as \x / \u), while `unescaped_offset` grows by one per
//   iterated element. It uses the same Utf8View-or-String fallback as `escape_token`.
//
// * The classification shown returns QuotedAsHex only for ASCII code points, so the
//   "\u" branches in both functions are not reached through it as written.
//
// * The last Shell.cpp hunk only removes an empty line before a closing brace.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks and any
// text that was written in angle brackets (see `NumericLimits::max()`,
// `static_cast(code_point)`, `static Vector split_path`) -- presumably template
// arguments; confirm against the original commit before applying or compiling.
diff --git a/Userland/Shell/Shell.cpp b/Userland/Shell/Shell.cpp index c0e2b5c998..0dc3caae62 100644 --- a/Userland/Shell/Shell.cpp +++ b/Userland/Shell/Shell.cpp @@ -1142,9 +1142,9 @@ String Shell::escape_token_for_double_quotes(const String& token) return builder.build(); } -bool Shell::is_special(char c) +Shell::SpecialCharacterEscapeMode Shell::special_character_escape_mode(u32 code_point) { - switch (c) { + switch (code_point) { case '\'': case '"': case '$': @@ -1156,25 +1156,72 @@ bool Shell::is_special(char c) case '{': case '}': case '&': + case ';': case '\\': case ' ': - return true; + return SpecialCharacterEscapeMode::Escaped; + case '\n': + case '\t': + case '\r': + return SpecialCharacterEscapeMode::QuotedAsEscape; default: - return false; + // FIXME: Should instead use unicode's "graphic" property (categories L, M, N, P, S, Zs) + if (code_point < NumericLimits::max()) { + if (isascii(static_cast(code_point))) + return isprint(static_cast(code_point)) ? SpecialCharacterEscapeMode::Untouched : SpecialCharacterEscapeMode::QuotedAsHex; + } + return SpecialCharacterEscapeMode::Untouched; } } String Shell::escape_token(const String& token) { - StringBuilder builder; + auto do_escape = [](auto& token) { + StringBuilder builder; + for (auto c : token) { + static_assert(sizeof(c) == sizeof(u32) || sizeof(c) == sizeof(u8)); + switch (special_character_escape_mode(c)) { + case SpecialCharacterEscapeMode::Untouched: + if constexpr (sizeof(c) == sizeof(u8)) + builder.append(c); + else + builder.append(Utf32View { &c, 1 }); + break; + case SpecialCharacterEscapeMode::Escaped: + builder.append('\\'); + builder.append(c); + break; + case SpecialCharacterEscapeMode::QuotedAsEscape: + switch (c) { + case '\n': + builder.append(R"("\n")"); + break; + case '\t': + builder.append(R"("\t")"); + break; + case '\r': + builder.append(R"("\r")"); + break; + default: + VERIFY_NOT_REACHED(); + } + break; + case SpecialCharacterEscapeMode::QuotedAsHex: + if (c <= 
NumericLimits::max()) + builder.appendff(R"("\x{:0>2x}")", static_cast(c)); + else + builder.appendff(R"("\u{:0>8x}")", static_cast(c)); + break; + } + } - for (auto c : token) { - if (is_special(c)) - builder.append('\\'); - builder.append(c); - } + return builder.build(); + }; - return builder.build(); + Utf8View view { token }; + if (view.validate()) + return do_escape(view); + return do_escape(token); } String Shell::unescape_token(const String& token) @@ -2057,5 +2104,4 @@ SavedFileDescriptors::~SavedFileDescriptors() } } } - } diff --git a/Userland/Shell/Shell.h b/Userland/Shell/Shell.h index dfa2d93231..147d0f1b3f 100644 --- a/Userland/Shell/Shell.h +++ b/Userland/Shell/Shell.h @@ -156,7 +156,13 @@ public: static String escape_token_for_single_quotes(const String& token); static String escape_token(const String& token); static String unescape_token(const String& token); - static bool is_special(char c); + enum class SpecialCharacterEscapeMode { + Untouched, + Escaped, + QuotedAsEscape, + QuotedAsHex, + }; + static SpecialCharacterEscapeMode special_character_escape_mode(u32 c); static bool is_glob(const StringView&); static Vector split_path(const StringView&); @@ -352,17 +358,37 @@ inline size_t find_offset_into_node(const String& unescaped_text, size_t escaped { size_t unescaped_offset = 0; size_t offset = 0; - for (auto& c : unescaped_text) { - if (offset == escaped_offset) - return unescaped_offset; + auto do_find_offset = [&](auto& unescaped_text) { + for (auto c : unescaped_text) { + if (offset == escaped_offset) + return unescaped_offset; - if (Shell::is_special(c)) + switch (Shell::special_character_escape_mode(c)) { + case Shell::SpecialCharacterEscapeMode::Untouched: + break; + case Shell::SpecialCharacterEscapeMode::Escaped: + ++offset; // X -> \X + break; + case Shell::SpecialCharacterEscapeMode::QuotedAsEscape: + offset += 3; // X -> "\Y" + break; + case Shell::SpecialCharacterEscapeMode::QuotedAsHex: + if (c > NumericLimits::max()) + offset += 
11; // X -> "\uhhhhhhhh" + else + offset += 5; // X -> "\xhh" + break; + } ++offset; - ++offset; - ++unescaped_offset; - } + ++unescaped_offset; + } + return unescaped_offset; + }; - return unescaped_offset; + Utf8View view { unescaped_text }; + if (view.validate()) + return do_find_offset(view); + return do_find_offset(unescaped_text); } }