From d6075ef5b52fcf281270c192f01aa59d821a6528 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Mon, 13 Feb 2023 17:23:31 +0000 Subject: [PATCH] LibTextCodec+Everywhere: Make TextCodec::decoder_for() take a StringView We don't need a full String/DeprecatedString inside this function, so we might as well not force users to create one. --- Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp | 2 +- Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp | 2 +- Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp | 2 +- Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp | 2 +- Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp | 2 +- Userland/Libraries/LibGfx/Font/OpenType/Font.cpp | 2 +- Userland/Libraries/LibGfx/ICC/TagTypes.cpp | 6 +++--- Userland/Libraries/LibPDF/Parser.cpp | 2 +- Userland/Libraries/LibTextCodec/Decoder.cpp | 2 +- Userland/Libraries/LibTextCodec/Decoder.h | 2 +- Userland/Libraries/LibWeb/HTML/Window.cpp | 2 +- Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp | 2 +- Userland/Libraries/LibWeb/Loader/Resource.cpp | 2 +- Userland/Utilities/js.cpp | 2 +- 14 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp index 8bbd7cc83c..db48c54f36 100644 --- a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp @@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("windows-1251"); + auto* decoder = TextCodec::decoder_for("windows-1251"sv); VERIFY(decoder); decoder->to_utf8({ data, size }); return 0; diff --git a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp index 98e123df48..90816f9162 100644 --- a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp @@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("windows-1255"); + auto* decoder = 
TextCodec::decoder_for("windows-1255"sv); VERIFY(decoder); decoder->to_utf8({ data, size }); return 0; diff --git a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp index 97733bbc7a..7c615061b9 100644 --- a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp @@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("windows-1252"); + auto* decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder); decoder->to_utf8({ data, size }); return 0; diff --git a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp index 3ff3e14ceb..ac28dc510a 100644 --- a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp @@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("iso-8859-2"); + auto* decoder = TextCodec::decoder_for("iso-8859-2"sv); VERIFY(decoder); decoder->to_utf8({ data, size }); return 0; diff --git a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp index f054e793d5..5562b33537 100644 --- a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp @@ -11,7 +11,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("utf-16be"); + auto* decoder = TextCodec::decoder_for("utf-16be"sv); VERIFY(decoder); decoder->to_utf8({ data, size }); return 0; diff --git a/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp b/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp index 2c8ae189ef..3a983ee213 100644 --- a/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp +++ b/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp @@ -321,7 +321,7 @@ DeprecatedString Name::string_for_id(NameId id) const auto const offset = name_record.string_offset; if (platform_id == 
to_underlying(Platform::Windows)) { - static auto& decoder = *TextCodec::decoder_for("utf-16be"); + static auto& decoder = *TextCodec::decoder_for("utf-16be"sv); return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length }); } diff --git a/Userland/Libraries/LibGfx/ICC/TagTypes.cpp b/Userland/Libraries/LibGfx/ICC/TagTypes.cpp index bb7ac8f4d9..2346c5a82e 100644 --- a/Userland/Libraries/LibGfx/ICC/TagTypes.cpp +++ b/Userland/Libraries/LibGfx/ICC/TagTypes.cpp @@ -747,7 +747,7 @@ ErrorOr> MultiLocalizedUnicodeTagDat // "For the definition of language codes and country codes, see respectively // ISO 639-1 and ISO 3166-1. The Unicode strings in storage should be encoded as 16-bit big-endian, UTF-16BE, // and should not be NULL terminated." - auto& utf_16be_decoder = *TextCodec::decoder_for("utf-16be"); + auto& utf_16be_decoder = *TextCodec::decoder_for("utf-16be"sv); struct RawRecord { BigEndian language_code; @@ -1023,7 +1023,7 @@ ErrorOr> TextDescriptionTagData::from_byte return Error::from_string_literal("ICC::Profile: textDescriptionType Unicode description not \\0-terminated"); StringView utf_16be_data { unicode_description_data, byte_size_without_nul }; - unicode_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("utf-16be")->to_utf8(utf_16be_data))); + unicode_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(utf_16be_data))); } // ScriptCode @@ -1069,7 +1069,7 @@ ErrorOr> TextDescriptionTagData::from_byte if (macintosh_description_data[macintosh_description_length - 1] != '\0') return Error::from_string_literal("ICC::Profile: textDescriptionType ScriptCode not \\0-terminated"); - macintosh_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("x-mac-roman")->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 }))); + macintosh_description = 
TRY(String::from_deprecated_string(TextCodec::decoder_for("x-mac-roman"sv)->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 }))); } else { dbgln("TODO: ICCProfile textDescriptionType ScriptCode {}, length {}", scriptcode_code, macintosh_description_length); } diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index 1c7df04511..c6ade6bac6 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -267,7 +267,7 @@ NonnullRefPtr Parser::parse_string() if (unencrypted_string.bytes().starts_with(Array { 0xfe, 0xff })) { // The string is encoded in UTF16-BE - string_object->set_string(TextCodec::decoder_for("utf-16be")->to_utf8(unencrypted_string)); + string_object->set_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(unencrypted_string)); } else if (unencrypted_string.bytes().starts_with(Array { 239, 187, 191 })) { // The string is encoded in UTF-8. This is the default anyways, but if these bytes // are explicitly included, we have to trim them diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp index d991528593..3563f19c73 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.cpp +++ b/Userland/Libraries/LibTextCodec/Decoder.cpp @@ -30,7 +30,7 @@ TurkishDecoder s_turkish_decoder; XUserDefinedDecoder s_x_user_defined_decoder; } -Decoder* decoder_for(DeprecatedString const& a_encoding) +Decoder* decoder_for(StringView a_encoding) { auto encoding = get_standardized_encoding(a_encoding); if (encoding.has_value()) { diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h index 02cdfba599..7a426d35e1 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.h +++ b/Userland/Libraries/LibTextCodec/Decoder.h @@ -84,7 +84,7 @@ public: virtual void process(StringView, Function on_code_point) override; }; -Decoder* decoder_for(DeprecatedString const& encoding); +Decoder* 
decoder_for(StringView encoding); Optional get_standardized_encoding(StringView encoding); // This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder. diff --git a/Userland/Libraries/LibWeb/HTML/Window.cpp b/Userland/Libraries/LibWeb/HTML/Window.cpp index adc631576d..023bdaa8fc 100644 --- a/Userland/Libraries/LibWeb/HTML/Window.cpp +++ b/Userland/Libraries/LibWeb/HTML/Window.cpp @@ -1419,7 +1419,7 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob) // The bytes object might contain bytes greater than 128, encode them in UTF8 // NOTE: Any 8-bit encoding -> utf-8 decoder will work for this - auto text_decoder = TextCodec::decoder_for("windows-1252"); + auto text_decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(text_decoder); auto text = text_decoder->to_utf8(decoded.release_value()); diff --git a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp index c7b8973c4f..98194d8ea8 100644 --- a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp +++ b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp @@ -158,7 +158,7 @@ WebIDL::ExceptionOr WorkerGlobalScope::atob(DeprecatedString c // 3. Return decodedData. // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8. 
- auto* decoder = TextCodec::decoder_for("windows-1252"); + auto* decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder); return decoder->to_utf8(decoded_data.value()); } diff --git a/Userland/Libraries/LibWeb/Loader/Resource.cpp b/Userland/Libraries/LibWeb/Loader/Resource.cpp index d0b7c64415..9bc9c82711 100644 --- a/Userland/Libraries/LibWeb/Loader/Resource.cpp +++ b/Userland/Libraries/LibWeb/Loader/Resource.cpp @@ -81,7 +81,7 @@ static DeprecatedString mime_type_from_content_type(DeprecatedString const& cont return content_type; } -static bool is_valid_encoding(DeprecatedString const& encoding) +static bool is_valid_encoding(StringView encoding) { return TextCodec::decoder_for(encoding); } diff --git a/Userland/Utilities/js.cpp b/Userland/Utilities/js.cpp index a12af0420d..13ee8ba579 100644 --- a/Userland/Utilities/js.cpp +++ b/Userland/Utilities/js.cpp @@ -877,7 +877,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments) if (Utf8View { file_contents }.validate()) { builder.append(source); } else { - auto* decoder = TextCodec::decoder_for("windows-1252"); + auto* decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder); auto utf8_source = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source);