diff --git a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp index db48c54f36..10e3c6e3e6 100644 --- a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp @@ -11,8 +11,8 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("windows-1251"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("windows-1251"sv); + VERIFY(decoder.has_value()); decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp index 90816f9162..866e3b1e61 100644 --- a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp @@ -11,8 +11,8 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("windows-1255"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("windows-1255"sv); + VERIFY(decoder.has_value()); decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp index 7c615061b9..3a3f03cd90 100644 --- a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp @@ -11,8 +11,8 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("windows-1252"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("windows-1252"sv); + VERIFY(decoder.has_value()); decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp index ac28dc510a..1d83a83920 100644 --- a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp @@ -11,8 +11,8 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("iso-8859-2"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("iso-8859-2"sv); + VERIFY(decoder.has_value()); decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp index 5562b33537..26b2205549 100644 --- a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp @@ -11,8 +11,8 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { - auto* decoder = TextCodec::decoder_for("utf-16be"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("utf-16be"sv); + VERIFY(decoder.has_value()); decoder->to_utf8({ data, size }); return 0; } diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp index 3563f19c73..e69d7abef7 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.cpp +++ b/Userland/Libraries/LibTextCodec/Decoder.cpp @@ -1,6 +1,7 @@ /* * Copyright (c) 2020, Andreas Kling * Copyright (c) 2022, Jelle Raaijmakers + * Copyright (c) 2023, Sam Atkins * * SPDX-License-Identifier: BSD-2-Clause */ @@ -30,37 +31,37 @@ TurkishDecoder s_turkish_decoder; XUserDefinedDecoder s_x_user_defined_decoder; } -Decoder* decoder_for(StringView a_encoding) +Optional decoder_for(StringView a_encoding) { auto encoding = get_standardized_encoding(a_encoding); if (encoding.has_value()) { if (encoding.value().equals_ignoring_case("windows-1252"sv)) - return &s_latin1_decoder; + return s_latin1_decoder; if (encoding.value().equals_ignoring_case("utf-8"sv)) - return &s_utf8_decoder; + return s_utf8_decoder; if (encoding.value().equals_ignoring_case("utf-16be"sv)) - return &s_utf16be_decoder; + return s_utf16be_decoder; if (encoding.value().equals_ignoring_case("utf-16le"sv)) - return &s_utf16le_decoder; + return s_utf16le_decoder; if (encoding.value().equals_ignoring_case("iso-8859-2"sv)) - return &s_latin2_decoder; + return s_latin2_decoder; if (encoding.value().equals_ignoring_case("windows-1255"sv)) - return &s_hebrew_decoder; + return s_hebrew_decoder; if (encoding.value().equals_ignoring_case("windows-1251"sv)) - return &s_cyrillic_decoder; + return s_cyrillic_decoder; if (encoding.value().equals_ignoring_case("koi8-r"sv)) - return &s_koi8r_decoder; + return s_koi8r_decoder; if (encoding.value().equals_ignoring_case("iso-8859-15"sv)) - return &s_latin9_decoder; + return s_latin9_decoder; if (encoding.value().equals_ignoring_case("macintosh"sv)) - return &s_mac_roman_decoder; + return s_mac_roman_decoder; if (encoding.value().equals_ignoring_case("windows-1254"sv)) - return &s_turkish_decoder; + return s_turkish_decoder; if (encoding.value().equals_ignoring_case("x-user-defined"sv)) - return &s_x_user_defined_decoder; + return s_x_user_defined_decoder; } dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding); - return nullptr; + return {}; } // https://encoding.spec.whatwg.org/#concept-encoding-get diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h index 7a426d35e1..f8e8205544 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.h +++ b/Userland/Libraries/LibTextCodec/Decoder.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2020-2021, Andreas Kling * Copyright (c) 2022, Jelle Raaijmakers + * Copyright (c) 2023, Sam Atkins * * SPDX-License-Identifier: BSD-2-Clause */ @@ -9,6 +10,7 @@ #include #include +#include namespace TextCodec { @@ -84,7 +86,7 @@ public: virtual void process(StringView, Function on_code_point) override; }; -Decoder* decoder_for(StringView encoding); +Optional decoder_for(StringView encoding); Optional get_standardized_encoding(StringView encoding); // This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder. diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index baa16d709a..8cb69ab0c7 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -199,8 +199,8 @@ Tokenizer::Tokenizer(StringView input, StringView encoding) { // https://www.w3.org/TR/css-syntax-3/#css-filter-code-points auto filter_code_points = [](StringView input, auto encoding) -> ErrorOr { - auto* decoder = TextCodec::decoder_for(encoding); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for(encoding); + VERIFY(decoder.has_value()); StringBuilder builder { input.length() }; bool last_was_carriage_return = false; diff --git a/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp b/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp index 5426b24d09..af2cce78aa 100644 --- a/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp +++ b/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp @@ -15,7 +15,7 @@ namespace Web::Encoding { WebIDL::ExceptionOr> TextDecoder::construct_impl(JS::Realm& realm, DeprecatedFlyString encoding) { auto decoder = TextCodec::decoder_for(encoding); - if (!decoder) + if (!decoder.has_value()) return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, DeprecatedString::formatted("Invalid encoding {}", encoding) }; return MUST_OR_THROW_OOM(realm.heap().allocate(realm, realm, *decoder, move(encoding), false, false)); diff --git a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp index caa8a8fb97..b3904a5d84 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp @@ -514,7 +514,7 @@ void HTMLScriptElement::resource_did_load() // If the resource has an explicit encoding (i.e from a HTTP Content-Type header) // we have to re-encode it to UTF-8. if (resource()->has_encoding()) { - if (auto* codec = TextCodec::decoder_for(resource()->encoding().value())) { + if (auto codec = TextCodec::decoder_for(resource()->encoding().value()); codec.has_value()) { data = codec->to_utf8(data).to_byte_buffer(); } } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 50864bb915..3dfd4e3274 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -2798,8 +2798,8 @@ HTMLTokenizer::HTMLTokenizer() HTMLTokenizer::HTMLTokenizer(StringView input, DeprecatedString const& encoding) { - auto* decoder = TextCodec::decoder_for(encoding); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for(encoding); + VERIFY(decoder.has_value()); m_decoded_input = decoder->to_utf8(input); m_utf8_view = Utf8View(m_decoded_input); m_utf8_iterator = m_utf8_view.begin(); diff --git a/Userland/Libraries/LibWeb/HTML/Window.cpp b/Userland/Libraries/LibWeb/HTML/Window.cpp index 0300b45285..1edac12573 100644 --- a/Userland/Libraries/LibWeb/HTML/Window.cpp +++ b/Userland/Libraries/LibWeb/HTML/Window.cpp @@ -1420,7 +1420,7 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob) // The bytes object might contain bytes greater than 128, encode them in UTF8 // NOTE: Any 8-bit encoding -> utf-8 decoder will work for this auto text_decoder = TextCodec::decoder_for("windows-1252"sv); - VERIFY(text_decoder); + VERIFY(text_decoder.has_value()); auto text = text_decoder->to_utf8(decoded.release_value()); return JS::PrimitiveString::create(vm, DeprecatedString(text)); diff --git a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp index de252ae32c..c31c8982b1 100644 --- a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp +++ b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp @@ -161,8 +161,8 @@ WebIDL::ExceptionOr WorkerGlobalScope::atob(DeprecatedString c // 3. Return decodedData. // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8. - auto* decoder = TextCodec::decoder_for("windows-1252"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("windows-1252"sv); + VERIFY(decoder.has_value()); return decoder->to_utf8(decoded_data.value()); } diff --git a/Userland/Libraries/LibWeb/Loader/Resource.cpp b/Userland/Libraries/LibWeb/Loader/Resource.cpp index 9bc9c82711..8d1839427b 100644 --- a/Userland/Libraries/LibWeb/Loader/Resource.cpp +++ b/Userland/Libraries/LibWeb/Loader/Resource.cpp @@ -83,7 +83,7 @@ static DeprecatedString mime_type_from_content_type(DeprecatedString const& cont static bool is_valid_encoding(StringView encoding) { - return TextCodec::decoder_for(encoding); + return TextCodec::decoder_for(encoding).has_value(); } void Resource::did_load(Badge, ReadonlyBytes data, HashMap const& headers, Optional status_code) diff --git a/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp b/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp index 1580c93d30..1f8529eb91 100644 --- a/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp +++ b/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp @@ -219,10 +219,10 @@ DeprecatedString XMLHttpRequest::get_text_response() const charset = "UTF-8"sv; // 5. Return the result of running decode on xhr’s received bytes using fallback encoding charset. - auto* decoder = TextCodec::decoder_for(charset.value()); + auto decoder = TextCodec::decoder_for(charset.value()); // If we don't support the decoder yet, let's crash instead of attempting to return something, as the result would be incorrect and create obscure bugs. - VERIFY(decoder); + VERIFY(decoder.has_value()); return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes); } diff --git a/Userland/Utilities/js.cpp b/Userland/Utilities/js.cpp index fdeb39e5aa..855f3b1d27 100644 --- a/Userland/Utilities/js.cpp +++ b/Userland/Utilities/js.cpp @@ -882,8 +882,8 @@ ErrorOr serenity_main(Main::Arguments arguments) if (Utf8View { file_contents }.validate()) { builder.append(source); } else { - auto* decoder = TextCodec::decoder_for("windows-1252"sv); - VERIFY(decoder); + auto decoder = TextCodec::decoder_for("windows-1252"sv); + VERIFY(decoder.has_value()); auto utf8_source = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source); builder.append(utf8_source);