From d325403cb599da640d5e8313c96155c16799c87a Mon Sep 17 00:00:00 2001
From: Max Wipfli
Date: Tue, 11 May 2021 15:52:25 +0200
Subject: [PATCH] LibTextCodec: Use Optional for get_standardized_encoding

This patch changes get_standardized_encoding to use an Optional return
type instead of just returning the null string when unable to match the
provided encoding to one of the canonical encoding names.

This is part of an effort to move away from using null strings towards
explicitly using Optional to indicate that the String may not have a
value.
---
 Userland/Libraries/LibTextCodec/Decoder.cpp   | 31 ++++++++++---------
 Userland/Libraries/LibTextCodec/Decoder.h     |  2 +-
 .../LibWeb/HTML/Parser/HTMLDocumentParser.cpp |  4 ++-
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index 37a28d5999..bebdb004a3 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -64,24 +64,26 @@ CyrillicDecoder& cyrillic_decoder()
 Decoder* decoder_for(const String& a_encoding)
 {
     auto encoding = get_standardized_encoding(a_encoding);
-    if (encoding.equals_ignoring_case("windows-1252"))
-        return &latin1_decoder();
-    if (encoding.equals_ignoring_case("utf-8"))
-        return &utf8_decoder();
-    if (encoding.equals_ignoring_case("utf-16be"))
-        return &utf16be_decoder();
-    if (encoding.equals_ignoring_case("iso-8859-2"))
-        return &latin2_decoder();
-    if (encoding.equals_ignoring_case("windows-1255"))
-        return &hebrew_decoder();
-    if (encoding.equals_ignoring_case("windows-1251"))
-        return &cyrillic_decoder();
+    if (encoding.has_value()) {
+        if (encoding.value().equals_ignoring_case("windows-1252"))
+            return &latin1_decoder();
+        if (encoding.value().equals_ignoring_case("utf-8"))
+            return &utf8_decoder();
+        if (encoding.value().equals_ignoring_case("utf-16be"))
+            return &utf16be_decoder();
+        if (encoding.value().equals_ignoring_case("iso-8859-2"))
+            return &latin2_decoder();
+        if (encoding.value().equals_ignoring_case("windows-1255"))
+            return &hebrew_decoder();
+        if (encoding.value().equals_ignoring_case("windows-1251"))
+            return &cyrillic_decoder();
+    }
     dbgln("TextCodec: No decoder implemented for encoding '{}'", a_encoding);
     return nullptr;
 }
 
 // https://encoding.spec.whatwg.org/#concept-encoding-get
-String get_standardized_encoding(const String& encoding)
+Optional<String> get_standardized_encoding(const String& encoding)
 {
     String trimmed_lowercase_encoding = encoding.trim_whitespace().to_lowercase();
 
@@ -172,7 +174,8 @@ String get_standardized_encoding(const String& encoding)
 
 bool is_standardized_encoding(const String& encoding)
 {
-    return encoding.equals_ignoring_case(get_standardized_encoding(encoding));
+    auto standardized_encoding = get_standardized_encoding(encoding);
+    return standardized_encoding.has_value() && encoding.equals_ignoring_case(standardized_encoding.value());
 }
 
 String UTF8Decoder::to_utf8(const StringView& input)
diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h
index 6ea4147b7e..585f05ff0d 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.h
+++ b/Userland/Libraries/LibTextCodec/Decoder.h
@@ -49,7 +49,7 @@ public:
 };
 
 Decoder* decoder_for(const String& encoding);
-String get_standardized_encoding(const String& encoding);
+Optional<String> get_standardized_encoding(const String& encoding);
 bool is_standardized_encoding(const String& encoding);
 
 }
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
index 635e07ea6d..e0c5efc4b2 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
@@ -104,7 +104,9 @@ HTMLDocumentParser::HTMLDocumentParser(DOM::Document& document, const StringView
     , m_document(document)
 {
     m_document->set_should_invalidate_styles_on_attribute_changes(false);
-    m_document->set_encoding(TextCodec::get_standardized_encoding(encoding));
+    auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
+    VERIFY(standardized_encoding.has_value());
+    m_document->set_encoding(standardized_encoding.value());
 }
 
 HTMLDocumentParser::~HTMLDocumentParser()