diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp index e69d7abef7..14b4acd155 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.cpp +++ b/Userland/Libraries/LibTextCodec/Decoder.cpp @@ -157,7 +157,7 @@ Optional get_standardized_encoding(StringView encoding) } // https://encoding.spec.whatwg.org/#bom-sniff -Decoder* bom_sniff_to_decoder(StringView input) +Optional bom_sniff_to_decoder(StringView input) { // 1. Let BOM be the result of peeking 3 bytes from ioQueue, converted to a byte sequence. // 2. For each of the rows in the table below, starting with the first one and going down, @@ -172,22 +172,28 @@ Decoder* bom_sniff_to_decoder(StringView input) auto bytes = input.bytes(); if (bytes.size() < 2) - return nullptr; + return {}; auto first_byte = bytes[0]; switch (first_byte) { case 0xEF: // UTF-8 if (bytes.size() < 3) - return nullptr; - return bytes[1] == 0xBB && bytes[2] == 0xBF ? &s_utf8_decoder : nullptr; + return {}; + if (bytes[1] == 0xBB && bytes[2] == 0xBF) + return s_utf8_decoder; + return {}; case 0xFE: // UTF-16BE - return bytes[1] == 0xFF ? &s_utf16be_decoder : nullptr; + if (bytes[1] == 0xFF) + return s_utf16be_decoder; + return {}; case 0xFF: // UTF-16LE - return bytes[1] == 0xFE ? &s_utf16le_decoder : nullptr; + if (bytes[1] == 0xFE) + return s_utf16le_decoder; + return {}; } - return nullptr; + return {}; } // https://encoding.spec.whatwg.org/#decode @@ -197,13 +203,13 @@ DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byt // 1. Let BOMEncoding be the result of BOM sniffing ioQueue. // 2. If BOMEncoding is non-null: - if (auto* unicode_decoder = bom_sniff_to_decoder(input); unicode_decoder) { + if (auto unicode_decoder = bom_sniff_to_decoder(input); unicode_decoder.has_value()) { // 1. Set encoding to BOMEncoding. - actual_decoder = unicode_decoder; + actual_decoder = &unicode_decoder.value(); // 2. Read three bytes from ioQueue, if BOMEncoding is UTF-8; otherwise read two bytes. (Do nothing with those bytes.) // FIXME: I imagine this will be pretty slow for large inputs, as it's regenerating the input without the first 2/3 bytes. - input = input.substring_view(unicode_decoder == &s_utf8_decoder ? 3 : 2); + input = input.substring_view(&unicode_decoder.value() == &s_utf8_decoder ? 3 : 2); } VERIFY(actual_decoder); diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h index f8e8205544..6ab41556b5 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.h +++ b/Userland/Libraries/LibTextCodec/Decoder.h @@ -89,8 +89,8 @@ public: Optional decoder_for(StringView encoding); Optional get_standardized_encoding(StringView encoding); -// This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder. -Decoder* bom_sniff_to_decoder(StringView); +// This returns the appropriate Unicode decoder for the sniffed BOM or nothing if there is no appropriate decoder. +Optional bom_sniff_to_decoder(StringView); // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this. // This will use the given decoder unless there is a byte order mark in the input, in which we will instead use the appropriate Unicode decoder.