From 00fa23237ad38b148c981d60c16db7851999bc1e Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Thu, 11 May 2023 11:22:41 -0400
Subject: [PATCH] LibTextCodec: Change UTF-8's decoder to replace invalid code points

The UTF-8 decoder will currently crash if it is provided invalid UTF-8
input. Instead, change its behavior to match that of all other decoders
to replace invalid code points with U+FFFD. This is required by the web.
---
 Userland/Libraries/LibTextCodec/Decoder.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp
index fcb50a6768..5b40442a6a 100644
--- a/Userland/Libraries/LibTextCodec/Decoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Decoder.cpp
@@ -213,10 +213,12 @@ ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte
 
     VERIFY(actual_decoder);
 
-    // FIXME: 3. Process a queue with an instance of encoding’s decoder, ioQueue, output, and "replacement".
-    //        This isn't the exact same as the spec, especially the error mode of "replacement", which we don't have the concept of yet.
+    // 3. Process a queue with an instance of encoding’s decoder, ioQueue, output, and "replacement".
+    // FIXME: This isn't the exact same as the spec, which is written in terms of I/O queues.
+    auto output = TRY(actual_decoder->to_utf8(input));
+
     // 4. Return output.
-    return actual_decoder->to_utf8(input);
+    return output;
 }
 
 ErrorOr<String> Decoder::to_utf8(StringView input)
@@ -242,7 +244,7 @@ ErrorOr<String> UTF8Decoder::to_utf8(StringView input)
         bomless_input = input.substring_view(3);
     }
 
-    return String::from_utf8(bomless_input);
+    return Decoder::to_utf8(bomless_input);
 }
 
 ErrorOr<void> UTF16BEDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)