diff --git a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp index 10e3c6e3e6..647dd54020 100644 --- a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp @@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { auto decoder = TextCodec::decoder_for("windows-1251"sv); VERIFY(decoder.has_value()); - decoder->to_utf8({ data, size }); + (void)decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp index 866e3b1e61..cfe0873317 100644 --- a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp @@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { auto decoder = TextCodec::decoder_for("windows-1255"sv); VERIFY(decoder.has_value()); - decoder->to_utf8({ data, size }); + (void)decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp index 3a3f03cd90..8a2b667995 100644 --- a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp @@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { auto decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder.has_value()); - decoder->to_utf8({ data, size }); + (void)decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp index 1d83a83920..3fb16d631e 100644 --- a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp @@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { auto decoder = TextCodec::decoder_for("iso-8859-2"sv); VERIFY(decoder.has_value()); - decoder->to_utf8({ data, size }); + (void)decoder->to_utf8({ data, size }); return 0; } diff --git a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp index 26b2205549..a24fb5e8e3 100644 --- a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp +++ b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp @@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) { auto decoder = TextCodec::decoder_for("utf-16be"sv); VERIFY(decoder.has_value()); - decoder->to_utf8({ data, size }); + (void)decoder->to_utf8({ data, size }); return 0; } diff --git a/Tests/LibTextCodec/TestTextDecoders.cpp b/Tests/LibTextCodec/TestTextDecoders.cpp index fe873bf7ff..d1658d337c 100644 --- a/Tests/LibTextCodec/TestTextDecoders.cpp +++ b/Tests/LibTextCodec/TestTextDecoders.cpp @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include @@ -15,13 +16,13 @@ TEST_CASE(test_utf8_decode) auto test_string = "\xf0\x9f\x98\x80"sv; Vector processed_code_points; - decoder.process(test_string, [&](u32 code_point) { - processed_code_points.append(code_point); - }); + MUST(decoder.process(test_string, [&](u32 code_point) { + return processed_code_points.try_append(code_point); + })); EXPECT(processed_code_points.size() == 1); EXPECT(processed_code_points[0] == 0x1F600); - EXPECT(decoder.to_utf8(test_string) == test_string); + EXPECT(MUST(decoder.to_utf8(test_string)) == test_string); } TEST_CASE(test_utf16be_decode) @@ -31,9 +32,9 @@ TEST_CASE(test_utf16be_decode) auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv; Vector processed_code_points; - decoder.process(test_string, [&](u32 code_point) { - processed_code_points.append(code_point); - }); + MUST(decoder.process(test_string, [&](u32 code_point) { + return processed_code_points.try_append(code_point); + })); EXPECT(processed_code_points.size() == 4); EXPECT(processed_code_points[0] == 0x73); EXPECT(processed_code_points[1] == 0xE4); @@ -48,9 +49,9 @@ TEST_CASE(test_utf16le_decode) auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv; Vector processed_code_points; - decoder.process(test_string, [&](u32 code_point) { - processed_code_points.append(code_point); - }); + MUST(decoder.process(test_string, [&](u32 code_point) { + return processed_code_points.try_append(code_point); + })); EXPECT(processed_code_points.size() == 4); EXPECT(processed_code_points[0] == 0x73); EXPECT(processed_code_points[1] == 0xE4); diff --git a/Userland/Applications/HexEditor/HexEditorWidget.cpp b/Userland/Applications/HexEditor/HexEditorWidget.cpp index 5feae61de1..2869195bc4 100644 --- a/Userland/Applications/HexEditor/HexEditorWidget.cpp +++ b/Userland/Applications/HexEditor/HexEditorWidget.cpp @@ -392,7 +392,7 @@ void HexEditorWidget::update_inspector_values(size_t position) // FIXME: Parse as other values like Timestamp etc - DeprecatedString utf16_string = TextCodec::decoder_for("utf-16le"sv)->to_utf8(StringView(selected_bytes.span())); + DeprecatedString utf16_string = TextCodec::decoder_for("utf-16le"sv)->to_utf8(StringView(selected_bytes.span())).release_value_but_fixme_should_propagate_errors().to_deprecated_string(); value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16String, utf16_string); m_value_inspector->set_model(value_inspector_model); diff --git a/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp b/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp index 3a983ee213..78983fca08 100644 --- a/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp +++ b/Userland/Libraries/LibGfx/Font/OpenType/Font.cpp @@ -322,7 +322,7 @@ DeprecatedString Name::string_for_id(NameId id) const if (platform_id == to_underlying(Platform::Windows)) { static auto& decoder = *TextCodec::decoder_for("utf-16be"sv); - return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length }); + return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length }).release_value_but_fixme_should_propagate_errors().to_deprecated_string(); } return DeprecatedString((char const*)m_slice.offset_pointer(storage_offset + offset), length); diff --git a/Userland/Libraries/LibGfx/ICC/TagTypes.cpp b/Userland/Libraries/LibGfx/ICC/TagTypes.cpp index c777e3166a..a494c1a16d 100644 --- a/Userland/Libraries/LibGfx/ICC/TagTypes.cpp +++ b/Userland/Libraries/LibGfx/ICC/TagTypes.cpp @@ -703,7 +703,7 @@ ErrorOr> MultiLocalizedUnicodeTagDat return Error::from_string_literal("ICC::Profile: multiLocalizedUnicodeType string offset out of bounds"); StringView utf_16be_data { bytes.data() + record.string_offset_in_bytes, record.string_length_in_bytes }; - records[i].text = TRY(String::from_deprecated_string(utf_16be_decoder.to_utf8(utf_16be_data))); + records[i].text = TRY(utf_16be_decoder.to_utf8(utf_16be_data)); } return try_make_ref_counted(offset, size, move(records)); @@ -955,7 +955,7 @@ ErrorOr> TextDescriptionTagData::from_byte return Error::from_string_literal("ICC::Profile: textDescriptionType Unicode description not \\0-terminated"); StringView utf_16be_data { unicode_description_data, byte_size_without_nul }; - unicode_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(utf_16be_data))); + unicode_description = TRY(TextCodec::decoder_for("utf-16be"sv)->to_utf8(utf_16be_data)); } // ScriptCode @@ -1001,7 +1001,7 @@ ErrorOr> TextDescriptionTagData::from_byte if (macintosh_description_data[macintosh_description_length - 1] != '\0') return Error::from_string_literal("ICC::Profile: textDescriptionType ScriptCode not \\0-terminated"); - macintosh_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("x-mac-roman"sv)->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 }))); + macintosh_description = TRY(TextCodec::decoder_for("x-mac-roman"sv)->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 })); } else { dbgln("TODO: ICCProfile textDescriptionType ScriptCode {}, length {}", scriptcode_code, macintosh_description_length); } diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index c6ade6bac6..4f90d06471 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -267,7 +267,7 @@ NonnullRefPtr Parser::parse_string() if (unencrypted_string.bytes().starts_with(Array { 0xfe, 0xff })) { // The string is encoded in UTF16-BE - string_object->set_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(unencrypted_string)); + string_object->set_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(unencrypted_string).release_value_but_fixme_should_propagate_errors().to_deprecated_string()); } else if (unencrypted_string.bytes().starts_with(Array { 239, 187, 191 })) { // The string is encoded in UTF-8. This is the default anyways, but if these bytes // are explicitly included, we have to trim them diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp index 14b4acd155..79c3d8f760 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.cpp +++ b/Userland/Libraries/LibTextCodec/Decoder.cpp @@ -6,7 +6,6 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include #include #include #include @@ -197,7 +196,7 @@ Optional bom_sniff_to_decoder(StringView input) } // https://encoding.spec.whatwg.org/#decode -DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder& fallback_decoder, StringView input) +ErrorOr convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder& fallback_decoder, StringView input) { Decoder* actual_decoder = &fallback_decoder; @@ -220,21 +219,22 @@ DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byt return actual_decoder->to_utf8(input); } -DeprecatedString Decoder::to_utf8(StringView input) +ErrorOr Decoder::to_utf8(StringView input) { StringBuilder builder(input.length()); - process(input, [&builder](u32 c) { builder.append_code_point(c); }); - return builder.to_deprecated_string(); + TRY(process(input, [&builder](u32 c) { return builder.try_append_code_point(c); })); + return builder.to_string(); } -void UTF8Decoder::process(StringView input, Function on_code_point) +ErrorOr UTF8Decoder::process(StringView input, Function(u32)> on_code_point) { for (auto c : Utf8View(input)) { - on_code_point(c); + TRY(on_code_point(c)); } + return {}; } -DeprecatedString UTF8Decoder::to_utf8(StringView input) +ErrorOr UTF8Decoder::to_utf8(StringView input) { // Discard the BOM auto bomless_input = input; @@ -242,10 +242,10 @@ DeprecatedString UTF8Decoder::to_utf8(StringView input) bomless_input = input.substring_view(3); } - return bomless_input; + return String::from_utf8(bomless_input); } -void UTF16BEDecoder::process(StringView input, Function on_code_point) +ErrorOr UTF16BEDecoder::process(StringView input, Function(u32)> on_code_point) { // rfc2781, 2.2 Decoding UTF-16 size_t utf16_length = input.length() - (input.length() % 2); @@ -254,7 +254,7 @@ void UTF16BEDecoder::process(StringView input, Function on_code_point // of W1. Terminate. u16 w1 = (static_cast(input[i]) << 8) | static_cast(input[i + 1]); if (!is_unicode_surrogate(w1)) { - on_code_point(w1); + TRY(on_code_point(w1)); continue; } @@ -265,13 +265,13 @@ void UTF16BEDecoder::process(StringView input, Function on_code_point // is not between 0xDC00 and 0xDFFF, the sequence is in error. // Terminate. if (!Utf16View::is_high_surrogate(w1) || i + 2 == utf16_length) { - on_code_point(replacement_code_point); + TRY(on_code_point(replacement_code_point)); continue; } u16 w2 = (static_cast(input[i + 2]) << 8) | static_cast(input[i + 3]); if (!Utf16View::is_low_surrogate(w2)) { - on_code_point(replacement_code_point); + TRY(on_code_point(replacement_code_point)); continue; } @@ -279,12 +279,14 @@ void UTF16BEDecoder::process(StringView input, Function on_code_point // bits of W1 as its 10 high-order bits and the 10 low-order bits of // W2 as its 10 low-order bits. // 5) Add 0x10000 to U' to obtain the character value U. Terminate. - on_code_point(Utf16View::decode_surrogate_pair(w1, w2)); + TRY(on_code_point(Utf16View::decode_surrogate_pair(w1, w2))); i += 2; } + + return {}; } -DeprecatedString UTF16BEDecoder::to_utf8(StringView input) +ErrorOr UTF16BEDecoder::to_utf8(StringView input) { // Discard the BOM auto bomless_input = input; @@ -292,11 +294,11 @@ DeprecatedString UTF16BEDecoder::to_utf8(StringView input) bomless_input = input.substring_view(2); StringBuilder builder(bomless_input.length() / 2); - process(bomless_input, [&builder](u32 c) { builder.append_code_point(c); }); - return builder.to_deprecated_string(); + TRY(process(bomless_input, [&builder](u32 c) { return builder.try_append_code_point(c); })); + return builder.to_string(); } -void UTF16LEDecoder::process(StringView input, Function on_code_point) +ErrorOr UTF16LEDecoder::process(StringView input, Function(u32)> on_code_point) { // rfc2781, 2.2 Decoding UTF-16 size_t utf16_length = input.length() - (input.length() % 2); @@ -305,7 +307,7 @@ void UTF16LEDecoder::process(StringView input, Function on_code_point // of W1. Terminate. u16 w1 = static_cast(input[i]) | (static_cast(input[i + 1]) << 8); if (!is_unicode_surrogate(w1)) { - on_code_point(w1); + TRY(on_code_point(w1)); continue; } @@ -316,13 +318,13 @@ void UTF16LEDecoder::process(StringView input, Function on_code_point // is not between 0xDC00 and 0xDFFF, the sequence is in error. // Terminate. if (!Utf16View::is_high_surrogate(w1) || i + 2 == utf16_length) { - on_code_point(replacement_code_point); + TRY(on_code_point(replacement_code_point)); continue; } u16 w2 = static_cast(input[i + 2]) | (static_cast(input[i + 3]) << 8); if (!Utf16View::is_low_surrogate(w2)) { - on_code_point(replacement_code_point); + TRY(on_code_point(replacement_code_point)); continue; } @@ -330,12 +332,14 @@ void UTF16LEDecoder::process(StringView input, Function on_code_point // bits of W1 as its 10 high-order bits and the 10 low-order bits of // W2 as its 10 low-order bits. // 5) Add 0x10000 to U' to obtain the character value U. Terminate. - on_code_point(Utf16View::decode_surrogate_pair(w1, w2)); + TRY(on_code_point(Utf16View::decode_surrogate_pair(w1, w2))); i += 2; } + + return {}; } -DeprecatedString UTF16LEDecoder::to_utf8(StringView input) +ErrorOr UTF16LEDecoder::to_utf8(StringView input) { // Discard the BOM auto bomless_input = input; @@ -343,16 +347,18 @@ DeprecatedString UTF16LEDecoder::to_utf8(StringView input) bomless_input = input.substring_view(2); StringBuilder builder(bomless_input.length() / 2); - process(bomless_input, [&builder](u32 c) { builder.append_code_point(c); }); - return builder.to_deprecated_string(); + TRY(process(bomless_input, [&builder](u32 c) { return builder.try_append_code_point(c); })); + return builder.to_string(); } -void Latin1Decoder::process(StringView input, Function on_code_point) +ErrorOr Latin1Decoder::process(StringView input, Function(u32)> on_code_point) { for (auto ch : input) { // Latin1 is the same as the first 256 Unicode code_points, so no mapping is needed, just utf-8 encoding. - on_code_point(ch); + TRY(on_code_point(ch)); } + + return {}; } namespace { @@ -434,14 +440,16 @@ u32 convert_latin2_to_utf8(u8 in) } } -void Latin2Decoder::process(StringView input, Function on_code_point) +ErrorOr Latin2Decoder::process(StringView input, Function(u32)> on_code_point) { for (auto c : input) { - on_code_point(convert_latin2_to_utf8(c)); + TRY(on_code_point(convert_latin2_to_utf8(c))); } + + return {}; } -void HebrewDecoder::process(StringView input, Function on_code_point) +ErrorOr HebrewDecoder::process(StringView input, Function(u32)> on_code_point) { static constexpr Array translation_table = { 0x20AC, 0xFFFD, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x2C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, @@ -455,14 +463,16 @@ void HebrewDecoder::process(StringView input, Function on_code_point) }; for (unsigned char ch : input) { if (ch < 0x80) { // Superset of ASCII - on_code_point(ch); + TRY(on_code_point(ch)); } else { - on_code_point(translation_table[ch - 0x80]); + TRY(on_code_point(translation_table[ch - 0x80])); } } + + return {}; } -void CyrillicDecoder::process(StringView input, Function on_code_point) +ErrorOr CyrillicDecoder::process(StringView input, Function(u32)> on_code_point) { static constexpr Array translation_table = { 0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F, @@ -476,14 +486,16 @@ void CyrillicDecoder::process(StringView input, Function on_code_poin }; for (unsigned char ch : input) { if (ch < 0x80) { // Superset of ASCII - on_code_point(ch); + TRY(on_code_point(ch)); } else { - on_code_point(translation_table[ch - 0x80]); + TRY(on_code_point(translation_table[ch - 0x80])); } } + + return {}; } -void Koi8RDecoder::process(StringView input, Function on_code_point) +ErrorOr Koi8RDecoder::process(StringView input, Function(u32)> on_code_point) { // clang-format off static constexpr Array translation_table = { @@ -500,14 +512,16 @@ void Koi8RDecoder::process(StringView input, Function on_code_point) for (unsigned char ch : input) { if (ch < 0x80) { // Superset of ASCII - on_code_point(ch); + TRY(on_code_point(ch)); } else { - on_code_point(translation_table[ch - 0x80]); + TRY(on_code_point(translation_table[ch - 0x80])); } } + + return {}; } -void Latin9Decoder::process(StringView input, Function on_code_point) +ErrorOr Latin9Decoder::process(StringView input, Function(u32)> on_code_point) { auto convert_latin9_to_utf8 = [](u8 ch) -> u32 { // Latin9 is the same as the first 256 Unicode code points, except for 8 characters. @@ -534,11 +548,13 @@ void Latin9Decoder::process(StringView input, Function on_code_point) }; for (auto ch : input) { - on_code_point(convert_latin9_to_utf8(ch)); + TRY(on_code_point(convert_latin9_to_utf8(ch))); } + + return {}; } -void MacRomanDecoder::process(StringView input, Function on_code_point) +ErrorOr MacRomanDecoder::process(StringView input, Function(u32)> on_code_point) { // https://encoding.spec.whatwg.org/index-macintosh.txt // clang-format off @@ -556,14 +572,16 @@ void MacRomanDecoder::process(StringView input, Function on_code_poin for (u8 ch : input) { if (ch < 0x80) { // Superset of ASCII - on_code_point(ch); + TRY(on_code_point(ch)); } else { - on_code_point(translation_table[ch - 0x80]); + TRY(on_code_point(translation_table[ch - 0x80])); } } + + return {}; } -void TurkishDecoder::process(StringView input, Function on_code_point) +ErrorOr TurkishDecoder::process(StringView input, Function(u32)> on_code_point) { auto convert_turkish_to_utf8 = [](u8 ch) -> u32 { // Turkish (aka ISO-8859-9, Windows-1254) is the same as the first 256 Unicode code points, except for 6 characters. @@ -586,12 +604,14 @@ void TurkishDecoder::process(StringView input, Function on_code_point }; for (auto ch : input) { - on_code_point(convert_turkish_to_utf8(ch)); + TRY(on_code_point(convert_turkish_to_utf8(ch))); } + + return {}; } // https://encoding.spec.whatwg.org/#x-user-defined-decoder -void XUserDefinedDecoder::process(StringView input, Function on_code_point) +ErrorOr XUserDefinedDecoder::process(StringView input, Function(u32)> on_code_point) { auto convert_x_user_defined_to_utf8 = [](u8 ch) -> u32 { // 2. If byte is an ASCII byte, return a code point whose value is byte. @@ -606,10 +626,12 @@ void XUserDefinedDecoder::process(StringView input, Function on_code_ }; for (auto ch : input) { - on_code_point(convert_x_user_defined_to_utf8(ch)); + TRY(on_code_point(convert_x_user_defined_to_utf8(ch))); } // 1. If byte is end-of-queue, return finished. + + return {}; } } diff --git a/Userland/Libraries/LibTextCodec/Decoder.h b/Userland/Libraries/LibTextCodec/Decoder.h index 6ab41556b5..c678677bf5 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.h +++ b/Userland/Libraries/LibTextCodec/Decoder.h @@ -11,13 +11,14 @@ #include #include #include +#include namespace TextCodec { class Decoder { public: - virtual void process(StringView, Function on_code_point) = 0; - virtual DeprecatedString to_utf8(StringView); + virtual ErrorOr process(StringView, Function(u32)> on_code_point) = 0; + virtual ErrorOr to_utf8(StringView); protected: virtual ~Decoder() = default; @@ -25,65 +26,65 @@ protected: class UTF8Decoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; - virtual DeprecatedString to_utf8(StringView) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; + virtual ErrorOr to_utf8(StringView) override; }; class UTF16BEDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; - virtual DeprecatedString to_utf8(StringView) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; + virtual ErrorOr to_utf8(StringView) override; }; class UTF16LEDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; - virtual DeprecatedString to_utf8(StringView) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; + virtual ErrorOr to_utf8(StringView) override; }; class Latin1Decoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class Latin2Decoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class HebrewDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class CyrillicDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class Koi8RDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class Latin9Decoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class MacRomanDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class TurkishDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; class XUserDefinedDecoder final : public Decoder { public: - virtual void process(StringView, Function on_code_point) override; + virtual ErrorOr process(StringView, Function(u32)> on_code_point) override; }; Optional decoder_for(StringView encoding); @@ -94,6 +95,6 @@ Optional bom_sniff_to_decoder(StringView); // NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this. // This will use the given decoder unless there is a byte order mark in the input, in which we will instead use the appropriate Unicode decoder. -DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView); +ErrorOr convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView); } diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index 8cb69ab0c7..f808bbaea7 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -206,37 +206,39 @@ Tokenizer::Tokenizer(StringView input, StringView encoding) bool last_was_carriage_return = false; // To filter code points from a stream of (unfiltered) code points input: - decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) { - // Replace any U+000D CARRIAGE RETURN (CR) code points, - // U+000C FORM FEED (FF) code points, - // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) - // in input by a single U+000A LINE FEED (LF) code point. - if (code_point == '\r') { - if (last_was_carriage_return) { - builder.append('\n'); - } else { - last_was_carriage_return = true; - } - } else { - if (last_was_carriage_return) - builder.append('\n'); + decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr { + // Replace any U+000D CARRIAGE RETURN (CR) code points, + // U+000C FORM FEED (FF) code points, + // or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) + // in input by a single U+000A LINE FEED (LF) code point. + if (code_point == '\r') { + if (last_was_carriage_return) { + TRY(builder.try_append('\n')); + } else { + last_was_carriage_return = true; + } + } else { + if (last_was_carriage_return) + TRY(builder.try_append('\n')); - if (code_point == '\n') { - if (!last_was_carriage_return) - builder.append('\n'); + if (code_point == '\n') { + if (!last_was_carriage_return) + TRY(builder.try_append('\n')); - } else if (code_point == '\f') { - builder.append('\n'); - // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { - builder.append_code_point(REPLACEMENT_CHARACTER); - } else { - builder.append_code_point(code_point); - } + } else if (code_point == '\f') { + TRY(builder.try_append('\n')); + // Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). + } else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) { + TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER)); + } else { + TRY(builder.try_append_code_point(code_point)); + } - last_was_carriage_return = false; - } - }); + last_was_carriage_return = false; + } + return {}; + }) + .release_value_but_fixme_should_propagate_errors(); return builder.to_string(); }; diff --git a/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp b/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp index af2cce78aa..1420352dbc 100644 --- a/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp +++ b/Userland/Libraries/LibWeb/Encoding/TextDecoder.cpp @@ -50,7 +50,7 @@ WebIDL::ExceptionOr TextDecoder::decode(JS::Handle if (data_buffer_or_error.is_error()) return WebIDL::OperationError::create(realm(), "Failed to copy bytes from ArrayBuffer"); auto& data_buffer = data_buffer_or_error.value(); - return m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() }); + return TRY_OR_THROW_OOM(vm(), m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() })); } } diff --git a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp index b3904a5d84..8b9943944c 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLScriptElement.cpp @@ -515,7 +515,7 @@ void HTMLScriptElement::resource_did_load() // we have to re-encode it to UTF-8. if (resource()->has_encoding()) { if (auto codec = TextCodec::decoder_for(resource()->encoding().value()); codec.has_value()) { - data = codec->to_utf8(data).to_byte_buffer(); + data = codec->to_utf8(data).release_value_but_fixme_should_propagate_errors().to_deprecated_string().to_byte_buffer(); } } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 3dfd4e3274..f661ee6f80 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -2800,7 +2800,7 @@ HTMLTokenizer::HTMLTokenizer(StringView input, DeprecatedString const& encoding) { auto decoder = TextCodec::decoder_for(encoding); VERIFY(decoder.has_value()); - m_decoded_input = decoder->to_utf8(input); + m_decoded_input = decoder->to_utf8(input).release_value_but_fixme_should_propagate_errors().to_deprecated_string(); m_utf8_view = Utf8View(m_decoded_input); m_utf8_iterator = m_utf8_view.begin(); m_prev_utf8_iterator = m_utf8_view.begin(); diff --git a/Userland/Libraries/LibWeb/HTML/Window.cpp b/Userland/Libraries/LibWeb/HTML/Window.cpp index 1edac12573..c03b9e7425 100644 --- a/Userland/Libraries/LibWeb/HTML/Window.cpp +++ b/Userland/Libraries/LibWeb/HTML/Window.cpp @@ -1421,9 +1421,9 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob) // NOTE: Any 8-bit encoding -> utf-8 decoder will work for this auto text_decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(text_decoder.has_value()); - auto text = text_decoder->to_utf8(decoded.release_value()); + auto text = TRY_OR_THROW_OOM(vm, text_decoder->to_utf8(decoded.release_value())); - return JS::PrimitiveString::create(vm, DeprecatedString(text)); + return JS::PrimitiveString::create(vm, text); } JS_DEFINE_NATIVE_FUNCTION(Window::btoa) diff --git a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp index c31c8982b1..e600860285 100644 --- a/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp +++ b/Userland/Libraries/LibWeb/HTML/WorkerGlobalScope.cpp @@ -163,7 +163,7 @@ WebIDL::ExceptionOr WorkerGlobalScope::atob(DeprecatedString c // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8. auto decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder.has_value()); - return decoder->to_utf8(decoded_data.value()); + return TRY_OR_THROW_OOM(vm(), decoder->to_utf8(decoded_data.value())); } } diff --git a/Userland/Libraries/LibWeb/Infra/JSON.cpp b/Userland/Libraries/LibWeb/Infra/JSON.cpp index 5bc079c626..05d73c5dfc 100644 --- a/Userland/Libraries/LibWeb/Infra/JSON.cpp +++ b/Userland/Libraries/LibWeb/Infra/JSON.cpp @@ -27,7 +27,7 @@ WebIDL::ExceptionOr parse_json_bytes_to_javascript_value(JS::VM& vm, { // 1. Let string be the result of running UTF-8 decode on bytes. TextCodec::UTF8Decoder decoder; - auto string = decoder.to_utf8(bytes); + auto string = TRY_OR_THROW_OOM(vm, decoder.to_utf8(bytes)); // 2. Return the result of parsing a JSON string to an Infra value given string. return parse_json_string_to_javascript_value(vm, string); diff --git a/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp b/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp index 1f8529eb91..e12d38c38e 100644 --- a/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp +++ b/Userland/Libraries/LibWeb/XHR/XMLHttpRequest.cpp @@ -224,7 +224,7 @@ DeprecatedString XMLHttpRequest::get_text_response() const // If we don't support the decoder yet, let's crash instead of attempting to return something, as the result would be incorrect and create obscure bugs. VERIFY(decoder.has_value()); - return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes); + return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes).release_value_but_fixme_should_propagate_errors().to_deprecated_string(); } // https://xhr.spec.whatwg.org/#final-mime-type diff --git a/Userland/Utilities/js.cpp b/Userland/Utilities/js.cpp index 855f3b1d27..162741661f 100644 --- a/Userland/Utilities/js.cpp +++ b/Userland/Utilities/js.cpp @@ -885,7 +885,7 @@ ErrorOr serenity_main(Main::Arguments arguments) auto decoder = TextCodec::decoder_for("windows-1252"sv); VERIFY(decoder.has_value()); - auto utf8_source = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source); + auto utf8_source = TRY(TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source)); builder.append(utf8_source); } }