mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 08:24:58 +00:00
LibTextCodec+Everywhere: Port Decoders to new Strings
This commit is contained in:
parent
3c5090e172
commit
2db168acc1
21 changed files with 149 additions and 123 deletions
|
@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto decoder = TextCodec::decoder_for("windows-1251"sv);
|
auto decoder = TextCodec::decoder_for("windows-1251"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
decoder->to_utf8({ data, size });
|
(void)decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto decoder = TextCodec::decoder_for("windows-1255"sv);
|
auto decoder = TextCodec::decoder_for("windows-1255"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
decoder->to_utf8({ data, size });
|
(void)decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
decoder->to_utf8({ data, size });
|
(void)decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto decoder = TextCodec::decoder_for("iso-8859-2"sv);
|
auto decoder = TextCodec::decoder_for("iso-8859-2"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
decoder->to_utf8({ data, size });
|
(void)decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,6 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto decoder = TextCodec::decoder_for("utf-16be"sv);
|
auto decoder = TextCodec::decoder_for("utf-16be"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
decoder->to_utf8({ data, size });
|
(void)decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <AK/String.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
#include <LibTest/TestCase.h>
|
#include <LibTest/TestCase.h>
|
||||||
#include <LibTextCodec/Decoder.h>
|
#include <LibTextCodec/Decoder.h>
|
||||||
|
@ -15,13 +16,13 @@ TEST_CASE(test_utf8_decode)
|
||||||
auto test_string = "\xf0\x9f\x98\x80"sv;
|
auto test_string = "\xf0\x9f\x98\x80"sv;
|
||||||
|
|
||||||
Vector<u32> processed_code_points;
|
Vector<u32> processed_code_points;
|
||||||
decoder.process(test_string, [&](u32 code_point) {
|
MUST(decoder.process(test_string, [&](u32 code_point) {
|
||||||
processed_code_points.append(code_point);
|
return processed_code_points.try_append(code_point);
|
||||||
});
|
}));
|
||||||
EXPECT(processed_code_points.size() == 1);
|
EXPECT(processed_code_points.size() == 1);
|
||||||
EXPECT(processed_code_points[0] == 0x1F600);
|
EXPECT(processed_code_points[0] == 0x1F600);
|
||||||
|
|
||||||
EXPECT(decoder.to_utf8(test_string) == test_string);
|
EXPECT(MUST(decoder.to_utf8(test_string)) == test_string);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE(test_utf16be_decode)
|
TEST_CASE(test_utf16be_decode)
|
||||||
|
@ -31,9 +32,9 @@ TEST_CASE(test_utf16be_decode)
|
||||||
auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
|
auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
|
||||||
|
|
||||||
Vector<u32> processed_code_points;
|
Vector<u32> processed_code_points;
|
||||||
decoder.process(test_string, [&](u32 code_point) {
|
MUST(decoder.process(test_string, [&](u32 code_point) {
|
||||||
processed_code_points.append(code_point);
|
return processed_code_points.try_append(code_point);
|
||||||
});
|
}));
|
||||||
EXPECT(processed_code_points.size() == 4);
|
EXPECT(processed_code_points.size() == 4);
|
||||||
EXPECT(processed_code_points[0] == 0x73);
|
EXPECT(processed_code_points[0] == 0x73);
|
||||||
EXPECT(processed_code_points[1] == 0xE4);
|
EXPECT(processed_code_points[1] == 0xE4);
|
||||||
|
@ -48,9 +49,9 @@ TEST_CASE(test_utf16le_decode)
|
||||||
auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
|
auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
|
||||||
|
|
||||||
Vector<u32> processed_code_points;
|
Vector<u32> processed_code_points;
|
||||||
decoder.process(test_string, [&](u32 code_point) {
|
MUST(decoder.process(test_string, [&](u32 code_point) {
|
||||||
processed_code_points.append(code_point);
|
return processed_code_points.try_append(code_point);
|
||||||
});
|
}));
|
||||||
EXPECT(processed_code_points.size() == 4);
|
EXPECT(processed_code_points.size() == 4);
|
||||||
EXPECT(processed_code_points[0] == 0x73);
|
EXPECT(processed_code_points[0] == 0x73);
|
||||||
EXPECT(processed_code_points[1] == 0xE4);
|
EXPECT(processed_code_points[1] == 0xE4);
|
||||||
|
|
|
@ -392,7 +392,7 @@ void HexEditorWidget::update_inspector_values(size_t position)
|
||||||
|
|
||||||
// FIXME: Parse as other values like Timestamp etc
|
// FIXME: Parse as other values like Timestamp etc
|
||||||
|
|
||||||
DeprecatedString utf16_string = TextCodec::decoder_for("utf-16le"sv)->to_utf8(StringView(selected_bytes.span()));
|
DeprecatedString utf16_string = TextCodec::decoder_for("utf-16le"sv)->to_utf8(StringView(selected_bytes.span())).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
|
||||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16String, utf16_string);
|
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16String, utf16_string);
|
||||||
|
|
||||||
m_value_inspector->set_model(value_inspector_model);
|
m_value_inspector->set_model(value_inspector_model);
|
||||||
|
|
|
@ -322,7 +322,7 @@ DeprecatedString Name::string_for_id(NameId id) const
|
||||||
|
|
||||||
if (platform_id == to_underlying(Platform::Windows)) {
|
if (platform_id == to_underlying(Platform::Windows)) {
|
||||||
static auto& decoder = *TextCodec::decoder_for("utf-16be"sv);
|
static auto& decoder = *TextCodec::decoder_for("utf-16be"sv);
|
||||||
return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length });
|
return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length }).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
return DeprecatedString((char const*)m_slice.offset_pointer(storage_offset + offset), length);
|
return DeprecatedString((char const*)m_slice.offset_pointer(storage_offset + offset), length);
|
||||||
|
|
|
@ -703,7 +703,7 @@ ErrorOr<NonnullRefPtr<MultiLocalizedUnicodeTagData>> MultiLocalizedUnicodeTagDat
|
||||||
return Error::from_string_literal("ICC::Profile: multiLocalizedUnicodeType string offset out of bounds");
|
return Error::from_string_literal("ICC::Profile: multiLocalizedUnicodeType string offset out of bounds");
|
||||||
|
|
||||||
StringView utf_16be_data { bytes.data() + record.string_offset_in_bytes, record.string_length_in_bytes };
|
StringView utf_16be_data { bytes.data() + record.string_offset_in_bytes, record.string_length_in_bytes };
|
||||||
records[i].text = TRY(String::from_deprecated_string(utf_16be_decoder.to_utf8(utf_16be_data)));
|
records[i].text = TRY(utf_16be_decoder.to_utf8(utf_16be_data));
|
||||||
}
|
}
|
||||||
|
|
||||||
return try_make_ref_counted<MultiLocalizedUnicodeTagData>(offset, size, move(records));
|
return try_make_ref_counted<MultiLocalizedUnicodeTagData>(offset, size, move(records));
|
||||||
|
@ -955,7 +955,7 @@ ErrorOr<NonnullRefPtr<TextDescriptionTagData>> TextDescriptionTagData::from_byte
|
||||||
return Error::from_string_literal("ICC::Profile: textDescriptionType Unicode description not \\0-terminated");
|
return Error::from_string_literal("ICC::Profile: textDescriptionType Unicode description not \\0-terminated");
|
||||||
|
|
||||||
StringView utf_16be_data { unicode_description_data, byte_size_without_nul };
|
StringView utf_16be_data { unicode_description_data, byte_size_without_nul };
|
||||||
unicode_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(utf_16be_data)));
|
unicode_description = TRY(TextCodec::decoder_for("utf-16be"sv)->to_utf8(utf_16be_data));
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScriptCode
|
// ScriptCode
|
||||||
|
@ -1001,7 +1001,7 @@ ErrorOr<NonnullRefPtr<TextDescriptionTagData>> TextDescriptionTagData::from_byte
|
||||||
if (macintosh_description_data[macintosh_description_length - 1] != '\0')
|
if (macintosh_description_data[macintosh_description_length - 1] != '\0')
|
||||||
return Error::from_string_literal("ICC::Profile: textDescriptionType ScriptCode not \\0-terminated");
|
return Error::from_string_literal("ICC::Profile: textDescriptionType ScriptCode not \\0-terminated");
|
||||||
|
|
||||||
macintosh_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("x-mac-roman"sv)->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 })));
|
macintosh_description = TRY(TextCodec::decoder_for("x-mac-roman"sv)->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 }));
|
||||||
} else {
|
} else {
|
||||||
dbgln("TODO: ICCProfile textDescriptionType ScriptCode {}, length {}", scriptcode_code, macintosh_description_length);
|
dbgln("TODO: ICCProfile textDescriptionType ScriptCode {}, length {}", scriptcode_code, macintosh_description_length);
|
||||||
}
|
}
|
||||||
|
|
|
@ -267,7 +267,7 @@ NonnullRefPtr<StringObject> Parser::parse_string()
|
||||||
|
|
||||||
if (unencrypted_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) {
|
if (unencrypted_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) {
|
||||||
// The string is encoded in UTF16-BE
|
// The string is encoded in UTF16-BE
|
||||||
string_object->set_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(unencrypted_string));
|
string_object->set_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(unencrypted_string).release_value_but_fixme_should_propagate_errors().to_deprecated_string());
|
||||||
} else if (unencrypted_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) {
|
} else if (unencrypted_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) {
|
||||||
// The string is encoded in UTF-8. This is the default anyways, but if these bytes
|
// The string is encoded in UTF-8. This is the default anyways, but if these bytes
|
||||||
// are explicitly included, we have to trim them
|
// are explicitly included, we have to trim them
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/DeprecatedString.h>
|
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
|
@ -197,7 +196,7 @@ Optional<Decoder&> bom_sniff_to_decoder(StringView input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://encoding.spec.whatwg.org/#decode
|
// https://encoding.spec.whatwg.org/#decode
|
||||||
DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder& fallback_decoder, StringView input)
|
ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder& fallback_decoder, StringView input)
|
||||||
{
|
{
|
||||||
Decoder* actual_decoder = &fallback_decoder;
|
Decoder* actual_decoder = &fallback_decoder;
|
||||||
|
|
||||||
|
@ -220,21 +219,22 @@ DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byt
|
||||||
return actual_decoder->to_utf8(input);
|
return actual_decoder->to_utf8(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeprecatedString Decoder::to_utf8(StringView input)
|
ErrorOr<String> Decoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
StringBuilder builder(input.length());
|
StringBuilder builder(input.length());
|
||||||
process(input, [&builder](u32 c) { builder.append_code_point(c); });
|
TRY(process(input, [&builder](u32 c) { return builder.try_append_code_point(c); }));
|
||||||
return builder.to_deprecated_string();
|
return builder.to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
void UTF8Decoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> UTF8Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
for (auto c : Utf8View(input)) {
|
for (auto c : Utf8View(input)) {
|
||||||
on_code_point(c);
|
TRY(on_code_point(c));
|
||||||
}
|
}
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
DeprecatedString UTF8Decoder::to_utf8(StringView input)
|
ErrorOr<String> UTF8Decoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
// Discard the BOM
|
// Discard the BOM
|
||||||
auto bomless_input = input;
|
auto bomless_input = input;
|
||||||
|
@ -242,10 +242,10 @@ DeprecatedString UTF8Decoder::to_utf8(StringView input)
|
||||||
bomless_input = input.substring_view(3);
|
bomless_input = input.substring_view(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
return bomless_input;
|
return String::from_utf8(bomless_input);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UTF16BEDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> UTF16BEDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
// rfc2781, 2.2 Decoding UTF-16
|
// rfc2781, 2.2 Decoding UTF-16
|
||||||
size_t utf16_length = input.length() - (input.length() % 2);
|
size_t utf16_length = input.length() - (input.length() % 2);
|
||||||
|
@ -254,7 +254,7 @@ void UTF16BEDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
// of W1. Terminate.
|
// of W1. Terminate.
|
||||||
u16 w1 = (static_cast<u8>(input[i]) << 8) | static_cast<u8>(input[i + 1]);
|
u16 w1 = (static_cast<u8>(input[i]) << 8) | static_cast<u8>(input[i + 1]);
|
||||||
if (!is_unicode_surrogate(w1)) {
|
if (!is_unicode_surrogate(w1)) {
|
||||||
on_code_point(w1);
|
TRY(on_code_point(w1));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,13 +265,13 @@ void UTF16BEDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
// is not between 0xDC00 and 0xDFFF, the sequence is in error.
|
// is not between 0xDC00 and 0xDFFF, the sequence is in error.
|
||||||
// Terminate.
|
// Terminate.
|
||||||
if (!Utf16View::is_high_surrogate(w1) || i + 2 == utf16_length) {
|
if (!Utf16View::is_high_surrogate(w1) || i + 2 == utf16_length) {
|
||||||
on_code_point(replacement_code_point);
|
TRY(on_code_point(replacement_code_point));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 w2 = (static_cast<u8>(input[i + 2]) << 8) | static_cast<u8>(input[i + 3]);
|
u16 w2 = (static_cast<u8>(input[i + 2]) << 8) | static_cast<u8>(input[i + 3]);
|
||||||
if (!Utf16View::is_low_surrogate(w2)) {
|
if (!Utf16View::is_low_surrogate(w2)) {
|
||||||
on_code_point(replacement_code_point);
|
TRY(on_code_point(replacement_code_point));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -279,12 +279,14 @@ void UTF16BEDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
// bits of W1 as its 10 high-order bits and the 10 low-order bits of
|
// bits of W1 as its 10 high-order bits and the 10 low-order bits of
|
||||||
// W2 as its 10 low-order bits.
|
// W2 as its 10 low-order bits.
|
||||||
// 5) Add 0x10000 to U' to obtain the character value U. Terminate.
|
// 5) Add 0x10000 to U' to obtain the character value U. Terminate.
|
||||||
on_code_point(Utf16View::decode_surrogate_pair(w1, w2));
|
TRY(on_code_point(Utf16View::decode_surrogate_pair(w1, w2)));
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
DeprecatedString UTF16BEDecoder::to_utf8(StringView input)
|
ErrorOr<String> UTF16BEDecoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
// Discard the BOM
|
// Discard the BOM
|
||||||
auto bomless_input = input;
|
auto bomless_input = input;
|
||||||
|
@ -292,11 +294,11 @@ DeprecatedString UTF16BEDecoder::to_utf8(StringView input)
|
||||||
bomless_input = input.substring_view(2);
|
bomless_input = input.substring_view(2);
|
||||||
|
|
||||||
StringBuilder builder(bomless_input.length() / 2);
|
StringBuilder builder(bomless_input.length() / 2);
|
||||||
process(bomless_input, [&builder](u32 c) { builder.append_code_point(c); });
|
TRY(process(bomless_input, [&builder](u32 c) { return builder.try_append_code_point(c); }));
|
||||||
return builder.to_deprecated_string();
|
return builder.to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
void UTF16LEDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> UTF16LEDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
// rfc2781, 2.2 Decoding UTF-16
|
// rfc2781, 2.2 Decoding UTF-16
|
||||||
size_t utf16_length = input.length() - (input.length() % 2);
|
size_t utf16_length = input.length() - (input.length() % 2);
|
||||||
|
@ -305,7 +307,7 @@ void UTF16LEDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
// of W1. Terminate.
|
// of W1. Terminate.
|
||||||
u16 w1 = static_cast<u8>(input[i]) | (static_cast<u8>(input[i + 1]) << 8);
|
u16 w1 = static_cast<u8>(input[i]) | (static_cast<u8>(input[i + 1]) << 8);
|
||||||
if (!is_unicode_surrogate(w1)) {
|
if (!is_unicode_surrogate(w1)) {
|
||||||
on_code_point(w1);
|
TRY(on_code_point(w1));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,13 +318,13 @@ void UTF16LEDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
// is not between 0xDC00 and 0xDFFF, the sequence is in error.
|
// is not between 0xDC00 and 0xDFFF, the sequence is in error.
|
||||||
// Terminate.
|
// Terminate.
|
||||||
if (!Utf16View::is_high_surrogate(w1) || i + 2 == utf16_length) {
|
if (!Utf16View::is_high_surrogate(w1) || i + 2 == utf16_length) {
|
||||||
on_code_point(replacement_code_point);
|
TRY(on_code_point(replacement_code_point));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 w2 = static_cast<u8>(input[i + 2]) | (static_cast<u8>(input[i + 3]) << 8);
|
u16 w2 = static_cast<u8>(input[i + 2]) | (static_cast<u8>(input[i + 3]) << 8);
|
||||||
if (!Utf16View::is_low_surrogate(w2)) {
|
if (!Utf16View::is_low_surrogate(w2)) {
|
||||||
on_code_point(replacement_code_point);
|
TRY(on_code_point(replacement_code_point));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -330,12 +332,14 @@ void UTF16LEDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
// bits of W1 as its 10 high-order bits and the 10 low-order bits of
|
// bits of W1 as its 10 high-order bits and the 10 low-order bits of
|
||||||
// W2 as its 10 low-order bits.
|
// W2 as its 10 low-order bits.
|
||||||
// 5) Add 0x10000 to U' to obtain the character value U. Terminate.
|
// 5) Add 0x10000 to U' to obtain the character value U. Terminate.
|
||||||
on_code_point(Utf16View::decode_surrogate_pair(w1, w2));
|
TRY(on_code_point(Utf16View::decode_surrogate_pair(w1, w2)));
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
DeprecatedString UTF16LEDecoder::to_utf8(StringView input)
|
ErrorOr<String> UTF16LEDecoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
// Discard the BOM
|
// Discard the BOM
|
||||||
auto bomless_input = input;
|
auto bomless_input = input;
|
||||||
|
@ -343,16 +347,18 @@ DeprecatedString UTF16LEDecoder::to_utf8(StringView input)
|
||||||
bomless_input = input.substring_view(2);
|
bomless_input = input.substring_view(2);
|
||||||
|
|
||||||
StringBuilder builder(bomless_input.length() / 2);
|
StringBuilder builder(bomless_input.length() / 2);
|
||||||
process(bomless_input, [&builder](u32 c) { builder.append_code_point(c); });
|
TRY(process(bomless_input, [&builder](u32 c) { return builder.try_append_code_point(c); }));
|
||||||
return builder.to_deprecated_string();
|
return builder.to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Latin1Decoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> Latin1Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
for (auto ch : input) {
|
for (auto ch : input) {
|
||||||
// Latin1 is the same as the first 256 Unicode code_points, so no mapping is needed, just utf-8 encoding.
|
// Latin1 is the same as the first 256 Unicode code_points, so no mapping is needed, just utf-8 encoding.
|
||||||
on_code_point(ch);
|
TRY(on_code_point(ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -434,14 +440,16 @@ u32 convert_latin2_to_utf8(u8 in)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Latin2Decoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> Latin2Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
for (auto c : input) {
|
for (auto c : input) {
|
||||||
on_code_point(convert_latin2_to_utf8(c));
|
TRY(on_code_point(convert_latin2_to_utf8(c)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void HebrewDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> HebrewDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
static constexpr Array<u32, 128> translation_table = {
|
static constexpr Array<u32, 128> translation_table = {
|
||||||
0x20AC, 0xFFFD, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x2C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
0x20AC, 0xFFFD, 0x201A, 0x192, 0x201E, 0x2026, 0x2020, 0x2021, 0x2C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||||||
|
@ -455,14 +463,16 @@ void HebrewDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
||||||
};
|
};
|
||||||
for (unsigned char ch : input) {
|
for (unsigned char ch : input) {
|
||||||
if (ch < 0x80) { // Superset of ASCII
|
if (ch < 0x80) { // Superset of ASCII
|
||||||
on_code_point(ch);
|
TRY(on_code_point(ch));
|
||||||
} else {
|
} else {
|
||||||
on_code_point(translation_table[ch - 0x80]);
|
TRY(on_code_point(translation_table[ch - 0x80]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void CyrillicDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> CyrillicDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
static constexpr Array<u32, 128> translation_table = {
|
static constexpr Array<u32, 128> translation_table = {
|
||||||
0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F,
|
0x402, 0x403, 0x201A, 0x453, 0x201E, 0x2026, 0x2020, 0x2021, 0x20AC, 0x2030, 0x409, 0x2039, 0x40A, 0x40C, 0x40B, 0x40F,
|
||||||
|
@ -476,14 +486,16 @@ void CyrillicDecoder::process(StringView input, Function<void(u32)> on_code_poin
|
||||||
};
|
};
|
||||||
for (unsigned char ch : input) {
|
for (unsigned char ch : input) {
|
||||||
if (ch < 0x80) { // Superset of ASCII
|
if (ch < 0x80) { // Superset of ASCII
|
||||||
on_code_point(ch);
|
TRY(on_code_point(ch));
|
||||||
} else {
|
} else {
|
||||||
on_code_point(translation_table[ch - 0x80]);
|
TRY(on_code_point(translation_table[ch - 0x80]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void Koi8RDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> Koi8RDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
// clang-format off
|
// clang-format off
|
||||||
static constexpr Array<u32, 128> translation_table = {
|
static constexpr Array<u32, 128> translation_table = {
|
||||||
|
@ -500,14 +512,16 @@ void Koi8RDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
||||||
|
|
||||||
for (unsigned char ch : input) {
|
for (unsigned char ch : input) {
|
||||||
if (ch < 0x80) { // Superset of ASCII
|
if (ch < 0x80) { // Superset of ASCII
|
||||||
on_code_point(ch);
|
TRY(on_code_point(ch));
|
||||||
} else {
|
} else {
|
||||||
on_code_point(translation_table[ch - 0x80]);
|
TRY(on_code_point(translation_table[ch - 0x80]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void Latin9Decoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> Latin9Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
auto convert_latin9_to_utf8 = [](u8 ch) -> u32 {
|
auto convert_latin9_to_utf8 = [](u8 ch) -> u32 {
|
||||||
// Latin9 is the same as the first 256 Unicode code points, except for 8 characters.
|
// Latin9 is the same as the first 256 Unicode code points, except for 8 characters.
|
||||||
|
@ -534,11 +548,13 @@ void Latin9Decoder::process(StringView input, Function<void(u32)> on_code_point)
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto ch : input) {
|
for (auto ch : input) {
|
||||||
on_code_point(convert_latin9_to_utf8(ch));
|
TRY(on_code_point(convert_latin9_to_utf8(ch)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void MacRomanDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> MacRomanDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
// https://encoding.spec.whatwg.org/index-macintosh.txt
|
// https://encoding.spec.whatwg.org/index-macintosh.txt
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
@ -556,14 +572,16 @@ void MacRomanDecoder::process(StringView input, Function<void(u32)> on_code_poin
|
||||||
|
|
||||||
for (u8 ch : input) {
|
for (u8 ch : input) {
|
||||||
if (ch < 0x80) { // Superset of ASCII
|
if (ch < 0x80) { // Superset of ASCII
|
||||||
on_code_point(ch);
|
TRY(on_code_point(ch));
|
||||||
} else {
|
} else {
|
||||||
on_code_point(translation_table[ch - 0x80]);
|
TRY(on_code_point(translation_table[ch - 0x80]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
void TurkishDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> TurkishDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
auto convert_turkish_to_utf8 = [](u8 ch) -> u32 {
|
auto convert_turkish_to_utf8 = [](u8 ch) -> u32 {
|
||||||
// Turkish (aka ISO-8859-9, Windows-1254) is the same as the first 256 Unicode code points, except for 6 characters.
|
// Turkish (aka ISO-8859-9, Windows-1254) is the same as the first 256 Unicode code points, except for 6 characters.
|
||||||
|
@ -586,12 +604,14 @@ void TurkishDecoder::process(StringView input, Function<void(u32)> on_code_point
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto ch : input) {
|
for (auto ch : input) {
|
||||||
on_code_point(convert_turkish_to_utf8(ch));
|
TRY(on_code_point(convert_turkish_to_utf8(ch)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
|
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
|
||||||
void XUserDefinedDecoder::process(StringView input, Function<void(u32)> on_code_point)
|
ErrorOr<void> XUserDefinedDecoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point)
|
||||||
{
|
{
|
||||||
auto convert_x_user_defined_to_utf8 = [](u8 ch) -> u32 {
|
auto convert_x_user_defined_to_utf8 = [](u8 ch) -> u32 {
|
||||||
// 2. If byte is an ASCII byte, return a code point whose value is byte.
|
// 2. If byte is an ASCII byte, return a code point whose value is byte.
|
||||||
|
@ -606,10 +626,12 @@ void XUserDefinedDecoder::process(StringView input, Function<void(u32)> on_code_
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto ch : input) {
|
for (auto ch : input) {
|
||||||
on_code_point(convert_x_user_defined_to_utf8(ch));
|
TRY(on_code_point(convert_x_user_defined_to_utf8(ch)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. If byte is end-of-queue, return finished.
|
// 1. If byte is end-of-queue, return finished.
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,13 +11,14 @@
|
||||||
#include <AK/Forward.h>
|
#include <AK/Forward.h>
|
||||||
#include <AK/Function.h>
|
#include <AK/Function.h>
|
||||||
#include <AK/Optional.h>
|
#include <AK/Optional.h>
|
||||||
|
#include <AK/String.h>
|
||||||
|
|
||||||
namespace TextCodec {
|
namespace TextCodec {
|
||||||
|
|
||||||
class Decoder {
|
class Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) = 0;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) = 0;
|
||||||
virtual DeprecatedString to_utf8(StringView);
|
virtual ErrorOr<String> to_utf8(StringView);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual ~Decoder() = default;
|
virtual ~Decoder() = default;
|
||||||
|
@ -25,65 +26,65 @@ protected:
|
||||||
|
|
||||||
class UTF8Decoder final : public Decoder {
|
class UTF8Decoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
virtual DeprecatedString to_utf8(StringView) override;
|
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class UTF16BEDecoder final : public Decoder {
|
class UTF16BEDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
virtual DeprecatedString to_utf8(StringView) override;
|
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class UTF16LEDecoder final : public Decoder {
|
class UTF16LEDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
virtual DeprecatedString to_utf8(StringView) override;
|
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Latin1Decoder final : public Decoder {
|
class Latin1Decoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Latin2Decoder final : public Decoder {
|
class Latin2Decoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class HebrewDecoder final : public Decoder {
|
class HebrewDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CyrillicDecoder final : public Decoder {
|
class CyrillicDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Koi8RDecoder final : public Decoder {
|
class Koi8RDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Latin9Decoder final : public Decoder {
|
class Latin9Decoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class MacRomanDecoder final : public Decoder {
|
class MacRomanDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class TurkishDecoder final : public Decoder {
|
class TurkishDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class XUserDefinedDecoder final : public Decoder {
|
class XUserDefinedDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
Optional<Decoder&> decoder_for(StringView encoding);
|
Optional<Decoder&> decoder_for(StringView encoding);
|
||||||
|
@ -94,6 +95,6 @@ Optional<Decoder&> bom_sniff_to_decoder(StringView);
|
||||||
|
|
||||||
// NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
|
// NOTE: This has an obnoxious name to discourage usage. Only use this if you absolutely must! For example, XHR in LibWeb uses this.
|
||||||
// This will use the given decoder unless there is a byte order mark in the input, in which we will instead use the appropriate Unicode decoder.
|
// This will use the given decoder unless there is a byte order mark in the input, in which we will instead use the appropriate Unicode decoder.
|
||||||
DeprecatedString convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView);
|
ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(Decoder&, StringView);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -206,37 +206,39 @@ Tokenizer::Tokenizer(StringView input, StringView encoding)
|
||||||
bool last_was_carriage_return = false;
|
bool last_was_carriage_return = false;
|
||||||
|
|
||||||
// To filter code points from a stream of (unfiltered) code points input:
|
// To filter code points from a stream of (unfiltered) code points input:
|
||||||
decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) {
|
decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> {
|
||||||
// Replace any U+000D CARRIAGE RETURN (CR) code points,
|
// Replace any U+000D CARRIAGE RETURN (CR) code points,
|
||||||
// U+000C FORM FEED (FF) code points,
|
// U+000C FORM FEED (FF) code points,
|
||||||
// or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF)
|
// or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF)
|
||||||
// in input by a single U+000A LINE FEED (LF) code point.
|
// in input by a single U+000A LINE FEED (LF) code point.
|
||||||
if (code_point == '\r') {
|
if (code_point == '\r') {
|
||||||
if (last_was_carriage_return) {
|
if (last_was_carriage_return) {
|
||||||
builder.append('\n');
|
TRY(builder.try_append('\n'));
|
||||||
} else {
|
} else {
|
||||||
last_was_carriage_return = true;
|
last_was_carriage_return = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (last_was_carriage_return)
|
if (last_was_carriage_return)
|
||||||
builder.append('\n');
|
TRY(builder.try_append('\n'));
|
||||||
|
|
||||||
if (code_point == '\n') {
|
if (code_point == '\n') {
|
||||||
if (!last_was_carriage_return)
|
if (!last_was_carriage_return)
|
||||||
builder.append('\n');
|
TRY(builder.try_append('\n'));
|
||||||
|
|
||||||
} else if (code_point == '\f') {
|
} else if (code_point == '\f') {
|
||||||
builder.append('\n');
|
TRY(builder.try_append('\n'));
|
||||||
// Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (<28>).
|
// Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (<28>).
|
||||||
} else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
|
} else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
|
||||||
builder.append_code_point(REPLACEMENT_CHARACTER);
|
TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER));
|
||||||
} else {
|
} else {
|
||||||
builder.append_code_point(code_point);
|
TRY(builder.try_append_code_point(code_point));
|
||||||
}
|
}
|
||||||
|
|
||||||
last_was_carriage_return = false;
|
last_was_carriage_return = false;
|
||||||
}
|
}
|
||||||
});
|
return {};
|
||||||
|
})
|
||||||
|
.release_value_but_fixme_should_propagate_errors();
|
||||||
return builder.to_string();
|
return builder.to_string();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ WebIDL::ExceptionOr<DeprecatedString> TextDecoder::decode(JS::Handle<JS::Object>
|
||||||
if (data_buffer_or_error.is_error())
|
if (data_buffer_or_error.is_error())
|
||||||
return WebIDL::OperationError::create(realm(), "Failed to copy bytes from ArrayBuffer");
|
return WebIDL::OperationError::create(realm(), "Failed to copy bytes from ArrayBuffer");
|
||||||
auto& data_buffer = data_buffer_or_error.value();
|
auto& data_buffer = data_buffer_or_error.value();
|
||||||
return m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() });
|
return TRY_OR_THROW_OOM(vm(), m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() }));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -515,7 +515,7 @@ void HTMLScriptElement::resource_did_load()
|
||||||
// we have to re-encode it to UTF-8.
|
// we have to re-encode it to UTF-8.
|
||||||
if (resource()->has_encoding()) {
|
if (resource()->has_encoding()) {
|
||||||
if (auto codec = TextCodec::decoder_for(resource()->encoding().value()); codec.has_value()) {
|
if (auto codec = TextCodec::decoder_for(resource()->encoding().value()); codec.has_value()) {
|
||||||
data = codec->to_utf8(data).to_byte_buffer();
|
data = codec->to_utf8(data).release_value_but_fixme_should_propagate_errors().to_deprecated_string().to_byte_buffer();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2800,7 +2800,7 @@ HTMLTokenizer::HTMLTokenizer(StringView input, DeprecatedString const& encoding)
|
||||||
{
|
{
|
||||||
auto decoder = TextCodec::decoder_for(encoding);
|
auto decoder = TextCodec::decoder_for(encoding);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
m_decoded_input = decoder->to_utf8(input);
|
m_decoded_input = decoder->to_utf8(input).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
|
||||||
m_utf8_view = Utf8View(m_decoded_input);
|
m_utf8_view = Utf8View(m_decoded_input);
|
||||||
m_utf8_iterator = m_utf8_view.begin();
|
m_utf8_iterator = m_utf8_view.begin();
|
||||||
m_prev_utf8_iterator = m_utf8_view.begin();
|
m_prev_utf8_iterator = m_utf8_view.begin();
|
||||||
|
|
|
@ -1421,9 +1421,9 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob)
|
||||||
// NOTE: Any 8-bit encoding -> utf-8 decoder will work for this
|
// NOTE: Any 8-bit encoding -> utf-8 decoder will work for this
|
||||||
auto text_decoder = TextCodec::decoder_for("windows-1252"sv);
|
auto text_decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(text_decoder.has_value());
|
VERIFY(text_decoder.has_value());
|
||||||
auto text = text_decoder->to_utf8(decoded.release_value());
|
auto text = TRY_OR_THROW_OOM(vm, text_decoder->to_utf8(decoded.release_value()));
|
||||||
|
|
||||||
return JS::PrimitiveString::create(vm, DeprecatedString(text));
|
return JS::PrimitiveString::create(vm, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
JS_DEFINE_NATIVE_FUNCTION(Window::btoa)
|
JS_DEFINE_NATIVE_FUNCTION(Window::btoa)
|
||||||
|
|
|
@ -163,7 +163,7 @@ WebIDL::ExceptionOr<DeprecatedString> WorkerGlobalScope::atob(DeprecatedString c
|
||||||
// decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
|
// decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
|
||||||
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
return decoder->to_utf8(decoded_data.value());
|
return TRY_OR_THROW_OOM(vm(), decoder->to_utf8(decoded_data.value()));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ WebIDL::ExceptionOr<JS::Value> parse_json_bytes_to_javascript_value(JS::VM& vm,
|
||||||
{
|
{
|
||||||
// 1. Let string be the result of running UTF-8 decode on bytes.
|
// 1. Let string be the result of running UTF-8 decode on bytes.
|
||||||
TextCodec::UTF8Decoder decoder;
|
TextCodec::UTF8Decoder decoder;
|
||||||
auto string = decoder.to_utf8(bytes);
|
auto string = TRY_OR_THROW_OOM(vm, decoder.to_utf8(bytes));
|
||||||
|
|
||||||
// 2. Return the result of parsing a JSON string to an Infra value given string.
|
// 2. Return the result of parsing a JSON string to an Infra value given string.
|
||||||
return parse_json_string_to_javascript_value(vm, string);
|
return parse_json_string_to_javascript_value(vm, string);
|
||||||
|
|
|
@ -224,7 +224,7 @@ DeprecatedString XMLHttpRequest::get_text_response() const
|
||||||
// If we don't support the decoder yet, let's crash instead of attempting to return something, as the result would be incorrect and create obscure bugs.
|
// If we don't support the decoder yet, let's crash instead of attempting to return something, as the result would be incorrect and create obscure bugs.
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
|
|
||||||
return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes);
|
return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://xhr.spec.whatwg.org/#final-mime-type
|
// https://xhr.spec.whatwg.org/#final-mime-type
|
||||||
|
|
|
@ -885,7 +885,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
||||||
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(decoder.has_value());
|
VERIFY(decoder.has_value());
|
||||||
|
|
||||||
auto utf8_source = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source);
|
auto utf8_source = TRY(TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source));
|
||||||
builder.append(utf8_source);
|
builder.append(utf8_source);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue