Mirror of https://github.com/RGBCube/serenity (synced 2025-05-31 13:28:11 +00:00)
LibTextCodec+Everywhere: Make TextCodec::decoder_for() take a StringView
We don't need a full String/DeprecatedString inside this function, so we might as well not force users to create one.
This commit is contained in:
parent: 3c8bfa4662
commit: d6075ef5b5
14 changed files with 16 additions and 16 deletions
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto* decoder = TextCodec::decoder_for("windows-1251");
|
auto* decoder = TextCodec::decoder_for("windows-1251"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
decoder->to_utf8({ data, size });
|
decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto* decoder = TextCodec::decoder_for("windows-1255");
|
auto* decoder = TextCodec::decoder_for("windows-1255"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
decoder->to_utf8({ data, size });
|
decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto* decoder = TextCodec::decoder_for("windows-1252");
|
auto* decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
decoder->to_utf8({ data, size });
|
decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto* decoder = TextCodec::decoder_for("iso-8859-2");
|
auto* decoder = TextCodec::decoder_for("iso-8859-2"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
decoder->to_utf8({ data, size });
|
decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||||
{
|
{
|
||||||
auto* decoder = TextCodec::decoder_for("utf-16be");
|
auto* decoder = TextCodec::decoder_for("utf-16be"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
decoder->to_utf8({ data, size });
|
decoder->to_utf8({ data, size });
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -321,7 +321,7 @@ DeprecatedString Name::string_for_id(NameId id) const
|
||||||
auto const offset = name_record.string_offset;
|
auto const offset = name_record.string_offset;
|
||||||
|
|
||||||
if (platform_id == to_underlying(Platform::Windows)) {
|
if (platform_id == to_underlying(Platform::Windows)) {
|
||||||
static auto& decoder = *TextCodec::decoder_for("utf-16be");
|
static auto& decoder = *TextCodec::decoder_for("utf-16be"sv);
|
||||||
return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length });
|
return decoder.to_utf8(StringView { (char const*)m_slice.offset_pointer(storage_offset + offset), length });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -747,7 +747,7 @@ ErrorOr<NonnullRefPtr<MultiLocalizedUnicodeTagData>> MultiLocalizedUnicodeTagDat
|
||||||
// "For the definition of language codes and country codes, see respectively
|
// "For the definition of language codes and country codes, see respectively
|
||||||
// ISO 639-1 and ISO 3166-1. The Unicode strings in storage should be encoded as 16-bit big-endian, UTF-16BE,
|
// ISO 639-1 and ISO 3166-1. The Unicode strings in storage should be encoded as 16-bit big-endian, UTF-16BE,
|
||||||
// and should not be NULL terminated."
|
// and should not be NULL terminated."
|
||||||
auto& utf_16be_decoder = *TextCodec::decoder_for("utf-16be");
|
auto& utf_16be_decoder = *TextCodec::decoder_for("utf-16be"sv);
|
||||||
|
|
||||||
struct RawRecord {
|
struct RawRecord {
|
||||||
BigEndian<u16> language_code;
|
BigEndian<u16> language_code;
|
||||||
|
@ -1023,7 +1023,7 @@ ErrorOr<NonnullRefPtr<TextDescriptionTagData>> TextDescriptionTagData::from_byte
|
||||||
return Error::from_string_literal("ICC::Profile: textDescriptionType Unicode description not \\0-terminated");
|
return Error::from_string_literal("ICC::Profile: textDescriptionType Unicode description not \\0-terminated");
|
||||||
|
|
||||||
StringView utf_16be_data { unicode_description_data, byte_size_without_nul };
|
StringView utf_16be_data { unicode_description_data, byte_size_without_nul };
|
||||||
unicode_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("utf-16be")->to_utf8(utf_16be_data)));
|
unicode_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(utf_16be_data)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScriptCode
|
// ScriptCode
|
||||||
|
@ -1069,7 +1069,7 @@ ErrorOr<NonnullRefPtr<TextDescriptionTagData>> TextDescriptionTagData::from_byte
|
||||||
if (macintosh_description_data[macintosh_description_length - 1] != '\0')
|
if (macintosh_description_data[macintosh_description_length - 1] != '\0')
|
||||||
return Error::from_string_literal("ICC::Profile: textDescriptionType ScriptCode not \\0-terminated");
|
return Error::from_string_literal("ICC::Profile: textDescriptionType ScriptCode not \\0-terminated");
|
||||||
|
|
||||||
macintosh_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("x-mac-roman")->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 })));
|
macintosh_description = TRY(String::from_deprecated_string(TextCodec::decoder_for("x-mac-roman"sv)->to_utf8({ macintosh_description_data, (size_t)macintosh_description_length - 1 })));
|
||||||
} else {
|
} else {
|
||||||
dbgln("TODO: ICCProfile textDescriptionType ScriptCode {}, length {}", scriptcode_code, macintosh_description_length);
|
dbgln("TODO: ICCProfile textDescriptionType ScriptCode {}, length {}", scriptcode_code, macintosh_description_length);
|
||||||
}
|
}
|
||||||
|
|
|
@ -267,7 +267,7 @@ NonnullRefPtr<StringObject> Parser::parse_string()
|
||||||
|
|
||||||
if (unencrypted_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) {
|
if (unencrypted_string.bytes().starts_with(Array<u8, 2> { 0xfe, 0xff })) {
|
||||||
// The string is encoded in UTF16-BE
|
// The string is encoded in UTF16-BE
|
||||||
string_object->set_string(TextCodec::decoder_for("utf-16be")->to_utf8(unencrypted_string));
|
string_object->set_string(TextCodec::decoder_for("utf-16be"sv)->to_utf8(unencrypted_string));
|
||||||
} else if (unencrypted_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) {
|
} else if (unencrypted_string.bytes().starts_with(Array<u8, 3> { 239, 187, 191 })) {
|
||||||
// The string is encoded in UTF-8. This is the default anyways, but if these bytes
|
// The string is encoded in UTF-8. This is the default anyways, but if these bytes
|
||||||
// are explicitly included, we have to trim them
|
// are explicitly included, we have to trim them
|
||||||
|
|
|
@ -30,7 +30,7 @@ TurkishDecoder s_turkish_decoder;
|
||||||
XUserDefinedDecoder s_x_user_defined_decoder;
|
XUserDefinedDecoder s_x_user_defined_decoder;
|
||||||
}
|
}
|
||||||
|
|
||||||
Decoder* decoder_for(DeprecatedString const& a_encoding)
|
Decoder* decoder_for(StringView a_encoding)
|
||||||
{
|
{
|
||||||
auto encoding = get_standardized_encoding(a_encoding);
|
auto encoding = get_standardized_encoding(a_encoding);
|
||||||
if (encoding.has_value()) {
|
if (encoding.has_value()) {
|
||||||
|
|
|
@ -84,7 +84,7 @@ public:
|
||||||
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
virtual void process(StringView, Function<void(u32)> on_code_point) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
Decoder* decoder_for(DeprecatedString const& encoding);
|
Decoder* decoder_for(StringView encoding);
|
||||||
Optional<StringView> get_standardized_encoding(StringView encoding);
|
Optional<StringView> get_standardized_encoding(StringView encoding);
|
||||||
|
|
||||||
// This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
|
// This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
|
||||||
|
|
|
@ -1419,7 +1419,7 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob)
|
||||||
|
|
||||||
// The bytes object might contain bytes greater than 128, encode them in UTF8
|
// The bytes object might contain bytes greater than 128, encode them in UTF8
|
||||||
// NOTE: Any 8-bit encoding -> utf-8 decoder will work for this
|
// NOTE: Any 8-bit encoding -> utf-8 decoder will work for this
|
||||||
auto text_decoder = TextCodec::decoder_for("windows-1252");
|
auto text_decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(text_decoder);
|
VERIFY(text_decoder);
|
||||||
auto text = text_decoder->to_utf8(decoded.release_value());
|
auto text = text_decoder->to_utf8(decoded.release_value());
|
||||||
|
|
||||||
|
|
|
@ -158,7 +158,7 @@ WebIDL::ExceptionOr<DeprecatedString> WorkerGlobalScope::atob(DeprecatedString c
|
||||||
|
|
||||||
// 3. Return decodedData.
|
// 3. Return decodedData.
|
||||||
// decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
|
// decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
|
||||||
auto* decoder = TextCodec::decoder_for("windows-1252");
|
auto* decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
return decoder->to_utf8(decoded_data.value());
|
return decoder->to_utf8(decoded_data.value());
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,7 +81,7 @@ static DeprecatedString mime_type_from_content_type(DeprecatedString const& cont
|
||||||
return content_type;
|
return content_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_valid_encoding(DeprecatedString const& encoding)
|
static bool is_valid_encoding(StringView encoding)
|
||||||
{
|
{
|
||||||
return TextCodec::decoder_for(encoding);
|
return TextCodec::decoder_for(encoding);
|
||||||
}
|
}
|
||||||
|
|
|
@ -877,7 +877,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
||||||
if (Utf8View { file_contents }.validate()) {
|
if (Utf8View { file_contents }.validate()) {
|
||||||
builder.append(source);
|
builder.append(source);
|
||||||
} else {
|
} else {
|
||||||
auto* decoder = TextCodec::decoder_for("windows-1252");
|
auto* decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||||
VERIFY(decoder);
|
VERIFY(decoder);
|
||||||
|
|
||||||
auto utf8_source = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source);
|
auto utf8_source = TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, source);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue