mirror of
https://github.com/RGBCube/serenity
synced 2025-07-28 03:47:34 +00:00
LibTextCodec+Everywhere: Port Decoders to new Strings
This commit is contained in:
parent
3c5090e172
commit
2db168acc1
21 changed files with 149 additions and 123 deletions
|
@ -206,37 +206,39 @@ Tokenizer::Tokenizer(StringView input, StringView encoding)
|
|||
bool last_was_carriage_return = false;
|
||||
|
||||
// To filter code points from a stream of (unfiltered) code points input:
|
||||
decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) {
|
||||
// Replace any U+000D CARRIAGE RETURN (CR) code points,
|
||||
// U+000C FORM FEED (FF) code points,
|
||||
// or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF)
|
||||
// in input by a single U+000A LINE FEED (LF) code point.
|
||||
if (code_point == '\r') {
|
||||
if (last_was_carriage_return) {
|
||||
builder.append('\n');
|
||||
} else {
|
||||
last_was_carriage_return = true;
|
||||
}
|
||||
} else {
|
||||
if (last_was_carriage_return)
|
||||
builder.append('\n');
|
||||
decoder->process(input, [&builder, &last_was_carriage_return](u32 code_point) -> ErrorOr<void> {
|
||||
// Replace any U+000D CARRIAGE RETURN (CR) code points,
|
||||
// U+000C FORM FEED (FF) code points,
|
||||
// or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF)
|
||||
// in input by a single U+000A LINE FEED (LF) code point.
|
||||
if (code_point == '\r') {
|
||||
if (last_was_carriage_return) {
|
||||
TRY(builder.try_append('\n'));
|
||||
} else {
|
||||
last_was_carriage_return = true;
|
||||
}
|
||||
} else {
|
||||
if (last_was_carriage_return)
|
||||
TRY(builder.try_append('\n'));
|
||||
|
||||
if (code_point == '\n') {
|
||||
if (!last_was_carriage_return)
|
||||
builder.append('\n');
|
||||
if (code_point == '\n') {
|
||||
if (!last_was_carriage_return)
|
||||
TRY(builder.try_append('\n'));
|
||||
|
||||
} else if (code_point == '\f') {
|
||||
builder.append('\n');
|
||||
// Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (<28>).
|
||||
} else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
|
||||
builder.append_code_point(REPLACEMENT_CHARACTER);
|
||||
} else {
|
||||
builder.append_code_point(code_point);
|
||||
}
|
||||
} else if (code_point == '\f') {
|
||||
TRY(builder.try_append('\n'));
|
||||
// Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (<28>).
|
||||
} else if (code_point == 0x00 || (code_point >= 0xD800 && code_point <= 0xDFFF)) {
|
||||
TRY(builder.try_append_code_point(REPLACEMENT_CHARACTER));
|
||||
} else {
|
||||
TRY(builder.try_append_code_point(code_point));
|
||||
}
|
||||
|
||||
last_was_carriage_return = false;
|
||||
}
|
||||
});
|
||||
last_was_carriage_return = false;
|
||||
}
|
||||
return {};
|
||||
})
|
||||
.release_value_but_fixme_should_propagate_errors();
|
||||
return builder.to_string();
|
||||
};
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ WebIDL::ExceptionOr<DeprecatedString> TextDecoder::decode(JS::Handle<JS::Object>
|
|||
if (data_buffer_or_error.is_error())
|
||||
return WebIDL::OperationError::create(realm(), "Failed to copy bytes from ArrayBuffer");
|
||||
auto& data_buffer = data_buffer_or_error.value();
|
||||
return m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() });
|
||||
return TRY_OR_THROW_OOM(vm(), m_decoder.to_utf8({ data_buffer.data(), data_buffer.size() }));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -515,7 +515,7 @@ void HTMLScriptElement::resource_did_load()
|
|||
// we have to re-encode it to UTF-8.
|
||||
if (resource()->has_encoding()) {
|
||||
if (auto codec = TextCodec::decoder_for(resource()->encoding().value()); codec.has_value()) {
|
||||
data = codec->to_utf8(data).to_byte_buffer();
|
||||
data = codec->to_utf8(data).release_value_but_fixme_should_propagate_errors().to_deprecated_string().to_byte_buffer();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2800,7 +2800,7 @@ HTMLTokenizer::HTMLTokenizer(StringView input, DeprecatedString const& encoding)
|
|||
{
|
||||
auto decoder = TextCodec::decoder_for(encoding);
|
||||
VERIFY(decoder.has_value());
|
||||
m_decoded_input = decoder->to_utf8(input);
|
||||
m_decoded_input = decoder->to_utf8(input).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
|
||||
m_utf8_view = Utf8View(m_decoded_input);
|
||||
m_utf8_iterator = m_utf8_view.begin();
|
||||
m_prev_utf8_iterator = m_utf8_view.begin();
|
||||
|
|
|
@ -1421,9 +1421,9 @@ JS_DEFINE_NATIVE_FUNCTION(Window::atob)
|
|||
// NOTE: Any 8-bit encoding -> utf-8 decoder will work for this
|
||||
auto text_decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||
VERIFY(text_decoder.has_value());
|
||||
auto text = text_decoder->to_utf8(decoded.release_value());
|
||||
auto text = TRY_OR_THROW_OOM(vm, text_decoder->to_utf8(decoded.release_value()));
|
||||
|
||||
return JS::PrimitiveString::create(vm, DeprecatedString(text));
|
||||
return JS::PrimitiveString::create(vm, text);
|
||||
}
|
||||
|
||||
JS_DEFINE_NATIVE_FUNCTION(Window::btoa)
|
||||
|
|
|
@ -163,7 +163,7 @@ WebIDL::ExceptionOr<DeprecatedString> WorkerGlobalScope::atob(DeprecatedString c
|
|||
// decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8.
|
||||
auto decoder = TextCodec::decoder_for("windows-1252"sv);
|
||||
VERIFY(decoder.has_value());
|
||||
return decoder->to_utf8(decoded_data.value());
|
||||
return TRY_OR_THROW_OOM(vm(), decoder->to_utf8(decoded_data.value()));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ WebIDL::ExceptionOr<JS::Value> parse_json_bytes_to_javascript_value(JS::VM& vm,
|
|||
{
|
||||
// 1. Let string be the result of running UTF-8 decode on bytes.
|
||||
TextCodec::UTF8Decoder decoder;
|
||||
auto string = decoder.to_utf8(bytes);
|
||||
auto string = TRY_OR_THROW_OOM(vm, decoder.to_utf8(bytes));
|
||||
|
||||
// 2. Return the result of parsing a JSON string to an Infra value given string.
|
||||
return parse_json_string_to_javascript_value(vm, string);
|
||||
|
|
|
@ -224,7 +224,7 @@ DeprecatedString XMLHttpRequest::get_text_response() const
|
|||
// If we don't support the decoder yet, let's crash instead of attempting to return something, as the result would be incorrect and create obscure bugs.
|
||||
VERIFY(decoder.has_value());
|
||||
|
||||
return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes);
|
||||
return TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, m_received_bytes).release_value_but_fixme_should_propagate_errors().to_deprecated_string();
|
||||
}
|
||||
|
||||
// https://xhr.spec.whatwg.org/#final-mime-type
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue