mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 21:57:43 +00:00
AK+Everywhere: Make UTF-16 to UTF-8 converter fallible
The conversion can fail to allocate the underlying storage needed to store the UTF-8 data. Propagate this allocation error to the callers.
This commit is contained in:
parent
1edb96376b
commit
d793262beb
10 changed files with 25 additions and 22 deletions
|
@ -81,7 +81,7 @@ u32 Utf16View::decode_surrogate_pair(u16 high_surrogate, u16 low_surrogate)
|
||||||
return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point;
|
return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point;
|
||||||
}
|
}
|
||||||
|
|
||||||
DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
|
ErrorOr<DeprecatedString> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
|
||||||
{
|
{
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
|
||||||
|
@ -92,17 +92,17 @@ DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_uni
|
||||||
|
|
||||||
if ((next < end_ptr()) && is_low_surrogate(*next)) {
|
if ((next < end_ptr()) && is_low_surrogate(*next)) {
|
||||||
auto code_point = decode_surrogate_pair(*ptr, *next);
|
auto code_point = decode_surrogate_pair(*ptr, *next);
|
||||||
builder.append_code_point(code_point);
|
TRY(builder.try_append_code_point(code_point));
|
||||||
++ptr;
|
++ptr;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
builder.append_code_point(static_cast<u32>(*ptr));
|
TRY(builder.try_append_code_point(static_cast<u32>(*ptr)));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (auto code_point : *this)
|
for (auto code_point : *this)
|
||||||
builder.append_code_point(code_point);
|
TRY(builder.try_append_code_point(code_point));
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder.build();
|
return builder.build();
|
||||||
|
|
|
@ -75,7 +75,7 @@ public:
|
||||||
No,
|
No,
|
||||||
};
|
};
|
||||||
|
|
||||||
DeprecatedString to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
|
ErrorOr<DeprecatedString> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
|
||||||
|
|
||||||
bool is_null() const { return m_code_units.is_null(); }
|
bool is_null() const { return m_code_units.is_null(); }
|
||||||
bool is_empty() const { return m_code_units.is_empty(); }
|
bool is_empty() const { return m_code_units.is_empty(); }
|
||||||
|
|
|
@ -56,14 +56,14 @@ TEST_CASE(encode_utf8)
|
||||||
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
|
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
|
||||||
auto string = MUST(AK::utf8_to_utf16(utf8_string));
|
auto string = MUST(AK::utf8_to_utf16(utf8_string));
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
|
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
|
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto encoded = Array { (u16)0xd83d };
|
auto encoded = Array { (u16)0xd83d };
|
||||||
Utf16View view { encoded };
|
Utf16View view { encoded };
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
|
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
|
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -269,14 +269,14 @@ TEST_CASE(substring_view)
|
||||||
view = view.substring_view(7, 2);
|
view = view.substring_view(7, 2);
|
||||||
|
|
||||||
EXPECT(view.length_in_code_units() == 2);
|
EXPECT(view.length_in_code_units() == 2);
|
||||||
EXPECT_EQ(view.to_utf8(), "😀"sv);
|
EXPECT_EQ(MUST(view.to_utf8()), "😀"sv);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
view = view.substring_view(7, 1);
|
view = view.substring_view(7, 1);
|
||||||
|
|
||||||
EXPECT(view.length_in_code_units() == 1);
|
EXPECT(view.length_in_code_units() == 1);
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
|
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
|
||||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
|
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -372,7 +372,7 @@ void HexEditorWidget::update_inspector_values(size_t position)
|
||||||
if (valid_code_units == 0)
|
if (valid_code_units == 0)
|
||||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
|
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
|
||||||
else
|
else
|
||||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8());
|
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8().release_value_but_fixme_should_propagate_errors());
|
||||||
} else {
|
} else {
|
||||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
|
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -1265,7 +1265,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
|
||||||
} else if (is_ascii_digit(next)) {
|
} else if (is_ascii_digit(next)) {
|
||||||
bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
|
bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
|
||||||
|
|
||||||
auto capture_position_string = replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
|
auto capture_position_string = TRY_OR_THROW_OOM(vm, replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8());
|
||||||
auto capture_position = capture_position_string.to_uint();
|
auto capture_position = capture_position_string.to_uint();
|
||||||
|
|
||||||
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
|
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
|
||||||
|
@ -1295,7 +1295,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
|
||||||
result.append(curr);
|
result.append(curr);
|
||||||
} else {
|
} else {
|
||||||
auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
|
auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
|
||||||
auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
|
auto group_name = TRY_OR_THROW_OOM(vm, group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
|
||||||
|
|
||||||
auto capture = TRY(named_captures.as_object().get(group_name));
|
auto capture = TRY(named_captures.as_object().get(group_name));
|
||||||
|
|
||||||
|
@ -1311,7 +1311,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Utf16String(move(result)).to_utf8();
|
return TRY_OR_THROW_OOM(vm, Utf16View { result }.to_utf8());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -718,7 +718,7 @@ ThrowCompletionOr<Vector<PatternPartition>> format_date_time_pattern(VM& vm, Dat
|
||||||
if (formatted_value.length() > 2) {
|
if (formatted_value.length() > 2) {
|
||||||
Utf16String utf16_formatted_value { formatted_value };
|
Utf16String utf16_formatted_value { formatted_value };
|
||||||
if (utf16_formatted_value.length_in_code_units() > 2)
|
if (utf16_formatted_value.length_in_code_units() > 2)
|
||||||
formatted_value = utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8();
|
formatted_value = TRY_OR_THROW_OOM(vm, utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8());
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -68,7 +68,8 @@ DeprecatedString const& PrimitiveString::deprecated_string() const
|
||||||
{
|
{
|
||||||
resolve_rope_if_needed();
|
resolve_rope_if_needed();
|
||||||
if (!m_has_utf8_string) {
|
if (!m_has_utf8_string) {
|
||||||
m_utf8_string = m_utf16_string.to_utf8();
|
// FIXME: Propagate this error.
|
||||||
|
m_utf8_string = MUST(m_utf16_string.to_utf8(vm()));
|
||||||
m_has_utf8_string = true;
|
m_has_utf8_string = true;
|
||||||
}
|
}
|
||||||
return m_utf8_string;
|
return m_utf8_string;
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
#include <LibJS/Runtime/Utf16String.h>
|
#include <LibJS/Runtime/Utf16String.h>
|
||||||
|
#include <LibJS/Runtime/VM.h>
|
||||||
|
|
||||||
namespace JS {
|
namespace JS {
|
||||||
namespace Detail {
|
namespace Detail {
|
||||||
|
@ -96,9 +97,9 @@ Utf16View Utf16String::substring_view(size_t code_unit_offset) const
|
||||||
return view().substring_view(code_unit_offset);
|
return view().substring_view(code_unit_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
DeprecatedString Utf16String::to_utf8() const
|
ThrowCompletionOr<DeprecatedString> Utf16String::to_utf8(VM& vm) const
|
||||||
{
|
{
|
||||||
return view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
|
return TRY_OR_THROW_OOM(vm, view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 Utf16String::code_unit_at(size_t index) const
|
u16 Utf16String::code_unit_at(size_t index) const
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <AK/Types.h>
|
#include <AK/Types.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
|
#include <LibJS/Runtime/Completion.h>
|
||||||
|
|
||||||
namespace JS {
|
namespace JS {
|
||||||
namespace Detail {
|
namespace Detail {
|
||||||
|
@ -49,7 +50,7 @@ public:
|
||||||
Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
|
Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
|
||||||
Utf16View substring_view(size_t code_unit_offset) const;
|
Utf16View substring_view(size_t code_unit_offset) const;
|
||||||
|
|
||||||
DeprecatedString to_utf8() const;
|
ThrowCompletionOr<DeprecatedString> to_utf8(VM&) const;
|
||||||
u16 code_unit_at(size_t index) const;
|
u16 code_unit_at(size_t index) const;
|
||||||
|
|
||||||
size_t length_in_code_units() const;
|
size_t length_in_code_units() const;
|
||||||
|
|
|
@ -385,7 +385,7 @@ public:
|
||||||
{
|
{
|
||||||
return m_view.visit(
|
return m_view.visit(
|
||||||
[](StringView view) { return view.to_deprecated_string(); },
|
[](StringView view) { return view.to_deprecated_string(); },
|
||||||
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); },
|
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes).release_value_but_fixme_should_propagate_errors(); },
|
||||||
[](auto& view) {
|
[](auto& view) {
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
for (auto it = view.begin(); it != view.end(); ++it)
|
for (auto it = view.begin(); it != view.end(); ++it)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue