1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 21:57:43 +00:00

AK+Everywhere: Make UTF-16 to UTF-8 converter fallible

The conversion could fail to allocate the underlying storage needed to store
the UTF-8 data. Propagate this error to the callers.
This commit is contained in:
Timothy Flynn 2023-01-07 13:59:10 -05:00 committed by Linus Groh
parent 1edb96376b
commit d793262beb
10 changed files with 25 additions and 22 deletions

View file

@ -81,7 +81,7 @@ u32 Utf16View::decode_surrogate_pair(u16 high_surrogate, u16 low_surrogate)
return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point; return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point;
} }
DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const ErrorOr<DeprecatedString> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
{ {
StringBuilder builder; StringBuilder builder;
@ -92,17 +92,17 @@ DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_uni
if ((next < end_ptr()) && is_low_surrogate(*next)) { if ((next < end_ptr()) && is_low_surrogate(*next)) {
auto code_point = decode_surrogate_pair(*ptr, *next); auto code_point = decode_surrogate_pair(*ptr, *next);
builder.append_code_point(code_point); TRY(builder.try_append_code_point(code_point));
++ptr; ++ptr;
continue; continue;
} }
} }
builder.append_code_point(static_cast<u32>(*ptr)); TRY(builder.try_append_code_point(static_cast<u32>(*ptr)));
} }
} else { } else {
for (auto code_point : *this) for (auto code_point : *this)
builder.append_code_point(code_point); TRY(builder.try_append_code_point(code_point));
} }
return builder.build(); return builder.build();

View file

@ -75,7 +75,7 @@ public:
No, No,
}; };
DeprecatedString to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const; ErrorOr<DeprecatedString> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
bool is_null() const { return m_code_units.is_null(); } bool is_null() const { return m_code_units.is_null(); }
bool is_empty() const { return m_code_units.is_empty(); } bool is_empty() const { return m_code_units.is_empty(); }

View file

@ -56,14 +56,14 @@ TEST_CASE(encode_utf8)
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"); DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
auto string = MUST(AK::utf8_to_utf16(utf8_string)); auto string = MUST(AK::utf8_to_utf16(utf8_string));
Utf16View view { string }; Utf16View view { string };
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string); EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string); EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
} }
{ {
auto encoded = Array { (u16)0xd83d }; auto encoded = Array { (u16)0xd83d };
Utf16View view { encoded }; Utf16View view { encoded };
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv); EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv); EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
} }
} }
@ -269,14 +269,14 @@ TEST_CASE(substring_view)
view = view.substring_view(7, 2); view = view.substring_view(7, 2);
EXPECT(view.length_in_code_units() == 2); EXPECT(view.length_in_code_units() == 2);
EXPECT_EQ(view.to_utf8(), "😀"sv); EXPECT_EQ(MUST(view.to_utf8()), "😀"sv);
} }
{ {
Utf16View view { string }; Utf16View view { string };
view = view.substring_view(7, 1); view = view.substring_view(7, 1);
EXPECT(view.length_in_code_units() == 1); EXPECT(view.length_in_code_units() == 1);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv); EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv); EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
} }
} }

View file

@ -372,7 +372,7 @@ void HexEditorWidget::update_inspector_values(size_t position)
if (valid_code_units == 0) if (valid_code_units == 0)
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, ""); value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
else else
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8()); value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8().release_value_but_fixme_should_propagate_errors());
} else { } else {
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, ""); value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
} }

View file

@ -1265,7 +1265,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
} else if (is_ascii_digit(next)) { } else if (is_ascii_digit(next)) {
bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2)); bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
auto capture_position_string = replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8(); auto capture_position_string = TRY_OR_THROW_OOM(vm, replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8());
auto capture_position = capture_position_string.to_uint(); auto capture_position = capture_position_string.to_uint();
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) { if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
@ -1295,7 +1295,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
result.append(curr); result.append(curr);
} else { } else {
auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position); auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); auto group_name = TRY_OR_THROW_OOM(vm, group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
auto capture = TRY(named_captures.as_object().get(group_name)); auto capture = TRY(named_captures.as_object().get(group_name));
@ -1311,7 +1311,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
} }
} }
return Utf16String(move(result)).to_utf8(); return TRY_OR_THROW_OOM(vm, Utf16View { result }.to_utf8());
} }
} }

View file

@ -718,7 +718,7 @@ ThrowCompletionOr<Vector<PatternPartition>> format_date_time_pattern(VM& vm, Dat
if (formatted_value.length() > 2) { if (formatted_value.length() > 2) {
Utf16String utf16_formatted_value { formatted_value }; Utf16String utf16_formatted_value { formatted_value };
if (utf16_formatted_value.length_in_code_units() > 2) if (utf16_formatted_value.length_in_code_units() > 2)
formatted_value = utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8(); formatted_value = TRY_OR_THROW_OOM(vm, utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8());
} }
break; break;

View file

@ -68,7 +68,8 @@ DeprecatedString const& PrimitiveString::deprecated_string() const
{ {
resolve_rope_if_needed(); resolve_rope_if_needed();
if (!m_has_utf8_string) { if (!m_has_utf8_string) {
m_utf8_string = m_utf16_string.to_utf8(); // FIXME: Propagate this error.
m_utf8_string = MUST(m_utf16_string.to_utf8(vm()));
m_has_utf8_string = true; m_has_utf8_string = true;
} }
return m_utf8_string; return m_utf8_string;

View file

@ -6,6 +6,7 @@
#include <AK/StringView.h> #include <AK/StringView.h>
#include <LibJS/Runtime/Utf16String.h> #include <LibJS/Runtime/Utf16String.h>
#include <LibJS/Runtime/VM.h>
namespace JS { namespace JS {
namespace Detail { namespace Detail {
@ -96,9 +97,9 @@ Utf16View Utf16String::substring_view(size_t code_unit_offset) const
return view().substring_view(code_unit_offset); return view().substring_view(code_unit_offset);
} }
DeprecatedString Utf16String::to_utf8() const ThrowCompletionOr<DeprecatedString> Utf16String::to_utf8(VM& vm) const
{ {
return view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); return TRY_OR_THROW_OOM(vm, view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
} }
u16 Utf16String::code_unit_at(size_t index) const u16 Utf16String::code_unit_at(size_t index) const

View file

@ -12,6 +12,7 @@
#include <AK/Types.h> #include <AK/Types.h>
#include <AK/Utf16View.h> #include <AK/Utf16View.h>
#include <AK/Vector.h> #include <AK/Vector.h>
#include <LibJS/Runtime/Completion.h>
namespace JS { namespace JS {
namespace Detail { namespace Detail {
@ -49,7 +50,7 @@ public:
Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const; Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
Utf16View substring_view(size_t code_unit_offset) const; Utf16View substring_view(size_t code_unit_offset) const;
DeprecatedString to_utf8() const; ThrowCompletionOr<DeprecatedString> to_utf8(VM&) const;
u16 code_unit_at(size_t index) const; u16 code_unit_at(size_t index) const;
size_t length_in_code_units() const; size_t length_in_code_units() const;

View file

@ -385,7 +385,7 @@ public:
{ {
return m_view.visit( return m_view.visit(
[](StringView view) { return view.to_deprecated_string(); }, [](StringView view) { return view.to_deprecated_string(); },
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); }, [](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes).release_value_but_fixme_should_propagate_errors(); },
[](auto& view) { [](auto& view) {
StringBuilder builder; StringBuilder builder;
for (auto it = view.begin(); it != view.end(); ++it) for (auto it = view.begin(); it != view.end(); ++it)