diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp index 1dc0b55d4b..4c23484bee 100644 --- a/AK/Utf16View.cpp +++ b/AK/Utf16View.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -20,45 +21,46 @@ static constexpr u16 low_surrogate_max = 0xdfff; static constexpr u32 replacement_code_point = 0xfffd; static constexpr u32 first_supplementary_plane_code_point = 0x10000; -template -static Utf16Data to_utf16_impl(UtfViewType const& view) -requires(IsSame || IsSame) +template UtfViewType> +static ErrorOr to_utf16_impl(UtfViewType const& view) { Utf16Data utf16_data; - utf16_data.ensure_capacity(view.length()); + TRY(utf16_data.try_ensure_capacity(view.length())); for (auto code_point : view) - code_point_to_utf16(utf16_data, code_point); + TRY(code_point_to_utf16(utf16_data, code_point)); return utf16_data; } -Utf16Data utf8_to_utf16(StringView utf8_view) +ErrorOr utf8_to_utf16(StringView utf8_view) { return to_utf16_impl(Utf8View { utf8_view }); } -Utf16Data utf8_to_utf16(Utf8View const& utf8_view) +ErrorOr utf8_to_utf16(Utf8View const& utf8_view) { return to_utf16_impl(utf8_view); } -Utf16Data utf32_to_utf16(Utf32View const& utf32_view) +ErrorOr utf32_to_utf16(Utf32View const& utf32_view) { return to_utf16_impl(utf32_view); } -void code_point_to_utf16(Utf16Data& string, u32 code_point) +ErrorOr code_point_to_utf16(Utf16Data& string, u32 code_point) { VERIFY(is_unicode(code_point)); if (code_point < first_supplementary_plane_code_point) { - string.append(static_cast(code_point)); + TRY(string.try_append(static_cast(code_point))); } else { code_point -= first_supplementary_plane_code_point; - string.append(static_cast(high_surrogate_min | (code_point >> 10))); - string.append(static_cast(low_surrogate_min | (code_point & 0x3ff))); + TRY(string.try_append(static_cast(high_surrogate_min | (code_point >> 10)))); + TRY(string.try_append(static_cast(low_surrogate_min | (code_point & 0x3ff)))); } + + return {}; } bool Utf16View::is_high_surrogate(u16 code_unit) diff --git a/AK/Utf16View.h b/AK/Utf16View.h index cb43660b01..101f4e4892 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include #include @@ -18,10 +19,10 @@ namespace AK { using Utf16Data = Vector; -Utf16Data utf8_to_utf16(StringView); -Utf16Data utf8_to_utf16(Utf8View const&); -Utf16Data utf32_to_utf16(Utf32View const&); -void code_point_to_utf16(Utf16Data&, u32); +ErrorOr utf8_to_utf16(StringView); +ErrorOr utf8_to_utf16(Utf8View const&); +ErrorOr utf32_to_utf16(Utf32View const&); +ErrorOr code_point_to_utf16(Utf16Data&, u32); class Utf16View; diff --git a/Tests/AK/TestUtf16.cpp b/Tests/AK/TestUtf16.cpp index ee21a72319..56b2057e17 100644 --- a/Tests/AK/TestUtf16.cpp +++ b/Tests/AK/TestUtf16.cpp @@ -14,7 +14,7 @@ TEST_CASE(decode_ascii) { - auto string = AK::utf8_to_utf16("Hello World!11"sv); + auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv)); Utf16View view { string }; size_t valid_code_units = 0; @@ -33,7 +33,7 @@ TEST_CASE(decode_ascii) TEST_CASE(decode_utf8) { - auto string = AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv); + auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv)); Utf16View view { string }; size_t valid_code_units = 0; @@ -54,7 +54,7 @@ TEST_CASE(encode_utf8) { { DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"); - auto string = AK::utf8_to_utf16(utf8_string); + auto string = MUST(AK::utf8_to_utf16(utf8_string)); Utf16View view { string }; EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string); EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string); @@ -91,7 +91,7 @@ TEST_CASE(decode_utf16) TEST_CASE(iterate_utf16) { - auto string = AK::utf8_to_utf16("Привет 😀"sv); + auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv)); Utf16View view { string }; auto iterator = view.begin(); @@ -263,7 +263,7 @@ TEST_CASE(decode_invalid_utf16) TEST_CASE(substring_view) { - auto string = AK::utf8_to_utf16("Привет 😀"sv); + auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv)); { Utf16View view { string }; view = view.substring_view(7, 2); diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 366b414819..0e6281bdb1 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -754,7 +754,7 @@ TEST_CASE(ECMA262_unicode_match) for (auto& test : tests) { Regex re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options); - auto subject = AK::utf8_to_utf16(test.subject); + auto subject = MUST(AK::utf8_to_utf16(test.subject)); Utf16View view { subject }; if constexpr (REGEX_DEBUG) { @@ -868,7 +868,7 @@ TEST_CASE(ECMA262_property_match) for (auto& test : tests) { Regex re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options); - auto subject = AK::utf8_to_utf16(test.subject); + auto subject = MUST(AK::utf8_to_utf16(test.subject)); Utf16View view { subject }; if constexpr (REGEX_DEBUG) { diff --git a/Userland/Applications/HexEditor/ValueInspectorModel.h b/Userland/Applications/HexEditor/ValueInspectorModel.h index 3f5a239a52..2778564988 100644 --- a/Userland/Applications/HexEditor/ValueInspectorModel.h +++ b/Userland/Applications/HexEditor/ValueInspectorModel.h @@ -142,8 +142,8 @@ public: return 0; } case UTF16: { - auto utf16_view = Utf16View(utf8_to_utf16(m_values.at(index.row()))); - if (utf16_view.validate()) + auto utf16_data = utf8_to_utf16(m_values.at(index.row())).release_value_but_fixme_should_propagate_errors(); + if (Utf16View utf16_view { utf16_data }; utf16_view.validate()) return static_cast(utf16_view.length_in_code_units() * 2); return 0; } diff --git a/Userland/Libraries/LibJS/Runtime/GlobalObject.cpp b/Userland/Libraries/LibJS/Runtime/GlobalObject.cpp index 952a450de4..10629f97dd 100644 --- a/Userland/Libraries/LibJS/Runtime/GlobalObject.cpp +++ b/Userland/Libraries/LibJS/Runtime/GlobalObject.cpp @@ -490,7 +490,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape) { auto string = TRY(vm.argument(0).to_string(vm)); StringBuilder escaped; - for (auto code_point : utf8_to_utf16(string)) { + for (auto code_point : TRY_OR_THROW_OOM(vm, utf8_to_utf16(string))) { if (code_point < 256) { if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast(code_point))) escaped.append(code_point); diff --git a/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp b/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp index 7ba8867743..b9746af464 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp @@ -89,7 +89,11 @@ ErrorOr parse_regex_pattern(StringView if (unicode && unicode_sets) return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') }; - auto utf16_pattern = AK::utf8_to_utf16(pattern); + auto utf16_pattern_result = AK::utf8_to_utf16(pattern); + if (utf16_pattern_result.is_error()) + return ParseRegexPatternError { "Out of memory"sv }; + + auto utf16_pattern = utf16_pattern_result.release_value(); Utf16View utf16_pattern_view { utf16_pattern }; StringBuilder builder; diff --git a/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp b/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp index ca6e5130ea..aa45fd348f 100644 --- a/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringConstructor.cpp @@ -123,7 +123,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) if (code_point < 0 || code_point > 0x10FFFF) return vm.throw_completion(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects()); - AK::code_point_to_utf16(string, static_cast(code_point)); + TRY_OR_THROW_OOM(vm, code_point_to_utf16(string, static_cast(code_point))); } return PrimitiveString::create(vm, Utf16String(move(string))); diff --git a/Userland/Libraries/LibJS/Runtime/Utf16String.cpp b/Userland/Libraries/LibJS/Runtime/Utf16String.cpp index 821dc88db4..e6a4b4caf2 100644 --- a/Userland/Libraries/LibJS/Runtime/Utf16String.cpp +++ b/Userland/Libraries/LibJS/Runtime/Utf16String.cpp @@ -33,7 +33,7 @@ NonnullRefPtr Utf16StringImpl::create(Utf16Data string) NonnullRefPtr Utf16StringImpl::create(StringView string) { - return create(AK::utf8_to_utf16(string)); + return create(AK::utf8_to_utf16(string).release_value_but_fixme_should_propagate_errors()); } NonnullRefPtr Utf16StringImpl::create(Utf16View const& view) diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index 4f36cf6fb7..9943d55909 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -280,7 +280,7 @@ public: return RegexStringView { Utf32View { data.data(), data.size() } }; }, [&](Utf16View) { - optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }); + optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors(); return RegexStringView { Utf16View { optional_utf16_storage } }; }); diff --git a/Userland/Libraries/LibWeb/Infra/Strings.cpp b/Userland/Libraries/LibWeb/Infra/Strings.cpp index 0ad8096ce5..973982833b 100644 --- a/Userland/Libraries/LibWeb/Infra/Strings.cpp +++ b/Userland/Libraries/LibWeb/Infra/Strings.cpp @@ -34,8 +34,8 @@ DeprecatedString strip_and_collapse_whitespace(StringView string) // https://infra.spec.whatwg.org/#code-unit-prefix bool is_code_unit_prefix(StringView potential_prefix, StringView input) { - auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix); - auto input_utf16 = utf8_to_utf16(input); + auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors(); + auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors(); // 1. Let i be 0. size_t i = 0; diff --git a/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.cpp b/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.cpp index c5eb5e7155..11086090b7 100644 --- a/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.cpp +++ b/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.cpp @@ -5,6 +5,8 @@ */ #include +#include +#include #include #include @@ -17,9 +19,10 @@ SVGTextContentElement::SVGTextContentElement(DOM::Document& document, DOM::Quali } // https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars -int SVGTextContentElement::get_number_of_chars() const +WebIDL::ExceptionOr SVGTextContentElement::get_number_of_chars() const { - return AK::utf8_to_utf16(child_text_content()).size(); + auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(child_text_content())); + return static_cast(chars.size()); } } diff --git a/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.h b/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.h index ed9f4fedbd..74ded56554 100644 --- a/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.h +++ b/Userland/Libraries/LibWeb/SVG/SVGTextContentElement.h @@ -7,6 +7,7 @@ #pragma once #include +#include namespace Web::SVG { @@ -15,7 +16,7 @@ class SVGTextContentElement : public SVGGraphicsElement { WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement); public: - int get_number_of_chars() const; + WebIDL::ExceptionOr get_number_of_chars() const; protected: SVGTextContentElement(DOM::Document&, DOM::QualifiedName);