From ca62aeb6bd184adb2644e5d179f60cb3a42171b4 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 19 Jan 2023 10:53:20 -0500 Subject: [PATCH] LibLocale+LibJS: Port locale parsing and processing to String In order to prevent this commit from having to refactor almost all of Intl, the goal here is to update the internal parsing/canonicalization of locales within LibLocale only. Call sites which are already equipped to handle String and OOM errors do so, however. --- .../LibLocale/GenerateLocaleData.cpp | 44 ++- Tests/LibLocale/TestLocale.cpp | 107 ++++-- .../LibJS/Runtime/Intl/AbstractOperations.cpp | 24 +- .../LibJS/Runtime/Intl/DisplayNames.cpp | 2 +- .../Libraries/LibJS/Runtime/Intl/Locale.cpp | 22 +- .../LibJS/Runtime/Intl/LocaleConstructor.cpp | 51 +-- .../LibJS/Runtime/Intl/LocalePrototype.cpp | 20 +- .../LibJS/Runtime/StringPrototype.cpp | 4 +- .../Libraries/LibLocale/DateTimeFormat.cpp | 2 +- Userland/Libraries/LibLocale/Locale.cpp | 362 +++++++++--------- Userland/Libraries/LibLocale/Locale.h | 32 +- 11 files changed, 371 insertions(+), 299 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp index a912160f5c..6f416bde3f 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp @@ -1069,6 +1069,7 @@ static ErrorOr generate_unicode_locale_implementation(Core::Stream::Buffer #include #include #include +#include #include #include #include @@ -1229,24 +1230,25 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); generator.append(R"~~~( -struct CanonicalLanguageID { - LanguageID to_unicode_language_id() const +struct CanonicalLanguageID +{ + ErrorOr to_unicode_language_id() const { LanguageID language_id {}; - language_id.variants.ensure_capacity(variants_size); + TRY(language_id.variants.try_ensure_capacity(variants_size)); - language_id.language = 
decode_string(language); + language_id.language = TRY(String::from_utf8(decode_string(language))); if (script != 0) - language_id.script = decode_string(script); + language_id.script = TRY(String::from_utf8(decode_string(script))); if (region != 0) - language_id.region = decode_string(region); + language_id.region = TRY(String::from_utf8(decode_string(region))); for (size_t i = 0; i < variants_size; ++i) - language_id.variants.append(decode_string(variants[i])); + language_id.variants.append(TRY(String::from_utf8(decode_string(variants[i])))); return language_id; } - bool matches_variants(Vector const& other_variants) const { + bool matches_variants(Vector const& other_variants) const { if (variants_size == 0) return true; if (other_variants.size() != variants_size) @@ -1375,7 +1377,7 @@ static LanguageMapping const* resolve_likely_subtag(LanguageID const& language_i if (!language_id.script.has_value()) continue; - search_key.language = "und"sv; + search_key.language = String::from_utf8("und"sv).release_value_but_fixme_should_propagate_errors(); search_key.script = *language_id.script; break; @@ -1680,9 +1682,9 @@ Optional character_order_for_locale(StringView locale) void resolve_complex_language_aliases(LanguageID& language_id) { for (auto const& map : s_complex_alias) { - auto const& key_language = decode_string(map.key.language); - auto const& key_script = decode_string(map.key.script); - auto const& key_region = decode_string(map.key.region); + auto key_language = decode_string(map.key.language); + auto key_script = decode_string(map.key.script); + auto key_region = decode_string(map.key.region); if ((key_language != language_id.language) && (key_language != "und"sv)) continue; @@ -1693,7 +1695,7 @@ void resolve_complex_language_aliases(LanguageID& language_id) if (!map.key.matches_variants(language_id.variants)) continue; - auto alias = map.alias.to_unicode_language_id(); + auto alias = 
map.alias.to_unicode_language_id().release_value_but_fixme_should_propagate_errors(); if (alias.language == "und"sv) alias.language = move(language_id.language); @@ -1718,19 +1720,19 @@ Optional add_likely_subtags(LanguageID const& language_id) auto maximized = language_id; - auto const& key_script = decode_string(likely_subtag->key.script); - auto const& key_region = decode_string(likely_subtag->key.region); + auto key_script = decode_string(likely_subtag->key.script); + auto key_region = decode_string(likely_subtag->key.region); - auto const& alias_language = decode_string(likely_subtag->alias.language); - auto const& alias_script = decode_string(likely_subtag->alias.script); - auto const& alias_region = decode_string(likely_subtag->alias.region); + auto alias_language = decode_string(likely_subtag->alias.language); + auto alias_script = decode_string(likely_subtag->alias.script); + auto alias_region = decode_string(likely_subtag->alias.region); if (maximized.language == "und"sv) - maximized.language = alias_language; + maximized.language = String::from_utf8(alias_language).release_value_but_fixme_should_propagate_errors(); if (!maximized.script.has_value() || (!key_script.is_empty() && !alias_script.is_empty())) - maximized.script = alias_script; + maximized.script = String::from_utf8(alias_script).release_value_but_fixme_should_propagate_errors(); if (!maximized.region.has_value() || (!key_region.is_empty() && !alias_region.is_empty())) - maximized.region = alias_region; + maximized.region = String::from_utf8(alias_region).release_value_but_fixme_should_propagate_errors(); return maximized; } diff --git a/Tests/LibLocale/TestLocale.cpp b/Tests/LibLocale/TestLocale.cpp index 7df720eb7c..7300faddac 100644 --- a/Tests/LibLocale/TestLocale.cpp +++ b/Tests/LibLocale/TestLocale.cpp @@ -87,20 +87,34 @@ TEST_CASE(is_type_identifier) EXPECT(!Locale::is_type_identifier("aaaa-"sv)); } +template +[[nodiscard]] static bool compare_vectors(LHS const& lhs, RHS const& rhs) +{ 
+ if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); ++i) { + if (lhs[i] != rhs[i]) + return false; + } + + return true; +} + TEST_CASE(parse_unicode_locale_id) { auto fail = [](StringView locale) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); EXPECT(!locale_id.has_value()); }; - auto pass = [](StringView locale, Optional expected_language, Optional expected_script, Optional expected_region, Vector expected_variants) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto pass = [](StringView locale, Optional expected_language, Optional expected_script, Optional expected_region, Vector expected_variants) { + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); VERIFY(locale_id.has_value()); EXPECT_EQ(locale_id->language_id.language, expected_language); EXPECT_EQ(locale_id->language_id.script, expected_script); EXPECT_EQ(locale_id->language_id.region, expected_region); - EXPECT_EQ(locale_id->language_id.variants, expected_variants); + EXPECT(compare_vectors(locale_id->language_id.variants, expected_variants)); }; fail("a"sv); @@ -120,17 +134,27 @@ TEST_CASE(parse_unicode_locale_id) TEST_CASE(parse_unicode_locale_id_with_unicode_locale_extension) { + struct LocaleExtension { + struct Keyword { + StringView key {}; + StringView value {}; + }; + + Vector attributes {}; + Vector keywords {}; + }; + auto fail = [](StringView locale) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); EXPECT(!locale_id.has_value()); }; - auto pass = [](StringView locale, Locale::LocaleExtension const& expected_extension) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto pass = [](StringView locale, LocaleExtension const& expected_extension) { + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); VERIFY(locale_id.has_value()); 
EXPECT_EQ(locale_id->extensions.size(), 1u); auto const& actual_extension = locale_id->extensions[0].get(); - VERIFY(actual_extension.attributes == expected_extension.attributes); + EXPECT(compare_vectors(actual_extension.attributes, expected_extension.attributes)); EXPECT_EQ(actual_extension.keywords.size(), expected_extension.keywords.size()); for (size_t i = 0; i < actual_extension.keywords.size(); ++i) { @@ -166,12 +190,30 @@ TEST_CASE(parse_unicode_locale_id_with_unicode_locale_extension) TEST_CASE(parse_unicode_locale_id_with_transformed_extension) { + struct TransformedExtension { + struct LanguageID { + bool is_root { false }; + Optional language {}; + Optional script {}; + Optional region {}; + Vector variants {}; + }; + + struct TransformedField { + StringView key {}; + StringView value {}; + }; + + Optional language {}; + Vector fields {}; + }; + auto fail = [](StringView locale) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); EXPECT(!locale_id.has_value()); }; - auto pass = [](StringView locale, Locale::TransformedExtension const& expected_extension) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto pass = [](StringView locale, TransformedExtension const& expected_extension) { + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); VERIFY(locale_id.has_value()); EXPECT_EQ(locale_id->extensions.size(), 1u); @@ -182,7 +224,7 @@ TEST_CASE(parse_unicode_locale_id_with_transformed_extension) EXPECT_EQ(actual_extension.language->language, expected_extension.language->language); EXPECT_EQ(actual_extension.language->script, expected_extension.language->script); EXPECT_EQ(actual_extension.language->region, expected_extension.language->region); - EXPECT_EQ(actual_extension.language->variants, expected_extension.language->variants); + EXPECT(compare_vectors(actual_extension.language->variants, expected_extension.language->variants)); } 
EXPECT_EQ(actual_extension.fields.size(), expected_extension.fields.size()); @@ -216,28 +258,33 @@ TEST_CASE(parse_unicode_locale_id_with_transformed_extension) fail("en-t-k0-aa"sv); fail("en-t-k0-aaaaaaaaa"sv); - pass("en-t-en"sv, { Locale::LanguageID { false, "en"sv }, {} }); - pass("en-t-en-latn"sv, { Locale::LanguageID { false, "en"sv, "latn"sv }, {} }); - pass("en-t-en-us"sv, { Locale::LanguageID { false, "en"sv, {}, "us"sv }, {} }); - pass("en-t-en-latn-us"sv, { Locale::LanguageID { false, "en"sv, "latn"sv, "us"sv }, {} }); - pass("en-t-en-posix"sv, { Locale::LanguageID { false, "en"sv, {}, {}, { "posix"sv } }, {} }); - pass("en-t-en-latn-posix"sv, { Locale::LanguageID { false, "en"sv, "latn"sv, {}, { "posix"sv } }, {} }); - pass("en-t-en-us-posix"sv, { Locale::LanguageID { false, "en"sv, {}, "us"sv, { "posix"sv } }, {} }); - pass("en-t-en-latn-us-posix"sv, { Locale::LanguageID { false, "en"sv, "latn"sv, "us"sv, { "posix"sv } }, {} }); + pass("en-t-en"sv, { TransformedExtension::LanguageID { false, "en"sv }, {} }); + pass("en-t-en-latn"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv }, {} }); + pass("en-t-en-us"sv, { TransformedExtension::LanguageID { false, "en"sv, {}, "us"sv }, {} }); + pass("en-t-en-latn-us"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv, "us"sv }, {} }); + pass("en-t-en-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, {}, {}, { "posix"sv } }, {} }); + pass("en-t-en-latn-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv, {}, { "posix"sv } }, {} }); + pass("en-t-en-us-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, {}, "us"sv, { "posix"sv } }, {} }); + pass("en-t-en-latn-us-posix"sv, { TransformedExtension::LanguageID { false, "en"sv, "latn"sv, "us"sv, { "posix"sv } }, {} }); pass("en-t-k0-aaa"sv, { {}, { { "k0"sv, { "aaa"sv } } } }); pass("en-t-k0-aaa-bbbb"sv, { {}, { { "k0"sv, "aaa-bbbb"sv } } }); pass("en-t-k0-aaa-k1-bbbb"sv, { {}, { { "k0"sv, { "aaa"sv } 
}, { "k1"sv, "bbbb"sv } } }); - pass("en-t-en-k0-aaa"sv, { Locale::LanguageID { false, "en"sv }, { { "k0"sv, "aaa"sv } } }); + pass("en-t-en-k0-aaa"sv, { TransformedExtension::LanguageID { false, "en"sv }, { { "k0"sv, "aaa"sv } } }); } TEST_CASE(parse_unicode_locale_id_with_other_extension) { + struct OtherExtension { + char key {}; + StringView value {}; + }; + auto fail = [](StringView locale) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); EXPECT(!locale_id.has_value()); }; - auto pass = [](StringView locale, Locale::OtherExtension const& expected_extension) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto pass = [](StringView locale, OtherExtension const& expected_extension) { + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); VERIFY(locale_id.has_value()); EXPECT_EQ(locale_id->extensions.size(), 1u); @@ -267,13 +314,13 @@ TEST_CASE(parse_unicode_locale_id_with_other_extension) TEST_CASE(parse_unicode_locale_id_with_private_use_extension) { auto fail = [](StringView locale) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); EXPECT(!locale_id.has_value()); }; - auto pass = [](StringView locale, Vector const& expected_extension) { - auto locale_id = Locale::parse_unicode_locale_id(locale); + auto pass = [](StringView locale, Vector const& expected_extension) { + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); VERIFY(locale_id.has_value()); - EXPECT_EQ(locale_id->private_use_extensions, expected_extension); + EXPECT(compare_vectors(locale_id->private_use_extensions, expected_extension)); }; fail("en-x"sv); @@ -291,10 +338,10 @@ TEST_CASE(parse_unicode_locale_id_with_private_use_extension) TEST_CASE(canonicalize_unicode_locale_id) { auto test = [](StringView locale, StringView expected_canonical_locale) { - auto locale_id = 
Locale::parse_unicode_locale_id(locale); + auto locale_id = MUST(Locale::parse_unicode_locale_id(locale)); VERIFY(locale_id.has_value()); - auto canonical_locale = Locale::canonicalize_unicode_locale_id(*locale_id); + auto canonical_locale = MUST(Locale::canonicalize_unicode_locale_id(*locale_id)); EXPECT_EQ(*canonical_locale, expected_canonical_locale); }; diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp index c5bc4ba8cc..b3a386ff58 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp @@ -29,7 +29,7 @@ Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView local quick_sort(variants); for (size_t i = 0; i < variants.size() - 1; ++i) { - if (variants[i].equals_ignoring_case(variants[i + 1])) + if (variants[i].equals_ignoring_case(variants[i + 1]).release_value_but_fixme_should_propagate_errors()) return true; } @@ -39,7 +39,7 @@ Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView local // IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise: // locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier; - auto locale_id = ::Locale::parse_unicode_locale_id(locale); + auto locale_id = ::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors(); if (!locale_id.has_value()) return {}; @@ -114,11 +114,11 @@ DeprecatedString canonicalize_unicode_locale_id(::Locale::LocaleID& locale) // 1. Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers. // 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form. 
- auto locale_id = ::Locale::canonicalize_unicode_locale_id(locale); + auto locale_id = ::Locale::canonicalize_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors(); VERIFY(locale_id.has_value()); // 4. Return localeId. - return locale_id.release_value(); + return locale_id->to_deprecated_string(); } // 6.3.1 IsWellFormedCurrencyCode ( currency ), https://tc39.es/ecma402/#sec-iswellformedcurrencycode @@ -301,7 +301,7 @@ static MatcherResult lookup_matcher(Vector const& requested_lo // 2. For each element locale of requestedLocales, do for (auto const& locale : requested_locales) { - auto locale_id = ::Locale::parse_unicode_locale_id(locale); + auto locale_id = ::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors(); VERIFY(locale_id.has_value()); // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed. @@ -453,10 +453,10 @@ ThrowCompletionOr resolve_locale(Vector const& r // a. If keyLocaleData contains requestedValue, then if (key_locale_data.contains_slow(requested_value)) { // i. Let value be requestedValue. - value = move(requested_value); + value = requested_value.to_deprecated_string(); // ii. Let supportedExtensionAddition be the string-concatenation of "-", key, "-", and value. - supported_extension_addition = ::Locale::Keyword { key, move(entry.value) }; + supported_extension_addition = ::Locale::Keyword { String::from_utf8(key).release_value_but_fixme_should_propagate_errors(), move(entry.value) }; } } // 4. Else if keyLocaleData contains "true", then @@ -465,7 +465,7 @@ ThrowCompletionOr resolve_locale(Vector const& r value = "true"sv; // b. Let supportedExtensionAddition be the string-concatenation of "-" and key. 
- supported_extension_addition = ::Locale::Keyword { key, {} }; + supported_extension_addition = ::Locale::Keyword { String::from_utf8(key).release_value_but_fixme_should_propagate_errors(), {} }; } break; @@ -480,7 +480,9 @@ ThrowCompletionOr resolve_locale(Vector const& r if (options_value.has_value()) { // 1. Let optionsValue be the string optionsValue after performing the algorithm steps to transform Unicode extension values to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions. // 2. Let optionsValue be the string optionsValue after performing the algorithm steps to replace Unicode extension values with their canonical form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions. - ::Locale::canonicalize_unicode_extension_values(key, *options_value, true); + auto options_value_string = String::from_deprecated_string(*options_value).release_value_but_fixme_should_propagate_errors(); + ::Locale::canonicalize_unicode_extension_values(key, options_value_string, true).release_value_but_fixme_should_propagate_errors(); + options_value = options_value_string.to_deprecated_string(); // 3. If optionsValue is the empty String, then if (options_value->is_empty()) { @@ -508,7 +510,7 @@ ThrowCompletionOr resolve_locale(Vector const& r // 10. If supportedExtension is not "-u", then if (!supported_extension.keywords.is_empty()) { - auto locale_id = ::Locale::parse_unicode_locale_id(found_locale); + auto locale_id = ::Locale::parse_unicode_locale_id(found_locale).release_value_but_fixme_should_propagate_errors(); VERIFY(locale_id.has_value()); // a. Set foundLocale to InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension). @@ -530,7 +532,7 @@ Vector lookup_supported_locales(Vector const // 2. 
For each element locale of requestedLocales, do for (auto const& locale : requested_locales) { - auto locale_id = ::Locale::parse_unicode_locale_id(locale); + auto locale_id = ::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors(); VERIFY(locale_id.has_value()); // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale extension sequences removed. diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp index f928d527ae..05443188f1 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp @@ -106,7 +106,7 @@ ThrowCompletionOr canonical_code_for_display_names(VM& vm, DisplayNames:: // 1. If type is "language", then if (type == DisplayNames::Type::Language) { // a. If code does not match the unicode_language_id production, throw a RangeError exception. - if (!::Locale::parse_unicode_language_id(code).has_value()) + if (!TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_language_id(code)).has_value()) return vm.throw_completion(ErrorType::OptionIsNotValidValue, code, "language"sv); // b. If IsStructurallyValidLanguageTag(code) is false, throw a RangeError exception. 
diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Locale.cpp b/Userland/Libraries/LibJS/Runtime/Intl/Locale.cpp index 5dfa3ee110..9143ad0ff5 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/Locale.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/Locale.cpp @@ -36,17 +36,17 @@ Locale::Locale(::Locale::LocaleID const& locale_id, Object& prototype) for (auto const& keyword : extension.get<::Locale::LocaleExtension>().keywords) { if (keyword.key == "ca"sv) { - set_calendar(keyword.value); + set_calendar(keyword.value.to_deprecated_string()); } else if (keyword.key == "co"sv) { - set_collation(keyword.value); + set_collation(keyword.value.to_deprecated_string()); } else if (keyword.key == "hc"sv) { - set_hour_cycle(keyword.value); + set_hour_cycle(keyword.value.to_deprecated_string()); } else if (keyword.key == "kf"sv) { - set_case_first(keyword.value); + set_case_first(keyword.value.to_deprecated_string()); } else if (keyword.key == "kn"sv) { set_numeric(keyword.value.is_empty()); } else if (keyword.key == "nu"sv) { - set_numbering_system(keyword.value); + set_numbering_system(keyword.value.to_deprecated_string()); } } @@ -81,7 +81,7 @@ Array* calendars_of_locale(VM& vm, Locale const& locale_object) auto const& locale = locale_object.locale(); // 3. Assert: locale matches the unicode_locale_id production. - VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); + VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value()); // 4. Let list be a List of 1 or more unique canonical calendar identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for date and time formatting in locale. auto list = ::Locale::get_keywords_for_locale(locale, "ca"sv); @@ -100,7 +100,7 @@ Array* collations_of_locale(VM& vm, Locale const& locale_object) auto const& locale = locale_object.locale(); // 3. 
Assert: locale matches the unicode_locale_id production. - VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); + VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value()); // 4. Let list be a List of 1 or more unique canonical collation identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, ordered as if an Array of the same values had been sorted, using %Array.prototype.sort% using undefined as comparefn, of those in common use for string comparison in locale. The values "standard" and "search" must be excluded from list. auto list = ::Locale::get_keywords_for_locale(locale, "co"sv); @@ -119,7 +119,7 @@ Array* hour_cycles_of_locale(VM& vm, Locale const& locale_object) auto const& locale = locale_object.locale(); // 3. Assert: locale matches the unicode_locale_id production. - VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); + VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value()); // 4. Let list be a List of 1 or more unique hour cycle identifiers, which must be lower case String values indicating either the 12-hour format ("h11", "h12") or the 24-hour format ("h23", "h24"), sorted in descending preference of those in common use for date and time formatting in locale. auto list = ::Locale::get_keywords_for_locale(locale, "hc"sv); @@ -138,7 +138,7 @@ Array* numbering_systems_of_locale(VM& vm, Locale const& locale_object) auto const& locale = locale_object.locale(); // 3. Assert: locale matches the unicode_locale_id production. - VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); + VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value()); // 4. 
Let list be a List of 1 or more unique canonical numbering system identifiers, which must be lower case String values conforming to the type sequence from UTS 35 Unicode Locale Identifier, section 3.2, sorted in descending preference of those in common use for formatting numeric values in locale. auto list = ::Locale::get_keywords_for_locale(locale, "nu"sv); @@ -174,7 +174,7 @@ StringView character_direction_of_locale(Locale const& locale_object) auto const& locale = locale_object.locale(); // 2. Assert: locale matches the unicode_locale_id production. - VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); + VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value()); // 3. If the default general ordering of characters (characterOrder) within a line in locale is right-to-left, return "rtl". // NOTE: LibUnicode handles both LTR and RTL character orders in this call, not just RTL. We then fallback to LTR @@ -235,7 +235,7 @@ WeekInfo week_info_of_locale(Locale const& locale_object) auto const& locale = locale_object.locale(); // 2. Assert: locale matches the unicode_locale_id production. - VERIFY(::Locale::parse_unicode_locale_id(locale).has_value()); + VERIFY(::Locale::parse_unicode_locale_id(locale).release_value_but_fixme_should_propagate_errors().has_value()); // 3. Return a record whose fields are defined by Table 1, with values based on locale. 
WeekInfo week_info {}; diff --git a/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp b/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp index 307b64b98e..403890276d 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -17,26 +18,26 @@ namespace JS::Intl { struct LocaleAndKeys { - DeprecatedString locale; - Optional ca; - Optional co; - Optional hc; - Optional kf; - Optional kn; - Optional nu; + String locale; + Optional ca; + Optional co; + Optional hc; + Optional kf; + Optional kn; + Optional nu; }; // Note: This is not an AO in the spec. This just serves to abstract very similar steps in ApplyOptionsToTag and the Intl.Locale constructor. -static ThrowCompletionOr> get_string_option(VM& vm, Object const& options, PropertyKey const& property, Function validator, Span values = {}) +static ThrowCompletionOr> get_string_option(VM& vm, Object const& options, PropertyKey const& property, Function validator, Span values = {}) { auto option = TRY(get_option(vm, options, property, OptionType::String, values, Empty {})); if (option.is_undefined()) - return Optional {}; + return Optional {}; if (validator && !validator(TRY(option.as_string().utf8_string_view()))) return vm.throw_completion(ErrorType::OptionIsNotValidValue, option, property); - return TRY(option.as_string().deprecated_string()); + return TRY(option.as_string().utf8_string()); } // 14.1.2 ApplyOptionsToTag ( tag, options ), https://tc39.es/ecma402/#sec-apply-options-to-tag @@ -69,7 +70,7 @@ static ThrowCompletionOr apply_options_to_tag(VM& vm, StringVi auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(*locale_id); // 11. Assert: tag matches the unicode_locale_id production. 
- locale_id = ::Locale::parse_unicode_locale_id(canonicalized_tag); + locale_id = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(canonicalized_tag)); VERIFY(locale_id.has_value()); // 12. Let languageId be the substring of tag corresponding to the unicode_language_id production. @@ -109,10 +110,10 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey { // 1. Assert: Type(tag) is String. // 2. Assert: tag matches the unicode_locale_id production. - auto locale_id = ::Locale::parse_unicode_locale_id(tag); + auto locale_id = ::Locale::parse_unicode_locale_id(tag).release_value_but_fixme_should_propagate_errors(); VERIFY(locale_id.has_value()); - Vector attributes; + Vector attributes; Vector<::Locale::Keyword> keywords; // 3. If tag contains a substring that is a Unicode locale extension sequence, then @@ -134,7 +135,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // a. Let attributes be a new empty List. // b. Let keywords be a new empty List. - auto field_from_key = [](LocaleAndKeys& value, StringView key) -> Optional& { + auto field_from_key = [](LocaleAndKeys& value, StringView key) -> Optional& { if (key == "ca"sv) return value.ca; if (key == "co"sv) @@ -156,7 +157,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // 6. For each element key of relevantExtensionKeys, do for (auto const& key : relevant_extension_keys) { // a. Let value be undefined. - Optional value {}; + Optional value {}; ::Locale::Keyword* entry = nullptr; // b. If keywords contains an element whose [[Key]] is the same as key, then @@ -188,7 +189,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // iv. Else, else { // 1. Append the Record { [[Key]]: key, [[Value]]: value } to keywords. 
- keywords.append({ key, *value }); + keywords.append({ String::from_utf8(key).release_value_but_fixme_should_propagate_errors(), *value }); } } @@ -198,7 +199,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // 7. Let locale be the String value that is tag with any Unicode locale extension sequences removed. locale_id->remove_extension_type<::Locale::LocaleExtension>(); - auto locale = locale_id->to_deprecated_string(); + auto locale = locale_id->to_string().release_value_but_fixme_should_propagate_errors(); // 8. Let newExtension be a Unicode BCP 47 U Extension based on attributes and keywords. ::Locale::LocaleExtension new_extension { move(attributes), move(keywords) }; @@ -206,7 +207,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // 9. If newExtension is not the empty String, then if (!new_extension.attributes.is_empty() || !new_extension.keywords.is_empty()) { // a. Let locale be ! InsertUnicodeExtensionAndCanonicalize(locale, newExtension). - locale = insert_unicode_extension_and_canonicalize(locale_id.release_value(), move(new_extension)); + locale = String::from_deprecated_string(insert_unicode_extension_and_canonicalize(locale_id.release_value(), move(new_extension))).release_value_but_fixme_should_propagate_errors(); } // 10. Set result.[[locale]] to locale. @@ -313,7 +314,7 @@ ThrowCompletionOr> LocaleConstructor::construct(FunctionObj // 24. If kn is not undefined, set kn to ! ToString(kn). // 25. Set opt.[[kn]] to kn. if (!kn.is_undefined()) - opt.kn = TRY(kn.to_deprecated_string(vm)); + opt.kn = TRY(kn.to_string(vm)); // 26. Let numberingSystem be ? GetOption(options, "numberingSystem", string, empty, undefined). // 27. If numberingSystem is not undefined, then @@ -325,22 +326,22 @@ ThrowCompletionOr> LocaleConstructor::construct(FunctionObj auto result = apply_unicode_extension_to_tag(tag, move(opt), relevant_extension_keys); // 30. Set locale.[[Locale]] to r.[[locale]]. 
- locale->set_locale(move(result.locale)); + locale->set_locale(result.locale.to_deprecated_string()); // 31. Set locale.[[Calendar]] to r.[[ca]]. if (result.ca.has_value()) - locale->set_calendar(result.ca.release_value()); + locale->set_calendar(result.ca->to_deprecated_string()); // 32. Set locale.[[Collation]] to r.[[co]]. if (result.co.has_value()) - locale->set_collation(result.co.release_value()); + locale->set_collation(result.co->to_deprecated_string()); // 33. Set locale.[[HourCycle]] to r.[[hc]]. if (result.hc.has_value()) - locale->set_hour_cycle(result.hc.release_value()); + locale->set_hour_cycle(result.hc->to_deprecated_string()); // 34. If relevantExtensionKeys contains "kf", then if (relevant_extension_keys.span().contains_slow("kf"sv)) { // a. Set locale.[[CaseFirst]] to r.[[kf]]. if (result.kf.has_value()) - locale->set_case_first(result.kf.release_value()); + locale->set_case_first(result.kf->to_deprecated_string()); } // 35. If relevantExtensionKeys contains "kn", then @@ -359,7 +360,7 @@ ThrowCompletionOr> LocaleConstructor::construct(FunctionObj // 36. Set locale.[[NumberingSystem]] to r.[[nu]]. if (result.nu.has_value()) - locale->set_numbering_system(result.nu.release_value()); + locale->set_numbering_system(result.nu->to_deprecated_string()); // 37. Return locale. return locale; diff --git a/Userland/Libraries/LibJS/Runtime/Intl/LocalePrototype.cpp b/Userland/Libraries/LibJS/Runtime/Intl/LocalePrototype.cpp index fd50f1fa16..382bcb8f5b 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/LocalePrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/LocalePrototype.cpp @@ -61,7 +61,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::maximize) // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]). 
auto* locale_object = TRY(typed_this_object(vm)); - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); VERIFY(locale.has_value()); // 3. Let maximal be the result of the Add Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set maximal to loc.[[Locale]]. @@ -81,7 +81,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::minimize) // 2. Perform ? RequireInternalSlot(loc, [[InitializedLocale]]). auto* locale_object = TRY(typed_this_object(vm)); - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); VERIFY(locale.has_value()); // 3. Let minimal be the result of the Remove Likely Subtags algorithm applied to loc.[[Locale]]. If an error is signaled, set minimal to loc.[[Locale]]. @@ -111,7 +111,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::base_name) auto* locale_object = TRY(typed_this_object(vm)); // 3. Let locale be loc.[[Locale]]. - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); VERIFY(locale.has_value()); // 4. Return the substring of locale corresponding to the unicode_language_id production. @@ -160,13 +160,13 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::language) auto* locale_object = TRY(typed_this_object(vm)); // 3. Let locale be loc.[[Locale]]. - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); // 4. Assert: locale matches the unicode_locale_id production. VERIFY(locale.has_value()); // 5. Return the substring of locale corresponding to the unicode_language_subtag production of the unicode_language_id. 
- return PrimitiveString::create(vm, *locale->language_id.language); + return PrimitiveString::create(vm, locale->language_id.language.release_value()); } // 14.3.14 get Intl.Locale.prototype.script, https://tc39.es/ecma402/#sec-Intl.Locale.prototype.script @@ -177,7 +177,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::script) auto* locale_object = TRY(typed_this_object(vm)); // 3. Let locale be loc.[[Locale]]. - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); // 4. Assert: locale matches the unicode_locale_id production. VERIFY(locale.has_value()); @@ -187,7 +187,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::script) return js_undefined(); // 6. Return the substring of locale corresponding to the unicode_script_subtag production of the unicode_language_id. - return PrimitiveString::create(vm, *locale->language_id.script); + return PrimitiveString::create(vm, locale->language_id.script.release_value()); } // 14.3.15 get Intl.Locale.prototype.region, https://tc39.es/ecma402/#sec-Intl.Locale.prototype.region @@ -198,7 +198,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::region) auto* locale_object = TRY(typed_this_object(vm)); // 3. Let locale be loc.[[Locale]]. - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); // 4. Assert: locale matches the unicode_locale_id production. VERIFY(locale.has_value()); @@ -208,7 +208,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::region) return js_undefined(); // 6. Return the substring of locale corresponding to the unicode_region_subtag production of the unicode_language_id. 
- return PrimitiveString::create(vm, *locale->language_id.region); + return PrimitiveString::create(vm, locale->language_id.region.release_value()); } #define JS_ENUMERATE_LOCALE_INFO_PROPERTIES \ @@ -238,7 +238,7 @@ JS_DEFINE_NATIVE_FUNCTION(LocalePrototype::time_zones) auto* locale_object = TRY(typed_this_object(vm)); // 3. Let locale be loc.[[Locale]]. - auto locale = ::Locale::parse_unicode_locale_id(locale_object->locale()); + auto locale = TRY_OR_THROW_OOM(vm, ::Locale::parse_unicode_locale_id(locale_object->locale())); // 4. If the unicode_language_id production of locale does not contain the ["-" unicode_region_subtag] sequence, return undefined. if (!locale.has_value() || !locale->language_id.region.has_value()) diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp index a16dc23480..6e964016b7 100644 --- a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp @@ -890,12 +890,12 @@ static ThrowCompletionOr transform_case(VM& vm, String const& string, Va // 2. If requestedLocales is not an empty List, then if (!requested_locales.is_empty()) { // a. Let requestedLocale be requestedLocales[0]. - requested_locale = Locale::parse_unicode_locale_id(requested_locales[0]); + requested_locale = TRY_OR_THROW_OOM(vm, Locale::parse_unicode_locale_id(requested_locales[0])); } // 3. Else, else { // a. Let requestedLocale be ! DefaultLocale(). 
- requested_locale = Locale::parse_unicode_locale_id(Locale::default_locale()); + requested_locale = TRY_OR_THROW_OOM(vm, Locale::parse_unicode_locale_id(Locale::default_locale())); } VERIFY(requested_locale.has_value()); diff --git a/Userland/Libraries/LibLocale/DateTimeFormat.cpp b/Userland/Libraries/LibLocale/DateTimeFormat.cpp index f4d5e07751..fb330968a0 100644 --- a/Userland/Libraries/LibLocale/DateTimeFormat.cpp +++ b/Userland/Libraries/LibLocale/DateTimeFormat.cpp @@ -109,7 +109,7 @@ static auto find_regional_values_for_locale(StringView locale, GetRegionalValues auto return_default_values = [&]() { return get_regional_values("001"sv); }; - auto language = parse_unicode_language_id(locale); + auto language = parse_unicode_language_id(locale).release_value_but_fixme_should_propagate_errors(); if (!language.has_value()) return return_default_values(); diff --git a/Userland/Libraries/LibLocale/Locale.cpp b/Userland/Libraries/LibLocale/Locale.cpp index a26621500c..2315c7098e 100644 --- a/Userland/Libraries/LibLocale/Locale.cpp +++ b/Userland/Libraries/LibLocale/Locale.cpp @@ -91,7 +91,7 @@ bool is_type_identifier(StringView identifier) return lexer.is_eof() && (lexer.tell() > 0); } -static Optional parse_unicode_language_id(GenericLexer& lexer) +static ErrorOr> parse_unicode_language_id(GenericLexer& lexer) { // https://unicode.org/reports/tr35/#Unicode_language_identifier // @@ -120,25 +120,25 @@ static Optional parse_unicode_language_id(GenericLexer& lexer) while (!lexer.is_eof() && (state != ParseState::Done)) { auto segment = consume_next_segment(lexer, state != ParseState::ParsingLanguageOrScript); if (!segment.has_value()) - return {}; + return OptionalNone {}; switch (state) { case ParseState::ParsingLanguageOrScript: if (is_unicode_language_subtag(*segment)) { state = ParseState::ParsingScript; - language_id.language = *segment; + language_id.language = TRY(String::from_utf8(*segment)); } else if (is_unicode_script_subtag(*segment)) { state = 
ParseState::ParsingRegion; - language_id.script = *segment; + language_id.script = TRY(String::from_utf8(*segment)); } else { - return {}; + return OptionalNone {}; } break; case ParseState::ParsingScript: if (is_unicode_script_subtag(*segment)) { state = ParseState::ParsingRegion; - language_id.script = *segment; + language_id.script = TRY(String::from_utf8(*segment)); break; } @@ -148,7 +148,7 @@ static Optional parse_unicode_language_id(GenericLexer& lexer) case ParseState::ParsingRegion: if (is_unicode_region_subtag(*segment)) { state = ParseState::ParsingVariant; - language_id.region = *segment; + language_id.region = TRY(String::from_utf8(*segment)); break; } @@ -157,7 +157,7 @@ static Optional parse_unicode_language_id(GenericLexer& lexer) case ParseState::ParsingVariant: if (is_unicode_variant_subtag(*segment)) { - language_id.variants.append(*segment); + TRY(language_id.variants.try_append(TRY(String::from_utf8(*segment)))); } else { lexer.retreat(segment->length() + 1); state = ParseState::Done; @@ -172,7 +172,7 @@ static Optional parse_unicode_language_id(GenericLexer& lexer) return language_id; } -static Optional parse_unicode_locale_extension(GenericLexer& lexer) +static ErrorOr> parse_unicode_locale_extension(GenericLexer& lexer) { // https://unicode.org/reports/tr35/#unicode_locale_extensions // @@ -191,7 +191,7 @@ static Optional parse_unicode_locale_extension(GenericLexer& le while (!lexer.is_eof() && (state != ParseState::Done)) { auto segment = consume_next_segment(lexer); if (!segment.has_value()) - return {}; + return OptionalNone {}; if (state == ParseState::ParsingAttributeOrKeyword) state = is_key(*segment) ? 
ParseState::ParsingKeyword : ParseState::ParsingAttribute; @@ -199,7 +199,7 @@ static Optional parse_unicode_locale_extension(GenericLexer& le switch (state) { case ParseState::ParsingAttribute: if (is_attribute(*segment)) { - locale_extension.attributes.append(*segment); + TRY(locale_extension.attributes.try_append(TRY(String::from_utf8(*segment)))); break; } @@ -208,7 +208,7 @@ static Optional parse_unicode_locale_extension(GenericLexer& le case ParseState::ParsingKeyword: { // keyword = key (sep type)? - Keyword keyword { .key = *segment }; + Keyword keyword { .key = TRY(String::from_utf8(*segment)) }; Vector keyword_values; if (!is_key(*segment)) { @@ -226,14 +226,14 @@ static Optional parse_unicode_locale_extension(GenericLexer& le break; } - keyword_values.append(*type); + TRY(keyword_values.try_append(*type)); } StringBuilder builder; - builder.join('-', keyword_values); - keyword.value = builder.build(); + TRY(builder.try_join('-', keyword_values)); + keyword.value = TRY(builder.to_string()); - locale_extension.keywords.append(move(keyword)); + TRY(locale_extension.keywords.try_append(move(keyword))); break; } @@ -243,11 +243,11 @@ static Optional parse_unicode_locale_extension(GenericLexer& le } if (locale_extension.attributes.is_empty() && locale_extension.keywords.is_empty()) - return {}; + return OptionalNone {}; return locale_extension; } -static Optional parse_transformed_extension(GenericLexer& lexer) +static ErrorOr> parse_transformed_extension(GenericLexer& lexer) { // https://unicode.org/reports/tr35/#transformed_extensions // @@ -266,7 +266,7 @@ static Optional parse_transformed_extension(GenericLexer& while (!lexer.is_eof() && (state != ParseState::Done)) { auto segment = consume_next_segment(lexer); if (!segment.has_value()) - return {}; + return OptionalNone {}; if (state == ParseState::ParsingLanguageOrField) state = is_unicode_language_subtag(*segment) ? 
ParseState::ParsingLanguage : ParseState::ParsingField; @@ -275,17 +275,17 @@ static Optional parse_transformed_extension(GenericLexer& case ParseState::ParsingLanguage: lexer.retreat(segment->length()); - if (auto language_id = parse_unicode_language_id(lexer); language_id.has_value()) { + if (auto language_id = TRY(parse_unicode_language_id(lexer)); language_id.has_value()) { transformed_extension.language = language_id.release_value(); state = ParseState::ParsingField; break; } - return {}; + return OptionalNone {}; case ParseState::ParsingField: { // tfield = tkey tvalue; - TransformedField field { .key = *segment }; + TransformedField field { .key = TRY(String::from_utf8(*segment)) }; Vector field_values; if (!is_transformed_key(*segment)) { @@ -303,17 +303,17 @@ static Optional parse_transformed_extension(GenericLexer& break; } - field_values.append(*value); + TRY(field_values.try_append(*value)); } if (field_values.is_empty()) - return {}; + return OptionalNone {}; StringBuilder builder; - builder.join('-', field_values); - field.value = builder.build(); + TRY(builder.try_join('-', field_values)); + field.value = TRY(builder.to_string()); - transformed_extension.fields.append(move(field)); + TRY(transformed_extension.fields.try_append(move(field))); break; } @@ -323,11 +323,11 @@ static Optional parse_transformed_extension(GenericLexer& } if (!transformed_extension.language.has_value() && transformed_extension.fields.is_empty()) - return {}; + return OptionalNone {}; return transformed_extension; } -static Optional parse_other_extension(char key, GenericLexer& lexer) +static ErrorOr> parse_other_extension(char key, GenericLexer& lexer) { // https://unicode.org/reports/tr35/#other_extensions // @@ -336,7 +336,7 @@ static Optional parse_other_extension(char key, GenericLexer& le Vector other_values; if (!is_ascii_alphanumeric(key) || (key == 'x') || (key == 'X')) - return {}; + return OptionalNone {}; while (true) { auto segment = consume_next_segment(lexer); 
@@ -348,20 +348,20 @@ static Optional parse_other_extension(char key, GenericLexer& le break; } - other_values.append(*segment); + TRY(other_values.try_append(*segment)); } if (other_values.is_empty()) - return {}; + return OptionalNone {}; StringBuilder builder; - builder.join('-', other_values); - other_extension.value = builder.build(); + TRY(builder.try_join('-', other_values)); + other_extension.value = TRY(builder.to_string()); return other_extension; } -static Optional parse_extension(GenericLexer& lexer) +static ErrorOr> parse_extension(GenericLexer& lexer) { // https://unicode.org/reports/tr35/#extensions // @@ -372,28 +372,28 @@ static Optional parse_extension(GenericLexer& lexer) switch (char key = (*header)[0]) { case 'u': case 'U': - if (auto extension = parse_unicode_locale_extension(lexer); extension.has_value()) + if (auto extension = TRY(parse_unicode_locale_extension(lexer)); extension.has_value()) return Extension { extension.release_value() }; break; case 't': case 'T': - if (auto extension = parse_transformed_extension(lexer); extension.has_value()) + if (auto extension = TRY(parse_transformed_extension(lexer)); extension.has_value()) return Extension { extension.release_value() }; break; default: - if (auto extension = parse_other_extension(key, lexer); extension.has_value()) + if (auto extension = TRY(parse_other_extension(key, lexer)); extension.has_value()) return Extension { extension.release_value() }; break; } } lexer.retreat(lexer.tell() - starting_position); - return {}; + return OptionalNone {}; } -static Vector parse_private_use_extensions(GenericLexer& lexer) +static ErrorOr> parse_private_use_extensions(GenericLexer& lexer) { // https://unicode.org/reports/tr35/#pu_extensions // @@ -402,10 +402,10 @@ static Vector parse_private_use_extensions(GenericLexer& lexer auto header = consume_next_segment(lexer); if (!header.has_value()) - return {}; + return Vector {}; - auto parse_values = [&]() -> Vector { - Vector extensions; + auto 
parse_values = [&]() -> ErrorOr> { + Vector extensions; while (true) { auto segment = consume_next_segment(lexer); @@ -417,33 +417,33 @@ static Vector parse_private_use_extensions(GenericLexer& lexer break; } - extensions.append(*segment); + TRY(extensions.try_append(TRY(String::from_utf8(*segment)))); } return extensions; }; if ((header->length() == 1) && (((*header)[0] == 'x') || ((*header)[0] == 'X'))) { - if (auto extensions = parse_values(); !extensions.is_empty()) + if (auto extensions = TRY(parse_values()); !extensions.is_empty()) return extensions; } lexer.retreat(lexer.tell() - starting_position); - return {}; + return Vector {}; } -Optional parse_unicode_language_id(StringView language) +ErrorOr> parse_unicode_language_id(StringView language) { GenericLexer lexer { language }; - auto language_id = parse_unicode_language_id(lexer); + auto language_id = TRY(parse_unicode_language_id(lexer)); if (!lexer.is_eof()) - return {}; + return OptionalNone {}; return language_id; } -Optional parse_unicode_locale_id(StringView locale) +ErrorOr> parse_unicode_locale_id(StringView locale) { GenericLexer lexer { locale }; @@ -452,28 +452,28 @@ Optional parse_unicode_locale_id(StringView locale) // unicode_locale_id = unicode_language_id // extensions* // pu_extensions? 
- auto language_id = parse_unicode_language_id(lexer); + auto language_id = TRY(parse_unicode_language_id(lexer)); if (!language_id.has_value()) - return {}; + return OptionalNone {}; LocaleID locale_id { language_id.release_value() }; while (true) { - auto extension = parse_extension(lexer); + auto extension = TRY(parse_extension(lexer)); if (!extension.has_value()) break; - locale_id.extensions.append(extension.release_value()); + TRY(locale_id.extensions.try_append(extension.release_value())); } - locale_id.private_use_extensions = parse_private_use_extensions(lexer); + locale_id.private_use_extensions = TRY(parse_private_use_extensions(lexer)); if (!lexer.is_eof()) - return {}; + return OptionalNone {}; return locale_id; } -static void perform_hard_coded_key_value_substitutions(StringView key, DeprecatedString& value) +static ErrorOr perform_hard_coded_key_value_substitutions(StringView key, String& value) { // FIXME: In the XML export of CLDR, there are some aliases defined in the following files: // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml @@ -483,65 +483,71 @@ static void perform_hard_coded_key_value_substitutions(StringView key, Deprecate // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/transform.xml // // There isn't yet a counterpart in the JSON export. 
See: https://unicode-org.atlassian.net/browse/CLDR-14571 + Optional result; + if (key == "ca"sv) { if (value == "islamicc"sv) - value = "islamic-civil"sv; + result = "islamic-civil"sv; else if (value == "ethiopic-amete-alem"sv) - value = "ethioaa"sv; + result = "ethioaa"sv; } else if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) { - value = "true"sv; + result = "true"sv; } else if (key == "ks"sv) { if (value == "primary"sv) - value = "level1"sv; + result = "level1"sv; else if (value == "tertiary"sv) - value = "level3"sv; + result = "level3"sv; // Note: There are also aliases for "secondary", "quaternary", "quarternary", and "identical", // but those are semantically incorrect values (they are too long), so they can be skipped. } else if ((key == "m0"sv) && (value == "names"sv)) { - value = "prprname"sv; + result = "prprname"sv; } else if ((key == "ms"sv) && (value == "imperial"sv)) { - value = "uksystem"sv; + result = "uksystem"sv; } else if (key == "tz"sv) { // Formatter disabled because this block is easier to read / check against timezone.xml as one-liners. 
// clang-format off - if (value == "aqams"sv) value = "nzakl"sv; - else if (value == "cnckg"sv) value = "cnsha"sv; - else if (value == "cnhrb"sv) value = "cnsha"sv; - else if (value == "cnkhg"sv) value = "cnurc"sv; - else if (value == "cuba"sv) value = "cuhav"sv; - else if (value == "egypt"sv) value = "egcai"sv; - else if (value == "eire"sv) value = "iedub"sv; - else if (value == "est"sv) value = "utcw05"sv; - else if (value == "gmt0"sv) value = "gmt"sv; - else if (value == "hongkong"sv) value = "hkhkg"sv; - else if (value == "hst"sv) value = "utcw10"sv; - else if (value == "iceland"sv) value = "isrey"sv; - else if (value == "iran"sv) value = "irthr"sv; - else if (value == "israel"sv) value = "jeruslm"sv; - else if (value == "jamaica"sv) value = "jmkin"sv; - else if (value == "japan"sv) value = "jptyo"sv; - else if (value == "kwajalein"sv) value = "mhkwa"sv; - else if (value == "libya"sv) value = "lytip"sv; - else if (value == "mst"sv) value = "utcw07"sv; - else if (value == "navajo"sv) value = "usden"sv; - else if (value == "poland"sv) value = "plwaw"sv; - else if (value == "portugal"sv) value = "ptlis"sv; - else if (value == "prc"sv) value = "cnsha"sv; - else if (value == "roc"sv) value = "twtpe"sv; - else if (value == "rok"sv) value = "krsel"sv; - else if (value == "singapore"sv) value = "sgsin"sv; - else if (value == "turkey"sv) value = "trist"sv; - else if (value == "uct"sv) value = "utc"sv; - else if (value == "usnavajo"sv) value = "usden"sv; - else if (value == "zulu"sv) value = "utc"sv; + if (value == "aqams"sv) result = "nzakl"sv; + else if (value == "cnckg"sv) result = "cnsha"sv; + else if (value == "cnhrb"sv) result = "cnsha"sv; + else if (value == "cnkhg"sv) result = "cnurc"sv; + else if (value == "cuba"sv) result = "cuhav"sv; + else if (value == "egypt"sv) result = "egcai"sv; + else if (value == "eire"sv) result = "iedub"sv; + else if (value == "est"sv) result = "utcw05"sv; + else if (value == "gmt0"sv) result = "gmt"sv; + else if (value == 
"hongkong"sv) result = "hkhkg"sv; + else if (value == "hst"sv) result = "utcw10"sv; + else if (value == "iceland"sv) result = "isrey"sv; + else if (value == "iran"sv) result = "irthr"sv; + else if (value == "israel"sv) result = "jeruslm"sv; + else if (value == "jamaica"sv) result = "jmkin"sv; + else if (value == "japan"sv) result = "jptyo"sv; + else if (value == "kwajalein"sv) result = "mhkwa"sv; + else if (value == "libya"sv) result = "lytip"sv; + else if (value == "mst"sv) result = "utcw07"sv; + else if (value == "navajo"sv) result = "usden"sv; + else if (value == "poland"sv) result = "plwaw"sv; + else if (value == "portugal"sv) result = "ptlis"sv; + else if (value == "prc"sv) result = "cnsha"sv; + else if (value == "roc"sv) result = "twtpe"sv; + else if (value == "rok"sv) result = "krsel"sv; + else if (value == "singapore"sv) result = "sgsin"sv; + else if (value == "turkey"sv) result = "trist"sv; + else if (value == "uct"sv) result = "utc"sv; + else if (value == "usnavajo"sv) result = "usden"sv; + else if (value == "zulu"sv) result = "utc"sv; // clang-format on } + + if (result.has_value()) + value = TRY(String::from_utf8(*result)); + return {}; } -void canonicalize_unicode_extension_values(StringView key, DeprecatedString& value, bool remove_true) +ErrorOr canonicalize_unicode_extension_values(StringView key, String& value, bool remove_true) { - value = value.to_lowercase(); - perform_hard_coded_key_value_substitutions(key, value); + value = TRY(value.to_lowercase()); + TRY(perform_hard_coded_key_value_substitutions(key, value)); // Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec // because, for tvalues, that would result in invalid syntax: @@ -550,7 +556,7 @@ void canonicalize_unicode_extension_values(StringView key, DeprecatedString& val // https://github.com/tc39/test262/blob/18bb955771669541c56c28748603f6afdb2e25ff/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js if (remove_true && 
(value == "true"sv)) { value = {}; - return; + return {}; } if (key.is_one_of("sd"sv, "rg"sv)) { @@ -560,26 +566,28 @@ void canonicalize_unicode_extension_values(StringView key, DeprecatedString& val // FIXME: Subdivision subtags do not appear in the CLDR likelySubtags.json file. // Implement the spec's recommendation of using just the first alias for now, // but we should determine if there's anything else needed here. - value = aliases[0].to_deprecated_string(); + value = TRY(String::from_utf8(aliases[0])); } } + + return {}; } -static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) +static ErrorOr transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) { - auto canonicalize_language = [&](LanguageID& language_id, bool force_lowercase) { - language_id.language = language_id.language->to_lowercase(); + auto canonicalize_language = [&](LanguageID& language_id, bool force_lowercase) -> ErrorOr { + language_id.language = TRY(language_id.language->to_lowercase()); if (language_id.script.has_value()) - language_id.script = language_id.script->to_titlecase(); + language_id.script = TRY(language_id.script->to_titlecase()); if (language_id.region.has_value()) - language_id.region = language_id.region->to_uppercase(); + language_id.region = TRY(language_id.region->to_uppercase()); for (auto& variant : language_id.variants) - variant = variant.to_lowercase(); + variant = TRY(variant.to_lowercase()); resolve_complex_language_aliases(language_id); if (auto alias = resolve_language_alias(*language_id.language); alias.has_value()) { - auto language_alias = parse_unicode_language_id(*alias); + auto language_alias = TRY(parse_unicode_language_id(*alias)); VERIFY(language_alias.has_value()); language_id.language = move(language_alias->language); @@ -593,31 +601,33 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) if (language_id.script.has_value()) { if (auto alias = 
resolve_script_tag_alias(*language_id.script); alias.has_value()) - language_id.script = move(*alias); + language_id.script = TRY(String::from_utf8(*alias)); } if (language_id.region.has_value()) { if (auto alias = resolve_territory_alias(*language_id.region); alias.has_value()) - language_id.region = resolve_most_likely_territory_alias(language_id, *alias); + language_id.region = TRY(String::from_deprecated_string(resolve_most_likely_territory_alias(language_id, *alias))); } quick_sort(language_id.variants); for (auto& variant : language_id.variants) { - variant = variant.to_lowercase(); + variant = TRY(variant.to_lowercase()); if (auto alias = resolve_variant_alias(variant); alias.has_value()) - variant = move(*alias); + variant = TRY(String::from_utf8(*alias)); } if (force_lowercase) { if (language_id.script.has_value()) - language_id.script = language_id.script->to_lowercase(); + language_id.script = TRY(language_id.script->to_lowercase()); if (language_id.region.has_value()) - language_id.region = language_id.region->to_lowercase(); + language_id.region = TRY(language_id.region->to_lowercase()); } + + return {}; }; - canonicalize_language(locale_id.language_id, false); + TRY(canonicalize_language(locale_id.language_id, false)); quick_sort(locale_id.extensions, [](auto const& left, auto const& right) { auto key = [](auto const& extension) { @@ -631,109 +641,119 @@ static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) }); for (auto& extension : locale_id.extensions) { - extension.visit( - [&](LocaleExtension& ext) { + TRY(extension.visit( + [&](LocaleExtension& ext) -> ErrorOr { for (auto& attribute : ext.attributes) - attribute = attribute.to_lowercase(); + attribute = TRY(attribute.to_lowercase()); for (auto& keyword : ext.keywords) { - keyword.key = keyword.key.to_lowercase(); - canonicalize_unicode_extension_values(keyword.key, keyword.value, true); + keyword.key = TRY(keyword.key.to_lowercase()); + 
TRY(canonicalize_unicode_extension_values(keyword.key, keyword.value, true)); } quick_sort(ext.attributes); quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; }); + return {}; }, - [&](TransformedExtension& ext) { + [&](TransformedExtension& ext) -> ErrorOr { if (ext.language.has_value()) - canonicalize_language(*ext.language, true); + TRY(canonicalize_language(*ext.language, true)); for (auto& field : ext.fields) { - field.key = field.key.to_lowercase(); - canonicalize_unicode_extension_values(field.key, field.value, false); + field.key = TRY(field.key.to_lowercase()); + TRY(canonicalize_unicode_extension_values(field.key, field.value, false)); } quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; }); + return {}; }, - [&](OtherExtension& ext) { + [&](OtherExtension& ext) -> ErrorOr { ext.key = static_cast(to_ascii_lowercase(ext.key)); - ext.value = ext.value.to_lowercase(); - }); + ext.value = TRY(ext.value.to_lowercase()); + return {}; + })); } for (auto& extension : locale_id.private_use_extensions) - extension = extension.to_lowercase(); + extension = TRY(extension.to_lowercase()); + + return {}; } -Optional canonicalize_unicode_locale_id(LocaleID& locale_id) +ErrorOr> canonicalize_unicode_locale_id(LocaleID& locale_id) { // https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers StringBuilder builder; - auto append_sep_and_string = [&](Optional const& string) { + auto append_sep_and_string = [&](Optional const& string) -> ErrorOr { if (!string.has_value() || string->is_empty()) - return; - builder.appendff("-{}", *string); + return {}; + TRY(builder.try_appendff("-{}", *string)); + return {}; }; if (!locale_id.language_id.language.has_value()) - return {}; + return OptionalNone {}; - transform_unicode_locale_id_to_canonical_syntax(locale_id); + TRY(transform_unicode_locale_id_to_canonical_syntax(locale_id)); - builder.append(locale_id.language_id.language->to_lowercase()); - 
append_sep_and_string(locale_id.language_id.script); - append_sep_and_string(locale_id.language_id.region); + TRY(builder.try_append(TRY(locale_id.language_id.language->to_lowercase()))); + TRY(append_sep_and_string(locale_id.language_id.script)); + TRY(append_sep_and_string(locale_id.language_id.region)); for (auto const& variant : locale_id.language_id.variants) - append_sep_and_string(variant); + TRY(append_sep_and_string(variant)); for (auto const& extension : locale_id.extensions) { - extension.visit( - [&](LocaleExtension const& ext) { - builder.append("-u"sv); + TRY(extension.visit( + [&](LocaleExtension const& ext) -> ErrorOr { + TRY(builder.try_append("-u"sv)); for (auto const& attribute : ext.attributes) - append_sep_and_string(attribute); + TRY(append_sep_and_string(attribute)); for (auto const& keyword : ext.keywords) { - append_sep_and_string(keyword.key); - append_sep_and_string(keyword.value); + TRY(append_sep_and_string(keyword.key)); + TRY(append_sep_and_string(keyword.value)); } + + return {}; }, - [&](TransformedExtension const& ext) { - builder.append("-t"sv); + [&](TransformedExtension const& ext) -> ErrorOr { + TRY(builder.try_append("-t"sv)); if (ext.language.has_value()) { - append_sep_and_string(ext.language->language); - append_sep_and_string(ext.language->script); - append_sep_and_string(ext.language->region); + TRY(append_sep_and_string(ext.language->language)); + TRY(append_sep_and_string(ext.language->script)); + TRY(append_sep_and_string(ext.language->region)); for (auto const& variant : ext.language->variants) - append_sep_and_string(variant); + TRY(append_sep_and_string(variant)); } for (auto const& field : ext.fields) { - append_sep_and_string(field.key); - append_sep_and_string(field.value); + TRY(append_sep_and_string(field.key)); + TRY(append_sep_and_string(field.value)); } + + return {}; }, - [&](OtherExtension const& ext) { - builder.appendff("-{:c}", to_ascii_lowercase(ext.key)); - append_sep_and_string(ext.value); - }); + 
[&](OtherExtension const& ext) -> ErrorOr { + TRY(builder.try_appendff("-{:c}", to_ascii_lowercase(ext.key))); + TRY(append_sep_and_string(ext.value)); + return {}; + })); } if (!locale_id.private_use_extensions.is_empty()) { - builder.append("-x"sv); + TRY(builder.try_append("-x"sv)); for (auto const& extension : locale_id.private_use_extensions) - append_sep_and_string(extension); + TRY(append_sep_and_string(extension)); } - return builder.build(); + return builder.to_string(); } -DeprecatedString const& default_locale() +StringView default_locale() { - static DeprecatedString locale = "en"sv; - return locale; + return "en"sv; } bool is_locale_available(StringView locale) @@ -872,7 +892,7 @@ Optional remove_likely_subtags(LanguageID const& language_id) // 4. Then for trial in {languagemax, languagemax_regionmax, languagemax_scriptmax}: // If AddLikelySubtags(trial) = max, then return trial + variants. - auto run_trial = [&](Optional language, Optional script, Optional region) -> Optional { + auto run_trial = [&](Optional language, Optional script, Optional region) -> Optional { LanguageID trial { .language = move(language), .script = move(script), .region = move(region) }; if (add_likely_subtags(trial) == maximized) @@ -910,7 +930,7 @@ ErrorOr LanguageID::to_string() const { StringBuilder builder; - auto append_segment = [&](Optional const& segment) -> ErrorOr { + auto append_segment = [&](Optional const& segment) -> ErrorOr { if (!segment.has_value()) return {}; if (!builder.is_empty()) @@ -937,21 +957,21 @@ ErrorOr LocaleID::to_string() const { StringBuilder builder; - auto append_segment = [&](Optional const& segment) -> ErrorOr { - if (!segment.has_value() || segment->is_empty()) + auto append_segment = [&](auto const& segment) -> ErrorOr { + if (segment.is_empty()) return {}; if (!builder.is_empty()) TRY(builder.try_append('-')); - TRY(builder.try_append(*segment)); + TRY(builder.try_append(segment)); return {}; }; - 
TRY(append_segment(language_id.to_deprecated_string())); + TRY(append_segment(TRY(language_id.to_string()))); for (auto const& extension : extensions) { TRY(extension.visit( [&](LocaleExtension const& ext) -> ErrorOr { - builder.append("-u"sv); + TRY(builder.try_append("-u"sv)); for (auto const& attribute : ext.attributes) TRY(append_segment(attribute)); for (auto const& keyword : ext.keywords) { @@ -961,9 +981,9 @@ ErrorOr LocaleID::to_string() const return {}; }, [&](TransformedExtension const& ext) -> ErrorOr { - builder.append("-t"sv); + TRY(builder.try_append("-t"sv)); if (ext.language.has_value()) - TRY(append_segment(ext.language->to_deprecated_string())); + TRY(append_segment(TRY(ext.language->to_string()))); for (auto const& field : ext.fields) { TRY(append_segment(field.key)); TRY(append_segment(field.value)); @@ -971,14 +991,14 @@ ErrorOr LocaleID::to_string() const return {}; }, [&](OtherExtension const& ext) -> ErrorOr { - builder.appendff("-{}", ext.key); + TRY(builder.try_appendff("-{}", ext.key)); TRY(append_segment(ext.value)); return {}; })); } if (!private_use_extensions.is_empty()) { - builder.append("-x"sv); + TRY(builder.try_append("-x"sv)); for (auto const& extension : private_use_extensions) TRY(append_segment(extension)); } diff --git a/Userland/Libraries/LibLocale/Locale.h b/Userland/Libraries/LibLocale/Locale.h index 893a9a0e0f..be09a66a61 100644 --- a/Userland/Libraries/LibLocale/Locale.h +++ b/Userland/Libraries/LibLocale/Locale.h @@ -24,25 +24,25 @@ struct LanguageID { bool operator==(LanguageID const&) const = default; bool is_root { false }; - Optional language {}; - Optional script {}; - Optional region {}; - Vector variants {}; + Optional language {}; + Optional script {}; + Optional region {}; + Vector variants {}; }; struct Keyword { - DeprecatedString key {}; - DeprecatedString value {}; + String key {}; + String value {}; }; struct LocaleExtension { - Vector attributes {}; + Vector attributes {}; Vector keywords {}; }; struct 
TransformedField { - DeprecatedString key {}; - DeprecatedString value {}; + String key {}; + String value {}; }; struct TransformedExtension { @@ -52,7 +52,7 @@ struct TransformedExtension { struct OtherExtension { char key {}; - DeprecatedString value {}; + String value {}; }; using Extension = AK::Variant; @@ -79,7 +79,7 @@ struct LocaleID { LanguageID language_id {}; Vector extensions {}; - Vector private_use_extensions {}; + Vector private_use_extensions {}; }; enum class Style : u8 { @@ -140,13 +140,13 @@ constexpr bool is_unicode_variant_subtag(StringView subtag) bool is_type_identifier(StringView); -Optional parse_unicode_language_id(StringView); -Optional parse_unicode_locale_id(StringView); +ErrorOr> parse_unicode_language_id(StringView); +ErrorOr> parse_unicode_locale_id(StringView); -void canonicalize_unicode_extension_values(StringView key, DeprecatedString& value, bool remove_true); -Optional canonicalize_unicode_locale_id(LocaleID&); +ErrorOr canonicalize_unicode_extension_values(StringView key, String& value, bool remove_true); +ErrorOr> canonicalize_unicode_locale_id(LocaleID&); -DeprecatedString const& default_locale(); +StringView default_locale(); bool is_locale_available(StringView locale); Span get_available_keyword_values(StringView key);