diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp index 80b7ee8ba1..af28c7cc8f 100644 --- a/Tests/LibUnicode/TestUnicodeLocale.cpp +++ b/Tests/LibUnicode/TestUnicodeLocale.cpp @@ -277,7 +277,7 @@ TEST_CASE(canonicalize_unicode_locale_id) VERIFY(locale_id.has_value()); auto canonical_locale = Unicode::canonicalize_unicode_locale_id(*locale_id); - EXPECT_EQ(canonical_locale, expected_canonical_locale); + EXPECT_EQ(*canonical_locale, expected_canonical_locale); }; test("aaa"sv, "aaa"sv); @@ -287,4 +287,44 @@ TEST_CASE(canonicalize_unicode_locale_id) test("aaa-bBBB-cC"sv, "aaa-Bbbb-CC"sv); test("aaa-bbbb-cc-1234"sv, "aaa-Bbbb-CC-1234"sv); test("aaa-bbbb-cc-ABCDE"sv, "aaa-Bbbb-CC-abcde"sv); + + test("en-u-aa"sv, "en-u-aa"sv); + test("EN-U-AA"sv, "en-u-aa"sv); + test("en-u-aa-bbb"sv, "en-u-aa-bbb"sv); + test("EN-U-AA-BBB"sv, "en-u-aa-bbb"sv); + test("en-u-aa-ccc-bbb"sv, "en-u-aa-ccc-bbb"sv); + test("EN-U-AA-CCC-BBB"sv, "en-u-aa-ccc-bbb"sv); + test("en-u-ddd-bbb-ccc"sv, "en-u-bbb-ccc-ddd"sv); + test("EN-U-DDD-BBB-CCC"sv, "en-u-bbb-ccc-ddd"sv); + test("en-u-2k-aaa-1k-bbb"sv, "en-u-1k-bbb-2k-aaa"sv); + test("EN-U-2K-AAA-1K-BBB"sv, "en-u-1k-bbb-2k-aaa"sv); + test("en-u-ccc-bbb-2k-aaa-1k-bbb"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv); + test("EN-U-CCC-BBB-2K-AAA-1K-BBB"sv, "en-u-bbb-ccc-1k-bbb-2k-aaa"sv); + test("en-u-1k-true"sv, "en-u-1k"sv); + test("EN-U-1K-TRUE"sv, "en-u-1k"sv); + + test("en-t-en"sv, "en-t-en"sv); + test("EN-T-EN"sv, "en-t-en"sv); + test("en-latn-t-en-latn"sv, "en-Latn-t-en-latn"sv); + test("EN-LATN-T-EN-LATN"sv, "en-Latn-t-en-latn"sv); + test("en-us-t-en-us"sv, "en-US-t-en-us"sv); + test("EN-US-T-EN-US"sv, "en-US-t-en-us"sv); + test("en-latn-us-t-en-latn-us"sv, "en-Latn-US-t-en-latn-us"sv); + test("EN-LATN-US-T-EN-LATN-US"sv, "en-Latn-US-t-en-latn-us"sv); + test("en-t-en-k2-bbb-k1-aaa"sv, "en-t-en-k1-aaa-k2-bbb"sv); + test("EN-T-EN-K2-BBB-K1-AAA"sv, "en-t-en-k1-aaa-k2-bbb"sv); + test("en-t-k1-true"sv, "en-t-k1-true"sv); + test("EN-T-K1-TRUE"sv, "en-t-k1-true"sv); + + test("en-0-aaa"sv, "en-0-aaa"sv); + test("EN-0-AAA"sv, "en-0-aaa"sv); + test("en-0-bbb-aaa"sv, "en-0-bbb-aaa"sv); + test("EN-0-BBB-AAA"sv, "en-0-bbb-aaa"sv); + test("en-z-bbb-0-aaa"sv, "en-0-aaa-z-bbb"sv); + test("EN-Z-BBB-0-AAA"sv, "en-0-aaa-z-bbb"sv); + + test("en-u-aa-t-en"sv, "en-t-en-u-aa"sv); + test("EN-U-AA-T-EN"sv, "en-t-en-u-aa"sv); + test("en-z-bbb-u-aa-t-en-0-aaa"sv, "en-0-aaa-t-en-u-aa-z-bbb"sv); + test("EN-Z-BBB-U-AA-T-EN-0-AAA"sv, "en-0-aaa-t-en-u-aa-z-bbb"sv); } diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp index 7d6ff44c74..f73895ce48 100644 --- a/Userland/Libraries/LibUnicode/Locale.cpp +++ b/Userland/Libraries/LibUnicode/Locale.cpp @@ -518,7 +518,69 @@ Optional canonicalize_unicode_locale_id(LocaleID& locale_id) for (auto const& variant : locale_id.language_id.variants) append_sep_and_string(variant); - // FIXME: Handle extensions and pu_extensions. + quick_sort(locale_id.extensions, [](auto const& left, auto const& right) { + auto key = [](auto const& extension) { + return extension.visit( + [](LocaleExtension const&) { return 'u'; }, + [](TransformedExtension const&) { return 't'; }, + [](OtherExtension const& ext) { return static_cast(to_ascii_lowercase(ext.key)); }); + }; + + return key(left) < key(right); + }); + + auto append_key_value_list = [&](auto const& key, auto const& values, bool remove_true_values) { + append_sep_and_string(key); + + for (auto const& type : values) { + // Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec + // because, for tvalues, that would result in invalid syntax: + // https://unicode-org.atlassian.net/browse/CLDR-14318 + // This has also been noted by test262: + // https://github.com/tc39/test262/blob/18bb955771669541c56c28748603f6afdb2e25ff/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js + if (remove_true_values && type.equals_ignoring_case("true"sv)) + continue; + append_sep_and_string(type); + } + }; + + for (auto& extension : locale_id.extensions) { + extension.visit( + [&](LocaleExtension& ext) { + quick_sort(ext.attributes); + quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; }); + builder.append("-u"sv); + + for (auto const& attribute : ext.attributes) + append_sep_and_string(attribute); + for (auto const& keyword : ext.keywords) + append_key_value_list(keyword.key, keyword.types, true); + }, + [&](TransformedExtension& ext) { + quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; }); + builder.append("-t"sv); + + if (ext.language.has_value()) { + append_sep_and_string(ext.language->language); + append_sep_and_string(ext.language->script); + append_sep_and_string(ext.language->region); + + quick_sort(ext.language->variants); + for (auto const& variant : ext.language->variants) + append_sep_and_string(variant); + } + + for (auto const& field : ext.fields) + append_key_value_list(field.key, field.values, false); + }, + [&](OtherExtension& ext) { + builder.appendff("-{:c}", to_ascii_lowercase(ext.key)); + for (auto const& value : ext.values) + append_sep_and_string(value); + }); + } + + // FIXME: Handle pu_extensions. return builder.build(); }