diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp index 9422329129..f4e6b9db18 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -78,29 +79,41 @@ static Optional is_structurally_valid_language_tag(StringView // 6.2.3 CanonicalizeUnicodeLocaleId ( locale ), https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid static String canonicalize_unicode_locale_id(Unicode::LocaleID& locale) { + // Note: This implementation differs from the spec in how Step 3 is implemented. The spec assumes + // the input to this method is a string, and is written such that operations are performed on parts + // of that string. LibUnicode gives us the parsed locale in a structure, so we can mutate that + // structure directly. From a footnote in the spec: + // + // The third step of this algorithm ensures that a Unicode locale extension sequence in the + // returned language tag contains: + // * only the first instance of any attribute duplicated in the input, and + // * only the first keyword for a given key in the input. + for (auto& extension : locale.extensions) { + if (!extension.has()) + continue; + + auto& locale_extension = extension.get(); + + auto attributes = move(locale_extension.attributes); + for (auto& attribute : attributes) { + if (!locale_extension.attributes.contains_slow(attribute)) + locale_extension.attributes.append(move(attribute)); + } + + auto keywords = move(locale_extension.keywords); + for (auto& keyword : keywords) { + if (!any_of(locale_extension.keywords, [&](auto const& k) { return k.key == keyword.key; })) + locale_extension.keywords.append(move(keyword)); + } + + break; + } + // 1. 
Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers. // 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form. auto locale_id = Unicode::canonicalize_unicode_locale_id(locale); VERIFY(locale_id.has_value()); - // FIXME: Handle extensions. - // 3. If localeId contains a substring extension that is a Unicode locale extension sequence, then - // a. Let components be ! UnicodeExtensionComponents(extension). - // b. Let attributes be components.[[Attributes]]. - // c. Let keywords be components.[[Keywords]]. - // d. Let newExtension be "u". - // e. For each element attr of attributes, do - // i. Append "-" to newExtension. - // ii. Append attr to newExtension. - // f. For each Record { [[Key]], [[Value]] } keyword in keywords, do - // i. Append "-" to newExtension. - // ii. Append keyword.[[Key]] to newExtension. - // iii. If keyword.[[Value]] is not the empty String, then - // 1. Append "-" to newExtension. - // 2. Append keyword.[[Value]] to newExtension. - // g. Assert: newExtension is not equal to "u". - // h. Let localeId be localeId with the substring corresponding to extension replaced by the string newExtension. - // 4. Return localeId. 
return locale_id.release_value(); } diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js index c248dda559..ba988aa2d0 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js @@ -96,4 +96,16 @@ describe("normal behavior", () => { expect(Intl.getCanonicalLocales(true)).toEqual([]); expect(Intl.getCanonicalLocales(123)).toEqual([]); }); + + test("duplicate Unicode locale extension attributes", () => { + expect(Intl.getCanonicalLocales("en-us-u-aaa-aaa")).toEqual(["en-US-u-aaa"]); + expect(Intl.getCanonicalLocales("en-us-u-aaa-bbb-aaa")).toEqual(["en-US-u-aaa-bbb"]); + }); + + test("duplicate Unicode locale extension keywords", () => { + expect(Intl.getCanonicalLocales("en-us-u-1k-aaa-1k-bbb")).toEqual(["en-US-u-1k-aaa"]); + expect(Intl.getCanonicalLocales("en-us-u-1k-aaa-2k-ccc-1k-bbb")).toEqual([ + "en-US-u-1k-aaa-2k-ccc", + ]); + }); });