diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp index 338c0919e2..9422329129 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp @@ -18,6 +18,20 @@ namespace JS::Intl { // 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag static Optional is_structurally_valid_language_tag(StringView locale) { + auto contains_duplicate_variant = [](Vector& variants) { + if (variants.is_empty()) + return false; + + quick_sort(variants); + + for (size_t i = 0; i < variants.size() - 1; ++i) { + if (variants[i] == variants[i + 1]) + return true; + } + + return false; + }; + // IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise: // locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier; @@ -31,23 +45,33 @@ static Optional is_structurally_valid_language_tag(StringView return {}; // the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and - if (auto& variants = locale_id->language_id.variants; !variants.is_empty()) { - quick_sort(variants); + if (contains_duplicate_variant(locale_id->language_id.variants)) + return {}; - for (size_t i = 0; i < variants.size() - 1; ++i) { - if (variants[i] == variants[i + 1]) + // if locale contains an extensions* component, that component + Vector unique_keys; + for (auto& extension : locale_id->extensions) { + // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags, + // contains at most one unicode_locale_extensions component, + // contains at most one transformed_extensions component, and + char key = extension.visit( + [](Unicode::LocaleExtension const&) { return 'u'; }, + [](Unicode::TransformedExtension 
const&) { return 't'; }, + [](Unicode::OtherExtension const& ext) { return static_cast(to_ascii_lowercase(ext.key)); }); + + if (unique_keys.contains_slow(key)) + return {}; + unique_keys.append(key); + + // if a transformed_extensions component that contains a tlang component is present, then + // the tlang component contains no duplicate unicode_variant_subtag subtags. + if (auto* transformed = extension.get_pointer()) { + auto& language = transformed->language; + if (language.has_value() && contains_duplicate_variant(language->variants)) return {}; } } - // FIXME: Handle extensions. - // if locale contains an extensions* component, that component - // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags, - // contains at most one unicode_locale_extensions component, - // contains at most one transformed_extensions component, and - // if a transformed_extensions component that contains a tlang component is present, then - // the tlang component contains no duplicate unicode_variant_subtag subtags. 
- return locale_id; } diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js index c7e64c1468..c248dda559 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js @@ -45,6 +45,29 @@ describe("errors", () => { Intl.getCanonicalLocales([true]); }).toThrowWithMessage(TypeError, "true is neither an object nor a string"); }); + + test("duplicate extension components", () => { + expect(() => { + Intl.getCanonicalLocales("en-u-aa-U-aa"); + }).toThrowWithMessage(RangeError, "en-u-aa-U-aa is not a structurally valid language tag"); + + expect(() => { + Intl.getCanonicalLocales("en-t-aa-T-aa"); + }).toThrowWithMessage(RangeError, "en-t-aa-T-aa is not a structurally valid language tag"); + + expect(() => { + Intl.getCanonicalLocales("en-z-aa-Z-aa"); + }).toThrowWithMessage(RangeError, "en-z-aa-Z-aa is not a structurally valid language tag"); + }); + + test("duplicate transformed extension variant subtags", () => { + expect(() => { + Intl.getCanonicalLocales("en-t-en-POSIX-POSIX"); + }).toThrowWithMessage( + RangeError, + "en-t-en-POSIX-POSIX is not a structurally valid language tag" + ); + }); }); describe("normal behavior", () => {