mirror of
https://github.com/RGBCube/serenity
synced 2025-07-23 18:57:35 +00:00
LibJS: Reject structurally invalid Unicode locale extensions
This commit is contained in:
parent
f897c2edb3
commit
94e66f500c
2 changed files with 59 additions and 12 deletions
|
@ -18,6 +18,20 @@ namespace JS::Intl {
|
||||||
// 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
|
// 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
|
||||||
static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale)
|
static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale)
|
||||||
{
|
{
|
||||||
|
auto contains_duplicate_variant = [](Vector<StringView>& variants) {
|
||||||
|
if (variants.is_empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
quick_sort(variants);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < variants.size() - 1; ++i) {
|
||||||
|
if (variants[i] == variants[i + 1])
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
// IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise:
|
// IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise:
|
||||||
|
|
||||||
// locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier;
|
// locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier;
|
||||||
|
@ -31,23 +45,33 @@ static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
// the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and
|
// the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and
|
||||||
if (auto& variants = locale_id->language_id.variants; !variants.is_empty()) {
|
if (contains_duplicate_variant(locale_id->language_id.variants))
|
||||||
quick_sort(variants);
|
return {};
|
||||||
|
|
||||||
for (size_t i = 0; i < variants.size() - 1; ++i) {
|
// if locale contains an extensions* component, that component
|
||||||
if (variants[i] == variants[i + 1])
|
Vector<char> unique_keys;
|
||||||
|
for (auto& extension : locale_id->extensions) {
|
||||||
|
// does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
|
||||||
|
// contains at most one unicode_locale_extensions component,
|
||||||
|
// contains at most one transformed_extensions component, and
|
||||||
|
char key = extension.visit(
|
||||||
|
[](Unicode::LocaleExtension const&) { return 'u'; },
|
||||||
|
[](Unicode::TransformedExtension const&) { return 't'; },
|
||||||
|
[](Unicode::OtherExtension const& ext) { return static_cast<char>(to_ascii_lowercase(ext.key)); });
|
||||||
|
|
||||||
|
if (unique_keys.contains_slow(key))
|
||||||
|
return {};
|
||||||
|
unique_keys.append(key);
|
||||||
|
|
||||||
|
// if a transformed_extensions component that contains a tlang component is present, then
|
||||||
|
// the tlang component contains no duplicate unicode_variant_subtag subtags.
|
||||||
|
if (auto* transformed = extension.get_pointer<Unicode::TransformedExtension>()) {
|
||||||
|
auto& language = transformed->language;
|
||||||
|
if (language.has_value() && contains_duplicate_variant(language->variants))
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Handle extensions.
|
|
||||||
// if locale contains an extensions* component, that component
|
|
||||||
// does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
|
|
||||||
// contains at most one unicode_locale_extensions component,
|
|
||||||
// contains at most one transformed_extensions component, and
|
|
||||||
// if a transformed_extensions component that contains a tlang component is present, then
|
|
||||||
// the tlang component contains no duplicate unicode_variant_subtag subtags.
|
|
||||||
|
|
||||||
return locale_id;
|
return locale_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,29 @@ describe("errors", () => {
|
||||||
Intl.getCanonicalLocales([true]);
|
Intl.getCanonicalLocales([true]);
|
||||||
}).toThrowWithMessage(TypeError, "true is neither an object nor a string");
|
}).toThrowWithMessage(TypeError, "true is neither an object nor a string");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("duplicate extension components", () => {
|
||||||
|
expect(() => {
|
||||||
|
Intl.getCanonicalLocales("en-u-aa-U-aa");
|
||||||
|
}).toThrowWithMessage(RangeError, "en-u-aa-U-aa is not a structurally valid language tag");
|
||||||
|
|
||||||
|
expect(() => {
|
||||||
|
Intl.getCanonicalLocales("en-t-aa-T-aa");
|
||||||
|
}).toThrowWithMessage(RangeError, "en-t-aa-T-aa is not a structurally valid language tag");
|
||||||
|
|
||||||
|
expect(() => {
|
||||||
|
Intl.getCanonicalLocales("en-z-aa-Z-aa");
|
||||||
|
}).toThrowWithMessage(RangeError, "en-z-aa-Z-aa is not a structurally valid language tag");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("duplicate transformed extension variant subtags", () => {
|
||||||
|
expect(() => {
|
||||||
|
Intl.getCanonicalLocales("en-t-en-POSIX-POSIX");
|
||||||
|
}).toThrowWithMessage(
|
||||||
|
RangeError,
|
||||||
|
"en-t-en-POSIX-POSIX is not a structurally valid language tag"
|
||||||
|
);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("normal behavior", () => {
|
describe("normal behavior", () => {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue