mirror of
https://github.com/RGBCube/serenity
synced 2025-07-23 19:37:34 +00:00
LibJS: Canonicalize Unicode locale extension attributes and keywords
This commit is contained in:
parent
94e66f500c
commit
a2f0eeff7a
2 changed files with 43 additions and 18 deletions
|
@ -5,6 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <AK/AllOf.h>
|
#include <AK/AllOf.h>
|
||||||
|
#include <AK/AnyOf.h>
|
||||||
#include <AK/CharacterTypes.h>
|
#include <AK/CharacterTypes.h>
|
||||||
#include <AK/QuickSort.h>
|
#include <AK/QuickSort.h>
|
||||||
#include <AK/TypeCasts.h>
|
#include <AK/TypeCasts.h>
|
||||||
|
@ -78,29 +79,41 @@ static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView
|
||||||
// 6.2.3 CanonicalizeUnicodeLocaleId ( locale ), https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
|
// 6.2.3 CanonicalizeUnicodeLocaleId ( locale ), https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
|
||||||
static String canonicalize_unicode_locale_id(Unicode::LocaleID& locale)
|
static String canonicalize_unicode_locale_id(Unicode::LocaleID& locale)
|
||||||
{
|
{
|
||||||
|
// Note: This implementation differs from the spec in how Step 3 is implemented. The spec assumes
|
||||||
|
// the input to this method is a string, and is written such that operations are performed on parts
|
||||||
|
// of that string. LibUnicode gives us the parsed locale in a structure, so we can mutate that
|
||||||
|
// structure directly. From a footnote in the spec:
|
||||||
|
//
|
||||||
|
// The third step of this algorithm ensures that a Unicode locale extension sequence in the
|
||||||
|
// returned language tag contains:
|
||||||
|
// * only the first instance of any attribute duplicated in the input, and
|
||||||
|
// * only the first keyword for a given key in the input.
|
||||||
|
for (auto& extension : locale.extensions) {
|
||||||
|
if (!extension.has<Unicode::LocaleExtension>())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto& locale_extension = extension.get<Unicode::LocaleExtension>();
|
||||||
|
|
||||||
|
auto attributes = move(locale_extension.attributes);
|
||||||
|
for (auto& attribute : attributes) {
|
||||||
|
if (!locale_extension.attributes.contains_slow(attribute))
|
||||||
|
locale_extension.attributes.append(move(attribute));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto keywords = move(locale_extension.keywords);
|
||||||
|
for (auto& keyword : keywords) {
|
||||||
|
if (!any_of(locale_extension.keywords, [&](auto const& k) { return k.key == keyword.key; }))
|
||||||
|
locale_extension.keywords.append(move(keyword));
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// 1. Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers.
|
// 1. Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers.
|
||||||
// 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form.
|
// 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form.
|
||||||
auto locale_id = Unicode::canonicalize_unicode_locale_id(locale);
|
auto locale_id = Unicode::canonicalize_unicode_locale_id(locale);
|
||||||
VERIFY(locale_id.has_value());
|
VERIFY(locale_id.has_value());
|
||||||
|
|
||||||
// FIXME: Handle extensions.
|
|
||||||
// 3. If localeId contains a substring extension that is a Unicode locale extension sequence, then
|
|
||||||
// a. Let components be ! UnicodeExtensionComponents(extension).
|
|
||||||
// b. Let attributes be components.[[Attributes]].
|
|
||||||
// c. Let keywords be components.[[Keywords]].
|
|
||||||
// d. Let newExtension be "u".
|
|
||||||
// e. For each element attr of attributes, do
|
|
||||||
// i. Append "-" to newExtension.
|
|
||||||
// ii. Append attr to newExtension.
|
|
||||||
// f. For each Record { [[Key]], [[Value]] } keyword in keywords, do
|
|
||||||
// i. Append "-" to newExtension.
|
|
||||||
// ii. Append keyword.[[Key]] to newExtension.
|
|
||||||
// iii. If keyword.[[Value]] is not the empty String, then
|
|
||||||
// 1. Append "-" to newExtension.
|
|
||||||
// 2. Append keyword.[[Value]] to newExtension.
|
|
||||||
// g. Assert: newExtension is not equal to "u".
|
|
||||||
// h. Let localeId be localeId with the substring corresponding to extension replaced by the string newExtension.
|
|
||||||
|
|
||||||
// 4. Return localeId.
|
// 4. Return localeId.
|
||||||
return locale_id.release_value();
|
return locale_id.release_value();
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,4 +96,16 @@ describe("normal behavior", () => {
|
||||||
expect(Intl.getCanonicalLocales(true)).toEqual([]);
|
expect(Intl.getCanonicalLocales(true)).toEqual([]);
|
||||||
expect(Intl.getCanonicalLocales(123)).toEqual([]);
|
expect(Intl.getCanonicalLocales(123)).toEqual([]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("duplicate Unicode locale extension attributes", () => {
    // A repeated attribute is expected to survive only at its first position.
    const adjacentRepeat = Intl.getCanonicalLocales("en-us-u-aaa-aaa");
    expect(adjacentRepeat).toEqual(["en-US-u-aaa"]);

    // Same expectation when another attribute sits between the two occurrences.
    const separatedRepeat = Intl.getCanonicalLocales("en-us-u-aaa-bbb-aaa");
    expect(separatedRepeat).toEqual(["en-US-u-aaa-bbb"]);
});
|
||||||
|
|
||||||
|
test("duplicate Unicode locale extension keywords", () => {
    // When a key shows up twice, the keyword seen first (key and value) is the one expected to remain.
    const sameKeyTwice = Intl.getCanonicalLocales("en-us-u-1k-aaa-1k-bbb");
    expect(sameKeyTwice).toEqual(["en-US-u-1k-aaa"]);

    // A keyword with a different key in between is expected to be kept as well.
    const sameKeyAroundOther = Intl.getCanonicalLocales("en-us-u-1k-aaa-2k-ccc-1k-bbb");
    expect(sameKeyAroundOther).toEqual(["en-US-u-1k-aaa-2k-ccc"]);
});
|
||||||
});
|
});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue