mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 05:48:12 +00:00
LibUnicode: Parse locale private use extensions
This commit is contained in:
parent
29f76ef7c8
commit
30855e6663
3 changed files with 65 additions and 1 deletions
|
@ -246,6 +246,30 @@ TEST_CASE(parse_unicode_locale_id_with_other_extension)
|
||||||
pass("en-z-aa-bbb-cccccccc", { 'z', { "aa"sv, "bbb"sv, "cccccccc"sv } });
|
pass("en-z-aa-bbb-cccccccc", { 'z', { "aa"sv, "bbb"sv, "cccccccc"sv } });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(parse_unicode_locale_id_with_private_use_extension)
{
    // Expects the parser to reject `input` entirely.
    auto fail = [](StringView input) {
        auto locale_id = Unicode::parse_unicode_locale_id(input);
        EXPECT(!locale_id.has_value());
    };

    // Expects `input` to parse, and the parsed private use subtags to equal `expected_extension`.
    auto pass = [](StringView input, Vector<StringView> const& expected_extension) {
        auto locale_id = Unicode::parse_unicode_locale_id(input);
        VERIFY(locale_id.has_value());
        EXPECT_EQ(locale_id->private_use_extensions, expected_extension);
    };

    // An "x" singleton with no subtag after it.
    fail("en-x"sv);
    fail("en-x-"sv);

    // A subtag longer than 8 characters.
    fail("en-x-aaaaaaaaa"sv);

    // A valid first subtag followed by a dangling separator or an over-long subtag.
    fail("en-x-aaa-"sv);
    fail("en-x-aaa-aaaaaaaaa"sv);

    // Subtags of 1 and 8 characters (the length bounds).
    pass("en-x-a", { "a"sv });
    pass("en-x-aaaaaaaa", { "aaaaaaaa"sv });

    // Multiple subtags; a later "x" is reported as an ordinary subtag.
    pass("en-x-aaa-bbb", { "aaa"sv, "bbb"sv });
    pass("en-x-aaa-x-bbb", { "aaa"sv, "x"sv, "bbb"sv });
}
|
||||||
|
|
||||||
TEST_CASE(canonicalize_unicode_locale_id)
|
TEST_CASE(canonicalize_unicode_locale_id)
|
||||||
{
|
{
|
||||||
auto test = [](StringView locale, StringView expected_canonical_locale) {
|
auto test = [](StringView locale, StringView expected_canonical_locale) {
|
||||||
|
|
|
@ -400,6 +400,45 @@ static Optional<Extension> parse_extension(GenericLexer& lexer)
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Vector<StringView> parse_private_use_extensions(GenericLexer& lexer)
{
    // https://unicode.org/reports/tr35/#pu_extensions
    //
    // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
    //
    // Returns the subtags that follow the "x" singleton. When no private use extension is
    // recognized, the result is empty and any input consumed here is handed back to the lexer.
    auto const initial_offset = lexer.tell();

    auto const singleton = consume_next_segment(lexer);
    if (!singleton.has_value())
        return {};

    // The introducing segment must be exactly one character: 'x' or 'X'.
    auto const is_private_use_singleton = [&]() {
        if (singleton->length() != 1)
            return false;

        auto const marker = (*singleton)[0];
        return (marker == 'x') || (marker == 'X');
    }();

    Vector<StringView> subtags;

    if (is_private_use_singleton) {
        for (auto subtag = consume_next_segment(lexer); subtag.has_value(); subtag = consume_next_segment(lexer)) {
            auto const length = subtag->length();
            auto const is_well_formed = (length >= 1) && (length <= 8) && all_of(*subtag, is_ascii_alphanumeric);

            if (!is_well_formed) {
                // Hand back the malformed subtag plus one extra character (presumably the
                // separator in front of it), so the caller sees the leftover input.
                lexer.retreat(length + 1);
                break;
            }

            subtags.append(*subtag);
        }
    }

    // Either the singleton was not "x", or it was not followed by at least one valid subtag:
    // rewind to where this function started.
    if (subtags.is_empty())
        lexer.retreat(lexer.tell() - initial_offset);

    return subtags;
}
|
||||||
|
|
||||||
Optional<LanguageID> parse_unicode_language_id(StringView language)
|
Optional<LanguageID> parse_unicode_language_id(StringView language)
|
||||||
{
|
{
|
||||||
GenericLexer lexer { language };
|
GenericLexer lexer { language };
|
||||||
|
@ -433,7 +472,7 @@ Optional<LocaleID> parse_unicode_locale_id(StringView locale)
|
||||||
locale_id.extensions.append(extension.release_value());
|
locale_id.extensions.append(extension.release_value());
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Handle pu_extensions.
|
locale_id.private_use_extensions = parse_private_use_extensions(lexer);
|
||||||
|
|
||||||
if (!lexer.is_eof())
|
if (!lexer.is_eof())
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@ -53,6 +53,7 @@ using Extension = Variant<LocaleExtension, TransformedExtension, OtherExtension>
|
||||||
// Holds the parts of a locale identifier: the language ID, the extensions, and the
// private use extensions.
struct LocaleID {
|
struct LocaleID {
|
||||||
LanguageID language_id {};
|
LanguageID language_id {};
|
||||||
Vector<Extension> extensions {};
|
Vector<Extension> extensions {};
|
||||||

// Subtags following the "x" private use singleton (e.g. "en-x-aaa-bbb" yields "aaa", "bbb").
// NOTE(review): these are non-owning StringViews, presumably into the string given to
// parse_unicode_locale_id(); confirm that callers keep that string alive.
Vector<StringView> private_use_extensions {};
|
||||||
};
|
};
|
||||||
|
|
||||||
// Note: These methods only verify that the provided strings match the EBNF grammar of the
|
// Note: These methods only verify that the provided strings match the EBNF grammar of the
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue