1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 09:34:59 +00:00

LibUnicode: Add lexer to test if a string matches the "type" production

This commit is contained in:
Timothy Flynn 2021-09-01 07:48:02 -04:00 committed by Linus Groh
parent 113bf4a9dd
commit a05419db55
3 changed files with 35 additions and 0 deletions

View file

@ -69,6 +69,24 @@ TEST_CASE(is_unicode_variant_subtag)
EXPECT(!Unicode::is_unicode_variant_subtag("a234"sv));
}
TEST_CASE(is_type_identifier)
{
    // Grammar under test: type = alphanum{3,8} (sep alphanum{3,8})*

    // Rejected: nothing to match, or a separator with no segment after it.
    EXPECT(!Unicode::is_type_identifier(""sv));
    EXPECT(!Unicode::is_type_identifier("aaaa-"sv));

    // Rejected: a segment with fewer than 3 or more than 8 characters.
    EXPECT(!Unicode::is_type_identifier("a"sv));
    EXPECT(!Unicode::is_type_identifier("aa"sv));
    EXPECT(!Unicode::is_type_identifier("aaaaaaaaa"sv));

    // Accepted: a single segment mixing digits and letters, up to all digits.
    EXPECT(Unicode::is_type_identifier("1aaa"sv));
    EXPECT(Unicode::is_type_identifier("12aa"sv));
    EXPECT(Unicode::is_type_identifier("123a"sv));
    EXPECT(Unicode::is_type_identifier("1234"sv));

    // Accepted: one, two and three '-'-separated segments.
    EXPECT(Unicode::is_type_identifier("aaaa"sv));
    EXPECT(Unicode::is_type_identifier("aaaa-bbbb"sv));
    EXPECT(Unicode::is_type_identifier("aaaa-bbbb-cccc"sv));
}
TEST_CASE(parse_unicode_locale_id)
{
auto fail = [](StringView locale) {

View file

@ -114,6 +114,22 @@ static Optional<StringView> consume_next_segment(GenericLexer& lexer, bool with_
return segment;
}
bool is_type_identifier(StringView identifier)
{
    // Checks the whole of `identifier` against the production:
    //     type = alphanum{3,8} (sep alphanum{3,8})*
    GenericLexer lexer { identifier };

    for (;;) {
        // The flag handed to consume_next_segment() is false only while the
        // lexer is still at offset 0, i.e. for the very first segment.
        bool const past_start = lexer.tell() != 0;

        auto segment = consume_next_segment(lexer, past_start);
        if (!segment.has_value())
            break;

        // Any segment failing the single-type check rejects the input outright.
        if (!is_single_type(*segment))
            return false;
    }

    // Accept only if something was consumed and nothing is left over.
    bool const consumed_anything = lexer.tell() != 0;
    return consumed_anything && lexer.is_eof();
}
static Optional<LanguageID> parse_unicode_language_id(GenericLexer& lexer)
{
// https://unicode.org/reports/tr35/#Unicode_language_identifier

View file

@ -62,6 +62,7 @@ bool is_unicode_language_subtag(StringView);
bool is_unicode_script_subtag(StringView);
bool is_unicode_region_subtag(StringView);
bool is_unicode_variant_subtag(StringView);
// Tests whether the given string matches the "type" production: alphanum{3,8} (sep alphanum{3,8})*
bool is_type_identifier(StringView);
Optional<LanguageID> parse_unicode_language_id(StringView);
Optional<LocaleID> parse_unicode_locale_id(StringView);