diff --git a/Tests/LibUnicode/TestUnicodeLocale.cpp b/Tests/LibUnicode/TestUnicodeLocale.cpp
index fb79b32139..53ffddbf2c 100644
--- a/Tests/LibUnicode/TestUnicodeLocale.cpp
+++ b/Tests/LibUnicode/TestUnicodeLocale.cpp
@@ -69,6 +69,24 @@ TEST_CASE(is_unicode_variant_subtag)
     EXPECT(!Unicode::is_unicode_variant_subtag("a234"sv));
 }
 
+TEST_CASE(is_type_identifier)
+{
+    EXPECT(Unicode::is_type_identifier("aaaa"sv)); // Accepted: one or more '-'-joined subtags.
+    EXPECT(Unicode::is_type_identifier("aaaa-bbbb"sv));
+    EXPECT(Unicode::is_type_identifier("aaaa-bbbb-cccc"sv));
+
+    EXPECT(Unicode::is_type_identifier("1aaa"sv)); // Accepted: digits in any position of a subtag.
+    EXPECT(Unicode::is_type_identifier("12aa"sv));
+    EXPECT(Unicode::is_type_identifier("123a"sv));
+    EXPECT(Unicode::is_type_identifier("1234"sv));
+
+    EXPECT(!Unicode::is_type_identifier(""sv)); // Rejected: empty input, 1-2 character subtags, a 9 character subtag, a trailing separator.
+    EXPECT(!Unicode::is_type_identifier("a"sv));
+    EXPECT(!Unicode::is_type_identifier("aa"sv));
+    EXPECT(!Unicode::is_type_identifier("aaaaaaaaa"sv));
+    EXPECT(!Unicode::is_type_identifier("aaaa-"sv));
+}
+
 TEST_CASE(parse_unicode_locale_id)
 {
     auto fail = [](StringView locale) {
diff --git a/Userland/Libraries/LibUnicode/Locale.cpp b/Userland/Libraries/LibUnicode/Locale.cpp
index 9744b97152..8d3f3c87a9 100644
--- a/Userland/Libraries/LibUnicode/Locale.cpp
+++ b/Userland/Libraries/LibUnicode/Locale.cpp
@@ -114,6 +114,22 @@ static Optional consume_next_segment(GenericLexer& lexer, bool with_
     return segment;
 }
 
+bool is_type_identifier(StringView identifier)
+{
+    // type = alphanum{3,8} (sep alphanum{3,8})* -- returns true only if every consumed segment passes is_single_type() and the whole, non-empty input was consumed.
+    GenericLexer lexer { identifier };
+
+    while (true) {
+        auto type = consume_next_segment(lexer, lexer.tell() > 0); // Second argument is false only while the lexer is still at offset 0, i.e. for the first segment.
+        if (!type.has_value()) // No further segment could be consumed; stop and let the final check decide.
+            break;
+        if (!is_single_type(*type)) // Reject as soon as one segment is not a valid single type.
+            return false;
+    }
+
+    return lexer.is_eof() && (lexer.tell() > 0); // Leftover input, or an empty input (offset still 0), is not a type identifier.
+}
+
 static Optional parse_unicode_language_id(GenericLexer& lexer)
 {
     // https://unicode.org/reports/tr35/#Unicode_language_identifier
diff --git a/Userland/Libraries/LibUnicode/Locale.h b/Userland/Libraries/LibUnicode/Locale.h
index 7cff3ad5f4..b5938061fa 100644
--- a/Userland/Libraries/LibUnicode/Locale.h
+++ b/Userland/Libraries/LibUnicode/Locale.h
@@ -62,6 +62,7 @@ bool is_unicode_language_subtag(StringView);
 bool is_unicode_script_subtag(StringView);
 bool is_unicode_region_subtag(StringView);
 bool is_unicode_variant_subtag(StringView);
+bool is_type_identifier(StringView); // Whether the whole string matches "type = alphanum{3,8} (sep alphanum{3,8})*".
 
 Optional parse_unicode_language_id(StringView);
 Optional parse_unicode_locale_id(StringView);