// Patch summary: adds Unicode General_Category lookup to LibUnicode.
// (These lines sit ahead of the first "diff --git" header, so they are outside every hunk.)
//
// CharacterTypes.cpp
//   * has_general_category(): new file-static helper. It is true when the code point's
//     general_category bits and the requested category share at least one bit
//     ((a & b) != GeneralCategory::None). Note this differs from has_property() in the
//     surrounding context, which requires every requested bit ((a & b) == b).
//   * general_category_from_string(): with ENABLE_UNICODE_DATA it forwards to
//     Detail::general_category_from_string(); otherwise it returns an empty value ({}).
//   * code_point_has_general_category(): with ENABLE_UNICODE_DATA it fetches
//     Detail::unicode_data_for_code_point(code_point), returns false when that has no
//     value, and otherwise defers to has_general_category(). Without ENABLE_UNICODE_DATA
//     it executes "return {};", which for a bool return type yields false.
//
// CharacterTypes.h
//   * Declares general_category_from_string() and code_point_has_general_category().
//
// CodeGenerators/GenerateUnicodeData.cpp
//   * The generate_enum("GeneralCategory", ...) call no longer wraps general_categories,
//     general_category_unions and general_category_aliases in move(); the same three
//     containers are iterated again by the emitter added further down in this patch.
//   * Declares Detail::general_category_from_string() in the generated header text.
//   * Emits its definition: one exact == comparison against "NAME"sv per entry, returning
//     GeneralCategory::NAME, iterating general_categories first, then the alias of each
//     union, then the alias of each alias entry; the generated function ends with
//     "return {};" when nothing matched.
//
// NOTE(review): every "Optional" in this patch appears without template arguments
//   - presumably the angle-bracketed type was lost when this text was extracted;
//   confirm against the real commit before applying.
// NOTE(review): the emitted code returns GeneralCategory::NAME for union and alias names,
//   so it assumes generate_enum() also emits those names as enumerators - TODO confirm.
// NOTE(review): "return {};" in the bool-returning code_point_has_general_category() is
//   equivalent to "return false;", which is what the other early exit in that function spells out.
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp index f34cfdd3d4..b4d3fa09d5 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp +++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp @@ -22,6 +22,11 @@ namespace Unicode { #if ENABLE_UNICODE_DATA +static bool has_general_category(UnicodeData const& unicode_data, GeneralCategory general_category) +{ + return (unicode_data.general_category & general_category) != GeneralCategory::None; +} + static bool has_property(UnicodeData const& unicode_data, Property property) { return (unicode_data.properties & property) == property; @@ -197,6 +202,28 @@ String to_unicode_uppercase_full(StringView const& string) #endif } +Optional general_category_from_string([[maybe_unused]] StringView const& general_category) +{ +#if ENABLE_UNICODE_DATA + return Detail::general_category_from_string(general_category); +#else + return {}; +#endif +} + +bool code_point_has_general_category([[maybe_unused]] u32 code_point, [[maybe_unused]] GeneralCategory general_category) +{ +#if ENABLE_UNICODE_DATA + auto unicode_data = Detail::unicode_data_for_code_point(code_point); + if (!unicode_data.has_value()) + return false; + + return has_general_category(*unicode_data, general_category); +#else + return {}; +#endif +} + Optional property_from_string([[maybe_unused]] StringView const& property) { #if ENABLE_UNICODE_DATA diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.h b/Userland/Libraries/LibUnicode/CharacterTypes.h index 46c7a1b8e4..68c32de360 100644 --- a/Userland/Libraries/LibUnicode/CharacterTypes.h +++ b/Userland/Libraries/LibUnicode/CharacterTypes.h @@ -21,6 +21,9 @@ u32 to_unicode_uppercase(u32 code_point); String to_unicode_lowercase_full(StringView const&); String to_unicode_uppercase_full(StringView const&); +Optional general_category_from_string(StringView const&); +bool code_point_has_general_category(u32 code_point, GeneralCategory general_category); + 
Optional property_from_string(StringView const&); bool code_point_has_property(u32 code_point, Property property); bool is_ecma262_property(Property); diff --git a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp index de18f2adba..f18dec6518 100644 --- a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp +++ b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp @@ -478,7 +478,7 @@ namespace Unicode { generate_enum("Locale"sv, "None"sv, move(unicode_data.locales)); generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions)); - generate_enum("GeneralCategory"sv, "None"sv, move(unicode_data.general_categories), move(unicode_data.general_category_unions), move(unicode_data.general_category_aliases), true); + generate_enum("GeneralCategory"sv, "None"sv, unicode_data.general_categories, unicode_data.general_category_unions, unicode_data.general_category_aliases, true); generate_enum("Property"sv, "Assigned"sv, unicode_data.prop_list.keys(), {}, unicode_data.prop_aliases, true); generate_enum("WordBreakProperty"sv, "Other"sv, unicode_data.word_break_prop_list.keys()); @@ -541,6 +541,7 @@ namespace Detail { Optional unicode_data_for_code_point(u32 code_point); Optional property_from_string(StringView const& property); +Optional general_category_from_string(StringView const& general_category); } @@ -716,6 +717,32 @@ Optional property_from_string(StringView const& property) return {}; } +Optional general_category_from_string(StringView const& general_category) +{)~~~"); + + for (auto const& general_category : unicode_data.general_categories) { + generator.set("general_category", general_category); + generator.append(R"~~~( + if (general_category == "@general_category@"sv) + return GeneralCategory::@general_category@;)~~~"); + } + for (auto const& union_ : unicode_data.general_category_unions) { + generator.set("general_category", 
union_.alias); + generator.append(R"~~~( + if (general_category == "@general_category@"sv) + return GeneralCategory::@general_category@;)~~~"); + } + for (auto const& alias : unicode_data.general_category_aliases) { + generator.set("general_category", alias.alias); + generator.append(R"~~~( + if (general_category == "@general_category@"sv) + return GeneralCategory::@general_category@;)~~~"); + } + + generator.append(R"~~~( + return {}; +} + } })~~~");