From b06c104076cba83aecd1fb76823ce0c64050437a Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Tue, 10 Aug 2021 07:27:31 -0400 Subject: [PATCH] LibUnicode: Include Unassigned code points in the Other General Category Now that the generator parses unassigned General Category properties, it can include Unassigned (Cn) in the Other (C) category. --- .../LibUnicode/TestUnicodeCharacterTypes.cpp | 19 +++++++++++++++++++ .../CodeGenerators/GenerateUnicodeData.cpp | 4 +--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp index 481855fd06..6450418c47 100644 --- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp +++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp @@ -234,6 +234,10 @@ TEST_CASE(general_category) auto general_category_private_use = general_category("Private_Use"sv); EXPECT_EQ(general_category_co, general_category_private_use); + auto general_category_cn = general_category("Cn"sv); + auto general_category_unassigned = general_category("Unassigned"sv); + EXPECT_EQ(general_category_cn, general_category_unassigned); + auto general_category_lc = general_category("LC"sv); auto general_category_cased_letter = general_category("Cased_Letter"sv); EXPECT_EQ(general_category_lc, general_category_cased_letter); @@ -251,6 +255,7 @@ TEST_CASE(general_category) EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cc)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu)); @@ -261,6 +266,18 @@ TEST_CASE(general_category) EXPECT(Unicode::code_point_has_general_category(code_point, general_category_co)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu)); + } + + for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point) { + EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c)); + EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cn)); + + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu)); @@ -273,6 +290,7 @@ TEST_CASE(general_category) EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_c)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu)); } @@ -283,6 +301,7 @@ TEST_CASE(general_category) EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_c)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co)); + EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn)); EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll)); } } diff --git a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp index 58fb81cc0b..6a689d206d 100644 --- a/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp +++ b/Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp @@ -890,9 +890,7 @@ static void populate_general_category_unions(PropList& general_categories) populate_union("P"sv, Array { "Pc"sv, "Pd"sv, "Ps"sv, "Pe"sv, "Pi"sv, "Pf"sv, "Po"sv }); populate_union("S"sv, Array { "Sm"sv, "Sc"sv, "Sk"sv, "So"sv }); populate_union("Z"sv, Array { "Zs"sv, "Zl"sv, "Zp"sv }); - - // FIXME: This union should also contain "Cn" (Unassigned), which we don't parse yet. - populate_union("C"sv, Array { "Cc"sv, "Cf"sv, "Cs"sv, "Co"sv }); + populate_union("C"sv, Array { "Cc"sv, "Cf"sv, "Cs"sv, "Co"sv, "Cn"sv }); } int main(int argc, char** argv)