mirror of
https://github.com/RGBCube/serenity
synced 2025-05-20 13:45:06 +00:00
LibUnicode: Support full case folding for titlecasing a string
Unicode declares that to titlecase a string, the first cased code point after each word boundary should be transformed to its titlecase mapping. All other code points are transformed to their lowercase mapping.
This commit is contained in:
parent
b562348d31
commit
bc51017a03
5 changed files with 165 additions and 0 deletions
|
@ -74,6 +74,27 @@ TEST_CASE(to_unicode_titlecase)
|
|||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz"
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(""sv)), ""sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" "sv)), " "sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" - "sv)), " - "sv);
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a"sv)), "A"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("A"sv)), "A"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" a"sv)), " A"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a "sv)), "A "sv);
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("Ab"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("aB"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("AB"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" ab"sv)), " Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab "sv)), "Ab "sv);
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo bar baz"sv)), "Foo Bar Baz"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo \n \r bar \t baz"sv)), "Foo \n \r Bar \t Baz"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("f\"oo\" b'ar'"sv)), "F\"Oo\" B'Ar'"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
|
||||
|
@ -382,6 +403,78 @@ TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted)
|
|||
EXPECT_EQ(result, "J"sv);
|
||||
}
|
||||
|
||||
// Checks Unicode::to_unicode_titlecase_full on inputs consisting of a single code point,
// called without a locale argument. Most cases expect the result to expand to several
// code points (for example U+00DF is expected to become "Ss"); U+0130 is expected to be
// returned unchanged.
TEST_CASE(to_unicode_titlecase_unconditional_special_casing)
|
||||
{
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST(Unicode::to_unicode_titlecase_full("\u00DF"sv));
|
||||
EXPECT_EQ(result, "\u0053\u0073"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
// Expected output equals the input here.
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\u0130"sv));
|
||||
EXPECT_EQ(result, "\u0130"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FF
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB00"sv));
|
||||
EXPECT_EQ(result, "\u0046\u0066"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FI
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB01"sv));
|
||||
EXPECT_EQ(result, "\u0046\u0069"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FL
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB02"sv));
|
||||
EXPECT_EQ(result, "\u0046\u006C"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FFI
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB03"sv));
|
||||
EXPECT_EQ(result, "\u0046\u0066\u0069"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FFL
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB04"sv));
|
||||
EXPECT_EQ(result, "\u0046\u0066\u006C"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE LONG S T
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB05"sv));
|
||||
EXPECT_EQ(result, "\u0053\u0074"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE ST
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\uFB06"sv));
|
||||
EXPECT_EQ(result, "\u0053\u0074"sv);
|
||||
|
||||
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\u0390"sv));
|
||||
EXPECT_EQ(result, "\u0399\u0308\u0301"sv);
|
||||
|
||||
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\u03B0"sv));
|
||||
EXPECT_EQ(result, "\u03A5\u0308\u0301"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\u1FB7"sv));
|
||||
EXPECT_EQ(result, "\u0391\u0342\u0345"sv);
|
||||
|
||||
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\u1FC7"sv));
|
||||
EXPECT_EQ(result, "\u0397\u0342\u0345"sv);
|
||||
|
||||
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("\u1FF7"sv));
|
||||
EXPECT_EQ(result, "\u03A9\u0342\u0345"sv);
|
||||
}
|
||||
|
||||
// Checks how Unicode::to_unicode_titlecase_full treats the input "i" depending on the
// locale passed as the second argument: with "en" the expected result is "I", while
// with "az" and "tr" the expected result is U+0130.
TEST_CASE(to_unicode_titlecase_special_casing_i)
|
||||
{
|
||||
// LATIN SMALL LETTER I
|
||||
auto result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "en"sv));
|
||||
EXPECT_EQ(result, "I"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "az"sv));
|
||||
EXPECT_EQ(result, "\u0130"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "\u0130"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(general_category)
|
||||
{
|
||||
auto general_category = [](StringView name) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue