1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 11:34:59 +00:00

LibUnicode+Tests: Remove now unused to_unicode_*_full methods

This relocates all of the tests for these from LibUnicode over to the AK
String test suite.
This commit is contained in:
Shannon Booth 2023-11-27 22:47:08 +13:00 committed by Tim Flynn
parent d1ed04a6cb
commit d777b279e3
4 changed files with 426 additions and 510 deletions

View file

@ -74,54 +74,6 @@ TEST_CASE(to_unicode_titlecase)
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj"
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj"
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz"
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(""sv)), ""sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" "sv)), " "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" - "sv)), " - "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a"sv)), "A"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("A"sv)), "A"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" a"sv)), " A"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a "sv)), "A "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("Ab"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("aB"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("AB"sv)), "Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" ab"sv)), " Ab"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab "sv)), "Ab "sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo bar baz"sv)), "Foo Bar Baz"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo \n \r bar \t baz"sv)), "Foo \n \r Bar \t Baz"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("f\"oo\" b'ar'"sv)), "F\"Oo\" B'ar'"sv);
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv);
}
TEST_CASE(to_unicode_casefold)
{
    // Every code point below 0x80 must fold to exactly one byte, and that byte
    // must equal what tolower() yields for the same code point.
    for (u8 code_point = 0; code_point < 0x80; ++code_point) {
        auto folded = MUST(Unicode::to_unicode_casefold_full({ reinterpret_cast<char const*>(&code_point), 1 }));
        auto folded_bytes = folded.bytes_as_string_view();
        auto expected = tolower(code_point);

        EXPECT_EQ(folded_bytes.length(), 1u);
        EXPECT_EQ(folded_bytes[0], expected);
    }

    // The remaining cases fold a single code point into several code points.

    // LATIN SMALL LETTER SHARP S
    EXPECT_EQ(MUST(Unicode::to_unicode_casefold_full("\u00DF"sv)), "\u0073\u0073"sv);

    // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_casefold_full("\u1FB3"sv)), "\u03B1\u03B9"sv);

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_casefold_full("\u1FB6"sv)), "\u03B1\u0342"sv);

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_casefold_full("\u1FB7"sv)), "\u03B1\u0342\u03B9"sv);
}
BENCHMARK_CASE(casing)
@ -130,388 +82,9 @@ BENCHMARK_CASE(casing)
__test_to_unicode_lowercase();
__test_to_unicode_uppercase();
__test_to_unicode_titlecase();
__test_to_unicode_casefold();
}
}
TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
{
    // Lowercasing without a second argument: every input here is expected to come
    // back unchanged, except U+0130, which is expected to expand to U+0069 U+0307.

    // LATIN SMALL LETTER SHARP S
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u00DF"sv)), "\u00DF");

    // LATIN CAPITAL LETTER I WITH DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u0130"sv)), "\u0069\u0307");

    // LATIN SMALL LIGATURE FF
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB00"sv)), "\uFB00");

    // LATIN SMALL LIGATURE FI
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB01"sv)), "\uFB01");

    // LATIN SMALL LIGATURE FL
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB02"sv)), "\uFB02");

    // LATIN SMALL LIGATURE FFI
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB03"sv)), "\uFB03");

    // LATIN SMALL LIGATURE FFL
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB04"sv)), "\uFB04");

    // LATIN SMALL LIGATURE LONG S T
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB05"sv)), "\uFB05");

    // LATIN SMALL LIGATURE ST
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\uFB06"sv)), "\uFB06");

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u1FB7"sv)), "\u1FB7");

    // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u1FC7"sv)), "\u1FC7");

    // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u1FF7"sv)), "\u1FF7");
}
TEST_CASE(to_unicode_lowercase_special_casing_sigma)
{
    // Lowercasing of GREEK CAPITAL LETTER SIGMA (U+03A3) depends on its neighbours:
    // the cases below expect either U+03C2 or U+03C3 depending on what precedes
    // and follows the sigma.

    // Baseline: no sigma involved.
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("ABCI"sv)), "abci");

    // Sigma preceded by A
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u03A3"sv)), "a\u03C2");

    // Sigma preceded by FEMININE ORDINAL INDICATOR
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv)), "\u00AA\u03C2");

    // Sigma preceded by ROMAN NUMERAL ONE
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv)), "\u2170\u03C2");

    // Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv)), "\u0345\u03C3");

    // Sigma preceded by A and FULL STOP
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A.\u03A3"sv)), "a.\u03C2");

    // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv)), "a\u180E\u03C2");

    // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3B"sv)), "a\u180E\u03C3b");

    // Sigma followed by A
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u03A3A"sv)), "\u03C3a");

    // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180E"sv)), "a\u03C2\u180E");

    // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180EB"sv)), "a\u03C3\u180Eb");

    // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180E"sv)), "a\u180E\u03C2\u180E");

    // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180EB"sv)), "a\u180E\u03C3\u180Eb");
}
TEST_CASE(to_unicode_lowercase_special_casing_i)
{
    // Lowercasing of I-like sequences with "en", "az" and "tr" passed as the second
    // argument (presumably a locale tag); "az" and "tr" are expected to agree with
    // each other and to differ from "en".

    // LATIN CAPITAL LETTER I
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv)), "i"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv)), "\u0131"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv)), "\u0131"sv);

    // LATIN CAPITAL LETTER I WITH DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "en"sv)), "\u0069\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "az"sv)), "i"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "tr"sv)), "i"sv);

    // LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv)), "i\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv)), "i"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv)), "i"sv);

    // LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "en"sv)), "ia\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "az"sv)), "\u0131a\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "tr"sv)), "\u0131a\u0307"sv);
}
TEST_CASE(to_unicode_lowercase_special_casing_more_above)
{
    // Lowercasing of I, J and I WITH OGONEK with "en" and "lt" passed as the second
    // argument (presumably a locale tag). The two only diverge when a COMBINING GRAVE
    // ACCENT follows: "lt" is then expected to insert U+0307 before the accent.

    // LATIN CAPITAL LETTER I
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv)), "i"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "lt"sv)), "i"sv);

    // LATIN CAPITAL LETTER J
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("J"sv, "en"sv)), "j"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("J"sv, "lt"sv)), "j"sv);

    // LATIN CAPITAL LETTER I WITH OGONEK
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "en"sv)), "\u012f"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "lt"sv)), "\u012f"sv);

    // LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "en"sv)), "i\u0300"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "lt"sv)), "i\u0307\u0300"sv);

    // LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "en"sv)), "j\u0300"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "lt"sv)), "j\u0307\u0300"sv);

    // LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "en"sv)), "\u012f\u0300"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "lt"sv)), "\u012f\u0307\u0300"sv);
}
TEST_CASE(to_unicode_lowercase_special_casing_not_before_dot)
{
    // Lowercasing of capital I, alone and followed by COMBINING DOT ABOVE, with
    // "en", "az" and "tr" passed as the second argument (presumably a locale tag).

    // LATIN CAPITAL LETTER I
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv)), "i"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv)), "\u0131"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv)), "\u0131"sv);

    // LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv)), "i\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv)), "i"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv)), "i"sv);
}
TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
{
    // Uppercasing without a second argument: apart from U+0130, which is expected
    // to stay as-is, each single code point below is expected to expand into a
    // sequence of two or three code points.

    // LATIN SMALL LETTER SHARP S
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u00DF"sv)), "\u0053\u0053");

    // LATIN CAPITAL LETTER I WITH DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u0130"sv)), "\u0130");

    // LATIN SMALL LIGATURE FF
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB00"sv)), "\u0046\u0046");

    // LATIN SMALL LIGATURE FI
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB01"sv)), "\u0046\u0049");

    // LATIN SMALL LIGATURE FL
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB02"sv)), "\u0046\u004C");

    // LATIN SMALL LIGATURE FFI
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB03"sv)), "\u0046\u0046\u0049");

    // LATIN SMALL LIGATURE FFL
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB04"sv)), "\u0046\u0046\u004C");

    // LATIN SMALL LIGATURE LONG S T
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB05"sv)), "\u0053\u0054");

    // LATIN SMALL LIGATURE ST
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\uFB06"sv)), "\u0053\u0054");

    // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u0390"sv)), "\u0399\u0308\u0301");

    // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u03B0"sv)), "\u03A5\u0308\u0301");

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u1FB7"sv)), "\u0391\u0342\u0399");

    // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u1FC7"sv)), "\u0397\u0342\u0399");

    // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("\u1FF7"sv)), "\u03A9\u0342\u0399");
}
TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted)
{
    // Uppercasing of i and j with "en" and "lt" passed as the second argument
    // (presumably a locale tag). With "lt", a COMBINING DOT ABOVE following the
    // letter is expected to be dropped; with "en" it is expected to be kept.

    // LATIN SMALL LETTER I
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("i"sv, "en"sv)), "I"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("i"sv, "lt"sv)), "I"sv);

    // LATIN SMALL LETTER J
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("j"sv, "en"sv)), "J"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("j"sv, "lt"sv)), "J"sv);

    // LATIN SMALL LETTER I followed by COMBINING DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "en"sv)), "I\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "lt"sv)), "I"sv);

    // LATIN SMALL LETTER J followed by COMBINING DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "en"sv)), "J\u0307"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "lt"sv)), "J"sv);
}
TEST_CASE(to_unicode_titlecase_unconditional_special_casing)
{
    // Titlecasing without a second argument: apart from U+0130, which is expected
    // to stay as-is, each single code point below is expected to expand into a
    // sequence of two or three code points.

    // LATIN SMALL LETTER SHARP S
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u00DF"sv)), "\u0053\u0073"sv);

    // LATIN CAPITAL LETTER I WITH DOT ABOVE
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u0130"sv)), "\u0130"sv);

    // LATIN SMALL LIGATURE FF
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB00"sv)), "\u0046\u0066"sv);

    // LATIN SMALL LIGATURE FI
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB01"sv)), "\u0046\u0069"sv);

    // LATIN SMALL LIGATURE FL
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB02"sv)), "\u0046\u006C"sv);

    // LATIN SMALL LIGATURE FFI
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB03"sv)), "\u0046\u0066\u0069"sv);

    // LATIN SMALL LIGATURE FFL
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB04"sv)), "\u0046\u0066\u006C"sv);

    // LATIN SMALL LIGATURE LONG S T
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB05"sv)), "\u0053\u0074"sv);

    // LATIN SMALL LIGATURE ST
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\uFB06"sv)), "\u0053\u0074"sv);

    // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u0390"sv)), "\u0399\u0308\u0301"sv);

    // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u03B0"sv)), "\u03A5\u0308\u0301"sv);

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u1FB7"sv)), "\u0391\u0342\u0345"sv);

    // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u1FC7"sv)), "\u0397\u0342\u0345"sv);

    // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("\u1FF7"sv)), "\u03A9\u0342\u0345"sv);
}
TEST_CASE(to_unicode_titlecase_special_casing_i)
{
    // Titlecasing of small i with "en", "az" and "tr" passed as the second argument
    // (presumably a locale tag): "en" expects plain I, the other two expect U+0130.

    // LATIN SMALL LETTER I
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("i"sv, "en"sv)), "I"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("i"sv, "az"sv)), "\u0130"sv);
    EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("i"sv, "tr"sv)), "\u0130"sv);
}
TEST_CASE(general_category)
{
auto general_category = [](StringView name) {