diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 18cff2bdcb..e53b951945 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -198,6 +198,37 @@ TEST_CASE(to_unicode_lowercase_special_casing_i)
     EXPECT_EQ(result, "\u0131a\u0307"sv);
 }
 
+TEST_CASE(to_unicode_lowercase_special_casing_more_above)
+{
+    // With the "lt" locale, lowercasing is expected to insert U+0307 after the lowercased letter only
+    // when a COMBINING GRAVE ACCENT (U+0300) follows it. With the "en" locale, U+0307 is never inserted.
+    // Each input below is checked against both locales.
+
+    // LATIN CAPITAL LETTER I
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I"sv, "en"sv), "i"sv);
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I"sv, "lt"sv), "i"sv);
+
+    // LATIN CAPITAL LETTER J
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J"sv, "en"sv), "j"sv);
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J"sv, "lt"sv), "j"sv);
+
+    // LATIN CAPITAL LETTER I WITH OGONEK
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e"sv, "en"sv), "\u012f"sv);
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e"sv, "lt"sv), "\u012f"sv);
+
+    // LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I\u0300"sv, "en"sv), "i\u0300"sv);
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I\u0300"sv, "lt"sv), "i\u0307\u0300"sv);
+
+    // LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J\u0300"sv, "en"sv), "j\u0300"sv);
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J\u0300"sv, "lt"sv), "j\u0307\u0300"sv);
+
+    // LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "en"sv), "\u012f\u0300"sv);
+    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "lt"sv), "\u012f\u0307\u0300"sv);
+}
+
TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
 {
     // LATIN SMALL LETTER SHARP S
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index b6f3555632..347d2f0c04 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -115,6 +115,35 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
     return true;
 }
 
+static bool is_followed_by_combining_class_above(Utf8View const& string, size_t index, size_t byte_length)
+{
+    // Evaluates the MoreAbove casing condition for the code point C located at byte offset `index`:
+    // C is followed by a character of combining class 230 (Above) with no intervening character of
+    // combining class 0 or 230 (Above). The text after C starts at byte offset `index + byte_length`.
+    auto const next_offset = index + byte_length;
+
+    // Nothing follows C, so the condition cannot hold.
+    if (next_offset >= string.byte_length())
+        return false;
+
+    auto remainder = string.substring_view(next_offset);
+
+    for (auto code_point : remainder) {
+        auto data = Detail::unicode_data_for_code_point(code_point);
+
+        // A code point without a data entry ends the search with a negative result.
+        if (!data.has_value())
+            return false;
+
+        // The first class 0 or class 230 character decides the outcome; any other class is skipped.
+        auto const combining_class = data->canonical_combining_class;
+        if (combining_class == 0 || combining_class == 230)
+            return combining_class == 230;
+    }
+
+    return false;
+}
+
 static SpecialCasing const* find_matching_special_case(Utf8View const& string, Optional locale, size_t index, size_t byte_length, UnicodeData const& unicode_data)
 {
     auto requested_locale = Locale::None;
@@ -149,6 +178,11 @@ static SpecialCasing const* find_matching_special_case(Utf8View const& string, O
                 return special_casing;
             break;
 
+        case Condition::MoreAbove:
+            if (is_followed_by_combining_class_above(string, index, byte_length))
+                return special_casing;
+            break;
+
         default:
             break;
         }