From c4bfda7f7fecdea9b255eee00c1d4beca25aa0c2 Mon Sep 17 00:00:00 2001
From: Timothy Flynn <trflynn89@pm.me>
Date: Tue, 27 Jul 2021 18:47:41 -0400
Subject: [PATCH] LibUnicode: Handle code points that are both cased and
 case-ignorable

Some code points are both cased and case-ignorable, for example U+0345
(COMBINING GREEK YPOGEGRAMMENI). When determining if a code point is a
final code point in a string, count a code point as a cased letter only
if it is not also case-ignorable.
---
 Tests/LibUnicode/TestUnicodeCharacterTypes.cpp   | 4 ++++
 Userland/Libraries/LibUnicode/CharacterTypes.cpp | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
index 43f779ebd5..a71fd0d9f3 100644
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
     result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
     EXPECT_EQ(result, "\u2170\u03C2");
 
+    // Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
+    result = Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv);
+    EXPECT_EQ(result, "\u0345\u03C3");
+
     // Sigma preceded by A and FULL STOP
     result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
     EXPECT_EQ(result, "a.\u03C2");
diff --git a/Userland/Libraries/LibUnicode/CharacterTypes.cpp b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
index 901acb5277..9fd675fc84 100644
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -95,7 +95,7 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
         if (!unicode_data.has_value())
             return false;
 
-        if (is_cased_letter(*unicode_data))
+        if (is_cased_letter(*unicode_data) && !is_case_ignorable(*unicode_data))
             ++cased_letter_count;
         else if (!is_case_ignorable(*unicode_data))
             cased_letter_count = 0;