mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 12:48:10 +00:00
LibUnicode: Handle code points that are both cased and case-ignorable
Apparently, some code points fit both categories, for example U+0345 (COMBINING GREEK YPOGEGRAMMENI). When determining whether a code point is a final code point in a string, treat such code points as case-ignorable rather than counting them as cased letters.
This commit is contained in:
parent
dff156b7c6
commit
c4bfda7f7f
2 changed files with 5 additions and 1 deletion
|
@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
|
||||||
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
|
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
|
||||||
EXPECT_EQ(result, "\u2170\u03C2");
|
EXPECT_EQ(result, "\u2170\u03C2");
|
||||||
|
|
||||||
|
// Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
|
||||||
|
result = Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv);
|
||||||
|
EXPECT_EQ(result, "\u0345\u03C3");
|
||||||
|
|
||||||
// Sigma preceded by A and FULL STOP
|
// Sigma preceded by A and FULL STOP
|
||||||
result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
|
result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
|
||||||
EXPECT_EQ(result, "a.\u03C2");
|
EXPECT_EQ(result, "a.\u03C2");
|
||||||
|
|
|
@ -95,7 +95,7 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
|
||||||
if (!unicode_data.has_value())
|
if (!unicode_data.has_value())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (is_cased_letter(*unicode_data))
|
if (is_cased_letter(*unicode_data) && !is_case_ignorable(*unicode_data))
|
||||||
++cased_letter_count;
|
++cased_letter_count;
|
||||||
else if (!is_case_ignorable(*unicode_data))
|
else if (!is_case_ignorable(*unicode_data))
|
||||||
cased_letter_count = 0;
|
cased_letter_count = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue