mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 11:08:11 +00:00
LibUnicode: Check word break when deciding on case-ignorable code points
This commit is contained in:
parent
12fb3ae033
commit
7827aede6f
2 changed files with 16 additions and 2 deletions
|
@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
|
||||||
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
|
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
|
||||||
EXPECT_EQ(result, "\u2170\u03C2");
|
EXPECT_EQ(result, "\u2170\u03C2");
|
||||||
|
|
||||||
|
// Sigma preceded by A and FULL STOP
|
||||||
|
result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
|
||||||
|
EXPECT_EQ(result, "a.\u03C2");
|
||||||
|
|
||||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
|
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
|
||||||
result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
|
result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
|
||||||
EXPECT_EQ(result, "a\u180E\u03C2");
|
EXPECT_EQ(result, "a\u180E\u03C2");
|
||||||
|
|
|
@ -63,9 +63,19 @@ static bool is_case_ignorable(UnicodeData const& unicode_data)
|
||||||
case GeneralCategory::Sk:
|
case GeneralCategory::Sk:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
// FIXME: Handle word break properties (auxiliary/WordBreakProperty.txt).
|
break;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch (unicode_data.word_break_property) {
|
||||||
|
case WordBreakProperty::MidLetter:
|
||||||
|
case WordBreakProperty::MidNumLet:
|
||||||
|
case WordBreakProperty::SingleQuote:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
|
static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue