mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 08:08:12 +00:00
LibUnicode: Implement locale-aware MORE_ABOVE special casing
This commit is contained in:
parent
1427ebc622
commit
436faf9fd9
2 changed files with 70 additions and 0 deletions
|
@ -198,6 +198,51 @@ TEST_CASE(to_unicode_lowercase_special_casing_i)
|
||||||
EXPECT_EQ(result, "\u0131a\u0307"sv);
|
EXPECT_EQ(result, "\u0131a\u0307"sv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(to_unicode_lowercase_special_casing_more_above)
{
    // Exercises the locale-sensitive "More_Above" lowercase special casing.
    // Each input is lowercased once with the "en" locale and once with the "lt" locale.
    // Per the expectations below, only "lt" inserts U+0307 after the lowercased letter,
    // and only when the letter is directly followed by U+0300 (COMBINING GRAVE ACCENT).

    // Bare letters with nothing following them: both locales yield the plain lowercase form.

    // LATIN CAPITAL LETTER I
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I"sv, "en"sv), "i"sv);
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I"sv, "lt"sv), "i"sv);

    // LATIN CAPITAL LETTER J
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J"sv, "en"sv), "j"sv);
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J"sv, "lt"sv), "j"sv);

    // LATIN CAPITAL LETTER I WITH OGONEK
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e"sv, "en"sv), "\u012f"sv);
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e"sv, "lt"sv), "\u012f"sv);

    // The same letters followed by COMBINING GRAVE ACCENT: "en" leaves the accent as-is,
    // while "lt" places U+0307 between the lowercased letter and the accent.

    // LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I\u0300"sv, "en"sv), "i\u0300"sv);
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("I\u0300"sv, "lt"sv), "i\u0307\u0300"sv);

    // LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J\u0300"sv, "en"sv), "j\u0300"sv);
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("J\u0300"sv, "lt"sv), "j\u0307\u0300"sv);

    // LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "en"sv), "\u012f\u0300"sv);
    EXPECT_EQ(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "lt"sv), "\u012f\u0307\u0300"sv);
}
|
||||||
|
|
||||||
TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
|
TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
|
||||||
{
|
{
|
||||||
// LATIN SMALL LETTER SHARP S
|
// LATIN SMALL LETTER SHARP S
|
||||||
|
|
|
@ -115,6 +115,26 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Evaluates the "More_Above" casing context for the code point C that starts at byte
// offset `index` of `string` and occupies `byte_length` bytes.
//
// Returns true when C is followed by a character of canonical combining class 230 (Above)
// with no intervening character of combining class 0 or 230 (Above). Returns false when
// nothing follows C, when a class 0 character is reached first, or when a following code
// point has no Unicode data available.
static bool is_followed_by_combining_class_above(Utf8View const& string, size_t index, size_t byte_length)
{
    // Byte offset of the first code point after C.
    auto next_offset = index + byte_length;

    // Everything after C; an empty view when C is the last code point in the string.
    auto remainder = (next_offset >= string.byte_length())
        ? Utf8View {}
        : string.substring_view(next_offset);

    for (auto following_code_point : remainder) {
        auto data = Detail::unicode_data_for_code_point(following_code_point);
        if (!data.has_value())
            return false;

        auto combining_class = data->canonical_combining_class;

        // The first class 230 character decides the match.
        if (combining_class == 230)
            return true;

        // A class 0 character ends the run of combining marks attached to C.
        if (combining_class == 0)
            return false;

        // Any other combining class is skipped over; keep scanning.
    }

    return false;
}
|
||||||
|
|
||||||
static SpecialCasing const* find_matching_special_case(Utf8View const& string, Optional<StringView> locale, size_t index, size_t byte_length, UnicodeData const& unicode_data)
|
static SpecialCasing const* find_matching_special_case(Utf8View const& string, Optional<StringView> locale, size_t index, size_t byte_length, UnicodeData const& unicode_data)
|
||||||
{
|
{
|
||||||
auto requested_locale = Locale::None;
|
auto requested_locale = Locale::None;
|
||||||
|
@ -149,6 +169,11 @@ static SpecialCasing const* find_matching_special_case(Utf8View const& string, O
|
||||||
return special_casing;
|
return special_casing;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case Condition::MoreAbove:
|
||||||
|
if (is_followed_by_combining_class_above(string, index, byte_length))
|
||||||
|
return special_casing;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue