diff --git a/Tests/LibUnicode/TestUnicodeNormalization.cpp b/Tests/LibUnicode/TestUnicodeNormalization.cpp index 8157b6d966..daef39d765 100644 --- a/Tests/LibUnicode/TestUnicodeNormalization.cpp +++ b/Tests/LibUnicode/TestUnicodeNormalization.cpp @@ -63,6 +63,7 @@ TEST_CASE(normalize_nfc) EXPECT_EQ(normalize("\u1103\u1161\u11B0"sv, NormalizationForm::NFC), "닭"sv); EXPECT_EQ(normalize("\u1100\uAC00\u11A8"sv, NormalizationForm::NFC), "\u1100\uAC01"sv); + EXPECT_EQ(normalize("\u1103\u1161\u11B0\u11B0"sv, NormalizationForm::NFC), "닭\u11B0"sv); } TEST_CASE(normalize_nfkd) diff --git a/Userland/Libraries/LibUnicode/Normalize.cpp b/Userland/Libraries/LibUnicode/Normalize.cpp index 8f5a59f7c3..bc1ab70221 100644 --- a/Userland/Libraries/LibUnicode/Normalize.cpp +++ b/Userland/Libraries/LibUnicode/Normalize.cpp @@ -111,7 +111,8 @@ static u32 combine_hangul_code_points(u32 a, u32 b) auto const leading_vowel_index = leading_index * HANGUL_BLOCK_COUNT + vowel_index * HANGUL_TRAILING_COUNT; return HANGUL_SYLLABLE_BASE + leading_vowel_index; } - if (is_hangul_code_point(a) && is_hangul_trailing(b)) { + // LV characters are the first in each "T block", so use this check to avoid combining LVT with T. + if (is_hangul_code_point(a) && (a - HANGUL_SYLLABLE_BASE) % HANGUL_TRAILING_COUNT == 0 && is_hangul_trailing(b)) { return a + b - HANGUL_TRAILING_BASE; } return 0;