1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 11:44:58 +00:00

LibUnicode: Introduce a Unicode library for interacting with UCD files

The Unicode standard publishes the Unicode Character Database (UCD) with
information about every code point, such as each code point's upper case
mapping. LibUnicode exists to download and parse UCD files at build time
and to provide accessors to that data.

As a start, LibUnicode includes upper- and lower-case code point
converters.
This commit is contained in:
Timothy Flynn 2021-07-25 15:10:51 -04:00 committed by Linus Groh
parent 83f88df757
commit 4dda3edc9e
11 changed files with 473 additions and 0 deletions

View file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibUnicode/CharacterTypes.h>
#include <ctype.h>
static void compare_to_ascii(auto& old_function, auto& new_function)
{
i64 result1 = 0;
i64 result2 = 0;
for (u32 i = 0; i < 0x80; ++i) {
EXPECT_EQ(result1 = old_function(i), result2 = new_function(i));
if (result1 != result2)
dbgln("Function input value was {}.", i);
}
}
// Exercises Unicode::to_unicode_lowercase against the C library for ASCII input and
// against a handful of hand-picked non-ASCII code points.
TEST_CASE(to_unicode_lowercase)
{
    // Every ASCII code point must lower-case exactly as tolower() does.
    compare_to_ascii(tolower, Unicode::to_unicode_lowercase);

    EXPECT_EQ(Unicode::to_unicode_lowercase(0x03c9u), 0x03c9u); // "ω" to "ω"
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x03a9u), 0x03c9u); // "Ω" to "ω"

    // Code points encoded by ranges in UnicodeData.txt; each is expected to map to itself.
    constexpr u32 range_encoded_code_points[] = { 0x3400u, 0x3401u, 0x3402u, 0x4dbfu };
    for (u32 code_point : range_encoded_code_points)
        EXPECT_EQ(Unicode::to_unicode_lowercase(code_point), code_point);
}
// Exercises Unicode::to_unicode_uppercase against the C library for ASCII input and
// against a handful of hand-picked non-ASCII code points.
TEST_CASE(to_unicode_uppercase)
{
    // Every ASCII code point must upper-case exactly as toupper() does.
    compare_to_ascii(toupper, Unicode::to_unicode_uppercase);

    EXPECT_EQ(Unicode::to_unicode_uppercase(0x03c9u), 0x03a9u); // "ω" to "Ω"
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x03a9u), 0x03a9u); // "Ω" to "Ω"

    // Code points encoded by ranges in UnicodeData.txt; each is expected to map to itself.
    constexpr u32 range_encoded_code_points[] = { 0x3400u, 0x3401u, 0x3402u, 0x4dbfu };
    for (u32 code_point : range_encoded_code_points)
        EXPECT_EQ(Unicode::to_unicode_uppercase(code_point), code_point);
}