mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 11:44:58 +00:00
LibUnicode: Introduce a Unicode library for interacting with UCD files
The Unicode standard publishes the Unicode Character Database (UCD) with information about every code point, such as each code point's upper case mapping. LibUnicode exists to download and parse UCD files at build time and to provide accessors to that data. As a start, LibUnicode includes upper- and lower-case code point converters.
This commit is contained in:
parent
83f88df757
commit
4dda3edc9e
11 changed files with 473 additions and 0 deletions
50
Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
Normal file
50
Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
Normal file
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <LibUnicode/CharacterTypes.h>
|
||||
#include <ctype.h>
|
||||
|
||||
static void compare_to_ascii(auto& old_function, auto& new_function)
|
||||
{
|
||||
i64 result1 = 0;
|
||||
i64 result2 = 0;
|
||||
|
||||
for (u32 i = 0; i < 0x80; ++i) {
|
||||
EXPECT_EQ(result1 = old_function(i), result2 = new_function(i));
|
||||
if (result1 != result2)
|
||||
dbgln("Function input value was {}.", i);
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises Unicode::to_unicode_lowercase against the C library's tolower for
// the inputs covered by compare_to_ascii, then against a handful of code points
// outside of that range.
TEST_CASE(to_unicode_lowercase)
{
    // The new converter must agree with tolower() wherever compare_to_ascii looks.
    compare_to_ascii(tolower, Unicode::to_unicode_lowercase);

    // Greek omega: the capital letter maps to the small letter, and the small
    // letter maps to itself.
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x03a9u), 0x03c9u); // "Ω" to "ω"
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x03c9u), 0x03c9u); // "ω" to "ω"

    // Code points that UnicodeData.txt encodes as part of a range rather than
    // as individual entries. Each is expected to map to itself.
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x3400u), 0x3400u);
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x3401u), 0x3401u);
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x3402u), 0x3402u);
    EXPECT_EQ(Unicode::to_unicode_lowercase(0x4dbfu), 0x4dbfu);
}
|
||||
|
||||
// Exercises Unicode::to_unicode_uppercase against the C library's toupper for
// the inputs covered by compare_to_ascii, then against a handful of code points
// outside of that range.
TEST_CASE(to_unicode_uppercase)
{
    // The new converter must agree with toupper() wherever compare_to_ascii looks.
    compare_to_ascii(toupper, Unicode::to_unicode_uppercase);

    // Greek omega: the capital letter maps to itself, and the small letter maps
    // to the capital letter.
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x03c9u), 0x03a9u); // "ω" to "Ω"

    // Code points that UnicodeData.txt encodes as part of a range rather than
    // as individual entries. Each is expected to map to itself.
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x3400u), 0x3400u);
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u);
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u);
    EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu);
}
|
Loading…
Add table
Add a link
Reference in a new issue