mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 11:34:59 +00:00
LibUnicode: Perform code point property lookups in constant time
We currently produce a single table for all categories of code point properties (GeneralCategory, Script, etc.). Each row contains a field indicating the range of code points to which that property applies. At runtime, we then do a binary search through that table to decide if a code point has a property.

This changes our approach to generate a 2-stage lookup table for each of those categories. There is an in-depth explanation of these tables above the new `create_code_point_tables` method. The end effect is that code point property lookup is reduced from a binary search to constant-time array lookups.

In total, this change:
* Increases the size of libunicode.so from 2.7 MB to 2.9 MB.
* Reduces the runtime of the new benchmark test case added here from 3.576s to 1.020s (a 3.5x speedup).
* Reduces the share of runtime spent checking if a code point has a word break property from ~81% to ~56%, in a profile of resizing a TextEditor window with a 3MB file open.
This commit is contained in:
parent
8f1d73abde
commit
0652cc48c0
2 changed files with 247 additions and 61 deletions
|
@ -594,6 +594,28 @@ TEST_CASE(general_category)
|
|||
}
|
||||
}
|
||||
|
||||
// Benchmark for Unicode::code_point_has_general_category(): repeatedly queries
// the "Cased_Letter" general category over several fixed code point ranges and
// checks every answer, so the lookups cannot be skipped.
BENCHMARK_CASE(general_category_performance)
{
    constexpr size_t iteration_count = 1'000'000;

    // Resolve the category once up front; only the per-code-point lookups
    // below are repeated.
    auto const general_category_cased_letter = Unicode::general_category_from_string("Cased_Letter"sv).value();

    for (size_t iteration = 0; iteration < iteration_count; ++iteration) {
        // U+0000..U+001F (C0 control characters): expected not to be cased letters.
        for (u32 code_point = 0; code_point <= 0x1f; ++code_point)
            EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cased_letter));

        // U+0041..U+005A (ASCII 'A'..'Z'): expected to be cased letters.
        for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point)
            EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cased_letter));

        // U+0061..U+007A (ASCII 'a'..'z'): expected to be cased letters.
        for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point)
            EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cased_letter));

        // U+E000..U+E100 (start of the BMP Private Use Area): expected not to be cased letters.
        for (u32 code_point = 0xe000; code_point <= 0xe100; ++code_point)
            EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cased_letter));

        // U+101FE..U+1027F (a range beyond the BMP): expected not to be cased letters.
        for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point)
            EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cased_letter));
    }
}
|
||||
|
||||
TEST_CASE(property)
|
||||
{
|
||||
auto property = [](StringView name) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue