From 3fae92eea2b5cab82c39e57b06babfd133b62d64 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 26 Jul 2023 07:30:09 -0400 Subject: [PATCH] LibUnicode: Search code point properties sequentially at compile time When generating code point property tables, we currently binary search the code point range lists for each property to decide if a code point has that property. However, we are both iterating over the code points and through the sorted properties in order. This means we do not need to search code point ranges that are below the current code point at all. We can even remove the code point ranges that fall below the current code point, as we will not see a code point in those ranges again. On my machine, this reduces the run time of GenerateUnicodeData from 3.4 seconds to 1.2 seconds. --- .../LibUnicode/GenerateUnicodeData.cpp | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index 144dcfddfa..c8348c7d0b 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -7,7 +7,6 @@ #include "GeneratorUtil.h" #include #include -#include #include #include #include @@ -1540,7 +1539,7 @@ static ErrorOr create_code_point_tables(UnicodeData& unicode_data) static constexpr auto MAX_CODE_POINT = 0x10ffffu; struct TableMetadata { - static ErrorOr create(PropList const& property_list) + static ErrorOr create(PropList& property_list) { TableMetadata data; TRY(data.property_values.try_ensure_capacity(property_list.size())); @@ -1549,15 +1548,15 @@ static ErrorOr create_code_point_tables(UnicodeData& unicode_data) auto property_names = property_list.keys(); quick_sort(property_names); - for (auto const& property_name : property_names) { - auto const& code_point_rages = property_list.get(property_name).value(); - data.property_values.unchecked_append(code_point_rages); + for (auto& property_name : property_names) { + auto& code_point_ranges = property_list.get(property_name).value(); + data.property_values.unchecked_append(move(code_point_ranges)); } return data; } - Vector property_values; + Vector property_values; Vector property_set; Vector current_block; @@ -1566,10 +1565,23 @@ static ErrorOr create_code_point_tables(UnicodeData& unicode_data) auto update_tables = [](auto code_point, auto& tables, auto& metadata) -> ErrorOr { static constexpr auto BLOCK_SIZE = CodePointTables::LSB_MASK + 1; + static Unicode::CodePointRangeComparator comparator {}; + + for (auto& property_values : metadata.property_values) { + size_t ranges_to_remove = 0; + auto has_property = false; + + for (auto const& range : property_values) { + if (auto comparison = comparator(code_point, range); comparison <= 0) { + has_property = comparison == 0; + break; + } + + ++ranges_to_remove; + } - for (auto const& property_values : metadata.property_values) { - auto has_property = binary_search(property_values, code_point, nullptr, Unicode::CodePointRangeComparator {}) != nullptr; metadata.property_set.unchecked_append(has_property); + property_values.remove(0, ranges_to_remove); } size_t unique_properties_index = 0;