mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 20:15:00 +00:00
LibUnicode: Search code point properties sequentially at compile time
When generating code point property tables, we currently binary search the code point range lists for each property to decide if a code point has that property. However, we iterate over both the code points and the sorted properties in order. This means we do not need to search code point ranges that are below the current code point at all. We can even remove the code point ranges that fall below the current code point, as we will not see a code point in those ranges again. On my machine, this reduces the run time of GenerateUnicodeData from 3.4 seconds to 1.2 seconds.
This commit is contained in:
parent
1f1d5ed119
commit
3fae92eea2
1 changed file with 20 additions and 8 deletions
|
@ -7,7 +7,6 @@
|
|||
#include "GeneratorUtil.h"
|
||||
#include <AK/AllOf.h>
|
||||
#include <AK/Array.h>
|
||||
#include <AK/BinarySearch.h>
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/DeprecatedString.h>
|
||||
#include <AK/Error.h>
|
||||
|
@ -1540,7 +1539,7 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
|
|||
static constexpr auto MAX_CODE_POINT = 0x10ffffu;
|
||||
|
||||
struct TableMetadata {
|
||||
static ErrorOr<TableMetadata> create(PropList const& property_list)
|
||||
static ErrorOr<TableMetadata> create(PropList& property_list)
|
||||
{
|
||||
TableMetadata data;
|
||||
TRY(data.property_values.try_ensure_capacity(property_list.size()));
|
||||
|
@ -1549,15 +1548,15 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
|
|||
auto property_names = property_list.keys();
|
||||
quick_sort(property_names);
|
||||
|
||||
for (auto const& property_name : property_names) {
|
||||
auto const& code_point_rages = property_list.get(property_name).value();
|
||||
data.property_values.unchecked_append(code_point_rages);
|
||||
for (auto& property_name : property_names) {
|
||||
auto& code_point_ranges = property_list.get(property_name).value();
|
||||
data.property_values.unchecked_append(move(code_point_ranges));
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
Vector<typename PropList::ValueType const&> property_values;
|
||||
Vector<typename PropList::ValueType> property_values;
|
||||
Vector<bool> property_set;
|
||||
|
||||
Vector<size_t> current_block;
|
||||
|
@ -1566,10 +1565,23 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
|
|||
|
||||
auto update_tables = [](auto code_point, auto& tables, auto& metadata) -> ErrorOr<void> {
|
||||
static constexpr auto BLOCK_SIZE = CodePointTables::LSB_MASK + 1;
|
||||
static Unicode::CodePointRangeComparator comparator {};
|
||||
|
||||
for (auto& property_values : metadata.property_values) {
|
||||
size_t ranges_to_remove = 0;
|
||||
auto has_property = false;
|
||||
|
||||
for (auto const& range : property_values) {
|
||||
if (auto comparison = comparator(code_point, range); comparison <= 0) {
|
||||
has_property = comparison == 0;
|
||||
break;
|
||||
}
|
||||
|
||||
++ranges_to_remove;
|
||||
}
|
||||
|
||||
for (auto const& property_values : metadata.property_values) {
|
||||
auto has_property = binary_search(property_values, code_point, nullptr, Unicode::CodePointRangeComparator {}) != nullptr;
|
||||
metadata.property_set.unchecked_append(has_property);
|
||||
property_values.remove(0, ranges_to_remove);
|
||||
}
|
||||
|
||||
size_t unique_properties_index = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue