From ca2b030336d9cd58f49248fca8cde04fefe9319a Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sun, 5 Mar 2023 10:14:25 -0500 Subject: [PATCH] LibUnicode: Use binary search for lookups into the generated emoji data This sorts the array of generated emoji data by code point (first by code point length, then by code point value). This lets us use a binary search to find emoji data, rather than the current linear search. In a profile of scrolling around /home/anon/Documents/emoji.txt, this reduces the runtime of Gfx::Emoji::emoji_for_code_points from 69.03% to 28.42%. Within that, Unicode::find_emoji_for_code_points reduces from 28.42% to just 1.95%. --- .../LibUnicode/GenerateEmojiData.cpp | 71 +++++++++++++------ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp index fa9ade687d..0474357668 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateEmojiData.cpp @@ -7,6 +7,7 @@ #include "GeneratorUtil.h" #include #include +#include #include #include #include @@ -306,13 +307,27 @@ static constexpr Array s_emojis { {)~~~"); generator.append(R"~~~( } }; +struct EmojiCodePointComparator { + constexpr int operator()(ReadonlySpan code_points, EmojiData const& emoji) + { + auto emoji_code_points = emoji.code_points(); + + if (code_points.size() != emoji_code_points.size()) + return static_cast(code_points.size()) - static_cast(emoji_code_points.size()); + + for (size_t i = 0; i < code_points.size(); ++i) { + if (code_points[i] != emoji_code_points[i]) + return static_cast(code_points[i]) - static_cast(emoji_code_points[i]); + } + + return 0; + } +}; + Optional find_emoji_for_code_points(ReadonlySpan code_points) { - for (auto& emoji : s_emojis) { - if (emoji.code_points() == code_points) - return emoji.to_unicode_emoji(); - } - + if (auto const* emoji = binary_search(s_emojis, code_points, nullptr, EmojiCodePointComparator {})) + return emoji->to_unicode_emoji(); return {}; } @@ -400,22 +415,8 @@ ErrorOr serenity_main(Main::Arguments arguments) TRY(validate_emoji(emoji_resource_path, emoji_data)); } - size_t code_point_array_index { 0 }; - for (auto& emoji : emoji_data.emojis) { - emoji.code_point_array_index = code_point_array_index; - code_point_array_index += emoji.code_points.size(); - + for (auto& emoji : emoji_data.emojis) set_image_path_for_emoji(emoji_resource_path, emoji_data, emoji); - } - - if (!generated_header_path.is_empty()) { - auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write)); - TRY(generate_emoji_data_header(*generated_header_file, emoji_data)); - } - if (!generated_implementation_path.is_empty()) { - auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); - TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data)); - } if (!generated_installation_path.is_empty()) { TRY(Core::Directory::create(LexicalPath { generated_installation_path }.parent(), Core::Directory::CreateDirectories::Yes)); @@ -424,5 +425,35 @@ ErrorOr serenity_main(Main::Arguments arguments) TRY(generate_emoji_installation(*generated_installation_file, emoji_data)); } + if (!generated_header_path.is_empty()) { + auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write)); + TRY(generate_emoji_data_header(*generated_header_file, emoji_data)); + } + + if (!generated_implementation_path.is_empty()) { + quick_sort(emoji_data.emojis, [](auto const& lhs, auto const& rhs) { + if (lhs.code_points.size() != rhs.code_points.size()) + return lhs.code_points.size() < rhs.code_points.size(); + + for (size_t i = 0; i < lhs.code_points.size(); ++i) { + if (lhs.code_points[i] < rhs.code_points[i]) + return true; + if (lhs.code_points[i] > rhs.code_points[i]) + return false; + } + + return false; + }); + + size_t code_point_array_index { 0 }; + for (auto& emoji : emoji_data.emojis) { + emoji.code_point_array_index = code_point_array_index; + code_point_array_index += emoji.code_points.size(); + } + + auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); + TRY(generate_emoji_data_implementation(*generated_implementation_file, emoji_data)); + } + return 0; }