mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 13:28:11 +00:00
Meta+LibUnicode: Avoid relocations for static unicode data
Previously, each entry in s_decomposition_mappings held a Span pointing into s_decomposition_mappings_data. Those embedded pointers caused thousands of avoidable relocations at load time. Storing an index and a count instead, and building the Span on lookup, avoids these relocations and saves about 128kB RAM for each process which uses LibUnicode.
This commit is contained in:
parent
fb71df5cb1
commit
2d3567ee92
3 changed files with 26 additions and 12 deletions
|
@ -768,6 +768,13 @@ struct SpecialCasing {
|
||||||
Condition condition { Condition::None };
|
Condition condition { Condition::None };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct CodePointDecompositionRaw {
|
||||||
|
u32 code_point { 0 };
|
||||||
|
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
|
||||||
|
size_t decomposition_index { 0 };
|
||||||
|
size_t decomposition_count { 0 };
|
||||||
|
};
|
||||||
|
|
||||||
struct CodePointDecomposition {
|
struct CodePointDecomposition {
|
||||||
u32 code_point { 0 };
|
u32 code_point { 0 };
|
||||||
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
|
CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical };
|
||||||
|
@ -947,7 +954,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
||||||
generator.set("tag", mapping->tag);
|
generator.set("tag", mapping->tag);
|
||||||
generator.set("start", String::number(mapping->decomposition_index));
|
generator.set("start", String::number(mapping->decomposition_index));
|
||||||
generator.set("size", String::number(mapping->decomposition_size));
|
generator.set("size", String::number(mapping->decomposition_size));
|
||||||
generator.append(", CompatibilityFormattingTag::@tag@, Span<u32 const> { s_decomposition_mappings_data.data() + @start@, @size@ } },");
|
generator.append(", CompatibilityFormattingTag::@tag@, @start@, @size@ },");
|
||||||
} else {
|
} else {
|
||||||
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
|
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
|
||||||
generator.append(" },");
|
generator.append(" },");
|
||||||
|
@ -974,7 +981,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
||||||
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
||||||
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
|
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
|
||||||
|
|
||||||
append_code_point_mappings("decomposition"sv, "CodePointDecomposition"sv, unicode_data.code_points_with_decomposition_mapping,
|
append_code_point_mappings("decomposition"sv, "CodePointDecompositionRaw"sv, unicode_data.code_points_with_decomposition_mapping,
|
||||||
[](auto const& data) {
|
[](auto const& data) {
|
||||||
return data.decomposition_mapping;
|
return data.decomposition_mapping;
|
||||||
});
|
});
|
||||||
|
@ -1153,17 +1160,20 @@ Optional<StringView> code_point_abbreviation(u32 code_point)
|
||||||
return decode_string(mapping->abbreviation);
|
return decode_string(mapping->abbreviation);
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point)
|
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point)
|
||||||
{
|
{
|
||||||
auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecomposition> {});
|
auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecompositionRaw> {});
|
||||||
if (mapping == nullptr)
|
if (mapping == nullptr)
|
||||||
return {};
|
return {};
|
||||||
return *mapping;
|
return CodePointDecomposition { mapping->code_point, mapping->tag, Span<u32 const> { s_decomposition_mappings_data.data() + mapping->decomposition_index, mapping->decomposition_count } };
|
||||||
}
|
}
|
||||||
|
|
||||||
Span<CodePointDecomposition const> code_point_decompositions()
|
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index)
|
||||||
{
|
{
|
||||||
return s_decomposition_mappings;
|
if (index >= s_decomposition_mappings.size())
|
||||||
|
return {};
|
||||||
|
auto const& mapping = s_decomposition_mappings[index];
|
||||||
|
return CodePointDecomposition { mapping.code_point, mapping.tag, Span<u32 const> { s_decomposition_mappings_data.data() + mapping.decomposition_index, mapping.decomposition_count } };
|
||||||
}
|
}
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
|
|
|
@ -14,8 +14,8 @@
|
||||||
|
|
||||||
namespace Unicode {
|
namespace Unicode {
|
||||||
|
|
||||||
Optional<CodePointDecomposition const&> __attribute__((weak)) code_point_decomposition(u32) { return {}; }
|
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition(u32) { return {}; }
|
||||||
Span<CodePointDecomposition const> __attribute__((weak)) code_point_decompositions() { return {}; }
|
Optional<CodePointDecomposition const> __attribute__((weak)) code_point_decomposition_by_index(size_t) { return {}; }
|
||||||
|
|
||||||
NormalizationForm normalization_form_from_string(StringView form)
|
NormalizationForm normalization_form_from_string(StringView form)
|
||||||
{
|
{
|
||||||
|
@ -122,7 +122,11 @@ static u32 combine_code_points(u32 a, u32 b)
|
||||||
{
|
{
|
||||||
Array<u32, 2> const points { a, b };
|
Array<u32, 2> const points { a, b };
|
||||||
// FIXME: Do something better than linear search to find reverse mappings.
|
// FIXME: Do something better than linear search to find reverse mappings.
|
||||||
for (auto const& mapping : Unicode::code_point_decompositions()) {
|
for (size_t index = 0;; ++index) {
|
||||||
|
auto mapping_maybe = Unicode::code_point_decomposition_by_index(index);
|
||||||
|
if (!mapping_maybe.has_value())
|
||||||
|
break;
|
||||||
|
auto& mapping = mapping_maybe.value();
|
||||||
if (mapping.tag == CompatibilityFormattingTag::Canonical && mapping.decomposition == points) {
|
if (mapping.tag == CompatibilityFormattingTag::Canonical && mapping.decomposition == points) {
|
||||||
if (code_point_has_property(mapping.code_point, Property::Full_Composition_Exclusion))
|
if (code_point_has_property(mapping.code_point, Property::Full_Composition_Exclusion))
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -15,8 +15,8 @@
|
||||||
|
|
||||||
namespace Unicode {
|
namespace Unicode {
|
||||||
|
|
||||||
Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point);
|
Optional<CodePointDecomposition const> code_point_decomposition(u32 code_point);
|
||||||
Span<CodePointDecomposition const> code_point_decompositions();
|
Optional<CodePointDecomposition const> code_point_decomposition_by_index(size_t index);
|
||||||
|
|
||||||
enum class NormalizationForm {
|
enum class NormalizationForm {
|
||||||
NFD,
|
NFD,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue