mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 14:28:12 +00:00
LibUnicode: Generate standalone compile-time array for combining class
This commit is contained in:
parent
9f83774913
commit
d83b262e64
2 changed files with 31 additions and 24 deletions
|
@ -88,6 +88,8 @@ struct CodePointData {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct UnicodeData {
|
struct UnicodeData {
|
||||||
|
u32 code_points_with_non_zero_combining_class { 0 };
|
||||||
|
|
||||||
u32 simple_uppercase_mapping_size { 0 };
|
u32 simple_uppercase_mapping_size { 0 };
|
||||||
u32 simple_lowercase_mapping_size { 0 };
|
u32 simple_lowercase_mapping_size { 0 };
|
||||||
|
|
||||||
|
@ -438,6 +440,7 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unicode_data.code_points_with_non_zero_combining_class += data.canonical_combining_class != 0;
|
||||||
unicode_data.simple_uppercase_mapping_size += data.simple_uppercase_mapping.has_value();
|
unicode_data.simple_uppercase_mapping_size += data.simple_uppercase_mapping.has_value();
|
||||||
unicode_data.simple_lowercase_mapping_size += data.simple_lowercase_mapping.has_value();
|
unicode_data.simple_lowercase_mapping_size += data.simple_lowercase_mapping.has_value();
|
||||||
|
|
||||||
|
@ -564,6 +567,8 @@ namespace Detail {
|
||||||
|
|
||||||
Optional<UnicodeData> unicode_data_for_code_point(u32 code_point);
|
Optional<UnicodeData> unicode_data_for_code_point(u32 code_point);
|
||||||
|
|
||||||
|
u32 canonical_combining_class(u32 code_point);
|
||||||
|
|
||||||
u32 simple_uppercase_mapping(u32 code_point);
|
u32 simple_uppercase_mapping(u32 code_point);
|
||||||
u32 simple_lowercase_mapping(u32 code_point);
|
u32 simple_lowercase_mapping(u32 code_point);
|
||||||
Span<SpecialCasing const* const> special_case_mapping(u32 code_point);
|
Span<SpecialCasing const* const> special_case_mapping(u32 code_point);
|
||||||
|
@ -750,6 +755,12 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
||||||
)~~~");
|
)~~~");
|
||||||
};
|
};
|
||||||
|
|
||||||
|
append_code_point_mappings("combining_class"sv, "CodePointMapping"sv, unicode_data.code_points_with_non_zero_combining_class,
|
||||||
|
[](auto const& data) -> Optional<u32> {
|
||||||
|
if (data.canonical_combining_class == 0)
|
||||||
|
return {};
|
||||||
|
return data.canonical_combining_class;
|
||||||
|
});
|
||||||
append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; });
|
append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; });
|
||||||
append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; });
|
append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; });
|
||||||
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
||||||
|
@ -877,20 +888,22 @@ Optional<UnicodeData> unicode_data_for_code_point(u32 code_point)
|
||||||
}
|
}
|
||||||
)~~~");
|
)~~~");
|
||||||
|
|
||||||
auto append_code_point_mapping_search = [&](StringView method, StringView mappings) {
|
auto append_code_point_mapping_search = [&](StringView method, StringView mappings, StringView fallback) {
|
||||||
generator.set("method", method);
|
generator.set("method", method);
|
||||||
generator.set("mappings", mappings);
|
generator.set("mappings", mappings);
|
||||||
|
generator.set("fallback", fallback);
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
u32 @method@(u32 code_point)
|
u32 @method@(u32 code_point)
|
||||||
{
|
{
|
||||||
auto const* mapping = binary_search(@mappings@, code_point, nullptr, CodePointComparator<CodePointMapping> {});
|
auto const* mapping = binary_search(@mappings@, code_point, nullptr, CodePointComparator<CodePointMapping> {});
|
||||||
return mapping ? mapping->mapping : code_point;
|
return mapping ? mapping->mapping : @fallback@;
|
||||||
}
|
}
|
||||||
)~~~");
|
)~~~");
|
||||||
};
|
};
|
||||||
|
|
||||||
append_code_point_mapping_search("simple_uppercase_mapping"sv, "s_uppercase_mappings"sv);
|
append_code_point_mapping_search("canonical_combining_class"sv, "s_combining_class_mappings"sv, "0"sv);
|
||||||
append_code_point_mapping_search("simple_lowercase_mapping"sv, "s_lowercase_mappings"sv);
|
append_code_point_mapping_search("simple_uppercase_mapping"sv, "s_uppercase_mappings"sv, "code_point"sv);
|
||||||
|
append_code_point_mapping_search("simple_lowercase_mapping"sv, "s_lowercase_mappings"sv, "code_point"sv);
|
||||||
|
|
||||||
generator.append(R"~~~(
|
generator.append(R"~~~(
|
||||||
Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
||||||
|
|
|
@ -36,13 +36,11 @@ static bool is_after_uppercase_i(Utf8View const& string, size_t index)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto unicode_data = Detail::unicode_data_for_code_point(code_point);
|
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
||||||
if (!unicode_data.has_value())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (unicode_data->canonical_combining_class == 0)
|
if (canonical_combining_class == 0)
|
||||||
found_uppercase_i = false;
|
found_uppercase_i = false;
|
||||||
else if (unicode_data->canonical_combining_class == 230)
|
else if (canonical_combining_class == 230)
|
||||||
found_uppercase_i = false;
|
found_uppercase_i = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,13 +60,11 @@ static bool is_after_soft_dotted_code_point(Utf8View const& string, size_t index
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto unicode_data = Detail::unicode_data_for_code_point(code_point);
|
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
||||||
if (!unicode_data.has_value())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (unicode_data->canonical_combining_class == 0)
|
if (canonical_combining_class == 0)
|
||||||
found_soft_dotted_code_point = false;
|
found_soft_dotted_code_point = false;
|
||||||
else if (unicode_data->canonical_combining_class == 230)
|
else if (canonical_combining_class == 230)
|
||||||
found_soft_dotted_code_point = false;
|
found_soft_dotted_code_point = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,12 +119,11 @@ static bool is_followed_by_combining_class_above(Utf8View const& string, size_t
|
||||||
: Utf8View {};
|
: Utf8View {};
|
||||||
|
|
||||||
for (auto code_point : following_view) {
|
for (auto code_point : following_view) {
|
||||||
auto unicode_data = Detail::unicode_data_for_code_point(code_point);
|
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
||||||
if (!unicode_data.has_value())
|
|
||||||
|
if (canonical_combining_class == 0)
|
||||||
return false;
|
return false;
|
||||||
if (unicode_data->canonical_combining_class == 0)
|
if (canonical_combining_class == 230)
|
||||||
return false;
|
|
||||||
if (unicode_data->canonical_combining_class == 230)
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,12 +142,11 @@ static bool is_followed_by_combining_dot_above(Utf8View const& string, size_t in
|
||||||
if (code_point == 0x307)
|
if (code_point == 0x307)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
auto unicode_data = Detail::unicode_data_for_code_point(code_point);
|
u32 canonical_combining_class = Detail::canonical_combining_class(code_point);
|
||||||
if (!unicode_data.has_value())
|
|
||||||
|
if (canonical_combining_class == 0)
|
||||||
return false;
|
return false;
|
||||||
if (unicode_data->canonical_combining_class == 0)
|
if (canonical_combining_class == 230)
|
||||||
return false;
|
|
||||||
if (unicode_data->canonical_combining_class == 230)
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue