diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp index d58bbd974d..4c41aa29e1 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp @@ -768,6 +768,13 @@ struct SpecialCasing { Condition condition { Condition::None }; }; +struct CodePointDecompositionRaw { + u32 code_point { 0 }; + CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical }; + size_t decomposition_index { 0 }; + size_t decomposition_count { 0 }; +}; + struct CodePointDecomposition { u32 code_point { 0 }; CompatibilityFormattingTag tag { CompatibilityFormattingTag::Canonical }; @@ -947,7 +954,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { generator.set("tag", mapping->tag); generator.set("start", String::number(mapping->decomposition_index)); generator.set("size", String::number(mapping->decomposition_size)); - generator.append(", CompatibilityFormattingTag::@tag@, Span { s_decomposition_mappings_data.data() + @start@, @size@ } },"); + generator.append(", CompatibilityFormattingTag::@tag@, @start@, @size@ },"); } else { append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv); generator.append(" },"); @@ -974,7 +981,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { { append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; }); append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; }); - append_code_point_mappings("decomposition"sv, "CodePointDecomposition"sv, unicode_data.code_points_with_decomposition_mapping, + append_code_point_mappings("decomposition"sv, "CodePointDecompositionRaw"sv, unicode_data.code_points_with_decomposition_mapping, [](auto const& data) { return data.decomposition_mapping; }); @@ -1153,17 +1160,20 @@ Optional code_point_abbreviation(u32 code_point) return decode_string(mapping->abbreviation); } -Optional code_point_decomposition(u32 code_point) +Optional code_point_decomposition(u32 code_point) { - auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator {}); + auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator {}); if (mapping == nullptr) return {}; - return *mapping; + return CodePointDecomposition { mapping->code_point, mapping->tag, Span { s_decomposition_mappings_data.data() + mapping->decomposition_index, mapping->decomposition_count } }; } -Span code_point_decompositions() +Optional code_point_decomposition_by_index(size_t index) { - return s_decomposition_mappings; + if (index >= s_decomposition_mappings.size()) + return {}; + auto const& mapping = s_decomposition_mappings[index]; + return CodePointDecomposition { mapping.code_point, mapping.tag, Span { s_decomposition_mappings_data.data() + mapping.decomposition_index, mapping.decomposition_count } }; } )~~~"); diff --git a/Userland/Libraries/LibUnicode/Normalize.cpp b/Userland/Libraries/LibUnicode/Normalize.cpp index bc1ab70221..64393e157a 100644 --- a/Userland/Libraries/LibUnicode/Normalize.cpp +++ b/Userland/Libraries/LibUnicode/Normalize.cpp @@ -14,8 +14,8 @@ namespace Unicode { -Optional __attribute__((weak)) code_point_decomposition(u32) { return {}; } -Span __attribute__((weak)) code_point_decompositions() { return {}; } +Optional __attribute__((weak)) code_point_decomposition(u32) { return {}; } +Optional __attribute__((weak)) code_point_decomposition_by_index(size_t) { return {}; } NormalizationForm normalization_form_from_string(StringView form) { @@ -122,7 +122,11 @@ static u32 combine_code_points(u32 a, u32 b) { Array const points { a, b }; // FIXME: Do something better than linear search to find reverse mappings. - for (auto const& mapping : Unicode::code_point_decompositions()) { + for (size_t index = 0;; ++index) { + auto mapping_maybe = Unicode::code_point_decomposition_by_index(index); + if (!mapping_maybe.has_value()) + break; + auto& mapping = mapping_maybe.value(); if (mapping.tag == CompatibilityFormattingTag::Canonical && mapping.decomposition == points) { if (code_point_has_property(mapping.code_point, Property::Full_Composition_Exclusion)) continue; diff --git a/Userland/Libraries/LibUnicode/Normalize.h b/Userland/Libraries/LibUnicode/Normalize.h index 23531afefe..4bce9df35f 100644 --- a/Userland/Libraries/LibUnicode/Normalize.h +++ b/Userland/Libraries/LibUnicode/Normalize.h @@ -15,8 +15,8 @@ namespace Unicode { -Optional code_point_decomposition(u32 code_point); -Span code_point_decompositions(); +Optional code_point_decomposition(u32 code_point); +Optional code_point_decomposition_by_index(size_t index); enum class NormalizationForm { NFD,