mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 13:38:11 +00:00
Meta: Do not hard-code index types for UCD/CLDR/TZDB code generators
Hand-picking the smallest index type that fits a particular generated
array started with commit 3ad159537e. This was to reduce the size of the
generated library.
Since then, the number of types using UniqueStorage has grown a ton,
creating a long list of types for which index types are manually picked.
When a new UCD/CLDR/TZDB is released, and the current index type no
longer fits the generated data, we fail to generate. Tracking down which
index caused the failure is a pretty annoying process.
Instead, we can just use size_t while in the generators themselves, then
automatically pick the size needed for the generated code.
This commit is contained in:
parent
fa2579ffa9
commit
b2164ad979
9 changed files with 268 additions and 379 deletions
|
@ -15,11 +15,8 @@
|
|||
#include <LibCore/Stream.h>
|
||||
#include <LibUnicode/Emoji.h>
|
||||
|
||||
using StringIndexType = u16;
|
||||
constexpr auto s_string_index_type = "u16"sv;
|
||||
|
||||
struct Emoji {
|
||||
StringIndexType name { 0 };
|
||||
size_t name { 0 };
|
||||
Optional<String> image_path;
|
||||
Unicode::EmojiGroup group;
|
||||
String subgroup;
|
||||
|
@ -31,7 +28,7 @@ struct Emoji {
|
|||
};
|
||||
|
||||
struct EmojiData {
|
||||
UniqueStringStorage<StringIndexType> unique_strings;
|
||||
UniqueStringStorage unique_strings;
|
||||
Vector<Emoji> emojis;
|
||||
};
|
||||
|
||||
|
@ -180,7 +177,7 @@ static ErrorOr<void> generate_emoji_data_implementation(Core::Stream::BufferedFi
|
|||
StringBuilder builder;
|
||||
SourceGenerator generator { builder };
|
||||
|
||||
generator.set("string_index_type"sv, s_string_index_type);
|
||||
generator.set("string_index_type"sv, emoji_data.unique_strings.type_that_fits());
|
||||
generator.set("emojis_size"sv, String::number(emoji_data.emojis.size()));
|
||||
|
||||
generator.append(R"~~~(
|
||||
|
|
|
@ -20,9 +20,6 @@
|
|||
#include <LibCore/ArgsParser.h>
|
||||
#include <LibCore/Stream.h>
|
||||
|
||||
using StringIndexType = u16;
|
||||
constexpr auto s_string_index_type = "u16"sv;
|
||||
|
||||
// Some code points are excluded from UnicodeData.txt, and instead are part of a "range" of code
|
||||
// points, as indicated by the "name" field. For example:
|
||||
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
||||
|
@ -74,7 +71,7 @@ using NormalizationProps = HashMap<String, Vector<Normalization>>;
|
|||
|
||||
struct CodePointName {
|
||||
CodePointRange code_point_range;
|
||||
StringIndexType name { 0 };
|
||||
size_t name { 0 };
|
||||
};
|
||||
|
||||
// UnicodeData source: https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt
|
||||
|
@ -83,7 +80,7 @@ struct CodePointName {
|
|||
struct CodePointData {
|
||||
u32 code_point { 0 };
|
||||
String name;
|
||||
Optional<StringIndexType> abbreviation;
|
||||
Optional<size_t> abbreviation;
|
||||
u8 canonical_combining_class { 0 };
|
||||
String bidi_class;
|
||||
Optional<CodePointDecomposition> decomposition_mapping;
|
||||
|
@ -101,11 +98,11 @@ struct CodePointData {
|
|||
|
||||
struct BlockName {
|
||||
CodePointRange code_point_range;
|
||||
StringIndexType name { 0 };
|
||||
size_t name { 0 };
|
||||
};
|
||||
|
||||
struct UnicodeData {
|
||||
UniqueStringStorage<StringIndexType> unique_strings;
|
||||
UniqueStringStorage unique_strings;
|
||||
|
||||
u32 code_points_with_non_zero_combining_class { 0 };
|
||||
|
||||
|
@ -125,8 +122,8 @@ struct UnicodeData {
|
|||
|
||||
Vector<CodePointData> code_point_data;
|
||||
|
||||
HashMap<u32, StringIndexType> code_point_abbreviations;
|
||||
HashMap<u32, StringIndexType> code_point_display_name_aliases;
|
||||
HashMap<u32, size_t> code_point_abbreviations;
|
||||
HashMap<u32, size_t> code_point_display_name_aliases;
|
||||
Vector<CodePointName> code_point_display_names;
|
||||
|
||||
PropList general_categories;
|
||||
|
@ -795,7 +792,7 @@ static ErrorOr<void> generate_unicode_data_implementation(Core::Stream::Buffered
|
|||
StringBuilder builder;
|
||||
SourceGenerator generator { builder };
|
||||
|
||||
generator.set("string_index_type"sv, s_string_index_type);
|
||||
generator.set("string_index_type"sv, unicode_data.unique_strings.type_that_fits());
|
||||
generator.set("largest_special_casing_size", String::number(unicode_data.largest_special_casing_size));
|
||||
generator.set("special_casing_size", String::number(unicode_data.special_casing.size()));
|
||||
|
||||
|
@ -947,7 +944,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
generator.set("code_point", String::formatted("{:#x}", data.code_point));
|
||||
generator.append("{ @code_point@");
|
||||
|
||||
if constexpr (IsSame<decltype(mapping), Optional<u32>> || IsSame<decltype(mapping), Optional<StringIndexType>>) {
|
||||
if constexpr (IsSame<decltype(mapping), Optional<u32>> || IsSame<decltype(mapping), Optional<size_t>>) {
|
||||
generator.set("mapping", String::formatted("{:#x}", *mapping));
|
||||
generator.append(", @mapping@ },");
|
||||
} else if constexpr (IsSame<decltype(mapping), Optional<CodePointDecomposition>>) {
|
||||
|
|
|
@ -51,10 +51,10 @@ struct AK::Traits<Vector<T>> : public GenericTraits<Vector<T>> {
|
|||
}
|
||||
};
|
||||
|
||||
template<typename StorageType, typename IndexType>
|
||||
template<typename StorageType>
|
||||
class UniqueStorage {
|
||||
public:
|
||||
IndexType ensure(StorageType value)
|
||||
size_t ensure(StorageType value)
|
||||
{
|
||||
// We maintain a set of unique values in two structures: a vector which stores the values in
|
||||
// the order they are added, and a hash map which maps that value to its index in the vector.
|
||||
|
@ -68,17 +68,14 @@ public:
|
|||
return *index;
|
||||
|
||||
m_storage.append(move(value));
|
||||
size_t index = m_storage.size();
|
||||
|
||||
VERIFY(index < NumericLimits<IndexType>::max());
|
||||
|
||||
auto storage_index = static_cast<IndexType>(index);
|
||||
auto storage_index = m_storage.size();
|
||||
m_storage_indices.set(m_storage.last(), storage_index);
|
||||
|
||||
return storage_index;
|
||||
}
|
||||
|
||||
StorageType const& get(IndexType index) const
|
||||
StorageType const& get(size_t index) const
|
||||
{
|
||||
if (index == 0) {
|
||||
static StorageType empty {};
|
||||
|
@ -89,6 +86,17 @@ public:
|
|||
return m_storage.at(index - 1);
|
||||
}
|
||||
|
||||
// Names the narrowest unsigned integer type wide enough for every index this storage hands out.
// get() treats index 0 as the empty value and reads m_storage.at(index - 1) otherwise, so the
// largest index in use is m_storage.size() itself.
StringView type_that_fits() const
{
    auto const largest_index = m_storage.size();

    // Walk from the widest candidate down; the first bound that is exceeded decides the type.
    if (largest_index > NumericLimits<u32>::max())
        return "u64"sv;
    if (largest_index > NumericLimits<u16>::max())
        return "u32"sv;
    if (largest_index > NumericLimits<u8>::max())
        return "u16"sv;

    return "u8"sv;
}
|
||||
|
||||
void generate(SourceGenerator& generator, StringView type, StringView name, size_t max_values_per_row) requires(!StorageTypeIsList<StorageType>)
|
||||
{
|
||||
generator.set("type"sv, type);
|
||||
|
@ -177,13 +185,12 @@ static constexpr Array<Span<@type@ const>, @size@ + 1> @name@ { {
|
|||
// clang-format gets confused by the requires() clauses above, and formats this section very weirdly.
|
||||
protected:
|
||||
Vector<StorageType> m_storage;
|
||||
HashMap<StorageType, IndexType> m_storage_indices;
|
||||
HashMap<StorageType, size_t> m_storage_indices;
|
||||
// clang-format on
|
||||
};
|
||||
|
||||
template<typename StringIndexType>
|
||||
class UniqueStringStorage : public UniqueStorage<String, StringIndexType> {
|
||||
using Base = UniqueStorage<String, StringIndexType>;
|
||||
class UniqueStringStorage : public UniqueStorage<String> {
|
||||
using Base = UniqueStorage<String>;
|
||||
|
||||
public:
|
||||
// The goal of the string table generator is to ensure the table is located within the read-only
|
||||
|
@ -275,9 +282,8 @@ struct Alias {
|
|||
String alias;
|
||||
};
|
||||
|
||||
template<typename StringIndexType>
|
||||
struct CanonicalLanguageID {
|
||||
static ErrorOr<CanonicalLanguageID> parse(UniqueStringStorage<StringIndexType>& unique_strings, StringView language)
|
||||
static ErrorOr<CanonicalLanguageID> parse(UniqueStringStorage& unique_strings, StringView language)
|
||||
{
|
||||
CanonicalLanguageID language_id {};
|
||||
|
||||
|
@ -314,10 +320,10 @@ struct CanonicalLanguageID {
|
|||
return language_id;
|
||||
}
|
||||
|
||||
StringIndexType language { 0 };
|
||||
StringIndexType script { 0 };
|
||||
StringIndexType region { 0 };
|
||||
Vector<StringIndexType> variants {};
|
||||
size_t language { 0 };
|
||||
size_t script { 0 };
|
||||
size_t region { 0 };
|
||||
Vector<size_t> variants {};
|
||||
};
|
||||
|
||||
inline ErrorOr<NonnullOwnPtr<Core::Stream::BufferedFile>> open_file(StringView path, Core::Stream::OpenMode mode)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue