mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 07:38:10 +00:00
LibTimeZone+LibUnicode: Generate string data with run-length encoding
Currently, the unique string lists are stored in the initialized data sections of their shared libraries. In order to move the data to the read-only section, generate the strings using RLE arrays. We generate two arrays: the first is the RLE data itself, the second is a list of indices into the RLE array for each string. We then generate a decoding method to convert an RLE string to a StringView.
This commit is contained in:
parent
de980de0e4
commit
becec3578f
6 changed files with 138 additions and 59 deletions
|
@ -11,6 +11,7 @@
|
|||
#include <AK/HashMap.h>
|
||||
#include <AK/JsonValue.h>
|
||||
#include <AK/LexicalPath.h>
|
||||
#include <AK/NumericLimits.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/QuickSort.h>
|
||||
#include <AK/SourceGenerator.h>
|
||||
|
@ -174,7 +175,7 @@ static constexpr Array<Span<@type@ const>, @size@ + 1> @name@ { {
|
|||
|
||||
// clang-format off
|
||||
// clang-format gets confused by the requires() clauses above, and formats this section very weirdly.
|
||||
private:
|
||||
protected:
|
||||
Vector<StorageType> m_storage;
|
||||
HashMap<StorageType, IndexType> m_storage_indices;
|
||||
// clang-format on
|
||||
|
@ -185,9 +186,87 @@ class UniqueStringStorage : public UniqueStorage<String, StringIndexType> {
|
|||
using Base = UniqueStorage<String, StringIndexType>;
|
||||
|
||||
public:
|
||||
// The goal of the string table generator is to ensure the table is located within the read-only
|
||||
// section of the shared library. If StringViews are generated directly, the table will be located
|
||||
// in the initialized data section. So instead, we generate run-length encoded (RLE) arrays to
|
||||
// represent the strings.
|
||||
// Emits the unique string table into the generated source as three pieces:
//   - s_encoded_strings: every stored string laid out back to back as raw bytes, each one
//     preceded by its length encoded as two bytes (high byte first).
//   - s_encoded_string_indices: one entry per stored string, in storage order, holding the
//     offset of that string's length prefix within s_encoded_strings.
//   - decode_string(index): turns a 1-based string index into a StringView over the encoded
//     bytes. Index 0 and zero-length strings both decode to an empty StringView.
void generate(SourceGenerator& generator)
{
    // Base::generate() is intentionally not called here: it would emit the strings as an array
    // of StringViews, which is exactly the representation this encoding replaces.

    constexpr size_t max_values_per_row = 300;
    size_t values_in_current_row = 0;

    // Appends one array element in hexadecimal, comma-separating values and breaking the
    // generated line once max_values_per_row values have been written to it.
    auto append_hex_value = [&](auto value) {
        if (values_in_current_row++ > 0)
            generator.append(", ");

        generator.append(String::formatted("{:#x}", value));

        if (values_in_current_row == max_values_per_row) {
            values_in_current_row = 0;
            generator.append(",\n ");
        }
    };

    // First pass: compute where each string's length prefix will start in the encoded array,
    // and thereby the total size of that array.
    Vector<u32> string_indices;
    string_indices.ensure_capacity(Base::m_storage.size());
    u32 next_index { 0 };

    for (auto const& string : Base::m_storage) {
        // Ensure the string length may be encoded as two u8s.
        VERIFY(string.length() <= NumericLimits<u16>::max());

        string_indices.unchecked_append(next_index);
        // Two bytes of length prefix, followed by the string's bytes.
        next_index += string.length() + 2;
    }

    generator.set("size", String::number(next_index));
    generator.append(R"~~~(
static constexpr Array<u8, @size@> s_encoded_strings { {
    )~~~");

    // Second pass: write out each string as [length high byte, length low byte, bytes...].
    for (auto const& string : Base::m_storage) {
        auto length = string.length();
        append_hex_value((length & 0xff00) >> 8);
        append_hex_value(length & 0x00ff);

        for (auto ch : string)
            append_hex_value(static_cast<u8>(ch));
    }

    generator.append(R"~~~(
} };
)~~~");

    generator.set("size", String::number(string_indices.size()));
    generator.append(R"~~~(
static constexpr Array<u32, @size@> s_encoded_string_indices { {
    )~~~");

    // A new array starts here, so row tracking starts over as well.
    values_in_current_row = 0;
    for (auto index : string_indices)
        append_hex_value(index);

    // The generated decoder treats index 0 as "no string"; every other index is shifted down by
    // one before being looked up in s_encoded_string_indices.
    generator.append(R"~~~(
} };

static constexpr StringView decode_string(size_t index)
{
    if (index == 0)
        return {};

    index = s_encoded_string_indices[index - 1];

    auto length_high = s_encoded_strings[index];
    auto length_low = s_encoded_strings[index + 1];

    size_t length = (length_high << 8) | length_low;
    if (length == 0)
        return {};

    auto const* start = &s_encoded_strings[index + 2];
    return { reinterpret_cast<char const*>(start), length };
}
)~~~");
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue