From 267eb3b329e730d38261128f06a0823f50a7567e Mon Sep 17 00:00:00 2001 From: Jan de Visser Date: Thu, 17 Jun 2021 14:07:29 -0400 Subject: [PATCH] LibSQL: Hash index implementation for the SQL storage layer This patch implements a basic hash index. It uses the extendible hashing algorithm. Also includes a test file. --- Tests/LibSQL/TestSqlHashIndex.cpp | 329 ++++++++++++++++++ Userland/Libraries/LibSQL/CMakeLists.txt | 1 + Userland/Libraries/LibSQL/Forward.h | 4 + Userland/Libraries/LibSQL/HashIndex.cpp | 423 +++++++++++++++++++++++ Userland/Libraries/LibSQL/HashIndex.h | 188 ++++++++++ 5 files changed, 945 insertions(+) create mode 100644 Tests/LibSQL/TestSqlHashIndex.cpp create mode 100644 Userland/Libraries/LibSQL/HashIndex.cpp create mode 100644 Userland/Libraries/LibSQL/HashIndex.h diff --git a/Tests/LibSQL/TestSqlHashIndex.cpp b/Tests/LibSQL/TestSqlHashIndex.cpp new file mode 100644 index 0000000000..e1878569a5 --- /dev/null +++ b/Tests/LibSQL/TestSqlHashIndex.cpp @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include + +constexpr static int keys[] = { + 39, + 87, + 77, + 42, + 98, + 40, + 53, + 8, + 37, + 12, + 90, + 72, + 73, + 11, + 88, + 22, + 10, + 82, + 25, + 61, + 97, + 18, + 60, + 68, + 21, + 3, + 58, + 29, + 13, + 17, + 89, + 81, + 16, + 64, + 5, + 41, + 36, + 91, + 38, + 24, + 32, + 50, + 34, + 94, + 49, + 47, + 1, + 6, + 44, + 76, +}; +constexpr static u32 pointers[] = { + 92, + 4, + 50, + 47, + 68, + 73, + 24, + 28, + 50, + 93, + 60, + 36, + 92, + 72, + 53, + 26, + 91, + 84, + 25, + 43, + 88, + 12, + 62, + 35, + 96, + 27, + 96, + 27, + 99, + 30, + 21, + 89, + 54, + 60, + 37, + 68, + 35, + 55, + 80, + 2, + 33, + 26, + 93, + 70, + 45, + 44, + 3, + 66, + 75, + 4, +}; + +NonnullRefPtr setup_hash_index(SQL::Heap& heap); +void insert_and_get_to_and_from_hash_index(int num_keys); +void insert_into_and_scan_hash_index(int 
num_keys); + +NonnullRefPtr setup_hash_index(SQL::Heap& heap) +{ + SQL::TupleDescriptor tuple_descriptor; + tuple_descriptor.append({ "key_value", SQL::SQLType::Integer, SQL::Order::Ascending }); + tuple_descriptor.append({ "text_value", SQL::SQLType::Text, SQL::Order::Ascending }); + + auto directory_pointer = heap.user_value(0); + if (!directory_pointer) { + directory_pointer = heap.new_record_pointer(); + heap.set_user_value(0, directory_pointer); + } + auto hash_index = SQL::HashIndex::construct(heap, tuple_descriptor, directory_pointer); + return hash_index; +} + +void insert_and_get_to_and_from_hash_index(int num_keys) +{ + ScopeGuard guard([]() { unlink("test.db"); }); + { + auto heap = SQL::Heap::construct("test.db"); + auto hash_index = setup_hash_index(heap); + + for (auto ix = 0; ix < num_keys; ix++) { + SQL::Key k(hash_index->descriptor()); + k[0] = keys[ix]; + k[1] = String::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]); + k.set_pointer(pointers[ix]); + hash_index->insert(k); + } +#ifdef LIST_HASH_INDEX + hash_index->list_hash(); +#endif + } + + { + auto heap = SQL::Heap::construct("test.db"); + auto hash_index = setup_hash_index(heap); + + for (auto ix = 0; ix < num_keys; ix++) { + SQL::Key k(hash_index->descriptor()); + k[0] = keys[ix]; + k[1] = String::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]); + auto pointer_opt = hash_index->get(k); + EXPECT(pointer_opt.has_value()); + EXPECT_EQ(pointer_opt.value(), pointers[ix]); + } + } +} + +TEST_CASE(hash_index_one_key) +{ + insert_and_get_to_and_from_hash_index(1); +} + +TEST_CASE(hash_index_four_keys) +{ + insert_and_get_to_and_from_hash_index(4); +} + +TEST_CASE(hash_index_five_keys) +{ + insert_and_get_to_and_from_hash_index(5); +} + +TEST_CASE(hash_index_10_keys) +{ + insert_and_get_to_and_from_hash_index(10); +} + +TEST_CASE(hash_index_13_keys) +{ + insert_and_get_to_and_from_hash_index(13); +} + +TEST_CASE(hash_index_20_keys) +{ + 
insert_and_get_to_and_from_hash_index(20); +} + +TEST_CASE(hash_index_25_keys) +{ + insert_and_get_to_and_from_hash_index(25); +} + +TEST_CASE(hash_index_30_keys) +{ + insert_and_get_to_and_from_hash_index(30); +} + +TEST_CASE(hash_index_35_keys) +{ + insert_and_get_to_and_from_hash_index(35); +} + +TEST_CASE(hash_index_40_keys) +{ + insert_and_get_to_and_from_hash_index(40); +} + +TEST_CASE(hash_index_45_keys) +{ + insert_and_get_to_and_from_hash_index(45); +} + +TEST_CASE(hash_index_50_keys) +{ + insert_and_get_to_and_from_hash_index(50); +} + +void insert_into_and_scan_hash_index(int num_keys) +{ + ScopeGuard guard([]() { unlink("test.db"); }); + { + auto heap = SQL::Heap::construct("test.db"); + auto hash_index = setup_hash_index(heap); + + for (auto ix = 0; ix < num_keys; ix++) { + SQL::Key k(hash_index->descriptor()); + k[0] = keys[ix]; + k[1] = String::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]); + k.set_pointer(pointers[ix]); + hash_index->insert(k); + } +#ifdef LIST_HASH_INDEX + hash_index->list_hash(); +#endif + } + + { + auto heap = SQL::Heap::construct("test.db"); + auto hash_index = setup_hash_index(heap); + Vector found; + for (auto ix = 0; ix < num_keys; ix++) { + found.append(false); + } + + int count = 0; + for (auto iter = hash_index->begin(); !iter.is_end(); iter++, count++) { + auto key = (*iter); + auto key_value = (int)key[0]; + for (auto ix = 0; ix < num_keys; ix++) { + if (keys[ix] == key_value) { + EXPECT_EQ(key.pointer(), pointers[ix]); + if (found[ix]) + FAIL(String::formatted("Key {}, index {} already found previously", key_value, ix)); + found[ix] = true; + break; + } + } + } + +#ifdef LIST_HASH_INDEX + hash_index->list_hash(); +#endif + EXPECT_EQ(count, num_keys); + for (auto ix = 0; ix < num_keys; ix++) { + if (!found[ix]) + FAIL(String::formatted("Key {}, index {} not found", keys[ix], ix)); + } + } +} + +TEST_CASE(hash_index_scan_one_key) +{ + insert_into_and_scan_hash_index(1); +} + 
+TEST_CASE(hash_index_scan_four_keys) +{ + insert_into_and_scan_hash_index(4); +} + +TEST_CASE(hash_index_scan_five_keys) +{ + insert_into_and_scan_hash_index(5); +} + +TEST_CASE(hash_index_scan_10_keys) +{ + insert_into_and_scan_hash_index(10); +} + +TEST_CASE(hash_index_scan_15_keys) +{ + insert_into_and_scan_hash_index(15); +} + +TEST_CASE(hash_index_scan_20_keys) +{ + insert_into_and_scan_hash_index(20); +} + +TEST_CASE(hash_index_scan_30_keys) +{ + insert_into_and_scan_hash_index(30); +} + +TEST_CASE(hash_index_scan_40_keys) +{ + insert_into_and_scan_hash_index(40); +} + +TEST_CASE(hash_index_scan_50_keys) +{ + insert_into_and_scan_hash_index(50); +} diff --git a/Userland/Libraries/LibSQL/CMakeLists.txt b/Userland/Libraries/LibSQL/CMakeLists.txt index 1a726afa18..5c8ac80d43 100644 --- a/Userland/Libraries/LibSQL/CMakeLists.txt +++ b/Userland/Libraries/LibSQL/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES BTree.cpp BTreeIterator.cpp + HashIndex.cpp Heap.cpp Index.cpp Key.cpp diff --git a/Userland/Libraries/LibSQL/Forward.h b/Userland/Libraries/LibSQL/Forward.h index 51fa732100..4d139afb8d 100644 --- a/Userland/Libraries/LibSQL/Forward.h +++ b/Userland/Libraries/LibSQL/Forward.h @@ -35,6 +35,10 @@ class ErrorStatement; class ExistsExpression; class Expression; class GroupByClause; +class HashBucket; +class HashDirectoryNode; +class HashIndex; +class HashIndexIterator; class Heap; class InChainedExpression; class Index; diff --git a/Userland/Libraries/LibSQL/HashIndex.cpp b/Userland/Libraries/LibSQL/HashIndex.cpp new file mode 100644 index 0000000000..3d56c0b754 --- /dev/null +++ b/Userland/Libraries/LibSQL/HashIndex.cpp @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include + +namespace SQL { + +HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 node_number, size_t offset) + : IndexNode(index.node_pointer(node_number)) + , m_hash_index(index) + , 
m_node_number(node_number) + , m_offset(offset) +{ +} + +HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 pointer, ByteBuffer& buffer) + : IndexNode(pointer) + , m_hash_index(index) +{ + dbgln_if(SQL_DEBUG, "Deserializing Hash Directory Node"); + size_t offset = 0; + deserialize_from(buffer, offset, index.m_global_depth); + u32 size; + deserialize_from(buffer, offset, size); + dbgln_if(SQL_DEBUG, "Global Depth {}, #Bucket pointers {}", index.global_depth(), size); + u32 next_node; + deserialize_from(buffer, offset, next_node); + if (next_node) { + dbgln_if(SQL_DEBUG, "Next node {}", next_node); + m_hash_index.m_nodes.append(next_node); + } else { + dbgln_if(SQL_DEBUG, "This is the last directory node"); + m_is_last = true; + } + for (auto ix = 0u; ix < size; ix++) { + u32 bucket_pointer; + deserialize_from(buffer, offset, bucket_pointer); + u32 local_depth; + deserialize_from(buffer, offset, local_depth); + dbgln_if(SQL_DEBUG, "Bucket pointer {} local depth {}", bucket_pointer, local_depth); + index.append_bucket(ix, local_depth, bucket_pointer); + } +} + +void HashDirectoryNode::serialize(ByteBuffer& buffer) const +{ + dbgln_if(SQL_DEBUG, "Serializing directory node #{}. 
Offset {}", m_node_number, m_offset); + serialize_to(buffer, m_hash_index.global_depth()); + serialize_to(buffer, number_of_pointers()); + dbgln_if(SQL_DEBUG, "Global depth {}, #bucket pointers {}", m_hash_index.global_depth(), number_of_pointers()); + + u32 next_node; + if (m_node_number < (m_hash_index.m_nodes.size() - 1)) { + next_node = m_hash_index.m_nodes[m_node_number + 1]; + dbgln_if(SQL_DEBUG, "Next directory node pointer {}", next_node); + } else { + next_node = 0u; + dbgln_if(SQL_DEBUG, "This is the last directory node"); + } + + serialize_to(buffer, next_node); + for (auto ix = 0u; ix < number_of_pointers(); ix++) { + auto& bucket = m_hash_index.m_buckets[m_offset + ix]; + dbgln_if(SQL_DEBUG, "Bucket pointer {} local depth {}", bucket->pointer(), bucket->local_depth()); + serialize_to(buffer, bucket->pointer()); + serialize_to(buffer, bucket->local_depth()); + } +} + +HashBucket::HashBucket(HashIndex& hash_index, u32 index, u32 local_depth, u32 pointer) + : IndexNode(pointer) + , m_hash_index(hash_index) + , m_local_depth(local_depth) + , m_index(index) +{ +} + +void HashBucket::serialize(ByteBuffer& buffer) const +{ + dbgln_if(SQL_DEBUG, "Serializing bucket: pointer {}, index #{}, local depth {} size {}", + pointer(), index(), local_depth(), size()); + dbgln_if(SQL_DEBUG, "key_length: {} max_entries: {}", m_hash_index.descriptor().data_length(), max_entries_in_bucket()); + serialize_to(buffer, local_depth()); + serialize_to(buffer, size()); + dbgln_if(SQL_DEBUG, "buffer size after prolog {}", buffer.size()); + for (auto& key : m_entries) { + key.serialize(buffer); + dbgln_if(SQL_DEBUG, "Key {} buffer size {}", key.to_string(), buffer.size()); + } +} + +void HashBucket::inflate() +{ + if (m_inflated || !pointer()) + return; + dbgln_if(SQL_DEBUG, "Inflating Hash Bucket {}", pointer()); + auto buffer = m_hash_index.read_block(pointer()); + size_t offset = 0; + deserialize_from(buffer, offset, m_local_depth); + dbgln_if(SQL_DEBUG, "Bucket Local Depth {}", 
m_local_depth); + u32 size; + deserialize_from(buffer, offset, size); + dbgln_if(SQL_DEBUG, "Bucket has {} keys", size); + for (auto ix = 0u; ix < size; ix++) { + Key key(m_hash_index.descriptor(), buffer, offset); + dbgln_if(SQL_DEBUG, "Key {}: {}", ix, key.to_string()); + m_entries.append(key); + } + m_inflated = true; +} + +size_t HashBucket::max_entries_in_bucket() const +{ + auto key_size = m_hash_index.descriptor().data_length() + sizeof(u32); + return (BLOCKSIZE - 2 * sizeof(u32)) / key_size; +} + +Optional HashBucket::get(Key& key) +{ + auto optional_index = find_key_in_bucket(key); + if (optional_index.has_value()) { + auto& k = m_entries[optional_index.value()]; + key.set_pointer(k.pointer()); + return k.pointer(); + } + return {}; +} + +bool HashBucket::insert(Key const& key) +{ + inflate(); + if (find_key_in_bucket(key).has_value()) { + return false; + } + if (size() >= max_entries_in_bucket()) { + return false; + } + m_entries.append(key); + m_hash_index.add_to_write_ahead_log(this); + return true; +} + +Optional HashBucket::find_key_in_bucket(Key const& key) +{ + for (auto ix = 0u; ix < size(); ix++) { + auto& k = entries()[ix]; + if (k == key) { + return ix; + } + } + return {}; +} + +HashBucket const* HashBucket::next_bucket() +{ + for (auto ix = m_index + 1; ix < m_hash_index.size(); ix++) { + auto bucket = m_hash_index.get_bucket_by_index(ix); + bucket->inflate(); + if (bucket->size()) + return bucket; + } + return nullptr; +} + +HashBucket const* HashBucket::previous_bucket() +{ + for (auto ix = m_index - 1; ix > 0; ix--) { + auto bucket = m_hash_index.get_bucket_by_index(ix); + if (bucket->pointer()) + return bucket; + } + return nullptr; +} + +Key const& HashBucket::operator[](size_t ix) +{ + inflate(); + VERIFY(ix < size()); + return m_entries[ix]; +} + +void HashBucket::list_bucket() +{ + warnln("Bucket #{} size {} local depth {} pointer {}{}", + index(), size(), local_depth(), pointer(), (pointer() ? 
"" : " (VIRTUAL)")); + for (auto& key : entries()) { + warnln(" {} hash {}", key.to_string(), key.hash()); + } +} + +HashIndex::HashIndex(Heap& heap, TupleDescriptor const& descriptor, u32 first_node) + : Index(heap, descriptor, true, first_node) + , m_nodes() + , m_buckets() +{ + if (!first_node) { + set_pointer(new_record_pointer()); + } + if (this->heap().has_block(first_node)) { + u32 pointer = first_node; + do { + VERIFY(this->heap().has_block(pointer)); + auto buffer = read_block(pointer); + auto node = HashDirectoryNode(*this, pointer, buffer); + if (node.is_last()) + break; + pointer = m_nodes.last(); // FIXME Ugly + } while (pointer); + } else { + auto bucket = append_bucket(0u, 1u, new_record_pointer()); + bucket->m_inflated = true; + add_to_write_ahead_log(bucket); + bucket = append_bucket(1u, 1u, new_record_pointer()); + bucket->m_inflated = true; + add_to_write_ahead_log(bucket); + m_nodes.append(first_node); + write_directory_to_write_ahead_log(); + } +} + +HashBucket* HashIndex::get_bucket(u32 index) +{ + VERIFY(index < m_buckets.size()); + auto divisor = size() / 2; + while (!m_buckets[index]->pointer()) { + VERIFY(divisor > 1); + index = index % divisor; + divisor /= 2; + } + auto& bucket = m_buckets[index]; + return bucket; +} + +HashBucket* HashIndex::get_bucket_for_insert(Key const& key) +{ + auto key_hash = key.hash(); + + do { + auto bucket = get_bucket(key_hash % size()); + if (bucket->size() < bucket->max_entries_in_bucket()) { + return bucket; + } + + // We previously doubled the directory but the target bucket is + // still at an older depth. 
Create new buckets at the current global + // depth and allocate the contents of the existing buckets to the + // newly created ones: + while (bucket->local_depth() < global_depth()) { + auto base_index = bucket->index(); + auto step = 1 << (global_depth() - bucket->local_depth()); + for (auto ix = base_index + step; ix < size(); ix += step) { + auto& sub_bucket = m_buckets[ix]; + sub_bucket->set_local_depth(bucket->local_depth() + 1); + for (auto entry_index = (int)bucket->m_entries.size() - 1; entry_index >= 0; entry_index--) { + if (bucket->m_entries[entry_index].hash() % size() == ix) { + if (!sub_bucket->pointer()) { + sub_bucket->set_pointer(new_record_pointer()); + } + sub_bucket->insert(bucket->m_entries.take(entry_index)); + } + } + if (m_buckets[ix]->pointer()) + add_to_write_ahead_log(m_buckets[ix]); + } + bucket->set_local_depth(bucket->local_depth() + 1); + add_to_write_ahead_log(bucket); + write_directory_to_write_ahead_log(); + + auto bucket_after_redistribution = get_bucket(key_hash % size()); + if (bucket_after_redistribution->size() < bucket_after_redistribution->max_entries_in_bucket()) { + return bucket_after_redistribution; + } + } + expand(); + } while (true); +} + +void HashIndex::expand() +{ + auto sz = size(); + for (auto i = 0u; i < sz; i++) { + auto bucket = get_bucket(i); + bucket = append_bucket(sz + i, bucket->local_depth(), 0u); + bucket->m_inflated = true; + } + m_global_depth++; + write_directory_to_write_ahead_log(); +} + +void HashIndex::write_directory_to_write_ahead_log() +{ + auto num_nodes_required = (size() / HashDirectoryNode::max_pointers_in_node()) + 1; + while (m_nodes.size() < num_nodes_required) + m_nodes.append(new_record_pointer()); + + size_t offset = 0u; + size_t num_node = 0u; + while (offset < size()) { + HashDirectoryNode node(*this, num_node, offset); + add_to_write_ahead_log(node.as_index_node()); + offset += node.number_of_pointers(); + } +} + +HashBucket* HashIndex::append_bucket(u32 index, u32 local_depth, 
u32 pointer) +{ + m_buckets.append(make(*this, index, local_depth, pointer)); + return m_buckets.last(); +} + +HashBucket* HashIndex::get_bucket_by_index(u32 index) +{ + if (index >= size()) + return nullptr; + return m_buckets[index]; +} + +Optional HashIndex::get(Key& key) +{ + auto hash = key.hash(); + auto bucket_index = hash % size(); + auto bucket = get_bucket(bucket_index); + return bucket->get(key); +} + +bool HashIndex::insert(Key const& key) +{ + auto bucket = get_bucket_for_insert(key); + bucket->insert(key); + return true; +} + +HashIndexIterator HashIndex::begin() +{ + return HashIndexIterator(get_bucket(0)); +} + +HashIndexIterator HashIndex::end() +{ + return HashIndexIterator::end(); +} + +HashIndexIterator HashIndex::find(Key const& key) +{ + auto hash = key.hash(); + auto bucket_index = hash % size(); + auto bucket = get_bucket(bucket_index); + auto optional_index = bucket->find_key_in_bucket(key); + if (!optional_index.has_value()) + return end(); + return HashIndexIterator(bucket, optional_index.value()); +} + +void HashIndex::list_hash() +{ + warnln("Number of buckets: {} (Global depth {})", size(), global_depth()); + warn("Directory pointer(s): "); + for (auto ptr : m_nodes) { + warn("{}, ", ptr); + } + warnln(); + + bool first_bucket = true; + for (auto& bucket : m_buckets) { + if (first_bucket) { + warnln("Max. 
keys in bucket {}", bucket->max_entries_in_bucket()); + first_bucket = false; + } + bucket->list_bucket(); + } +} + +HashIndexIterator::HashIndexIterator(HashBucket const* bucket, size_t index) + : m_current(bucket) + , m_index(index) +{ + VERIFY(!m_current || !index || (index < m_current->size())); + while (m_current && (m_current->size() == 0)) { + m_current = m_current->next_bucket(); + m_index = 0; + } +} + +HashIndexIterator HashIndexIterator::next() +{ + if (is_end()) + return *this; + if (m_index < (m_current->size() - 1)) + return HashIndexIterator(m_current.ptr(), m_index + 1); + return HashIndexIterator(m_current->next_bucket()); +} + +HashIndexIterator HashIndexIterator::previous() +{ + TODO(); +} + +bool HashIndexIterator::operator==(HashIndexIterator const& other) const +{ + if (is_end()) + return other.is_end(); + if (other.is_end()) + return false; + VERIFY(&other.m_current->hash_index() == &m_current->hash_index()); + return (m_current.ptr() == other.m_current.ptr()) && (m_index == other.m_index); +} + +bool HashIndexIterator::operator==(Key const& other) const +{ + if (is_end()) + return false; + if (other.is_null()) + return false; + return (**this).compare(other); +} + +} diff --git a/Userland/Libraries/LibSQL/HashIndex.h b/Userland/Libraries/LibSQL/HashIndex.h new file mode 100644 index 0000000000..ca933a1820 --- /dev/null +++ b/Userland/Libraries/LibSQL/HashIndex.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace SQL { + +/** + * The HashIndex class is a straightforward implementation of a persisted + * extendible hash table (see + * https://en.wikipedia.org/wiki/Extendible_hashing). 
+ */ + +class HashBucket : public IndexNode + , public Weakable { +public: + HashBucket(HashIndex&, u32 index, u32 local_depth, u32 pointer); + ~HashBucket() override = default; + Optional get(Key&); + bool insert(Key const&); + Vector const& entries() + { + inflate(); + return m_entries; + } + Key const& operator[](size_t); + Key const& operator[](size_t ix) const + { + VERIFY(ix < m_entries.size()); + return m_entries[ix]; + } + [[nodiscard]] u32 local_depth() const { return m_local_depth; } + [[nodiscard]] u32 size() { return entries().size(); } + [[nodiscard]] u32 size() const { return m_entries.size(); } + [[nodiscard]] u32 index() const { return m_index; } + void serialize(ByteBuffer&) const override; + IndexNode* as_index_node() override { return dynamic_cast(this); } + [[nodiscard]] HashIndex const& hash_index() const { return m_hash_index; } + [[nodiscard]] HashBucket const* next_bucket(); + [[nodiscard]] HashBucket const* previous_bucket(); + void list_bucket(); + +private: + Optional find_key_in_bucket(Key const&); + void set_index(u32 index) { m_index = index; } + void set_local_depth(u32 depth) { m_local_depth = depth; } + [[nodiscard]] size_t max_entries_in_bucket() const; + void inflate(); + + HashIndex& m_hash_index; + u32 m_local_depth { 1 }; + u32 m_index { 0 }; + Vector m_entries; + bool m_inflated { false }; + + friend HashIndex; +}; + +class HashIndex : public Index { + C_OBJECT(HashIndex); + +public: + ~HashIndex() override = default; + + Optional get(Key&); + bool insert(Key const&); + bool insert(Key const&& entry) { return insert(entry); } + HashIndexIterator find(Key const&); + HashIndexIterator begin(); + static HashIndexIterator end(); + + [[nodiscard]] u32 global_depth() const { return m_global_depth; } + [[nodiscard]] u32 size() const { return 1 << m_global_depth; } + [[nodiscard]] HashBucket* get_bucket(u32); + [[nodiscard]] u32 node_pointer(u32 node_number) const { return m_nodes[node_number]; } + [[nodiscard]] u32 
first_node_pointer() const { return m_nodes[0]; } + [[nodiscard]] size_t nodes() const { return m_nodes.size(); } + void list_hash(); + +private: + HashIndex(Heap&, TupleDescriptor const&, u32); + + void expand(); + void write_directory_to_write_ahead_log(); + HashBucket* append_bucket(u32 index, u32 local_depth, u32 pointer); + HashBucket* get_bucket_for_insert(Key const&); + [[nodiscard]] HashBucket* get_bucket_by_index(u32 index); + + u32 m_global_depth { 1 }; + Vector m_nodes; + Vector> m_buckets; + + friend HashBucket; + friend HashDirectoryNode; +}; + +class HashDirectoryNode : public IndexNode { +public: + HashDirectoryNode(HashIndex&, u32, size_t); + HashDirectoryNode(HashIndex&, u32, ByteBuffer&); + HashDirectoryNode(HashDirectoryNode const& other) = default; + void serialize(ByteBuffer&) const override; + IndexNode* as_index_node() override { return dynamic_cast(this); } + [[nodiscard]] u32 number_of_pointers() const { return min(max_pointers_in_node(), m_hash_index.size() - m_offset); } + [[nodiscard]] bool is_last() const { return m_is_last; } + static constexpr size_t max_pointers_in_node() { return (BLOCKSIZE - 3 * sizeof(u32)) / (2 * sizeof(u32)); } + +private: + HashIndex& m_hash_index; + size_t m_node_number { 0 }; + size_t m_offset { 0 }; + bool m_is_last { false }; +}; + +class HashIndexIterator { +public: + [[nodiscard]] bool is_end() const { return !m_current; } + + bool operator==(HashIndexIterator const& other) const; + bool operator!=(HashIndexIterator const& other) const { return !(*this == other); } + bool operator==(Key const& other) const; + bool operator!=(Key const& other) const { return !(*this == other); } + + HashIndexIterator operator++() + { + *this = next(); + return *this; + } + + HashIndexIterator operator++(int) + { + *this = next(); + return *this; + } + + HashIndexIterator operator--() + { + *this = previous(); + return *this; + } + + HashIndexIterator const operator--(int) + { + *this = previous(); + return *this; + } + + 
Key const& operator*() const + { + VERIFY(!is_end()); + return (*m_current)[m_index]; + } + + Key const& operator->() const + { + VERIFY(!is_end()); + return (*m_current)[m_index]; + } + + HashIndexIterator& operator=(HashIndexIterator const&) = default; + HashIndexIterator(HashIndexIterator const&) = default; + +private: + HashIndexIterator() = default; + explicit HashIndexIterator(HashBucket const*, size_t key_index = 0); + static HashIndexIterator end() { return HashIndexIterator(); } + + [[nodiscard]] HashIndexIterator next(); + [[nodiscard]] HashIndexIterator previous(); + [[nodiscard]] Key key() const { return **this; } + + WeakPtr m_current; + size_t m_index { 0 }; + + friend HashIndex; +}; + +}