diff --git a/Tests/LibSQL/CMakeLists.txt b/Tests/LibSQL/CMakeLists.txt
index 3b272143ff..016570b558 100644
--- a/Tests/LibSQL/CMakeLists.txt
+++ b/Tests/LibSQL/CMakeLists.txt
@@ -2,7 +2,6 @@ set(TEST_SOURCES
     TestSqlBtreeIndex.cpp
     TestSqlDatabase.cpp
     TestSqlExpressionParser.cpp
-    TestSqlHashIndex.cpp
     TestSqlHeap.cpp
     TestSqlStatementExecution.cpp
     TestSqlStatementParser.cpp
diff --git a/Tests/LibSQL/TestSqlHashIndex.cpp b/Tests/LibSQL/TestSqlHashIndex.cpp
deleted file mode 100644
index c6fa210eb2..0000000000
--- a/Tests/LibSQL/TestSqlHashIndex.cpp
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include <AK/ScopeGuard.h>
-#include <LibSQL/HashIndex.h>
-#include <LibSQL/Heap.h>
-#include <LibSQL/Meta.h>
-#include <LibSQL/Serializer.h>
-#include <LibTest/TestCase.h>
-#include <unistd.h>
-
-constexpr static int keys[] = {
-    39,
-    87,
-    77,
-    42,
-    98,
-    40,
-    53,
-    8,
-    37,
-    12,
-    90,
-    72,
-    73,
-    11,
-    88,
-    22,
-    10,
-    82,
-    25,
-    61,
-    97,
-    18,
-    60,
-    68,
-    21,
-    3,
-    58,
-    29,
-    13,
-    17,
-    89,
-    81,
-    16,
-    64,
-    5,
-    41,
-    36,
-    91,
-    38,
-    24,
-    32,
-    50,
-    34,
-    94,
-    49,
-    47,
-    1,
-    6,
-    44,
-    76,
-};
-constexpr static u32 pointers[] = {
-    92,
-    4,
-    50,
-    47,
-    68,
-    73,
-    24,
-    28,
-    50,
-    93,
-    60,
-    36,
-    92,
-    72,
-    53,
-    26,
-    91,
-    84,
-    25,
-    43,
-    88,
-    12,
-    62,
-    35,
-    96,
-    27,
-    96,
-    27,
-    99,
-    30,
-    21,
-    89,
-    54,
-    60,
-    37,
-    68,
-    35,
-    55,
-    80,
-    2,
-    33,
-    26,
-    93,
-    70,
-    45,
-    44,
-    3,
-    66,
-    75,
-    4,
-};
-
-NonnullRefPtr<SQL::HashIndex> setup_hash_index(SQL::Serializer&);
-void insert_and_get_to_and_from_hash_index(int);
-void insert_into_and_scan_hash_index(int);
-
-NonnullRefPtr<SQL::HashIndex> setup_hash_index(SQL::Serializer& serializer)
-{
-    NonnullRefPtr<SQL::TupleDescriptor> tuple_descriptor = adopt_ref(*new SQL::TupleDescriptor);
-    tuple_descriptor->append({ "schema", "table", "key_value", SQL::SQLType::Integer, SQL::Order::Ascending });
-    tuple_descriptor->append({ "schema", "table", "text_value", SQL::SQLType::Text, SQL::Order::Ascending });
-
-    auto directory_block_index = serializer.heap().user_value(0);
-    if (!directory_block_index) {
-        directory_block_index = serializer.heap().request_new_block_index();
-        serializer.heap().set_user_value(0, directory_block_index);
-    }
-    auto hash_index = SQL::HashIndex::construct(serializer, tuple_descriptor, directory_block_index);
-    return hash_index;
-}
-
-void insert_and_get_to_and_from_hash_index(int num_keys)
-{
-    ScopeGuard guard([]() { unlink("/tmp/test.db"); });
-    {
-        auto heap = MUST(SQL::Heap::create("/tmp/test.db"));
-        TRY_OR_FAIL(heap->open());
-        SQL::Serializer serializer(heap);
-        auto hash_index = setup_hash_index(serializer);
-
-        for (auto ix = 0; ix < num_keys; ix++) {
-            SQL::Key k(hash_index->descriptor());
-            k[0] = keys[ix];
-            k[1] = DeprecatedString::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]);
-            k.set_block_index(pointers[ix]);
-            hash_index->insert(k);
-        }
-#ifdef LIST_HASH_INDEX
-        hash_index->list_hash();
-#endif
-    }
-
-    {
-        auto heap = MUST(SQL::Heap::create("/tmp/test.db"));
-        TRY_OR_FAIL(heap->open());
-        SQL::Serializer serializer(heap);
-        auto hash_index = setup_hash_index(serializer);
-
-        for (auto ix = 0; ix < num_keys; ix++) {
-            SQL::Key k(hash_index->descriptor());
-            k[0] = keys[ix];
-            k[1] = DeprecatedString::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]);
-            auto pointer_opt = hash_index->get(k);
-            VERIFY(pointer_opt.has_value());
-            EXPECT_EQ(pointer_opt.value(), pointers[ix]);
-        }
-    }
-}
-
-TEST_CASE(hash_index_one_key)
-{
-    insert_and_get_to_and_from_hash_index(1);
-}
-
-TEST_CASE(hash_index_four_keys)
-{
-    insert_and_get_to_and_from_hash_index(4);
-}
-
-TEST_CASE(hash_index_five_keys)
-{
-    insert_and_get_to_and_from_hash_index(5);
-}
-
-TEST_CASE(hash_index_10_keys)
-{
-    insert_and_get_to_and_from_hash_index(10);
-}
-
-TEST_CASE(hash_index_13_keys)
-{
-    insert_and_get_to_and_from_hash_index(13);
-}
-
-TEST_CASE(hash_index_20_keys)
-{
-    insert_and_get_to_and_from_hash_index(20);
-}
-
-TEST_CASE(hash_index_25_keys)
-{
-    insert_and_get_to_and_from_hash_index(25);
-}
-
-TEST_CASE(hash_index_30_keys)
-{
-    insert_and_get_to_and_from_hash_index(30);
-}
-
-TEST_CASE(hash_index_35_keys)
-{
-    insert_and_get_to_and_from_hash_index(35);
-}
-
-TEST_CASE(hash_index_40_keys)
-{
-    insert_and_get_to_and_from_hash_index(40);
-}
-
-TEST_CASE(hash_index_45_keys)
-{
-    insert_and_get_to_and_from_hash_index(45);
-}
-
-TEST_CASE(hash_index_50_keys)
-{
-    insert_and_get_to_and_from_hash_index(50);
-}
-
-void insert_into_and_scan_hash_index(int num_keys)
-{
-    ScopeGuard guard([]() { unlink("/tmp/test.db"); });
-    {
-        auto heap = MUST(SQL::Heap::create("/tmp/test.db"));
-        TRY_OR_FAIL(heap->open());
-        SQL::Serializer serializer(heap);
-        auto hash_index = setup_hash_index(serializer);
-
-        for (auto ix = 0; ix < num_keys; ix++) {
-            SQL::Key k(hash_index->descriptor());
-            k[0] = keys[ix];
-            k[1] = DeprecatedString::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]);
-            k.set_block_index(pointers[ix]);
-            hash_index->insert(k);
-        }
-#ifdef LIST_HASH_INDEX
-        hash_index->list_hash();
-#endif
-    }
-
-    {
-        auto heap = MUST(SQL::Heap::create("/tmp/test.db"));
-        TRY_OR_FAIL(heap->open());
-        SQL::Serializer serializer(heap);
-        auto hash_index = setup_hash_index(serializer);
-        Vector<bool> found;
-        for (auto ix = 0; ix < num_keys; ix++) {
-            found.append(false);
-        }
-
-        int count = 0;
-        for (auto iter = hash_index->begin(); !iter.is_end(); iter++, count++) {
-            auto key = (*iter);
-            auto key_value = key[0].to_int();
-            VERIFY(key_value.has_value());
-
-            for (auto ix = 0; ix < num_keys; ix++) {
-                if (keys[ix] == key_value) {
-                    EXPECT_EQ(key.block_index(), pointers[ix]);
-                    if (found[ix])
-                        FAIL(DeprecatedString::formatted("Key {}, index {} already found previously", *key_value, ix));
-                    found[ix] = true;
-                    break;
-                }
-            }
-        }
-
-#ifdef LIST_HASH_INDEX
-        hash_index->list_hash();
-#endif
-        EXPECT_EQ(count, num_keys);
-        for (auto ix = 0; ix < num_keys; ix++) {
-            if (!found[ix])
-                FAIL(DeprecatedString::formatted("Key {}, index {} not found", keys[ix], ix));
-        }
-    }
-}
-
-TEST_CASE(hash_index_scan_one_key)
-{
-    insert_into_and_scan_hash_index(1);
-}
-
-TEST_CASE(hash_index_scan_four_keys)
-{
-    insert_into_and_scan_hash_index(4);
-}
-
-TEST_CASE(hash_index_scan_five_keys)
-{
-    insert_into_and_scan_hash_index(5);
-}
-
-TEST_CASE(hash_index_scan_10_keys)
-{
-    insert_into_and_scan_hash_index(10);
-}
-
-TEST_CASE(hash_index_scan_15_keys)
-{
-    insert_into_and_scan_hash_index(15);
-}
-
-TEST_CASE(hash_index_scan_20_keys)
-{
-    insert_into_and_scan_hash_index(20);
-}
-
-TEST_CASE(hash_index_scan_30_keys)
-{
-    insert_into_and_scan_hash_index(30);
-}
-
-TEST_CASE(hash_index_scan_40_keys)
-{
-    insert_into_and_scan_hash_index(40);
-}
-
-TEST_CASE(hash_index_scan_50_keys)
-{
-    insert_into_and_scan_hash_index(50);
-}
diff --git a/Userland/Libraries/LibSQL/CMakeLists.txt b/Userland/Libraries/LibSQL/CMakeLists.txt
index b782d824b4..43001cb3bd 100644
--- a/Userland/Libraries/LibSQL/CMakeLists.txt
+++ b/Userland/Libraries/LibSQL/CMakeLists.txt
@@ -15,7 +15,6 @@ set(SOURCES
     BTree.cpp
     BTreeIterator.cpp
     Database.cpp
-    HashIndex.cpp
     Heap.cpp
     Index.cpp
     Key.cpp
diff --git a/Userland/Libraries/LibSQL/Forward.h b/Userland/Libraries/LibSQL/Forward.h
index 0ae3ebba8f..b38e333686 100644
--- a/Userland/Libraries/LibSQL/Forward.h
+++ b/Userland/Libraries/LibSQL/Forward.h
@@ -11,10 +11,6 @@ class BTree;
 class BTreeIterator;
 class ColumnDef;
 class Database;
-class HashBucket;
-class HashDirectoryNode;
-class HashIndex;
-class HashIndexIterator;
 class Heap;
 class Index;
 class IndexNode;
diff --git a/Userland/Libraries/LibSQL/HashIndex.cpp b/Userland/Libraries/LibSQL/HashIndex.cpp
deleted file mode 100644
index 1d3f9ee0c7..0000000000
--- a/Userland/Libraries/LibSQL/HashIndex.cpp
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include <AK/DeprecatedString.h>
-#include <LibSQL/HashIndex.h>
-#include <LibSQL/Key.h>
-#include <LibSQL/Serializer.h>
-
-namespace SQL {
-
-HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 node_number, size_t offset)
-    : IndexNode(index.node_pointer(node_number))
-    , m_hash_index(index)
-    , m_node_number(node_number)
-    , m_offset(offset)
-{
-}
-
-HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 pointer)
-    : IndexNode(pointer)
-    , m_hash_index(index)
-{
-}
-
-void HashDirectoryNode::deserialize(Serializer& serializer)
-{
-    dbgln_if(SQL_DEBUG, "Deserializing Hash Directory Node");
-    m_hash_index.m_global_depth = serializer.deserialize<u32>();
-    auto size = serializer.deserialize<u32>();
-    dbgln_if(SQL_DEBUG, "Global Depth {}, #Bucket pointers {}", m_hash_index.global_depth(), size);
-    auto next_node = serializer.deserialize<u32>();
-    if (next_node) {
-        dbgln_if(SQL_DEBUG, "Next node {}", next_node);
-        m_hash_index.m_nodes.append(next_node);
-    } else {
-        dbgln_if(SQL_DEBUG, "This is the last directory node");
-        m_is_last = true;
-    }
-    for (auto ix = 0u; ix < size; ix++) {
-        auto bucket_pointer = serializer.deserialize<u32>();
-        auto local_depth = serializer.deserialize<u32>();
-        dbgln_if(SQL_DEBUG, "--Index {} bucket pointer {} local depth {}", ix, bucket_pointer, local_depth);
-        m_hash_index.append_bucket(ix, local_depth, bucket_pointer);
-    }
-}
-
-void HashDirectoryNode::serialize(Serializer& serializer) const
-{
-    dbgln_if(SQL_DEBUG, "Serializing directory node #{}. Offset {}", m_node_number, m_offset);
-    serializer.serialize((u32)m_hash_index.global_depth());
-    serializer.serialize(number_of_pointers());
-    dbgln_if(SQL_DEBUG, "Global depth {}, #bucket pointers {}", m_hash_index.global_depth(), number_of_pointers());
-
-    u32 next_node;
-    if (m_node_number < (m_hash_index.m_nodes.size() - 1)) {
-        next_node = m_hash_index.m_nodes[m_node_number + 1];
-        dbgln_if(SQL_DEBUG, "Next directory node pointer {}", next_node);
-    } else {
-        next_node = 0u;
-        dbgln_if(SQL_DEBUG, "This is the last directory node");
-    }
-
-    serializer.serialize(next_node);
-    for (auto ix = 0u; ix < number_of_pointers(); ix++) {
-        auto& bucket = m_hash_index.m_buckets[m_offset + ix];
-        dbgln_if(SQL_DEBUG, "Bucket index #{} block_index {} local depth {} size {}", ix, bucket->block_index(), bucket->local_depth(), bucket->size());
-        serializer.serialize(bucket->block_index());
-        serializer.serialize(bucket->local_depth());
-    }
-}
-
-HashBucket::HashBucket(HashIndex& hash_index, Block::Index index, u32 local_depth, Block::Index pointer)
-    : IndexNode(pointer)
-    , m_hash_index(hash_index)
-    , m_local_depth(local_depth)
-    , m_index(index)
-{
-}
-
-void HashBucket::serialize(Serializer& serializer) const
-{
-    dbgln_if(SQL_DEBUG, "Serializing bucket: block_index {}, index #{}, local depth {} size {}",
-        block_index(), index(), local_depth(), size());
-    serializer.serialize(local_depth());
-    serializer.serialize(size());
-    for (auto& key : m_entries)
-        serializer.serialize(key);
-}
-
-void HashBucket::deserialize(Serializer& serializer)
-{
-    if (m_inflated || !block_index())
-        return;
-    dbgln_if(SQL_DEBUG, "Inflating Hash Bucket {}", block_index());
-    m_local_depth = serializer.deserialize<u32>();
-    dbgln_if(SQL_DEBUG, "Bucket Local Depth {}", m_local_depth);
-    auto size = serializer.deserialize<u32>();
-    dbgln_if(SQL_DEBUG, "Bucket has {} keys", size);
-    for (auto ix = 0u; ix < size; ix++) {
-        auto key = serializer.deserialize<Key>(m_hash_index.descriptor());
-        dbgln_if(SQL_DEBUG, "Key {}: {}", ix, key.to_deprecated_string());
-        m_entries.append(key);
-    }
-    m_inflated = true;
-}
-
-size_t HashBucket::length() const
-{
-    size_t len = 2 * sizeof(u32);
-    for (auto& key : m_entries)
-        len += key.length();
-    return len;
-}
-
-Optional<u32> HashBucket::get(Key& key)
-{
-    auto optional_index = find_key_in_bucket(key);
-    if (optional_index.has_value()) {
-        auto& k = m_entries[optional_index.value()];
-        key.set_block_index(k.block_index());
-        return k.block_index();
-    }
-    return {};
-}
-
-bool HashBucket::insert(Key const& key)
-{
-    if (!m_inflated)
-        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
-    if (find_key_in_bucket(key).has_value())
-        return false;
-    if (length() + key.length() > Block::DATA_SIZE) {
-        dbgln_if(SQL_DEBUG, "Adding key {} would make length exceed block size", key.to_deprecated_string());
-        return false;
-    }
-    m_entries.append(key);
-    m_hash_index.serializer().serialize_and_write(*this);
-    return true;
-}
-
-Optional<size_t> HashBucket::find_key_in_bucket(Key const& key)
-{
-    for (auto ix = 0u; ix < size(); ix++) {
-        auto& k = entries()[ix];
-        if (k == key)
-            return ix;
-    }
-    return {};
-}
-
-HashBucket const* HashBucket::next_bucket()
-{
-    for (auto ix = m_index + 1; ix < m_hash_index.size(); ix++) {
-        auto bucket = m_hash_index.get_bucket_by_index(ix);
-        m_hash_index.serializer().deserialize_block_to(bucket->block_index(), *bucket);
-        if (bucket->size())
-            return bucket;
-    }
-    return nullptr;
-}
-
-HashBucket const* HashBucket::previous_bucket()
-{
-    for (auto ix = m_index - 1; ix > 0; ix--) {
-        auto bucket = m_hash_index.get_bucket_by_index(ix);
-        if (bucket->block_index() > 0)
-            return bucket;
-    }
-    return nullptr;
-}
-
-Vector<Key> const& HashBucket::entries()
-{
-    if (!m_inflated)
-        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
-    return m_entries;
-}
-
-Key const& HashBucket::operator[](size_t ix)
-{
-    if (!m_inflated)
-        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
-    return m_entries[ix];
-}
-
-Key const& HashBucket::operator[](size_t ix) const
-{
-    return m_entries[ix];
-}
-
-void HashBucket::list_bucket()
-{
-    warnln("Bucket #{} size {} local depth {} block_index {}{}",
-        index(), size(), local_depth(), block_index(), (block_index() > 0 ? "" : " (VIRTUAL)"));
-    for (auto& key : entries())
-        warnln("    {} hash {}", key.to_deprecated_string(), key.hash());
-}
-
-HashIndex::HashIndex(Serializer& serializer, NonnullRefPtr<TupleDescriptor> const& descriptor, Block::Index first_node)
-    : Index(serializer, descriptor, true, first_node)
-{
-    if (first_node == 0)
-        set_block_index(request_new_block_index());
-    if (serializer.has_block(first_node)) {
-        Block::Index block_index = first_node;
-        do {
-            VERIFY(serializer.has_block(block_index));
-            auto node = serializer.deserialize_block<HashDirectoryNode>(block_index, *this, block_index);
-            if (node.is_last())
-                break;
-            block_index = m_nodes.last(); // FIXME Ugly
-        } while (block_index);
-    } else {
-        auto bucket = append_bucket(0u, 1u, request_new_block_index());
-        bucket->m_inflated = true;
-        serializer.serialize_and_write(*bucket);
-        bucket = append_bucket(1u, 1u, request_new_block_index());
-        bucket->m_inflated = true;
-        serializer.serialize_and_write(*bucket);
-        m_nodes.append(first_node);
-        write_directory();
-    }
-}
-
-HashBucket* HashIndex::get_bucket(u32 index)
-{
-    VERIFY(index < m_buckets.size());
-    auto divisor = size() / 2;
-    while (m_buckets[index]->block_index() == 0) {
-        VERIFY(divisor > 1);
-        index = index % divisor;
-        divisor /= 2;
-    }
-    auto& bucket = m_buckets[index];
-    return bucket;
-}
-
-HashBucket* HashIndex::get_bucket_for_insert(Key const& key)
-{
-    auto key_hash = key.hash();
-
-    do {
-        dbgln_if(SQL_DEBUG, "HashIndex::get_bucket_for_insert({}) bucket {} of {}", key.to_deprecated_string(), key_hash % size(), size());
-        auto bucket = get_bucket(key_hash % size());
-        if (bucket->length() + key.length() < Block::DATA_SIZE)
-            return bucket;
-        dbgln_if(SQL_DEBUG, "Bucket is full (bucket size {}/length {} key length {}). Expanding directory", bucket->size(), bucket->length(), key.length());
-
-        // We previously doubled the directory but the target bucket is
-        // still at an older depth. Create new buckets at the current global
-        // depth and allocate the contents of the existing buckets to the
-        // newly created ones:
-        while (bucket->local_depth() < global_depth()) {
-            auto base_index = bucket->index();
-            auto step = 1 << (global_depth() - bucket->local_depth());
-            auto total_moved = 0;
-            for (auto ix = base_index + step; ix < size(); ix += step) {
-                auto& sub_bucket = m_buckets[ix];
-                sub_bucket->set_local_depth(bucket->local_depth() + 1);
-                auto moved = 0;
-                for (auto entry_index = (int)bucket->m_entries.size() - 1; entry_index >= 0; entry_index--) {
-                    if (bucket->m_entries[entry_index].hash() % size() == ix) {
-                        if (!sub_bucket->block_index())
-                            sub_bucket->set_block_index(request_new_block_index());
-                        sub_bucket->insert(bucket->m_entries.take(entry_index));
-                        moved++;
-                    }
-                }
-                if (moved > 0) {
-                    dbgln_if(SQL_DEBUG, "Moved {} entries from bucket #{} to #{}", moved, base_index, ix);
-                    serializer().serialize_and_write(*sub_bucket);
-                }
-                total_moved += moved;
-            }
-            if (total_moved)
-                dbgln_if(SQL_DEBUG, "Redistributed {} entries from bucket #{}", total_moved, base_index);
-            else
-                dbgln_if(SQL_DEBUG, "Nothing redistributed from bucket #{}", base_index);
-            bucket->set_local_depth(bucket->local_depth() + 1);
-            serializer().serialize_and_write(*bucket);
-            write_directory();
-
-            auto bucket_after_redistribution = get_bucket(key_hash % size());
-            if (bucket_after_redistribution->length() + key.length() < Block::DATA_SIZE)
-                return bucket_after_redistribution;
-        }
-        expand();
-    } while (true);
-    VERIFY_NOT_REACHED();
-}
-
-void HashIndex::expand()
-{
-    auto sz = size();
-    dbgln_if(SQL_DEBUG, "Expanding directory from {} to {} buckets", sz, 2 * sz);
-    for (auto i = 0u; i < sz; i++) {
-        auto bucket = get_bucket(i);
-        bucket = append_bucket(sz + i, bucket->local_depth(), 0u);
-        bucket->m_inflated = true;
-    }
-    m_global_depth++;
-    write_directory();
-}
-
-void HashIndex::write_directory()
-{
-    auto num_nodes_required = (size() / HashDirectoryNode::max_pointers_in_node()) + 1;
-    while (m_nodes.size() < num_nodes_required)
-        m_nodes.append(request_new_block_index());
-
-    size_t offset = 0u;
-    size_t num_node = 0u;
-    while (offset < size()) {
-        HashDirectoryNode node(*this, num_node++, offset);
-        serializer().serialize_and_write(node);
-        offset += node.number_of_pointers();
-    }
-}
-
-HashBucket* HashIndex::append_bucket(u32 index, u32 local_depth, u32 pointer)
-{
-    m_buckets.append(make<HashBucket>(*this, index, local_depth, pointer));
-    return m_buckets.last();
-}
-
-HashBucket* HashIndex::get_bucket_by_index(u32 index)
-{
-    if (index >= size())
-        return nullptr;
-    return m_buckets[index];
-}
-
-Optional<u32> HashIndex::get(Key& key)
-{
-    auto hash = key.hash();
-    auto bucket_index = hash % size();
-    dbgln_if(SQL_DEBUG, "HashIndex::get({}) bucket_index {}", key.to_deprecated_string(), bucket_index);
-    auto bucket = get_bucket(bucket_index);
-    if constexpr (SQL_DEBUG)
-        bucket->list_bucket();
-    return bucket->get(key);
-}
-
-bool HashIndex::insert(Key const& key)
-{
-    dbgln_if(SQL_DEBUG, "HashIndex::insert({})", key.to_deprecated_string());
-    auto bucket = get_bucket_for_insert(key);
-    bucket->insert(key);
-    if constexpr (SQL_DEBUG)
-        bucket->list_bucket();
-    return true;
-}
-
-HashIndexIterator HashIndex::begin()
-{
-    return HashIndexIterator(get_bucket(0));
-}
-
-HashIndexIterator HashIndex::end()
-{
-    return HashIndexIterator::end();
-}
-
-HashIndexIterator HashIndex::find(Key const& key)
-{
-    auto hash = key.hash();
-    auto bucket_index = hash % size();
-    auto bucket = get_bucket(bucket_index);
-    auto optional_index = bucket->find_key_in_bucket(key);
-    if (!optional_index.has_value())
-        return end();
-    return HashIndexIterator(bucket, optional_index.value());
-}
-
-void HashIndex::list_hash()
-{
-    warnln("Number of buckets: {} (Global depth {})", size(), global_depth());
-    warn("Directory pointer(s): ");
-    for (auto ptr : m_nodes)
-        warn("{}, ", ptr);
-    warnln();
-
-    for (auto& bucket : m_buckets)
-        bucket->list_bucket();
-}
-
-HashIndexIterator::HashIndexIterator(HashBucket const* bucket, size_t index)
-    : m_current(bucket)
-    , m_index(index)
-{
-    VERIFY(!m_current || !index || (index < m_current->size()));
-    while (m_current && (m_current->size() == 0)) {
-        m_current = m_current->next_bucket();
-        m_index = 0;
-    }
-}
-
-HashIndexIterator HashIndexIterator::next()
-{
-    if (is_end())
-        return *this;
-    if (m_index < (m_current->size() - 1))
-        return HashIndexIterator(m_current.ptr(), m_index + 1);
-    return HashIndexIterator(m_current->next_bucket());
-}
-
-HashIndexIterator HashIndexIterator::previous()
-{
-    TODO();
-}
-
-bool HashIndexIterator::operator==(HashIndexIterator const& other) const
-{
-    if (is_end())
-        return other.is_end();
-    if (other.is_end())
-        return false;
-    VERIFY(&other.m_current->hash_index() == &m_current->hash_index());
-    return (m_current.ptr() == other.m_current.ptr()) && (m_index == other.m_index);
-}
-
-bool HashIndexIterator::operator==(Key const& other) const
-{
-    if (is_end())
-        return false;
-    if (other.is_null())
-        return false;
-    return (**this).compare(other);
-}
-
-}
diff --git a/Userland/Libraries/LibSQL/HashIndex.h b/Userland/Libraries/LibSQL/HashIndex.h
deleted file mode 100644
index 86eb2afd03..0000000000
--- a/Userland/Libraries/LibSQL/HashIndex.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#pragma once
-
-#include <AK/WeakPtr.h>
-#include <AK/Weakable.h>
-#include <LibSQL/Forward.h>
-#include <LibSQL/Heap.h>
-#include <LibSQL/Index.h>
-#include <LibSQL/Key.h>
-
-namespace SQL {
-
-/**
- * The HashIndex class is a straightforward implementation of a persisted
- * extendible hash table (see
- * https://en.wikipedia.org/wiki/Extendible_hashing).
- */
-
-class HashBucket : public IndexNode
-    , public Weakable<HashBucket> {
-public:
-    HashBucket(HashIndex&, Block::Index index, u32 local_depth, Block::Index pointer);
-    ~HashBucket() override = default;
-    Optional<u32> get(Key&);
-    bool insert(Key const&);
-    Vector<Key> const& entries();
-    Key const& operator[](size_t);
-    Key const& operator[](size_t ix) const;
-    [[nodiscard]] u32 local_depth() const { return m_local_depth; }
-    [[nodiscard]] u32 size() { return entries().size(); }
-    [[nodiscard]] size_t length() const;
-    [[nodiscard]] u32 size() const { return m_entries.size(); }
-    [[nodiscard]] Block::Index index() const { return m_index; }
-    void serialize(Serializer&) const;
-    void deserialize(Serializer&);
-    [[nodiscard]] HashIndex const& hash_index() const { return m_hash_index; }
-    [[nodiscard]] HashBucket const* next_bucket();
-    [[nodiscard]] HashBucket const* previous_bucket();
-    void list_bucket();
-
-private:
-    Optional<size_t> find_key_in_bucket(Key const&);
-    void set_index(Block::Index index) { m_index = index; }
-    void set_local_depth(u32 depth) { m_local_depth = depth; }
-
-    HashIndex& m_hash_index;
-    u32 m_local_depth { 1 };
-    Block::Index m_index { 0 };
-    Vector<Key> m_entries;
-    bool m_inflated { false };
-
-    friend HashIndex;
-};
-
-class HashIndex : public Index {
-    C_OBJECT(HashIndex);
-
-public:
-    ~HashIndex() override = default;
-
-    Optional<u32> get(Key&);
-    bool insert(Key const&);
-    bool insert(Key const&& entry) { return insert(entry); }
-    HashIndexIterator find(Key const&);
-    HashIndexIterator begin();
-    static HashIndexIterator end();
-
-    [[nodiscard]] u32 global_depth() const { return m_global_depth; }
-    [[nodiscard]] u32 size() const { return 1 << m_global_depth; }
-    [[nodiscard]] HashBucket* get_bucket(u32);
-    [[nodiscard]] u32 node_pointer(u32 node_number) const { return m_nodes[node_number]; }
-    [[nodiscard]] u32 first_node_pointer() const { return m_nodes[0]; }
-    [[nodiscard]] size_t nodes() const { return m_nodes.size(); }
-    void list_hash();
-
-private:
-    HashIndex(Serializer&, NonnullRefPtr<TupleDescriptor> const&, Block::Index);
-
-    void expand();
-    void write_directory();
-    HashBucket* append_bucket(u32 index, u32 local_depth, u32 pointer);
-    HashBucket* get_bucket_for_insert(Key const&);
-    [[nodiscard]] HashBucket* get_bucket_by_index(u32 index);
-
-    u32 m_global_depth { 1 };
-    Vector<u32> m_nodes;
-    Vector<NonnullOwnPtr<HashBucket>> m_buckets;
-
-    friend HashBucket;
-    friend HashDirectoryNode;
-};
-
-class HashDirectoryNode : public IndexNode {
-public:
-    HashDirectoryNode(HashIndex&, u32, size_t);
-    HashDirectoryNode(HashIndex&, u32);
-    HashDirectoryNode(HashDirectoryNode const& other) = default;
-    void deserialize(Serializer&);
-    void serialize(Serializer&) const;
-    [[nodiscard]] u32 number_of_pointers() const { return min(max_pointers_in_node(), m_hash_index.size() - m_offset); }
-    [[nodiscard]] bool is_last() const { return m_is_last; }
-    static constexpr size_t max_pointers_in_node() { return (Block::DATA_SIZE - 3 * sizeof(u32)) / (2 * sizeof(u32)); }
-
-private:
-    HashIndex& m_hash_index;
-    size_t m_node_number { 0 };
-    size_t m_offset { 0 };
-    bool m_is_last { false };
-};
-
-class HashIndexIterator {
-public:
-    [[nodiscard]] bool is_end() const { return !m_current; }
-
-    bool operator==(HashIndexIterator const& other) const;
-    bool operator==(Key const& other) const;
-
-    HashIndexIterator operator++()
-    {
-        *this = next();
-        return *this;
-    }
-
-    HashIndexIterator operator++(int)
-    {
-        *this = next();
-        return *this;
-    }
-
-    HashIndexIterator operator--()
-    {
-        *this = previous();
-        return *this;
-    }
-
-    HashIndexIterator const operator--(int)
-    {
-        *this = previous();
-        return *this;
-    }
-
-    Key const& operator*() const
-    {
-        VERIFY(!is_end());
-        return (*m_current)[m_index];
-    }
-
-    Key const& operator->() const
-    {
-        VERIFY(!is_end());
-        return (*m_current)[m_index];
-    }
-
-    HashIndexIterator& operator=(HashIndexIterator const&) = default;
-    HashIndexIterator(HashIndexIterator const&) = default;
-
-private:
-    HashIndexIterator() = default;
-    explicit HashIndexIterator(HashBucket const*, size_t key_index = 0);
-    static HashIndexIterator end() { return HashIndexIterator(); }
-
-    [[nodiscard]] HashIndexIterator next();
-    [[nodiscard]] HashIndexIterator previous();
-    [[nodiscard]] Key key() const { return **this; }
-
-    WeakPtr<HashBucket const> m_current;
-    size_t m_index { 0 };
-
-    friend HashIndex;
-};
-
-}
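
The header removed above describes `HashIndex` as a persisted extendible hash table. For reference, here is a minimal in-memory sketch of that textbook algorithm in plain ISO C++ (std:: containers rather than AK; `ExtendibleHash`, `Bucket`, and the other names are illustrative, not the removed LibSQL API). It shows the two moves the removed code persisted to heap blocks: doubling the directory when the overflowing bucket is already at global depth, and splitting a bucket whose local depth lags behind.

```cpp
// Textbook in-memory extendible hashing, per the Wikipedia algorithm the
// removed header comment cites; illustrative only, not the LibSQL on-disk
// variant. Plain ISO C++ instead of AK containers.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <memory>
#include <vector>

struct Bucket {
    uint32_t local_depth { 1 };
    std::vector<uint32_t> keys;
};

class ExtendibleHash {
public:
    ExtendibleHash()
    {
        m_directory.push_back(std::make_shared<Bucket>());
        m_directory.push_back(std::make_shared<Bucket>());
    }

    void insert(uint32_t key)
    {
        for (;;) {
            auto bucket = m_directory[slot_of(key)];
            if (bucket->keys.size() < max_bucket_size) {
                bucket->keys.push_back(key);
                return;
            }
            if (bucket->local_depth == m_global_depth)
                double_directory(); // analogous to HashIndex::expand()
            split(slot_of(key));    // analogous to the redistribution loop
        }
    }

    bool contains(uint32_t key) const
    {
        auto const& keys = m_directory[slot_of(key)]->keys;
        return std::find(keys.begin(), keys.end(), key) != keys.end();
    }

private:
    static constexpr size_t max_bucket_size = 4; // stand-in for the Block::DATA_SIZE check

    size_t slot_of(uint32_t key) const
    {
        // The removed code hashed the whole tuple and took hash % size();
        // taking the low global-depth bits is equivalent for 2^d slots.
        return key & ((1u << m_global_depth) - 1);
    }

    void double_directory()
    {
        size_t old_size = m_directory.size();
        for (size_t i = 0; i < old_size; ++i)
            m_directory.push_back(m_directory[i]); // alias old buckets, move no data
        ++m_global_depth;
    }

    void split(size_t slot)
    {
        auto old_bucket = m_directory[slot];
        auto low = std::make_shared<Bucket>();
        auto high = std::make_shared<Bucket>();
        low->local_depth = high->local_depth = old_bucket->local_depth + 1;

        // Re-home every directory slot that aliased the overflowing bucket,
        // using the newly significant hash bit to pick a half.
        uint32_t bit = 1u << old_bucket->local_depth;
        for (size_t i = 0; i < m_directory.size(); ++i)
            if (m_directory[i] == old_bucket)
                m_directory[i] = (i & bit) ? high : low;

        // Redistribute the old keys; each lands in exactly one half.
        for (auto key : old_bucket->keys)
            m_directory[slot_of(key)]->keys.push_back(key);
    }

    uint32_t m_global_depth { 1 };
    std::vector<std::shared_ptr<Bucket>> m_directory;
};

int main()
{
    ExtendibleHash index;
    for (uint32_t key : { 39u, 87u, 77u, 42u, 98u, 40u, 53u, 8u, 37u, 12u })
        index.insert(key); // first ten key values from the deleted test
    assert(index.contains(42) && !index.contains(7));
    return 0;
}
```

Note that this sketch splits eagerly into two fresh buckets, while the removed LibSQL variant deferred that work, which is what the next sketch covers.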
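
When `HashIndex::expand` doubled the directory, it appended the new slots as "virtual" buckets (block index 0) and moved no data; `HashIndex::get_bucket` then resolved a virtual slot by halving the modulus until it reached the materialized bucket that still owns those keys. A small sketch of just that resolution step, again in plain ISO C++ with hypothetical `Slot` and `resolve` names:

```cpp
// Resolution of "virtual" directory slots, mirroring the loop in the
// removed HashIndex::get_bucket. Hypothetical types, illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct Slot {
    uint32_t block_index { 0 }; // 0 means no heap block was ever allocated
    uint32_t local_depth { 1 };
};

// After a doubling, slot i (for i >= size/2) starts out virtual and its keys
// still live in slot i % (size/2), or deeper down if that slot is virtual
// too. Halving the divisor retraces the doublings one at a time.
static size_t resolve(std::vector<Slot> const& directory, size_t index)
{
    size_t divisor = directory.size() / 2;
    while (directory[index].block_index == 0) {
        assert(divisor > 1); // slots 0 and 1 are always materialized
        index %= divisor;
        divisor /= 2;
    }
    return index;
}

int main()
{
    // Global depth 2, so slots 0..3; slot 3 was never materialized, so its
    // keys are still stored with slot 1 (3 % 2 == 1).
    std::vector<Slot> directory {
        { 10, 2 }, // slot 0: real block, split to depth 2
        { 11, 1 }, // slot 1: real block, still at depth 1
        { 12, 2 }, // slot 2: real block, split off slot 0
        { 0, 1 },  // slot 3: virtual, falls through to slot 1
    };
    assert(resolve(directory, 3) == 1);
    assert(resolve(directory, 2) == 2);
    return 0;
}
```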
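
Finally, the directory itself was persisted in pages written by `HashDirectoryNode::serialize`: a three-u32 header (global depth, pointer count, next node pointer) followed by one u32 pair (block index, local depth) per bucket, which is exactly the layout `max_pointers_in_node()` inverts. A quick check of the arithmetic, assuming a hypothetical 1016-byte `Block::DATA_SIZE` (the real value depends on the heap's block header):

```cpp
// Capacity of one hash-directory page, mirroring max_pointers_in_node().
// The 1016-byte data size is an assumption for illustration only.
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t data_size = 1016;                  // hypothetical Block::DATA_SIZE
constexpr size_t header = 3 * sizeof(uint32_t);     // global depth, #pointers, next node
constexpr size_t per_bucket = 2 * sizeof(uint32_t); // block index + local depth
constexpr size_t max_pointers = (data_size - header) / per_bucket;

int main()
{
    std::printf("%zu bucket pointers per directory page\n", max_pointers); // (1016 - 12) / 8 = 125
    return 0;
}
```

Under that assumption a single page tracks up to 125 buckets, and larger directories chain further pages through the next-node pointer.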