LibSQL: Remove unused HashIndex and related classes

The features provided by these classes should be used eventually, but so far we've been maintaining these classes for over 2 years without any actual use. We can restore them when it comes time to actually use them.
2025-07-27 17:27:35 +00:00 · 2023-08-07 11:27:12 -04:00 · 2023-08-07 11:27:12 -04:00 · 1b40bf9783
commit 1b40bf9783
parent 1151ba333a
6 changed files with 0 additions and 956 deletions
--- a/Userland/Libraries/LibSQL/CMakeLists.txt
+++ b/Userland/Libraries/LibSQL/CMakeLists.txt
@@ -15,7 +15,6 @@ set(SOURCES
    BTree.cpp
    BTreeIterator.cpp
    Database.cpp
-    HashIndex.cpp
    Heap.cpp
    Index.cpp
    Key.cpp
--- a/Userland/Libraries/LibSQL/Forward.h
+++ b/Userland/Libraries/LibSQL/Forward.h
@@ -11,10 +11,6 @@ class BTree;
 class BTreeIterator;
 class ColumnDef;
 class Database;
-class HashBucket;
-class HashDirectoryNode;
-class HashIndex;
-class HashIndexIterator;
 class Heap;
 class Index;
 class IndexNode;
--- a/Userland/Libraries/LibSQL/HashIndex.cpp
+++ b/Userland/Libraries/LibSQL/HashIndex.cpp
@@ -1,434 +0,0 @@
-/*
- * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#include <LibSQL/HashIndex.h>
-#include <LibSQL/Heap.h>
-#include <LibSQL/Key.h>
-#include <LibSQL/Serializer.h>
-
-namespace SQL {
-
-HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 node_number, size_t offset)
-    : IndexNode(index.node_pointer(node_number))
-    , m_hash_index(index)
-    , m_node_number(node_number)
-    , m_offset(offset)
-{
-}
-
-HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 pointer)
-    : IndexNode(pointer)
-    , m_hash_index(index)
-{
-}
-
-void HashDirectoryNode::deserialize(Serializer& serializer)
-{
-    dbgln_if(SQL_DEBUG, "Deserializing Hash Directory Node");
-    m_hash_index.m_global_depth = serializer.deserialize<u32>();
-    auto size = serializer.deserialize<u32>();
-    dbgln_if(SQL_DEBUG, "Global Depth {}, #Bucket pointers {}", m_hash_index.global_depth(), size);
-    auto next_node = serializer.deserialize<u32>();
-    if (next_node) {
-        dbgln_if(SQL_DEBUG, "Next node {}", next_node);
-        m_hash_index.m_nodes.append(next_node);
-    } else {
-        dbgln_if(SQL_DEBUG, "This is the last directory node");
-        m_is_last = true;
-    }
-    for (auto ix = 0u; ix < size; ix++) {
-        auto bucket_pointer = serializer.deserialize<u32>();
-        auto local_depth = serializer.deserialize<u32>();
-        dbgln_if(SQL_DEBUG, "--Index {} bucket pointer {} local depth {}", ix, bucket_pointer, local_depth);
-        m_hash_index.append_bucket(ix, local_depth, bucket_pointer);
-    }
-}
-
-void HashDirectoryNode::serialize(Serializer& serializer) const
-{
-    dbgln_if(SQL_DEBUG, "Serializing directory node #{}. Offset {}", m_node_number, m_offset);
-    serializer.serialize<u32>((u32)m_hash_index.global_depth());
-    serializer.serialize<u32>(number_of_pointers());
-    dbgln_if(SQL_DEBUG, "Global depth {}, #bucket pointers {}", m_hash_index.global_depth(), number_of_pointers());
-
-    u32 next_node;
-    if (m_node_number < (m_hash_index.m_nodes.size() - 1)) {
-        next_node = m_hash_index.m_nodes[m_node_number + 1];
-        dbgln_if(SQL_DEBUG, "Next directory node pointer {}", next_node);
-    } else {
-        next_node = 0u;
-        dbgln_if(SQL_DEBUG, "This is the last directory node");
-    }
-
-    serializer.serialize<u32>(next_node);
-    for (auto ix = 0u; ix < number_of_pointers(); ix++) {
-        auto& bucket = m_hash_index.m_buckets[m_offset + ix];
-        dbgln_if(SQL_DEBUG, "Bucket index #{} block_index {} local depth {} size {}", ix, bucket->block_index(), bucket->local_depth(), bucket->size());
-        serializer.serialize<u32>(bucket->block_index());
-        serializer.serialize<u32>(bucket->local_depth());
-    }
-}
-
-HashBucket::HashBucket(HashIndex& hash_index, Block::Index index, u32 local_depth, Block::Index pointer)
-    : IndexNode(pointer)
-    , m_hash_index(hash_index)
-    , m_local_depth(local_depth)
-    , m_index(index)
-{
-}
-
-void HashBucket::serialize(Serializer& serializer) const
-{
-    dbgln_if(SQL_DEBUG, "Serializing bucket: block_index {}, index #{}, local depth {} size {}",
-        block_index(), index(), local_depth(), size());
-    serializer.serialize<u32>(local_depth());
-    serializer.serialize<u32>(size());
-    for (auto& key : m_entries)
-        serializer.serialize<Key>(key);
-}
-
-void HashBucket::deserialize(Serializer& serializer)
-{
-    if (m_inflated || !block_index())
-        return;
-    dbgln_if(SQL_DEBUG, "Inflating Hash Bucket {}", block_index());
-    m_local_depth = serializer.deserialize<u32>();
-    dbgln_if(SQL_DEBUG, "Bucket Local Depth {}", m_local_depth);
-    auto size = serializer.deserialize<u32>();
-    dbgln_if(SQL_DEBUG, "Bucket has {} keys", size);
-    for (auto ix = 0u; ix < size; ix++) {
-        auto key = serializer.deserialize<Key>(m_hash_index.descriptor());
-        dbgln_if(SQL_DEBUG, "Key {}: {}", ix, key.to_deprecated_string());
-        m_entries.append(key);
-    }
-    m_inflated = true;
-}
-
-size_t HashBucket::length() const
-{
-    size_t len = 2 * sizeof(u32);
-    for (auto& key : m_entries)
-        len += key.length();
-    return len;
-}
-
-Optional<u32> HashBucket::get(Key& key)
-{
-    auto optional_index = find_key_in_bucket(key);
-    if (optional_index.has_value()) {
-        auto& k = m_entries[optional_index.value()];
-        key.set_block_index(k.block_index());
-        return k.block_index();
-    }
-    return {};
-}
-
-bool HashBucket::insert(Key const& key)
-{
-    if (!m_inflated)
-        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
-    if (find_key_in_bucket(key).has_value())
-        return false;
-    if (length() + key.length() > Block::DATA_SIZE) {
-        dbgln_if(SQL_DEBUG, "Adding key {} would make length exceed block size", key.to_deprecated_string());
-        return false;
-    }
-    m_entries.append(key);
-    m_hash_index.serializer().serialize_and_write(*this);
-    return true;
-}
-
-Optional<size_t> HashBucket::find_key_in_bucket(Key const& key)
-{
-    for (auto ix = 0u; ix < size(); ix++) {
-        auto& k = entries()[ix];
-        if (k == key)
-            return ix;
-    }
-    return {};
-}
-
-HashBucket const* HashBucket::next_bucket()
-{
-    for (auto ix = m_index + 1; ix < m_hash_index.size(); ix++) {
-        auto bucket = m_hash_index.get_bucket_by_index(ix);
-        m_hash_index.serializer().deserialize_block_to<HashBucket>(bucket->block_index(), *bucket);
-        if (bucket->size())
-            return bucket;
-    }
-    return nullptr;
-}
-
-HashBucket const* HashBucket::previous_bucket()
-{
-    for (auto ix = m_index - 1; ix > 0; ix--) {
-        auto bucket = m_hash_index.get_bucket_by_index(ix);
-        if (bucket->block_index() > 0)
-            return bucket;
-    }
-    return nullptr;
-}
-
-Vector<Key> const& HashBucket::entries()
-{
-    if (!m_inflated)
-        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
-    return m_entries;
-}
-
-Key const& HashBucket::operator[](size_t ix)
-{
-    if (!m_inflated)
-        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
-    return m_entries[ix];
-}
-
-Key const& HashBucket::operator[](size_t ix) const
-{
-    return m_entries[ix];
-}
-
-void HashBucket::list_bucket()
-{
-    warnln("Bucket #{} size {} local depth {} block_index {}{}",
-        index(), size(), local_depth(), block_index(), (block_index() > 0 ? "" : " (VIRTUAL)"));
-    for (auto& key : entries())
-        warnln("  {} hash {}", key.to_deprecated_string(), key.hash());
-}
-
-HashIndex::HashIndex(Serializer& serializer, NonnullRefPtr<TupleDescriptor> const& descriptor, Block::Index first_node)
-    : Index(serializer, descriptor, true, first_node)
-{
-    if (first_node == 0)
-        set_block_index(request_new_block_index());
-    if (serializer.has_block(first_node)) {
-        Block::Index block_index = first_node;
-        do {
-            VERIFY(serializer.has_block(block_index));
-            auto node = serializer.deserialize_block<HashDirectoryNode>(block_index, *this, block_index);
-            if (node.is_last())
-                break;
-            block_index = m_nodes.last(); // FIXME Ugly
-        } while (block_index);
-    } else {
-        auto bucket = append_bucket(0u, 1u, request_new_block_index());
-        bucket->m_inflated = true;
-        serializer.serialize_and_write(*bucket);
-        bucket = append_bucket(1u, 1u, request_new_block_index());
-        bucket->m_inflated = true;
-        serializer.serialize_and_write(*bucket);
-        m_nodes.append(first_node);
-        write_directory();
-    }
-}
-
-HashBucket* HashIndex::get_bucket(u32 index)
-{
-    VERIFY(index < m_buckets.size());
-    auto divisor = size() / 2;
-    while (m_buckets[index]->block_index() == 0) {
-        VERIFY(divisor > 1);
-        index = index % divisor;
-        divisor /= 2;
-    }
-    auto& bucket = m_buckets[index];
-    return bucket;
-}
-
-HashBucket* HashIndex::get_bucket_for_insert(Key const& key)
-{
-    auto key_hash = key.hash();
-
-    do {
-        dbgln_if(SQL_DEBUG, "HashIndex::get_bucket_for_insert({}) bucket {} of {}", key.to_deprecated_string(), key_hash % size(), size());
-        auto bucket = get_bucket(key_hash % size());
-        if (bucket->length() + key.length() < Block::DATA_SIZE)
-            return bucket;
-        dbgln_if(SQL_DEBUG, "Bucket is full (bucket size {}/length {} key length {}). Expanding directory", bucket->size(), bucket->length(), key.length());
-
-        // We previously doubled the directory but the target bucket is
-        // still at an older depth. Create new buckets at the current global
-        // depth and allocate the contents of the existing buckets to the
-        // newly created ones:
-        while (bucket->local_depth() < global_depth()) {
-            auto base_index = bucket->index();
-            auto step = 1 << (global_depth() - bucket->local_depth());
-            auto total_moved = 0;
-            for (auto ix = base_index + step; ix < size(); ix += step) {
-                auto& sub_bucket = m_buckets[ix];
-                sub_bucket->set_local_depth(bucket->local_depth() + 1);
-                auto moved = 0;
-                for (auto entry_index = (int)bucket->m_entries.size() - 1; entry_index >= 0; entry_index--) {
-                    if (bucket->m_entries[entry_index].hash() % size() == ix) {
-                        if (!sub_bucket->block_index())
-                            sub_bucket->set_block_index(request_new_block_index());
-                        sub_bucket->insert(bucket->m_entries.take(entry_index));
-                        moved++;
-                    }
-                }
-                if (moved > 0) {
-                    dbgln_if(SQL_DEBUG, "Moved {} entries from bucket #{} to #{}", moved, base_index, ix);
-                    serializer().serialize_and_write(*sub_bucket);
-                }
-                total_moved += moved;
-            }
-            if (total_moved)
-                dbgln_if(SQL_DEBUG, "Redistributed {} entries from bucket #{}", total_moved, base_index);
-            else
-                dbgln_if(SQL_DEBUG, "Nothing redistributed from bucket #{}", base_index);
-            bucket->set_local_depth(bucket->local_depth() + 1);
-            serializer().serialize_and_write(*bucket);
-            write_directory();
-
-            auto bucket_after_redistribution = get_bucket(key_hash % size());
-            if (bucket_after_redistribution->length() + key.length() < Block::DATA_SIZE)
-                return bucket_after_redistribution;
-        }
-        expand();
-    } while (true);
-    VERIFY_NOT_REACHED();
-}
-
-void HashIndex::expand()
-{
-    auto sz = size();
-    dbgln_if(SQL_DEBUG, "Expanding directory from {} to {} buckets", sz, 2 * sz);
-    for (auto i = 0u; i < sz; i++) {
-        auto bucket = get_bucket(i);
-        bucket = append_bucket(sz + i, bucket->local_depth(), 0u);
-        bucket->m_inflated = true;
-    }
-    m_global_depth++;
-    write_directory();
-}
-
-void HashIndex::write_directory()
-{
-    auto num_nodes_required = (size() / HashDirectoryNode::max_pointers_in_node()) + 1;
-    while (m_nodes.size() < num_nodes_required)
-        m_nodes.append(request_new_block_index());
-
-    size_t offset = 0u;
-    size_t num_node = 0u;
-    while (offset < size()) {
-        HashDirectoryNode node(*this, num_node, offset);
-        serializer().serialize_and_write(node);
-        offset += node.number_of_pointers();
-    }
-}
-
-HashBucket* HashIndex::append_bucket(u32 index, u32 local_depth, u32 pointer)
-{
-    m_buckets.append(make<HashBucket>(*this, index, local_depth, pointer));
-    return m_buckets.last();
-}
-
-HashBucket* HashIndex::get_bucket_by_index(u32 index)
-{
-    if (index >= size())
-        return nullptr;
-    return m_buckets[index];
-}
-
-Optional<u32> HashIndex::get(Key& key)
-{
-    auto hash = key.hash();
-    auto bucket_index = hash % size();
-    dbgln_if(SQL_DEBUG, "HashIndex::get({}) bucket_index {}", key.to_deprecated_string(), bucket_index);
-    auto bucket = get_bucket(bucket_index);
-    if constexpr (SQL_DEBUG)
-        bucket->list_bucket();
-    return bucket->get(key);
-}
-
-bool HashIndex::insert(Key const& key)
-{
-    dbgln_if(SQL_DEBUG, "HashIndex::insert({})", key.to_deprecated_string());
-    auto bucket = get_bucket_for_insert(key);
-    bucket->insert(key);
-    if constexpr (SQL_DEBUG)
-        bucket->list_bucket();
-    return true;
-}
-
-HashIndexIterator HashIndex::begin()
-{
-    return HashIndexIterator(get_bucket(0));
-}
-
-HashIndexIterator HashIndex::end()
-{
-    return HashIndexIterator::end();
-}
-
-HashIndexIterator HashIndex::find(Key const& key)
-{
-    auto hash = key.hash();
-    auto bucket_index = hash % size();
-    auto bucket = get_bucket(bucket_index);
-    auto optional_index = bucket->find_key_in_bucket(key);
-    if (!optional_index.has_value())
-        return end();
-    return HashIndexIterator(bucket, optional_index.value());
-}
-
-void HashIndex::list_hash()
-{
-    warnln("Number of buckets: {} (Global depth {})", size(), global_depth());
-    warn("Directory pointer(s): ");
-    for (auto ptr : m_nodes)
-        warn("{}, ", ptr);
-    warnln();
-
-    for (auto& bucket : m_buckets)
-        bucket->list_bucket();
-}
-
-HashIndexIterator::HashIndexIterator(HashBucket const* bucket, size_t index)
-    : m_current(bucket)
-    , m_index(index)
-{
-    VERIFY(!m_current || !index || (index < m_current->size()));
-    while (m_current && (m_current->size() == 0)) {
-        m_current = m_current->next_bucket();
-        m_index = 0;
-    }
-}
-
-HashIndexIterator HashIndexIterator::next()
-{
-    if (is_end())
-        return *this;
-    if (m_index < (m_current->size() - 1))
-        return HashIndexIterator(m_current.ptr(), m_index + 1);
-    return HashIndexIterator(m_current->next_bucket());
-}
-
-HashIndexIterator HashIndexIterator::previous()
-{
-    TODO();
-}
-
-bool HashIndexIterator::operator==(HashIndexIterator const& other) const
-{
-    if (is_end())
-        return other.is_end();
-    if (other.is_end())
-        return false;
-    VERIFY(&other.m_current->hash_index() == &m_current->hash_index());
-    return (m_current.ptr() == other.m_current.ptr()) && (m_index == other.m_index);
-}
-
-bool HashIndexIterator::operator==(Key const& other) const
-{
-    if (is_end())
-        return false;
-    if (other.is_null())
-        return false;
-    return (**this).compare(other);
-}
-
-}
--- a/Userland/Libraries/LibSQL/HashIndex.h
+++ b/Userland/Libraries/LibSQL/HashIndex.h
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#pragma once
-
-#include <AK/WeakPtr.h>
-#include <LibCore/EventReceiver.h>
-#include <LibSQL/Forward.h>
-#include <LibSQL/Heap.h>
-#include <LibSQL/Index.h>
-#include <LibSQL/Key.h>
-
-namespace SQL {
-
-/**
- * The HashIndex class is a straightforward implementation of a persisted
- * extendible hash table (see
- * https://en.wikipedia.org/wiki/Extendible_hashing).
- */
-
-class HashBucket : public IndexNode
-    , public Weakable<HashBucket> {
-public:
-    HashBucket(HashIndex&, Block::Index index, u32 local_depth, Block::Index pointer);
-    ~HashBucket() override = default;
-    Optional<u32> get(Key&);
-    bool insert(Key const&);
-    Vector<Key> const& entries();
-    Key const& operator[](size_t);
-    Key const& operator[](size_t ix) const;
-    [[nodiscard]] u32 local_depth() const { return m_local_depth; }
-    [[nodiscard]] u32 size() { return entries().size(); }
-    [[nodiscard]] size_t length() const;
-    [[nodiscard]] u32 size() const { return m_entries.size(); }
-    [[nodiscard]] Block::Index index() const { return m_index; }
-    void serialize(Serializer&) const;
-    void deserialize(Serializer&);
-    [[nodiscard]] HashIndex const& hash_index() const { return m_hash_index; }
-    [[nodiscard]] HashBucket const* next_bucket();
-    [[nodiscard]] HashBucket const* previous_bucket();
-    void list_bucket();
-
-private:
-    Optional<size_t> find_key_in_bucket(Key const&);
-    void set_index(Block::Index index) { m_index = index; }
-    void set_local_depth(u32 depth) { m_local_depth = depth; }
-
-    HashIndex& m_hash_index;
-    u32 m_local_depth { 1 };
-    Block::Index m_index { 0 };
-    Vector<Key> m_entries;
-    bool m_inflated { false };
-
-    friend HashIndex;
-};
-
-class HashIndex : public Index {
-    C_OBJECT(HashIndex);
-
-public:
-    ~HashIndex() override = default;
-
-    Optional<u32> get(Key&);
-    bool insert(Key const&);
-    bool insert(Key const&& entry) { return insert(entry); }
-    HashIndexIterator find(Key const&);
-    HashIndexIterator begin();
-    static HashIndexIterator end();
-
-    [[nodiscard]] u32 global_depth() const { return m_global_depth; }
-    [[nodiscard]] u32 size() const { return 1 << m_global_depth; }
-    [[nodiscard]] HashBucket* get_bucket(u32);
-    [[nodiscard]] u32 node_pointer(u32 node_number) const { return m_nodes[node_number]; }
-    [[nodiscard]] u32 first_node_pointer() const { return m_nodes[0]; }
-    [[nodiscard]] size_t nodes() const { return m_nodes.size(); }
-    void list_hash();
-
-private:
-    HashIndex(Serializer&, NonnullRefPtr<TupleDescriptor> const&, Block::Index);
-
-    void expand();
-    void write_directory();
-    HashBucket* append_bucket(u32 index, u32 local_depth, u32 pointer);
-    HashBucket* get_bucket_for_insert(Key const&);
-    [[nodiscard]] HashBucket* get_bucket_by_index(u32 index);
-
-    u32 m_global_depth { 1 };
-    Vector<u32> m_nodes;
-    Vector<OwnPtr<HashBucket>> m_buckets;
-
-    friend HashBucket;
-    friend HashDirectoryNode;
-};
-
-class HashDirectoryNode : public IndexNode {
-public:
-    HashDirectoryNode(HashIndex&, u32, size_t);
-    HashDirectoryNode(HashIndex&, u32);
-    HashDirectoryNode(HashDirectoryNode const& other) = default;
-    void deserialize(Serializer&);
-    void serialize(Serializer&) const;
-    [[nodiscard]] u32 number_of_pointers() const { return min(max_pointers_in_node(), m_hash_index.size() - m_offset); }
-    [[nodiscard]] bool is_last() const { return m_is_last; }
-    static constexpr size_t max_pointers_in_node() { return (Block::DATA_SIZE - 3 * sizeof(u32)) / (2 * sizeof(u32)); }
-
-private:
-    HashIndex& m_hash_index;
-    size_t m_node_number { 0 };
-    size_t m_offset { 0 };
-    bool m_is_last { false };
-};
-
-class HashIndexIterator {
-public:
-    [[nodiscard]] bool is_end() const { return !m_current; }
-
-    bool operator==(HashIndexIterator const& other) const;
-    bool operator==(Key const& other) const;
-
-    HashIndexIterator operator++()
-    {
-        *this = next();
-        return *this;
-    }
-
-    HashIndexIterator operator++(int)
-    {
-        *this = next();
-        return *this;
-    }
-
-    HashIndexIterator operator--()
-    {
-        *this = previous();
-        return *this;
-    }
-
-    HashIndexIterator const operator--(int)
-    {
-        *this = previous();
-        return *this;
-    }
-
-    Key const& operator*() const
-    {
-        VERIFY(!is_end());
-        return (*m_current)[m_index];
-    }
-
-    Key const& operator->() const
-    {
-        VERIFY(!is_end());
-        return (*m_current)[m_index];
-    }
-
-    HashIndexIterator& operator=(HashIndexIterator const&) = default;
-    HashIndexIterator(HashIndexIterator const&) = default;
-
-private:
-    HashIndexIterator() = default;
-    explicit HashIndexIterator(HashBucket const*, size_t key_index = 0);
-    static HashIndexIterator end() { return HashIndexIterator(); }
-
-    [[nodiscard]] HashIndexIterator next();
-    [[nodiscard]] HashIndexIterator previous();
-    [[nodiscard]] Key key() const { return **this; }
-
-    WeakPtr<HashBucket> m_current;
-    size_t m_index { 0 };
-
-    friend HashIndex;
-};
-
-}