diff --git a/AK/HashFunctions.h b/AK/HashFunctions.h index 757c43653e..8395f7a109 100644 --- a/AK/HashFunctions.h +++ b/AK/HashFunctions.h @@ -39,6 +39,16 @@ inline unsigned int_hash(u32 key) return key; } +inline unsigned double_hash(u32 key) +{ + key = ~key + (key >> 23); + key ^= (key << 12); + key ^= (key >> 7); + key ^= (key << 2); + key ^= (key >> 20); + return key; +} + inline unsigned pair_int_hash(u32 key1, u32 key2) { return int_hash((int_hash(key1) * 209) ^ (int_hash(key2 * 413))); diff --git a/AK/HashTable.h b/AK/HashTable.h index f74dfa5487..8d3bf7cd2a 100644 --- a/AK/HashTable.h +++ b/AK/HashTable.h @@ -26,11 +26,11 @@ #pragma once -#include -#include +#include +#include #include -#include -#include +#include +#include namespace AK { @@ -39,137 +39,102 @@ enum class HashSetResult { ReplacedExistingEntry }; -template -class HashTable; - -template +template class HashTableIterator { -public: - bool operator!=(const HashTableIterator& other) const - { - if (m_is_end && other.m_is_end) - return false; - return &m_table != &other.m_table - || m_is_end != other.m_is_end - || m_bucket_index != other.m_bucket_index - || m_bucket_iterator != other.m_bucket_iterator; - } - bool operator==(const HashTableIterator& other) const { return !(*this != other); } - ElementType& operator*() { return *m_bucket_iterator; } - ElementType* operator->() { return m_bucket_iterator.operator->(); } - HashTableIterator& operator++() - { - skip_to_next(); - return *this; - } - - void skip_to_next() - { - while (!m_is_end) { - if (m_bucket_iterator.is_end()) { - ++m_bucket_index; - if (m_bucket_index >= m_table.capacity()) { - m_is_end = true; - return; - } - m_bucket_iterator = m_table.bucket(m_bucket_index).begin(); - } else { - ++m_bucket_iterator; - } - if (!m_bucket_iterator.is_end()) - return; - } - } - -private: friend HashTableType; - explicit HashTableIterator(HashTableType& table, bool is_end, BucketIteratorType bucket_iterator = {}, size_t bucket_index = 0) - : m_table(table) - , m_bucket_index(bucket_index) - , m_is_end(is_end) - , m_bucket_iterator(bucket_iterator) +public: + bool operator==(const HashTableIterator& other) const { return m_bucket == other.m_bucket; } + bool operator!=(const HashTableIterator& other) const { return m_bucket != other.m_bucket; } + T& operator*() { return *m_bucket->slot(); } + T* operator->() { return m_bucket->slot(); } + void operator++() { skip_to_next(); } + +private: + void skip_to_next() { - ASSERT(!table.m_clearing); - ASSERT(!table.m_rehashing); - if (!is_end && !m_table.is_empty() && m_bucket_iterator.is_end()) { - m_bucket_iterator = m_table.bucket(0).begin(); - if (m_bucket_iterator.is_end()) - skip_to_next(); - } + if (!m_bucket) + return; + do { + ++m_bucket; + if (m_bucket->used) + return; + } while (!m_bucket->end); + if (m_bucket->end) + m_bucket = nullptr; } - HashTableType& m_table; - size_t m_bucket_index { 0 }; - bool m_is_end { false }; - BucketIteratorType m_bucket_iterator; + explicit HashTableIterator(BucketType* bucket) + : m_bucket(bucket) + { + } + + BucketType* m_bucket { nullptr }; }; template class HashTable { -private: - using Bucket = SinglyLinkedList; + struct Bucket { + bool used; + bool deleted; + bool end; + alignas(T) u8 storage[sizeof(T)]; + + T* slot() { return reinterpret_cast(storage); } + const T* slot() const { return reinterpret_cast(storage); } + }; public: HashTable() { } - HashTable(size_t capacity) - : m_buckets(new Bucket[capacity]) - , m_capacity(capacity) - { - } + HashTable(size_t capacity) { rehash(capacity); } + ~HashTable() { clear(); } + HashTable(const HashTable& other) { - ensure_capacity(other.size()); + rehash(other.capacity()); for (auto& it : other) set(it); } + HashTable& operator=(const HashTable& other) { if (this != &other) { clear(); - ensure_capacity(other.size()); + rehash(other.capacity()); for (auto& it : other) set(it); } return *this; } + HashTable(HashTable&& other) : m_buckets(other.m_buckets) , m_size(other.m_size) , m_capacity(other.m_capacity) + , m_deleted_count(other.m_deleted_count) { other.m_size = 0; other.m_capacity = 0; + other.m_deleted_count = 0; other.m_buckets = nullptr; } + HashTable& operator=(HashTable&& other) { if (this != &other) { clear(); - m_buckets = other.m_buckets; - m_size = other.m_size; - m_capacity = other.m_capacity; - other.m_size = 0; - other.m_capacity = 0; - other.m_buckets = nullptr; + m_buckets = exchange(other.m_buckets, nullptr); + m_size = exchange(other.m_size, 0); + m_capacity = exchange(other.m_capacity, 0); + m_deleted_count = exchange(other.m_deleted_count, 0); } return *this; } - ~HashTable() { clear(); } bool is_empty() const { return !m_size; } size_t size() const { return m_size; } size_t capacity() const { return m_capacity; } - void ensure_capacity(size_t capacity) - { - ASSERT(capacity >= size()); - rehash(capacity); - } - - HashSetResult set(const T&); - HashSetResult set(T&&); - template void set_from(U (&from_array)[N]) { @@ -178,43 +143,93 @@ public: } } - bool contains(const T&) const; - void clear(); + void ensure_capacity(size_t capacity) + { + ASSERT(capacity >= size()); + rehash(capacity * 2); + } - using Iterator = HashTableIterator; - friend Iterator; - Iterator begin() { return Iterator(*this, is_empty()); } - Iterator end() { return Iterator(*this, true); } + bool contains(const T& value) const + { + return find(value) != end(); + } - using ConstIterator = HashTableIterator; - friend ConstIterator; - ConstIterator begin() const { return ConstIterator(*this, is_empty()); } - ConstIterator end() const { return ConstIterator(*this, true); } + using Iterator = HashTableIterator; + + Iterator begin() + { + for (size_t i = 0; i < m_capacity; ++i) { + if (m_buckets[i].used) + return Iterator(&m_buckets[i]); + } + return end(); + } + + Iterator end() + { + return Iterator(nullptr); + } + + using ConstIterator = HashTableIterator; + + ConstIterator begin() const + { + for (size_t i = 0; i < m_capacity; ++i) { + if (m_buckets[i].used) + return ConstIterator(&m_buckets[i]); + } + return end(); + } + + ConstIterator end() const + { + return ConstIterator(nullptr); + } + + void clear() + { + if (!m_buckets) + return; + + for (size_t i = 0; i < m_capacity; ++i) { + if (m_buckets[i].used) + m_buckets[i].slot()->~T(); + } + + kfree(m_buckets); + m_buckets = nullptr; + m_capacity = 0; + m_size = 0; + m_deleted_count = 0; + } + + HashSetResult set(T&& value) + { + auto& bucket = lookup_for_writing(value); + if (bucket.used) { + (*bucket.slot()) = move(value); + return HashSetResult::ReplacedExistingEntry; + } + + new (bucket.slot()) T(move(value)); + bucket.used = true; + if (bucket.deleted) { + bucket.deleted = false; + --m_deleted_count; + } + ++m_size; + return HashSetResult::InsertedNewEntry; + } + + HashSetResult set(const T& value) + { + return set(T(value)); + } template Iterator find(unsigned hash, Finder finder) { - if (is_empty()) - return end(); - size_t bucket_index; - auto& bucket = lookup_with_hash(hash, &bucket_index); - auto bucket_iterator = bucket.find(finder); - if (bucket_iterator != bucket.end()) - return Iterator(*this, false, bucket_iterator, bucket_index); - return end(); - } - - template - ConstIterator find(unsigned hash, Finder finder) const - { - if (is_empty()) - return end(); - size_t bucket_index; - auto& bucket = lookup_with_hash(hash, &bucket_index); - auto bucket_iterator = bucket.find(finder); - if (bucket_iterator != bucket.end()) - return ConstIterator(*this, false, bucket_iterator, bucket_index); - return end(); + return Iterator(lookup_with_hash(hash, move(finder))); } Iterator find(const T& value) @@ -222,6 +237,12 @@ public: return find(TraitsForT::hash(value), [&](auto& other) { return TraitsForT::equals(value, other); }); } + template + ConstIterator find(unsigned hash, Finder finder) const + { + return ConstIterator(lookup_with_hash(hash, move(finder))); + } + ConstIterator find(const T& value) const { return find(TraitsForT::hash(value), [&](auto& other) { return TraitsForT::equals(value, other); }); @@ -237,170 +258,120 @@ public: return false; } - void remove(Iterator); + void remove(Iterator iterator) + { + ASSERT(iterator.m_bucket); + auto& bucket = *iterator.m_bucket; + ASSERT(bucket.used); + ASSERT(!bucket.end); + ASSERT(!bucket.deleted); + bucket.slot()->~T(); + bucket.used = false; + bucket.deleted = true; + --m_size; + ++m_deleted_count; + } private: - Bucket& lookup(const T&, size_t* bucket_index = nullptr); - const Bucket& lookup(const T&, size_t* bucket_index = nullptr) const; - - Bucket& lookup_with_hash(unsigned hash, size_t* bucket_index) + void insert_during_rehash(T&& value) { - if (bucket_index) - *bucket_index = hash % m_capacity; - return m_buckets[hash % m_capacity]; + auto& bucket = lookup_for_writing(value); + new (bucket.slot()) T(move(value)); + bucket.used = true; } - const Bucket& lookup_with_hash(unsigned hash, size_t* bucket_index) const + void rehash(size_t new_capacity) { - if (bucket_index) - *bucket_index = hash % m_capacity; - return m_buckets[hash % m_capacity]; + new_capacity = max(new_capacity, static_cast(4)); + + auto* old_buckets = m_buckets; + auto old_capacity = m_capacity; + + m_buckets = (Bucket*)kmalloc(sizeof(Bucket) * (new_capacity + 1)); + __builtin_memset(m_buckets, 0, sizeof(Bucket) * (new_capacity + 1)); + m_capacity = new_capacity; + m_deleted_count = 0; + + m_buckets[m_capacity].end = true; + + if (!old_buckets) + return; + + for (size_t i = 0; i < old_capacity; ++i) { + auto& old_bucket = old_buckets[i]; + if (old_bucket.used) { + insert_during_rehash(move(*old_bucket.slot())); + old_bucket.slot()->~T(); + } + } + + kfree(old_buckets); } - void rehash(size_t capacity); - void insert(const T&); - void insert(T&&); + template + Bucket* lookup_with_hash(unsigned hash, Finder finder, Bucket** usable_bucket_for_writing = nullptr) const + { + if (is_empty()) + return nullptr; + size_t bucket_index = hash % m_capacity; + for (;;) { + auto& bucket = m_buckets[bucket_index]; - Bucket& bucket(size_t index) { return m_buckets[index]; } - const Bucket& bucket(size_t index) const { return m_buckets[index]; } + if (usable_bucket_for_writing && !*usable_bucket_for_writing && !bucket.used) { + *usable_bucket_for_writing = &bucket; + } + + if (bucket.used && finder(*bucket.slot())) + return &bucket; + + if (!bucket.used && !bucket.deleted) + return nullptr; + + hash = double_hash(hash); + bucket_index = hash % m_capacity; + } + } + + const Bucket* lookup_for_reading(const T& value) const + { + return lookup_with_hash(TraitsForT::hash(value), [&value](auto& entry) { return TraitsForT::equals(entry, value); }); + } + + Bucket& lookup_for_writing(const T& value) + { + auto hash = TraitsForT::hash(value); + Bucket* usable_bucket_for_writing = nullptr; + if (auto* bucket_for_reading = lookup_with_hash( + hash, + [&value](auto& entry) { return TraitsForT::equals(entry, value); }, + &usable_bucket_for_writing)) { + return *const_cast(bucket_for_reading); + } + + if ((used_bucket_count() + 1) >= m_capacity) + rehash((size() + 1) * 2); + else if (usable_bucket_for_writing) + return *usable_bucket_for_writing; + + size_t bucket_index = hash % m_capacity; + + for (;;) { + auto& bucket = m_buckets[bucket_index]; + if (!bucket.used) + return bucket; + hash = double_hash(hash); + bucket_index = hash % m_capacity; + } + } + + size_t used_bucket_count() const { return m_size + m_deleted_count; } Bucket* m_buckets { nullptr }; - size_t m_size { 0 }; size_t m_capacity { 0 }; - bool m_clearing { false }; - bool m_rehashing { false }; + size_t m_deleted_count { 0 }; }; -template -HashSetResult HashTable::set(T&& value) -{ - if (!m_capacity) - rehash(1); - auto& bucket = lookup(value); - for (auto& e : bucket) { - if (TraitsForT::equals(e, value)) { - e = move(value); - return HashSetResult::ReplacedExistingEntry; - } - } - if (size() >= capacity()) { - rehash(size() + 1); - insert(move(value)); - } else { - bucket.append(move(value)); - } - m_size++; - return HashSetResult::InsertedNewEntry; -} - -template -HashSetResult HashTable::set(const T& value) -{ - if (!m_capacity) - rehash(1); - auto& bucket = lookup(value); - for (auto& e : bucket) { - if (TraitsForT::equals(e, value)) { - e = value; - return HashSetResult::ReplacedExistingEntry; - } - } - if (size() >= capacity()) { - rehash(size() + 1); - insert(value); - } else { - bucket.append(value); - } - m_size++; - return HashSetResult::InsertedNewEntry; -} - -template -void HashTable::rehash(size_t new_capacity) -{ - TemporaryChange change(m_rehashing, true); - new_capacity *= 2; - auto* new_buckets = new Bucket[new_capacity]; - auto* old_buckets = m_buckets; - size_t old_capacity = m_capacity; - m_buckets = new_buckets; - m_capacity = new_capacity; - - for (size_t i = 0; i < old_capacity; ++i) { - for (auto& value : old_buckets[i]) { - insert(move(value)); - } - } - - delete[] old_buckets; -} - -template -void HashTable::clear() -{ - TemporaryChange change(m_clearing, true); - if (m_buckets) { - delete[] m_buckets; - m_buckets = nullptr; - } - m_capacity = 0; - m_size = 0; -} - -template -void HashTable::insert(T&& value) -{ - auto& bucket = lookup(value); - bucket.append(move(value)); -} - -template -void HashTable::insert(const T& value) -{ - auto& bucket = lookup(value); - bucket.append(value); -} - -template -bool HashTable::contains(const T& value) const -{ - if (is_empty()) - return false; - auto& bucket = lookup(value); - for (auto& e : bucket) { - if (TraitsForT::equals(e, value)) - return true; - } - return false; -} - -template -void HashTable::remove(Iterator it) -{ - ASSERT(!is_empty()); - m_buckets[it.m_bucket_index].remove(it.m_bucket_iterator); - --m_size; -} - -template -auto HashTable::lookup(const T& value, size_t* bucket_index) -> Bucket& -{ - unsigned hash = TraitsForT::hash(value); - if (bucket_index) - *bucket_index = hash % m_capacity; - return m_buckets[hash % m_capacity]; -} - -template -auto HashTable::lookup(const T& value, size_t* bucket_index) const -> const Bucket& -{ - unsigned hash = TraitsForT::hash(value); - if (bucket_index) - *bucket_index = hash % m_capacity; - return m_buckets[hash % m_capacity]; -} - } using AK::HashTable;