diff --git a/Tests/LibSQL/TestSqlBtreeIndex.cpp b/Tests/LibSQL/TestSqlBtreeIndex.cpp new file mode 100644 index 0000000000..73aa2af2ab --- /dev/null +++ b/Tests/LibSQL/TestSqlBtreeIndex.cpp @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr static int keys[] = { + 39, + 87, + 77, + 42, + 98, + 40, + 53, + 8, + 37, + 12, + 90, + 72, + 73, + 11, + 88, + 22, + 10, + 82, + 25, + 61, + 97, + 18, + 60, + 68, + 21, + 3, + 58, + 29, + 13, + 17, + 89, + 81, + 16, + 64, + 5, + 41, + 36, + 91, + 38, + 24, + 32, + 50, + 34, + 94, + 49, + 47, + 1, + 6, + 44, + 76, +}; +constexpr static u32 pointers[] = { + 92, + 4, + 50, + 47, + 68, + 73, + 24, + 28, + 50, + 93, + 60, + 36, + 92, + 72, + 53, + 26, + 91, + 84, + 25, + 43, + 88, + 12, + 62, + 35, + 96, + 27, + 96, + 27, + 99, + 30, + 21, + 89, + 54, + 60, + 37, + 68, + 35, + 55, + 80, + 2, + 33, + 26, + 93, + 70, + 45, + 44, + 3, + 66, + 75, + 4, +}; + +NonnullRefPtr setup_btree(SQL::Heap& heap); +void insert_and_get_to_and_from_btree(int num_keys); +void insert_into_and_scan_btree(int num_keys); + +NonnullRefPtr setup_btree(SQL::Heap& heap) +{ + SQL::TupleDescriptor tuple_descriptor; + tuple_descriptor.append({ "key_value", SQL::SQLType::Integer, SQL::Order::Ascending }); + + auto root_pointer = heap.user_value(0); + if (!root_pointer) { + root_pointer = heap.new_record_pointer(); + heap.set_user_value(0, root_pointer); + } + auto btree = SQL::BTree::construct(heap, tuple_descriptor, true, root_pointer); + btree->on_new_root = [&heap, tree = btree.ptr()]() { // the callback outlives this function, so hold the tree by pointer instead of referencing the local handle + heap.set_user_value(0, tree->root()); + }; + return btree; +} + +void insert_and_get_to_and_from_btree(int num_keys) +{ + ScopeGuard guard([]() { unlink("test.db"); }); + { + auto heap = SQL::Heap::construct("test.db"); + auto btree = setup_btree(heap); + + for (auto ix = 0; ix < num_keys; ix++) { + SQL::Key k(btree->descriptor()); 
+ k[0] = keys[ix]; + k.set_pointer(pointers[ix]); + btree->insert(k); + } +#ifdef LIST_TREE + btree->list_tree(); +#endif + } + + { + auto heap = SQL::Heap::construct("test.db"); + auto btree = setup_btree(heap); + + for (auto ix = 0; ix < num_keys; ix++) { + SQL::Key k(btree->descriptor()); + k[0] = keys[ix]; + auto pointer_opt = btree->get(k); + EXPECT(pointer_opt.has_value()); + EXPECT_EQ(pointer_opt.value(), pointers[ix]); + } + } +} + +void insert_into_and_scan_btree(int num_keys) +{ + ScopeGuard guard([]() { unlink("test.db"); }); + { + auto heap = SQL::Heap::construct("test.db"); + auto btree = setup_btree(heap); + + for (auto ix = 0; ix < num_keys; ix++) { + SQL::Key k(btree->descriptor()); + k[0] = keys[ix]; + k.set_pointer(pointers[ix]); + btree->insert(k); + } +#ifdef LIST_TREE + btree->list_tree(); +#endif + } + + { + auto heap = SQL::Heap::construct("test.db"); + auto btree = setup_btree(heap); + + int count = 0; + SQL::Tuple prev; + for (auto iter = btree->begin(); !iter.is_end(); iter++, count++) { + auto key = (*iter); + if (prev.length()) { + EXPECT(prev < key); + } + auto key_value = (int)key[0]; + for (auto ix = 0; ix < num_keys; ix++) { + if (keys[ix] == key_value) { + EXPECT_EQ(key.pointer(), pointers[ix]); + break; + } + } + prev = key; + } + EXPECT_EQ(count, num_keys); + } +} + +TEST_CASE(btree_one_key) +{ + insert_and_get_to_and_from_btree(1); +} + +TEST_CASE(btree_four_keys) +{ + insert_and_get_to_and_from_btree(4); +} + +TEST_CASE(btree_five_keys) +{ + insert_and_get_to_and_from_btree(5); +} + +TEST_CASE(btree_10_keys) +{ + insert_and_get_to_and_from_btree(10); +} + +TEST_CASE(btree_13_keys) +{ + insert_and_get_to_and_from_btree(13); +} + +TEST_CASE(btree_20_keys) +{ + insert_and_get_to_and_from_btree(20); +} + +TEST_CASE(btree_25_keys) +{ + insert_and_get_to_and_from_btree(25); +} + +TEST_CASE(btree_30_keys) +{ + insert_and_get_to_and_from_btree(30); +} + +TEST_CASE(btree_35_keys) +{ + insert_and_get_to_and_from_btree(35); +} + 
+TEST_CASE(btree_40_keys) +{ + insert_and_get_to_and_from_btree(40); +} + +TEST_CASE(btree_45_keys) +{ + insert_and_get_to_and_from_btree(45); +} + +TEST_CASE(btree_50_keys) +{ + insert_and_get_to_and_from_btree(50); +} + +TEST_CASE(btree_scan_one_key) +{ + insert_into_and_scan_btree(1); +} + +TEST_CASE(btree_scan_four_keys) +{ + insert_into_and_scan_btree(4); +} + +TEST_CASE(btree_scan_five_keys) +{ + insert_into_and_scan_btree(5); +} + +TEST_CASE(btree_scan_10_keys) +{ + insert_into_and_scan_btree(10); +} + +TEST_CASE(btree_scan_15_keys) +{ + insert_into_and_scan_btree(15); +} + +TEST_CASE(btree_scan_30_keys) +{ + insert_into_and_scan_btree(30); +} + +TEST_CASE(btree_scan_50_keys) +{ + insert_into_and_scan_btree(50); +} diff --git a/Userland/Libraries/LibSQL/BTree.cpp b/Userland/Libraries/LibSQL/BTree.cpp new file mode 100644 index 0000000000..6d90d6bdb1 --- /dev/null +++ b/Userland/Libraries/LibSQL/BTree.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +namespace SQL { + +BTree::BTree(Heap& heap, TupleDescriptor const& descriptor, bool unique, u32 pointer) + : Index(heap, descriptor, unique, pointer) + , m_root(nullptr) +{ +} + +BTree::BTree(Heap& heap, TupleDescriptor const& descriptor, u32 pointer) + : BTree(heap, descriptor, true, pointer) +{ +} + +BTreeIterator BTree::begin() +{ + if (!m_root) + initialize_root(); + VERIFY(m_root); + return BTreeIterator(m_root, -1); +} + +BTreeIterator BTree::end() +{ + return BTreeIterator(nullptr, -1); +} + +void BTree::initialize_root() +{ + if (pointer()) { + if (pointer() < heap().size()) { + auto buffer = read_block(pointer()); + size_t offset = 0; + m_root = make(*this, nullptr, pointer(), buffer, offset); + } else { + m_root = make(*this, nullptr, pointer()); + } + } else { + set_pointer(new_record_pointer()); + m_root = make(*this, nullptr, pointer()); + if (on_new_root) + on_new_root(); + } +} + +TreeNode* 
BTree::new_root() +{ + set_pointer(new_record_pointer()); + m_root = make(*this, nullptr, m_root.leak_ptr(), pointer()); + add_to_write_ahead_log(m_root->as_index_node()); + if (on_new_root) + on_new_root(); + return m_root; +} + +bool BTree::insert(Key const& key) +{ + if (!m_root) + initialize_root(); + VERIFY(m_root); + return m_root->insert(key); +} + +bool BTree::update_key_pointer(Key const& key) +{ + if (!m_root) + initialize_root(); + VERIFY(m_root); + return m_root->update_key_pointer(key); +} + +Optional BTree::get(Key& key) +{ + if (!m_root) + initialize_root(); + VERIFY(m_root); + return m_root->get(key); +} + +BTreeIterator BTree::find(Key const& key) +{ + if (!m_root) + initialize_root(); + VERIFY(m_root); + for (auto node = m_root->node_for(key); node; node = node->up()) { + for (auto ix = 0u; ix < node->size(); ix++) { + auto match = (*node)[ix].match(key); + if (match == 0) + return BTreeIterator(node, (int)ix); + else if (match > 0) + return end(); + } + } + return end(); +} + +void BTree::list_tree() +{ + if (!m_root) + initialize_root(); + m_root->list_node(0); +} + +} diff --git a/Userland/Libraries/LibSQL/BTree.h b/Userland/Libraries/LibSQL/BTree.h new file mode 100644 index 0000000000..c356e3b05c --- /dev/null +++ b/Userland/Libraries/LibSQL/BTree.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace SQL { + +/** + * The BTree class models a B-Tree index. It contains a collection of + * Key objects organized in TreeNode objects. Keys can be inserted, + * located, deleted, and the set can be traversed in sort order. All keys in + * a tree have the same underlying structure. A BTree's TreeNodes and + * the keys it includes are lazily loaded from the Heap when needed. 
+ * + * The classes implementing the B-Tree functionality are BTree, TreeNode, + * BTreeIterator, and DownPointer (a smart pointer-like helper class). + */ +class DownPointer { +public: + explicit DownPointer(TreeNode*, u32 = 0); + DownPointer(TreeNode*, TreeNode*); + DownPointer(DownPointer const&); + DownPointer(TreeNode*, DownPointer&); + ~DownPointer() = default; + [[nodiscard]] u32 pointer() const { return m_pointer; } + TreeNode* node(); + +private: + void inflate(); + + TreeNode* m_owner; + u32 m_pointer { 0 }; + OwnPtr m_node { nullptr }; + friend TreeNode; +}; + +class TreeNode : public IndexNode { +public: + TreeNode(BTree&, TreeNode*, u32 = 0); + TreeNode(BTree&, TreeNode*, TreeNode*, u32 = 0); + TreeNode(BTree&, TreeNode*, u32 pointer, ByteBuffer&, size_t&); + ~TreeNode() override = default; + + [[nodiscard]] BTree& tree() const { return m_tree; } + [[nodiscard]] TreeNode* up() const { return m_up; } + [[nodiscard]] size_t size() const { return m_entries.size(); } + [[nodiscard]] Vector entries() const { return m_entries; } + [[nodiscard]] u32 down_pointer(size_t) const; + [[nodiscard]] TreeNode* down_node(size_t); + [[nodiscard]] bool is_leaf() const { return m_is_leaf; } + + [[nodiscard]] size_t max_keys_in_node(); + Key const& operator[](size_t) const; + bool insert(Key const&); + bool update_key_pointer(Key const&); + TreeNode* node_for(Key const&); + Optional get(Key&); + void serialize(ByteBuffer&) const override; + IndexNode* as_index_node() override { return dynamic_cast(this); } + +private: + TreeNode(BTree&, TreeNode*, DownPointer&, u32 = 0); + void dump_if(int, String&& = ""); + bool insert_in_leaf(Key const&); + void just_insert(Key const&, TreeNode* = nullptr); + void split(); + void list_node(int); + + BTree& m_tree; + TreeNode* m_up; + Vector m_entries; + bool m_is_leaf { true }; + Vector m_down; + + friend BTree; + friend BTreeIterator; +}; + +class BTree : public Index { + C_OBJECT(BTree); + +public: + ~BTree() override = default; + + 
u32 root() const { return (m_root) ? m_root->pointer() : 0; } + bool insert(Key const&); + bool update_key_pointer(Key const&); + Optional get(Key&); + BTreeIterator find(Key const& key); + BTreeIterator begin(); + static BTreeIterator end(); + void list_tree(); + + Function on_new_root; + +private: + BTree(Heap& heap, TupleDescriptor const&, bool unique, u32 pointer); + BTree(Heap& heap, TupleDescriptor const&, u32 pointer); + void initialize_root(); + TreeNode* new_root(); + OwnPtr m_root { nullptr }; + + friend BTreeIterator; + friend DownPointer; + friend TreeNode; +}; + +class BTreeIterator { +public: + [[nodiscard]] bool is_end() const { return m_where == Where::End; } + [[nodiscard]] size_t index() const { return m_index; } + bool update(Key const&); + + bool operator==(BTreeIterator const& other) const { return cmp(other) == 0; } + bool operator!=(BTreeIterator const& other) const { return cmp(other) != 0; } + bool operator<(BTreeIterator const& other) const { return cmp(other) < 0; } + bool operator>(BTreeIterator const& other) const { return cmp(other) > 0; } + bool operator<=(BTreeIterator const& other) const { return cmp(other) <= 0; } + bool operator>=(BTreeIterator const& other) const { return cmp(other) >= 0; } + bool operator==(Key const& other) const { return cmp(other) == 0; } + bool operator!=(Key const& other) const { return cmp(other) != 0; } + bool operator<(Key const& other) const { return cmp(other) < 0; } + bool operator>(Key const& other) const { return cmp(other) > 0; } + bool operator<=(Key const& other) const { return cmp(other) <= 0; } + bool operator>=(Key const& other) const { return cmp(other) >= 0; } + + BTreeIterator operator++() + { + *this = next(); + return *this; + } + + BTreeIterator operator++(int) + { + auto old_position = *this; *this = next(); // postfix form yields the position held before advancing + return old_position; + } + + BTreeIterator operator--() + { + *this = previous(); + return *this; + } + + BTreeIterator const operator--(int) + { + auto old_position = *this; *this = previous(); // postfix form yields the position held before stepping back + return old_position; + } + + Key const& 
operator*() const + { + VERIFY(!is_end()); + return (*m_current)[m_index]; + } + + Key const* operator->() const // member access needs a pointer; returning a reference makes every it->member use ill-formed + { + VERIFY(!is_end()); + return &(*m_current)[m_index]; + } + + BTreeIterator& operator=(BTreeIterator const&); + BTreeIterator(BTreeIterator const&) = default; + +private: + BTreeIterator(TreeNode*, int index); + static BTreeIterator end() { return BTreeIterator(nullptr, -1); } + + [[nodiscard]] int cmp(BTreeIterator const&) const; + [[nodiscard]] int cmp(Key const&) const; + [[nodiscard]] BTreeIterator next() const; + [[nodiscard]] BTreeIterator previous() const; + [[nodiscard]] Key key() const; + + enum class Where { + Valid, + End + }; + + Where m_where { Where::Valid }; + TreeNode* m_current { nullptr }; + int m_index { -1 }; + + friend BTree; +}; + +} diff --git a/Userland/Libraries/LibSQL/BTreeIterator.cpp b/Userland/Libraries/LibSQL/BTreeIterator.cpp new file mode 100644 index 0000000000..a926b49c0a --- /dev/null +++ b/Userland/Libraries/LibSQL/BTreeIterator.cpp @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace SQL { + +BTreeIterator::BTreeIterator(TreeNode* node, int index) + : m_current(node) + , m_index(index) +{ + if (!node) { + m_where = Where::End; + } else { + if (index < 0) { + while (!node->is_leaf() && (node->size() != 0)) { + node = node->down_node(0); + } + if (node->size() == 0) { + m_where = Where::End; + m_current = nullptr; + m_index = -1; + } else { + m_where = Where::Valid; + m_current = node; + m_index = 0; + } + } else { + VERIFY(m_index < (int)m_current->size()); + } + } +} + +int BTreeIterator::cmp(BTreeIterator const& other) const +{ + if (is_end()) + return (other.is_end()) ? 
0 : 1; + if (other.is_end()) + return -1; + VERIFY(&other.m_current->tree() == &m_current->tree()); + VERIFY((m_current->size() > 0) && (other.m_current->size() > 0)); + if (m_current != other.m_current) // different nodes: order by this node's last key against the other node's first key + return (*m_current)[m_current->size() - 1].compare((*(other.m_current))[0]); + return (*m_current)[m_index].compare((*(other.m_current))[other.m_index]); +} + +int BTreeIterator::cmp(Key const& other) const +{ + if (is_end()) + return 1; + if (other.is_null()) + return -1; + return key().compare(other); +} + +BTreeIterator BTreeIterator::next() const +{ + if (is_end()) + return end(); + + auto ix = m_index; + auto node = m_current; + if (ix < (int)(node->size() - 1)) { + if (node->is_leaf()) { + // We're in the middle of a leaf node. Next entry is + // is the next entry of the node: + return BTreeIterator(node, ix + 1); + } else { + /* + * We're in the middle of a non-leaf node. The iterator's + * next value is all the way down to the right, first entry. + * + * | + * +--+--+--+--+ + * | |##| | | + * +--+--+--+--+ + * / | | | \ + * | + * +--+--+--+--+ + * | | | | | + * +--+--+--+--+ + * / + * +--+--+--+--+ + * |++| | | | + * +--+--+--+--+ + */ + ix++; + while (!node->is_leaf()) { + node = node->down_node(ix); + ix = 0; + } + } + VERIFY(node->is_leaf() && (ix < (int)node->size())); + return BTreeIterator(node, ix); + } + + if (node->is_leaf()) { + // We currently at the last entry of a leaf node. We need to check + // one or more levels up until we end up in the "middle" of a node. + // If one level up we're still at the end of the node, we need + // to keep going up until we hit the root node. If we're at the + // end of the root node, we reached the end of the btree. + for (auto up = node->up(); up; up = node->up()) { + for (size_t i = 0; i < up->size(); i++) { + // One level up, try to find the entry with the current + // node's pointer as the left pointer: + if (up->down_pointer(i) == node->pointer()) + // Found it. 
This is the iterator's next value: + return BTreeIterator(up, (int)i); + } + // We didn't find the m_current's pointer as a left node. So + // it must be the right node all the way at the end and we need + // to go one more level up: + node = up; + } + // We reached the root node and we're still at the end of the node. + // That means we're at the end of the btree. + return end(); + } + + // If we're at the end of a non-leaf node, we need to follow the + // right pointer down until we find a leaf: + TreeNode* down; + for (down = node->down_node(node->size()); !down->is_leaf(); down = down->down_node(0)) + ; + return BTreeIterator(down, 0); +} + +// FIXME Reverse iterating doesn't quite work; we don't recognize the +// end (which is really the beginning) of the tree. +BTreeIterator BTreeIterator::previous() const +{ + if (is_end()) { + return end(); + } + + auto node = m_current; + auto ix = m_index; + if (ix > 0) { + if (node->is_leaf()) { + // We're in the middle of a leaf node. Previous entry is + // is the previous entry of the node: + return BTreeIterator(node, ix - 1); + } else { + /* + * We're in the middle of a non-leaf node. The iterator's + * previous value is all the way down to the left, last entry. + * + * | + * +--+--+--+--+ + * | | |##| | + * +--+--+--+--+ + * / | | | \ + * | + * +--+--+--+--+ + * | | | | | + * +--+--+--+--+ + * \ + * +--+--+--+--+ + * | | | |++| + * +--+--+--+--+ + */ + while (!node->is_leaf()) { + node = node->down_node(ix); + ix = (int)node->size(); + } + } + VERIFY(node->is_leaf() && (ix > 0) && (ix <= (int)node->size())); + return BTreeIterator(node, ix - 1); // ix holds the leaf's entry count here, so its last entry is at ix - 1 + } + + if (node->is_leaf()) { + // We currently at the first entry of a leaf node. We need to check one + // or more levels up until we end up in the "middle" of a node. + // If one level up we're still at the start of the node, we need + // to keep going up until we hit the root node. If we're at the + // start of the root node, we reached the start of the btree. 
+ auto stash_current = node; + for (auto up = node->up(); up; up = node->up()) { + for (size_t i = up->size(); i > 0; i--) { + // One level up, try to find the entry with the current + // node's pointer as the right pointer: + if (up->down_pointer(i) == node->pointer()) { + // Found it. This is the iterator's next value: + node = up; + ix = (int)i - 1; + return BTreeIterator(node, ix); + } + } + // We didn't find the m_current's pointer as a right node. So + // it must be the left node all the way at the start and we need + // to go one more level up: + node = up; + } + // We reached the root node and we're still at the start of the node. + // That means we're at the start of the btree. + return BTreeIterator(stash_current, 0); + } + + // If we're at the start of a non-leaf node, we need to follow the + // left pointer down until we find a leaf: + TreeNode* down = node->down_node(0); + while (!down->is_leaf()) + down = down->down_node(down->size()); + return BTreeIterator(down, down->size() - 1); +} + +Key BTreeIterator::key() const +{ + if (is_end()) + return {}; + return (*m_current)[m_index]; +} + +bool BTreeIterator::update(Key const& new_value) +{ + if (is_end()) + return false; + if ((cmp(new_value) == 0) && (key().pointer() == new_value.pointer())) + return true; + auto previous_iter = previous(); + auto next_iter = next(); + if (!m_current->tree().duplicates_allowed() && ((previous_iter == new_value) || (next_iter == new_value))) { + return false; + } + if ((previous_iter > new_value) || (next_iter < new_value)) + return false; + + // We are friend of BTree and TreeNode. Don't know how I feel about that. 
+ m_current->m_entries[m_index] = new_value; + m_current->tree().add_to_write_ahead_log(m_current); + return true; +} + +BTreeIterator& BTreeIterator::operator=(BTreeIterator const& other) +{ + if (&other != this) { + m_current = other.m_current; + m_index = other.m_index; + m_where = other.m_where; + } + return *this; +} + +} diff --git a/Userland/Libraries/LibSQL/CMakeLists.txt b/Userland/Libraries/LibSQL/CMakeLists.txt index f4d79cd413..1a726afa18 100644 --- a/Userland/Libraries/LibSQL/CMakeLists.txt +++ b/Userland/Libraries/LibSQL/CMakeLists.txt @@ -1,8 +1,15 @@ set(SOURCES + BTree.cpp + BTreeIterator.cpp + Heap.cpp + Index.cpp + Key.cpp Lexer.cpp + Meta.cpp Parser.cpp SyntaxHighlighter.cpp Token.cpp + TreeNode.cpp Tuple.cpp Value.cpp ) diff --git a/Userland/Libraries/LibSQL/Forward.h b/Userland/Libraries/LibSQL/Forward.h index fffb12748f..51fa732100 100644 --- a/Userland/Libraries/LibSQL/Forward.h +++ b/Userland/Libraries/LibSQL/Forward.h @@ -13,10 +13,13 @@ class ASTNode; class BetweenExpression; class BinaryOperatorExpression; class BlobLiteral; +class BTree; +class BTreeIterator; class CaseExpression; class CastExpression; class ChainedExpression; class CollateExpression; +class ColumnDef; class ColumnDefinition; class ColumnNameExpression; class CommonTableExpression; @@ -32,13 +35,19 @@ class ErrorStatement; class ExistsExpression; class Expression; class GroupByClause; +class Heap; class InChainedExpression; +class Index; +class IndexNode; +class IndexDef; class InSelectionExpression; class Insert; class InTableExpression; class InvertibleNestedDoubleExpression; class InvertibleNestedExpression; class IsExpression; +class Key; +class KeyPartDef; class Lexer; class LimitClause; class MatchExpression; @@ -54,12 +63,15 @@ class RenameColumn; class RenameTable; class ResultColumn; class ReturningClause; +class Row; class Select; class SignedNumber; class Statement; class StringLiteral; +class TableDef; class TableOrSubquery; class Token; +class TreeNode; 
class Tuple; class TypeName; class UnaryOperatorExpression; diff --git a/Userland/Libraries/LibSQL/Heap.cpp b/Userland/Libraries/LibSQL/Heap.cpp new file mode 100644 index 0000000000..cab2e89611 --- /dev/null +++ b/Userland/Libraries/LibSQL/Heap.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace SQL { + +Heap::Heap(String file_name) +{ + set_name(move(file_name)); + size_t file_size = 0; + struct stat stat_buffer; + if (stat(name().characters(), &stat_buffer) != 0) { + if (errno != ENOENT) { + perror("stat"); + VERIFY_NOT_REACHED(); + } + } else { + file_size = stat_buffer.st_size; + } + if (file_size > 0) + m_next_block = m_end_of_file = file_size / BLOCKSIZE; + + auto file_or_error = Core::File::open(name(), Core::OpenMode::ReadWrite); + if (file_or_error.is_error()) { + warnln("Couldn't open '{}': {}", name(), file_or_error.error()); + VERIFY_NOT_REACHED(); + } + m_file = file_or_error.value(); + if (file_size > 0) + read_zero_block(); + else + initialize_zero_block(); +} + +Result Heap::read_block(u32 block) +{ + auto buffer_or_empty = m_write_ahead_log.get(block); + if (buffer_or_empty.has_value()) + return buffer_or_empty.value(); + + VERIFY(block < m_next_block); + dbgln_if(SQL_DEBUG, "Read heap block {}", block); + if (!seek_block(block)) + VERIFY_NOT_REACHED(); + auto ret = m_file->read(BLOCKSIZE); + if (ret.is_empty()) + return String("Could not read block"); + return ret; +} + +bool Heap::write_block(u32 block, ByteBuffer& buffer) +{ + VERIFY(block < m_next_block); + if (!seek_block(block)) + VERIFY_NOT_REACHED(); + dbgln_if(SQL_DEBUG, "Write heap block {} size {}", block, buffer.size()); + VERIFY(buffer.size() <= BLOCKSIZE); + auto sz = buffer.size(); + if (sz < BLOCKSIZE) { + buffer.resize(BLOCKSIZE); + memset(buffer.offset_pointer((int)sz), 0, BLOCKSIZE - sz); + } + if 
(m_file->write(buffer.data(), (int)buffer.size())) { + if (block == m_end_of_file) + m_end_of_file++; + return true; + } + return false; +} + +bool Heap::seek_block(u32 block) +{ + if (block == m_end_of_file) { + off_t pos; + if (!m_file->seek(0, Core::SeekMode::FromEndPosition, &pos)) { + warnln("Could not seek block {} from file {}, which is at the end of the file", block, name()); + warnln("FD: {} Position: {} error: {}", m_file->fd(), pos, m_file->error_string()); + return false; + } + } else if (block > m_end_of_file) { + warnln("Seeking block {} of file {} which is beyond the end of the file", block, name()); + return false; + } else { + if (!m_file->seek(block * BLOCKSIZE)) { + warnln("Could not seek block {} of file {}. The current size is {} blocks", + block, name(), m_end_of_file); + return false; + } + } + return true; +} + +u32 Heap::new_record_pointer() +{ + if (m_free_list) { + auto block_or_error = read_block(m_free_list); + if (block_or_error.is_error()) { + warnln("FREE LIST CORRUPTION"); + VERIFY_NOT_REACHED(); + } + auto new_pointer = m_free_list; + size_t offset = 0; + deserialize_from(block_or_error.value(), offset, m_free_list); + update_zero_block(); + return new_pointer; + } + return m_next_block++; +} + +void Heap::flush() +{ + Vector blocks; + for (auto& wal_entry : m_write_ahead_log) { + blocks.append(wal_entry.key); + } + quick_sort(blocks); + for (auto& block : blocks) { + auto buffer_or_empty = m_write_ahead_log.get(block); + if (buffer_or_empty->is_empty()) { + VERIFY_NOT_REACHED(); + } + dbgln_if(SQL_DEBUG, "Flushing block {} to {}", block, name()); + write_block(block, buffer_or_empty.value()); + } + m_write_ahead_log.clear(); +} + +constexpr static const char* FILE_ID = "SerenitySQL "; +constexpr static int VERSION_OFFSET = 12; +constexpr static int SCHEMAS_ROOT_OFFSET = 16; +constexpr static int TABLES_ROOT_OFFSET = 20; +constexpr static int TABLE_COLUMNS_ROOT_OFFSET = 24; +constexpr static int FREE_LIST_OFFSET = 28; +constexpr 
static int USER_VALUES_OFFSET = 32; + +void Heap::read_zero_block() +{ + char file_id[256]; + auto bytes_or_error = read_block(0); + if (bytes_or_error.is_error()) + VERIFY_NOT_REACHED(); + auto buffer = bytes_or_error.value(); + memcpy(file_id, buffer.offset_pointer(0), strlen(FILE_ID)); + file_id[strlen(FILE_ID)] = 0; + if (strncmp(file_id, FILE_ID, strlen(FILE_ID)) != 0) { + warnln("Corrupt zero page in {}", name()); + VERIFY_NOT_REACHED(); + } + dbgln_if(SQL_DEBUG, "Read zero block from {}", name()); + memcpy(&m_version, buffer.offset_pointer(VERSION_OFFSET), sizeof(u32)); + dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF)); + memcpy(&m_schemas_root, buffer.offset_pointer(SCHEMAS_ROOT_OFFSET), sizeof(u32)); + dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root); + memcpy(&m_tables_root, buffer.offset_pointer(TABLES_ROOT_OFFSET), sizeof(u32)); + dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root); + memcpy(&m_table_columns_root, buffer.offset_pointer(TABLE_COLUMNS_ROOT_OFFSET), sizeof(u32)); + dbgln_if(SQL_DEBUG, "Table columns root node: {}", m_table_columns_root); + memcpy(&m_free_list, buffer.offset_pointer(FREE_LIST_OFFSET), sizeof(u32)); + dbgln_if(SQL_DEBUG, "Free list: {}", m_free_list); + memcpy(m_user_values.data(), buffer.offset_pointer(USER_VALUES_OFFSET), m_user_values.size() * sizeof(u32)); + for (auto ix = 0u; ix < m_user_values.size(); ix++) { + if (m_user_values[ix]) { + dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]); + } + } +} + +void Heap::update_zero_block() +{ + dbgln_if(SQL_DEBUG, "Write zero block to {}", name()); + dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF)); + dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root); + dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root); + dbgln_if(SQL_DEBUG, "Table Columns root node: {}", m_table_columns_root); + dbgln_if(SQL_DEBUG, "Free list: {}", m_free_list); + 
for (auto ix = 0u; ix < m_user_values.size(); ix++) { + if (m_user_values[ix]) { + dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]); + } + } + + auto buffer = ByteBuffer::create_zeroed(BLOCKSIZE); + buffer.overwrite(0, FILE_ID, strlen(FILE_ID)); + buffer.overwrite(VERSION_OFFSET, &m_version, sizeof(u32)); + buffer.overwrite(SCHEMAS_ROOT_OFFSET, &m_schemas_root, sizeof(u32)); + buffer.overwrite(TABLES_ROOT_OFFSET, &m_tables_root, sizeof(u32)); + buffer.overwrite(TABLE_COLUMNS_ROOT_OFFSET, &m_table_columns_root, sizeof(u32)); + buffer.overwrite(FREE_LIST_OFFSET, &m_free_list, sizeof(u32)); + buffer.overwrite(USER_VALUES_OFFSET, m_user_values.data(), m_user_values.size() * sizeof(u32)); + + add_to_wal(0, buffer); +} + +void Heap::initialize_zero_block() +{ + m_version = 0x00000001; + m_schemas_root = 0; + m_tables_root = 0; + m_table_columns_root = 0; + m_next_block = 1; + m_free_list = 0; + for (auto& user : m_user_values) { + user = 0u; + } + update_zero_block(); +} + +} diff --git a/Userland/Libraries/LibSQL/Heap.h b/Userland/Libraries/LibSQL/Heap.h new file mode 100644 index 0000000000..9375106f8f --- /dev/null +++ b/Userland/Libraries/LibSQL/Heap.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace SQL { + +constexpr static u32 BLOCKSIZE = 1024; + +/** + * A Heap is a logical container for database (SQL) data. Conceptually a + * Heap can be a database file, or a memory block, or another storage medium. + * It contains datastructures, like B-Trees, hash_index tables, or tuple stores + * (basically a list of data tuples). + * + * A Heap can be thought of the backing storage of a single database. It's + * assumed that a single SQL database is backed by a single Heap. + * + * Currently only B-Trees and tuple stores are implemented. 
+ */ +class Heap : public Core::Object { + C_OBJECT(Heap); + +public: + explicit Heap(String); + virtual ~Heap() override { flush(); } + + u32 size() const { return m_end_of_file; } + Result read_block(u32); + bool write_block(u32, ByteBuffer&); + u32 new_record_pointer(); + [[nodiscard]] bool has_block(u32 block) const { return block < size(); } + + u32 schemas_root() const { return m_schemas_root; } + + void set_schemas_root(u32 root) + { + m_schemas_root = root; + update_zero_block(); + } + + u32 tables_root() const { return m_tables_root; } + + void set_tables_root(u32 root) + { + m_tables_root = root; + update_zero_block(); + } + + u32 table_columns_root() const { return m_table_columns_root; } + + void set_table_columns_root(u32 root) + { + m_table_columns_root = root; + update_zero_block(); + } + u32 version() const { return m_version; } + + u32 user_value(size_t index) const + { + VERIFY(index < m_user_values.size()); + return m_user_values[index]; + } + + void set_user_value(size_t index, u32 value) + { + VERIFY(index < m_user_values.size()); + m_user_values[index] = value; + update_zero_block(); + } + + void add_to_wal(u32 block, ByteBuffer& buffer) { m_write_ahead_log.set(block, buffer); } + void flush(); + +private: + bool seek_block(u32); + void read_zero_block(); + void initialize_zero_block(); + void update_zero_block(); + + RefPtr m_file; + u32 m_free_list { 0 }; + u32 m_next_block { 1 }; + u32 m_end_of_file { 1 }; + u32 m_schemas_root { 0 }; + u32 m_tables_root { 0 }; + u32 m_table_columns_root { 0 }; + u32 m_version { 0x00000001 }; + Array m_user_values; + HashMap m_write_ahead_log; +}; + +} diff --git a/Userland/Libraries/LibSQL/Index.cpp b/Userland/Libraries/LibSQL/Index.cpp new file mode 100644 index 0000000000..1ccdc7f7bc --- /dev/null +++ b/Userland/Libraries/LibSQL/Index.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, Jan de Visser + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +namespace SQL { + 
+Index::Index(Heap& heap, TupleDescriptor const& descriptor, bool unique, u32 pointer) // Initializes all four members from the arguments.
+    : m_heap(heap)
+    , m_descriptor(descriptor)
+    , m_unique(unique)
+    , m_pointer(pointer)
+{
+}
+// This overload does not initialize m_unique, so it keeps its default member initializer (false in the class declaration).
+Index::Index(Heap& heap, TupleDescriptor const& descriptor, u32 pointer)
+    : m_heap(heap)
+    , m_descriptor(descriptor)
+    , m_pointer(pointer)
+{
+}
+/* Reads `block` through m_heap.read_block(). On error the failure is logged
+ * with warnln() and VERIFY_NOT_REACHED() is hit; otherwise the value carried
+ * by the result is returned.
+ */
+ByteBuffer Index::read_block(u32 block)
+{
+    auto ret = m_heap.read_block(block);
+    if (ret.is_error()) {
+        warnln("Error reading block {}: {}", block, ret.error());
+        VERIFY_NOT_REACHED();
+    }
+    return ret.value();
+}
+/* Serializes `node` into a fresh ByteBuffer and hands it to
+ * m_heap.add_to_wal() under the node's pointer. VERIFYs first that the
+ * node's pointer is non-zero.
+ */
+void Index::add_to_write_ahead_log(IndexNode* node)
+{
+    VERIFY(node->pointer());
+    ByteBuffer buffer;
+    node->serialize(buffer);
+    m_heap.add_to_wal(node->pointer(), buffer);
+}
+
+}
diff --git a/Userland/Libraries/LibSQL/Index.h b/Userland/Libraries/LibSQL/Index.h
new file mode 100644
index 0000000000..78cc805fe9
--- /dev/null
+++ b/Userland/Libraries/LibSQL/Index.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021, Jan de Visser
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace SQL {
+// Base for index nodes: stores the node's u32 pointer and declares the pure virtual serialize() and as_index_node().
+class IndexNode {
+public:
+    virtual ~IndexNode() = default;
+    [[nodiscard]] u32 pointer() const { return m_pointer; }
+    virtual void serialize(ByteBuffer&) const = 0;
+    virtual IndexNode* as_index_node() = 0;
+
+protected:
+    explicit IndexNode(u32 pointer)
+        : m_pointer(pointer)
+    {
+    }
+
+    void set_pointer(u32 pointer) { m_pointer = pointer; }
+
+private:
+    u32 m_pointer;
+};
+// Base class for indexes (declared with C_OBJECT_ABSTRACT). Holds a reference to the Heap, the key descriptor, the uniqueness flag and a u32 pointer.
+class Index : public Core::Object {
+    C_OBJECT_ABSTRACT(Index);
+
+public:
+    ~Index() override = default;
+
+    TupleDescriptor descriptor() const { return m_descriptor; } // Returned by value.
+    [[nodiscard]] bool duplicates_allowed() const { return !m_unique; } // Negation of unique().
+    [[nodiscard]] bool unique() const { return m_unique; }
+    [[nodiscard]] u32 pointer() const { return m_pointer; }
+
+protected:
+    Index(Heap& heap, TupleDescriptor const&, bool unique, u32 pointer);
+    Index(Heap& heap, TupleDescriptor const&, u32 pointer);
+    [[nodiscard]] Heap const& heap() const { return m_heap; }
+    [[nodiscard]] Heap& heap() { return m_heap; }
+    void set_pointer(u32 pointer) { m_pointer = pointer; }
+    u32 new_record_pointer() { return m_heap.new_record_pointer(); } // Forwards to the heap.
+    ByteBuffer read_block(u32);
+    void add_to_write_ahead_log(IndexNode*);
+
+private:
+    Heap& m_heap;
+    TupleDescriptor m_descriptor;
+    bool m_unique { false };
+    u32 m_pointer { 0 };
+};
+
+}
diff --git a/Userland/Libraries/LibSQL/Key.cpp b/Userland/Libraries/LibSQL/Key.cpp
new file mode 100644
index 0000000000..aba561acc0
--- /dev/null
+++ b/Userland/Libraries/LibSQL/Key.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021, Jan de Visser
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+
+namespace SQL {
+
+Key::Key()
+    : Tuple()
+{
+}
+
+Key::Key(TupleDescriptor const& descriptor)
+    : Tuple(descriptor)
+{
+}
+// Builds the tuple from index->to_tuple_descriptor() and remembers the index definition in m_index.
+Key::Key(RefPtr index)
+    : Tuple(index->to_tuple_descriptor())
+    , m_index(index)
+{
+}
+// Forwards descriptor, buffer and offset to the matching Tuple constructor.
+Key::Key(TupleDescriptor const& descriptor, ByteBuffer& buffer, size_t& offset)
+    : Tuple(descriptor, buffer, offset)
+{
+}
+// Delegates to Key(TupleDescriptor const&) and then calls deserialize(buffer, offset). NOTE(review): unlike Key(RefPtr), this path never assigns m_index - confirm that is intended.
+Key::Key(RefPtr index, ByteBuffer& buffer, size_t& offset)
+    : Key(index->to_tuple_descriptor())
+{
+    deserialize(buffer, offset);
+}
+
+}
diff --git a/Userland/Libraries/LibSQL/Key.h b/Userland/Libraries/LibSQL/Key.h
new file mode 100644
index 0000000000..1200497cb4
--- /dev/null
+++ b/Userland/Libraries/LibSQL/Key.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021, Jan de Visser
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace SQL {
+// A Tuple that can additionally remember the index definition it was created from (m_index).
+class Key : public Tuple {
+public:
+    Key();
+    explicit Key(TupleDescriptor const&);
+    explicit Key(RefPtr);
+    Key(TupleDescriptor const&, ByteBuffer&, size_t& offset);
+    Key(RefPtr, ByteBuffer&, size_t& offset);
+    Key(Key const&) = default;
+    RefPtr index() const { return m_index; }
+    [[nodiscard]] virtual size_t data_length() const override { return Tuple::data_length() + sizeof(u32); } // Tuple's length plus one u32 (presumably the record pointer - TODO confirm).
+
+private:
+    RefPtr m_index;
+};
+
+}
diff --git a/Userland/Libraries/LibSQL/Meta.cpp b/Userland/Libraries/LibSQL/Meta.cpp
new file mode 100644
index 0000000000..43a240f9af
--- /dev/null
+++ b/Userland/Libraries/LibSQL/Meta.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2021, Jan de Visser
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+#include
+
+namespace SQL {
+
+SchemaDef::SchemaDef(String name)
+    : Relation(move(name))
+{
+}
+// Takes the relation name from the key's "schema_name" field.
+SchemaDef::SchemaDef(Key const& key)
+    : Relation(key["schema_name"].to_string().value())
+{
+}
+// Builds a key with "schema_name" set to name() and the key pointer set to pointer().
+Key SchemaDef::key() const
+{
+    auto key = Key(index_def()->to_tuple_descriptor());
+    key["schema_name"] = name();
+    key.set_pointer(pointer());
+    return key;
+}
+// Returns a Key constructed from index_def() with no field assigned here.
+Key SchemaDef::make_key()
+{
+    return Key(index_def());
+}
+/* Returns an IndexDef named "$schema" (unique, pointer 0) with the single
+ * ascending Text column "schema_name".
+ * NOTE(review): there is no `static` on s_index_def, so a new IndexDef is
+ * constructed on every call and the size() guard is always true for it; the
+ * s_ prefix suggests a function-local static may have been intended - confirm.
+ */
+NonnullRefPtr SchemaDef::index_def()
+{
+    NonnullRefPtr s_index_def = IndexDef::construct("$schema", true, 0);
+    if (!s_index_def->size()) {
+        s_index_def->append_column("schema_name", SQLType::Text, Order::Ascending);
+    }
+    return s_index_def;
+}
+
+ColumnDef::ColumnDef(Relation* parent, size_t column_number, String name, SQLType sql_type)
+    : Relation(move(name), parent)
+    , m_index(column_number)
+    , m_type(sql_type)
+{
+}
+// Builds a key with all four fields set: the parent relation's hash, the column number, the name and the type (number and type cast to int).
+Key ColumnDef::key() const
+{
+    auto key = Key(index_def());
+    key["table_hash"] = parent_relation()->hash();
+    key["column_number"] = (int)column_number();
+    key["column_name"] = name();
+    key["column_type"] = (int)type();
+    return key;
+}
+// Returns a key with only "table_hash" assigned, taken from the hash of table_def's key.
+Key ColumnDef::make_key(TableDef const& table_def)
+{
+    Key key(index_def());
+    key["table_hash"] = table_def.key().hash();
+    return key;
+}
+// Returns an IndexDef named "$column" with the four ascending columns appended below. Built afresh on every call (no `static`); see the NOTE(review) on SchemaDef::index_def().
+NonnullRefPtr ColumnDef::index_def()
+{
+    NonnullRefPtr s_index_def = IndexDef::construct("$column", true, 0);
+    if (!s_index_def->size()) {
+        s_index_def->append_column("table_hash", SQLType::Integer, Order::Ascending);
+        s_index_def->append_column("column_number", SQLType::Integer, Order::Ascending);
+        s_index_def->append_column("column_name", SQLType::Text, Order::Ascending);
+        s_index_def->append_column("column_type", SQLType::Integer, Order::Ascending);
+    }
+    return s_index_def;
+}
+// The column number passed to ColumnDef is index->size() at construction time, i.e. the number of parts the index has so far.
+KeyPartDef::KeyPartDef(IndexDef* index, String name, SQLType sql_type, Order sort_order)
+    : ColumnDef(index, index->size(), move(name), sql_type)
+    , m_sort_order(sort_order)
+{
+}
+
+IndexDef::IndexDef(TableDef* table, String name, bool unique, u32 pointer)
+    : Relation(move(name), pointer, table)
+    , m_key_definition()
+    , m_unique(unique)
+{
+}
+// Delegates to the constructor above with a null table.
+IndexDef::IndexDef(String name, bool unique, u32 pointer)
+    : IndexDef(nullptr, move(name), unique, pointer)
+{
+}
+// Constructs a KeyPartDef owned by this index and appends it to m_key_definition.
+void IndexDef::append_column(String name, SQLType sql_type, Order sort_order)
+{
+    auto part = KeyPartDef::construct(this, move(name), sql_type, sort_order);
+    m_key_definition.append(part);
+}
+// Produces one { name, type, sort order } descriptor element per key part, in definition order.
+TupleDescriptor IndexDef::to_tuple_descriptor() const
+{
+    TupleDescriptor ret;
+    for (auto& part : m_key_definition) {
+        ret.append({ part.name(), part.type(), part.sort_order() });
+    }
+    return ret;
+}
+// NOTE(review): parent_relation() is dereferenced unconditionally, yet IndexDef(String, bool, u32) passes a null table - confirm key() is never called on such an instance.
+Key IndexDef::key() const
+{
+    auto key = Key(index_def()->to_tuple_descriptor());
+    key["table_hash"] = parent_relation()->key().hash();
+    key["index_name"] = name();
+    key["unique"] = unique() ? 1 : 0;
+    return key;
+}
+// Returns a key with only "table_hash" assigned, taken from the hash of table_def's key.
+Key IndexDef::make_key(TableDef const& table_def)
+{
+    Key key(index_def());
+    key["table_hash"] = table_def.key().hash();
+    return key;
+}
+// Returns an IndexDef named "$index" with the three ascending columns appended below. Built afresh on every call (no `static`).
+NonnullRefPtr IndexDef::index_def()
+{
+    NonnullRefPtr s_index_def = IndexDef::construct("$index", true, 0);
+    if (!s_index_def->size()) {
+        s_index_def->append_column("table_hash", SQLType::Integer, Order::Ascending);
+        s_index_def->append_column("index_name", SQLType::Text, Order::Ascending);
+        s_index_def->append_column("unique", SQLType::Integer, Order::Ascending);
+    }
+    return s_index_def;
+}
+
+TableDef::TableDef(SchemaDef* schema, String name)
+    : Relation(move(name), schema)
+    , m_columns()
+    , m_indexes()
+{
+}
+// One descriptor element per column; the sort order is always Order::Ascending here.
+TupleDescriptor TableDef::to_tuple_descriptor() const
+{
+    TupleDescriptor ret;
+    for (auto& part : m_columns) {
+        ret.append({ part.name(), part.type(), Order::Ascending });
+    }
+    return ret;
+}
+// Builds a key from the parent relation's key hash and this table's name, and copies pointer() into it. parent_relation() is dereferenced without a null check.
+Key TableDef::key() const
+{
+    auto key = Key(index_def()->to_tuple_descriptor());
+    key["schema_hash"] = parent_relation()->key().hash();
+    key["table_name"] = name();
+    key.set_pointer(pointer());
+    return key;
+}
+// Appends a new ColumnDef whose column number is the current num_columns().
+void TableDef::append_column(String name, SQLType sql_type)
+{
+    auto column = ColumnDef::construct(this, num_columns(), move(name), sql_type);
+    m_columns.append(column);
+}
+// Appends a column described by a key: name from "column_name", type from "column_type" converted through int to SQLType.
+void TableDef::append_column(Key const& column)
+{
+    append_column(
+        (String)column["column_name"],
+        (SQLType)((int)column["column_type"]));
+}
+
+Key TableDef::make_key(SchemaDef const& schema_def)
+{
+    return TableDef::make_key(schema_def.key());
+}
+// Returns a key with only "schema_hash" assigned, taken from schema_key.hash().
+Key TableDef::make_key(Key const& schema_key)
+{
+    Key key(index_def());
+    key["schema_hash"] = schema_key.hash();
+    return key;
+}
+// Returns an IndexDef named "$table" with the two ascending columns appended below. Built afresh on every call (no `static`).
+NonnullRefPtr TableDef::index_def()
+{
+    NonnullRefPtr s_index_def = IndexDef::construct("$table", true, 0);
+    if (!s_index_def->size()) {
+        s_index_def->append_column("schema_hash", SQLType::Integer, Order::Ascending);
+        s_index_def->append_column("table_name", SQLType::Text, Order::Ascending);
+    }
+    return s_index_def;
+}
+
+}
diff --git a/Userland/Libraries/LibSQL/Meta.h b/Userland/Libraries/LibSQL/Meta.h
new file mode 100644
index 0000000000..06a01b18db
--- /dev/null
+++ b/Userland/Libraries/LibSQL/Meta.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2021, Jan de Visser
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace SQL {
+
+/**
+ * This file declares objects describing tables, indexes, and columns.
+ * It remains to be seen if this will survive in its current form.
+ */
+// Common base: a named Core::Object with a u32 pointer and a pure virtual key(); hash() is the hash of that key.
+class Relation : public Core::Object {
+    C_OBJECT_ABSTRACT(Relation);
+
+public:
+    u32 hash() const { return key().hash(); }
+    u32 pointer() const { return m_pointer; }
+    void set_pointer(u32 pointer) { m_pointer = pointer; }
+    ~Relation() override = default;
+    virtual Key key() const = 0;
+    Relation const* parent_relation() const { return dynamic_cast(parent()); } // dynamic_cast of the Core::Object parent.
+
+protected:
+    Relation(String name, u32 pointer, Relation* parent = nullptr)
+        : Core::Object(parent)
+        , m_pointer(pointer)
+    {
+        set_name(move(name));
+    }
+
+    explicit Relation(String name, Relation* parent = nullptr)
+        : Core::Object(parent)
+        , m_pointer(0)
+    {
+        set_name(move(name));
+    }
+
+private:
+    u32 m_pointer { 0 };
+};
+
+class SchemaDef : public Relation {
+    C_OBJECT(SchemaDef);
+
+public:
+    Key key() const override;
+    static NonnullRefPtr index_def();
+    static Key make_key();
+
+private:
+    explicit SchemaDef(String);
+    explicit SchemaDef(Key const&);
+};
+
+class ColumnDef : public Relation {
+    C_OBJECT(ColumnDef);
+
+public:
+    Key key() const override;
+    SQLType type() const { return m_type; }
+    size_t column_number() const { return m_index; } // m_index holds the column number given to the constructor.
+    static NonnullRefPtr index_def();
+    static Key make_key(TableDef const&);
+
+protected:
+    ColumnDef(Relation*, size_t, String, SQLType);
+
+private:
+    size_t m_index;
+    SQLType m_type { SQLType::Text };
+};
+// A ColumnDef that additionally carries a sort order; IndexDef::append_column() creates these.
+class KeyPartDef : public ColumnDef {
+    C_OBJECT(KeyPartDef);
+
+public:
+    KeyPartDef(IndexDef*, String, SQLType, Order = Order::Ascending);
+    Order sort_order() const { return m_sort_order; }
+
+private:
+    Order m_sort_order { Order::Ascending };
+};
+
+class IndexDef : public Relation {
+    C_OBJECT(IndexDef);
+
+public:
+    ~IndexDef() override = default;
+
+    NonnullRefPtrVector key_definition() const { return m_key_definition; } // Returned by value.
+    bool unique() const { return m_unique; }
+    [[nodiscard]] size_t size() const { return m_key_definition.size(); } // Number of key parts.
+    void append_column(String, SQLType, Order = Order::Ascending);
+    Key key() const override;
+    [[nodiscard]] TupleDescriptor to_tuple_descriptor() const;
+    static NonnullRefPtr index_def();
+    static Key make_key(TableDef const& table_def);
+
+private:
+    IndexDef(TableDef*, String, bool unique = true, u32 pointer = 0);
+    explicit IndexDef(String, bool unique = true, u32 pointer = 0);
+
+    NonnullRefPtrVector m_key_definition;
+    bool m_unique { false }; // Both constructors take `unique` (default true) and the table constructor assigns it, so this initializer is overridden.
+
+    friend TableDef;
+};
+
+class TableDef : public Relation {
+    C_OBJECT(TableDef);
+
+public:
+    Key key() const override;
+    void append_column(String, SQLType);
+    void append_column(Key const&);
+    size_t num_columns() { return m_columns.size(); }
+    size_t num_indexes() { return m_indexes.size(); }
+    NonnullRefPtrVector columns() const { return m_columns; }
+    NonnullRefPtrVector indexes() const { return m_indexes; }
+    [[nodiscard]] TupleDescriptor to_tuple_descriptor() const;
+
+    static NonnullRefPtr index_def();
+    static Key make_key(SchemaDef const& schema_def);
+    static Key make_key(Key const& schema_key);
+
+private:
+    explicit TableDef(SchemaDef*, String);
+
+    NonnullRefPtrVector m_columns;
+    NonnullRefPtrVector m_indexes;
+};
+
+}
diff --git a/Userland/Libraries/LibSQL/TreeNode.cpp b/Userland/Libraries/LibSQL/TreeNode.cpp
new file mode 100644
index 0000000000..f6814a5643
--- /dev/null
+++ b/Userland/Libraries/LibSQL/TreeNode.cpp
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2021, Jan de Visser
+ *
+ * 
SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace SQL {
+// Records only the block pointer; m_node starts out null and is filled later by inflate().
+DownPointer::DownPointer(TreeNode* owner, u32 pointer)
+    : m_owner(owner)
+    , m_pointer(pointer)
+    , m_node(nullptr)
+{
+}
+// Takes ownership of `node` via adopt_own_if_nonnull(); the pointer is node->pointer(), or 0 when node is null.
+DownPointer::DownPointer(TreeNode* owner, TreeNode* node)
+    : m_owner(owner)
+    , m_pointer((node) ? node->pointer() : 0)
+    , m_node(adopt_own_if_nonnull(node))
+{
+}
+// Re-homes `down` under a new owner: copies its pointer and moves its m_node out of it.
+DownPointer::DownPointer(TreeNode* owner, DownPointer& down)
+    : m_owner(owner)
+    , m_pointer(down.m_pointer)
+    , m_node(move(down.m_node))
+{
+}
+// "Copy" constructor that actually moves m_node out of `other` (see the FIXME below).
+DownPointer::DownPointer(DownPointer const& other)
+    : m_owner(other.m_owner)
+    , m_pointer(other.pointer())
+{
+    if (other.m_node)
+        // FIXME This is gross. We modify the other object which we promised
+        // to be const. However, this particular constructor is needed
+        // when we take DownPointers from the Vector they live in when
+        // we split a node. The original object is going to go away, so
+        // there is no harm done. However, it's yucky. If anybody has
+        // a better idea...
+        m_node = move(const_cast(other).m_node);
+    else
+        m_node = nullptr;
+}
+// Returns the child node, calling inflate() first when m_node is null. The result can still be null, because inflate() does nothing for a zero pointer.
+TreeNode* DownPointer::node()
+{
+    if (!m_node)
+        inflate();
+    return m_node;
+}
+/* Loads the child node on demand. Does nothing when the node is already
+ * present or the pointer is 0; otherwise reads block m_pointer through the
+ * owner's tree and constructs the node from that buffer starting at offset 0,
+ * with m_owner as its parent.
+ */
+void DownPointer::inflate()
+{
+    if (m_node || !m_pointer)
+        return;
+    auto buffer = m_owner->tree().read_block(m_pointer);
+    size_t offset = 0;
+    m_node = make(m_owner->tree(), m_owner, m_pointer, buffer, offset);
+}
+// Creates an empty leaf: no entries and a single down pointer holding no node.
+TreeNode::TreeNode(BTree& tree, TreeNode* up, u32 pointer)
+    : IndexNode(pointer)
+    , m_tree(tree)
+    , m_up(up)
+    , m_entries()
+    , m_down()
+{
+    m_down.append(DownPointer(this, nullptr));
+    m_is_leaf = true;
+}
+/* Creates a node whose first down pointer is taken over from `left`. If
+ * `left` already holds a node, that node is re-parented to this one. The node
+ * is a leaf when left's pointer is 0. When `pointer` is 0 a new record
+ * pointer is requested from the tree.
+ */
+TreeNode::TreeNode(BTree& tree, TreeNode* up, DownPointer& left, u32 pointer)
+    : IndexNode(pointer)
+    , m_tree(tree)
+    , m_up(up)
+    , m_entries()
+    , m_down()
+{
+    if (left.m_node != nullptr)
+        left.m_node->m_up = this;
+    m_down.append(DownPointer(this, left));
+    m_is_leaf = left.pointer() == 0;
+    if (!pointer)
+        set_pointer(m_tree.new_record_pointer());
+}
+// Creates a node whose first child is `left`. NOTE(review): `left` is dereferenced unconditionally for m_is_leaf, although DownPointer(this, left) tolerates null - confirm callers never pass null.
+TreeNode::TreeNode(BTree& tree, TreeNode* up, TreeNode* left, u32 pointer)
+    : IndexNode(pointer)
+    , m_tree(tree)
+    , m_up(up)
+    , m_entries()
+    , m_down()
+{
+    m_down.append(DownPointer(this, left));
+    m_is_leaf = left->pointer() == 0;
+}
+/* Deserializes a node from `buffer` at `at_offset`: first a u32 entry count,
+ * then per entry a u32 left pointer followed by the key, and finally a u32
+ * right pointer. The first left pointer decides m_is_leaf (leaf when 0);
+ * every later pointer is VERIFYed to agree with it.
+ * NOTE(review): for a count of 0 nothing is appended to m_down and m_is_leaf
+ * is not assigned in this constructor - confirm the header supplies a default.
+ */
+TreeNode::TreeNode(BTree& tree, TreeNode* up, u32 pointer, ByteBuffer& buffer, size_t& at_offset)
+    : IndexNode(pointer)
+    , m_tree(tree)
+    , m_up(up)
+    , m_entries()
+    , m_down()
+{
+    u32 nodes;
+    deserialize_from(buffer, at_offset, nodes);
+    dbgln_if(SQL_DEBUG, "Deserializing node. Size {}", nodes);
+    if (nodes > 0) {
+        for (u32 i = 0; i < nodes; i++) {
+            u32 left;
+            deserialize_from(buffer, at_offset, left);
+            dbgln_if(SQL_DEBUG, "Down[{}] {}", i, left);
+            if (!m_down.is_empty())
+                VERIFY((left == 0) == m_is_leaf);
+            else
+                m_is_leaf = (left == 0);
+            m_entries.append(Key(m_tree.descriptor(), buffer, at_offset));
+            m_down.empend(this, left);
+        }
+        u32 right;
+        deserialize_from(buffer, at_offset, right);
+        dbgln_if(SQL_DEBUG, "Right {}", right);
+        VERIFY((right == 0) == m_is_leaf);
+        m_down.empend(this, right);
+    }
+}
+// Inserts `key` into the leaf responsible for it (found with node_for() when this node is not a leaf). Returns what insert_in_leaf() returns.
+bool TreeNode::insert(Key const& key)
+{
+    dbgln_if(SQL_DEBUG, "[#{}] INSERT({})", pointer(), key.to_string());
+    if (!is_leaf())
+        return node_for(key)->insert_in_leaf(key);
+    return insert_in_leaf(key);
+}
+/* Finds the entry equal to `key` in the responsible leaf and gives it key's
+ * pointer. The node is added to the write-ahead log only when the pointer
+ * actually changed. Returns true when an equal entry exists (changed or
+ * not), false otherwise.
+ */
+bool TreeNode::update_key_pointer(Key const& key)
+{
+    dbgln_if(SQL_DEBUG, "[#{}] UPDATE({}, {})", pointer(), key.to_string(), key.pointer());
+    if (!is_leaf())
+        return node_for(key)->update_key_pointer(key);
+
+    for (auto ix = 0u; ix < size(); ix++) {
+        if (key == m_entries[ix]) {
+            dbgln_if(SQL_DEBUG, "[#{}] {} == {}",
+                pointer(), key.to_string(), m_entries[ix].to_string());
+            if (m_entries[ix].pointer() != key.pointer()) {
+                m_entries[ix].set_pointer(key.pointer());
+                dump_if(SQL_DEBUG, "To WAL");
+                tree().add_to_write_ahead_log(this);
+            }
+            return true;
+        }
+    }
+    return false;
+}
+// Inserts `key` into this leaf. When the tree disallows duplicates, the entries are scanned first and false is returned if an equal key exists; otherwise the key is handed to just_insert() and true is returned.
+bool TreeNode::insert_in_leaf(Key const& key)
+{
+    VERIFY(is_leaf());
+    if (!m_tree.duplicates_allowed()) {
+        for (auto& entry : m_entries) {
+            if (key == entry) {
+                dbgln_if(SQL_DEBUG, "[#{}] duplicate key {}", pointer(), key.to_string());
+                return false;
+            }
+        }
+    }
+
+    dbgln_if(SQL_DEBUG, "[#{}] insert_in_leaf({})", pointer(), key.to_string());
+    just_insert(key, nullptr);
+    return true;
+}
+/* Computes the key capacity of a node: (BLOCKSIZE - 2 * sizeof(u32)) divided
+ * by the descriptor's data length plus one u32, reduced by one when even so
+ * the result is odd.
+ * NOTE(review): if the quotient is 0 the decrement wraps around (the
+ * arithmetic involves sizeof, so it is unsigned) - presumably keys are always
+ * much smaller than a block; confirm.
+ */
+size_t TreeNode::max_keys_in_node()
+{
+    auto descriptor = m_tree.descriptor();
+    auto key_size = descriptor.data_length() + sizeof(u32);
+    auto ret = (BLOCKSIZE - 2 * sizeof(u32)) / key_size;
+    if ((ret % 2) == 0)
+        --ret;
+    return ret;
+}
+
+Key const& TreeNode::operator[](size_t ix) const // Bounds-checked (VERIFY) access to entry `ix`.
+{
+    VERIFY(ix < size());
+    return m_entries[ix];
+}
+// Returns the block pointer stored in down pointer `ix` (bounds-checked against m_down) without loading the child.
+u32 TreeNode::down_pointer(size_t ix) const
+{
+    VERIFY(ix < m_down.size());
+    return m_down[ix].pointer();
+}
+// Returns the child node behind down pointer `ix` through DownPointer::node().
+TreeNode* TreeNode::down_node(size_t ix)
+{
+    VERIFY(ix < m_down.size());
+    return m_down[ix].node();
+}
+/* Returns the leaf responsible for `key`: a leaf returns itself; otherwise
+ * the search recurses into the child left of the first entry greater than
+ * the key, or into the rightmost child when no entry is greater.
+ * NOTE(review): the final trace statement passes m_entries[size() - 1], which
+ * assumes a non-leaf node always has at least one entry - confirm.
+ */
+TreeNode* TreeNode::node_for(Key const& key)
+{
+    dump_if(SQL_DEBUG, String::formatted("node_for(Key {})", key.to_string()));
+    if (is_leaf())
+        return this;
+    for (size_t ix = 0; ix < size(); ix++) {
+        if (key < m_entries[ix]) {
+            dbgln_if(SQL_DEBUG, "[{}] {} < {} v{}",
+                pointer(), (String)key, (String)m_entries[ix], m_down[ix].pointer());
+            return down_node(ix)->node_for(key);
+        }
+    }
+    dbgln_if(SQL_DEBUG, "[#{}] {} >= {} v{}",
+        pointer(), key.to_string(), (String)m_entries[size() - 1], m_down[size()].pointer());
+    return down_node(size())->node_for(key);
+}
+/* Looks `key` up in the subtree rooted at this node. When an equal entry is
+ * found its pointer is returned and also copied into `key`. When the search
+ * ends in a leaf without a match an empty Optional is returned. A node with
+ * no entries hits VERIFY_NOT_REACHED().
+ */
+Optional TreeNode::get(Key& key)
+{
+    dump_if(SQL_DEBUG, String::formatted("get({})", key.to_string()));
+    for (auto ix = 0u; ix < size(); ix++) {
+        if (key < m_entries[ix]) {
+            if (is_leaf()) {
+                dbgln_if(SQL_DEBUG, "[#{}] {} < {} -> 0",
+                    pointer(), key.to_string(), (String)m_entries[ix]);
+                return {};
+            } else {
+                dbgln_if(SQL_DEBUG, "[{}] {} < {} ({} -> {})",
+                    pointer(), key.to_string(), (String)m_entries[ix],
+                    ix, m_down[ix].pointer());
+                return down_node(ix)->get(key);
+            }
+        }
+        if (key == m_entries[ix]) {
+            dbgln_if(SQL_DEBUG, "[#{}] {} == {} -> {}",
+                pointer(), key.to_string(), (String)m_entries[ix],
+                m_entries[ix].pointer());
+            key.set_pointer(m_entries[ix].pointer());
+            return m_entries[ix].pointer();
+        }
+    }
+    if (m_entries.is_empty()) {
+        dbgln_if(SQL_DEBUG, "[#{}] {} Empty node??", pointer(), key.to_string());
+        VERIFY_NOT_REACHED();
+    }
+    if (is_leaf()) {
+        dbgln_if(SQL_DEBUG, "[#{}] {} > {} -> 0",
+            pointer(), key.to_string(), (String)m_entries[size() - 1]);
+        return {};
+    }
+    dbgln_if(SQL_DEBUG, "[#{}] {} > {} ({} -> {})",
+        pointer(), key.to_string(), (String)m_entries[size() - 1],
+        size(), m_down[size()].pointer());
+    return down_node(size())->get(key);
+}
+/* Writes this node to `buffer`: the entry count as u32, then for every entry
+ * its left down pointer (0 for a leaf) followed by the entry itself, and
+ * finally the rightmost down pointer (0 for a leaf). For an empty node only
+ * the count is written.
+ */
+void TreeNode::serialize(ByteBuffer& buffer) const
+{
+    u32 sz = size();
+    serialize_to(buffer, sz);
+    if (sz > 0) {
+        for (auto ix = 0u; ix < size(); ix++) {
+            auto& entry = m_entries[ix];
+            dbgln_if(SQL_DEBUG, "Serializing Left[{}] = {}", ix, m_down[ix].pointer());
+            serialize_to(buffer, is_leaf() ? 0u : m_down[ix].pointer());
+            entry.serialize(buffer);
+        }
+        dbgln_if(SQL_DEBUG, "Serializing Right = {}", m_down[size()].pointer());
+        serialize_to(buffer, is_leaf() ? 0u : m_down[size()].pointer());
+    }
+}
+/* Inserts `key` before the first entry greater than it (or at the end), with
+ * `right` wrapped in a DownPointer placed directly after the key's position.
+ * If the node then holds more than max_keys_in_node() entries it is split;
+ * otherwise it is added to the write-ahead log. No duplicate check happens
+ * here.
+ */
+void TreeNode::just_insert(Key const& key, TreeNode* right)
+{
+    dbgln_if(SQL_DEBUG, "[#{}] just_insert({}, right = {})",
+        pointer(), (String)key, (right) ? right->pointer() : 0);
+    dump_if(SQL_DEBUG, "Before");
+    for (auto ix = 0u; ix < size(); ix++) {
+        if (key < m_entries[ix]) {
+            m_entries.insert(ix, key);
+            VERIFY(is_leaf() == (right == nullptr)); // A leaf must get no right child, a non-leaf must get one.
+            m_down.insert(ix + 1, DownPointer(this, right));
+            if (size() > max_keys_in_node()) {
+                split();
+            } else {
+                dump_if(SQL_DEBUG, "To WAL");
+                tree().add_to_write_ahead_log(this);
+            }
+            return;
+        }
+    }
+    m_entries.append(key);
+    m_down.empend(this, right);
+    // Same overflow handling as in the loop above, for the append case.
+    if (size() > max_keys_in_node()) {
+        split();
+    } else {
+        dump_if(SQL_DEBUG, "To WAL");
+        tree().add_to_write_ahead_log(this);
+    }
+}
+/* Splits this node in two. Entries and down pointers from index
+ * max_keys_in_node() / 2 + 1 onward move to a newly allocated node, both
+ * halves are added to the write-ahead log, and the last entry left in this
+ * node (the median) is inserted into the parent with the new node as its
+ * right child. The new node is allocated with a raw `new`; the parent's
+ * just_insert() wraps it in a DownPointer, whose TreeNode* constructor adopts
+ * it.
+ */
+void TreeNode::split()
+{
+    dump_if(SQL_DEBUG, "Splitting node");
+    if (!m_up)
+        // Make new m_up. This is the new root node.
+        m_up = m_tree.new_root();
+
+    // Take the left pointer for the new node:
+    DownPointer left = m_down.take(max_keys_in_node() / 2 + 1);
+
+    // Create the new right node:
+    auto* new_node = new TreeNode(tree(), m_up, left);
+
+    // Move the rightmost keys from this node to the new right node:
+    while (m_entries.size() > max_keys_in_node() / 2 + 1) {
+        auto entry = m_entries.take(max_keys_in_node() / 2 + 1);
+        auto down = m_down.take(max_keys_in_node() / 2 + 1);
+
+        // Reparent to new right node:
+        if (down.m_node != nullptr) {
+            down.m_node->m_up = new_node;
+        }
+        new_node->m_entries.append(entry);
+        new_node->m_down.append(down);
+    }
+
+    // Move the median key in the node one level up. Its right node will
+    // be the new node:
+    auto median = m_entries.take_last();
+
+    dump_if(SQL_DEBUG, "Split Left To WAL");
+    tree().add_to_write_ahead_log(this);
+    new_node->dump_if(SQL_DEBUG, "Split Right to WAL");
+    tree().add_to_write_ahead_log(new_node);
+
+    m_up->just_insert(median, new_node);
+}
+/* When `flag` is non-zero, prints a one-line description of this node with
+ * dbgln(): its pointer, the optional message, the parent pointer (or "*"),
+ * the entries interleaved with down pointers for non-leaves, the size, and a
+ * leaf marker. For leaves it also VERIFYs that every down pointer is 0.
+ */
+void TreeNode::dump_if(int flag, String&& msg)
+{
+    if (!flag)
+        return;
+    StringBuilder builder;
+    builder.appendff("[#{}] ", pointer());
+    if (!msg.is_empty())
+        builder.appendff("{}", msg);
+    builder.append(": ");
+    if (m_up)
+        builder.appendff("[^{}] -> ", m_up->pointer());
+    else
+        builder.append("* -> ");
+    for (size_t ix = 0; ix < m_entries.size(); ix++) {
+        if (!is_leaf())
+            builder.appendff("[v{}] ", m_down[ix].pointer());
+        else
+            VERIFY(m_down[ix].pointer() == 0);
+        builder.appendff("'{}' ", (String)m_entries[ix]);
+    }
+    if (!is_leaf()) {
+        builder.appendff("[v{}]", m_down[size()].pointer());
+    } else {
+        VERIFY(m_down[size()].pointer() == 0);
+    }
+    builder.appendff(" (size {}", (int)size());
+    if (is_leaf()) {
+        builder.append(", leaf");
+    }
+    builder.append(")");
+    dbgln(builder.build());
+}
+/* Prints the subtree rooted at this node with warnln(): a "--> #pointer"
+ * header, then each entry preceded by its left child's listing, and the
+ * rightmost child last. Every printed line is preceded by `indent` warn()
+ * calls; children are listed with indent + 2.
+ */
+void TreeNode::list_node(int indent)
+{
+    auto do_indent = [&]() {
+        for (int i = 0; i < indent; ++i) {
+            warn(" ");
+        }
+    };
+    do_indent();
+    warnln("--> #{}", pointer());
+    for (auto ix = 0u; ix < size(); ix++) {
+        if (!is_leaf()) {
+            down_node(ix)->list_node(indent + 2);
+        }
+        do_indent();
+        warnln("{}", m_entries[ix].to_string());
+    }
+    if (!is_leaf()) {
+        down_node(size())->list_node(indent + 2);
+    }
+}
+
+}