From 02a95a196f02b838c1538b527bcc7b98a6afe2eb Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sat, 2 Apr 2022 20:01:29 +0200
Subject: [PATCH] Kernel: Use AddressSpace region tree for range allocation

This patch stops using VirtualRangeAllocator in AddressSpace and instead
looks for holes in the region tree when allocating VM space.

There are many benefits:

- VirtualRangeAllocator is non-intrusive and would call kmalloc/kfree
  when used. This new solution is allocation-free. This was a source
  of unpleasant MM/kmalloc deadlocks.

- We consolidate authority on what the address space looks like in a
  single place. Previously, we had both the range allocator *and* the
  region tree being used to determine whether an address was valid.
  Now there is only the region tree.

- Deallocation of VM when splitting regions is no longer complicated,
  as we don't need to keep two separate trees in sync.
---
 Kernel/Memory/AddressSpace.cpp        | 140 +++++++++++++++++++++++---
 Kernel/Memory/AddressSpace.h          |   7 +-
 Kernel/Memory/PageDirectory.cpp       |  13 +--
 Kernel/Memory/PageDirectory.h         |   2 +-
 Kernel/Memory/Region.cpp              |   6 +-
 Kernel/Memory/VirtualRange.cpp        |  12 +++
 Kernel/Memory/VirtualRange.h          |   2 +
 Kernel/Memory/VirtualRangeAllocator.h |   2 +
 Kernel/Syscalls/clock.cpp             |   2 +-
 Kernel/Syscalls/mmap.cpp              |   2 +-
 10 files changed, 158 insertions(+), 30 deletions(-)

diff --git a/Kernel/Memory/AddressSpace.cpp b/Kernel/Memory/AddressSpace.cpp
index cccc3163c4..441defa9a4 100644
--- a/Kernel/Memory/AddressSpace.cpp
+++ b/Kernel/Memory/AddressSpace.cpp
@@ -1,10 +1,11 @@
 /*
- * Copyright (c) 2021, Andreas Kling
+ * Copyright (c) 2021-2022, Andreas Kling
  * Copyright (c) 2021, Leon Albrecht
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include
 #include
 #include
 #include
@@ -13,20 +14,33 @@
 #include
 #include
 #include
+#include
 #include
 
 namespace Kernel::Memory {
 
 ErrorOr<NonnullOwnPtr<AddressSpace>> AddressSpace::try_create(AddressSpace const* parent)
 {
-    auto page_directory = TRY(PageDirectory::try_create_for_userspace(parent ? &parent->page_directory().range_allocator() : nullptr));
-    auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(page_directory)));
+    auto page_directory = TRY(PageDirectory::try_create_for_userspace());
+
+    VirtualRange total_range = [&]() -> VirtualRange {
+        if (parent)
+            return parent->m_total_range;
+        constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
+        FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
+        size_t random_offset = (get_fast_random<u8>() % 32 * MiB) & PAGE_MASK;
+        FlatPtr base = userspace_range_base + random_offset;
+        return VirtualRange(VirtualAddress { base }, userspace_range_ceiling - base);
+    }();
+
+    auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(move(page_directory), total_range)));
     space->page_directory().set_space({}, *space);
     return space;
 }
 
-AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory)
+AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory, VirtualRange total_range)
     : m_page_directory(move(page_directory))
+    , m_total_range(total_range)
 {
 }
 
@@ -78,9 +92,6 @@ ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
 
     auto new_regions = TRY(try_split_region_around_range(*region, range_to_unmap));
 
-    // Instead we give back the unwanted VM manually.
-    page_directory().range_allocator().deallocate(range_to_unmap);
-
     // And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
     for (auto* new_region : new_regions) {
         // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
@@ -126,9 +137,6 @@ ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
         TRY(new_regions.try_extend(split_regions));
     }
 
-    // Give back any unwanted VM to the range allocator.
-    page_directory().range_allocator().deallocate(range_to_unmap);
-
     // And finally map the new region(s) into our page directory.
     for (auto* new_region : new_regions) {
         // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
@@ -141,13 +149,121 @@ ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
     return {};
 }
 
+ErrorOr<VirtualRange> AddressSpace::try_allocate_anywhere(size_t size, size_t alignment)
+{
+    if (!size)
+        return EINVAL;
+
+    VERIFY((size % PAGE_SIZE) == 0);
+    VERIFY((alignment % PAGE_SIZE) == 0);
+
+    if (Checked<size_t>::addition_would_overflow(size, alignment))
+        return EOVERFLOW;
+
+    VirtualAddress window_start = m_total_range.base();
+
+    for (auto it = m_regions.begin(); !it.is_end(); ++it) {
+        auto& region = *it;
+
+        if (window_start == region.vaddr()) {
+            window_start = region.range().end();
+            continue;
+        }
+
+        VirtualRange available_range { window_start, region.vaddr().get() - window_start.get() };
+
+        window_start = region.range().end();
+
+        // FIXME: This check is probably excluding some valid candidates when using a large alignment.
+        if (available_range.size() < (size + alignment))
+            continue;
+
+        FlatPtr initial_base = available_range.base().get();
+        FlatPtr aligned_base = round_up_to_power_of_two(initial_base, alignment);
+
+        return VirtualRange { VirtualAddress(aligned_base), size };
+    }
+
+    VirtualRange available_range { window_start, m_total_range.end().get() - window_start.get() };
+    if (m_total_range.contains(available_range))
+        return available_range;
+
+    dmesgln("VirtualRangeAllocator: Failed to allocate anywhere: size={}, alignment={}", size, alignment);
+    return ENOMEM;
+}
+
+ErrorOr<VirtualRange> AddressSpace::try_allocate_specific(VirtualAddress base, size_t size)
+{
+    if (!size)
+        return EINVAL;
+
+    VERIFY(base.is_page_aligned());
+    VERIFY((size % PAGE_SIZE) == 0);
+
+    VirtualRange const range { base, size };
+    if (!m_total_range.contains(range))
+        return ENOMEM;
+
+    auto* region = m_regions.find_largest_not_above(base.get());
+    if (!region) {
+        // The range can be accommodated below the current lowest range.
+        return range;
+    }
+
+    if (region->range().intersects(range)) {
+        // Requested range overlaps an existing range.
+        return ENOMEM;
+    }
+
+    auto it = m_regions.begin_from(region->vaddr().get());
+    VERIFY(!it.is_end());
+    ++it;
+
+    if (it.is_end()) {
+        // The range can be accommodated above the nearest range.
+        return range;
+    }
+
+    if (it->range().intersects(range)) {
+        // Requested range overlaps the next neighbor.
+        return ENOMEM;
+    }
+
+    // Requested range fits between first region and its next neighbor.
+    return range;
+}
+
+ErrorOr<VirtualRange> AddressSpace::try_allocate_randomized(size_t size, size_t alignment)
+{
+    if (!size)
+        return EINVAL;
+
+    VERIFY((size % PAGE_SIZE) == 0);
+    VERIFY((alignment % PAGE_SIZE) == 0);
+
+    // FIXME: I'm sure there's a smarter way to do this.
+    constexpr size_t maximum_randomization_attempts = 1000;
+    for (size_t i = 0; i < maximum_randomization_attempts; ++i) {
+        VirtualAddress random_address { round_up_to_power_of_two(get_fast_random<FlatPtr>() % m_total_range.end().get(), alignment) };
+
+        if (!m_total_range.contains(random_address, size))
+            continue;
+
+        auto range_or_error = try_allocate_specific(random_address, size);
+        if (!range_or_error.is_error())
+            return range_or_error.release_value();
+    }
+
+    return try_allocate_anywhere(size, alignment);
+}
+
 ErrorOr<VirtualRange> AddressSpace::try_allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
 {
     vaddr.mask(PAGE_MASK);
     size = TRY(page_round_up(size));
     if (vaddr.is_null())
-        return page_directory().range_allocator().try_allocate_anywhere(size, alignment);
-    return page_directory().range_allocator().try_allocate_specific(vaddr, size);
+        return try_allocate_anywhere(size, alignment);
+    return try_allocate_specific(vaddr, size);
 }
 
 ErrorOr<Region*> AddressSpace::try_allocate_split_region(Region const& source_region, VirtualRange const& range, size_t offset_in_vmobject)
diff --git a/Kernel/Memory/AddressSpace.h b/Kernel/Memory/AddressSpace.h
index a4c04a3d17..f89c2b3ece 100644
--- a/Kernel/Memory/AddressSpace.h
+++ b/Kernel/Memory/AddressSpace.h
@@ -66,8 +66,12 @@ public:
     size_t amount_purgeable_volatile() const;
     size_t amount_purgeable_nonvolatile() const;
 
+    ErrorOr<VirtualRange> try_allocate_anywhere(size_t size, size_t alignment);
+    ErrorOr<VirtualRange> try_allocate_specific(VirtualAddress base, size_t size);
+    ErrorOr<VirtualRange> try_allocate_randomized(size_t size, size_t alignment);
+
 private:
-    explicit AddressSpace(NonnullRefPtr<PageDirectory>);
+    AddressSpace(NonnullRefPtr<PageDirectory>, VirtualRange total_range);
 
     void delete_all_regions_assuming_they_are_unmapped();
 
@@ -76,6 +80,7 @@ private:
 
     RefPtr<PageDirectory> m_page_directory;
     IntrusiveRedBlackTree<&Region::m_tree_node> m_regions;
+    VirtualRange const m_total_range;
 
     bool m_enforces_syscall_regions { false };
 };
diff --git a/Kernel/Memory/PageDirectory.cpp b/Kernel/Memory/PageDirectory.cpp
index d8c41cc198..a1056074aa 100644
--- a/Kernel/Memory/PageDirectory.cpp
+++ b/Kernel/Memory/PageDirectory.cpp
@@ -34,21 +34,10 @@ UNMAP_AFTER_INIT NonnullRefPtr<PageDirectory> PageDirectory::must_create_kernel_
     return directory;
 }
 
-ErrorOr<NonnullRefPtr<PageDirectory>> PageDirectory::try_create_for_userspace(VirtualRangeAllocator const* parent_range_allocator)
+ErrorOr<NonnullRefPtr<PageDirectory>> PageDirectory::try_create_for_userspace()
 {
-    constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
-    FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
-
     auto directory = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) PageDirectory));
 
-    if (parent_range_allocator) {
-        TRY(directory->m_range_allocator.initialize_from_parent(*parent_range_allocator));
-    } else {
-        size_t random_offset = (get_fast_random<u8>() % 32 * MiB) & PAGE_MASK;
-        u32 base = userspace_range_base + random_offset;
-        TRY(directory->m_range_allocator.initialize_with_range(VirtualAddress(base), userspace_range_ceiling - base));
-    }
-
     // NOTE: Take the MM lock since we need it for quickmap.
     SpinlockLocker lock(s_mm_lock);
diff --git a/Kernel/Memory/PageDirectory.h b/Kernel/Memory/PageDirectory.h
index bdb1d8dfc5..5bad876153 100644
--- a/Kernel/Memory/PageDirectory.h
+++ b/Kernel/Memory/PageDirectory.h
@@ -21,7 +21,7 @@ class PageDirectory : public RefCounted<PageDirectory> {
     friend class MemoryManager;
 
 public:
-    static ErrorOr<NonnullRefPtr<PageDirectory>> try_create_for_userspace(VirtualRangeAllocator const* parent_range_allocator = nullptr);
+    static ErrorOr<NonnullRefPtr<PageDirectory>> try_create_for_userspace();
     static NonnullRefPtr<PageDirectory> must_create_kernel_page_directory();
     static RefPtr<PageDirectory> find_current();
 
diff --git a/Kernel/Memory/Region.cpp b/Kernel/Memory/Region.cpp
index 3fe340d862..962e8087e0 100644
--- a/Kernel/Memory/Region.cpp
+++ b/Kernel/Memory/Region.cpp
@@ -58,7 +58,8 @@ Region::~Region()
         if (!is_readable() && !is_writable() && !is_executable()) {
             // If the region is "PROT_NONE", we didn't map it in the first place,
             // so all we need to do here is deallocate the VM.
-            m_page_directory->range_allocator().deallocate(range());
+            if (is_kernel())
+                m_page_directory->range_allocator().deallocate(range());
         } else {
             SpinlockLocker mm_locker(s_mm_lock);
             unmap_with_locks_held(ShouldDeallocateVirtualRange::Yes, ShouldFlushTLB::Yes, pd_locker, mm_locker);
@@ -270,7 +271,8 @@ void Region::unmap_with_locks_held(ShouldDeallocateVirtualRange deallocate_range
     if (should_flush_tlb == ShouldFlushTLB::Yes)
         MemoryManager::flush_tlb(m_page_directory, vaddr(), page_count());
     if (deallocate_range == ShouldDeallocateVirtualRange::Yes) {
-        m_page_directory->range_allocator().deallocate(range());
+        if (is_kernel())
+            m_page_directory->range_allocator().deallocate(range());
     }
     m_page_directory = nullptr;
 }
diff --git a/Kernel/Memory/VirtualRange.cpp b/Kernel/Memory/VirtualRange.cpp
index 235974ca65..607937f872 100644
--- a/Kernel/Memory/VirtualRange.cpp
+++ b/Kernel/Memory/VirtualRange.cpp
@@ -25,6 +25,18 @@ Vector<VirtualRange, 2> VirtualRange::carve(VirtualRange const& taken) const
         parts.append({ taken.end(), end().get() - taken.end().get() });
     return parts;
 }
+
+bool VirtualRange::intersects(VirtualRange const& other) const
+{
+    auto a = *this;
+    auto b = other;
+
+    if (a.base() > b.base())
+        swap(a, b);
+
+    return a.base() < b.end() && b.base() < a.end();
+}
+
 VirtualRange VirtualRange::intersect(VirtualRange const& other) const
 {
     if (*this == other) {
diff --git a/Kernel/Memory/VirtualRange.h b/Kernel/Memory/VirtualRange.h
index fb959f3e52..3d69fa6b2b 100644
--- a/Kernel/Memory/VirtualRange.h
+++ b/Kernel/Memory/VirtualRange.h
@@ -51,6 +51,8 @@ public:
     Vector<VirtualRange, 2> carve(VirtualRange const&) const;
     VirtualRange intersect(VirtualRange const&) const;
 
+    bool intersects(VirtualRange const&) const;
+
     static ErrorOr<VirtualRange> expand_to_page_boundaries(FlatPtr address, size_t size);
 
 private:
diff --git a/Kernel/Memory/VirtualRangeAllocator.h b/Kernel/Memory/VirtualRangeAllocator.h
index fdbb409f08..aadee6a8d0 100644
--- a/Kernel/Memory/VirtualRangeAllocator.h
+++ b/Kernel/Memory/VirtualRangeAllocator.h
@@ -30,6 +30,8 @@ public:
 
     bool contains(VirtualRange const& range) const { return m_total_range.contains(range); }
 
+    VirtualRange total_range() const { return m_total_range; }
+
 private:
    ErrorOr<void> carve_from_region(VirtualRange const& from, VirtualRange const&);
 
diff --git a/Kernel/Syscalls/clock.cpp b/Kernel/Syscalls/clock.cpp
index b6668b989b..a7e1ff4fd4 100644
--- a/Kernel/Syscalls/clock.cpp
+++ b/Kernel/Syscalls/clock.cpp
@@ -17,7 +17,7 @@ ErrorOr<FlatPtr> Process::sys$map_time_page()
 
     auto& vmobject = TimeManagement::the().time_page_vmobject();
 
-    auto range = TRY(address_space().page_directory().range_allocator().try_allocate_randomized(PAGE_SIZE, PAGE_SIZE));
+    auto range = TRY(address_space().try_allocate_randomized(PAGE_SIZE, PAGE_SIZE));
     auto* region = TRY(address_space().allocate_region_with_vmobject(range, vmobject, 0, "Kernel time page"sv, PROT_READ, true));
     return region->vaddr().get();
 }
diff --git a/Kernel/Syscalls/mmap.cpp b/Kernel/Syscalls/mmap.cpp
index 3a77238396..8cdbd57466 100644
--- a/Kernel/Syscalls/mmap.cpp
+++ b/Kernel/Syscalls/mmap.cpp
@@ -193,7 +193,7 @@ ErrorOr<FlatPtr> Process::sys$mmap(Userspace<Syscall::SC_mmap_params const*> use
 
     auto range = TRY([&]() -> ErrorOr<VirtualRange> {
         if (map_randomized)
-            return address_space().page_directory().range_allocator().try_allocate_randomized(rounded_size, alignment);
+            return address_space().try_allocate_randomized(rounded_size, alignment);
 
         // If MAP_FIXED is specified, existing mappings that intersect the requested range are removed.
        if (map_fixed)
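
For readers who want to see the hole-scanning idea from the commit message outside the kernel, here is a minimal, self-contained C++ sketch. It is not part of the patch: Space, Range, allocate_anywhere() and align_up() are illustrative stand-ins, a std::map keyed by base address plays the role of the intrusive red-black tree of Regions, and it assumes page-multiple sizes and power-of-two alignments.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>
#include <optional>

struct Range {
    uintptr_t base { 0 };
    size_t size { 0 };
    uintptr_t end() const { return base + size; }
};

class Space {
public:
    Space(uintptr_t base, size_t size)
        : m_total { base, size }
    {
    }

    // Walk the regions in address order and return the first hole that can hold
    // `size` bytes at the requested power-of-two `alignment` (first-fit).
    std::optional<Range> allocate_anywhere(size_t size, size_t alignment)
    {
        uintptr_t window_start = m_total.base;
        for (auto const& entry : m_regions) {
            auto const& region = entry.second;
            if (window_start < region.base) {
                uintptr_t aligned = align_up(window_start, alignment);
                if (aligned + size <= region.base)
                    return commit({ aligned, size });
            }
            window_start = region.end();
        }
        // Trailing hole between the last region and the end of the space.
        uintptr_t aligned = align_up(window_start, alignment);
        if (aligned + size <= m_total.end())
            return commit({ aligned, size });
        return std::nullopt; // No hole is large enough.
    }

private:
    static uintptr_t align_up(uintptr_t value, size_t alignment)
    {
        return (value + alignment - 1) & ~(uintptr_t(alignment) - 1);
    }

    Range commit(Range range)
    {
        // Record the carved-out range so later calls see it as occupied. In the kernel,
        // this bookkeeping happens when the Region itself is created and inserted.
        m_regions.emplace(range.base, range);
        return range;
    }

    Range m_total;
    std::map<uintptr_t, Range> m_regions; // Stand-in for the region tree, keyed by base address.
};

int main()
{
    Space space(0x10000, 0x100000);
    for (int i = 0; i < 3; ++i) {
        if (auto range = space.allocate_anywhere(0x4000, 0x1000))
            std::printf("allocated %#zx bytes at %#lx\n", range->size, static_cast<unsigned long>(range->base));
    }
    return 0;
}

Because the walk visits regions in ascending address order, the gap before each region (and the trailing gap before the end of the space) is examined exactly once, which is the same property the patch relies on when it iterates the AddressSpace region tree instead of consulting a separate VirtualRangeAllocator.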