1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 08:08:12 +00:00

Kernel: Simplify VMObject locking & page fault handlers

This patch greatly simplifies VMObject locking by doing two things:

1. Giving VMObject an IntrusiveList of all its mapping Region objects.
2. Removing VMObject::m_paging_lock in favor of VMObject::m_lock

Before (1), VMObject::for_each_region() was forced to acquire the
global MM lock (since it worked by walking MemoryManager's list of
all regions and checking for regions that pointed back to the VMObject itself.)

With each VMObject having its own list of Regions, VMObject's own
m_lock is all we need.

Before (2), page fault handlers used a separate mutex for preventing
overlapping work. This design required multiple temporary unlocks
and was generally extremely hard to reason about.

Instead, page fault handlers now use VMObject's own m_lock as well.
This commit is contained in:
Andreas Kling 2021-07-23 02:40:16 +02:00
parent 64babcaa83
commit 082ed6f417
10 changed files with 116 additions and 155 deletions

View file

@ -24,7 +24,6 @@ KResultOr<FlatPtr> Process::sys$purge(int mode)
NonnullRefPtrVector<AnonymousVMObject> vmobjects;
{
KResult result(KSuccess);
InterruptDisabler disabler;
MM.for_each_vmobject([&](auto& vmobject) {
if (vmobject.is_anonymous()) {
// In the event that the append fails, only attempt to continue
@ -48,7 +47,6 @@ KResultOr<FlatPtr> Process::sys$purge(int mode)
NonnullRefPtrVector<InodeVMObject> vmobjects;
{
KResult result(KSuccess);
InterruptDisabler disabler;
MM.for_each_vmobject([&](auto& vmobject) {
if (vmobject.is_inode()) {
// In the event that the append fails, only attempt to continue

View file

@ -153,27 +153,6 @@ AnonymousVMObject::~AnonymousVMObject()
}
int AnonymousVMObject::purge()
{
MutexLocker locker(m_paging_lock);
return purge_impl();
}
int AnonymousVMObject::purge_with_interrupts_disabled(Badge<MemoryManager>)
{
VERIFY_INTERRUPTS_DISABLED();
if (m_paging_lock.is_locked())
return 0;
return purge_impl();
}
void AnonymousVMObject::set_was_purged(VolatilePageRange const& range)
{
VERIFY(m_lock.is_locked());
for (auto* purgeable_ranges : m_purgeable_ranges)
purgeable_ranges->set_was_purged(range);
}
int AnonymousVMObject::purge_impl()
{
int purged_page_count = 0;
ScopedSpinLock lock(m_lock);
@ -193,7 +172,6 @@ int AnonymousVMObject::purge_impl()
purged_page_count += purged_in_range;
set_was_purged(range);
for_each_region([&](auto& region) {
if (&region.vmobject() == this) {
if (auto owner = region.get_owner()) {
// we need to hold a reference to the process here (if there is one) as we may not own this region
dmesgln("Purged {} pages from region {} owned by {} at {} - {}",
@ -210,13 +188,19 @@ int AnonymousVMObject::purge_impl()
region.vaddr_from_page_index(range.base + range.count));
}
region.remap_vmobject_page_range(range.base, range.count);
}
});
}
});
return purged_page_count;
}
void AnonymousVMObject::set_was_purged(VolatilePageRange const& range)
{
VERIFY(m_lock.is_locked());
for (auto* purgeable_ranges : m_purgeable_ranges)
purgeable_ranges->set_was_purged(range);
}
void AnonymousVMObject::register_purgeable_page_ranges(PurgeablePageRanges& purgeable_page_ranges)
{
ScopedSpinLock lock(m_lock);

View file

@ -36,7 +36,6 @@ public:
void unregister_purgeable_page_ranges(PurgeablePageRanges&);
int purge();
int purge_with_interrupts_disabled(Badge<MemoryManager>);
bool is_any_volatile() const;
@ -122,7 +121,6 @@ private:
virtual StringView class_name() const override { return "AnonymousVMObject"sv; }
int purge_impl();
void update_volatile_cache();
void set_was_purged(VolatilePageRange const&);
size_t remove_lazy_commit_pages(VolatilePageRange const&);

View file

@ -4,7 +4,6 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Arch/x86/InterruptDisabler.h>
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/VM/InodeVMObject.h>
@ -53,23 +52,20 @@ size_t InodeVMObject::amount_dirty() const
int InodeVMObject::release_all_clean_pages()
{
MutexLocker locker(m_paging_lock);
return release_all_clean_pages_impl();
}
ScopedSpinLock locker(m_lock);
int InodeVMObject::release_all_clean_pages_impl()
{
int count = 0;
InterruptDisabler disabler;
for (size_t i = 0; i < page_count(); ++i) {
if (!m_dirty_pages.get(i) && m_physical_pages[i]) {
m_physical_pages[i] = nullptr;
++count;
}
}
if (count) {
for_each_region([](auto& region) {
region.remap();
});
}
return count;
}

View file

@ -37,8 +37,6 @@ protected:
virtual bool is_inode() const final { return true; }
int release_all_clean_pages_impl();
NonnullRefPtr<Inode> m_inode;
Bitmap m_dirty_pages;
};

View file

@ -684,7 +684,6 @@ Region* MemoryManager::find_region_from_vaddr(VirtualAddress vaddr)
PageFaultResponse MemoryManager::handle_page_fault(PageFault const& fault)
{
VERIFY_INTERRUPTS_DISABLED();
ScopedSpinLock lock(s_mm_lock);
if (Processor::current().in_irq()) {
dbgln("CPU[{}] BUG! Page fault while handling IRQ! code={}, vaddr={}, irq level: {}",
Processor::id(), fault.code(), fault.vaddr(), Processor::current().in_irq());
@ -696,8 +695,7 @@ PageFaultResponse MemoryManager::handle_page_fault(PageFault const& fault)
if (!region) {
return PageFaultResponse::ShouldCrash;
}
return region->handle_fault(fault, lock);
return region->handle_fault(fault);
}
OwnPtr<Region> MemoryManager::allocate_contiguous_kernel_region(size_t size, StringView name, Region::Access access, Region::Cacheable cacheable)
@ -878,7 +876,7 @@ RefPtr<PhysicalPage> MemoryManager::allocate_user_physical_page(ShouldZeroFill s
for_each_vmobject([&](auto& vmobject) {
if (!vmobject.is_anonymous())
return IterationDecision::Continue;
int purged_page_count = static_cast<AnonymousVMObject&>(vmobject).purge_with_interrupts_disabled({});
int purged_page_count = static_cast<AnonymousVMObject&>(vmobject).purge();
if (purged_page_count) {
dbgln("MM: Purge saved the day! Purged {} pages from AnonymousVMObject", purged_page_count);
page = find_free_user_physical_page(false);

View file

@ -175,6 +175,7 @@ public:
template<IteratorFunction<VMObject&> Callback>
static void for_each_vmobject(Callback callback)
{
ScopedSpinLock locker(s_mm_lock);
for (auto& vmobject : MM.m_vmobjects) {
if (callback(vmobject) == IterationDecision::Break)
break;
@ -255,8 +256,8 @@ private:
PhysicalPageEntry* m_physical_page_entries { nullptr };
size_t m_physical_page_entries_count { 0 };
Region::List m_user_regions;
Region::List m_kernel_regions;
Region::ListInMemoryManager m_user_regions;
Region::ListInMemoryManager m_kernel_regions;
Vector<UsedMemoryRange> m_used_memory_ranges;
Vector<PhysicalMemoryRange> m_physical_memory_ranges;
Vector<ContiguousReservedMemoryRange> m_reserved_memory_ranges;
@ -264,22 +265,6 @@ private:
VMObject::List m_vmobjects;
};
template<typename Callback>
void VMObject::for_each_region(Callback callback)
{
ScopedSpinLock lock(s_mm_lock);
// FIXME: Figure out a better data structure so we don't have to walk every single region every time an inode changes.
// Perhaps VMObject could have a Vector<Region*> with all of its mappers?
for (auto& region : MM.m_user_regions) {
if (&region.vmobject() == this)
callback(region);
}
for (auto& region : MM.m_kernel_regions) {
if (&region.vmobject() == this)
callback(region);
}
}
inline bool is_user_address(VirtualAddress vaddr)
{
return vaddr.get() < USER_RANGE_CEILING;

View file

@ -33,14 +33,14 @@ Region::Region(Range const& range, NonnullRefPtr<VMObject> vmobject, size_t offs
VERIFY(m_range.size());
VERIFY((m_range.size() % PAGE_SIZE) == 0);
m_vmobject->ref_region();
m_vmobject->add_region(*this);
register_purgeable_page_ranges();
MM.register_region(*this);
}
Region::~Region()
{
m_vmobject->unref_region();
m_vmobject->remove_region(*this);
unregister_purgeable_page_ranges();
// Make sure we disable interrupts so we don't get interrupted between unmapping and unregistering.
@ -130,9 +130,9 @@ void Region::set_vmobject(NonnullRefPtr<VMObject>&& obj)
if (m_vmobject.ptr() == obj.ptr())
return;
unregister_purgeable_page_ranges();
m_vmobject->unref_region();
m_vmobject->remove_region(*this);
m_vmobject = move(obj);
m_vmobject->ref_region();
m_vmobject->add_region(*this);
register_purgeable_page_ranges();
}
@ -255,6 +255,9 @@ bool Region::map_individual_page_impl(size_t page_index)
PANIC("About to map mmap'ed page at a kernel address");
}
// NOTE: We have to take the MM lock for PTE's to stay valid while we use them.
ScopedSpinLock mm_locker(s_mm_lock);
auto* pte = MM.ensure_pte(*m_page_directory, page_vaddr);
if (!pte)
return false;
@ -279,7 +282,6 @@ bool Region::map_individual_page_impl(size_t page_index)
bool Region::do_remap_vmobject_page_range(size_t page_index, size_t page_count)
{
bool success = true;
VERIFY(s_mm_lock.own_lock());
if (!m_page_directory)
return success; // not an error, region may have not yet mapped it
if (!translate_vmobject_page_range(page_index, page_count))
@ -301,7 +303,6 @@ bool Region::do_remap_vmobject_page_range(size_t page_index, size_t page_count)
bool Region::remap_vmobject_page_range(size_t page_index, size_t page_count)
{
bool success = true;
ScopedSpinLock lock(s_mm_lock);
auto& vmobject = this->vmobject();
if (vmobject.is_shared_by_multiple_regions()) {
vmobject.for_each_region([&](auto& region) {
@ -317,7 +318,7 @@ bool Region::remap_vmobject_page_range(size_t page_index, size_t page_count)
bool Region::do_remap_vmobject_page(size_t page_index, bool with_flush)
{
ScopedSpinLock lock(s_mm_lock);
ScopedSpinLock lock(vmobject().m_lock);
if (!m_page_directory)
return true; // not an error, region may have not yet mapped it
if (!translate_vmobject_page(page_index))
@ -333,8 +334,8 @@ bool Region::do_remap_vmobject_page(size_t page_index, bool with_flush)
bool Region::remap_vmobject_page(size_t page_index, bool with_flush)
{
bool success = true;
ScopedSpinLock lock(s_mm_lock);
auto& vmobject = this->vmobject();
ScopedSpinLock lock(vmobject.m_lock);
if (vmobject.is_shared_by_multiple_regions()) {
vmobject.for_each_region([&](auto& region) {
if (!region.do_remap_vmobject_page(page_index, with_flush))
@ -406,7 +407,7 @@ void Region::remap()
map(*m_page_directory);
}
PageFaultResponse Region::handle_fault(PageFault const& fault, ScopedSpinLock<RecursiveSpinLock>& mm_lock)
PageFaultResponse Region::handle_fault(PageFault const& fault)
{
auto page_index_in_region = page_index_from_address(fault.vaddr());
if (fault.type() == PageFault::Type::PageNotPresent) {
@ -420,7 +421,7 @@ PageFaultResponse Region::handle_fault(PageFault const& fault, ScopedSpinLock<Re
}
if (vmobject().is_inode()) {
dbgln_if(PAGE_FAULT_DEBUG, "NP(inode) fault in Region({})[{}]", this, page_index_in_region);
return handle_inode_fault(page_index_in_region, mm_lock);
return handle_inode_fault(page_index_in_region);
}
auto& page_slot = physical_page_slot(page_index_in_region);
@ -440,7 +441,7 @@ PageFaultResponse Region::handle_fault(PageFault const& fault, ScopedSpinLock<Re
auto* phys_page = physical_page(page_index_in_region);
if (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page()) {
dbgln_if(PAGE_FAULT_DEBUG, "NP(zero) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
return handle_zero_fault(page_index_in_region, mm_lock);
return handle_zero_fault(page_index_in_region);
}
return handle_cow_fault(page_index_in_region);
}
@ -448,33 +449,16 @@ PageFaultResponse Region::handle_fault(PageFault const& fault, ScopedSpinLock<Re
return PageFaultResponse::ShouldCrash;
}
PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region, ScopedSpinLock<RecursiveSpinLock>& mm_lock)
PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region)
{
VERIFY_INTERRUPTS_DISABLED();
VERIFY(vmobject().is_anonymous());
bool can_lock = Thread::current() && !g_scheduler_lock.own_lock();
if (can_lock) {
// TODO: This seems rather weird. If we don't have a current thread
// then we're in the Kernel in early initialization still. So we
// can't actually wait on the paging lock. And if we currently
// own the scheduler lock and we trigger zero faults, we also
// can't really wait. But do we actually need to wait here?
mm_lock.unlock();
VERIFY(!s_mm_lock.own_lock());
vmobject().m_paging_lock.lock();
mm_lock.lock();
}
ScopeGuard guard([&]() {
if (can_lock)
vmobject().m_paging_lock.unlock();
});
auto& page_slot = physical_page_slot(page_index_in_region);
auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
ScopedSpinLock locker(vmobject().m_lock);
if (!page_slot.is_null() && !page_slot->is_shared_zero_page() && !page_slot->is_lazy_committed_page()) {
dbgln_if(PAGE_FAULT_DEBUG, "MM: zero_page() but page already present. Fine with me!");
if (!remap_vmobject_page(page_index_in_vmobject))
@ -523,33 +507,21 @@ PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region)
return response;
}
PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region, ScopedSpinLock<RecursiveSpinLock>& mm_lock)
PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region)
{
VERIFY_INTERRUPTS_DISABLED();
VERIFY(vmobject().is_inode());
mm_lock.unlock();
VERIFY(!s_mm_lock.own_lock());
VERIFY(!g_scheduler_lock.own_lock());
MutexLocker locker(vmobject().m_paging_lock);
mm_lock.lock();
VERIFY_INTERRUPTS_DISABLED();
auto& inode_vmobject = static_cast<InodeVMObject&>(vmobject());
auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
auto& vmobject_physical_page_entry = inode_vmobject.physical_pages()[page_index_in_vmobject];
VERIFY(vmobject_physical_page_entry.is_null());
dbgln_if(PAGE_FAULT_DEBUG, "Inode fault in {} page index: {}", name(), page_index_in_region);
if (!vmobject_physical_page_entry.is_null()) {
dbgln_if(PAGE_FAULT_DEBUG, "MM: page_in_from_inode() but page already present. Fine with me!");
if (!remap_vmobject_page(page_index_in_vmobject))
return PageFaultResponse::OutOfMemory;
return PageFaultResponse::Continue;
}
auto current_thread = Thread::current();
if (current_thread)
current_thread->did_inode_fault();
@ -557,29 +529,33 @@ PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region, Scoped
u8 page_buffer[PAGE_SIZE];
auto& inode = inode_vmobject.inode();
// Reading the page may block, so release the MM lock temporarily
mm_lock.unlock();
KResultOr<size_t> result(KSuccess);
{
ScopedLockRelease release_paging_lock(vmobject().m_paging_lock);
auto buffer = UserOrKernelBuffer::for_kernel_buffer(page_buffer);
result = inode.read_bytes(page_index_in_vmobject * PAGE_SIZE, PAGE_SIZE, buffer, nullptr);
}
mm_lock.lock();
auto result = inode.read_bytes(page_index_in_vmobject * PAGE_SIZE, PAGE_SIZE, buffer, nullptr);
if (result.is_error()) {
dmesgln("MM: handle_inode_fault had error ({}) while reading!", result.error());
dmesgln("handle_inode_fault: Error ({}) while reading from inode", result.error());
return PageFaultResponse::ShouldCrash;
}
auto nread = result.value();
if (nread < PAGE_SIZE) {
// If we read less than a page, zero out the rest to avoid leaking uninitialized data.
memset(page_buffer + nread, 0, PAGE_SIZE - nread);
}
ScopedSpinLock locker(inode_vmobject.m_lock);
if (!vmobject_physical_page_entry.is_null()) {
// Someone else faulted in this page while we were reading from the inode.
// No harm done (other than some duplicate work), remap the page here and return.
dbgln_if(PAGE_FAULT_DEBUG, "handle_inode_fault: Page faulted in by someone else, remapping.");
if (!remap_vmobject_page(page_index_in_vmobject))
return PageFaultResponse::OutOfMemory;
return PageFaultResponse::Continue;
}
vmobject_physical_page_entry = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
if (vmobject_physical_page_entry.is_null()) {
dmesgln("MM: handle_inode_fault was unable to allocate a physical page");
return PageFaultResponse::OutOfMemory;

View file

@ -15,10 +15,10 @@
#include <Kernel/Heap/SlabAllocator.h>
#include <Kernel/KString.h>
#include <Kernel/Sections.h>
#include <Kernel/UnixTypes.h>
#include <Kernel/VM/PageFaultResponse.h>
#include <Kernel/VM/PurgeablePageRanges.h>
#include <Kernel/VM/RangeAllocator.h>
#include <Kernel/VM/VMObject.h>
namespace Kernel {
@ -88,7 +88,7 @@ public:
bool is_user() const { return !is_kernel(); }
bool is_kernel() const { return vaddr().get() < 0x00800000 || vaddr().get() >= kernel_base; }
PageFaultResponse handle_fault(PageFault const&, ScopedSpinLock<RecursiveSpinLock>&);
PageFaultResponse handle_fault(PageFault const&);
OwnPtr<Region> clone(Process&);
@ -165,17 +165,8 @@ public:
return size() / PAGE_SIZE;
}
PhysicalPage const* physical_page(size_t index) const
{
VERIFY(index < page_count());
return vmobject().physical_pages()[first_page_index() + index];
}
RefPtr<PhysicalPage>& physical_page_slot(size_t index)
{
VERIFY(index < page_count());
return vmobject().physical_pages()[first_page_index() + index];
}
PhysicalPage const* physical_page(size_t index) const;
RefPtr<PhysicalPage>& physical_page_slot(size_t index);
size_t offset_in_vmobject() const
{
@ -242,8 +233,8 @@ private:
bool remap_vmobject_page(size_t index, bool with_flush = true);
PageFaultResponse handle_cow_fault(size_t page_index);
PageFaultResponse handle_inode_fault(size_t page_index, ScopedSpinLock<RecursiveSpinLock>&);
PageFaultResponse handle_zero_fault(size_t page_index, ScopedSpinLock<RecursiveSpinLock>&);
PageFaultResponse handle_inode_fault(size_t page_index);
PageFaultResponse handle_zero_fault(size_t page_index);
bool map_individual_page_impl(size_t page_index);
@ -262,10 +253,12 @@ private:
bool m_mmap : 1 { false };
bool m_syscall_region : 1 { false };
WeakPtr<Process> m_owner;
IntrusiveListNode<Region> m_list_node;
IntrusiveListNode<Region> m_memory_manager_list_node;
IntrusiveListNode<Region> m_vmobject_list_node;
public:
using List = IntrusiveList<Region, RawPtr<Region>, &Region::m_list_node>;
using ListInMemoryManager = IntrusiveList<Region, RawPtr<Region>, &Region::m_memory_manager_list_node>;
using ListInVMObject = IntrusiveList<Region, RawPtr<Region>, &Region::m_vmobject_list_node>;
};
AK_ENUM_BITWISE_OPERATORS(Region::Access)

View file

@ -15,6 +15,7 @@
#include <AK/Weakable.h>
#include <Kernel/Forward.h>
#include <Kernel/Mutex.h>
#include <Kernel/VM/Region.h>
namespace Kernel {
@ -48,8 +49,20 @@ public:
virtual StringView class_name() const = 0;
ALWAYS_INLINE void ref_region() { m_regions_count++; }
ALWAYS_INLINE void unref_region() { m_regions_count--; }
ALWAYS_INLINE void add_region(Region& region)
{
ScopedSpinLock locker(m_lock);
m_regions_count++;
m_regions.append(region);
}
ALWAYS_INLINE void remove_region(Region& region)
{
ScopedSpinLock locker(m_lock);
m_regions_count--;
m_regions.remove(region);
}
ALWAYS_INLINE bool is_shared_by_multiple_regions() const { return m_regions_count > 1; }
void register_on_deleted_handler(VMObjectDeletedHandler& handler)
@ -70,9 +83,8 @@ protected:
IntrusiveListNode<VMObject> m_list_node;
FixedArray<RefPtr<PhysicalPage>> m_physical_pages;
Mutex m_paging_lock { "VMObject" };
mutable SpinLock<u8> m_lock;
mutable RecursiveSpinLock m_lock;
private:
VMObject& operator=(VMObject const&) = delete;
@ -83,8 +95,31 @@ private:
HashTable<VMObjectDeletedHandler*> m_on_deleted;
SpinLock<u8> m_on_deleted_lock;
Region::ListInVMObject m_regions;
public:
using List = IntrusiveList<VMObject, RawPtr<VMObject>, &VMObject::m_list_node>;
};
template<typename Callback>
inline void VMObject::for_each_region(Callback callback)
{
ScopedSpinLock lock(m_lock);
for (auto& region : m_regions) {
callback(region);
}
}
inline PhysicalPage const* Region::physical_page(size_t index) const
{
VERIFY(index < page_count());
return vmobject().physical_pages()[first_page_index() + index];
}
inline RefPtr<PhysicalPage>& Region::physical_page_slot(size_t index)
{
VERIFY(index < page_count());
return vmobject().physical_pages()[first_page_index() + index];
}
}