From dbb644f20cc12104a60474953c68e2bcff78b316 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Mon, 9 Dec 2019 19:12:38 +0100 Subject: [PATCH] Kernel: Start implementing purgeable memory support It's now possible to get purgeable memory by using mmap(MAP_PURGEABLE). Purgeable memory has a "volatile" flag that can be set using madvise(): - madvise(..., MADV_SET_VOLATILE) - madvise(..., MADV_SET_NONVOLATILE) When in the "volatile" state, the kernel may take away the underlying physical memory pages at any time, without notifying the owner. This gives you a guilt discount when caching very large things. :^) Setting a purgeable region to non-volatile will return whether or not the memory has been taken away by the kernel while being volatile. Basically, if madvise(..., MADV_SET_NONVOLATILE) returns 1, that means the memory was purged while volatile, and whatever was in that piece of memory needs to be reconstructed before use. --- Kernel/FileSystem/ProcFS.cpp | 8 ++++ Kernel/Makefile | 1 + Kernel/Process.cpp | 82 ++++++++++++++++++++++++++++++++- Kernel/Process.h | 4 ++ Kernel/Syscall.h | 4 +- Kernel/UnixTypes.h | 4 ++ Kernel/VM/AnonymousVMObject.h | 6 ++- Kernel/VM/PurgeableVMObject.cpp | 41 +++++++++++++++++ Kernel/VM/PurgeableVMObject.h | 32 +++++++++++++ Kernel/VM/Region.cpp | 7 +-- Kernel/VM/VMObject.h | 4 +- Libraries/LibC/mman.cpp | 7 +++ Libraries/LibC/mman.h | 5 ++ 13 files changed, 196 insertions(+), 9 deletions(-) create mode 100644 Kernel/VM/PurgeableVMObject.cpp create mode 100644 Kernel/VM/PurgeableVMObject.h diff --git a/Kernel/FileSystem/ProcFS.cpp b/Kernel/FileSystem/ProcFS.cpp index 5cd8875949..7de3e2f401 100644 --- a/Kernel/FileSystem/ProcFS.cpp +++ b/Kernel/FileSystem/ProcFS.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include enum ProcParentDirectory { @@ -262,6 +263,11 @@ Optional procfs$pid_vm(InodeIdentifier identifier) region_object.add("writable", region.is_writable()); region_object.add("stack", region.is_stack()); region_object.add("shared", region.is_shared()); + region_object.add("purgeable", region.vmobject().is_purgeable()); + if (region.vmobject().is_purgeable()) { + region_object.add("volatile", static_cast(region.vmobject()).is_volatile()); + } + region_object.add("purgeable", region.vmobject().is_purgeable()); region_object.add("address", region.vaddr().get()); region_object.add("size", (u32)region.size()); region_object.add("amount_resident", (u32)region.amount_resident()); @@ -716,6 +722,8 @@ Optional procfs$all(InodeIdentifier) process_object.add("amount_virtual", (u32)process.amount_virtual()); process_object.add("amount_resident", (u32)process.amount_resident()); process_object.add("amount_shared", (u32)process.amount_shared()); + process_object.add("amount_purgeable_volatile", (u32)process.amount_purgeable_volatile()); + process_object.add("amount_purgeable_nonvolatile", (u32)process.amount_purgeable_nonvolatile()); process_object.add("icon_id", process.icon_id()); auto thread_array = process_object.add_array("threads"); process.for_each_thread([&](const Thread& thread) { diff --git a/Kernel/Makefile b/Kernel/Makefile index bcc8b014a0..932bcfef42 100644 --- a/Kernel/Makefile +++ b/Kernel/Makefile @@ -93,6 +93,7 @@ CXX_OBJS = \ VM/PageDirectory.o \ VM/PhysicalPage.o \ VM/PhysicalRegion.o \ + VM/PurgeableVMObject.o \ VM/RangeAllocator.o \ VM/Region.o \ VM/VMObject.o \ diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index a8a4ff1660..ca8c6fbccb 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -224,6 +225,18 @@ void* Process::sys$mmap(const Syscall::SC_mmap_params* params) return (void*)-EINVAL; // FIXME: The rest of this function seems like it could share more code.. + if (flags & MAP_PURGEABLE) { + auto vmobject = PurgeableVMObject::create_with_size(size); + auto* region = allocate_region_with_vmo(VirtualAddress((u32)addr), size, vmobject, 0, name ? name : "mmap (purgeable)", prot); + if (!region) + return (void*)-ENOMEM; + if (flags & MAP_SHARED) + region->set_shared(true); + + region->set_mmap(true); + return region->vaddr().as_ptr(); + } + if (flags & MAP_ANONYMOUS) { auto* region = allocate_region(VirtualAddress((u32)addr), size, name ? name : "mmap", prot, false); if (!region) @@ -312,6 +325,52 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) return 0; } +int Process::sys$madvise(void* address, size_t size, int advice) +{ + auto* region = region_from_range({ VirtualAddress((u32)address), size }); + if (!region) + return -EINVAL; + if (!region->is_mmap()) + return -EPERM; + if ((advice & MADV_SET_VOLATILE) && (advice & MADV_SET_NONVOLATILE)) + return -EINVAL; + if (advice & MADV_SET_VOLATILE) { + if (!region->vmobject().is_purgeable()) + return -EPERM; + auto& vmobject = static_cast(region->vmobject()); + vmobject.set_volatile(true); + return 0; + } + if (advice & MADV_SET_NONVOLATILE) { + if (!region->vmobject().is_purgeable()) + return -EPERM; + auto& vmobject = static_cast(region->vmobject()); + vmobject.set_volatile(false); + bool was_purged = vmobject.was_purged(); + vmobject.set_was_purged(false); + return was_purged ? 1 : 0; + } + return -EINVAL; +} + +int Process::sys$purge() +{ + NonnullRefPtrVector vmobjects; + { + InterruptDisabler disabler; + MM.for_each_vmobject([&](auto& vmobject) { + if (vmobject.is_purgeable()) + vmobjects.append(static_cast(vmobject)); + return IterationDecision::Continue; + }); + } + int purged_page_count = 0; + for (auto& vmobject : vmobjects) { + purged_page_count += vmobject.purge(); + } + return purged_page_count; +} + int Process::sys$gethostname(char* buffer, ssize_t size) { if (size < 0) @@ -842,7 +901,7 @@ void Process::dump_regions() kprintf("Process %s(%u) regions:\n", name().characters(), pid()); kprintf("BEGIN END SIZE ACCESS NAME\n"); for (auto& region : m_regions) { - kprintf("%08x -- %08x %08x %c%c%c%c%c %s\n", + kprintf("%08x -- %08x %08x %c%c%c%c%c%c %s\n", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(), @@ -851,6 +910,7 @@ void Process::dump_regions() region.is_executable() ? 'X' : ' ', region.is_shared() ? 'S' : ' ', region.is_stack() ? 'T' : ' ', + region.vmobject().is_purgeable() ? 'P' : ' ', region.name().characters()); } } @@ -2410,6 +2470,26 @@ size_t Process::amount_shared() const return amount; } +size_t Process::amount_purgeable_volatile() const +{ + size_t amount = 0; + for (auto& region : m_regions) { + if (region.vmobject().is_purgeable() && static_cast(region.vmobject()).is_volatile()) + amount += region.amount_resident(); + } + return amount; +} + +size_t Process::amount_purgeable_nonvolatile() const +{ + size_t amount = 0; + for (auto& region : m_regions) { + if (region.vmobject().is_purgeable() && !static_cast(region.vmobject()).is_volatile()) + amount += region.amount_resident(); + } + return amount; +} + int Process::sys$socket(int domain, int type, int protocol) { int fd = alloc_fd(); diff --git a/Kernel/Process.h b/Kernel/Process.h index ee1c935c28..e7e2a08782 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -139,6 +139,8 @@ public: int sys$munmap(void*, size_t size); int sys$set_mmap_name(void*, size_t, const char*); int sys$mprotect(void*, size_t, int prot); + int sys$madvise(void*, size_t, int advice); + int sys$purge(); int sys$select(const Syscall::SC_select_params*); int sys$poll(pollfd*, int nfds, int timeout); ssize_t sys$get_dir_entries(int fd, void*, ssize_t); @@ -266,6 +268,8 @@ public: size_t amount_virtual() const; size_t amount_resident() const; size_t amount_shared() const; + size_t amount_purgeable_volatile() const; + size_t amount_purgeable_nonvolatile() const; Process* fork(RegisterDump&); int exec(String path, Vector arguments, Vector environment); diff --git a/Kernel/Syscall.h b/Kernel/Syscall.h index aa70ab1699..a7db981c4c 100644 --- a/Kernel/Syscall.h +++ b/Kernel/Syscall.h @@ -144,7 +144,9 @@ typedef u32 socklen_t; __ENUMERATE_SYSCALL(module_unload) \ __ENUMERATE_SYSCALL(detach_thread) \ __ENUMERATE_SYSCALL(set_thread_name) \ - __ENUMERATE_SYSCALL(get_thread_name) + __ENUMERATE_SYSCALL(get_thread_name) \ + __ENUMERATE_SYSCALL(madvise) \ + __ENUMERATE_SYSCALL(purge) namespace Syscall { diff --git a/Kernel/UnixTypes.h b/Kernel/UnixTypes.h index d23b3c7fc5..28b984f3fa 100644 --- a/Kernel/UnixTypes.h +++ b/Kernel/UnixTypes.h @@ -27,12 +27,16 @@ #define MAP_ANONYMOUS 0x20 #define MAP_ANON MAP_ANONYMOUS #define MAP_STACK 0x40 +#define MAP_PURGEABLE 0x80 #define PROT_READ 0x1 #define PROT_WRITE 0x2 #define PROT_EXEC 0x4 #define PROT_NONE 0x0 +#define MADV_SET_VOLATILE 0x100 +#define MADV_SET_NONVOLATILE 0x200 + #define F_DUPFD 0 #define F_GETFD 1 #define F_SETFD 2 diff --git a/Kernel/VM/AnonymousVMObject.h b/Kernel/VM/AnonymousVMObject.h index ceec9de8d5..4a9182d83f 100644 --- a/Kernel/VM/AnonymousVMObject.h +++ b/Kernel/VM/AnonymousVMObject.h @@ -3,7 +3,7 @@ #include #include -class AnonymousVMObject final : public VMObject { +class AnonymousVMObject : public VMObject { public: virtual ~AnonymousVMObject() override; @@ -11,9 +11,11 @@ public: static NonnullRefPtr create_for_physical_range(PhysicalAddress, size_t); virtual NonnullRefPtr clone() override; -private: +protected: explicit AnonymousVMObject(size_t); explicit AnonymousVMObject(const AnonymousVMObject&); + +private: AnonymousVMObject(PhysicalAddress, size_t); AnonymousVMObject& operator=(const AnonymousVMObject&) = delete; diff --git a/Kernel/VM/PurgeableVMObject.cpp b/Kernel/VM/PurgeableVMObject.cpp new file mode 100644 index 0000000000..605d3a5be5 --- /dev/null +++ b/Kernel/VM/PurgeableVMObject.cpp @@ -0,0 +1,41 @@ +#include +#include + +NonnullRefPtr PurgeableVMObject::create_with_size(size_t size) +{ + return adopt(*new PurgeableVMObject(size)); +} + +PurgeableVMObject::PurgeableVMObject(size_t size) + : AnonymousVMObject(size) +{ +} + +PurgeableVMObject::PurgeableVMObject(const PurgeableVMObject& other) + : AnonymousVMObject(other) +{ +} + +PurgeableVMObject::~PurgeableVMObject() +{ +} + +NonnullRefPtr PurgeableVMObject::clone() +{ + return adopt(*new PurgeableVMObject(*this)); +} + +int PurgeableVMObject::purge() +{ + LOCKER(m_paging_lock); + if (!m_volatile) + return 0; + int purged_page_count = 0; + for (size_t i = 0; i < m_physical_pages.size(); ++i) { + if (m_physical_pages[i]) + ++purged_page_count; + m_physical_pages[i] = nullptr; + } + m_was_purged = true; + return purged_page_count; +} diff --git a/Kernel/VM/PurgeableVMObject.h b/Kernel/VM/PurgeableVMObject.h new file mode 100644 index 0000000000..d352ff7c97 --- /dev/null +++ b/Kernel/VM/PurgeableVMObject.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +class PurgeableVMObject final : public AnonymousVMObject { +public: + virtual ~PurgeableVMObject() override; + + static NonnullRefPtr create_with_size(size_t); + virtual NonnullRefPtr clone() override; + + int purge(); + + bool was_purged() const { return m_was_purged; } + void set_was_purged(bool b) { m_was_purged = b; } + + bool is_volatile() const { return m_volatile; } + void set_volatile(bool b) { m_volatile = b; } + +private: + explicit PurgeableVMObject(size_t); + explicit PurgeableVMObject(const PurgeableVMObject&); + + PurgeableVMObject& operator=(const PurgeableVMObject&) = delete; + PurgeableVMObject& operator=(PurgeableVMObject&&) = delete; + PurgeableVMObject(PurgeableVMObject&&) = delete; + + virtual bool is_purgeable() const override { return true; } + + bool m_was_purged { false }; + bool m_volatile { false }; +}; diff --git a/Kernel/VM/Region.cpp b/Kernel/VM/Region.cpp index a9d5342430..3b2af1d7eb 100644 --- a/Kernel/VM/Region.cpp +++ b/Kernel/VM/Region.cpp @@ -299,10 +299,11 @@ PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region) ASSERT_INTERRUPTS_DISABLED(); ASSERT(vmobject().is_anonymous()); - auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region]; + sti(); + LOCKER(vmobject().m_paging_lock); + cli(); - // NOTE: We don't need to acquire the VMObject's lock. - // This function is already exclusive due to interrupts being blocked. + auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region]; if (!vmobject_physical_page_entry.is_null()) { #ifdef PAGE_FAULT_DEBUG diff --git a/Kernel/VM/VMObject.h b/Kernel/VM/VMObject.h index 3f224f0937..06a32a03f2 100644 --- a/Kernel/VM/VMObject.h +++ b/Kernel/VM/VMObject.h @@ -22,6 +22,7 @@ public: virtual NonnullRefPtr clone() = 0; virtual bool is_anonymous() const { return false; } + virtual bool is_purgeable() const { return false; } virtual bool is_inode() const { return false; } size_t page_count() const { return m_physical_pages.size(); } @@ -42,11 +43,10 @@ protected: void for_each_region(Callback); FixedArray> m_physical_pages; + Lock m_paging_lock { "VMObject" }; private: VMObject& operator=(const VMObject&) = delete; VMObject& operator=(VMObject&&) = delete; VMObject(VMObject&&) = delete; - - Lock m_paging_lock { "VMObject" }; }; diff --git a/Libraries/LibC/mman.cpp b/Libraries/LibC/mman.cpp index 65c4371ca7..8ee60929e9 100644 --- a/Libraries/LibC/mman.cpp +++ b/Libraries/LibC/mman.cpp @@ -56,4 +56,11 @@ int shm_unlink(const char* name) int rc = syscall(SC_shm_unlink, name); __RETURN_WITH_ERRNO(rc, rc, -1); } + +int madvise(void* address, size_t size, int advice) +{ + int rc = syscall(SC_madvise, address, size, advice); + __RETURN_WITH_ERRNO(rc, rc, -1); +} + } diff --git a/Libraries/LibC/mman.h b/Libraries/LibC/mman.h index 1720242915..b594166312 100644 --- a/Libraries/LibC/mman.h +++ b/Libraries/LibC/mman.h @@ -9,6 +9,7 @@ #define MAP_ANONYMOUS 0x20 #define MAP_ANON MAP_ANONYMOUS #define MAP_STACK 0x40 +#define MAP_PURGEABLE 0x80 #define PROT_READ 0x1 #define PROT_WRITE 0x2 @@ -17,6 +18,9 @@ #define MAP_FAILED ((void*)-1) +#define MADV_SET_VOLATILE 0x100 +#define MADV_SET_NONVOLATILE 0x200 + __BEGIN_DECLS void* mmap(void* addr, size_t, int prot, int flags, int fd, off_t); @@ -26,5 +30,6 @@ int mprotect(void*, size_t, int prot); int set_mmap_name(void*, size_t, const char*); int shm_open(const char* name, int flags, mode_t); int shm_unlink(const char* name); +int madvise(void*, size_t, int advice); __END_DECLS