
Kernel: Rename Kernel/VM/ to Kernel/Memory/

This directory isn't just about virtual memory, it's about all kinds
of memory management.
Andreas Kling 2021-08-06 10:45:34 +02:00
parent 4e8e1b7b3a
commit a1d7ebf85a
117 changed files with 207 additions and 204 deletions

@@ -0,0 +1,17 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
namespace Kernel {
enum class AllocationStrategy {
Reserve = 0,
AllocateNow,
None
};
}

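The strategies above are consumed by the AnonymousVMObject factories later in this diff. A minimal sketch of their intended semantics (kernel context assumed; the function, sizes, and variable names below are illustrative, not code from this commit):

#include <Kernel/Memory/AnonymousVMObject.h>

static void allocation_strategy_examples()
{
    // Reserve: commit physical pages up front; pages start as the lazy-committed
    // page and are materialized on first write.
    auto reserved = Kernel::AnonymousVMObject::try_create_with_size(4 * PAGE_SIZE, Kernel::AllocationStrategy::Reserve);

    // AllocateNow: commit and immediately allocate every page.
    auto eager = Kernel::AnonymousVMObject::try_create_with_size(4 * PAGE_SIZE, Kernel::AllocationStrategy::AllocateNow);

    // None: no commitment; every page starts as the shared zero page
    // (try_clone() uses this to create a "pre-purged" volatile clone).
    auto pre_purged = Kernel::AnonymousVMObject::try_create_purgeable_with_size(4 * PAGE_SIZE, Kernel::AllocationStrategy::None);

    (void)reserved;
    (void)eager;
    (void)pre_purged;
}
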
@@ -0,0 +1,383 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Arch/x86/SmapDisabler.h>
#include <Kernel/Debug.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PhysicalPage.h>
#include <Kernel/Process.h>
namespace Kernel {
RefPtr<VMObject> AnonymousVMObject::try_clone()
{
// We need to acquire our lock so we copy a sane state
ScopedSpinLock lock(m_lock);
if (is_purgeable() && is_volatile()) {
// If this object is purgeable+volatile, create a new zero-filled purgeable+volatile
// object, effectively "pre-purging" it in the child process.
auto clone = try_create_purgeable_with_size(size(), AllocationStrategy::None);
if (!clone)
return {};
clone->m_volatile = true;
return clone;
}
// We're the parent. Since we're about to become COW we need to
// commit the number of pages that we need to potentially allocate
// so that the parent is still guaranteed to be able to have all
// non-volatile memory available.
size_t new_cow_pages_needed = page_count();
dbgln_if(COMMIT_DEBUG, "Cloning {:p}, need {} committed cow pages", this, new_cow_pages_needed);
auto committed_pages = MM.commit_user_physical_pages(new_cow_pages_needed);
if (!committed_pages.has_value())
return {};
// Create or replace the committed cow pages. When cloning a previously
// cloned vmobject, we want to essentially "fork", leaving us and the
// new clone with one set of shared committed cow pages, and the original
// one would keep the one it still has. This ensures that the original
// one and this one, as well as the clone have sufficient resources
// to cow all pages as needed
auto new_shared_committed_cow_pages = try_create<SharedCommittedCowPages>(committed_pages.release_value());
if (!new_shared_committed_cow_pages)
return {};
auto clone = adopt_ref_if_nonnull(new (nothrow) AnonymousVMObject(*this, *new_shared_committed_cow_pages));
if (!clone)
return {};
m_shared_committed_cow_pages = move(new_shared_committed_cow_pages);
// Both original and clone become COW. So create a COW map for ourselves
// or reset all pages to be copied again if we were previously cloned
ensure_or_reset_cow_map();
if (m_unused_committed_pages.has_value() && !m_unused_committed_pages->is_empty()) {
// The parent vmobject didn't use up all committed pages. When
// cloning (fork) we will overcommit. For this purpose we drop all
// lazy-commit references and replace them with shared zero pages.
for (size_t i = 0; i < page_count(); i++) {
auto& page = clone->m_physical_pages[i];
if (page && page->is_lazy_committed_page()) {
page = MM.shared_zero_page();
}
}
}
return clone;
}
RefPtr<AnonymousVMObject> AnonymousVMObject::try_create_with_size(size_t size, AllocationStrategy strategy)
{
Optional<CommittedPhysicalPageSet> committed_pages;
if (strategy == AllocationStrategy::Reserve || strategy == AllocationStrategy::AllocateNow) {
committed_pages = MM.commit_user_physical_pages(ceil_div(size, static_cast<size_t>(PAGE_SIZE)));
if (!committed_pages.has_value())
return {};
}
return adopt_ref_if_nonnull(new (nothrow) AnonymousVMObject(size, strategy, move(committed_pages)));
}
RefPtr<AnonymousVMObject> AnonymousVMObject::try_create_physically_contiguous_with_size(size_t size)
{
auto contiguous_physical_pages = MM.allocate_contiguous_supervisor_physical_pages(size);
if (contiguous_physical_pages.is_empty())
return {};
return adopt_ref_if_nonnull(new (nothrow) AnonymousVMObject(contiguous_physical_pages.span()));
}
RefPtr<AnonymousVMObject> AnonymousVMObject::try_create_purgeable_with_size(size_t size, AllocationStrategy strategy)
{
Optional<CommittedPhysicalPageSet> committed_pages;
if (strategy == AllocationStrategy::Reserve || strategy == AllocationStrategy::AllocateNow) {
committed_pages = MM.commit_user_physical_pages(ceil_div(size, static_cast<size_t>(PAGE_SIZE)));
if (!committed_pages.has_value())
return {};
}
auto vmobject = adopt_ref_if_nonnull(new (nothrow) AnonymousVMObject(size, strategy, move(committed_pages)));
if (!vmobject)
return {};
vmobject->m_purgeable = true;
return vmobject;
}
RefPtr<AnonymousVMObject> AnonymousVMObject::try_create_with_physical_pages(Span<NonnullRefPtr<PhysicalPage>> physical_pages)
{
return adopt_ref_if_nonnull(new (nothrow) AnonymousVMObject(physical_pages));
}
RefPtr<AnonymousVMObject> AnonymousVMObject::try_create_for_physical_range(PhysicalAddress paddr, size_t size)
{
if (paddr.offset(size) < paddr) {
dbgln("Shenanigans! try_create_for_physical_range({}, {}) would wrap around", paddr, size);
return nullptr;
}
return adopt_ref_if_nonnull(new (nothrow) AnonymousVMObject(paddr, size));
}
AnonymousVMObject::AnonymousVMObject(size_t size, AllocationStrategy strategy, Optional<CommittedPhysicalPageSet> committed_pages)
: VMObject(size)
, m_unused_committed_pages(move(committed_pages))
{
if (strategy == AllocationStrategy::AllocateNow) {
// Allocate all pages right now. We know we can get all because we committed the amount needed
for (size_t i = 0; i < page_count(); ++i)
physical_pages()[i] = m_unused_committed_pages->take_one();
} else {
auto& initial_page = (strategy == AllocationStrategy::Reserve) ? MM.lazy_committed_page() : MM.shared_zero_page();
for (size_t i = 0; i < page_count(); ++i)
physical_pages()[i] = initial_page;
}
}
AnonymousVMObject::AnonymousVMObject(PhysicalAddress paddr, size_t size)
: VMObject(size)
{
VERIFY(paddr.page_base() == paddr);
for (size_t i = 0; i < page_count(); ++i)
physical_pages()[i] = PhysicalPage::create(paddr.offset(i * PAGE_SIZE), MayReturnToFreeList::No);
}
AnonymousVMObject::AnonymousVMObject(Span<NonnullRefPtr<PhysicalPage>> physical_pages)
: VMObject(physical_pages.size() * PAGE_SIZE)
{
for (size_t i = 0; i < physical_pages.size(); ++i) {
m_physical_pages[i] = physical_pages[i];
}
}
AnonymousVMObject::AnonymousVMObject(AnonymousVMObject const& other, NonnullRefPtr<SharedCommittedCowPages> shared_committed_cow_pages)
: VMObject(other)
, m_shared_committed_cow_pages(move(shared_committed_cow_pages))
, m_purgeable(other.m_purgeable)
{
ensure_cow_map();
}
AnonymousVMObject::~AnonymousVMObject()
{
}
size_t AnonymousVMObject::purge()
{
ScopedSpinLock lock(m_lock);
if (!is_purgeable() || !is_volatile())
return 0;
size_t total_pages_purged = 0;
for (auto& page : m_physical_pages) {
VERIFY(page);
if (page->is_shared_zero_page())
continue;
page = MM.shared_zero_page();
++total_pages_purged;
}
m_was_purged = true;
for_each_region([](Region& region) {
region.remap();
});
return total_pages_purged;
}
KResult AnonymousVMObject::set_volatile(bool is_volatile, bool& was_purged)
{
VERIFY(is_purgeable());
ScopedSpinLock locker(m_lock);
was_purged = m_was_purged;
if (m_volatile == is_volatile)
return KSuccess;
if (is_volatile) {
// When a VMObject is made volatile, it gives up all of its committed memory.
// Any physical pages already allocated remain in the VMObject for now, but the kernel is free to take them at any moment.
for (auto& page : m_physical_pages) {
if (page && page->is_lazy_committed_page())
page = MM.shared_zero_page();
}
m_unused_committed_pages = {};
m_shared_committed_cow_pages = nullptr;
if (!m_cow_map.is_null())
m_cow_map = {};
m_volatile = true;
m_was_purged = false;
for_each_region([&](auto& region) { region.remap(); });
return KSuccess;
}
// When a VMObject is made non-volatile, we try to commit however many pages are not currently available.
// If that fails, we return false to indicate that memory allocation failed.
size_t committed_pages_needed = 0;
for (auto& page : m_physical_pages) {
VERIFY(page);
if (page->is_shared_zero_page())
++committed_pages_needed;
}
if (!committed_pages_needed) {
m_volatile = false;
return KSuccess;
}
m_unused_committed_pages = MM.commit_user_physical_pages(committed_pages_needed);
if (!m_unused_committed_pages.has_value())
return ENOMEM;
for (auto& page : m_physical_pages) {
if (page->is_shared_zero_page())
page = MM.lazy_committed_page();
}
m_volatile = false;
m_was_purged = false;
for_each_region([&](auto& region) { region.remap(); });
return KSuccess;
}
NonnullRefPtr<PhysicalPage> AnonymousVMObject::allocate_committed_page(Badge<Region>)
{
return m_unused_committed_pages->take_one();
}
Bitmap& AnonymousVMObject::ensure_cow_map()
{
if (m_cow_map.is_null())
m_cow_map = Bitmap { page_count(), true };
return m_cow_map;
}
void AnonymousVMObject::ensure_or_reset_cow_map()
{
if (m_cow_map.is_null())
ensure_cow_map();
else
m_cow_map.fill(true);
}
bool AnonymousVMObject::should_cow(size_t page_index, bool is_shared) const
{
auto& page = physical_pages()[page_index];
if (page && (page->is_shared_zero_page() || page->is_lazy_committed_page()))
return true;
if (is_shared)
return false;
return !m_cow_map.is_null() && m_cow_map.get(page_index);
}
void AnonymousVMObject::set_should_cow(size_t page_index, bool cow)
{
ensure_cow_map().set(page_index, cow);
}
size_t AnonymousVMObject::cow_pages() const
{
if (m_cow_map.is_null())
return 0;
return m_cow_map.count_slow(true);
}
PageFaultResponse AnonymousVMObject::handle_cow_fault(size_t page_index, VirtualAddress vaddr)
{
VERIFY_INTERRUPTS_DISABLED();
ScopedSpinLock lock(m_lock);
if (is_volatile()) {
// A COW fault in a volatile region? Userspace is writing to volatile memory, this is a bug. Crash.
dbgln("COW fault in volatile region, will crash.");
return PageFaultResponse::ShouldCrash;
}
auto& page_slot = physical_pages()[page_index];
// If we were sharing committed COW pages with another process, and the other process
// has exhausted the supply, we can stop counting the shared pages.
if (m_shared_committed_cow_pages && m_shared_committed_cow_pages->is_empty())
m_shared_committed_cow_pages = nullptr;
if (page_slot->ref_count() == 1) {
dbgln_if(PAGE_FAULT_DEBUG, " >> It's a COW page but nobody is sharing it anymore. Remap r/w");
set_should_cow(page_index, false);
if (m_shared_committed_cow_pages) {
m_shared_committed_cow_pages->uncommit_one();
if (m_shared_committed_cow_pages->is_empty())
m_shared_committed_cow_pages = nullptr;
}
return PageFaultResponse::Continue;
}
RefPtr<PhysicalPage> page;
if (m_shared_committed_cow_pages) {
dbgln_if(PAGE_FAULT_DEBUG, " >> It's a committed COW page and it's time to COW!");
page = m_shared_committed_cow_pages->take_one();
} else {
dbgln_if(PAGE_FAULT_DEBUG, " >> It's a COW page and it's time to COW!");
page = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
if (page.is_null()) {
dmesgln("MM: handle_cow_fault was unable to allocate a physical page");
return PageFaultResponse::OutOfMemory;
}
}
u8* dest_ptr = MM.quickmap_page(*page);
dbgln_if(PAGE_FAULT_DEBUG, " >> COW {} <- {}", page->paddr(), page_slot->paddr());
{
SmapDisabler disabler;
void* fault_at;
if (!safe_memcpy(dest_ptr, vaddr.as_ptr(), PAGE_SIZE, fault_at)) {
if ((u8*)fault_at >= dest_ptr && (u8*)fault_at <= dest_ptr + PAGE_SIZE)
dbgln(" >> COW: error copying page {}/{} to {}/{}: failed to write to page at {}",
page_slot->paddr(), vaddr, page->paddr(), VirtualAddress(dest_ptr), VirtualAddress(fault_at));
else if ((u8*)fault_at >= vaddr.as_ptr() && (u8*)fault_at <= vaddr.as_ptr() + PAGE_SIZE)
dbgln(" >> COW: error copying page {}/{} to {}/{}: failed to read from page at {}",
page_slot->paddr(), vaddr, page->paddr(), VirtualAddress(dest_ptr), VirtualAddress(fault_at));
else
VERIFY_NOT_REACHED();
}
}
page_slot = move(page);
MM.unquickmap_page();
set_should_cow(page_index, false);
return PageFaultResponse::Continue;
}
AnonymousVMObject::SharedCommittedCowPages::SharedCommittedCowPages(CommittedPhysicalPageSet&& committed_pages)
: m_committed_pages(move(committed_pages))
{
}
AnonymousVMObject::SharedCommittedCowPages::~SharedCommittedCowPages()
{
}
NonnullRefPtr<PhysicalPage> AnonymousVMObject::SharedCommittedCowPages::take_one()
{
ScopedSpinLock locker(m_lock);
return m_committed_pages.take_one();
}
void AnonymousVMObject::SharedCommittedCowPages::uncommit_one()
{
ScopedSpinLock locker(m_lock);
m_committed_pages.uncommit_one();
}
}

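The comments in try_clone() above boil down to a simple reserve: before parent and clone both become CoW, the parent commits one physical page per mapped page, and the two objects share that reserve until a page is either actually copied (take_one) or found to have a single owner (uncommit_one). A standalone model of that bookkeeping, using hypothetical simplified types rather than the kernel API:

#include <cassert>
#include <cstddef>

// Hypothetical model of the shared committed-CoW reserve created at fork time.
struct SharedCowReserve {
    size_t committed { 0 };
    explicit SharedCowReserve(size_t pages)
        : committed(pages)
    {
    }
    // A CoW fault that really needs a fresh physical page draws from the reserve.
    bool take_one()
    {
        if (committed == 0)
            return false; // reserve exhausted; fall back to regular allocation
        --committed;
        return true;
    }
    // A page that turns out to have a single owner no longer needs its reservation.
    void uncommit_one()
    {
        assert(committed > 0);
        --committed;
    }
};

int main()
{
    size_t page_count = 4;
    SharedCowReserve reserve(page_count); // commit page_count pages when cloning
    assert(reserve.take_one());           // parent writes a still-shared page
    reserve.uncommit_one();               // child writes a page it alone references
    assert(reserve.committed == 2);
    return 0;
}
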
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <Kernel/Memory/AllocationStrategy.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PageFaultResponse.h>
#include <Kernel/Memory/VMObject.h>
#include <Kernel/PhysicalAddress.h>
namespace Kernel {
class AnonymousVMObject final : public VMObject {
public:
virtual ~AnonymousVMObject() override;
static RefPtr<AnonymousVMObject> try_create_with_size(size_t, AllocationStrategy);
static RefPtr<AnonymousVMObject> try_create_for_physical_range(PhysicalAddress paddr, size_t size);
static RefPtr<AnonymousVMObject> try_create_with_physical_pages(Span<NonnullRefPtr<PhysicalPage>>);
static RefPtr<AnonymousVMObject> try_create_purgeable_with_size(size_t, AllocationStrategy);
static RefPtr<AnonymousVMObject> try_create_physically_contiguous_with_size(size_t);
virtual RefPtr<VMObject> try_clone() override;
[[nodiscard]] NonnullRefPtr<PhysicalPage> allocate_committed_page(Badge<Region>);
PageFaultResponse handle_cow_fault(size_t, VirtualAddress);
size_t cow_pages() const;
bool should_cow(size_t page_index, bool) const;
void set_should_cow(size_t page_index, bool);
bool is_purgeable() const { return m_purgeable; }
bool is_volatile() const { return m_volatile; }
KResult set_volatile(bool is_volatile, bool& was_purged);
size_t purge();
private:
class SharedCommittedCowPages;
explicit AnonymousVMObject(size_t, AllocationStrategy, Optional<CommittedPhysicalPageSet>);
explicit AnonymousVMObject(PhysicalAddress, size_t);
explicit AnonymousVMObject(Span<NonnullRefPtr<PhysicalPage>>);
explicit AnonymousVMObject(AnonymousVMObject const&, NonnullRefPtr<SharedCommittedCowPages>);
virtual StringView class_name() const override { return "AnonymousVMObject"sv; }
AnonymousVMObject& operator=(AnonymousVMObject const&) = delete;
AnonymousVMObject& operator=(AnonymousVMObject&&) = delete;
AnonymousVMObject(AnonymousVMObject&&) = delete;
virtual bool is_anonymous() const override { return true; }
Bitmap& ensure_cow_map();
void ensure_or_reset_cow_map();
Optional<CommittedPhysicalPageSet> m_unused_committed_pages;
Bitmap m_cow_map;
// AnonymousVMObject shares committed COW pages with cloned children (happens on fork)
class SharedCommittedCowPages : public RefCounted<SharedCommittedCowPages> {
AK_MAKE_NONCOPYABLE(SharedCommittedCowPages);
public:
SharedCommittedCowPages() = delete;
explicit SharedCommittedCowPages(CommittedPhysicalPageSet&&);
~SharedCommittedCowPages();
[[nodiscard]] bool is_empty() const { return m_committed_pages.is_empty(); }
[[nodiscard]] NonnullRefPtr<PhysicalPage> take_one();
void uncommit_one();
public:
SpinLock<u8> m_lock;
CommittedPhysicalPageSet m_committed_pages;
};
RefPtr<SharedCommittedCowPages> m_shared_committed_cow_pages;
bool m_purgeable { false };
bool m_volatile { false };
bool m_was_purged { false };
};
}

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/Memory/InodeVMObject.h>
namespace Kernel {
InodeVMObject::InodeVMObject(Inode& inode, size_t size)
: VMObject(size)
, m_inode(inode)
, m_dirty_pages(page_count(), false)
{
}
InodeVMObject::InodeVMObject(InodeVMObject const& other)
: VMObject(other)
, m_inode(other.m_inode)
, m_dirty_pages(page_count(), false)
{
for (size_t i = 0; i < page_count(); ++i)
m_dirty_pages.set(i, other.m_dirty_pages.get(i));
}
InodeVMObject::~InodeVMObject()
{
}
size_t InodeVMObject::amount_clean() const
{
size_t count = 0;
VERIFY(page_count() == m_dirty_pages.size());
for (size_t i = 0; i < page_count(); ++i) {
if (!m_dirty_pages.get(i) && m_physical_pages[i])
++count;
}
return count * PAGE_SIZE;
}
size_t InodeVMObject::amount_dirty() const
{
size_t count = 0;
for (size_t i = 0; i < m_dirty_pages.size(); ++i) {
if (m_dirty_pages.get(i))
++count;
}
return count * PAGE_SIZE;
}
int InodeVMObject::release_all_clean_pages()
{
ScopedSpinLock locker(m_lock);
int count = 0;
for (size_t i = 0; i < page_count(); ++i) {
if (!m_dirty_pages.get(i) && m_physical_pages[i]) {
m_physical_pages[i] = nullptr;
++count;
}
}
if (count) {
for_each_region([](auto& region) {
region.remap();
});
}
return count;
}
u32 InodeVMObject::writable_mappings() const
{
u32 count = 0;
const_cast<InodeVMObject&>(*this).for_each_region([&](auto& region) {
if (region.is_writable())
++count;
});
return count;
}
u32 InodeVMObject::executable_mappings() const
{
u32 count = 0;
const_cast<InodeVMObject&>(*this).for_each_region([&](auto& region) {
if (region.is_executable())
++count;
});
return count;
}
}

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Bitmap.h>
#include <Kernel/Memory/VMObject.h>
#include <Kernel/UnixTypes.h>
namespace Kernel {
class InodeVMObject : public VMObject {
public:
virtual ~InodeVMObject() override;
Inode& inode() { return *m_inode; }
Inode const& inode() const { return *m_inode; }
size_t amount_dirty() const;
size_t amount_clean() const;
int release_all_clean_pages();
u32 writable_mappings() const;
u32 executable_mappings() const;
protected:
explicit InodeVMObject(Inode&, size_t);
explicit InodeVMObject(InodeVMObject const&);
InodeVMObject& operator=(InodeVMObject const&) = delete;
InodeVMObject& operator=(InodeVMObject&&) = delete;
InodeVMObject(InodeVMObject&&) = delete;
virtual bool is_inode() const final { return true; }
NonnullRefPtr<Inode> m_inode;
Bitmap m_dirty_pages;
};
}

Kernel/Memory/MappedROM.h
@@ -0,0 +1,36 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/OwnPtr.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/PhysicalAddress.h>
namespace Kernel {
class MappedROM {
public:
const u8* base() const { return region->vaddr().offset(offset).as_ptr(); }
const u8* end() const { return base() + size; }
OwnPtr<Region> region;
size_t size { 0 };
size_t offset { 0 };
PhysicalAddress paddr;
Optional<PhysicalAddress> find_chunk_starting_with(StringView prefix, size_t chunk_size) const
{
for (auto* candidate = base(); candidate < end(); candidate += chunk_size) {
if (!__builtin_memcmp(prefix.characters_without_null_termination(), candidate, prefix.length()))
return paddr_of(candidate);
}
return {};
}
PhysicalAddress paddr_of(const u8* ptr) const { return paddr.offset(ptr - this->base()); }
};
}

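find_chunk_starting_with() above scans the mapped ROM in fixed-size steps for a byte signature. A hedged sketch of a typical use, locating the ACPI RSDP signature "RSD PTR " on its 16-byte stride; the wrapper function and call site are assumptions, not code from this commit:

#include <Kernel/Memory/MappedROM.h>

namespace Kernel {

// Illustrative only; the real call sites live in the firmware/ACPI code.
static Optional<PhysicalAddress> find_rsdp(MappedROM const& bios_area)
{
    // The ACPI spec places the RSDP signature on a 16-byte boundary.
    return bios_area.find_chunk_starting_with("RSD PTR "sv, 16);
}

}
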
File diff suppressed because it is too large.

@@ -0,0 +1,325 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Concepts.h>
#include <AK/HashTable.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/NonnullRefPtrVector.h>
#include <AK/String.h>
#include <Kernel/Arch/x86/PageFault.h>
#include <Kernel/Arch/x86/TrapFrame.h>
#include <Kernel/Forward.h>
#include <Kernel/Memory/AllocationStrategy.h>
#include <Kernel/Memory/PhysicalPage.h>
#include <Kernel/Memory/PhysicalRegion.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/VMObject.h>
#include <Kernel/SpinLock.h>
namespace Kernel {
constexpr bool page_round_up_would_wrap(FlatPtr x)
{
return x > (explode_byte(0xFF) & ~0xFFF);
}
constexpr FlatPtr page_round_up(FlatPtr x)
{
FlatPtr rounded = (((FlatPtr)(x)) + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1));
// Rounding up >0xfffff000 wraps back to 0. That's never what we want.
VERIFY(x == 0 || rounded != 0);
return rounded;
}
constexpr FlatPtr page_round_down(FlatPtr x)
{
return ((FlatPtr)(x)) & ~(PAGE_SIZE - 1);
}
inline FlatPtr virtual_to_low_physical(FlatPtr virtual_)
{
return virtual_ - physical_to_virtual_offset;
}
enum class UsedMemoryRangeType {
LowMemory = 0,
Prekernel,
Kernel,
BootModule,
PhysicalPages,
};
static constexpr StringView UserMemoryRangeTypeNames[] {
"Low memory",
"Prekernel",
"Kernel",
"Boot module",
"Physical Pages"
};
struct UsedMemoryRange {
UsedMemoryRangeType type {};
PhysicalAddress start;
PhysicalAddress end;
};
struct ContiguousReservedMemoryRange {
PhysicalAddress start;
PhysicalSize length {};
};
enum class PhysicalMemoryRangeType {
Usable = 0,
Reserved,
ACPI_Reclaimable,
ACPI_NVS,
BadMemory,
Unknown,
};
struct PhysicalMemoryRange {
PhysicalMemoryRangeType type { PhysicalMemoryRangeType::Unknown };
PhysicalAddress start;
PhysicalSize length {};
};
#define MM Kernel::MemoryManager::the()
struct MemoryManagerData {
static ProcessorSpecificDataID processor_specific_data_id() { return ProcessorSpecificDataID::MemoryManager; }
SpinLock<u8> m_quickmap_in_use;
u32 m_quickmap_prev_flags;
PhysicalAddress m_last_quickmap_pd;
PhysicalAddress m_last_quickmap_pt;
};
extern RecursiveSpinLock s_mm_lock;
// This class represents a set of committed physical pages.
// When you ask MemoryManager to commit pages for you, you get one of these in return.
// You can allocate pages from it via `take_one()`
// It will uncommit any (unallocated) remaining pages when destroyed.
class CommittedPhysicalPageSet {
AK_MAKE_NONCOPYABLE(CommittedPhysicalPageSet);
public:
CommittedPhysicalPageSet(Badge<MemoryManager>, size_t page_count)
: m_page_count(page_count)
{
}
CommittedPhysicalPageSet(CommittedPhysicalPageSet&& other)
: m_page_count(exchange(other.m_page_count, 0))
{
}
~CommittedPhysicalPageSet();
bool is_empty() const { return m_page_count == 0; }
size_t page_count() const { return m_page_count; }
[[nodiscard]] NonnullRefPtr<PhysicalPage> take_one();
void uncommit_one();
void operator=(CommittedPhysicalPageSet&&) = delete;
private:
size_t m_page_count { 0 };
};
class MemoryManager {
AK_MAKE_ETERNAL
friend class PageDirectory;
friend class AnonymousVMObject;
friend class Region;
friend class VMObject;
public:
static MemoryManager& the();
static bool is_initialized();
static void initialize(u32 cpu);
static inline MemoryManagerData& get_data()
{
return ProcessorSpecific<MemoryManagerData>::get();
}
PageFaultResponse handle_page_fault(PageFault const&);
void set_page_writable_direct(VirtualAddress, bool);
void protect_readonly_after_init_memory();
void unmap_text_after_init();
void unmap_ksyms_after_init();
static void enter_process_paging_scope(Process&);
static void enter_space(Space&);
bool validate_user_stack_no_lock(Space&, VirtualAddress) const;
bool validate_user_stack(Space&, VirtualAddress) const;
enum class ShouldZeroFill {
No,
Yes
};
Optional<CommittedPhysicalPageSet> commit_user_physical_pages(size_t page_count);
void uncommit_user_physical_pages(Badge<CommittedPhysicalPageSet>, size_t page_count);
NonnullRefPtr<PhysicalPage> allocate_committed_user_physical_page(Badge<CommittedPhysicalPageSet>, ShouldZeroFill = ShouldZeroFill::Yes);
RefPtr<PhysicalPage> allocate_user_physical_page(ShouldZeroFill = ShouldZeroFill::Yes, bool* did_purge = nullptr);
RefPtr<PhysicalPage> allocate_supervisor_physical_page();
NonnullRefPtrVector<PhysicalPage> allocate_contiguous_supervisor_physical_pages(size_t size);
void deallocate_physical_page(PhysicalAddress);
OwnPtr<Region> allocate_contiguous_kernel_region(size_t, StringView name, Region::Access access, Region::Cacheable = Region::Cacheable::Yes);
OwnPtr<Region> allocate_kernel_region(size_t, StringView name, Region::Access access, AllocationStrategy strategy = AllocationStrategy::Reserve, Region::Cacheable = Region::Cacheable::Yes);
OwnPtr<Region> allocate_kernel_region(PhysicalAddress, size_t, StringView name, Region::Access access, Region::Cacheable = Region::Cacheable::Yes);
OwnPtr<Region> allocate_kernel_region_identity(PhysicalAddress, size_t, StringView name, Region::Access access, Region::Cacheable = Region::Cacheable::Yes);
OwnPtr<Region> allocate_kernel_region_with_vmobject(VMObject&, size_t, StringView name, Region::Access access, Region::Cacheable = Region::Cacheable::Yes);
OwnPtr<Region> allocate_kernel_region_with_vmobject(Range const&, VMObject&, StringView name, Region::Access access, Region::Cacheable = Region::Cacheable::Yes);
struct SystemMemoryInfo {
PhysicalSize user_physical_pages { 0 };
PhysicalSize user_physical_pages_used { 0 };
PhysicalSize user_physical_pages_committed { 0 };
PhysicalSize user_physical_pages_uncommitted { 0 };
PhysicalSize super_physical_pages { 0 };
PhysicalSize super_physical_pages_used { 0 };
};
SystemMemoryInfo get_system_memory_info()
{
ScopedSpinLock lock(s_mm_lock);
return m_system_memory_info;
}
template<IteratorFunction<VMObject&> Callback>
static void for_each_vmobject(Callback callback)
{
ScopedSpinLock locker(s_mm_lock);
for (auto& vmobject : MM.m_vmobjects) {
if (callback(vmobject) == IterationDecision::Break)
break;
}
}
template<VoidFunction<VMObject&> Callback>
static void for_each_vmobject(Callback callback)
{
for (auto& vmobject : MM.m_vmobjects)
callback(vmobject);
}
static Region* find_user_region_from_vaddr(Space&, VirtualAddress);
static Region* find_user_region_from_vaddr_no_lock(Space&, VirtualAddress);
static void validate_syscall_preconditions(Space&, RegisterState const&);
void dump_kernel_regions();
PhysicalPage& shared_zero_page() { return *m_shared_zero_page; }
PhysicalPage& lazy_committed_page() { return *m_lazy_committed_page; }
PageDirectory& kernel_page_directory() { return *m_kernel_page_directory; }
Vector<UsedMemoryRange> const& used_memory_ranges() { return m_used_memory_ranges; }
bool is_allowed_to_mmap_to_userspace(PhysicalAddress, Range const&) const;
PhysicalPageEntry& get_physical_page_entry(PhysicalAddress);
PhysicalAddress get_physical_address(PhysicalPage const&);
private:
MemoryManager();
~MemoryManager();
void initialize_physical_pages();
void register_reserved_ranges();
void register_vmobject(VMObject&);
void unregister_vmobject(VMObject&);
void register_region(Region&);
void unregister_region(Region&);
void protect_kernel_image();
void parse_memory_map();
static void flush_tlb_local(VirtualAddress, size_t page_count = 1);
static void flush_tlb(PageDirectory const*, VirtualAddress, size_t page_count = 1);
static Region* kernel_region_from_vaddr(VirtualAddress);
static Region* find_region_from_vaddr(VirtualAddress);
RefPtr<PhysicalPage> find_free_user_physical_page(bool);
ALWAYS_INLINE u8* quickmap_page(PhysicalPage& page)
{
return quickmap_page(page.paddr());
}
u8* quickmap_page(PhysicalAddress const&);
void unquickmap_page();
PageDirectoryEntry* quickmap_pd(PageDirectory&, size_t pdpt_index);
PageTableEntry* quickmap_pt(PhysicalAddress);
PageTableEntry* pte(PageDirectory&, VirtualAddress);
PageTableEntry* ensure_pte(PageDirectory&, VirtualAddress);
void release_pte(PageDirectory&, VirtualAddress, bool);
RefPtr<PageDirectory> m_kernel_page_directory;
RefPtr<PhysicalPage> m_shared_zero_page;
RefPtr<PhysicalPage> m_lazy_committed_page;
SystemMemoryInfo m_system_memory_info;
NonnullOwnPtrVector<PhysicalRegion> m_user_physical_regions;
OwnPtr<PhysicalRegion> m_super_physical_region;
OwnPtr<PhysicalRegion> m_physical_pages_region;
PhysicalPageEntry* m_physical_page_entries { nullptr };
size_t m_physical_page_entries_count { 0 };
Region::ListInMemoryManager m_user_regions;
Region::ListInMemoryManager m_kernel_regions;
Vector<UsedMemoryRange> m_used_memory_ranges;
Vector<PhysicalMemoryRange> m_physical_memory_ranges;
Vector<ContiguousReservedMemoryRange> m_reserved_memory_ranges;
VMObject::List m_vmobjects;
};
inline bool is_user_address(VirtualAddress vaddr)
{
return vaddr.get() < USER_RANGE_CEILING;
}
inline bool is_user_range(VirtualAddress vaddr, size_t size)
{
if (vaddr.offset(size) < vaddr)
return false;
return is_user_address(vaddr) && is_user_address(vaddr.offset(size));
}
inline bool is_user_range(Range const& range)
{
return is_user_range(range.base(), range.size());
}
inline bool PhysicalPage::is_shared_zero_page() const
{
return this == &MM.shared_zero_page();
}
inline bool PhysicalPage::is_lazy_committed_page() const
{
return this == &MM.lazy_committed_page();
}
}

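A quick worked example for the page_round_up()/page_round_down() helpers near the top of MemoryManager.h above, assuming the usual 4 KiB PAGE_SIZE; illustrative only:

// Worked example (PAGE_SIZE == 4096 assumed).
static_assert(Kernel::page_round_down(0x1234) == 0x1000); // strip the in-page offset
static_assert(Kernel::page_round_up(0x1234) == 0x2000);   // advance to the next page boundary
static_assert(Kernel::page_round_up(0x2000) == 0x2000);   // aligned addresses are unchanged
// page_round_up_would_wrap() rejects addresses in the last page of the address
// space, where rounding up would wrap past 0 and trip the VERIFY above.
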
@@ -0,0 +1,168 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Memory.h>
#include <AK/Singleton.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PageDirectory.h>
#include <Kernel/Prekernel/Prekernel.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Sections.h>
extern u8 end_of_kernel_image[];
namespace Kernel {
static AK::Singleton<HashMap<FlatPtr, PageDirectory*>> s_cr3_map;
static HashMap<FlatPtr, PageDirectory*>& cr3_map()
{
VERIFY_INTERRUPTS_DISABLED();
return *s_cr3_map;
}
RefPtr<PageDirectory> PageDirectory::find_by_cr3(FlatPtr cr3)
{
ScopedSpinLock lock(s_mm_lock);
return cr3_map().get(cr3).value_or({});
}
UNMAP_AFTER_INIT NonnullRefPtr<PageDirectory> PageDirectory::must_create_kernel_page_directory()
{
auto directory = adopt_ref_if_nonnull(new (nothrow) PageDirectory).release_nonnull();
// make sure this starts in a new page directory to make MemoryManager::initialize_physical_pages() happy
FlatPtr start_of_range = ((FlatPtr)end_of_kernel_image & ~(FlatPtr)0x1fffff) + 0x200000;
directory->m_range_allocator.initialize_with_range(VirtualAddress(start_of_range), KERNEL_PD_END - start_of_range);
directory->m_identity_range_allocator.initialize_with_range(VirtualAddress(FlatPtr(0x00000000)), 0x00200000);
return directory;
}
RefPtr<PageDirectory> PageDirectory::try_create_for_userspace(RangeAllocator const* parent_range_allocator)
{
constexpr FlatPtr userspace_range_base = 0x00800000;
FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
auto directory = adopt_ref_if_nonnull(new (nothrow) PageDirectory);
if (!directory)
return {};
if (parent_range_allocator) {
directory->m_range_allocator.initialize_from_parent(*parent_range_allocator);
} else {
size_t random_offset = (get_fast_random<u8>() % 32 * MiB) & PAGE_MASK;
u32 base = userspace_range_base + random_offset;
directory->m_range_allocator.initialize_with_range(VirtualAddress(base), userspace_range_ceiling - base);
}
// NOTE: Take the MM lock since we need it for quickmap.
ScopedSpinLock lock(s_mm_lock);
#if ARCH(X86_64)
directory->m_pml4t = MM.allocate_user_physical_page();
if (!directory->m_pml4t)
return {};
#endif
directory->m_directory_table = MM.allocate_user_physical_page();
if (!directory->m_directory_table)
return {};
auto kernel_pd_index = (kernel_mapping_base >> 30) & 0x1ffu;
for (size_t i = 0; i < kernel_pd_index; i++) {
directory->m_directory_pages[i] = MM.allocate_user_physical_page();
if (!directory->m_directory_pages[i])
return {};
}
// Share the top 1 GiB of kernel-only mappings (>=kernel_mapping_base)
directory->m_directory_pages[kernel_pd_index] = MM.kernel_page_directory().m_directory_pages[kernel_pd_index];
#if ARCH(X86_64)
{
auto& table = *(PageDirectoryPointerTable*)MM.quickmap_page(*directory->m_pml4t);
table.raw[0] = (FlatPtr)directory->m_directory_table->paddr().as_ptr() | 7;
MM.unquickmap_page();
}
#endif
{
auto& table = *(PageDirectoryPointerTable*)MM.quickmap_page(*directory->m_directory_table);
for (size_t i = 0; i < sizeof(m_directory_pages) / sizeof(m_directory_pages[0]); i++) {
if (directory->m_directory_pages[i]) {
#if ARCH(I386)
table.raw[i] = (FlatPtr)directory->m_directory_pages[i]->paddr().as_ptr() | 1;
#else
table.raw[i] = (FlatPtr)directory->m_directory_pages[i]->paddr().as_ptr() | 7;
#endif
}
}
// 2 ** MAXPHYADDR - 1
// Where MAXPHYADDR = physical_address_bit_width
u64 max_physical_address = (1ULL << Processor::current().physical_address_bit_width()) - 1;
// bit 63 = no execute
// bit 7 = page size
// bit 5 = accessed
// bit 4 = cache disable
// bit 3 = write through
// bit 2 = user/supervisor
// bit 1 = read/write
// bit 0 = present
constexpr u64 pdpte_bit_flags = 0x80000000000000BF;
// This is to notify us of bugs where we're:
// 1. Going over what the processor is capable of.
// 2. Writing into the reserved bits (51:MAXPHYADDR), where doing so throws a GPF
// when writing out the PDPT pointer to CR3.
// The reason we're not checking the page directory's physical address directly is because
// we're checking for sign extension when putting it into a PDPTE. See issue #4584.
for (auto table_entry : table.raw)
VERIFY((table_entry & ~pdpte_bit_flags) <= max_physical_address);
MM.unquickmap_page();
}
// Clone bottom 2 MiB of mappings from kernel_page_directory
PageDirectoryEntry buffer;
auto* kernel_pd = MM.quickmap_pd(MM.kernel_page_directory(), 0);
memcpy(&buffer, kernel_pd, sizeof(PageDirectoryEntry));
auto* new_pd = MM.quickmap_pd(*directory, 0);
memcpy(new_pd, &buffer, sizeof(PageDirectoryEntry));
cr3_map().set(directory->cr3(), directory.ptr());
return directory;
}
PageDirectory::PageDirectory()
{
}
UNMAP_AFTER_INIT void PageDirectory::allocate_kernel_directory()
{
// Adopt the page tables already set up by boot.S
#if ARCH(X86_64)
dmesgln("MM: boot_pml4t @ {}", boot_pml4t);
m_pml4t = PhysicalPage::create(boot_pml4t, MayReturnToFreeList::No);
#endif
dmesgln("MM: boot_pdpt @ {}", boot_pdpt);
dmesgln("MM: boot_pd0 @ {}", boot_pd0);
dmesgln("MM: boot_pd_kernel @ {}", boot_pd_kernel);
m_directory_table = PhysicalPage::create(boot_pdpt, MayReturnToFreeList::No);
m_directory_pages[0] = PhysicalPage::create(boot_pd0, MayReturnToFreeList::No);
m_directory_pages[(kernel_mapping_base >> 30) & 0x1ff] = PhysicalPage::create(boot_pd_kernel, MayReturnToFreeList::No);
}
PageDirectory::~PageDirectory()
{
ScopedSpinLock lock(s_mm_lock);
if (m_space)
cr3_map().remove(cr3());
}
}

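A worked example of the PDPTE sanity check in try_create_for_userspace() above, assuming (hypothetically) a CPU that reports a 36-bit physical address width; the entry value is made up for illustration:

constexpr u64 example_pdpte_bit_flags = 0x80000000000000BF;    // NX plus the low attribute bits
constexpr u64 example_max_physical_address = (1ULL << 36) - 1; // 0x0000000FFFFFFFFF
constexpr u64 example_entry = 0x8000000000042007;              // NX | user | r/w | present, page at 0x42000
static_assert((example_entry & ~example_pdpte_bit_flags) == 0x42000);
static_assert((example_entry & ~example_pdpte_bit_flags) <= example_max_physical_address);
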
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/HashMap.h>
#include <AK/RefCounted.h>
#include <AK/RefPtr.h>
#include <Kernel/Forward.h>
#include <Kernel/Memory/PhysicalPage.h>
#include <Kernel/Memory/RangeAllocator.h>
namespace Kernel {
class PageDirectory : public RefCounted<PageDirectory> {
friend class MemoryManager;
public:
static RefPtr<PageDirectory> try_create_for_userspace(RangeAllocator const* parent_range_allocator = nullptr);
static NonnullRefPtr<PageDirectory> must_create_kernel_page_directory();
static RefPtr<PageDirectory> find_by_cr3(FlatPtr);
~PageDirectory();
void allocate_kernel_directory();
FlatPtr cr3() const
{
#if ARCH(X86_64)
return m_pml4t->paddr().get();
#else
return m_directory_table->paddr().get();
#endif
}
RangeAllocator& range_allocator() { return m_range_allocator; }
const RangeAllocator& range_allocator() const { return m_range_allocator; }
RangeAllocator& identity_range_allocator() { return m_identity_range_allocator; }
Space* space() { return m_space; }
const Space* space() const { return m_space; }
void set_space(Badge<Space>, Space& space) { m_space = &space; }
RecursiveSpinLock& get_lock() { return m_lock; }
private:
PageDirectory();
Space* m_space { nullptr };
RangeAllocator m_range_allocator;
RangeAllocator m_identity_range_allocator;
#if ARCH(X86_64)
RefPtr<PhysicalPage> m_pml4t;
#endif
RefPtr<PhysicalPage> m_directory_table;
#if ARCH(X86_64)
RefPtr<PhysicalPage> m_directory_pages[512];
#else
RefPtr<PhysicalPage> m_directory_pages[4];
#endif
HashMap<FlatPtr, RefPtr<PhysicalPage>> m_page_tables;
RecursiveSpinLock m_lock;
};
}

@@ -0,0 +1,17 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
namespace Kernel {
enum class PageFaultResponse {
ShouldCrash,
OutOfMemory,
Continue,
};
}

@@ -0,0 +1,43 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Heap/kmalloc.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PhysicalPage.h>
namespace Kernel {
NonnullRefPtr<PhysicalPage> PhysicalPage::create(PhysicalAddress paddr, MayReturnToFreeList may_return_to_freelist)
{
auto& physical_page_entry = MM.get_physical_page_entry(paddr);
return adopt_ref(*new (&physical_page_entry.allocated.physical_page) PhysicalPage(may_return_to_freelist));
}
PhysicalPage::PhysicalPage(MayReturnToFreeList may_return_to_freelist)
: m_may_return_to_freelist(may_return_to_freelist)
{
}
PhysicalAddress PhysicalPage::paddr() const
{
return MM.get_physical_address(*this);
}
void PhysicalPage::free_this()
{
auto paddr = MM.get_physical_address(*this);
if (m_may_return_to_freelist == MayReturnToFreeList::Yes) {
auto& this_as_freelist_entry = MM.get_physical_page_entry(paddr).freelist;
this->~PhysicalPage(); // delete in place
this_as_freelist_entry.next_index = -1;
this_as_freelist_entry.prev_index = -1;
MM.deallocate_physical_page(paddr);
} else {
this->~PhysicalPage(); // delete in place
}
}
}

@@ -0,0 +1,71 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/NonnullRefPtr.h>
#include <Kernel/PhysicalAddress.h>
namespace Kernel {
enum class MayReturnToFreeList : bool {
No,
Yes
};
class PhysicalPage {
AK_MAKE_NONCOPYABLE(PhysicalPage);
AK_MAKE_NONMOVABLE(PhysicalPage);
friend class MemoryManager;
public:
PhysicalAddress paddr() const;
void ref()
{
m_ref_count.fetch_add(1, AK::memory_order_acq_rel);
}
void unref()
{
if (m_ref_count.fetch_sub(1, AK::memory_order_acq_rel) == 1)
free_this();
}
static NonnullRefPtr<PhysicalPage> create(PhysicalAddress, MayReturnToFreeList may_return_to_freelist = MayReturnToFreeList::Yes);
u32 ref_count() const { return m_ref_count.load(AK::memory_order_consume); }
bool is_shared_zero_page() const;
bool is_lazy_committed_page() const;
private:
explicit PhysicalPage(MayReturnToFreeList may_return_to_freelist);
~PhysicalPage() = default;
void free_this();
Atomic<u32> m_ref_count { 1 };
MayReturnToFreeList m_may_return_to_freelist { MayReturnToFreeList::Yes };
};
struct PhysicalPageEntry {
union {
// If it's a live PhysicalPage object:
struct {
PhysicalPage physical_page;
} allocated;
// If it's an entry in a PhysicalZone::Bucket's freelist.
struct {
i16 next_index;
i16 prev_index;
} freelist;
};
};
}

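The PhysicalPageEntry union above keeps either a live PhysicalPage or a pair of freelist indices in the same storage; PhysicalPage::free_this() switches between the two states by destroying the object in place. A standalone sketch of that state flip, using hypothetical simplified types:

#include <cstdint>
#include <new>

struct Page {
    uint32_t ref_count;
};

struct FreelistLink {
    int16_t next_index;
    int16_t prev_index;
};

union Entry {
    Page allocated;        // state 1: a live page object
    FreelistLink freelist; // state 2: a link in the allocator's freelist
};

int main()
{
    Entry entry;
    new (&entry.allocated) Page { 1 }; // "allocate": construct the page in place
    entry.allocated.~Page();           // "free": destroy it in place...
    entry.freelist = { -1, -1 };       // ...and reuse the storage as freelist indices
    return entry.freelist.next_index == -1 ? 0 : 1;
}
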
@@ -0,0 +1,141 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/NonnullRefPtr.h>
#include <AK/RefPtr.h>
#include <Kernel/Assertions.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PhysicalRegion.h>
#include <Kernel/Memory/PhysicalZone.h>
#include <Kernel/Random.h>
namespace Kernel {
static constexpr u32 next_power_of_two(u32 value)
{
value--;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
value++;
return value;
}
PhysicalRegion::~PhysicalRegion()
{
}
PhysicalRegion::PhysicalRegion(PhysicalAddress lower, PhysicalAddress upper)
: m_lower(lower)
, m_upper(upper)
{
m_pages = (m_upper.get() - m_lower.get()) / PAGE_SIZE;
}
void PhysicalRegion::initialize_zones()
{
size_t remaining_pages = m_pages;
auto base_address = m_lower;
auto make_zones = [&](size_t pages_per_zone) {
size_t zone_count = 0;
auto first_address = base_address;
while (remaining_pages >= pages_per_zone) {
m_zones.append(make<PhysicalZone>(base_address, pages_per_zone));
base_address = base_address.offset(pages_per_zone * PAGE_SIZE);
m_usable_zones.append(m_zones.last());
remaining_pages -= pages_per_zone;
++zone_count;
}
if (zone_count)
dmesgln(" * {}x PhysicalZone ({} MiB) @ {:016x}-{:016x}", zone_count, pages_per_zone / 256, first_address.get(), base_address.get() - pages_per_zone * PAGE_SIZE - 1);
};
// First make 16 MiB zones (with 4096 pages each)
make_zones(4096);
// Then divide any remaining space into 1 MiB zones (with 256 pages each)
make_zones(256);
}
OwnPtr<PhysicalRegion> PhysicalRegion::try_take_pages_from_beginning(unsigned page_count)
{
VERIFY(page_count > 0);
VERIFY(page_count < m_pages);
auto taken_lower = m_lower;
auto taken_upper = taken_lower.offset((PhysicalPtr)page_count * PAGE_SIZE);
m_lower = m_lower.offset((PhysicalPtr)page_count * PAGE_SIZE);
m_pages = (m_upper.get() - m_lower.get()) / PAGE_SIZE;
return try_create(taken_lower, taken_upper);
}
NonnullRefPtrVector<PhysicalPage> PhysicalRegion::take_contiguous_free_pages(size_t count)
{
auto rounded_page_count = next_power_of_two(count);
auto order = __builtin_ctz(rounded_page_count);
Optional<PhysicalAddress> page_base;
for (auto& zone : m_usable_zones) {
page_base = zone.allocate_block(order);
if (page_base.has_value()) {
if (zone.is_empty()) {
// We've exhausted this zone, move it to the full zones list.
m_full_zones.append(zone);
}
break;
}
}
if (!page_base.has_value())
return {};
NonnullRefPtrVector<PhysicalPage> physical_pages;
physical_pages.ensure_capacity(count);
for (size_t i = 0; i < count; ++i)
physical_pages.append(PhysicalPage::create(page_base.value().offset(i * PAGE_SIZE)));
return physical_pages;
}
RefPtr<PhysicalPage> PhysicalRegion::take_free_page()
{
if (m_usable_zones.is_empty())
return nullptr;
auto& zone = *m_usable_zones.first();
auto page = zone.allocate_block(0);
VERIFY(page.has_value());
if (zone.is_empty()) {
// We've exhausted this zone, move it to the full zones list.
m_full_zones.append(zone);
}
return PhysicalPage::create(page.value());
}
void PhysicalRegion::return_page(PhysicalAddress paddr)
{
// FIXME: Find a way to avoid looping over the zones here.
// (Do some math on the address to find the right zone index.)
// The main thing that gets in the way of this is non-uniform zone sizes.
// Perhaps it would be better if all zones had the same size.
for (auto& zone : m_zones) {
if (zone.contains(paddr)) {
zone.deallocate_block(paddr, 0);
if (m_full_zones.contains(zone))
m_usable_zones.append(zone);
return;
}
}
VERIFY_NOT_REACHED();
}
}

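A worked example for initialize_zones() above, assuming the usual 4 KiB PAGE_SIZE: zones are carved greedily, 16 MiB zones first, then 1 MiB zones, and any tail smaller than 1 MiB is left unmanaged.

constexpr size_t pages_in_21_mib_region = 21 * 1024 * 1024 / 4096;
static_assert(pages_in_21_mib_region == 5376);
static_assert(pages_in_21_mib_region / 4096 == 1);       // one 16 MiB zone (4096 pages)
static_assert(pages_in_21_mib_region % 4096 / 256 == 5); // five 1 MiB zones (256 pages each)
static_assert(pages_in_21_mib_region % 4096 % 256 == 0); // no unmanaged tail in this case
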
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/OwnPtr.h>
#include <Kernel/Memory/PhysicalPage.h>
#include <Kernel/Memory/PhysicalZone.h>
namespace Kernel {
class PhysicalRegion {
AK_MAKE_ETERNAL;
AK_MAKE_NONCOPYABLE(PhysicalRegion);
AK_MAKE_NONMOVABLE(PhysicalRegion);
public:
static OwnPtr<PhysicalRegion> try_create(PhysicalAddress lower, PhysicalAddress upper)
{
return adopt_own_if_nonnull(new PhysicalRegion { lower, upper });
}
~PhysicalRegion();
void initialize_zones();
PhysicalAddress lower() const { return m_lower; }
PhysicalAddress upper() const { return m_upper; }
unsigned size() const { return m_pages; }
bool contains(PhysicalAddress paddr) const { return paddr >= m_lower && paddr < m_upper; }
OwnPtr<PhysicalRegion> try_take_pages_from_beginning(unsigned);
RefPtr<PhysicalPage> take_free_page();
NonnullRefPtrVector<PhysicalPage> take_contiguous_free_pages(size_t count);
void return_page(PhysicalAddress);
private:
PhysicalRegion(PhysicalAddress lower, PhysicalAddress upper);
NonnullOwnPtrVector<PhysicalZone> m_zones;
PhysicalZone::List m_usable_zones;
PhysicalZone::List m_full_zones;
PhysicalAddress m_lower;
PhysicalAddress m_upper;
unsigned m_pages { 0 };
};
}

@@ -0,0 +1,198 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Format.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PhysicalPage.h>
#include <Kernel/Memory/PhysicalZone.h>
namespace Kernel {
PhysicalPageEntry& PhysicalZone::get_freelist_entry(ChunkIndex index) const
{
return MM.get_physical_page_entry(m_base_address.offset(index * ZONE_CHUNK_SIZE));
}
PhysicalZone::PhysicalZone(PhysicalAddress base_address, size_t page_count)
: m_base_address(base_address)
, m_page_count(page_count)
, m_used_chunks(0)
{
size_t const chunk_count = page_count * 2;
for (int order = max_order; order >= 0; --order) {
auto& bucket = m_buckets[order];
size_t block_size = 2u << order;
size_t bitmap_size_for_order = ceil_div((size_t)(chunk_count / block_size), (size_t)2);
bucket.order = order;
if (bitmap_size_for_order)
bucket.bitmap.grow(bitmap_size_for_order, false);
}
auto first_order = __builtin_ctz(page_count);
size_t block_size = 2u << first_order;
auto& bucket = m_buckets[first_order];
size_t remaining_chunk_count = chunk_count;
size_t initial_bundle_count = remaining_chunk_count / block_size;
size_t offset = 0;
for (size_t i = 0; i < initial_bundle_count; ++i) {
ChunkIndex index = offset + i;
bucket.set_buddy_bit(index, true);
auto& freelist_entry = get_freelist_entry(index).freelist;
freelist_entry.next_index = bucket.freelist;
freelist_entry.prev_index = -1;
bucket.freelist = index;
remaining_chunk_count -= block_size;
offset += block_size;
}
}
Optional<PhysicalAddress> PhysicalZone::allocate_block(size_t order)
{
size_t block_size = 2u << order;
auto result = allocate_block_impl(order);
if (!result.has_value())
return {};
m_used_chunks += block_size;
VERIFY(!(result.value() & 1));
return m_base_address.offset(result.value() * ZONE_CHUNK_SIZE);
}
Optional<PhysicalZone::ChunkIndex> PhysicalZone::allocate_block_impl(size_t order)
{
if (order > max_order)
return {};
size_t block_size = 2u << order;
auto& bucket = m_buckets[order];
if (bucket.freelist == -1) {
// The freelist for this order is empty, try to allocate a block from one order higher, and split it.
auto buddies = allocate_block_impl(order + 1);
if (!buddies.has_value()) {
// Looks like we're unable to satisfy this allocation request.
return {};
}
// Split the block from order+1 into two parts.
// We keep one (in the freelist for this order) and return the other.
ChunkIndex index = buddies.value();
// First half goes in the freelist
auto& freelist_entry = get_freelist_entry(index).freelist;
freelist_entry.next_index = -1;
freelist_entry.prev_index = -1;
bucket.freelist = index;
VERIFY(bucket.get_buddy_bit(index) == false);
// Set buddy bit to 1 (one used, one unused).
bucket.set_buddy_bit(index, true);
// Second half is returned.
return index + block_size;
}
// Freelist has at least one entry, return that.
ChunkIndex index = bucket.freelist;
bucket.freelist = get_freelist_entry(bucket.freelist).freelist.next_index;
if (bucket.freelist != -1) {
get_freelist_entry(bucket.freelist).freelist.prev_index = -1;
}
VERIFY(bucket.get_buddy_bit(index) == true);
bucket.set_buddy_bit(index, false);
return index;
}
void PhysicalZone::deallocate_block(PhysicalAddress address, size_t order)
{
size_t block_size = 2u << order;
ChunkIndex index = (address.get() - m_base_address.get()) / ZONE_CHUNK_SIZE;
deallocate_block_impl(index, order);
m_used_chunks -= block_size;
}
void PhysicalZone::deallocate_block_impl(ChunkIndex index, size_t order)
{
size_t block_size = 2u << order;
// Basic algorithm:
// If the buddy block is free (buddy bit is 1 -- because this block was the only used one):
// Then,
// 1. Merge with buddy.
// 2. Return the merged block to order+1.
// Else (buddy bit is 0 -- because both blocks are used)
// 1. Add the block to the freelist.
// 2. Set buddy bit to 1.
auto& bucket = m_buckets[order];
if (bucket.get_buddy_bit(index)) {
// Buddy is free! Merge with buddy and coalesce upwards to the next order.
auto buddy_bit_index = bucket.buddy_bit_index(index);
ChunkIndex buddy_base_index = (buddy_bit_index << 1) << (1 + order);
if (index == buddy_base_index)
remove_from_freelist(bucket, buddy_base_index + block_size);
else
remove_from_freelist(bucket, buddy_base_index);
bucket.set_buddy_bit(index, false);
deallocate_block_impl(buddy_base_index, order + 1);
} else {
// Buddy is in use. Add freed block to freelist and set buddy bit to 1.
if (bucket.freelist != -1) {
get_freelist_entry(bucket.freelist).freelist.prev_index = index;
}
auto& freelist_entry = get_freelist_entry(index).freelist;
freelist_entry.next_index = bucket.freelist;
freelist_entry.prev_index = -1;
bucket.freelist = index;
bucket.set_buddy_bit(index, true);
}
}
void PhysicalZone::remove_from_freelist(BuddyBucket& bucket, ChunkIndex index)
{
auto& freelist_entry = get_freelist_entry(index).freelist;
VERIFY(freelist_entry.prev_index >= -1);
VERIFY(freelist_entry.next_index >= -1);
if (freelist_entry.prev_index != -1) {
auto& prev_entry = get_freelist_entry(freelist_entry.prev_index).freelist;
prev_entry.next_index = freelist_entry.next_index;
}
if (freelist_entry.next_index != -1) {
auto& next_entry = get_freelist_entry(freelist_entry.next_index).freelist;
next_entry.prev_index = freelist_entry.prev_index;
}
if (bucket.freelist == index)
bucket.freelist = freelist_entry.next_index;
freelist_entry.next_index = -1;
freelist_entry.prev_index = -1;
}
void PhysicalZone::dump() const
{
dbgln("(( {} used, {} available, page_count: {} ))", m_used_chunks, available(), m_page_count);
for (size_t i = 0; i <= max_order; ++i) {
auto& bucket = m_buckets[i];
dbgln("[{:2} / {:4}] ", i, (size_t)(2u << i));
auto entry = bucket.freelist;
while (entry != -1) {
dbgln(" {}", entry);
entry = get_freelist_entry(entry).freelist.next_index;
}
}
}
}

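A worked example of the buddy index math used above: two blocks of the same order share a single buddy bit, and freeing a block while that bit is set removes its buddy from the freelist and re-frees the merged block one order up via deallocate_block_impl(). The helper below reproduces BuddyBucket::buddy_bit_index() purely for illustration:

constexpr size_t buddy_bit_index(int index, size_t order)
{
    return (static_cast<size_t>(index) >> 1) >> (1 + order);
}
static_assert(buddy_bit_index(0, 0) == buddy_bit_index(2, 0)); // chunks 0 and 2 are order-0 buddies (one page each)
static_assert(buddy_bit_index(4, 0) == 1);                     // the next order-0 pair is chunks 4 and 6
static_assert(buddy_bit_index(0, 1) == buddy_bit_index(4, 1)); // at order 1 (two pages), blocks 0 and 4 pair up
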
@@ -0,0 +1,95 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Bitmap.h>
#include <AK/IntrusiveList.h>
namespace Kernel {
// A PhysicalZone is an allocator that manages a sub-area of a PhysicalRegion.
// Its total size is always a power of two.
// You allocate chunks at a time. One chunk is PAGE_SIZE/2, and the minimum allocation size is 2 chunks.
// The allocator uses a buddy block scheme internally.
class PhysicalZone {
AK_MAKE_ETERNAL;
AK_MAKE_NONCOPYABLE(PhysicalZone);
AK_MAKE_NONMOVABLE(PhysicalZone);
public:
static constexpr size_t ZONE_CHUNK_SIZE = PAGE_SIZE / 2;
using ChunkIndex = i16;
PhysicalZone(PhysicalAddress base, size_t page_count);
Optional<PhysicalAddress> allocate_block(size_t order);
void deallocate_block(PhysicalAddress, size_t order);
void dump() const;
size_t available() const { return m_page_count - (m_used_chunks / 2); }
bool is_empty() const { return !available(); }
PhysicalAddress base() const { return m_base_address; }
bool contains(PhysicalAddress paddr) const
{
return paddr >= m_base_address && paddr < m_base_address.offset(m_page_count * PAGE_SIZE);
}
private:
Optional<ChunkIndex> allocate_block_impl(size_t order);
void deallocate_block_impl(ChunkIndex, size_t order);
struct BuddyBucket {
bool get_buddy_bit(ChunkIndex index) const
{
return bitmap.get(buddy_bit_index(index));
}
void set_buddy_bit(ChunkIndex index, bool value)
{
bitmap.set(buddy_bit_index(index), value);
}
size_t buddy_bit_index(ChunkIndex index) const
{
// NOTE: We cut the index in half since one chunk is half a page.
return (index >> 1) >> (1 + order);
}
// This bucket's index in the m_buckets array. (Redundant data kept here for convenience.)
size_t order { 0 };
// This is the start of the freelist for this buddy size.
// It's an index into the global PhysicalPageEntry array (offset by this PhysicalRegion's base.)
// A value of -1 indicates an empty freelist.
ChunkIndex freelist { -1 };
// Bitmap with 1 bit per buddy pair.
// 0 == Both blocks either free or used.
// 1 == One block free, one block used.
Bitmap bitmap;
};
static constexpr size_t max_order = 12;
BuddyBucket m_buckets[max_order + 1];
PhysicalPageEntry& get_freelist_entry(ChunkIndex) const;
void remove_from_freelist(BuddyBucket&, ChunkIndex);
PhysicalAddress m_base_address { 0 };
size_t m_page_count { 0 };
size_t m_used_chunks { 0 };
IntrusiveListNode<PhysicalZone> m_list_node;
public:
using List = IntrusiveList<PhysicalZone, RawPtr<PhysicalZone>, &PhysicalZone::m_list_node>;
};
}

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/Memory/PrivateInodeVMObject.h>
namespace Kernel {
RefPtr<PrivateInodeVMObject> PrivateInodeVMObject::try_create_with_inode(Inode& inode)
{
return adopt_ref_if_nonnull(new (nothrow) PrivateInodeVMObject(inode, inode.size()));
}
RefPtr<VMObject> PrivateInodeVMObject::try_clone()
{
return adopt_ref_if_nonnull(new (nothrow) PrivateInodeVMObject(*this));
}
PrivateInodeVMObject::PrivateInodeVMObject(Inode& inode, size_t size)
: InodeVMObject(inode, size)
{
}
PrivateInodeVMObject::PrivateInodeVMObject(PrivateInodeVMObject const& other)
: InodeVMObject(other)
{
}
PrivateInodeVMObject::~PrivateInodeVMObject()
{
}
}

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Bitmap.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/UnixTypes.h>
namespace Kernel {
class PrivateInodeVMObject final : public InodeVMObject {
AK_MAKE_NONMOVABLE(PrivateInodeVMObject);
public:
virtual ~PrivateInodeVMObject() override;
static RefPtr<PrivateInodeVMObject> try_create_with_inode(Inode&);
virtual RefPtr<VMObject> try_clone() override;
private:
virtual bool is_private_inode() const override { return true; }
explicit PrivateInodeVMObject(Inode&, size_t);
explicit PrivateInodeVMObject(PrivateInodeVMObject const&);
virtual StringView class_name() const override { return "PrivateInodeVMObject"sv; }
PrivateInodeVMObject& operator=(PrivateInodeVMObject const&) = delete;
};
}

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Arch/x86/InterruptDisabler.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/ProcessPagingScope.h>
namespace Kernel {
ProcessPagingScope::ProcessPagingScope(Process& process)
{
VERIFY(Thread::current() != nullptr);
m_previous_cr3 = read_cr3();
MM.enter_process_paging_scope(process);
}
ProcessPagingScope::~ProcessPagingScope()
{
InterruptDisabler disabler;
Thread::current()->regs().cr3 = m_previous_cr3;
write_cr3(m_previous_cr3);
}
}

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Types.h>
#include <Kernel/Forward.h>
namespace Kernel {
class ProcessPagingScope {
public:
explicit ProcessPagingScope(Process&);
~ProcessPagingScope();
private:
u32 m_previous_cr3 { 0 };
};
}

Kernel/Memory/Range.cpp
@@ -0,0 +1,56 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Vector.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/Range.h>
#include <LibC/limits.h>
namespace Kernel {
Vector<Range, 2> Range::carve(const Range& taken) const
{
VERIFY((taken.size() % PAGE_SIZE) == 0);
Vector<Range, 2> parts;
if (taken == *this)
return {};
if (taken.base() > base())
parts.append({ base(), taken.base().get() - base().get() });
if (taken.end() < end())
parts.append({ taken.end(), end().get() - taken.end().get() });
return parts;
}
Range Range::intersect(const Range& other) const
{
if (*this == other) {
return *this;
}
auto new_base = max(base(), other.base());
auto new_end = min(end(), other.end());
VERIFY(new_base < new_end);
return Range(new_base, (new_end - new_base).get());
}
KResultOr<Range> Range::expand_to_page_boundaries(FlatPtr address, size_t size)
{
if (page_round_up_would_wrap(size))
return EINVAL;
if ((address + size) < address)
return EINVAL;
if (page_round_up_would_wrap(address + size))
return EINVAL;
auto base = VirtualAddress { address }.page_base();
auto end = page_round_up(address + size);
return Range { base, end - base.get() };
}
}

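A hedged usage sketch for expand_to_page_boundaries() above (kernel context and a 4 KiB PAGE_SIZE assumed; the wrapper function is illustrative, not code from this commit): a request at 0x1234 spanning 0x100 bytes expands to the single page that covers it.

#include <Kernel/Assertions.h>
#include <Kernel/Memory/Range.h>

namespace Kernel {

static void expand_example()
{
    auto range_or_error = Range::expand_to_page_boundaries(0x1234, 0x100);
    VERIFY(!range_or_error.is_error());
    auto range = range_or_error.value();
    VERIFY(range.base() == VirtualAddress { 0x1000 }); // rounded down to the page base
    VERIFY(range.size() == 0x1000);                    // end 0x1334 rounded up to 0x2000
}

}
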
Kernel/Memory/Range.h
@@ -0,0 +1,69 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <Kernel/KResult.h>
#include <Kernel/VirtualAddress.h>
namespace Kernel {
class Range {
friend class RangeAllocator;
public:
Range() = delete;
Range(VirtualAddress base, size_t size)
: m_base(base)
, m_size(size)
{
}
VirtualAddress base() const { return m_base; }
size_t size() const { return m_size; }
bool is_valid() const { return !m_base.is_null(); }
bool contains(VirtualAddress vaddr) const { return vaddr >= base() && vaddr < end(); }
VirtualAddress end() const { return m_base.offset(m_size); }
bool operator==(const Range& other) const
{
return m_base == other.m_base && m_size == other.m_size;
}
bool contains(VirtualAddress base, size_t size) const
{
if (base.offset(size) < base)
return false;
return base >= m_base && base.offset(size) <= end();
}
bool contains(const Range& other) const
{
return contains(other.base(), other.size());
}
Vector<Range, 2> carve(const Range&) const;
Range intersect(const Range&) const;
static KResultOr<Range> expand_to_page_boundaries(FlatPtr address, size_t size);
private:
VirtualAddress m_base;
size_t m_size { 0 };
};
}
template<>
struct AK::Formatter<Kernel::Range> : Formatter<FormatString> {
void format(FormatBuilder& builder, Kernel::Range value)
{
return Formatter<FormatString>::format(builder, "{} - {} (size {:p})", value.base().as_ptr(), value.base().offset(value.size() - 1).as_ptr(), value.size());
}
};
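
The Range helpers above carry most of the address-range arithmetic used elsewhere in this commit. A brief sketch of how they compose (illustration only; assumes kernel context where PAGE_SIZE is 4 KiB):

Range whole { VirtualAddress { 0x100000 }, 4 * PAGE_SIZE };
Range taken { VirtualAddress { 0x101000 }, PAGE_SIZE };

// carve() returns what is left of `whole` after removing `taken`:
// here [0x100000, 0x101000) and [0x102000, 0x104000).
auto leftovers = whole.carve(taken);

// intersect() clamps another range to the overlapping part; here it is `taken` itself.
auto overlap = whole.intersect(taken);

// expand_to_page_boundaries() turns an arbitrary (address, size) pair into a
// page-aligned Range, or EINVAL if the rounding would wrap.
auto rounded_or_error = Range::expand_to_page_boundaries(0x100f00, 0x180);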

@@ -0,0 +1,194 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Checked.h>
#include <Kernel/Memory/RangeAllocator.h>
#include <Kernel/Random.h>
#define VM_GUARD_PAGES
namespace Kernel {
RangeAllocator::RangeAllocator()
: m_total_range({}, 0)
{
}
void RangeAllocator::initialize_with_range(VirtualAddress base, size_t size)
{
m_total_range = { base, size };
m_available_ranges.insert(base.get(), Range { base, size });
}
void RangeAllocator::initialize_from_parent(RangeAllocator const& parent_allocator)
{
ScopedSpinLock lock(parent_allocator.m_lock);
m_total_range = parent_allocator.m_total_range;
m_available_ranges.clear();
for (auto it = parent_allocator.m_available_ranges.begin(); !it.is_end(); ++it) {
m_available_ranges.insert(it.key(), *it);
}
}
void RangeAllocator::dump() const
{
VERIFY(m_lock.is_locked());
dbgln("RangeAllocator({})", this);
for (auto& range : m_available_ranges) {
dbgln(" {:x} -> {:x}", range.base().get(), range.end().get() - 1);
}
}
void RangeAllocator::carve_at_iterator(auto& it, Range const& range)
{
VERIFY(m_lock.is_locked());
auto remaining_parts = (*it).carve(range);
VERIFY(remaining_parts.size() >= 1);
VERIFY(m_total_range.contains(remaining_parts[0]));
m_available_ranges.remove(it.key());
m_available_ranges.insert(remaining_parts[0].base().get(), remaining_parts[0]);
if (remaining_parts.size() == 2) {
VERIFY(m_total_range.contains(remaining_parts[1]));
m_available_ranges.insert(remaining_parts[1].base().get(), remaining_parts[1]);
}
}
Optional<Range> RangeAllocator::allocate_randomized(size_t size, size_t alignment)
{
if (!size)
return {};
VERIFY((size % PAGE_SIZE) == 0);
VERIFY((alignment % PAGE_SIZE) == 0);
// FIXME: I'm sure there's a smarter way to do this.
static constexpr size_t maximum_randomization_attempts = 1000;
for (size_t i = 0; i < maximum_randomization_attempts; ++i) {
VirtualAddress random_address { round_up_to_power_of_two(get_fast_random<FlatPtr>() % m_total_range.end().get(), alignment) };
if (!m_total_range.contains(random_address, size))
continue;
auto range = allocate_specific(random_address, size);
if (range.has_value())
return range;
}
return allocate_anywhere(size, alignment);
}
Optional<Range> RangeAllocator::allocate_anywhere(size_t size, size_t alignment)
{
if (!size)
return {};
VERIFY((size % PAGE_SIZE) == 0);
VERIFY((alignment % PAGE_SIZE) == 0);
#ifdef VM_GUARD_PAGES
// NOTE: We pad VM allocations with a guard page on each side.
if (Checked<size_t>::addition_would_overflow(size, PAGE_SIZE * 2))
return {};
size_t effective_size = size + PAGE_SIZE * 2;
size_t offset_from_effective_base = PAGE_SIZE;
#else
size_t effective_size = size;
size_t offset_from_effective_base = 0;
#endif
if (Checked<size_t>::addition_would_overflow(effective_size, alignment))
return {};
ScopedSpinLock lock(m_lock);
for (auto it = m_available_ranges.begin(); !it.is_end(); ++it) {
auto& available_range = *it;
// FIXME: This check is probably excluding some valid candidates when using a large alignment.
if (available_range.size() < (effective_size + alignment))
continue;
FlatPtr initial_base = available_range.base().offset(offset_from_effective_base).get();
FlatPtr aligned_base = round_up_to_power_of_two(initial_base, alignment);
Range const allocated_range(VirtualAddress(aligned_base), size);
VERIFY(m_total_range.contains(allocated_range));
if (available_range == allocated_range) {
m_available_ranges.remove(it.key());
return allocated_range;
}
carve_at_iterator(it, allocated_range);
return allocated_range;
}
dmesgln("RangeAllocator: Failed to allocate anywhere: size={}, alignment={}", size, alignment);
return {};
}
Optional<Range> RangeAllocator::allocate_specific(VirtualAddress base, size_t size)
{
if (!size)
return {};
VERIFY(base.is_page_aligned());
VERIFY((size % PAGE_SIZE) == 0);
Range const allocated_range(base, size);
if (!m_total_range.contains(allocated_range)) {
return {};
}
ScopedSpinLock lock(m_lock);
for (auto it = m_available_ranges.begin(); !it.is_end(); ++it) {
auto& available_range = *it;
if (!available_range.contains(base, size))
continue;
if (available_range == allocated_range) {
m_available_ranges.remove(it.key());
return allocated_range;
}
carve_at_iterator(it, allocated_range);
return allocated_range;
}
return {};
}
void RangeAllocator::deallocate(Range const& range)
{
ScopedSpinLock lock(m_lock);
VERIFY(m_total_range.contains(range));
VERIFY(range.size());
VERIFY((range.size() % PAGE_SIZE) == 0);
VERIFY(range.base() < range.end());
VERIFY(!m_available_ranges.is_empty());
Range merged_range = range;
{
// Try merging with preceding range.
auto* preceding_range = m_available_ranges.find_largest_not_above(range.base().get());
if (preceding_range && preceding_range->end() == range.base()) {
preceding_range->m_size += range.size();
merged_range = *preceding_range;
} else {
m_available_ranges.insert(range.base().get(), range);
}
}
{
// Try merging with following range.
auto* following_range = m_available_ranges.find_largest_not_above(range.end().get());
if (following_range && merged_range.end() == following_range->base()) {
auto* existing_range = m_available_ranges.find_largest_not_above(range.base().get());
VERIFY(existing_range->base() == merged_range.base());
existing_range->m_size += following_range->size();
m_available_ranges.remove(following_range->base().get());
}
}
}
}
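
To make the allocator's behaviour concrete, here is a small usage sketch (illustration only, with made-up addresses; in the kernel the allocator is owned by a PageDirectory and driven through Space):

RangeAllocator allocator;
allocator.initialize_with_range(VirtualAddress { 0x20000000 }, 0x10000000);

// allocate_anywhere() pads the request with a guard page on each side
// (VM_GUARD_PAGES), aligns the base, and carves the result out of the
// first available range that fits.
auto range = allocator.allocate_anywhere(16 * PAGE_SIZE);

// allocate_specific() only succeeds if the exact range is still available.
auto fixed = allocator.allocate_specific(VirtualAddress { 0x20800000 }, 4 * PAGE_SIZE);

// deallocate() returns a range to the allocator and merges it with any
// free neighbours.
if (range.has_value())
    allocator.deallocate(range.value());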

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/RedBlackTree.h>
#include <AK/Traits.h>
#include <Kernel/Memory/Range.h>
#include <Kernel/SpinLock.h>
namespace Kernel {
class RangeAllocator {
public:
RangeAllocator();
~RangeAllocator() = default;
void initialize_with_range(VirtualAddress, size_t);
void initialize_from_parent(RangeAllocator const&);
Optional<Range> allocate_anywhere(size_t, size_t alignment = PAGE_SIZE);
Optional<Range> allocate_specific(VirtualAddress, size_t);
Optional<Range> allocate_randomized(size_t, size_t alignment);
void deallocate(Range const&);
void dump() const;
bool contains(Range const& range) const { return m_total_range.contains(range); }
private:
void carve_at_iterator(auto&, Range const&);
RedBlackTree<FlatPtr, Range> m_available_ranges;
Range m_total_range;
mutable SpinLock<u8> m_lock;
};
}
namespace AK {
template<>
struct Traits<Kernel::Range> : public GenericTraits<Kernel::Range> {
static constexpr bool is_trivial() { return true; }
};
}

Kernel/Memory/Region.cpp
@@ -0,0 +1,458 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Memory.h>
#include <AK/StringView.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PageDirectory.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/Panic.h>
#include <Kernel/Process.h>
#include <Kernel/Thread.h>
namespace Kernel {
Region::Region(Range const& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable, bool shared)
: m_range(range)
, m_offset_in_vmobject(offset_in_vmobject)
, m_vmobject(move(vmobject))
, m_name(move(name))
, m_access(access | ((access & 0x7) << 4))
, m_shared(shared)
, m_cacheable(cacheable == Cacheable::Yes)
{
VERIFY(m_range.base().is_page_aligned());
VERIFY(m_range.size());
VERIFY((m_range.size() % PAGE_SIZE) == 0);
m_vmobject->add_region(*this);
MM.register_region(*this);
}
Region::~Region()
{
m_vmobject->remove_region(*this);
// Make sure we disable interrupts so we don't get interrupted between unmapping and unregistering.
// Unmapping the region will give the VM back to the RangeAllocator, so an interrupt handler would
// find the address<->region mappings in an invalid state there.
ScopedSpinLock lock(s_mm_lock);
if (m_page_directory) {
unmap(ShouldDeallocateVirtualMemoryRange::Yes);
VERIFY(!m_page_directory);
}
MM.unregister_region(*this);
}
OwnPtr<Region> Region::clone()
{
VERIFY(Process::current());
ScopedSpinLock lock(s_mm_lock);
if (m_shared) {
VERIFY(!m_stack);
if (vmobject().is_inode())
VERIFY(vmobject().is_shared_inode());
// Create a new region backed by the same VMObject.
auto region = Region::try_create_user_accessible(
m_range, m_vmobject, m_offset_in_vmobject, m_name ? m_name->try_clone() : OwnPtr<KString> {}, access(), m_cacheable ? Cacheable::Yes : Cacheable::No, m_shared);
if (!region) {
dbgln("Region::clone: Unable to allocate new Region");
return nullptr;
}
region->set_mmap(m_mmap);
region->set_shared(m_shared);
region->set_syscall_region(is_syscall_region());
return region;
}
if (vmobject().is_inode())
VERIFY(vmobject().is_private_inode());
auto vmobject_clone = vmobject().try_clone();
if (!vmobject_clone)
return {};
// Set up a COW region. The parent (this) region becomes COW as well!
remap();
auto clone_region = Region::try_create_user_accessible(
m_range, vmobject_clone.release_nonnull(), m_offset_in_vmobject, m_name ? m_name->try_clone() : OwnPtr<KString> {}, access(), m_cacheable ? Cacheable::Yes : Cacheable::No, m_shared);
if (!clone_region) {
dbgln("Region::clone: Unable to allocate new Region for COW");
return nullptr;
}
if (m_stack) {
VERIFY(is_readable());
VERIFY(is_writable());
VERIFY(vmobject().is_anonymous());
clone_region->set_stack(true);
}
clone_region->set_syscall_region(is_syscall_region());
clone_region->set_mmap(m_mmap);
return clone_region;
}
void Region::set_vmobject(NonnullRefPtr<VMObject>&& obj)
{
if (m_vmobject.ptr() == obj.ptr())
return;
m_vmobject->remove_region(*this);
m_vmobject = move(obj);
m_vmobject->add_region(*this);
}
size_t Region::cow_pages() const
{
if (!vmobject().is_anonymous())
return 0;
return static_cast<AnonymousVMObject const&>(vmobject()).cow_pages();
}
size_t Region::amount_dirty() const
{
if (!vmobject().is_inode())
return amount_resident();
return static_cast<InodeVMObject const&>(vmobject()).amount_dirty();
}
size_t Region::amount_resident() const
{
size_t bytes = 0;
for (size_t i = 0; i < page_count(); ++i) {
auto* page = physical_page(i);
if (page && !page->is_shared_zero_page() && !page->is_lazy_committed_page())
bytes += PAGE_SIZE;
}
return bytes;
}
size_t Region::amount_shared() const
{
size_t bytes = 0;
for (size_t i = 0; i < page_count(); ++i) {
auto* page = physical_page(i);
if (page && page->ref_count() > 1 && !page->is_shared_zero_page() && !page->is_lazy_committed_page())
bytes += PAGE_SIZE;
}
return bytes;
}
OwnPtr<Region> Region::try_create_user_accessible(Range const& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable, bool shared)
{
auto region = adopt_own_if_nonnull(new (nothrow) Region(range, move(vmobject), offset_in_vmobject, move(name), access, cacheable, shared));
if (!region)
return nullptr;
return region;
}
OwnPtr<Region> Region::try_create_kernel_only(Range const& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable cacheable)
{
return adopt_own_if_nonnull(new (nothrow) Region(range, move(vmobject), offset_in_vmobject, move(name), access, cacheable, false));
}
bool Region::should_cow(size_t page_index) const
{
if (!vmobject().is_anonymous())
return false;
return static_cast<AnonymousVMObject const&>(vmobject()).should_cow(first_page_index() + page_index, m_shared);
}
void Region::set_should_cow(size_t page_index, bool cow)
{
VERIFY(!m_shared);
if (vmobject().is_anonymous())
static_cast<AnonymousVMObject&>(vmobject()).set_should_cow(first_page_index() + page_index, cow);
}
bool Region::map_individual_page_impl(size_t page_index)
{
VERIFY(m_page_directory->get_lock().own_lock());
auto page_vaddr = vaddr_from_page_index(page_index);
bool user_allowed = page_vaddr.get() >= 0x00800000 && is_user_address(page_vaddr);
if (is_mmap() && !user_allowed) {
PANIC("About to map mmap'ed page at a kernel address");
}
// NOTE: We have to take the MM lock for PTE's to stay valid while we use them.
ScopedSpinLock mm_locker(s_mm_lock);
auto* pte = MM.ensure_pte(*m_page_directory, page_vaddr);
if (!pte)
return false;
auto* page = physical_page(page_index);
if (!page || (!is_readable() && !is_writable())) {
pte->clear();
} else {
pte->set_cache_disabled(!m_cacheable);
pte->set_physical_page_base(page->paddr().get());
pte->set_present(true);
if (page->is_shared_zero_page() || page->is_lazy_committed_page() || should_cow(page_index))
pte->set_writable(false);
else
pte->set_writable(is_writable());
if (Processor::current().has_feature(CPUFeature::NX))
pte->set_execute_disabled(!is_executable());
pte->set_user_allowed(user_allowed);
}
return true;
}
bool Region::do_remap_vmobject_page(size_t page_index, bool with_flush)
{
ScopedSpinLock lock(vmobject().m_lock);
if (!m_page_directory)
        return true; // Not an error; the region may not have mapped this page yet.
    if (!translate_vmobject_page(page_index))
        return true; // Not an error; the region doesn't map this page.
ScopedSpinLock page_lock(m_page_directory->get_lock());
VERIFY(physical_page(page_index));
bool success = map_individual_page_impl(page_index);
if (with_flush)
MM.flush_tlb(m_page_directory, vaddr_from_page_index(page_index));
return success;
}
bool Region::remap_vmobject_page(size_t page_index, bool with_flush)
{
auto& vmobject = this->vmobject();
bool success = true;
vmobject.for_each_region([&](auto& region) {
if (!region.do_remap_vmobject_page(page_index, with_flush))
success = false;
});
return success;
}
void Region::unmap(ShouldDeallocateVirtualMemoryRange deallocate_range)
{
ScopedSpinLock lock(s_mm_lock);
if (!m_page_directory)
return;
ScopedSpinLock page_lock(m_page_directory->get_lock());
size_t count = page_count();
for (size_t i = 0; i < count; ++i) {
auto vaddr = vaddr_from_page_index(i);
MM.release_pte(*m_page_directory, vaddr, i == count - 1);
}
MM.flush_tlb(m_page_directory, vaddr(), page_count());
if (deallocate_range == ShouldDeallocateVirtualMemoryRange::Yes) {
if (m_page_directory->range_allocator().contains(range()))
m_page_directory->range_allocator().deallocate(range());
else
m_page_directory->identity_range_allocator().deallocate(range());
}
m_page_directory = nullptr;
}
void Region::set_page_directory(PageDirectory& page_directory)
{
VERIFY(!m_page_directory || m_page_directory == &page_directory);
VERIFY(s_mm_lock.own_lock());
m_page_directory = page_directory;
}
bool Region::map(PageDirectory& page_directory, ShouldFlushTLB should_flush_tlb)
{
ScopedSpinLock lock(s_mm_lock);
ScopedSpinLock page_lock(page_directory.get_lock());
// FIXME: Find a better place for this sanity check(?)
if (is_user() && !is_shared()) {
VERIFY(!vmobject().is_shared_inode());
}
set_page_directory(page_directory);
size_t page_index = 0;
while (page_index < page_count()) {
if (!map_individual_page_impl(page_index))
break;
++page_index;
}
if (page_index > 0) {
if (should_flush_tlb == ShouldFlushTLB::Yes)
MM.flush_tlb(m_page_directory, vaddr(), page_index);
return page_index == page_count();
}
return false;
}
void Region::remap()
{
VERIFY(m_page_directory);
map(*m_page_directory);
}
PageFaultResponse Region::handle_fault(PageFault const& fault)
{
auto page_index_in_region = page_index_from_address(fault.vaddr());
if (fault.type() == PageFault::Type::PageNotPresent) {
if (fault.is_read() && !is_readable()) {
dbgln("NP(non-readable) fault in Region({})[{}]", this, page_index_in_region);
return PageFaultResponse::ShouldCrash;
}
if (fault.is_write() && !is_writable()) {
dbgln("NP(non-writable) write fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
return PageFaultResponse::ShouldCrash;
}
if (vmobject().is_inode()) {
dbgln_if(PAGE_FAULT_DEBUG, "NP(inode) fault in Region({})[{}]", this, page_index_in_region);
return handle_inode_fault(page_index_in_region);
}
auto& page_slot = physical_page_slot(page_index_in_region);
if (page_slot->is_lazy_committed_page()) {
auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
VERIFY(m_vmobject->is_anonymous());
page_slot = static_cast<AnonymousVMObject&>(*m_vmobject).allocate_committed_page({});
remap_vmobject_page(page_index_in_vmobject);
return PageFaultResponse::Continue;
}
dbgln("BUG! Unexpected NP fault at {}", fault.vaddr());
return PageFaultResponse::ShouldCrash;
}
VERIFY(fault.type() == PageFault::Type::ProtectionViolation);
if (fault.access() == PageFault::Access::Write && is_writable() && should_cow(page_index_in_region)) {
dbgln_if(PAGE_FAULT_DEBUG, "PV(cow) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
auto* phys_page = physical_page(page_index_in_region);
if (phys_page->is_shared_zero_page() || phys_page->is_lazy_committed_page()) {
dbgln_if(PAGE_FAULT_DEBUG, "NP(zero) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
return handle_zero_fault(page_index_in_region);
}
return handle_cow_fault(page_index_in_region);
}
dbgln("PV(error) fault in Region({})[{}] at {}", this, page_index_in_region, fault.vaddr());
return PageFaultResponse::ShouldCrash;
}
PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region)
{
VERIFY_INTERRUPTS_DISABLED();
VERIFY(vmobject().is_anonymous());
auto& page_slot = physical_page_slot(page_index_in_region);
auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
ScopedSpinLock locker(vmobject().m_lock);
if (!page_slot.is_null() && !page_slot->is_shared_zero_page() && !page_slot->is_lazy_committed_page()) {
dbgln_if(PAGE_FAULT_DEBUG, "MM: zero_page() but page already present. Fine with me!");
if (!remap_vmobject_page(page_index_in_vmobject))
return PageFaultResponse::OutOfMemory;
return PageFaultResponse::Continue;
}
auto current_thread = Thread::current();
if (current_thread != nullptr)
current_thread->did_zero_fault();
if (page_slot->is_lazy_committed_page()) {
VERIFY(m_vmobject->is_anonymous());
page_slot = static_cast<AnonymousVMObject&>(*m_vmobject).allocate_committed_page({});
dbgln_if(PAGE_FAULT_DEBUG, " >> ALLOCATED COMMITTED {}", page_slot->paddr());
} else {
page_slot = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::Yes);
if (page_slot.is_null()) {
dmesgln("MM: handle_zero_fault was unable to allocate a physical page");
return PageFaultResponse::OutOfMemory;
}
dbgln_if(PAGE_FAULT_DEBUG, " >> ALLOCATED {}", page_slot->paddr());
}
if (!remap_vmobject_page(page_index_in_vmobject)) {
dmesgln("MM: handle_zero_fault was unable to allocate a page table to map {}", page_slot);
return PageFaultResponse::OutOfMemory;
}
return PageFaultResponse::Continue;
}
PageFaultResponse Region::handle_cow_fault(size_t page_index_in_region)
{
VERIFY_INTERRUPTS_DISABLED();
auto current_thread = Thread::current();
if (current_thread)
current_thread->did_cow_fault();
if (!vmobject().is_anonymous())
return PageFaultResponse::ShouldCrash;
auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
auto response = reinterpret_cast<AnonymousVMObject&>(vmobject()).handle_cow_fault(page_index_in_vmobject, vaddr().offset(page_index_in_region * PAGE_SIZE));
if (!remap_vmobject_page(page_index_in_vmobject))
return PageFaultResponse::OutOfMemory;
return response;
}
PageFaultResponse Region::handle_inode_fault(size_t page_index_in_region)
{
VERIFY_INTERRUPTS_DISABLED();
VERIFY(vmobject().is_inode());
VERIFY(!s_mm_lock.own_lock());
VERIFY(!g_scheduler_lock.own_lock());
auto& inode_vmobject = static_cast<InodeVMObject&>(vmobject());
auto page_index_in_vmobject = translate_to_vmobject_page(page_index_in_region);
auto& vmobject_physical_page_entry = inode_vmobject.physical_pages()[page_index_in_vmobject];
VERIFY(vmobject_physical_page_entry.is_null());
dbgln_if(PAGE_FAULT_DEBUG, "Inode fault in {} page index: {}", name(), page_index_in_region);
auto current_thread = Thread::current();
if (current_thread)
current_thread->did_inode_fault();
u8 page_buffer[PAGE_SIZE];
auto& inode = inode_vmobject.inode();
auto buffer = UserOrKernelBuffer::for_kernel_buffer(page_buffer);
auto result = inode.read_bytes(page_index_in_vmobject * PAGE_SIZE, PAGE_SIZE, buffer, nullptr);
if (result.is_error()) {
dmesgln("handle_inode_fault: Error ({}) while reading from inode", result.error());
return PageFaultResponse::ShouldCrash;
}
auto nread = result.value();
if (nread < PAGE_SIZE) {
// If we read less than a page, zero out the rest to avoid leaking uninitialized data.
memset(page_buffer + nread, 0, PAGE_SIZE - nread);
}
ScopedSpinLock locker(inode_vmobject.m_lock);
if (!vmobject_physical_page_entry.is_null()) {
// Someone else faulted in this page while we were reading from the inode.
// No harm done (other than some duplicate work), remap the page here and return.
dbgln_if(PAGE_FAULT_DEBUG, "handle_inode_fault: Page faulted in by someone else, remapping.");
if (!remap_vmobject_page(page_index_in_vmobject))
return PageFaultResponse::OutOfMemory;
return PageFaultResponse::Continue;
}
vmobject_physical_page_entry = MM.allocate_user_physical_page(MemoryManager::ShouldZeroFill::No);
if (vmobject_physical_page_entry.is_null()) {
dmesgln("MM: handle_inode_fault was unable to allocate a physical page");
return PageFaultResponse::OutOfMemory;
}
u8* dest_ptr = MM.quickmap_page(*vmobject_physical_page_entry);
memcpy(dest_ptr, page_buffer, PAGE_SIZE);
MM.unquickmap_page();
remap_vmobject_page(page_index_in_vmobject);
return PageFaultResponse::Continue;
}
}
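
For kernel-internal mappings the usual flow is: create a Region over an existing VMObject, then map it into a page directory. A compressed sketch (illustration only; `range`, `vmobject` and `page_directory` are assumed to already exist, error handling shortened):

auto region = Region::try_create_kernel_only(
    range, vmobject, 0, KString::try_create("example region"sv),
    Region::Access::Read | Region::Access::Write);
if (region) {
    // map() walks the region's pages, builds PTEs under the MM lock and
    // flushes the TLB; it returns false if a page table couldn't be allocated.
    bool mapped = region->map(page_directory);
    (void)mapped;
}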

Kernel/Memory/Region.h
@@ -0,0 +1,247 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/EnumBits.h>
#include <AK/IntrusiveList.h>
#include <AK/Weakable.h>
#include <Kernel/Arch/x86/PageFault.h>
#include <Kernel/Forward.h>
#include <Kernel/Heap/SlabAllocator.h>
#include <Kernel/KString.h>
#include <Kernel/Memory/PageFaultResponse.h>
#include <Kernel/Memory/RangeAllocator.h>
#include <Kernel/Sections.h>
#include <Kernel/UnixTypes.h>
namespace Kernel {
enum class ShouldFlushTLB {
No,
Yes,
};
class Region final
: public Weakable<Region> {
friend class MemoryManager;
MAKE_SLAB_ALLOCATED(Region)
public:
enum Access : u8 {
None = 0,
Read = 1,
Write = 2,
Execute = 4,
HasBeenReadable = 16,
HasBeenWritable = 32,
HasBeenExecutable = 64,
};
enum class Cacheable {
No = 0,
Yes,
};
static OwnPtr<Region> try_create_user_accessible(Range const&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable, bool shared);
static OwnPtr<Region> try_create_kernel_only(Range const&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, OwnPtr<KString> name, Region::Access access, Cacheable = Cacheable::Yes);
~Region();
Range const& range() const { return m_range; }
VirtualAddress vaddr() const { return m_range.base(); }
size_t size() const { return m_range.size(); }
bool is_readable() const { return m_access & Access::Read; }
bool is_writable() const { return m_access & Access::Write; }
bool is_executable() const { return m_access & Access::Execute; }
bool has_been_readable() const { return m_access & Access::HasBeenReadable; }
bool has_been_writable() const { return m_access & Access::HasBeenWritable; }
bool has_been_executable() const { return m_access & Access::HasBeenExecutable; }
bool is_cacheable() const { return m_cacheable; }
StringView name() const { return m_name ? m_name->view() : StringView {}; }
OwnPtr<KString> take_name() { return move(m_name); }
Region::Access access() const { return static_cast<Region::Access>(m_access); }
void set_name(OwnPtr<KString> name) { m_name = move(name); }
VMObject const& vmobject() const { return *m_vmobject; }
VMObject& vmobject() { return *m_vmobject; }
void set_vmobject(NonnullRefPtr<VMObject>&&);
bool is_shared() const { return m_shared; }
void set_shared(bool shared) { m_shared = shared; }
bool is_stack() const { return m_stack; }
void set_stack(bool stack) { m_stack = stack; }
bool is_mmap() const { return m_mmap; }
void set_mmap(bool mmap) { m_mmap = mmap; }
bool is_user() const { return !is_kernel(); }
bool is_kernel() const { return vaddr().get() < 0x00800000 || vaddr().get() >= kernel_mapping_base; }
PageFaultResponse handle_fault(PageFault const&);
OwnPtr<Region> clone();
bool contains(VirtualAddress vaddr) const
{
return m_range.contains(vaddr);
}
bool contains(Range const& range) const
{
return m_range.contains(range);
}
unsigned page_index_from_address(VirtualAddress vaddr) const
{
return (vaddr - m_range.base()).get() / PAGE_SIZE;
}
VirtualAddress vaddr_from_page_index(size_t page_index) const
{
return vaddr().offset(page_index * PAGE_SIZE);
}
bool translate_vmobject_page(size_t& index) const
{
auto first_index = first_page_index();
if (index < first_index) {
index = first_index;
return false;
}
index -= first_index;
auto total_page_count = this->page_count();
if (index >= total_page_count) {
index = first_index + total_page_count - 1;
return false;
}
return true;
}
ALWAYS_INLINE size_t translate_to_vmobject_page(size_t page_index) const
{
return first_page_index() + page_index;
}
size_t first_page_index() const
{
return m_offset_in_vmobject / PAGE_SIZE;
}
size_t page_count() const
{
return size() / PAGE_SIZE;
}
PhysicalPage const* physical_page(size_t index) const;
RefPtr<PhysicalPage>& physical_page_slot(size_t index);
size_t offset_in_vmobject() const
{
return m_offset_in_vmobject;
}
size_t offset_in_vmobject_from_vaddr(VirtualAddress vaddr) const
{
return m_offset_in_vmobject + vaddr.get() - this->vaddr().get();
}
size_t amount_resident() const;
size_t amount_shared() const;
size_t amount_dirty() const;
bool should_cow(size_t page_index) const;
void set_should_cow(size_t page_index, bool);
size_t cow_pages() const;
void set_readable(bool b) { set_access_bit(Access::Read, b); }
void set_writable(bool b) { set_access_bit(Access::Write, b); }
void set_executable(bool b) { set_access_bit(Access::Execute, b); }
void set_page_directory(PageDirectory&);
bool map(PageDirectory&, ShouldFlushTLB = ShouldFlushTLB::Yes);
enum class ShouldDeallocateVirtualMemoryRange {
No,
Yes,
};
void unmap(ShouldDeallocateVirtualMemoryRange = ShouldDeallocateVirtualMemoryRange::Yes);
void remap();
bool is_syscall_region() const { return m_syscall_region; }
void set_syscall_region(bool b) { m_syscall_region = b; }
private:
Region(Range const&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, OwnPtr<KString>, Region::Access access, Cacheable, bool shared);
bool remap_vmobject_page(size_t page_index, bool with_flush = true);
bool do_remap_vmobject_page(size_t page_index, bool with_flush = true);
void set_access_bit(Access access, bool b)
{
if (b)
m_access |= access | (access << 4);
else
m_access &= ~access;
}
PageFaultResponse handle_cow_fault(size_t page_index);
PageFaultResponse handle_inode_fault(size_t page_index);
PageFaultResponse handle_zero_fault(size_t page_index);
bool map_individual_page_impl(size_t page_index);
RefPtr<PageDirectory> m_page_directory;
Range m_range;
size_t m_offset_in_vmobject { 0 };
NonnullRefPtr<VMObject> m_vmobject;
OwnPtr<KString> m_name;
u8 m_access { Region::None };
bool m_shared : 1 { false };
bool m_cacheable : 1 { false };
bool m_stack : 1 { false };
bool m_mmap : 1 { false };
bool m_syscall_region : 1 { false };
IntrusiveListNode<Region> m_memory_manager_list_node;
IntrusiveListNode<Region> m_vmobject_list_node;
public:
using ListInMemoryManager = IntrusiveList<Region, RawPtr<Region>, &Region::m_memory_manager_list_node>;
using ListInVMObject = IntrusiveList<Region, RawPtr<Region>, &Region::m_vmobject_list_node>;
};
AK_ENUM_BITWISE_OPERATORS(Region::Access)
inline Region::Access prot_to_region_access_flags(int prot)
{
Region::Access access = Region::Access::None;
if (prot & PROT_READ)
access |= Region::Access::Read;
if (prot & PROT_WRITE)
access |= Region::Access::Write;
if (prot & PROT_EXEC)
access |= Region::Access::Execute;
return access;
}
inline int region_access_flags_to_prot(Region::Access access)
{
int prot = 0;
if (access & Region::Access::Read)
prot |= PROT_READ;
if (access & Region::Access::Write)
prot |= PROT_WRITE;
if (access & Region::Access::Execute)
prot |= PROT_EXEC;
return prot;
}
}
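
The prot<->access helpers above are plain bit translations between POSIX protection flags and Region::Access. A sketch of the round trip (illustration only):

auto access = prot_to_region_access_flags(PROT_READ | PROT_WRITE);
// access now has Region::Access::Read and Region::Access::Write set.

int prot = region_access_flags_to_prot(access);
// prot == (PROT_READ | PROT_WRITE); PROT_EXEC would only be set if
// Region::Access::Execute had been present.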

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2021, Sahan Fernando <sahan.h.fernando@gmail.com>.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/RingBuffer.h>
#include <Kernel/UserOrKernelBuffer.h>
namespace Kernel {
RingBuffer::RingBuffer(String region_name, size_t capacity)
: m_region(MM.allocate_contiguous_kernel_region(page_round_up(capacity), move(region_name), Region::Access::Read | Region::Access::Write))
, m_capacity_in_bytes(capacity)
{
}
bool RingBuffer::copy_data_in(const UserOrKernelBuffer& buffer, size_t offset, size_t length, PhysicalAddress& start_of_copied_data, size_t& bytes_copied)
{
size_t start_of_free_area = (m_start_of_used + m_num_used_bytes) % m_capacity_in_bytes;
bytes_copied = min(m_capacity_in_bytes - m_num_used_bytes, min(m_capacity_in_bytes - start_of_free_area, length));
if (bytes_copied == 0)
return false;
if (buffer.read(m_region->vaddr().offset(start_of_free_area).as_ptr(), offset, bytes_copied)) {
m_num_used_bytes += bytes_copied;
start_of_copied_data = m_region->physical_page(start_of_free_area / PAGE_SIZE)->paddr().offset(start_of_free_area % PAGE_SIZE);
return true;
}
return false;
}
KResultOr<size_t> RingBuffer::copy_data_out(size_t size, UserOrKernelBuffer& buffer) const
{
auto start = m_start_of_used % m_capacity_in_bytes;
auto num_bytes = min(min(m_num_used_bytes, size), m_capacity_in_bytes - start);
if (!buffer.write(m_region->vaddr().offset(start).as_ptr(), num_bytes))
return EIO;
return num_bytes;
}
KResultOr<PhysicalAddress> RingBuffer::reserve_space(size_t size)
{
if (m_capacity_in_bytes < m_num_used_bytes + size)
return ENOSPC;
size_t start_of_free_area = (m_start_of_used + m_num_used_bytes) % m_capacity_in_bytes;
m_num_used_bytes += size;
PhysicalAddress start_of_reserved_space = m_region->physical_page(start_of_free_area / PAGE_SIZE)->paddr().offset(start_of_free_area % PAGE_SIZE);
return start_of_reserved_space;
}
void RingBuffer::reclaim_space(PhysicalAddress chunk_start, size_t chunk_size)
{
VERIFY(start_of_used() == chunk_start);
VERIFY(m_num_used_bytes >= chunk_size);
m_num_used_bytes -= chunk_size;
m_start_of_used += chunk_size;
}
PhysicalAddress RingBuffer::start_of_used() const
{
size_t start = m_start_of_used % m_capacity_in_bytes;
return m_region->physical_page(start / PAGE_SIZE)->paddr().offset(start % PAGE_SIZE);
}
}

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2021, Sahan Fernando <sahan.h.fernando@gmail.com>.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <Kernel/PhysicalAddress.h>
#include <Kernel/UserOrKernelBuffer.h>
namespace Kernel {
class RingBuffer {
public:
RingBuffer(String region_name, size_t capacity);
bool has_space() const { return m_num_used_bytes < m_capacity_in_bytes; }
bool copy_data_in(const UserOrKernelBuffer& buffer, size_t offset, size_t length, PhysicalAddress& start_of_copied_data, size_t& bytes_copied);
KResultOr<size_t> copy_data_out(size_t size, UserOrKernelBuffer& buffer) const;
KResultOr<PhysicalAddress> reserve_space(size_t size);
void reclaim_space(PhysicalAddress chunk_start, size_t chunk_size);
PhysicalAddress start_of_used() const;
SpinLock<u8>& lock() { return m_lock; }
size_t used_bytes() const { return m_num_used_bytes; }
PhysicalAddress start_of_region() const { return m_region->physical_page(0)->paddr(); }
VirtualAddress vaddr() const { return m_region->vaddr(); }
    size_t bytes_till_end() const { return (m_capacity_in_bytes - ((m_start_of_used + m_num_used_bytes) % m_capacity_in_bytes)) % m_capacity_in_bytes; }
private:
OwnPtr<Region> m_region;
SpinLock<u8> m_lock;
size_t m_start_of_used {};
size_t m_num_used_bytes {};
size_t m_capacity_in_bytes {};
};
}
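
A sketch of the producer-style flow the RingBuffer interface suggests (illustration only; `user_buffer` is a hypothetical UserOrKernelBuffer, and a real caller would take lock() around these operations):

RingBuffer ring("Example ring buffer", 16 * PAGE_SIZE);

PhysicalAddress chunk_start;
size_t bytes_copied = 0;
if (ring.copy_data_in(user_buffer, 0, 512, chunk_start, bytes_copied)) {
    // chunk_start is the physical address of the copied bytes, suitable for
    // handing to a device (e.g. as a DMA target).

    // Once the device has consumed the chunk, give the space back.
    ring.reclaim_space(chunk_start, bytes_copied);
}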

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2021, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Memory/ScatterGatherList.h>
namespace Kernel {
RefPtr<ScatterGatherList> ScatterGatherList::try_create(AsyncBlockDeviceRequest& request, Span<NonnullRefPtr<PhysicalPage>> allocated_pages, size_t device_block_size)
{
auto vm_object = AnonymousVMObject::try_create_with_physical_pages(allocated_pages);
if (!vm_object)
return {};
return adopt_ref_if_nonnull(new (nothrow) ScatterGatherList(vm_object.release_nonnull(), request, device_block_size));
}
ScatterGatherList::ScatterGatherList(NonnullRefPtr<AnonymousVMObject> vm_object, AsyncBlockDeviceRequest& request, size_t device_block_size)
: m_vm_object(move(vm_object))
{
m_dma_region = MM.allocate_kernel_region_with_vmobject(m_vm_object, page_round_up((request.block_count() * device_block_size)), "AHCI Scattered DMA", Region::Access::Read | Region::Access::Write, Region::Cacheable::Yes);
}
}

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2021, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Vector.h>
#include <Kernel/Devices/BlockDevice.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/PhysicalAddress.h>
namespace Kernel {
// A Scatter-Gather List type that owns its buffers
class ScatterGatherList : public RefCounted<ScatterGatherList> {
public:
static RefPtr<ScatterGatherList> try_create(AsyncBlockDeviceRequest&, Span<NonnullRefPtr<PhysicalPage>> allocated_pages, size_t device_block_size);
const VMObject& vmobject() const { return m_vm_object; }
VirtualAddress dma_region() const { return m_dma_region->vaddr(); }
size_t scatters_count() const { return m_vm_object->physical_pages().size(); }
private:
ScatterGatherList(NonnullRefPtr<AnonymousVMObject>, AsyncBlockDeviceRequest&, size_t device_block_size);
NonnullRefPtr<AnonymousVMObject> m_vm_object;
OwnPtr<Region> m_dma_region;
};
}

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/FileSystem/Inode.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
namespace Kernel {
RefPtr<SharedInodeVMObject> SharedInodeVMObject::try_create_with_inode(Inode& inode)
{
size_t size = inode.size();
if (auto shared_vmobject = inode.shared_vmobject())
return shared_vmobject.release_nonnull();
auto vmobject = adopt_ref_if_nonnull(new (nothrow) SharedInodeVMObject(inode, size));
if (!vmobject)
return nullptr;
vmobject->inode().set_shared_vmobject(*vmobject);
return vmobject;
}
RefPtr<VMObject> SharedInodeVMObject::try_clone()
{
return adopt_ref_if_nonnull(new (nothrow) SharedInodeVMObject(*this));
}
SharedInodeVMObject::SharedInodeVMObject(Inode& inode, size_t size)
: InodeVMObject(inode, size)
{
}
SharedInodeVMObject::SharedInodeVMObject(SharedInodeVMObject const& other)
: InodeVMObject(other)
{
}
}

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Bitmap.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/UnixTypes.h>
namespace Kernel {
class SharedInodeVMObject final : public InodeVMObject {
AK_MAKE_NONMOVABLE(SharedInodeVMObject);
public:
static RefPtr<SharedInodeVMObject> try_create_with_inode(Inode&);
virtual RefPtr<VMObject> try_clone() override;
private:
virtual bool is_shared_inode() const override { return true; }
explicit SharedInodeVMObject(Inode&, size_t);
explicit SharedInodeVMObject(SharedInodeVMObject const&);
virtual StringView class_name() const override { return "SharedInodeVMObject"sv; }
SharedInodeVMObject& operator=(SharedInodeVMObject const&) = delete;
};
}
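
SharedInodeVMObject::try_create_with_inode() deduplicates per inode: if the inode already has a shared vmobject, that existing object is returned instead of a new one. A sketch (illustration only; `inode` is a hypothetical Inode reference):

auto first = SharedInodeVMObject::try_create_with_inode(inode);
auto second = SharedInodeVMObject::try_create_with_inode(inode);
// On success, `first` and `second` point to the same vmobject, which is why
// shared file mappings of the same inode observe each other's writes.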

Kernel/Memory/Space.cpp
@@ -0,0 +1,439 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/Space.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/SpinLock.h>
namespace Kernel {
OwnPtr<Space> Space::try_create(Process& process, Space const* parent)
{
auto page_directory = PageDirectory::try_create_for_userspace(parent ? &parent->page_directory().range_allocator() : nullptr);
if (!page_directory)
return {};
auto space = adopt_own_if_nonnull(new (nothrow) Space(process, page_directory.release_nonnull()));
if (!space)
return {};
space->page_directory().set_space({}, *space);
return space;
}
Space::Space(Process& process, NonnullRefPtr<PageDirectory> page_directory)
: m_process(&process)
, m_page_directory(move(page_directory))
{
}
Space::~Space()
{
}
KResult Space::unmap_mmap_range(VirtualAddress addr, size_t size)
{
if (!size)
return EINVAL;
auto range_or_error = Range::expand_to_page_boundaries(addr.get(), size);
if (range_or_error.is_error())
return range_or_error.error();
auto range_to_unmap = range_or_error.value();
if (!is_user_range(range_to_unmap))
return EFAULT;
if (auto* whole_region = find_region_from_range(range_to_unmap)) {
if (!whole_region->is_mmap())
return EPERM;
PerformanceManager::add_unmap_perf_event(*Process::current(), whole_region->range());
deallocate_region(*whole_region);
return KSuccess;
}
if (auto* old_region = find_region_containing(range_to_unmap)) {
if (!old_region->is_mmap())
return EPERM;
        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address, but don't deallocate it yet.
auto region = take_region(*old_region);
// We manually unmap the old region here, specifying that we *don't* want the VM deallocated.
region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
auto new_regions_or_error = try_split_region_around_range(*region, range_to_unmap);
if (new_regions_or_error.is_error())
return new_regions_or_error.error();
auto& new_regions = new_regions_or_error.value();
// Instead we give back the unwanted VM manually.
page_directory().range_allocator().deallocate(range_to_unmap);
// And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
for (auto* new_region : new_regions) {
new_region->map(page_directory());
}
PerformanceManager::add_unmap_perf_event(*Process::current(), range_to_unmap);
return KSuccess;
}
// Try again while checking multiple regions at a time.
auto const& regions = find_regions_intersecting(range_to_unmap);
if (regions.is_empty())
return KSuccess;
    // Check that all of the regions are mmap'ed first, so we don't error out
    // halfway through and leave only part of the range unmapped.
for (auto* region : regions) {
if (!region->is_mmap())
return EPERM;
}
Vector<Region*, 2> new_regions;
for (auto* old_region : regions) {
// If it's a full match we can remove the entire old region.
if (old_region->range().intersect(range_to_unmap).size() == old_region->size()) {
deallocate_region(*old_region);
continue;
}
        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address, but don't deallocate it yet.
auto region = take_region(*old_region);
// We manually unmap the old region here, specifying that we *don't* want the VM deallocated.
region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
// Otherwise, split the regions and collect them for future mapping.
auto split_regions_or_error = try_split_region_around_range(*region, range_to_unmap);
if (split_regions_or_error.is_error())
return split_regions_or_error.error();
        if (!new_regions.try_extend(split_regions_or_error.value()))
            return ENOMEM;
}
// Give back any unwanted VM to the range allocator.
page_directory().range_allocator().deallocate(range_to_unmap);
// And finally map the new region(s) into our page directory.
for (auto* new_region : new_regions) {
new_region->map(page_directory());
}
PerformanceManager::add_unmap_perf_event(*Process::current(), range_to_unmap);
return KSuccess;
}
Optional<Range> Space::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
{
vaddr.mask(PAGE_MASK);
size = page_round_up(size);
if (vaddr.is_null())
return page_directory().range_allocator().allocate_anywhere(size, alignment);
return page_directory().range_allocator().allocate_specific(vaddr, size);
}
KResultOr<Region*> Space::try_allocate_split_region(Region const& source_region, Range const& range, size_t offset_in_vmobject)
{
auto new_region = Region::try_create_user_accessible(
range, source_region.vmobject(), offset_in_vmobject, KString::try_create(source_region.name()), source_region.access(), source_region.is_cacheable() ? Region::Cacheable::Yes : Region::Cacheable::No, source_region.is_shared());
if (!new_region)
return ENOMEM;
auto* region = add_region(new_region.release_nonnull());
if (!region)
return ENOMEM;
region->set_syscall_region(source_region.is_syscall_region());
region->set_mmap(source_region.is_mmap());
region->set_stack(source_region.is_stack());
size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
for (size_t i = 0; i < region->page_count(); ++i) {
if (source_region.should_cow(page_offset_in_source_region + i))
region->set_should_cow(i, true);
}
return region;
}
KResultOr<Region*> Space::allocate_region(Range const& range, StringView name, int prot, AllocationStrategy strategy)
{
VERIFY(range.is_valid());
auto vmobject = AnonymousVMObject::try_create_with_size(range.size(), strategy);
if (!vmobject)
return ENOMEM;
auto region = Region::try_create_user_accessible(range, vmobject.release_nonnull(), 0, KString::try_create(name), prot_to_region_access_flags(prot), Region::Cacheable::Yes, false);
if (!region)
return ENOMEM;
if (!region->map(page_directory()))
return ENOMEM;
auto* added_region = add_region(region.release_nonnull());
if (!added_region)
return ENOMEM;
return added_region;
}
KResultOr<Region*> Space::allocate_region_with_vmobject(Range const& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, StringView name, int prot, bool shared)
{
VERIFY(range.is_valid());
size_t end_in_vmobject = offset_in_vmobject + range.size();
if (end_in_vmobject <= offset_in_vmobject) {
dbgln("allocate_region_with_vmobject: Overflow (offset + size)");
return EINVAL;
}
if (offset_in_vmobject >= vmobject->size()) {
dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
return EINVAL;
}
if (end_in_vmobject > vmobject->size()) {
dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
return EINVAL;
}
offset_in_vmobject &= PAGE_MASK;
auto region = Region::try_create_user_accessible(range, move(vmobject), offset_in_vmobject, KString::try_create(name), prot_to_region_access_flags(prot), Region::Cacheable::Yes, shared);
if (!region) {
dbgln("allocate_region_with_vmobject: Unable to allocate Region");
return ENOMEM;
}
auto* added_region = add_region(region.release_nonnull());
if (!added_region)
return ENOMEM;
if (!added_region->map(page_directory()))
return ENOMEM;
return added_region;
}
void Space::deallocate_region(Region& region)
{
take_region(region);
}
NonnullOwnPtr<Region> Space::take_region(Region& region)
{
ScopedSpinLock lock(m_lock);
if (m_region_lookup_cache.region.unsafe_ptr() == &region)
m_region_lookup_cache.region = nullptr;
auto found_region = m_regions.unsafe_remove(region.vaddr().get());
VERIFY(found_region.ptr() == &region);
return found_region;
}
Region* Space::find_region_from_range(const Range& range)
{
ScopedSpinLock lock(m_lock);
if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region)
return m_region_lookup_cache.region.unsafe_ptr();
auto found_region = m_regions.find(range.base().get());
if (!found_region)
return nullptr;
auto& region = *found_region;
size_t size = page_round_up(range.size());
if (region->size() != size)
return nullptr;
m_region_lookup_cache.range = range;
m_region_lookup_cache.region = *region;
return region;
}
Region* Space::find_region_containing(const Range& range)
{
ScopedSpinLock lock(m_lock);
auto candidate = m_regions.find_largest_not_above(range.base().get());
if (!candidate)
return nullptr;
return (*candidate)->range().contains(range) ? candidate->ptr() : nullptr;
}
Vector<Region*> Space::find_regions_intersecting(const Range& range)
{
Vector<Region*> regions = {};
size_t total_size_collected = 0;
ScopedSpinLock lock(m_lock);
auto found_region = m_regions.find_largest_not_above(range.base().get());
if (!found_region)
return regions;
for (auto iter = m_regions.begin_from((*found_region)->vaddr().get()); !iter.is_end(); ++iter) {
if ((*iter)->range().base() < range.end() && (*iter)->range().end() > range.base()) {
regions.append(*iter);
total_size_collected += (*iter)->size() - (*iter)->range().intersect(range).size();
if (total_size_collected == range.size())
break;
}
}
return regions;
}
Region* Space::add_region(NonnullOwnPtr<Region> region)
{
auto* ptr = region.ptr();
ScopedSpinLock lock(m_lock);
auto success = m_regions.try_insert(region->vaddr().get(), move(region));
return success ? ptr : nullptr;
}
// Carve out a virtual address range from a region and return the two regions on either side
KResultOr<Vector<Region*, 2>> Space::try_split_region_around_range(const Region& source_region, const Range& desired_range)
{
Range old_region_range = source_region.range();
auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);
VERIFY(!remaining_ranges_after_unmap.is_empty());
auto try_make_replacement_region = [&](const Range& new_range) -> KResultOr<Region*> {
VERIFY(old_region_range.contains(new_range));
size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
return try_allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
};
Vector<Region*, 2> new_regions;
for (auto& new_range : remaining_ranges_after_unmap) {
auto new_region_or_error = try_make_replacement_region(new_range);
if (new_region_or_error.is_error())
return new_region_or_error.error();
new_regions.unchecked_append(new_region_or_error.value());
}
return new_regions;
}
void Space::dump_regions()
{
dbgln("Process regions:");
#if ARCH(I386)
auto addr_padding = "";
#else
auto addr_padding = " ";
#endif
dbgln("BEGIN{} END{} SIZE{} ACCESS NAME",
addr_padding, addr_padding, addr_padding);
ScopedSpinLock lock(m_lock);
for (auto& sorted_region : m_regions) {
auto& region = *sorted_region;
dbgln("{:p} -- {:p} {:p} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
region.is_readable() ? 'R' : ' ',
region.is_writable() ? 'W' : ' ',
region.is_executable() ? 'X' : ' ',
region.is_shared() ? 'S' : ' ',
region.is_stack() ? 'T' : ' ',
region.is_syscall_region() ? 'C' : ' ',
region.name());
}
MM.dump_kernel_regions();
}
void Space::remove_all_regions(Badge<Process>)
{
ScopedSpinLock lock(m_lock);
m_regions.clear();
}
size_t Space::amount_dirty_private() const
{
ScopedSpinLock lock(m_lock);
// FIXME: This gets a bit more complicated for Regions sharing the same underlying VMObject.
// The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping.
// That's probably a situation that needs to be looked at in general.
size_t amount = 0;
for (auto& region : m_regions) {
if (!region->is_shared())
amount += region->amount_dirty();
}
return amount;
}
size_t Space::amount_clean_inode() const
{
ScopedSpinLock lock(m_lock);
HashTable<const InodeVMObject*> vmobjects;
for (auto& region : m_regions) {
if (region->vmobject().is_inode())
vmobjects.set(&static_cast<const InodeVMObject&>(region->vmobject()));
}
size_t amount = 0;
for (auto& vmobject : vmobjects)
amount += vmobject->amount_clean();
return amount;
}
size_t Space::amount_virtual() const
{
ScopedSpinLock lock(m_lock);
size_t amount = 0;
for (auto& region : m_regions) {
amount += region->size();
}
return amount;
}
size_t Space::amount_resident() const
{
ScopedSpinLock lock(m_lock);
// FIXME: This will double count if multiple regions use the same physical page.
size_t amount = 0;
for (auto& region : m_regions) {
amount += region->amount_resident();
}
return amount;
}
size_t Space::amount_shared() const
{
ScopedSpinLock lock(m_lock);
// FIXME: This will double count if multiple regions use the same physical page.
// FIXME: It doesn't work at the moment, since it relies on PhysicalPage ref counts,
// and each PhysicalPage is only reffed by its VMObject. This needs to be refactored
// so that every Region contributes +1 ref to each of its PhysicalPages.
size_t amount = 0;
for (auto& region : m_regions) {
amount += region->amount_shared();
}
return amount;
}
size_t Space::amount_purgeable_volatile() const
{
ScopedSpinLock lock(m_lock);
size_t amount = 0;
for (auto& region : m_regions) {
if (!region->vmobject().is_anonymous())
continue;
auto const& vmobject = static_cast<AnonymousVMObject const&>(region->vmobject());
if (vmobject.is_purgeable() && vmobject.is_volatile())
amount += region->amount_resident();
}
return amount;
}
size_t Space::amount_purgeable_nonvolatile() const
{
ScopedSpinLock lock(m_lock);
size_t amount = 0;
for (auto& region : m_regions) {
if (!region->vmobject().is_anonymous())
continue;
auto const& vmobject = static_cast<AnonymousVMObject const&>(region->vmobject());
if (vmobject.is_purgeable() && !vmobject.is_volatile())
amount += region->amount_resident();
}
return amount;
}
}
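
Putting the Space API together, a sketch of how an anonymous mapping is typically created (illustration only; mirrors the mmap path in spirit, with error handling shortened):

// Pick a free virtual range (any base address, page-aligned size).
auto range = space.allocate_range({}, 16 * PAGE_SIZE);
if (range.has_value()) {
    // Backs the range with a fresh AnonymousVMObject and maps it into the
    // space's page directory.
    auto region_or_error = space.allocate_region(range.value(), "example"sv, PROT_READ | PROT_WRITE);
    if (!region_or_error.is_error()) {
        auto* region = region_or_error.value();
        // The mmap path also marks the region, so that unmap_mmap_range()
        // may later split or remove it.
        region->set_mmap(true);
    }
}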

Kernel/Memory/Space.h
@@ -0,0 +1,87 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/RedBlackTree.h>
#include <AK/Vector.h>
#include <AK/WeakPtr.h>
#include <Kernel/Memory/AllocationStrategy.h>
#include <Kernel/Memory/PageDirectory.h>
#include <Kernel/UnixTypes.h>
namespace Kernel {
class Space {
public:
static OwnPtr<Space> try_create(Process&, Space const* parent);
~Space();
PageDirectory& page_directory() { return *m_page_directory; }
const PageDirectory& page_directory() const { return *m_page_directory; }
Region* add_region(NonnullOwnPtr<Region>);
size_t region_count() const { return m_regions.size(); }
RedBlackTree<FlatPtr, NonnullOwnPtr<Region>>& regions() { return m_regions; }
const RedBlackTree<FlatPtr, NonnullOwnPtr<Region>>& regions() const { return m_regions; }
void dump_regions();
KResult unmap_mmap_range(VirtualAddress, size_t);
Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE);
KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, StringView name, int prot, bool shared);
KResultOr<Region*> allocate_region(const Range&, StringView name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve);
void deallocate_region(Region& region);
NonnullOwnPtr<Region> take_region(Region& region);
KResultOr<Region*> try_allocate_split_region(Region const& source_region, Range const&, size_t offset_in_vmobject);
KResultOr<Vector<Region*, 2>> try_split_region_around_range(Region const& source_region, Range const&);
Region* find_region_from_range(const Range&);
Region* find_region_containing(const Range&);
Vector<Region*> find_regions_intersecting(const Range&);
bool enforces_syscall_regions() const { return m_enforces_syscall_regions; }
void set_enforces_syscall_regions(bool b) { m_enforces_syscall_regions = b; }
void remove_all_regions(Badge<Process>);
RecursiveSpinLock& get_lock() const { return m_lock; }
size_t amount_clean_inode() const;
size_t amount_dirty_private() const;
size_t amount_virtual() const;
size_t amount_resident() const;
size_t amount_shared() const;
size_t amount_purgeable_volatile() const;
size_t amount_purgeable_nonvolatile() const;
private:
Space(Process&, NonnullRefPtr<PageDirectory>);
Process* m_process { nullptr };
mutable RecursiveSpinLock m_lock;
RefPtr<PageDirectory> m_page_directory;
RedBlackTree<FlatPtr, NonnullOwnPtr<Region>> m_regions;
struct RegionLookupCache {
Optional<Range> range;
WeakPtr<Region> region;
};
RegionLookupCache m_region_lookup_cache;
bool m_enforces_syscall_regions { false };
};
}

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/StringView.h>
#include <Kernel/Memory/MemoryManager.h>
namespace Kernel {
template<typename T>
struct TypedMapping {
const T* ptr() const { return reinterpret_cast<const T*>(region->vaddr().offset(offset).as_ptr()); }
T* ptr() { return reinterpret_cast<T*>(region->vaddr().offset(offset).as_ptr()); }
const T* operator->() const { return ptr(); }
T* operator->() { return ptr(); }
const T& operator*() const { return *ptr(); }
T& operator*() { return *ptr(); }
OwnPtr<Region> region;
size_t offset { 0 };
};
template<typename T>
static TypedMapping<T> map_typed(PhysicalAddress paddr, size_t length, Region::Access access = Region::Access::Read)
{
TypedMapping<T> table;
size_t mapping_length = page_round_up(paddr.offset_in_page() + length);
table.region = MM.allocate_kernel_region(paddr.page_base(), mapping_length, {}, access);
table.offset = paddr.offset_in_page();
return table;
}
template<typename T>
static TypedMapping<T> map_typed(PhysicalAddress paddr)
{
return map_typed<T>(paddr, sizeof(T));
}
template<typename T>
static TypedMapping<T> map_typed_writable(PhysicalAddress paddr)
{
return map_typed<T>(paddr, sizeof(T), Region::Access::Read | Region::Access::Write);
}
}
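
TypedMapping is a convenience for peeking at physical memory through a temporary kernel region. A sketch (illustration only; `ExampleDescriptor` and `paddr` are hypothetical):

struct ExampleDescriptor {
    u32 signature;
    u32 length;
};

// Maps the page(s) covering paddr..paddr+sizeof(ExampleDescriptor) read-only
// and exposes the struct at the correct offset within that mapping.
auto descriptor = map_typed<ExampleDescriptor>(paddr);
u32 signature = descriptor->signature;
(void)signature;

// map_typed_writable() does the same but with Read | Write access.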

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/VMObject.h>
namespace Kernel {
VMObject::VMObject(VMObject const& other)
: m_physical_pages(other.m_physical_pages)
{
MM.register_vmobject(*this);
}
VMObject::VMObject(size_t size)
: m_physical_pages(ceil_div(size, static_cast<size_t>(PAGE_SIZE)))
{
MM.register_vmobject(*this);
}
VMObject::~VMObject()
{
{
ScopedSpinLock lock(m_on_deleted_lock);
for (auto& it : m_on_deleted)
it->vmobject_deleted(*this);
m_on_deleted.clear();
}
MM.unregister_vmobject(*this);
VERIFY(m_regions.is_empty());
}
}

Kernel/Memory/VMObject.h
@@ -0,0 +1,122 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/FixedArray.h>
#include <AK/HashTable.h>
#include <AK/IntrusiveList.h>
#include <AK/RefCounted.h>
#include <AK/RefPtr.h>
#include <AK/Vector.h>
#include <AK/Weakable.h>
#include <Kernel/Forward.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Mutex.h>
namespace Kernel {
class VMObjectDeletedHandler {
public:
virtual ~VMObjectDeletedHandler() = default;
virtual void vmobject_deleted(VMObject&) = 0;
};
class VMObject : public RefCounted<VMObject>
, public Weakable<VMObject> {
friend class MemoryManager;
friend class Region;
public:
virtual ~VMObject();
virtual RefPtr<VMObject> try_clone() = 0;
virtual bool is_anonymous() const { return false; }
virtual bool is_inode() const { return false; }
virtual bool is_shared_inode() const { return false; }
virtual bool is_private_inode() const { return false; }
virtual bool is_contiguous() const { return false; }
size_t page_count() const { return m_physical_pages.size(); }
Span<RefPtr<PhysicalPage> const> physical_pages() const { return m_physical_pages.span(); }
Span<RefPtr<PhysicalPage>> physical_pages() { return m_physical_pages.span(); }
size_t size() const { return m_physical_pages.size() * PAGE_SIZE; }
virtual StringView class_name() const = 0;
ALWAYS_INLINE void add_region(Region& region)
{
ScopedSpinLock locker(m_lock);
m_regions.append(region);
}
ALWAYS_INLINE void remove_region(Region& region)
{
ScopedSpinLock locker(m_lock);
m_regions.remove(region);
}
void register_on_deleted_handler(VMObjectDeletedHandler& handler)
{
ScopedSpinLock locker(m_on_deleted_lock);
m_on_deleted.set(&handler);
}
void unregister_on_deleted_handler(VMObjectDeletedHandler& handler)
{
ScopedSpinLock locker(m_on_deleted_lock);
m_on_deleted.remove(&handler);
}
protected:
explicit VMObject(size_t);
explicit VMObject(VMObject const&);
template<typename Callback>
void for_each_region(Callback);
IntrusiveListNode<VMObject> m_list_node;
FixedArray<RefPtr<PhysicalPage>> m_physical_pages;
mutable RecursiveSpinLock m_lock;
private:
VMObject& operator=(VMObject const&) = delete;
VMObject& operator=(VMObject&&) = delete;
VMObject(VMObject&&) = delete;
HashTable<VMObjectDeletedHandler*> m_on_deleted;
SpinLock<u8> m_on_deleted_lock;
Region::ListInVMObject m_regions;
public:
using List = IntrusiveList<VMObject, RawPtr<VMObject>, &VMObject::m_list_node>;
};
template<typename Callback>
inline void VMObject::for_each_region(Callback callback)
{
ScopedSpinLock lock(m_lock);
for (auto& region : m_regions) {
callback(region);
}
}
inline PhysicalPage const* Region::physical_page(size_t index) const
{
VERIFY(index < page_count());
return vmobject().physical_pages()[first_page_index() + index];
}
inline RefPtr<PhysicalPage>& Region::physical_page_slot(size_t index)
{
VERIFY(index < page_count());
return vmobject().physical_pages()[first_page_index() + index];
}
}
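
The VMObjectDeletedHandler hook above lets other subsystems observe a VMObject's destruction (the destructor notifies all registered handlers before unregistering from the MemoryManager). A sketch of a handler (illustration only; `some_vmobject` is a hypothetical VMObject reference):

class ExampleWatcher final : public VMObjectDeletedHandler {
public:
    virtual void vmobject_deleted(VMObject& vmobject) override
    {
        dbgln("VMObject with {} pages is going away", vmobject.page_count());
    }
};

// Registration stores a raw pointer, so the watcher must either outlive the
// VMObject or unregister itself first.
ExampleWatcher watcher;
some_vmobject.register_on_deleted_handler(watcher);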