1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 15:48:12 +00:00

Kernel: Factor address space management out of the Process class

This patch adds Space, a class representing a process's address space.

- Each Process has a Space.
- The Space owns the PageDirectory and all Regions in the Process.

This allows us to reorganize sys$execve() so that it constructs and
populates a new Space fully before committing to it.

Previously, we would construct the new address space while still
running in the old one, and encountering an error meant we had to do
tedious and error-prone rollback.

Those problems are now gone, replaced by what's hopefully a set of much
smaller problems and missing cleanups. :^)
This commit is contained in:
Andreas Kling 2021-02-08 15:45:40 +01:00
parent b2cba3036e
commit f1b5def8fd
27 changed files with 494 additions and 404 deletions

View file

@ -212,6 +212,7 @@ set(KERNEL_SOURCES
VM/RangeAllocator.cpp VM/RangeAllocator.cpp
VM/Region.cpp VM/Region.cpp
VM/SharedInodeVMObject.cpp VM/SharedInodeVMObject.cpp
VM/Space.cpp
VM/VMObject.cpp VM/VMObject.cpp
WaitQueue.cpp WaitQueue.cpp
init.cpp init.cpp

View file

@ -59,7 +59,7 @@ OwnPtr<CoreDump> CoreDump::create(NonnullRefPtr<Process> process, const String&
CoreDump::CoreDump(NonnullRefPtr<Process> process, NonnullRefPtr<FileDescription>&& fd) CoreDump::CoreDump(NonnullRefPtr<Process> process, NonnullRefPtr<FileDescription>&& fd)
: m_process(move(process)) : m_process(move(process))
, m_fd(move(fd)) , m_fd(move(fd))
, m_num_program_headers(m_process->m_regions.size() + 1) // +1 for NOTE segment , m_num_program_headers(m_process->space().region_count() + 1) // +1 for NOTE segment
{ {
} }
@ -137,7 +137,7 @@ KResult CoreDump::write_elf_header()
KResult CoreDump::write_program_headers(size_t notes_size) KResult CoreDump::write_program_headers(size_t notes_size)
{ {
size_t offset = sizeof(Elf32_Ehdr) + m_num_program_headers * sizeof(Elf32_Phdr); size_t offset = sizeof(Elf32_Ehdr) + m_num_program_headers * sizeof(Elf32_Phdr);
for (auto& region : m_process->m_regions) { for (auto& region : m_process->space().regions()) {
Elf32_Phdr phdr {}; Elf32_Phdr phdr {};
phdr.p_type = PT_LOAD; phdr.p_type = PT_LOAD;
@ -178,7 +178,7 @@ KResult CoreDump::write_program_headers(size_t notes_size)
KResult CoreDump::write_regions() KResult CoreDump::write_regions()
{ {
for (auto& region : m_process->m_regions) { for (auto& region : m_process->space().regions()) {
if (region.is_kernel()) if (region.is_kernel())
continue; continue;
@ -258,13 +258,13 @@ ByteBuffer CoreDump::create_notes_threads_data() const
ByteBuffer CoreDump::create_notes_regions_data() const ByteBuffer CoreDump::create_notes_regions_data() const
{ {
ByteBuffer regions_data; ByteBuffer regions_data;
for (size_t region_index = 0; region_index < m_process->m_regions.size(); ++region_index) { for (size_t region_index = 0; region_index < m_process->space().region_count(); ++region_index) {
ByteBuffer memory_region_info_buffer; ByteBuffer memory_region_info_buffer;
ELF::Core::MemoryRegionInfo info {}; ELF::Core::MemoryRegionInfo info {};
info.header.type = ELF::Core::NotesEntryHeader::Type::MemoryRegionInfo; info.header.type = ELF::Core::NotesEntryHeader::Type::MemoryRegionInfo;
auto& region = m_process->m_regions[region_index]; auto& region = m_process->space().regions()[region_index];
info.region_start = reinterpret_cast<uint32_t>(region.vaddr().as_ptr()); info.region_start = reinterpret_cast<uint32_t>(region.vaddr().as_ptr());
info.region_end = reinterpret_cast<uint32_t>(region.vaddr().as_ptr() + region.size()); info.region_end = reinterpret_cast<uint32_t>(region.vaddr().as_ptr() + region.size());
info.program_header_index = region_index; info.program_header_index = region_index;
@ -316,7 +316,7 @@ ByteBuffer CoreDump::create_notes_segment_data() const
KResult CoreDump::write() KResult CoreDump::write()
{ {
ScopedSpinLock lock(m_process->get_lock()); ScopedSpinLock lock(m_process->space().get_lock());
ProcessPagingScope scope(m_process); ProcessPagingScope scope(m_process);
ByteBuffer notes_segment = create_notes_segment_data(); ByteBuffer notes_segment = create_notes_segment_data();

View file

@ -185,7 +185,7 @@ KResultOr<Region*> BXVGADevice::mmap(Process& process, FileDescription&, const R
auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes());
if (!vmobject) if (!vmobject)
return ENOMEM; return ENOMEM;
return process.allocate_region_with_vmobject( return process.space().allocate_region_with_vmobject(
range, range,
vmobject.release_nonnull(), vmobject.release_nonnull(),
0, 0,

View file

@ -64,7 +64,7 @@ KResultOr<Region*> MBVGADevice::mmap(Process& process, FileDescription&, const R
auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes());
if (!vmobject) if (!vmobject)
return ENOMEM; return ENOMEM;
return process.allocate_region_with_vmobject( return process.space().allocate_region_with_vmobject(
range, range,
vmobject.release_nonnull(), vmobject.release_nonnull(),
0, 0,

View file

@ -66,7 +66,7 @@ KResultOr<Region*> MemoryDevice::mmap(Process& process, FileDescription&, const
if (!vmobject) if (!vmobject)
return ENOMEM; return ENOMEM;
dbgln("MemoryDevice: Mapped physical memory at {} for range of {} bytes", viewed_address, range.size()); dbgln("MemoryDevice: Mapped physical memory at {} for range of {} bytes", viewed_address, range.size());
return process.allocate_region_with_vmobject( return process.space().allocate_region_with_vmobject(
range, range,
vmobject.release_nonnull(), vmobject.release_nonnull(),
0, 0,

View file

@ -47,7 +47,7 @@ KResultOr<Region*> AnonymousFile::mmap(Process& process, FileDescription&, const
if (range.size() != m_vmobject->size()) if (range.size() != m_vmobject->size())
return EINVAL; return EINVAL;
return process.allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); return process.space().allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared);
} }
} }

View file

@ -117,7 +117,7 @@ KResultOr<Region*> InodeFile::mmap(Process& process, FileDescription& descriptio
vmobject = PrivateInodeVMObject::create_with_inode(inode()); vmobject = PrivateInodeVMObject::create_with_inode(inode());
if (!vmobject) if (!vmobject)
return ENOMEM; return ENOMEM;
return process.allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); return process.space().allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared);
} }
String InodeFile::absolute_path(const FileDescription& description) const String InodeFile::absolute_path(const FileDescription& description) const

View file

@ -317,8 +317,8 @@ static bool procfs$pid_vm(InodeIdentifier identifier, KBufferBuilder& builder)
return false; return false;
JsonArraySerializer array { builder }; JsonArraySerializer array { builder };
{ {
ScopedSpinLock lock(process->get_lock()); ScopedSpinLock lock(process->space().get_lock());
for (auto& region : process->regions()) { for (auto& region : process->space().regions()) {
if (!region.is_user_accessible() && !Process::current()->is_superuser()) if (!region.is_user_accessible() && !Process::current()->is_superuser())
continue; continue;
auto region_object = array.add_object(); auto region_object = array.add_object();

View file

@ -62,6 +62,7 @@ class Region;
class Scheduler; class Scheduler;
class SchedulerPerProcessorData; class SchedulerPerProcessorData;
class Socket; class Socket;
class Space;
template<typename BaseType> template<typename BaseType>
class SpinLock; class SpinLock;
class RecursiveSpinLock; class RecursiveSpinLock;

View file

@ -121,7 +121,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con
{ {
auto process = Process::from_pid(pid); auto process = Process::from_pid(pid);
ASSERT(process); ASSERT(process);
ScopedSpinLock locker(process->get_lock()); ScopedSpinLock locker(process->space().get_lock());
JsonObjectSerializer object(builder); JsonObjectSerializer object(builder);
object.add("pid", pid.value()); object.add("pid", pid.value());
@ -129,7 +129,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con
{ {
auto region_array = object.add_array("regions"); auto region_array = object.add_array("regions");
for (const auto& region : process->regions()) { for (const auto& region : process->space().regions()) {
auto region_object = region_array.add_object(); auto region_object = region_array.add_object();
region_object.add("base", region.vaddr().get()); region_object.add("base", region.vaddr().get());
region_object.add("size", region.size()); region_object.add("size", region.size());

View file

@ -116,110 +116,6 @@ bool Process::in_group(gid_t gid) const
return m_gid == gid || m_extra_gids.contains_slow(gid); return m_gid == gid || m_extra_gids.contains_slow(gid);
} }
// Reserves a range of virtual address space from this process's page
// directory. A null vaddr means "place it anywhere" (honoring the requested
// alignment); a non-null vaddr requests that exact base address.
// Returns an empty Optional if the allocation fails.
Optional<Range> Process::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
{
// Page-align the requested base and round the size up to whole pages.
vaddr.mask(PAGE_MASK);
size = PAGE_ROUND_UP(size);
if (vaddr.is_null())
return page_directory().range_allocator().allocate_anywhere(size, alignment);
return page_directory().range_allocator().allocate_specific(vaddr, size);
}
// Creates a new Region covering `range`, backed by the same VMObject as
// source_region (starting at offset_in_vmobject), and copies over the source
// region's name, access bits, cacheability, shared/syscall/mmap/stack flags.
// Used when an existing region is split (e.g. partial munmap/mprotect).
Region& Process::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject)
{
auto& region = add_region(
Region::create_user_accessible(this, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared()));
region.set_syscall_region(source_region.is_syscall_region());
region.set_mmap(source_region.is_mmap());
region.set_stack(source_region.is_stack());
// Carry over per-page copy-on-write state from the corresponding pages
// of the source region into the new (sub-)region.
size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
for (size_t i = 0; i < region.page_count(); ++i) {
if (source_region.should_cow(page_offset_in_source_region + i))
region.set_should_cow(i, true);
}
return region;
}
// Creates and maps a fresh anonymous memory region over `range` with the
// given name, protection bits, and physical-page allocation strategy.
// Returns ENOMEM if the backing VMObject cannot be created or the region
// cannot be mapped into the page directory.
KResultOr<Region*> Process::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy)
{
ASSERT(range.is_valid());
auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy);
if (!vmobject)
return ENOMEM;
auto region = Region::create_user_accessible(this, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false);
if (!region->map(page_directory()))
return ENOMEM;
// add_region() takes ownership; return a pointer to the stored region.
return &add_region(move(region));
}
// Creates and maps a region over `range` backed by an existing VMObject,
// viewed starting at offset_in_vmobject. Validates that the requested window
// (offset + range size) neither overflows nor extends past the end of the
// VMObject before committing to anything.
KResultOr<Region*> Process::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared)
{
ASSERT(range.is_valid());
size_t end_in_vmobject = offset_in_vmobject + range.size();
// Guard against size_t wraparound in offset + size.
if (end_in_vmobject <= offset_in_vmobject) {
dbgln("allocate_region_with_vmobject: Overflow (offset + size)");
return EINVAL;
}
if (offset_in_vmobject >= vmobject->size()) {
dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
return EINVAL;
}
if (end_in_vmobject > vmobject->size()) {
dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
return EINVAL;
}
// The view into the VMObject must start on a page boundary.
offset_in_vmobject &= PAGE_MASK;
auto& region = add_region(Region::create_user_accessible(this, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared));
if (!region.map(page_directory())) {
// FIXME: What is an appropriate error code here, really?
return ENOMEM;
}
return &region;
}
// Removes `region` from this process's region list, invalidating the lookup
// cache if it points at the region being removed. Returns true if the region
// was found and removed, false otherwise.
bool Process::deallocate_region(Region& region)
{
// Holds ownership of the removed region so its destructor runs after the
// spinlock scope ends (i.e. outside the lock), not while we still hold it.
OwnPtr<Region> region_protector;
ScopedSpinLock lock(m_lock);
if (m_region_lookup_cache.region.unsafe_ptr() == &region)
m_region_lookup_cache.region = nullptr;
for (size_t i = 0; i < m_regions.size(); ++i) {
if (&m_regions[i] == &region) {
region_protector = m_regions.unstable_take(i);
return true;
}
}
return false;
}
// Finds the region whose base address and (page-rounded) size exactly match
// `range`. A one-entry lookup cache short-circuits repeated queries for the
// same range. Returns nullptr if no exact match exists.
Region* Process::find_region_from_range(const Range& range)
{
ScopedSpinLock lock(m_lock);
// Fast path: same range as last successful lookup.
if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region)
return m_region_lookup_cache.region.unsafe_ptr();
size_t size = PAGE_ROUND_UP(range.size());
for (auto& region : m_regions) {
if (region.vaddr() == range.base() && region.size() == size) {
m_region_lookup_cache.range = range;
m_region_lookup_cache.region = region;
return &region;
}
}
return nullptr;
}
// Finds the first region that fully contains `range`, or nullptr if none
// does. Unlike find_region_from_range(), this does not require an exact
// base/size match and does not use the lookup cache.
Region* Process::find_region_containing(const Range& range)
{
ScopedSpinLock lock(m_lock);
for (auto& region : m_regions) {
if (region.contains(range))
return &region;
}
return nullptr;
}
void Process::kill_threads_except_self() void Process::kill_threads_except_self()
{ {
InterruptDisabler disabler; InterruptDisabler disabler;
@ -339,7 +235,7 @@ Process::Process(RefPtr<Thread>& first_thread, const String& name, uid_t uid, gi
{ {
dbgln_if(PROCESS_DEBUG, "Created new process {}({})", m_name, m_pid.value()); dbgln_if(PROCESS_DEBUG, "Created new process {}({})", m_name, m_pid.value());
m_page_directory = PageDirectory::create_for_userspace(*this, fork_parent ? &fork_parent->page_directory().range_allocator() : nullptr); m_space = Space::create(*this, fork_parent ? &fork_parent->space() : nullptr);
if (fork_parent) { if (fork_parent) {
// NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process. // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process.
@ -365,28 +261,6 @@ Process::~Process()
} }
} }
// Debug helper: logs every region of this process (sorted by base address)
// with its start/end, size, access flags, and name, then dumps the kernel's
// own regions via MemoryManager.
void Process::dump_regions()
{
klog() << "Process regions:";
klog() << "BEGIN END SIZE ACCESS NAME";
ScopedSpinLock lock(m_lock);
// Sort pointers to the regions by virtual address for readable output;
// the region list itself is left untouched.
Vector<Region*> sorted_regions;
sorted_regions.ensure_capacity(m_regions.size());
for (auto& region : m_regions)
sorted_regions.append(&region);
quick_sort(sorted_regions, [](auto& a, auto& b) {
return a->vaddr() < b->vaddr();
});
// Flags: R/W/X = access, S = shared, T = stack, A = anonymous VMObject.
for (auto& sorted_region : sorted_regions) {
auto& region = *sorted_region;
klog() << String::format("%08x", region.vaddr().get()) << " -- " << String::format("%08x", region.vaddr().offset(region.size() - 1).get()) << " " << String::format("%08zx", region.size()) << " " << (region.is_readable() ? 'R' : ' ') << (region.is_writable() ? 'W' : ' ') << (region.is_executable() ? 'X' : ' ') << (region.is_shared() ? 'S' : ' ') << (region.is_stack() ? 'T' : ' ') << (region.vmobject().is_anonymous() ? 'A' : ' ') << " " << region.name().characters();
}
MM.dump_kernel_regions();
}
// Make sure the compiler doesn't "optimize away" this function: // Make sure the compiler doesn't "optimize away" this function:
extern void signal_trampoline_dummy(); extern void signal_trampoline_dummy();
void signal_trampoline_dummy() void signal_trampoline_dummy()
@ -457,7 +331,7 @@ void Process::crash(int signal, u32 eip, bool out_of_memory)
} }
m_termination_signal = signal; m_termination_signal = signal;
set_dump_core(!out_of_memory); set_dump_core(!out_of_memory);
dump_regions(); space().dump_regions();
ASSERT(is_user_process()); ASSERT(is_user_process());
die(); die();
// We can not return from here, as there is nowhere // We can not return from here, as there is nowhere
@ -643,10 +517,7 @@ void Process::finalize()
unblock_waiters(Thread::WaitBlocker::UnblockFlags::Terminated); unblock_waiters(Thread::WaitBlocker::UnblockFlags::Terminated);
{ m_space->remove_all_regions({});
ScopedSpinLock lock(m_lock);
m_regions.clear();
}
ASSERT(ref_count() > 0); ASSERT(ref_count() > 0);
// WaitBlockCondition::finalize will be in charge of dropping the last // WaitBlockCondition::finalize will be in charge of dropping the last
@ -689,8 +560,8 @@ size_t Process::amount_dirty_private() const
// The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping. // The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping.
// That's probably a situation that needs to be looked at in general. // That's probably a situation that needs to be looked at in general.
size_t amount = 0; size_t amount = 0;
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
if (!region.is_shared()) if (!region.is_shared())
amount += region.amount_dirty(); amount += region.amount_dirty();
} }
@ -701,8 +572,8 @@ size_t Process::amount_clean_inode() const
{ {
HashTable<const InodeVMObject*> vmobjects; HashTable<const InodeVMObject*> vmobjects;
{ {
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
if (region.vmobject().is_inode()) if (region.vmobject().is_inode())
vmobjects.set(&static_cast<const InodeVMObject&>(region.vmobject())); vmobjects.set(&static_cast<const InodeVMObject&>(region.vmobject()));
} }
@ -716,8 +587,8 @@ size_t Process::amount_clean_inode() const
size_t Process::amount_virtual() const size_t Process::amount_virtual() const
{ {
size_t amount = 0; size_t amount = 0;
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
amount += region.size(); amount += region.size();
} }
return amount; return amount;
@ -727,8 +598,8 @@ size_t Process::amount_resident() const
{ {
// FIXME: This will double count if multiple regions use the same physical page. // FIXME: This will double count if multiple regions use the same physical page.
size_t amount = 0; size_t amount = 0;
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
amount += region.amount_resident(); amount += region.amount_resident();
} }
return amount; return amount;
@ -741,8 +612,8 @@ size_t Process::amount_shared() const
// and each PhysicalPage is only reffed by its VMObject. This needs to be refactored // and each PhysicalPage is only reffed by its VMObject. This needs to be refactored
// so that every Region contributes +1 ref to each of its PhysicalPages. // so that every Region contributes +1 ref to each of its PhysicalPages.
size_t amount = 0; size_t amount = 0;
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
amount += region.amount_shared(); amount += region.amount_shared();
} }
return amount; return amount;
@ -751,8 +622,8 @@ size_t Process::amount_shared() const
size_t Process::amount_purgeable_volatile() const size_t Process::amount_purgeable_volatile() const
{ {
size_t amount = 0; size_t amount = 0;
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
if (region.vmobject().is_anonymous() && static_cast<const AnonymousVMObject&>(region.vmobject()).is_any_volatile()) if (region.vmobject().is_anonymous() && static_cast<const AnonymousVMObject&>(region.vmobject()).is_any_volatile())
amount += region.amount_resident(); amount += region.amount_resident();
} }
@ -762,8 +633,8 @@ size_t Process::amount_purgeable_volatile() const
size_t Process::amount_purgeable_nonvolatile() const size_t Process::amount_purgeable_nonvolatile() const
{ {
size_t amount = 0; size_t amount = 0;
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
if (region.vmobject().is_anonymous() && !static_cast<const AnonymousVMObject&>(region.vmobject()).is_any_volatile()) if (region.vmobject().is_anonymous() && !static_cast<const AnonymousVMObject&>(region.vmobject()).is_any_volatile())
amount += region.amount_resident(); amount += region.amount_resident();
} }
@ -858,14 +729,6 @@ void Process::set_root_directory(const Custody& root)
m_root_directory = root; m_root_directory = root;
} }
Region& Process::add_region(NonnullOwnPtr<Region> region)
{
auto* ptr = region.ptr();
ScopedSpinLock lock(m_lock);
m_regions.append(move(region));
return *ptr;
}
void Process::set_tty(TTY* tty) void Process::set_tty(TTY* tty)
{ {
m_tty = tty; m_tty = tty;

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
@ -48,6 +48,7 @@
#include <Kernel/UnveilNode.h> #include <Kernel/UnveilNode.h>
#include <Kernel/VM/AllocationStrategy.h> #include <Kernel/VM/AllocationStrategy.h>
#include <Kernel/VM/RangeAllocator.h> #include <Kernel/VM/RangeAllocator.h>
#include <Kernel/VM/Space.h>
#include <LibC/signal_numbers.h> #include <LibC/signal_numbers.h>
#include <LibELF/exec_elf.h> #include <LibELF/exec_elf.h>
@ -99,6 +100,8 @@ enum class VeilState {
typedef HashMap<FlatPtr, RefPtr<FutexQueue>> FutexQueues; typedef HashMap<FlatPtr, RefPtr<FutexQueue>> FutexQueues;
struct LoadResult;
class Process class Process
: public RefCounted<Process> : public RefCounted<Process>
, public InlineLinkedListNode<Process> , public InlineLinkedListNode<Process>
@ -164,9 +167,6 @@ public:
bool is_kernel_process() const { return m_is_kernel_process; } bool is_kernel_process() const { return m_is_kernel_process; }
bool is_user_process() const { return !m_is_kernel_process; } bool is_user_process() const { return !m_is_kernel_process; }
PageDirectory& page_directory() { return *m_page_directory; }
const PageDirectory& page_directory() const { return *m_page_directory; }
static RefPtr<Process> from_pid(ProcessID); static RefPtr<Process> from_pid(ProcessID);
static SessionID get_sid_from_pgid(ProcessGroupID pgid); static SessionID get_sid_from_pgid(ProcessGroupID pgid);
@ -188,8 +188,6 @@ public:
bool is_dumpable() const { return m_dumpable; } bool is_dumpable() const { return m_dumpable; }
void set_dumpable(bool dumpable) { m_dumpable = dumpable; } void set_dumpable(bool dumpable) { m_dumpable = dumpable; }
ThreadID exec_tid() const { return m_exec_tid; }
mode_t umask() const { return m_umask; } mode_t umask() const { return m_umask; }
bool in_group(gid_t) const; bool in_group(gid_t) const;
@ -209,8 +207,6 @@ public:
void die(); void die();
void finalize(); void finalize();
ALWAYS_INLINE SpinLock<u32>& get_lock() const { return m_lock; }
ThreadTracer* tracer() { return m_tracer.ptr(); } ThreadTracer* tracer() { return m_tracer.ptr(); }
bool is_traced() const { return !!m_tracer; } bool is_traced() const { return !!m_tracer; }
void start_tracing_from(ProcessID tracer); void start_tracing_from(ProcessID tracer);
@ -373,14 +369,6 @@ public:
const TTY* tty() const { return m_tty; } const TTY* tty() const { return m_tty; }
void set_tty(TTY*); void set_tty(TTY*);
size_t region_count() const { return m_regions.size(); }
const NonnullOwnPtrVector<Region>& regions() const
{
ASSERT(m_lock.is_locked());
return m_regions;
}
void dump_regions();
u32 m_ticks_in_user { 0 }; u32 m_ticks_in_user { 0 };
u32 m_ticks_in_kernel { 0 }; u32 m_ticks_in_kernel { 0 };
@ -410,25 +398,12 @@ public:
int exec(String path, Vector<String> arguments, Vector<String> environment, int recusion_depth = 0); int exec(String path, Vector<String> arguments, Vector<String> environment, int recusion_depth = 0);
// Result of loading an ELF object: where it landed, its entry point, and
// the TLS/stack regions set up for the new program image.
struct LoadResult {
FlatPtr load_base { 0 };
FlatPtr entry_eip { 0 };
size_t size { 0 };
FlatPtr program_headers { 0 };
size_t num_program_headers { 0 };
// Weak pointers: the regions are owned elsewhere and may go away.
WeakPtr<Region> tls_region;
size_t tls_size { 0 };
size_t tls_alignment { 0 };
WeakPtr<Region> stack_region;
};
enum class ShouldAllocateTls { enum class ShouldAllocateTls {
No = 0, No = 0,
Yes, Yes,
}; };
KResultOr<LoadResult> load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header); KResultOr<LoadResult> load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header);
KResultOr<LoadResult> load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls);
KResultOr<FlatPtr> get_interpreter_load_offset(const Elf32_Ehdr& main_program_header, FileDescription& main_program_description, FileDescription& interpreter_description); KResultOr<FlatPtr> get_interpreter_load_offset(const Elf32_Ehdr& main_program_header, FileDescription& main_program_description, FileDescription& interpreter_description);
bool is_superuser() const bool is_superuser() const
@ -436,13 +411,6 @@ public:
return m_euid == 0; return m_euid == 0;
} }
KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, const String& name, int prot, bool shared);
KResultOr<Region*> allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve);
bool deallocate_region(Region& region);
Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject);
Vector<Region*, 2> split_region_around_range(const Region& source_region, const Range&);
void terminate_due_to_signal(u8 signal); void terminate_due_to_signal(u8 signal);
KResult send_signal(u8 signal, Process* sender); KResult send_signal(u8 signal, Process* sender);
@ -503,7 +471,8 @@ public:
PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; } PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; }
bool enforces_syscall_regions() const { return m_enforces_syscall_regions; } Space& space() { return *m_space; }
const Space& space() const { return *m_space; }
private: private:
friend class MemoryManager; friend class MemoryManager;
@ -518,10 +487,6 @@ private:
Process(RefPtr<Thread>& first_thread, const String& name, uid_t, gid_t, ProcessID ppid, bool is_kernel_process, RefPtr<Custody> cwd = nullptr, RefPtr<Custody> executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr); Process(RefPtr<Thread>& first_thread, const String& name, uid_t, gid_t, ProcessID ppid, bool is_kernel_process, RefPtr<Custody> cwd = nullptr, RefPtr<Custody> executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr);
static ProcessID allocate_pid(); static ProcessID allocate_pid();
Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE);
Region& add_region(NonnullOwnPtr<Region>);
void kill_threads_except_self(); void kill_threads_except_self();
void kill_all_threads(); void kill_all_threads();
bool dump_core(); bool dump_core();
@ -552,13 +517,13 @@ private:
void clear_futex_queues_on_exec(); void clear_futex_queues_on_exec();
RefPtr<PageDirectory> m_page_directory;
Process* m_prev { nullptr }; Process* m_prev { nullptr };
Process* m_next { nullptr }; Process* m_next { nullptr };
String m_name; String m_name;
OwnPtr<Space> m_space;
ProcessID m_pid { 0 }; ProcessID m_pid { 0 };
SessionID m_sid { 0 }; SessionID m_sid { 0 };
RefPtr<ProcessGroup> m_pg; RefPtr<ProcessGroup> m_pg;
@ -570,8 +535,6 @@ private:
uid_t m_suid { 0 }; uid_t m_suid { 0 };
gid_t m_sgid { 0 }; gid_t m_sgid { 0 };
ThreadID m_exec_tid { 0 };
OwnPtr<ThreadTracer> m_tracer; OwnPtr<ThreadTracer> m_tracer;
static const int m_max_open_file_descriptors { FD_SETSIZE }; static const int m_max_open_file_descriptors { FD_SETSIZE };
@ -617,16 +580,6 @@ private:
RefPtr<TTY> m_tty; RefPtr<TTY> m_tty;
Region* find_region_from_range(const Range&);
Region* find_region_containing(const Range&);
NonnullOwnPtrVector<Region> m_regions;
struct RegionLookupCache {
Optional<Range> range;
WeakPtr<Region> region;
};
RegionLookupCache m_region_lookup_cache;
ProcessID m_ppid { 0 }; ProcessID m_ppid { 0 };
mode_t m_umask { 022 }; mode_t m_umask { 022 };
@ -639,12 +592,9 @@ private:
size_t m_master_tls_alignment { 0 }; size_t m_master_tls_alignment { 0 };
Lock m_big_lock { "Process" }; Lock m_big_lock { "Process" };
mutable SpinLock<u32> m_lock;
RefPtr<Timer> m_alarm_timer; RefPtr<Timer> m_alarm_timer;
bool m_enforces_syscall_regions { false };
bool m_has_promises { false }; bool m_has_promises { false };
u32 m_promises { 0 }; u32 m_promises { 0 };
bool m_has_execpromises { false }; bool m_has_execpromises { false };

View file

@ -176,7 +176,7 @@ void syscall_handler(TrapFrame* trap)
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
} }
auto* calling_region = MM.find_region_from_vaddr(process, VirtualAddress(regs.eip)); auto* calling_region = MM.find_region_from_vaddr(process.space(), VirtualAddress(regs.eip));
if (!calling_region) { if (!calling_region) {
dbgln("Syscall from {:p} which has no associated region", regs.eip); dbgln("Syscall from {:p} which has no associated region", regs.eip);
handle_crash(regs, "Syscall from unknown region", SIGSEGV); handle_crash(regs, "Syscall from unknown region", SIGSEGV);
@ -189,7 +189,7 @@ void syscall_handler(TrapFrame* trap)
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
} }
if (process.enforces_syscall_regions() && !calling_region->is_syscall_region()) { if (process.space().enforces_syscall_regions() && !calling_region->is_syscall_region()) {
dbgln("Syscall from non-syscall region"); dbgln("Syscall from non-syscall region");
handle_crash(regs, "Syscall from non-syscall region", SIGSEGV); handle_crash(regs, "Syscall from non-syscall region", SIGSEGV);
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();

View file

@ -47,6 +47,19 @@
namespace Kernel { namespace Kernel {
// Result of loading an ELF object for exec(): now also owns the fully
// populated Space, so the new address space is only committed to once
// loading has succeeded.
struct LoadResult {
OwnPtr<Space> space;
FlatPtr load_base { 0 };
FlatPtr entry_eip { 0 };
size_t size { 0 };
FlatPtr program_headers { 0 };
size_t num_program_headers { 0 };
// Weak pointers: the regions are owned by `space` above.
WeakPtr<Region> tls_region;
size_t tls_size { 0 };
size_t tls_alignment { 0 };
WeakPtr<Region> stack_region;
};
static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd); static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd);
static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment) static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment)
@ -142,7 +155,7 @@ static KResultOr<FlatPtr> make_userspace_stack_for_main_thread(Region& region, V
return new_esp; return new_esp;
} }
KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls) static KResultOr<LoadResult> load_elf_object(NonnullOwnPtr<Space> new_space, FileDescription& object_description, FlatPtr load_offset, Process::ShouldAllocateTls should_allocate_tls)
{ {
auto& inode = *(object_description.inode()); auto& inode = *(object_description.inode());
auto vmobject = SharedInodeVMObject::create_with_inode(inode); auto vmobject = SharedInodeVMObject::create_with_inode(inode);
@ -172,10 +185,12 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_
String elf_name = object_description.absolute_path(); String elf_name = object_description.absolute_path();
ASSERT(!Processor::current().in_critical()); ASSERT(!Processor::current().in_critical());
MemoryManager::enter_space(*new_space);
KResult ph_load_result = KSuccess; KResult ph_load_result = KSuccess;
elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) { elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) {
if (program_header.type() == PT_TLS) { if (program_header.type() == PT_TLS) {
ASSERT(should_allocate_tls == ShouldAllocateTls::Yes); ASSERT(should_allocate_tls == Process::ShouldAllocateTls::Yes);
ASSERT(program_header.size_in_memory()); ASSERT(program_header.size_in_memory());
if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) { if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
@ -184,13 +199,13 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_
return IterationDecision::Break; return IterationDecision::Break;
} }
auto range = allocate_range({}, program_header.size_in_memory()); auto range = new_space->allocate_range({}, program_header.size_in_memory());
if (!range.has_value()) { if (!range.has_value()) {
ph_load_result = ENOMEM; ph_load_result = ENOMEM;
return IterationDecision::Break; return IterationDecision::Break;
} }
auto region_or_error = allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
if (region_or_error.is_error()) { if (region_or_error.is_error()) {
ph_load_result = region_or_error.error(); ph_load_result = region_or_error.error();
return IterationDecision::Break; return IterationDecision::Break;
@ -225,12 +240,12 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_
if (program_header.is_writable()) if (program_header.is_writable())
prot |= PROT_WRITE; prot |= PROT_WRITE;
auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : ""); auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : "");
auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory());
if (!range.has_value()) { if (!range.has_value()) {
ph_load_result = ENOMEM; ph_load_result = ENOMEM;
return IterationDecision::Break; return IterationDecision::Break;
} }
auto region_or_error = allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve);
if (region_or_error.is_error()) { if (region_or_error.is_error()) {
ph_load_result = region_or_error.error(); ph_load_result = region_or_error.error();
return IterationDecision::Break; return IterationDecision::Break;
@ -262,12 +277,12 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_
prot |= PROT_WRITE; prot |= PROT_WRITE;
if (program_header.is_executable()) if (program_header.is_executable())
prot |= PROT_EXEC; prot |= PROT_EXEC;
auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory());
if (!range.has_value()) { if (!range.has_value()) {
ph_load_result = ENOMEM; ph_load_result = ENOMEM;
return IterationDecision::Break; return IterationDecision::Break;
} }
auto region_or_error = allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true);
if (region_or_error.is_error()) { if (region_or_error.is_error()) {
ph_load_result = region_or_error.error(); ph_load_result = region_or_error.error();
return IterationDecision::Break; return IterationDecision::Break;
@ -287,19 +302,20 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_
return ENOEXEC; return ENOEXEC;
} }
auto stack_range = allocate_range({}, Thread::default_userspace_stack_size); auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size);
if (!stack_range.has_value()) { if (!stack_range.has_value()) {
dbgln("do_exec: Failed to allocate VM range for stack"); dbgln("do_exec: Failed to allocate VM range for stack");
return ENOMEM; return ENOMEM;
} }
auto stack_region_or_error = allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
if (stack_region_or_error.is_error()) if (stack_region_or_error.is_error())
return stack_region_or_error.error(); return stack_region_or_error.error();
auto& stack_region = *stack_region_or_error.value(); auto& stack_region = *stack_region_or_error.value();
stack_region.set_stack(true); stack_region.set_stack(true);
return LoadResult { return LoadResult {
move(new_space),
load_base_address, load_base_address,
elf_image.entry().offset(load_offset).get(), elf_image.entry().offset(load_offset).get(),
executable_size, executable_size,
@ -312,44 +328,20 @@ KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_
}; };
} }
KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header) KResultOr<LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header)
{ {
RefPtr<PageDirectory> old_page_directory; auto new_space = Space::create(*this, nullptr);
NonnullOwnPtrVector<Region> old_regions; if (!new_space)
return ENOMEM;
{ ScopeGuard space_guard([&]() {
auto page_directory = PageDirectory::create_for_userspace(*this); MemoryManager::enter_process_paging_scope(*this);
if (!page_directory)
return ENOMEM;
// Need to make sure we don't swap contexts in the middle
ScopedCritical critical;
old_page_directory = move(m_page_directory);
old_regions = move(m_regions);
m_page_directory = page_directory.release_nonnull();
MM.enter_process_paging_scope(*this);
}
ArmedScopeGuard rollback_regions_guard([&]() {
ASSERT(Process::current() == this);
// Need to make sure we don't swap contexts in the middle
ScopedCritical critical;
// Explicitly clear m_regions *before* restoring the page directory,
// otherwise we may silently corrupt memory!
m_regions.clear();
// Now that we freed the regions, revert to the original page directory
// and restore the original regions
m_page_directory = move(old_page_directory);
MM.enter_process_paging_scope(*this);
m_regions = move(old_regions);
}); });
if (interpreter_description.is_null()) { if (interpreter_description.is_null()) {
auto result = load_elf_object(main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); auto result = load_elf_object(new_space.release_nonnull(), main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes);
if (result.is_error()) if (result.is_error())
return result.error(); return result.error();
rollback_regions_guard.disarm();
return result; return result;
} }
@ -358,7 +350,7 @@ KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main
return interpreter_load_offset.error(); return interpreter_load_offset.error();
} }
auto interpreter_load_result = load_elf_object(*interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No);
if (interpreter_load_result.is_error()) if (interpreter_load_result.is_error())
return interpreter_load_result.error(); return interpreter_load_result.error();
@ -368,7 +360,6 @@ KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main
ASSERT(!interpreter_load_result.value().tls_alignment); ASSERT(!interpreter_load_result.value().tls_alignment);
ASSERT(!interpreter_load_result.value().tls_size); ASSERT(!interpreter_load_result.value().tls_size);
rollback_regions_guard.disarm();
return interpreter_load_result; return interpreter_load_result;
} }
@ -481,34 +472,22 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
if (parts.is_empty()) if (parts.is_empty())
return -ENOENT; return -ENOENT;
auto main_program_metadata = main_program_description->metadata();
auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header);
if (load_result_or_error.is_error()) {
dbgln("do_exec({}): Failed to load main program or interpreter", path);
return load_result_or_error.error();
}
// We commit to the new executable at this point. There is no turning back!
// Disable profiling temporarily in case it's running on this process. // Disable profiling temporarily in case it's running on this process.
TemporaryChange profiling_disabler(m_profiling, false); TemporaryChange profiling_disabler(m_profiling, false);
// Mark this thread as the current thread that does exec kill_threads_except_self();
// No other thread from this process will be scheduled to run
auto current_thread = Thread::current();
m_exec_tid = current_thread->tid();
// NOTE: We switch credentials before altering the memory layout of the process.
// This ensures that ptrace access control takes the right credentials into account.
// FIXME: This still feels rickety. Perhaps it would be better to simply block ptrace
// clients until we're ready to be traced? Or reject them with EPERM?
auto main_program_metadata = main_program_description->metadata();
auto old_euid = m_euid;
auto old_suid = m_suid;
auto old_egid = m_egid;
auto old_sgid = m_sgid;
ArmedScopeGuard cred_restore_guard = [&] {
m_euid = old_euid;
m_suid = old_suid;
m_egid = old_egid;
m_sgid = old_sgid;
};
auto& load_result = load_result_or_error.value();
bool executable_is_setid = false; bool executable_is_setid = false;
if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) { if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
@ -522,17 +501,8 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
} }
} }
auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); m_space = load_result.space.release_nonnull();
if (load_result_or_error.is_error()) { MemoryManager::enter_space(*m_space);
dbgln("do_exec({}): Failed to load main program or interpreter", path);
return load_result_or_error.error();
}
auto& load_result = load_result_or_error.value();
// We can commit to the new credentials at this point.
cred_restore_guard.disarm();
kill_threads_except_self();
#if EXEC_DEBUG #if EXEC_DEBUG
dbgln("Memory layout after ELF load:"); dbgln("Memory layout after ELF load:");
@ -549,20 +519,17 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
m_execpromises = 0; m_execpromises = 0;
m_has_execpromises = false; m_has_execpromises = false;
m_enforces_syscall_regions = false;
m_veil_state = VeilState::None; m_veil_state = VeilState::None;
m_unveiled_paths.clear(); m_unveiled_paths.clear();
m_coredump_metadata.clear(); m_coredump_metadata.clear();
auto current_thread = Thread::current();
current_thread->set_default_signal_dispositions(); current_thread->set_default_signal_dispositions();
current_thread->clear_signals(); current_thread->clear_signals();
clear_futex_queues_on_exec(); clear_futex_queues_on_exec();
m_region_lookup_cache = {};
set_dumpable(!executable_is_setid); set_dumpable(!executable_is_setid);
for (size_t i = 0; i < m_fds.size(); ++i) { for (size_t i = 0; i < m_fds.size(); ++i) {
@ -616,8 +583,10 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
// FIXME: PID/TID ISSUE // FIXME: PID/TID ISSUE
m_pid = new_main_thread->tid().value(); m_pid = new_main_thread->tid().value();
auto tsr_result = new_main_thread->make_thread_specific_region({}); auto tsr_result = new_main_thread->make_thread_specific_region({});
if (tsr_result.is_error()) if (tsr_result.is_error()) {
return tsr_result.error(); // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
ASSERT_NOT_REACHED();
}
new_main_thread->reset_fpu_state(); new_main_thread->reset_fpu_state();
auto& tss = new_main_thread->m_tss; auto& tss = new_main_thread->m_tss;
@ -629,7 +598,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
tss.gs = GDT_SELECTOR_TLS | 3; tss.gs = GDT_SELECTOR_TLS | 3;
tss.eip = load_result.entry_eip; tss.eip = load_result.entry_eip;
tss.esp = new_userspace_esp; tss.esp = new_userspace_esp;
tss.cr3 = m_page_directory->cr3(); tss.cr3 = space().page_directory().cr3();
tss.ss2 = m_pid.value(); tss.ss2 = m_pid.value();
// Throw away any recorded performance events in this process. // Throw away any recorded performance events in this process.
@ -870,8 +839,6 @@ int Process::exec(String path, Vector<String> arguments, Vector<String> environm
u32 prev_flags = 0; u32 prev_flags = 0;
int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header); int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header);
m_exec_tid = 0;
if (rc < 0) if (rc < 0)
return rc; return rc;

View file

@ -47,15 +47,14 @@ pid_t Process::sys$fork(RegisterState& regs)
child->m_has_execpromises = m_has_execpromises; child->m_has_execpromises = m_has_execpromises;
child->m_veil_state = m_veil_state; child->m_veil_state = m_veil_state;
child->m_unveiled_paths = m_unveiled_paths.deep_copy(); child->m_unveiled_paths = m_unveiled_paths.deep_copy();
child->m_enforces_syscall_regions = m_enforces_syscall_regions;
child->m_fds = m_fds; child->m_fds = m_fds;
child->m_sid = m_sid; child->m_sid = m_sid;
child->m_pg = m_pg; child->m_pg = m_pg;
child->m_umask = m_umask; child->m_umask = m_umask;
child->m_extra_gids = m_extra_gids;
dbgln_if(FORK_DEBUG, "fork: child={}", child); dbgln_if(FORK_DEBUG, "fork: child={}", child);
child->space().set_enforces_syscall_regions(space().enforces_syscall_regions());
child->m_extra_gids = m_extra_gids;
auto& child_tss = child_first_thread->m_tss; auto& child_tss = child_first_thread->m_tss;
child_tss.eax = 0; // fork() returns 0 in the child :^) child_tss.eax = 0; // fork() returns 0 in the child :^)
@ -80,8 +79,8 @@ pid_t Process::sys$fork(RegisterState& regs)
#endif #endif
{ {
ScopedSpinLock lock(m_lock); ScopedSpinLock lock(space().get_lock());
for (auto& region : m_regions) { for (auto& region : space().regions()) {
dbgln_if(FORK_DEBUG, "fork: cloning Region({}) '{}' @ {}", &region, region.name(), region.vaddr()); dbgln_if(FORK_DEBUG, "fork: cloning Region({}) '{}' @ {}", &region, region.name(), region.vaddr());
auto region_clone = region.clone(*child); auto region_clone = region.clone(*child);
if (!region_clone) { if (!region_clone) {
@ -90,8 +89,8 @@ pid_t Process::sys$fork(RegisterState& regs)
return -ENOMEM; return -ENOMEM;
} }
auto& child_region = child->add_region(region_clone.release_nonnull()); auto& child_region = child->space().add_region(region_clone.release_nonnull());
child_region.map(child->page_directory()); child_region.map(child->space().page_directory());
if (&region == m_master_tls_region.unsafe_ptr()) if (&region == m_master_tls_region.unsafe_ptr())
child->m_master_tls_region = child_region; child->m_master_tls_region = child_region;

View file

@ -147,7 +147,7 @@ int Process::sys$futex(Userspace<const Syscall::SC_futex_params*> user_params)
if (!is_private) { if (!is_private) {
if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset), sizeof(u32))) if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset), sizeof(u32)))
return -EFAULT; return -EFAULT;
auto region = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset)); auto region = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset));
if (!region) if (!region)
return -EFAULT; return -EFAULT;
vmobject = region->vmobject(); vmobject = region->vmobject();
@ -159,7 +159,7 @@ int Process::sys$futex(Userspace<const Syscall::SC_futex_params*> user_params)
case FUTEX_WAKE_OP: { case FUTEX_WAKE_OP: {
if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset2), sizeof(u32))) if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset2), sizeof(u32)))
return -EFAULT; return -EFAULT;
auto region2 = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset2)); auto region2 = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset2));
if (!region2) if (!region2)
return -EFAULT; return -EFAULT;
vmobject2 = region2->vmobject(); vmobject2 = region2->vmobject();

View file

@ -32,7 +32,7 @@ namespace Kernel {
int Process::sys$get_stack_bounds(FlatPtr* user_stack_base, size_t* user_stack_size) int Process::sys$get_stack_bounds(FlatPtr* user_stack_base, size_t* user_stack_size)
{ {
FlatPtr stack_pointer = Thread::current()->get_register_dump_from_stack().userspace_esp; FlatPtr stack_pointer = Thread::current()->get_register_dump_from_stack().userspace_esp;
auto* stack_region = MM.find_region_from_vaddr(*this, VirtualAddress(stack_pointer)); auto* stack_region = MM.find_region_from_vaddr(space(), VirtualAddress(stack_pointer));
if (!stack_region) { if (!stack_region) {
ASSERT_NOT_REACHED(); ASSERT_NOT_REACHED();
return -EINVAL; return -EINVAL;

View file

@ -204,13 +204,13 @@ void* Process::sys$mmap(Userspace<const Syscall::SC_mmap_params*> user_params)
Optional<Range> range; Optional<Range> range;
if (map_randomized) { if (map_randomized) {
range = page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); range = space().page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment);
} else { } else {
range = allocate_range(VirtualAddress(addr), size, alignment); range = space().allocate_range(VirtualAddress(addr), size, alignment);
if (!range.has_value()) { if (!range.has_value()) {
if (addr && !map_fixed) { if (addr && !map_fixed) {
// If there's an address but MAP_FIXED wasn't specified, the address is just a hint. // If there's an address but MAP_FIXED wasn't specified, the address is just a hint.
range = allocate_range({}, size, alignment); range = space().allocate_range({}, size, alignment);
} }
} }
} }
@ -220,7 +220,7 @@ void* Process::sys$mmap(Userspace<const Syscall::SC_mmap_params*> user_params)
if (map_anonymous) { if (map_anonymous) {
auto strategy = map_noreserve ? AllocationStrategy::None : AllocationStrategy::Reserve; auto strategy = map_noreserve ? AllocationStrategy::None : AllocationStrategy::Reserve;
auto region_or_error = allocate_region(range.value(), !name.is_null() ? name : "mmap", prot, strategy); auto region_or_error = space().allocate_region(range.value(), !name.is_null() ? name : "mmap", prot, strategy);
if (region_or_error.is_error()) if (region_or_error.is_error())
return (void*)region_or_error.error().error(); return (void*)region_or_error.error().error();
region = region_or_error.value(); region = region_or_error.value();
@ -280,7 +280,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot)
Range range_to_mprotect = { VirtualAddress(addr), size }; Range range_to_mprotect = { VirtualAddress(addr), size };
if (auto* whole_region = find_region_from_range(range_to_mprotect)) { if (auto* whole_region = space().find_region_from_range(range_to_mprotect)) {
if (!whole_region->is_mmap()) if (!whole_region->is_mmap())
return -EPERM; return -EPERM;
if (!validate_mmap_prot(prot, whole_region->is_stack(), whole_region->vmobject().is_anonymous(), whole_region)) if (!validate_mmap_prot(prot, whole_region->is_stack(), whole_region->vmobject().is_anonymous(), whole_region))
@ -300,7 +300,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot)
} }
// Check if we can carve out the desired range from an existing region // Check if we can carve out the desired range from an existing region
if (auto* old_region = find_region_containing(range_to_mprotect)) { if (auto* old_region = space().find_region_containing(range_to_mprotect)) {
if (!old_region->is_mmap()) if (!old_region->is_mmap())
return -EPERM; return -EPERM;
if (!validate_mmap_prot(prot, old_region->is_stack(), old_region->vmobject().is_anonymous(), old_region)) if (!validate_mmap_prot(prot, old_region->is_stack(), old_region->vmobject().is_anonymous(), old_region))
@ -314,23 +314,23 @@ int Process::sys$mprotect(void* addr, size_t size, int prot)
// This vector is the region(s) adjacent to our range. // This vector is the region(s) adjacent to our range.
// We need to allocate a new region for the range we wanted to change permission bits on. // We need to allocate a new region for the range we wanted to change permission bits on.
auto adjacent_regions = split_region_around_range(*old_region, range_to_mprotect); auto adjacent_regions = space().split_region_around_range(*old_region, range_to_mprotect);
size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get()); size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get());
auto& new_region = allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); auto& new_region = space().allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject);
new_region.set_readable(prot & PROT_READ); new_region.set_readable(prot & PROT_READ);
new_region.set_writable(prot & PROT_WRITE); new_region.set_writable(prot & PROT_WRITE);
new_region.set_executable(prot & PROT_EXEC); new_region.set_executable(prot & PROT_EXEC);
// Unmap the old region here, specifying that we *don't* want the VM deallocated. // Unmap the old region here, specifying that we *don't* want the VM deallocated.
old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
deallocate_region(*old_region); space().deallocate_region(*old_region);
// Map the new regions using our page directory (they were just allocated and don't have one). // Map the new regions using our page directory (they were just allocated and don't have one).
for (auto* adjacent_region : adjacent_regions) { for (auto* adjacent_region : adjacent_regions) {
adjacent_region->map(page_directory()); adjacent_region->map(space().page_directory());
} }
new_region.map(page_directory()); new_region.map(space().page_directory());
return 0; return 0;
} }
@ -349,7 +349,7 @@ int Process::sys$madvise(void* address, size_t size, int advice)
if (!is_user_range(VirtualAddress(address), size)) if (!is_user_range(VirtualAddress(address), size))
return -EFAULT; return -EFAULT;
auto* region = find_region_from_range({ VirtualAddress(address), size }); auto* region = space().find_region_from_range({ VirtualAddress(address), size });
if (!region) if (!region)
return -EINVAL; return -EINVAL;
if (!region->is_mmap()) if (!region->is_mmap())
@ -397,7 +397,7 @@ int Process::sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_name_params*
if (name.is_null()) if (name.is_null())
return -EFAULT; return -EFAULT;
auto* region = find_region_from_range({ VirtualAddress(params.addr), params.size }); auto* region = space().find_region_from_range({ VirtualAddress(params.addr), params.size });
if (!region) if (!region)
return -EINVAL; return -EINVAL;
if (!region->is_mmap()) if (!region->is_mmap())
@ -406,24 +406,6 @@ int Process::sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_name_params*
return 0; return 0;
} }
// Carve out a virtual address range from a region and return the two regions on either side
Vector<Region*, 2> Process::split_region_around_range(const Region& source_region, const Range& desired_range)
{
Range old_region_range = source_region.range();
auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);
ASSERT(!remaining_ranges_after_unmap.is_empty());
auto make_replacement_region = [&](const Range& new_range) -> Region& {
ASSERT(old_region_range.contains(new_range));
size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
};
Vector<Region*, 2> new_regions;
for (auto& new_range : remaining_ranges_after_unmap) {
new_regions.unchecked_append(&make_replacement_region(new_range));
}
return new_regions;
}
int Process::sys$munmap(void* addr, size_t size) int Process::sys$munmap(void* addr, size_t size)
{ {
REQUIRE_PROMISE(stdio); REQUIRE_PROMISE(stdio);
@ -435,30 +417,30 @@ int Process::sys$munmap(void* addr, size_t size)
return -EFAULT; return -EFAULT;
Range range_to_unmap { VirtualAddress(addr), size }; Range range_to_unmap { VirtualAddress(addr), size };
if (auto* whole_region = find_region_from_range(range_to_unmap)) { if (auto* whole_region = space().find_region_from_range(range_to_unmap)) {
if (!whole_region->is_mmap()) if (!whole_region->is_mmap())
return -EPERM; return -EPERM;
bool success = deallocate_region(*whole_region); bool success = space().deallocate_region(*whole_region);
ASSERT(success); ASSERT(success);
return 0; return 0;
} }
if (auto* old_region = find_region_containing(range_to_unmap)) { if (auto* old_region = space().find_region_containing(range_to_unmap)) {
if (!old_region->is_mmap()) if (!old_region->is_mmap())
return -EPERM; return -EPERM;
auto new_regions = split_region_around_range(*old_region, range_to_unmap); auto new_regions = space().split_region_around_range(*old_region, range_to_unmap);
// We manually unmap the old region here, specifying that we *don't* want the VM deallocated. // We manually unmap the old region here, specifying that we *don't* want the VM deallocated.
old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
deallocate_region(*old_region); space().deallocate_region(*old_region);
// Instead we give back the unwanted VM manually. // Instead we give back the unwanted VM manually.
page_directory().range_allocator().deallocate(range_to_unmap); space().page_directory().range_allocator().deallocate(range_to_unmap);
// And finally we map the new region(s) using our page directory (they were just allocated and don't have one). // And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
for (auto* new_region : new_regions) { for (auto* new_region : new_regions) {
new_region->map(page_directory()); new_region->map(space().page_directory());
} }
return 0; return 0;
} }
@ -476,7 +458,7 @@ void* Process::sys$mremap(Userspace<const Syscall::SC_mremap_params*> user_param
if (!copy_from_user(&params, user_params)) if (!copy_from_user(&params, user_params))
return (void*)-EFAULT; return (void*)-EFAULT;
auto* old_region = find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); auto* old_region = space().find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size });
if (!old_region) if (!old_region)
return (void*)-EINVAL; return (void*)-EINVAL;
@ -491,11 +473,11 @@ void* Process::sys$mremap(Userspace<const Syscall::SC_mremap_params*> user_param
// Unmap without deallocating the VM range since we're going to reuse it. // Unmap without deallocating the VM range since we're going to reuse it.
old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No);
deallocate_region(*old_region); space().deallocate_region(*old_region);
auto new_vmobject = PrivateInodeVMObject::create_with_inode(inode); auto new_vmobject = PrivateInodeVMObject::create_with_inode(inode);
auto new_region_or_error = allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); auto new_region_or_error = space().allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false);
if (new_region_or_error.is_error()) if (new_region_or_error.is_error())
return (void*)new_region_or_error.error().error(); return (void*)new_region_or_error.error().error();
auto& new_region = *new_region_or_error.value(); auto& new_region = *new_region_or_error.value();
@ -527,11 +509,11 @@ void* Process::sys$allocate_tls(size_t size)
}); });
ASSERT(main_thread); ASSERT(main_thread);
auto range = allocate_range({}, size); auto range = space().allocate_range({}, size);
if (!range.has_value()) if (!range.has_value())
return (void*)-ENOMEM; return (void*)-ENOMEM;
auto region_or_error = allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); auto region_or_error = space().allocate_region(range.value(), String(), PROT_READ | PROT_WRITE);
if (region_or_error.is_error()) if (region_or_error.is_error())
return (void*)region_or_error.error().error(); return (void*)region_or_error.error().error();
@ -552,15 +534,15 @@ void* Process::sys$allocate_tls(size_t size)
int Process::sys$msyscall(void* address) int Process::sys$msyscall(void* address)
{ {
if (m_enforces_syscall_regions) if (space().enforces_syscall_regions())
return -EPERM; return -EPERM;
if (!address) { if (!address) {
m_enforces_syscall_regions = true; space().set_enforces_syscall_regions(true);
return 0; return 0;
} }
auto* region = find_region_containing(Range { VirtualAddress { address }, 1 }); auto* region = space().find_region_containing(Range { VirtualAddress { address }, 1 });
if (!region) if (!region)
return -EINVAL; return -EINVAL;

View file

@ -73,7 +73,7 @@ KResultOr<u32> Process::peek_user_data(Userspace<const u32*> address)
KResult Process::poke_user_data(Userspace<u32*> address, u32 data) KResult Process::poke_user_data(Userspace<u32*> address, u32 data)
{ {
Range range = { VirtualAddress(address), sizeof(u32) }; Range range = { VirtualAddress(address), sizeof(u32) };
auto* region = find_region_containing(range); auto* region = space().find_region_containing(range);
if (!region) if (!region)
return EFAULT; return EFAULT;
ProcessPagingScope scope(*this); ProcessPagingScope scope(*this);

View file

@ -80,7 +80,7 @@ int Process::sys$create_thread(void* (*entry)(void*), Userspace<const Syscall::S
auto& tss = thread->tss(); auto& tss = thread->tss();
tss.eip = (FlatPtr)entry; tss.eip = (FlatPtr)entry;
tss.eflags = 0x0202; tss.eflags = 0x0202;
tss.cr3 = page_directory().cr3(); tss.cr3 = space().page_directory().cr3();
tss.esp = (u32)user_stack_address; tss.esp = (u32)user_stack_address;
auto tsr_result = thread->make_thread_specific_region({}); auto tsr_result = thread->make_thread_specific_region({});

View file

@ -108,7 +108,7 @@ Thread::Thread(NonnullRefPtr<Process> process, NonnullOwnPtr<Region> kernel_stac
m_tss.gs = GDT_SELECTOR_TLS | 3; m_tss.gs = GDT_SELECTOR_TLS | 3;
} }
m_tss.cr3 = m_process->page_directory().cr3(); m_tss.cr3 = m_process->space().page_directory().cr3();
m_kernel_stack_base = m_kernel_stack_region->vaddr().get(); m_kernel_stack_base = m_kernel_stack_region->vaddr().get();
m_kernel_stack_top = m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u; m_kernel_stack_top = m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u;
@ -1015,11 +1015,11 @@ KResult Thread::make_thread_specific_region(Badge<Process>)
if (!process().m_master_tls_region) if (!process().m_master_tls_region)
return KSuccess; return KSuccess;
auto range = process().allocate_range({}, thread_specific_region_size()); auto range = process().space().allocate_range({}, thread_specific_region_size());
if (!range.has_value()) if (!range.has_value())
return ENOMEM; return ENOMEM;
auto region_or_error = process().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); auto region_or_error = process().space().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE);
if (region_or_error.is_error()) if (region_or_error.is_error())
return region_or_error.error(); return region_or_error.error();

View file

@ -401,29 +401,29 @@ Region* MemoryManager::kernel_region_from_vaddr(VirtualAddress vaddr)
return nullptr; return nullptr;
} }
Region* MemoryManager::user_region_from_vaddr(Process& process, VirtualAddress vaddr) Region* MemoryManager::user_region_from_vaddr(Space& space, VirtualAddress vaddr)
{ {
ScopedSpinLock lock(s_mm_lock);
// FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure! // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure!
for (auto& region : process.m_regions) { ScopedSpinLock lock(space.get_lock());
for (auto& region : space.regions()) {
if (region.contains(vaddr)) if (region.contains(vaddr))
return &region; return &region;
} }
return nullptr; return nullptr;
} }
Region* MemoryManager::find_region_from_vaddr(Process& process, VirtualAddress vaddr) Region* MemoryManager::find_region_from_vaddr(Space& space, VirtualAddress vaddr)
{ {
ScopedSpinLock lock(s_mm_lock); ScopedSpinLock lock(s_mm_lock);
if (auto* region = user_region_from_vaddr(process, vaddr)) if (auto* region = user_region_from_vaddr(space, vaddr))
return region; return region;
return kernel_region_from_vaddr(vaddr); return kernel_region_from_vaddr(vaddr);
} }
const Region* MemoryManager::find_region_from_vaddr(const Process& process, VirtualAddress vaddr) const Region* MemoryManager::find_region_from_vaddr(const Space& space, VirtualAddress vaddr)
{ {
ScopedSpinLock lock(s_mm_lock); ScopedSpinLock lock(s_mm_lock);
if (auto* region = user_region_from_vaddr(const_cast<Process&>(process), vaddr)) if (auto* region = user_region_from_vaddr(const_cast<Space&>(space), vaddr))
return region; return region;
return kernel_region_from_vaddr(vaddr); return kernel_region_from_vaddr(vaddr);
} }
@ -436,8 +436,8 @@ Region* MemoryManager::find_region_from_vaddr(VirtualAddress vaddr)
auto page_directory = PageDirectory::find_by_cr3(read_cr3()); auto page_directory = PageDirectory::find_by_cr3(read_cr3());
if (!page_directory) if (!page_directory)
return nullptr; return nullptr;
ASSERT(page_directory->process()); ASSERT(page_directory->space());
return user_region_from_vaddr(*page_directory->process(), vaddr); return user_region_from_vaddr(*page_directory->space(), vaddr);
} }
PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault) PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
@ -734,13 +734,18 @@ RefPtr<PhysicalPage> MemoryManager::allocate_supervisor_physical_page()
} }
void MemoryManager::enter_process_paging_scope(Process& process) void MemoryManager::enter_process_paging_scope(Process& process)
{
enter_space(process.space());
}
void MemoryManager::enter_space(Space& space)
{ {
auto current_thread = Thread::current(); auto current_thread = Thread::current();
ASSERT(current_thread != nullptr); ASSERT(current_thread != nullptr);
ScopedSpinLock lock(s_mm_lock); ScopedSpinLock lock(s_mm_lock);
current_thread->tss().cr3 = process.page_directory().cr3(); current_thread->tss().cr3 = space.page_directory().cr3();
write_cr3(process.page_directory().cr3()); write_cr3(space.page_directory().cr3());
} }
void MemoryManager::flush_tlb_local(VirtualAddress vaddr, size_t page_count) void MemoryManager::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
@ -846,7 +851,7 @@ bool MemoryManager::validate_user_stack(const Process& process, VirtualAddress v
if (!is_user_address(vaddr)) if (!is_user_address(vaddr))
return false; return false;
ScopedSpinLock lock(s_mm_lock); ScopedSpinLock lock(s_mm_lock);
auto* region = user_region_from_vaddr(const_cast<Process&>(process), vaddr); auto* region = user_region_from_vaddr(const_cast<Process&>(process).space(), vaddr);
return region && region->is_user_accessible() && region->is_stack(); return region && region->is_user_accessible() && region->is_stack();
} }

View file

@ -143,7 +143,8 @@ public:
PageFaultResponse handle_page_fault(const PageFault&); PageFaultResponse handle_page_fault(const PageFault&);
void enter_process_paging_scope(Process&); static void enter_process_paging_scope(Process&);
static void enter_space(Space&);
bool validate_user_stack(const Process&, VirtualAddress) const; bool validate_user_stack(const Process&, VirtualAddress) const;
@ -196,8 +197,8 @@ public:
} }
} }
static Region* find_region_from_vaddr(Process&, VirtualAddress); static Region* find_region_from_vaddr(Space&, VirtualAddress);
static const Region* find_region_from_vaddr(const Process&, VirtualAddress); static const Region* find_region_from_vaddr(const Space&, VirtualAddress);
void dump_kernel_regions(); void dump_kernel_regions();
@ -225,7 +226,7 @@ private:
static void flush_tlb_local(VirtualAddress, size_t page_count = 1); static void flush_tlb_local(VirtualAddress, size_t page_count = 1);
static void flush_tlb(const PageDirectory*, VirtualAddress, size_t page_count = 1); static void flush_tlb(const PageDirectory*, VirtualAddress, size_t page_count = 1);
static Region* user_region_from_vaddr(Process&, VirtualAddress); static Region* user_region_from_vaddr(Space&, VirtualAddress);
static Region* kernel_region_from_vaddr(VirtualAddress); static Region* kernel_region_from_vaddr(VirtualAddress);
static Region* find_region_from_vaddr(VirtualAddress); static Region* find_region_from_vaddr(VirtualAddress);

View file

@ -73,7 +73,7 @@ PageDirectory::PageDirectory()
m_directory_pages[3] = PhysicalPage::create(boot_pd3_paddr, true, false); m_directory_pages[3] = PhysicalPage::create(boot_pd3_paddr, true, false);
} }
PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_range_allocator) PageDirectory::PageDirectory(const RangeAllocator* parent_range_allocator)
{ {
ScopedSpinLock lock(s_mm_lock); ScopedSpinLock lock(s_mm_lock);
if (parent_range_allocator) { if (parent_range_allocator) {
@ -142,8 +142,8 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang
auto* new_pd = MM.quickmap_pd(*this, 0); auto* new_pd = MM.quickmap_pd(*this, 0);
memcpy(new_pd, &buffer, sizeof(PageDirectoryEntry)); memcpy(new_pd, &buffer, sizeof(PageDirectoryEntry));
// If we got here, we successfully created it. Set m_process now // If we got here, we successfully created it. Set m_space now
m_process = &process; m_valid = true;
cr3_map().set(cr3(), this); cr3_map().set(cr3(), this);
} }
@ -151,7 +151,7 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang
PageDirectory::~PageDirectory() PageDirectory::~PageDirectory()
{ {
ScopedSpinLock lock(s_mm_lock); ScopedSpinLock lock(s_mm_lock);
if (m_process) if (m_space)
cr3_map().remove(cr3()); cr3_map().remove(cr3());
} }

View file

@ -40,10 +40,10 @@ class PageDirectory : public RefCounted<PageDirectory> {
friend class MemoryManager; friend class MemoryManager;
public: public:
static RefPtr<PageDirectory> create_for_userspace(Process& process, const RangeAllocator* parent_range_allocator = nullptr) static RefPtr<PageDirectory> create_for_userspace(const RangeAllocator* parent_range_allocator = nullptr)
{ {
auto page_directory = adopt(*new PageDirectory(process, parent_range_allocator)); auto page_directory = adopt(*new PageDirectory(parent_range_allocator));
if (!page_directory->process()) if (!page_directory->is_valid())
return {}; return {};
return page_directory; return page_directory;
} }
@ -55,24 +55,31 @@ public:
u32 cr3() const { return m_directory_table->paddr().get(); } u32 cr3() const { return m_directory_table->paddr().get(); }
RangeAllocator& range_allocator() { return m_range_allocator; } RangeAllocator& range_allocator() { return m_range_allocator; }
const RangeAllocator& range_allocator() const { return m_range_allocator; }
RangeAllocator& identity_range_allocator() { return m_identity_range_allocator; } RangeAllocator& identity_range_allocator() { return m_identity_range_allocator; }
Process* process() { return m_process; } bool is_valid() const { return m_valid; }
const Process* process() const { return m_process; }
Space* space() { return m_space; }
const Space* space() const { return m_space; }
void set_space(Badge<Space>, Space& space) { m_space = &space; }
RecursiveSpinLock& get_lock() { return m_lock; } RecursiveSpinLock& get_lock() { return m_lock; }
private: private:
PageDirectory(Process&, const RangeAllocator* parent_range_allocator); explicit PageDirectory(const RangeAllocator* parent_range_allocator);
PageDirectory(); PageDirectory();
Process* m_process { nullptr }; Space* m_space { nullptr };
RangeAllocator m_range_allocator; RangeAllocator m_range_allocator;
RangeAllocator m_identity_range_allocator; RangeAllocator m_identity_range_allocator;
RefPtr<PhysicalPage> m_directory_table; RefPtr<PhysicalPage> m_directory_table;
RefPtr<PhysicalPage> m_directory_pages[4]; RefPtr<PhysicalPage> m_directory_pages[4];
HashMap<u32, RefPtr<PhysicalPage>> m_page_tables; HashMap<u32, RefPtr<PhysicalPage>> m_page_tables;
RecursiveSpinLock m_lock; RecursiveSpinLock m_lock;
bool m_valid { false };
}; };
} }

222
Kernel/VM/Space.cpp Normal file
View file

@ -0,0 +1,222 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/QuickSort.h>
#include <Kernel/Process.h>
#include <Kernel/SpinLock.h>
#include <Kernel/VM/AnonymousVMObject.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/Space.h>
namespace Kernel {
// Construct a new address space for `process`. If `parent` is non-null, the
// new page directory's range allocator is seeded from the parent's (so a
// forked child sees the same virtual layout). Returns null on failure.
OwnPtr<Space> Space::create(Process& process, const Space* parent)
{
    const RangeAllocator* parent_allocator = parent ? &parent->page_directory().range_allocator() : nullptr;
    auto directory = PageDirectory::create_for_userspace(parent_allocator);
    if (!directory)
        return {};
    auto space = adopt_own(*new Space(process, directory.release_nonnull()));
    // Wire up the back-pointer so MM can find the Space from a PageDirectory.
    space->page_directory().set_space({}, *space);
    return space;
}
// A Space owns its PageDirectory (shared ownership via RefPtr) and keeps a
// raw back-pointer to the owning Process. The directory->space back-pointer
// is set separately by Space::create().
Space::Space(Process& process, NonnullRefPtr<PageDirectory> page_directory)
    : m_process(&process)
    , m_page_directory(move(page_directory))
{
}
// NOTE(review): empty but defined out-of-line — presumably so the destructors
// of owned members (e.g. the NonnullOwnPtrVector<Region>) are instantiated
// here where Region is a complete type; confirm against Space.h's includes.
Space::~Space()
{
}
// Reserve a virtual address range of `size` bytes in this space.
// A null `vaddr` means "anywhere" (honoring `alignment`); otherwise the
// caller wants that exact (page-aligned) address. Returns an empty Optional
// if the range cannot be satisfied.
Optional<Range> Space::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
{
    // Normalize to page granularity before consulting the allocator.
    vaddr.mask(PAGE_MASK);
    size = PAGE_ROUND_UP(size);
    auto& allocator = page_directory().range_allocator();
    if (vaddr.is_null())
        return allocator.allocate_anywhere(size, alignment);
    return allocator.allocate_specific(vaddr, size);
}
// Create a new Region covering `range`, backed by the same VMObject as
// `source_region` at `offset_in_vmobject`, and register it in this space.
// All region flags (syscall/mmap/stack) and per-page CoW state are carried
// over so the split is transparent to the process.
Region& Space::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject)
{
    auto& region = add_region(Region::create_user_accessible(
        m_process, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared()));
    region.set_syscall_region(source_region.is_syscall_region());
    region.set_mmap(source_region.is_mmap());
    region.set_stack(source_region.is_stack());
    // Translate the vmobject offset back into a page index within the source
    // region so we can mirror its copy-on-write bits page by page.
    size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
    for (size_t i = 0; i < region.page_count(); ++i) {
        if (source_region.should_cow(page_offset_in_source_region + i))
            region.set_should_cow(i, true);
    }
    return region;
}
// Allocate a fresh anonymous-memory region over `range` with the given
// protection and commit `strategy`, map it into the page directory, and
// register it in this space. Returns ENOMEM if the backing VMObject cannot
// be created or the mapping fails.
KResultOr<Region*> Space::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy)
{
    ASSERT(range.is_valid());
    auto backing_object = AnonymousVMObject::create_with_size(range.size(), strategy);
    if (!backing_object)
        return ENOMEM;
    auto new_region = Region::create_user_accessible(m_process, range, backing_object.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false);
    if (!new_region->map(page_directory()))
        return ENOMEM;
    return &add_region(move(new_region));
}
// Map a window of `vmobject` (starting at `offset_in_vmobject`) into this
// space at `range`. The window is validated against the VMObject's size
// before anything is created: EINVAL for overflow or out-of-bounds windows,
// ENOMEM if the page-table mapping fails.
KResultOr<Region*> Space::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared)
{
    ASSERT(range.is_valid());
    size_t end_in_vmobject = offset_in_vmobject + range.size();
    // Rejects wrap-around of offset + size (and zero-length windows).
    if (end_in_vmobject <= offset_in_vmobject) {
        dbgln("allocate_region_with_vmobject: Overflow (offset + size)");
        return EINVAL;
    }
    if (offset_in_vmobject >= vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
        return EINVAL;
    }
    if (end_in_vmobject > vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
        return EINVAL;
    }
    // Masked only after validation, so the bounds checks above see the
    // caller's original offset.
    offset_in_vmobject &= PAGE_MASK;
    auto& region = add_region(Region::create_user_accessible(m_process, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared));
    if (!region.map(page_directory())) {
        // FIXME: What is an appropriate error code here, really?
        return ENOMEM;
    }
    return &region;
}
// Remove `region` from this space. Returns true if it was found and removed.
// Note the destruction order: `region_protector` is declared BEFORE the lock,
// so the Region (taken out of m_regions while the lock is held) is actually
// destroyed only after the spinlock has been released.
bool Space::deallocate_region(Region& region)
{
    OwnPtr<Region> region_protector;
    ScopedSpinLock lock(m_lock);
    // Invalidate the lookup cache if it points at the region being removed.
    if (m_region_lookup_cache.region.unsafe_ptr() == &region)
        m_region_lookup_cache.region = nullptr;
    for (size_t i = 0; i < m_regions.size(); ++i) {
        if (&m_regions[i] == &region) {
            // unstable_take: O(1) removal; region order is not preserved.
            region_protector = m_regions.unstable_take(i);
            return true;
        }
    }
    return false;
}
Region* Space::find_region_from_range(const Range& range)
{
ScopedSpinLock lock(m_lock);
if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region)
return m_region_lookup_cache.region.unsafe_ptr();
size_t size = PAGE_ROUND_UP(range.size());
for (auto& region : m_regions) {
if (region.vaddr() == range.base() && region.size() == size) {
m_region_lookup_cache.range = range;
m_region_lookup_cache.region = region;
return &region;
}
}
return nullptr;
}
Region* Space::find_region_containing(const Range& range)
{
ScopedSpinLock lock(m_lock);
for (auto& region : m_regions) {
if (region.contains(range))
return &region;
}
return nullptr;
}
// Take ownership of `region` and register it in this space.
// Returns a reference to the now space-owned Region.
Region& Space::add_region(NonnullOwnPtr<Region> region)
{
    // Grab a reference before the NonnullOwnPtr is moved away.
    auto& added_region = *region;
    ScopedSpinLock locker(m_lock);
    m_regions.append(move(region));
    return added_region;
}
// Carve out a virtual address range from a region and return the two regions on either side
// (or just one, if `desired_range` touches an edge of the source region).
// The source region itself is not removed here; callers are responsible for
// disposing of it after the replacements exist.
Vector<Region*, 2> Space::split_region_around_range(const Region& source_region, const Range& desired_range)
{
    Range old_region_range = source_region.range();
    auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);
    ASSERT(!remaining_ranges_after_unmap.is_empty());
    auto make_replacement_region = [&](const Range& new_range) -> Region& {
        ASSERT(old_region_range.contains(new_range));
        // Keep each replacement region pointed at the right slice of the
        // shared VMObject by offsetting from the original region's offset.
        size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
        return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
    };
    Vector<Region*, 2> new_regions;
    for (auto& new_range : remaining_ranges_after_unmap) {
        new_regions.unchecked_append(&make_replacement_region(new_range));
    }
    return new_regions;
}
// Dump a human-readable table of all regions in this space (sorted by base
// address), followed by the kernel's own regions. Debugging aid only.
void Space::dump_regions()
{
    klog() << "Process regions:";
    klog() << "BEGIN END SIZE ACCESS NAME";
    ScopedSpinLock lock(m_lock);
    // Sort a vector of raw pointers rather than reordering m_regions itself.
    Vector<Region*> sorted_regions;
    sorted_regions.ensure_capacity(m_regions.size());
    for (auto& region : m_regions)
        sorted_regions.append(&region);
    quick_sort(sorted_regions, [](auto& a, auto& b) {
        return a->vaddr() < b->vaddr();
    });
    for (auto& sorted_region : sorted_regions) {
        auto& region = *sorted_region;
        // Six flag characters (R/W/X/S/T/C) follow the three hex columns, so
        // the format string needs six {:c} placeholders — the original had
        // only five for its six char arguments (placeholder/argument
        // mismatch).
        dmesgln("{:08x} -- {:08x} {:08x} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
            region.is_readable() ? 'R' : ' ',
            region.is_writable() ? 'W' : ' ',
            region.is_executable() ? 'X' : ' ',
            region.is_shared() ? 'S' : ' ',
            region.is_stack() ? 'T' : ' ',
            region.is_syscall_region() ? 'C' : ' ',
            region.name());
    }
    MM.dump_kernel_regions();
}
// Tear down every region in this space. Badge<Process> restricts the caller
// to Process code.
void Space::remove_all_regions(Badge<Process>)
{
    ScopedSpinLock locker(m_lock);
    m_regions.clear();
}
}

92
Kernel/VM/Space.h Normal file
View file

@ -0,0 +1,92 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullOwnPtrVector.h>
#include <AK/WeakPtr.h>
#include <Kernel/UnixTypes.h>
#include <Kernel/VM/AllocationStrategy.h>
#include <Kernel/VM/PageDirectory.h>
namespace Kernel {
// A process's address space: owns the PageDirectory and every user Region,
// and serializes access to the region list with a spinlock.
class Space {
public:
    // Returns null on failure; `parent` (if any) seeds the range allocator.
    static OwnPtr<Space> create(Process&, const Space* parent);
    ~Space();
    PageDirectory& page_directory() { return *m_page_directory; }
    const PageDirectory& page_directory() const { return *m_page_directory; }
    // Takes ownership; returns a reference to the space-owned Region.
    Region& add_region(NonnullOwnPtr<Region>);
    size_t region_count() const { return m_regions.size(); }
    NonnullOwnPtrVector<Region>& regions() { return m_regions; }
    const NonnullOwnPtrVector<Region>& regions() const { return m_regions; }
    // Debugging aid: logs all regions sorted by base address.
    void dump_regions();
    // Range/region allocation. A null VirtualAddress means "anywhere".
    Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE);
    KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, const String& name, int prot, bool shared);
    KResultOr<Region*> allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve);
    // Returns true if the region was found and removed.
    bool deallocate_region(Region& region);
    // Splitting helpers used when unmapping part of a region.
    Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject);
    Vector<Region*, 2> split_region_around_range(const Region& source_region, const Range&);
    // Exact-match lookup (cached) vs. containment lookup.
    Region* find_region_from_range(const Range&);
    Region* find_region_containing(const Range&);
    bool enforces_syscall_regions() const { return m_enforces_syscall_regions; }
    void set_enforces_syscall_regions(bool b) { m_enforces_syscall_regions = b; }
    // Badge<Process> restricts callers to Process code.
    void remove_all_regions(Badge<Process>);
    // Lock guarding the region list; mutable so const callers can lock.
    SpinLock<u32>& get_lock() const { return m_lock; }
private:
    Space(Process&, NonnullRefPtr<PageDirectory>);
    Process* m_process { nullptr };
    mutable SpinLock<u32> m_lock;
    RefPtr<PageDirectory> m_page_directory;
    NonnullOwnPtrVector<Region> m_regions;
    // One-entry cache for find_region_from_range(); the WeakPtr goes stale
    // (rather than dangling) when the cached region dies.
    struct RegionLookupCache {
        Optional<Range> range;
        WeakPtr<Region> region;
    };
    RegionLookupCache m_region_lookup_cache;
    bool m_enforces_syscall_regions { false };
};
}