diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 23367237f6..a5e8762685 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -212,6 +212,7 @@ set(KERNEL_SOURCES VM/RangeAllocator.cpp VM/Region.cpp VM/SharedInodeVMObject.cpp + VM/Space.cpp VM/VMObject.cpp WaitQueue.cpp init.cpp diff --git a/Kernel/CoreDump.cpp b/Kernel/CoreDump.cpp index 2f3007e863..4c947f197f 100644 --- a/Kernel/CoreDump.cpp +++ b/Kernel/CoreDump.cpp @@ -59,7 +59,7 @@ OwnPtr CoreDump::create(NonnullRefPtr process, const String& CoreDump::CoreDump(NonnullRefPtr process, NonnullRefPtr&& fd) : m_process(move(process)) , m_fd(move(fd)) - , m_num_program_headers(m_process->m_regions.size() + 1) // +1 for NOTE segment + , m_num_program_headers(m_process->space().region_count() + 1) // +1 for NOTE segment { } @@ -137,7 +137,7 @@ KResult CoreDump::write_elf_header() KResult CoreDump::write_program_headers(size_t notes_size) { size_t offset = sizeof(Elf32_Ehdr) + m_num_program_headers * sizeof(Elf32_Phdr); - for (auto& region : m_process->m_regions) { + for (auto& region : m_process->space().regions()) { Elf32_Phdr phdr {}; phdr.p_type = PT_LOAD; @@ -178,7 +178,7 @@ KResult CoreDump::write_program_headers(size_t notes_size) KResult CoreDump::write_regions() { - for (auto& region : m_process->m_regions) { + for (auto& region : m_process->space().regions()) { if (region.is_kernel()) continue; @@ -258,13 +258,13 @@ ByteBuffer CoreDump::create_notes_threads_data() const ByteBuffer CoreDump::create_notes_regions_data() const { ByteBuffer regions_data; - for (size_t region_index = 0; region_index < m_process->m_regions.size(); ++region_index) { + for (size_t region_index = 0; region_index < m_process->space().region_count(); ++region_index) { ByteBuffer memory_region_info_buffer; ELF::Core::MemoryRegionInfo info {}; info.header.type = ELF::Core::NotesEntryHeader::Type::MemoryRegionInfo; - auto& region = m_process->m_regions[region_index]; + auto& region = m_process->space().regions()[region_index]; info.region_start = reinterpret_cast(region.vaddr().as_ptr()); info.region_end = reinterpret_cast(region.vaddr().as_ptr() + region.size()); info.program_header_index = region_index; @@ -316,7 +316,7 @@ ByteBuffer CoreDump::create_notes_segment_data() const KResult CoreDump::write() { - ScopedSpinLock lock(m_process->get_lock()); + ScopedSpinLock lock(m_process->space().get_lock()); ProcessPagingScope scope(m_process); ByteBuffer notes_segment = create_notes_segment_data(); diff --git a/Kernel/Devices/BXVGADevice.cpp b/Kernel/Devices/BXVGADevice.cpp index 349cbe1079..307f66d2d6 100644 --- a/Kernel/Devices/BXVGADevice.cpp +++ b/Kernel/Devices/BXVGADevice.cpp @@ -185,7 +185,7 @@ KResultOr BXVGADevice::mmap(Process& process, FileDescription&, const R auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/Devices/MBVGADevice.cpp b/Kernel/Devices/MBVGADevice.cpp index 55e7d176e8..999108c554 100644 --- a/Kernel/Devices/MBVGADevice.cpp +++ b/Kernel/Devices/MBVGADevice.cpp @@ -64,7 +64,7 @@ KResultOr MBVGADevice::mmap(Process& process, FileDescription&, const R auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject( + return 
process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/Devices/MemoryDevice.cpp b/Kernel/Devices/MemoryDevice.cpp index acbe53ae90..8763271ebc 100644 --- a/Kernel/Devices/MemoryDevice.cpp +++ b/Kernel/Devices/MemoryDevice.cpp @@ -66,7 +66,7 @@ KResultOr MemoryDevice::mmap(Process& process, FileDescription&, const if (!vmobject) return ENOMEM; dbgln("MemoryDevice: Mapped physical memory at {} for range of {} bytes", viewed_address, range.size()); - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/FileSystem/AnonymousFile.cpp b/Kernel/FileSystem/AnonymousFile.cpp index aede4e9474..82a5669f84 100644 --- a/Kernel/FileSystem/AnonymousFile.cpp +++ b/Kernel/FileSystem/AnonymousFile.cpp @@ -47,7 +47,7 @@ KResultOr AnonymousFile::mmap(Process& process, FileDescription&, const if (range.size() != m_vmobject->size()) return EINVAL; - return process.allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); + return process.space().allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); } } diff --git a/Kernel/FileSystem/InodeFile.cpp b/Kernel/FileSystem/InodeFile.cpp index fe52944006..755e9abbea 100644 --- a/Kernel/FileSystem/InodeFile.cpp +++ b/Kernel/FileSystem/InodeFile.cpp @@ -117,7 +117,7 @@ KResultOr InodeFile::mmap(Process& process, FileDescription& descriptio vmobject = PrivateInodeVMObject::create_with_inode(inode()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); + return process.space().allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); } String InodeFile::absolute_path(const FileDescription& description) const diff --git a/Kernel/FileSystem/ProcFS.cpp b/Kernel/FileSystem/ProcFS.cpp index bb83131cf1..8551c6fece 100644 --- a/Kernel/FileSystem/ProcFS.cpp +++ b/Kernel/FileSystem/ProcFS.cpp @@ -317,8 +317,8 @@ static bool procfs$pid_vm(InodeIdentifier identifier, KBufferBuilder& builder) return false; JsonArraySerializer array { builder }; { - ScopedSpinLock lock(process->get_lock()); - for (auto& region : process->regions()) { + ScopedSpinLock lock(process->space().get_lock()); + for (auto& region : process->space().regions()) { if (!region.is_user_accessible() && !Process::current()->is_superuser()) continue; auto region_object = array.add_object(); diff --git a/Kernel/Forward.h b/Kernel/Forward.h index d2e1c26c54..e5a3de0c27 100644 --- a/Kernel/Forward.h +++ b/Kernel/Forward.h @@ -62,6 +62,7 @@ class Region; class Scheduler; class SchedulerPerProcessorData; class Socket; +class Space; template class SpinLock; class RecursiveSpinLock; diff --git a/Kernel/PerformanceEventBuffer.cpp b/Kernel/PerformanceEventBuffer.cpp index 0aaa07c3fd..c0e6d2e06c 100644 --- a/Kernel/PerformanceEventBuffer.cpp +++ b/Kernel/PerformanceEventBuffer.cpp @@ -121,7 +121,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con { auto process = Process::from_pid(pid); ASSERT(process); - ScopedSpinLock locker(process->get_lock()); + ScopedSpinLock locker(process->space().get_lock()); JsonObjectSerializer object(builder); object.add("pid", pid.value()); @@ -129,7 +129,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con { auto region_array = object.add_array("regions"); - for (const 
auto& region : process->regions()) { + for (const auto& region : process->space().regions()) { auto region_object = region_array.add_object(); region_object.add("base", region.vaddr().get()); region_object.add("size", region.size()); diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index 462ae5bda9..7a39711418 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -116,110 +116,6 @@ bool Process::in_group(gid_t gid) const return m_gid == gid || m_extra_gids.contains_slow(gid); } -Optional Process::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment) -{ - vaddr.mask(PAGE_MASK); - size = PAGE_ROUND_UP(size); - if (vaddr.is_null()) - return page_directory().range_allocator().allocate_anywhere(size, alignment); - return page_directory().range_allocator().allocate_specific(vaddr, size); -} - -Region& Process::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject) -{ - auto& region = add_region( - Region::create_user_accessible(this, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared())); - region.set_syscall_region(source_region.is_syscall_region()); - region.set_mmap(source_region.is_mmap()); - region.set_stack(source_region.is_stack()); - size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; - for (size_t i = 0; i < region.page_count(); ++i) { - if (source_region.should_cow(page_offset_in_source_region + i)) - region.set_should_cow(i, true); - } - return region; -} - -KResultOr Process::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy) -{ - ASSERT(range.is_valid()); - auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy); - if (!vmobject) - return ENOMEM; - auto region = Region::create_user_accessible(this, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false); - if (!region->map(page_directory())) - return ENOMEM; - return &add_region(move(region)); -} - -KResultOr Process::allocate_region_with_vmobject(const Range& range, NonnullRefPtr vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared) -{ - ASSERT(range.is_valid()); - size_t end_in_vmobject = offset_in_vmobject + range.size(); - if (end_in_vmobject <= offset_in_vmobject) { - dbgln("allocate_region_with_vmobject: Overflow (offset + size)"); - return EINVAL; - } - if (offset_in_vmobject >= vmobject->size()) { - dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject."); - return EINVAL; - } - if (end_in_vmobject > vmobject->size()) { - dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject."); - return EINVAL; - } - offset_in_vmobject &= PAGE_MASK; - auto& region = add_region(Region::create_user_accessible(this, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared)); - if (!region.map(page_directory())) { - // FIXME: What is an appropriate error code here, really? 
- return ENOMEM; - } - return ®ion; -} - -bool Process::deallocate_region(Region& region) -{ - OwnPtr region_protector; - ScopedSpinLock lock(m_lock); - - if (m_region_lookup_cache.region.unsafe_ptr() == ®ion) - m_region_lookup_cache.region = nullptr; - for (size_t i = 0; i < m_regions.size(); ++i) { - if (&m_regions[i] == ®ion) { - region_protector = m_regions.unstable_take(i); - return true; - } - } - return false; -} - -Region* Process::find_region_from_range(const Range& range) -{ - ScopedSpinLock lock(m_lock); - if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region) - return m_region_lookup_cache.region.unsafe_ptr(); - - size_t size = PAGE_ROUND_UP(range.size()); - for (auto& region : m_regions) { - if (region.vaddr() == range.base() && region.size() == size) { - m_region_lookup_cache.range = range; - m_region_lookup_cache.region = region; - return ®ion; - } - } - return nullptr; -} - -Region* Process::find_region_containing(const Range& range) -{ - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { - if (region.contains(range)) - return ®ion; - } - return nullptr; -} - void Process::kill_threads_except_self() { InterruptDisabler disabler; @@ -339,7 +235,7 @@ Process::Process(RefPtr& first_thread, const String& name, uid_t uid, gi { dbgln_if(PROCESS_DEBUG, "Created new process {}({})", m_name, m_pid.value()); - m_page_directory = PageDirectory::create_for_userspace(*this, fork_parent ? &fork_parent->page_directory().range_allocator() : nullptr); + m_space = Space::create(*this, fork_parent ? &fork_parent->space() : nullptr); if (fork_parent) { // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process. @@ -365,28 +261,6 @@ Process::~Process() } } -void Process::dump_regions() -{ - klog() << "Process regions:"; - klog() << "BEGIN END SIZE ACCESS NAME"; - - ScopedSpinLock lock(m_lock); - - Vector sorted_regions; - sorted_regions.ensure_capacity(m_regions.size()); - for (auto& region : m_regions) - sorted_regions.append(®ion); - quick_sort(sorted_regions, [](auto& a, auto& b) { - return a->vaddr() < b->vaddr(); - }); - - for (auto& sorted_region : sorted_regions) { - auto& region = *sorted_region; - klog() << String::format("%08x", region.vaddr().get()) << " -- " << String::format("%08x", region.vaddr().offset(region.size() - 1).get()) << " " << String::format("%08zx", region.size()) << " " << (region.is_readable() ? 'R' : ' ') << (region.is_writable() ? 'W' : ' ') << (region.is_executable() ? 'X' : ' ') << (region.is_shared() ? 'S' : ' ') << (region.is_stack() ? 'T' : ' ') << (region.vmobject().is_anonymous() ? 
'A' : ' ') << " " << region.name().characters(); - } - MM.dump_kernel_regions(); -} - // Make sure the compiler doesn't "optimize away" this function: extern void signal_trampoline_dummy(); void signal_trampoline_dummy() @@ -457,7 +331,7 @@ void Process::crash(int signal, u32 eip, bool out_of_memory) } m_termination_signal = signal; set_dump_core(!out_of_memory); - dump_regions(); + space().dump_regions(); ASSERT(is_user_process()); die(); // We can not return from here, as there is nowhere @@ -643,10 +517,7 @@ void Process::finalize() unblock_waiters(Thread::WaitBlocker::UnblockFlags::Terminated); - { - ScopedSpinLock lock(m_lock); - m_regions.clear(); - } + m_space->remove_all_regions({}); ASSERT(ref_count() > 0); // WaitBlockCondition::finalize will be in charge of dropping the last @@ -689,8 +560,8 @@ size_t Process::amount_dirty_private() const // The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping. // That's probably a situation that needs to be looked at in general. size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (!region.is_shared()) amount += region.amount_dirty(); } @@ -701,8 +572,8 @@ size_t Process::amount_clean_inode() const { HashTable vmobjects; { - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_inode()) vmobjects.set(&static_cast(region.vmobject())); } @@ -716,8 +587,8 @@ size_t Process::amount_clean_inode() const size_t Process::amount_virtual() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.size(); } return amount; @@ -727,8 +598,8 @@ size_t Process::amount_resident() const { // FIXME: This will double count if multiple regions use the same physical page. size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.amount_resident(); } return amount; @@ -741,8 +612,8 @@ size_t Process::amount_shared() const // and each PhysicalPage is only reffed by its VMObject. This needs to be refactored // so that every Region contributes +1 ref to each of its PhysicalPages. 
size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.amount_shared(); } return amount; @@ -751,8 +622,8 @@ size_t Process::amount_shared() const size_t Process::amount_purgeable_volatile() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_anonymous() && static_cast(region.vmobject()).is_any_volatile()) amount += region.amount_resident(); } @@ -762,8 +633,8 @@ size_t Process::amount_purgeable_volatile() const size_t Process::amount_purgeable_nonvolatile() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_anonymous() && !static_cast(region.vmobject()).is_any_volatile()) amount += region.amount_resident(); } @@ -858,14 +729,6 @@ void Process::set_root_directory(const Custody& root) m_root_directory = root; } -Region& Process::add_region(NonnullOwnPtr region) -{ - auto* ptr = region.ptr(); - ScopedSpinLock lock(m_lock); - m_regions.append(move(region)); - return *ptr; -} - void Process::set_tty(TTY* tty) { m_tty = tty; diff --git a/Kernel/Process.h b/Kernel/Process.h index 9fdd863efd..427dbb660d 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2018-2021, Andreas Kling * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,8 @@ enum class VeilState { typedef HashMap> FutexQueues; +struct LoadResult; + class Process : public RefCounted , public InlineLinkedListNode @@ -164,9 +167,6 @@ public: bool is_kernel_process() const { return m_is_kernel_process; } bool is_user_process() const { return !m_is_kernel_process; } - PageDirectory& page_directory() { return *m_page_directory; } - const PageDirectory& page_directory() const { return *m_page_directory; } - static RefPtr from_pid(ProcessID); static SessionID get_sid_from_pgid(ProcessGroupID pgid); @@ -188,8 +188,6 @@ public: bool is_dumpable() const { return m_dumpable; } void set_dumpable(bool dumpable) { m_dumpable = dumpable; } - ThreadID exec_tid() const { return m_exec_tid; } - mode_t umask() const { return m_umask; } bool in_group(gid_t) const; @@ -209,8 +207,6 @@ public: void die(); void finalize(); - ALWAYS_INLINE SpinLock& get_lock() const { return m_lock; } - ThreadTracer* tracer() { return m_tracer.ptr(); } bool is_traced() const { return !!m_tracer; } void start_tracing_from(ProcessID tracer); @@ -373,14 +369,6 @@ public: const TTY* tty() const { return m_tty; } void set_tty(TTY*); - size_t region_count() const { return m_regions.size(); } - const NonnullOwnPtrVector& regions() const - { - ASSERT(m_lock.is_locked()); - return m_regions; - } - void dump_regions(); - u32 m_ticks_in_user { 0 }; u32 m_ticks_in_kernel { 0 }; @@ -410,25 +398,12 @@ public: int exec(String path, Vector arguments, Vector environment, int recusion_depth = 0); - struct LoadResult { - FlatPtr load_base { 0 }; - FlatPtr entry_eip { 0 }; - size_t size { 0 }; - FlatPtr program_headers { 0 }; - size_t num_program_headers { 0 }; - WeakPtr tls_region; - size_t tls_size { 0 }; - size_t tls_alignment { 0 }; - WeakPtr stack_region; - 
}; - enum class ShouldAllocateTls { No = 0, Yes, }; KResultOr load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const Elf32_Ehdr& main_program_header); - KResultOr load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls); KResultOr get_interpreter_load_offset(const Elf32_Ehdr& main_program_header, FileDescription& main_program_description, FileDescription& interpreter_description); bool is_superuser() const @@ -436,13 +411,6 @@ public: return m_euid == 0; } - KResultOr allocate_region_with_vmobject(const Range&, NonnullRefPtr, size_t offset_in_vmobject, const String& name, int prot, bool shared); - KResultOr allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve); - bool deallocate_region(Region& region); - - Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject); - Vector split_region_around_range(const Region& source_region, const Range&); - void terminate_due_to_signal(u8 signal); KResult send_signal(u8 signal, Process* sender); @@ -503,7 +471,8 @@ public: PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; } - bool enforces_syscall_regions() const { return m_enforces_syscall_regions; } + Space& space() { return *m_space; } + const Space& space() const { return *m_space; } private: friend class MemoryManager; @@ -518,10 +487,6 @@ private: Process(RefPtr& first_thread, const String& name, uid_t, gid_t, ProcessID ppid, bool is_kernel_process, RefPtr cwd = nullptr, RefPtr executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr); static ProcessID allocate_pid(); - Optional allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE); - - Region& add_region(NonnullOwnPtr); - void kill_threads_except_self(); void kill_all_threads(); bool dump_core(); @@ -552,13 +517,13 @@ private: void clear_futex_queues_on_exec(); - RefPtr m_page_directory; - Process* m_prev { nullptr }; Process* m_next { nullptr }; String m_name; + OwnPtr m_space; + ProcessID m_pid { 0 }; SessionID m_sid { 0 }; RefPtr m_pg; @@ -570,8 +535,6 @@ private: uid_t m_suid { 0 }; gid_t m_sgid { 0 }; - ThreadID m_exec_tid { 0 }; - OwnPtr m_tracer; static const int m_max_open_file_descriptors { FD_SETSIZE }; @@ -617,16 +580,6 @@ private: RefPtr m_tty; - Region* find_region_from_range(const Range&); - Region* find_region_containing(const Range&); - - NonnullOwnPtrVector m_regions; - struct RegionLookupCache { - Optional range; - WeakPtr region; - }; - RegionLookupCache m_region_lookup_cache; - ProcessID m_ppid { 0 }; mode_t m_umask { 022 }; @@ -639,12 +592,9 @@ private: size_t m_master_tls_alignment { 0 }; Lock m_big_lock { "Process" }; - mutable SpinLock m_lock; RefPtr m_alarm_timer; - bool m_enforces_syscall_regions { false }; - bool m_has_promises { false }; u32 m_promises { 0 }; bool m_has_execpromises { false }; diff --git a/Kernel/Syscall.cpp b/Kernel/Syscall.cpp index 8eb79eca52..62652008a5 100644 --- a/Kernel/Syscall.cpp +++ b/Kernel/Syscall.cpp @@ -176,7 +176,7 @@ void syscall_handler(TrapFrame* trap) ASSERT_NOT_REACHED(); } - auto* calling_region = MM.find_region_from_vaddr(process, VirtualAddress(regs.eip)); + auto* calling_region = MM.find_region_from_vaddr(process.space(), VirtualAddress(regs.eip)); if (!calling_region) { dbgln("Syscall from {:p} which has no associated region", regs.eip); handle_crash(regs, "Syscall from unknown region", SIGSEGV); @@ -189,7 +189,7 @@ void 
syscall_handler(TrapFrame* trap) ASSERT_NOT_REACHED(); } - if (process.enforces_syscall_regions() && !calling_region->is_syscall_region()) { + if (process.space().enforces_syscall_regions() && !calling_region->is_syscall_region()) { dbgln("Syscall from non-syscall region"); handle_crash(regs, "Syscall from non-syscall region", SIGSEGV); ASSERT_NOT_REACHED(); diff --git a/Kernel/Syscalls/execve.cpp b/Kernel/Syscalls/execve.cpp index e102f56970..a06969f905 100644 --- a/Kernel/Syscalls/execve.cpp +++ b/Kernel/Syscalls/execve.cpp @@ -47,6 +47,19 @@ namespace Kernel { +struct LoadResult { + OwnPtr space; + FlatPtr load_base { 0 }; + FlatPtr entry_eip { 0 }; + size_t size { 0 }; + FlatPtr program_headers { 0 }; + size_t num_program_headers { 0 }; + WeakPtr tls_region; + size_t tls_size { 0 }; + size_t tls_alignment { 0 }; + WeakPtr stack_region; +}; + static Vector generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd); static bool validate_stack_size(const Vector& arguments, const Vector& environment) @@ -142,7 +155,7 @@ static KResultOr make_userspace_stack_for_main_thread(Region& region, V return new_esp; } -KResultOr Process::load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls) +static KResultOr load_elf_object(NonnullOwnPtr new_space, FileDescription& object_description, FlatPtr load_offset, Process::ShouldAllocateTls should_allocate_tls) { auto& inode = *(object_description.inode()); auto vmobject = SharedInodeVMObject::create_with_inode(inode); @@ -172,10 +185,12 @@ KResultOr Process::load_elf_object(FileDescription& object_ String elf_name = object_description.absolute_path(); ASSERT(!Processor::current().in_critical()); + MemoryManager::enter_space(*new_space); + KResult ph_load_result = KSuccess; elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) { if (program_header.type() == PT_TLS) { - ASSERT(should_allocate_tls == ShouldAllocateTls::Yes); + ASSERT(should_allocate_tls == Process::ShouldAllocateTls::Yes); ASSERT(program_header.size_in_memory()); if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) { @@ -184,13 +199,13 @@ KResultOr Process::load_elf_object(FileDescription& object_ return IterationDecision::Break; } - auto range = allocate_range({}, program_header.size_in_memory()); + auto range = new_space->allocate_range({}, program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); + auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -225,12 +240,12 @@ KResultOr Process::load_elf_object(FileDescription& object_ if (program_header.is_writable()) prot |= PROT_WRITE; auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? 
"w" : ""); - auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); + auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); + auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -262,12 +277,12 @@ KResultOr Process::load_elf_object(FileDescription& object_ prot |= PROT_WRITE; if (program_header.is_executable()) prot |= PROT_EXEC; - auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); + auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); + auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -287,19 +302,20 @@ KResultOr Process::load_elf_object(FileDescription& object_ return ENOEXEC; } - auto stack_range = allocate_range({}, Thread::default_userspace_stack_size); + auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size); if (!stack_range.has_value()) { dbgln("do_exec: Failed to allocate VM range for stack"); return ENOMEM; } - auto stack_region_or_error = allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); + auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (stack_region_or_error.is_error()) return stack_region_or_error.error(); auto& stack_region = *stack_region_or_error.value(); stack_region.set_stack(true); return LoadResult { + move(new_space), load_base_address, elf_image.entry().offset(load_offset).get(), executable_size, @@ -312,44 +328,20 @@ KResultOr Process::load_elf_object(FileDescription& object_ }; } -KResultOr Process::load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const Elf32_Ehdr& main_program_header) +KResultOr Process::load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const Elf32_Ehdr& main_program_header) { - RefPtr old_page_directory; - NonnullOwnPtrVector old_regions; + auto new_space = Space::create(*this, nullptr); + if (!new_space) + return ENOMEM; - { - auto page_directory = PageDirectory::create_for_userspace(*this); - if (!page_directory) - return ENOMEM; - - // Need to make sure we don't swap contexts in the middle - ScopedCritical critical; - old_page_directory = move(m_page_directory); - old_regions = move(m_regions); - m_page_directory = page_directory.release_nonnull(); - MM.enter_process_paging_scope(*this); - } - - ArmedScopeGuard rollback_regions_guard([&]() { - ASSERT(Process::current() == this); - // Need to make sure we don't swap contexts in the middle - ScopedCritical critical; - // Explicitly clear m_regions 
*before* restoring the page directory, - // otherwise we may silently corrupt memory! - m_regions.clear(); - // Now that we freed the regions, revert to the original page directory - // and restore the original regions - m_page_directory = move(old_page_directory); - MM.enter_process_paging_scope(*this); - m_regions = move(old_regions); + ScopeGuard space_guard([&]() { + MemoryManager::enter_process_paging_scope(*this); }); if (interpreter_description.is_null()) { - auto result = load_elf_object(main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); + auto result = load_elf_object(new_space.release_nonnull(), main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); if (result.is_error()) return result.error(); - - rollback_regions_guard.disarm(); return result; } @@ -358,7 +350,7 @@ KResultOr Process::load(NonnullRefPtr main return interpreter_load_offset.error(); } - auto interpreter_load_result = load_elf_object(*interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); + auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); if (interpreter_load_result.is_error()) return interpreter_load_result.error(); @@ -368,7 +360,6 @@ KResultOr Process::load(NonnullRefPtr main ASSERT(!interpreter_load_result.value().tls_alignment); ASSERT(!interpreter_load_result.value().tls_size); - rollback_regions_guard.disarm(); return interpreter_load_result; } @@ -481,34 +472,22 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve if (parts.is_empty()) return -ENOENT; + auto main_program_metadata = main_program_description->metadata(); + + auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); + if (load_result_or_error.is_error()) { + dbgln("do_exec({}): Failed to load main program or interpreter", path); + return load_result_or_error.error(); + } + + // We commit to the new executable at this point. There is no turning back! + // Disable profiling temporarily in case it's running on this process. TemporaryChange profiling_disabler(m_profiling, false); - // Mark this thread as the current thread that does exec - // No other thread from this process will be scheduled to run - auto current_thread = Thread::current(); - m_exec_tid = current_thread->tid(); - - // NOTE: We switch credentials before altering the memory layout of the process. - // This ensures that ptrace access control takes the right credentials into account. - - // FIXME: This still feels rickety. Perhaps it would be better to simply block ptrace - // clients until we're ready to be traced? Or reject them with EPERM? 
- - auto main_program_metadata = main_program_description->metadata(); - - auto old_euid = m_euid; - auto old_suid = m_suid; - auto old_egid = m_egid; - auto old_sgid = m_sgid; - - ArmedScopeGuard cred_restore_guard = [&] { - m_euid = old_euid; - m_suid = old_suid; - m_egid = old_egid; - m_sgid = old_sgid; - }; + kill_threads_except_self(); + auto& load_result = load_result_or_error.value(); bool executable_is_setid = false; if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) { @@ -522,17 +501,8 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve } } - auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); - if (load_result_or_error.is_error()) { - dbgln("do_exec({}): Failed to load main program or interpreter", path); - return load_result_or_error.error(); - } - auto& load_result = load_result_or_error.value(); - - // We can commit to the new credentials at this point. - cred_restore_guard.disarm(); - - kill_threads_except_self(); + m_space = load_result.space.release_nonnull(); + MemoryManager::enter_space(*m_space); #if EXEC_DEBUG dbgln("Memory layout after ELF load:"); @@ -549,20 +519,17 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve m_execpromises = 0; m_has_execpromises = false; - m_enforces_syscall_regions = false; - m_veil_state = VeilState::None; m_unveiled_paths.clear(); m_coredump_metadata.clear(); + auto current_thread = Thread::current(); current_thread->set_default_signal_dispositions(); current_thread->clear_signals(); clear_futex_queues_on_exec(); - m_region_lookup_cache = {}; - set_dumpable(!executable_is_setid); for (size_t i = 0; i < m_fds.size(); ++i) { @@ -616,8 +583,10 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve // FIXME: PID/TID ISSUE m_pid = new_main_thread->tid().value(); auto tsr_result = new_main_thread->make_thread_specific_region({}); - if (tsr_result.is_error()) - return tsr_result.error(); + if (tsr_result.is_error()) { + // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable. + ASSERT_NOT_REACHED(); + } new_main_thread->reset_fpu_state(); auto& tss = new_main_thread->m_tss; @@ -629,7 +598,7 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve tss.gs = GDT_SELECTOR_TLS | 3; tss.eip = load_result.entry_eip; tss.esp = new_userspace_esp; - tss.cr3 = m_page_directory->cr3(); + tss.cr3 = space().page_directory().cr3(); tss.ss2 = m_pid.value(); // Throw away any recorded performance events in this process. 
@@ -870,8 +839,6 @@ int Process::exec(String path, Vector arguments, Vector environm u32 prev_flags = 0; int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header); - m_exec_tid = 0; - if (rc < 0) return rc; diff --git a/Kernel/Syscalls/fork.cpp b/Kernel/Syscalls/fork.cpp index 62287358c8..c5b4a35bc8 100644 --- a/Kernel/Syscalls/fork.cpp +++ b/Kernel/Syscalls/fork.cpp @@ -47,15 +47,14 @@ pid_t Process::sys$fork(RegisterState& regs) child->m_has_execpromises = m_has_execpromises; child->m_veil_state = m_veil_state; child->m_unveiled_paths = m_unveiled_paths.deep_copy(); - child->m_enforces_syscall_regions = m_enforces_syscall_regions; child->m_fds = m_fds; child->m_sid = m_sid; child->m_pg = m_pg; child->m_umask = m_umask; + child->m_extra_gids = m_extra_gids; dbgln_if(FORK_DEBUG, "fork: child={}", child); - - child->m_extra_gids = m_extra_gids; + child->space().set_enforces_syscall_regions(space().enforces_syscall_regions()); auto& child_tss = child_first_thread->m_tss; child_tss.eax = 0; // fork() returns 0 in the child :^) @@ -80,8 +79,8 @@ pid_t Process::sys$fork(RegisterState& regs) #endif { - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { dbgln_if(FORK_DEBUG, "fork: cloning Region({}) '{}' @ {}", ®ion, region.name(), region.vaddr()); auto region_clone = region.clone(*child); if (!region_clone) { @@ -90,8 +89,8 @@ pid_t Process::sys$fork(RegisterState& regs) return -ENOMEM; } - auto& child_region = child->add_region(region_clone.release_nonnull()); - child_region.map(child->page_directory()); + auto& child_region = child->space().add_region(region_clone.release_nonnull()); + child_region.map(child->space().page_directory()); if (®ion == m_master_tls_region.unsafe_ptr()) child->m_master_tls_region = child_region; diff --git a/Kernel/Syscalls/futex.cpp b/Kernel/Syscalls/futex.cpp index 27763ea965..bee8bf9bef 100644 --- a/Kernel/Syscalls/futex.cpp +++ b/Kernel/Syscalls/futex.cpp @@ -147,7 +147,7 @@ int Process::sys$futex(Userspace user_params) if (!is_private) { if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset), sizeof(u32))) return -EFAULT; - auto region = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset)); + auto region = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset)); if (!region) return -EFAULT; vmobject = region->vmobject(); @@ -159,7 +159,7 @@ int Process::sys$futex(Userspace user_params) case FUTEX_WAKE_OP: { if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset2), sizeof(u32))) return -EFAULT; - auto region2 = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset2)); + auto region2 = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset2)); if (!region2) return -EFAULT; vmobject2 = region2->vmobject(); diff --git a/Kernel/Syscalls/get_stack_bounds.cpp b/Kernel/Syscalls/get_stack_bounds.cpp index a91a401946..a1b115ec3c 100644 --- a/Kernel/Syscalls/get_stack_bounds.cpp +++ b/Kernel/Syscalls/get_stack_bounds.cpp @@ -32,7 +32,7 @@ namespace Kernel { int Process::sys$get_stack_bounds(FlatPtr* user_stack_base, size_t* user_stack_size) { FlatPtr stack_pointer = Thread::current()->get_register_dump_from_stack().userspace_esp; - auto* stack_region = MM.find_region_from_vaddr(*this, VirtualAddress(stack_pointer)); + auto* stack_region = 
MM.find_region_from_vaddr(space(), VirtualAddress(stack_pointer)); if (!stack_region) { ASSERT_NOT_REACHED(); return -EINVAL; diff --git a/Kernel/Syscalls/mmap.cpp b/Kernel/Syscalls/mmap.cpp index eaf0f5c1b5..4a6bb96662 100644 --- a/Kernel/Syscalls/mmap.cpp +++ b/Kernel/Syscalls/mmap.cpp @@ -204,13 +204,13 @@ void* Process::sys$mmap(Userspace user_params) Optional range; if (map_randomized) { - range = page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); + range = space().page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); } else { - range = allocate_range(VirtualAddress(addr), size, alignment); + range = space().allocate_range(VirtualAddress(addr), size, alignment); if (!range.has_value()) { if (addr && !map_fixed) { // If there's an address but MAP_FIXED wasn't specified, the address is just a hint. - range = allocate_range({}, size, alignment); + range = space().allocate_range({}, size, alignment); } } } @@ -220,7 +220,7 @@ void* Process::sys$mmap(Userspace user_params) if (map_anonymous) { auto strategy = map_noreserve ? AllocationStrategy::None : AllocationStrategy::Reserve; - auto region_or_error = allocate_region(range.value(), !name.is_null() ? name : "mmap", prot, strategy); + auto region_or_error = space().allocate_region(range.value(), !name.is_null() ? name : "mmap", prot, strategy); if (region_or_error.is_error()) return (void*)region_or_error.error().error(); region = region_or_error.value(); @@ -280,7 +280,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) Range range_to_mprotect = { VirtualAddress(addr), size }; - if (auto* whole_region = find_region_from_range(range_to_mprotect)) { + if (auto* whole_region = space().find_region_from_range(range_to_mprotect)) { if (!whole_region->is_mmap()) return -EPERM; if (!validate_mmap_prot(prot, whole_region->is_stack(), whole_region->vmobject().is_anonymous(), whole_region)) @@ -300,7 +300,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) } // Check if we can carve out the desired range from an existing region - if (auto* old_region = find_region_containing(range_to_mprotect)) { + if (auto* old_region = space().find_region_containing(range_to_mprotect)) { if (!old_region->is_mmap()) return -EPERM; if (!validate_mmap_prot(prot, old_region->is_stack(), old_region->vmobject().is_anonymous(), old_region)) @@ -314,23 +314,23 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) // This vector is the region(s) adjacent to our range. // We need to allocate a new region for the range we wanted to change permission bits on. - auto adjacent_regions = split_region_around_range(*old_region, range_to_mprotect); + auto adjacent_regions = space().split_region_around_range(*old_region, range_to_mprotect); size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get()); - auto& new_region = allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); + auto& new_region = space().allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); new_region.set_readable(prot & PROT_READ); new_region.set_writable(prot & PROT_WRITE); new_region.set_executable(prot & PROT_EXEC); // Unmap the old region here, specifying that we *don't* want the VM deallocated. 
old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); // Map the new regions using our page directory (they were just allocated and don't have one). for (auto* adjacent_region : adjacent_regions) { - adjacent_region->map(page_directory()); + adjacent_region->map(space().page_directory()); } - new_region.map(page_directory()); + new_region.map(space().page_directory()); return 0; } @@ -349,7 +349,7 @@ int Process::sys$madvise(void* address, size_t size, int advice) if (!is_user_range(VirtualAddress(address), size)) return -EFAULT; - auto* region = find_region_from_range({ VirtualAddress(address), size }); + auto* region = space().find_region_from_range({ VirtualAddress(address), size }); if (!region) return -EINVAL; if (!region->is_mmap()) @@ -397,7 +397,7 @@ int Process::sys$set_mmap_name(Userspaceis_mmap()) @@ -406,24 +406,6 @@ int Process::sys$set_mmap_name(Userspace Process::split_region_around_range(const Region& source_region, const Range& desired_range) -{ - Range old_region_range = source_region.range(); - auto remaining_ranges_after_unmap = old_region_range.carve(desired_range); - - ASSERT(!remaining_ranges_after_unmap.is_empty()); - auto make_replacement_region = [&](const Range& new_range) -> Region& { - ASSERT(old_region_range.contains(new_range)); - size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get()); - return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject); - }; - Vector new_regions; - for (auto& new_range : remaining_ranges_after_unmap) { - new_regions.unchecked_append(&make_replacement_region(new_range)); - } - return new_regions; -} int Process::sys$munmap(void* addr, size_t size) { REQUIRE_PROMISE(stdio); @@ -435,30 +417,30 @@ int Process::sys$munmap(void* addr, size_t size) return -EFAULT; Range range_to_unmap { VirtualAddress(addr), size }; - if (auto* whole_region = find_region_from_range(range_to_unmap)) { + if (auto* whole_region = space().find_region_from_range(range_to_unmap)) { if (!whole_region->is_mmap()) return -EPERM; - bool success = deallocate_region(*whole_region); + bool success = space().deallocate_region(*whole_region); ASSERT(success); return 0; } - if (auto* old_region = find_region_containing(range_to_unmap)) { + if (auto* old_region = space().find_region_containing(range_to_unmap)) { if (!old_region->is_mmap()) return -EPERM; - auto new_regions = split_region_around_range(*old_region, range_to_unmap); + auto new_regions = space().split_region_around_range(*old_region, range_to_unmap); // We manually unmap the old region here, specifying that we *don't* want the VM deallocated. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); // Instead we give back the unwanted VM manually. - page_directory().range_allocator().deallocate(range_to_unmap); + space().page_directory().range_allocator().deallocate(range_to_unmap); // And finally we map the new region(s) using our page directory (they were just allocated and don't have one). 
for (auto* new_region : new_regions) { - new_region->map(page_directory()); + new_region->map(space().page_directory()); } return 0; } @@ -476,7 +458,7 @@ void* Process::sys$mremap(Userspace user_param if (!copy_from_user(¶ms, user_params)) return (void*)-EFAULT; - auto* old_region = find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); + auto* old_region = space().find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); if (!old_region) return (void*)-EINVAL; @@ -491,11 +473,11 @@ void* Process::sys$mremap(Userspace user_param // Unmap without deallocating the VM range since we're going to reuse it. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); auto new_vmobject = PrivateInodeVMObject::create_with_inode(inode); - auto new_region_or_error = allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); + auto new_region_or_error = space().allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); if (new_region_or_error.is_error()) return (void*)new_region_or_error.error().error(); auto& new_region = *new_region_or_error.value(); @@ -527,11 +509,11 @@ void* Process::sys$allocate_tls(size_t size) }); ASSERT(main_thread); - auto range = allocate_range({}, size); + auto range = space().allocate_range({}, size); if (!range.has_value()) return (void*)-ENOMEM; - auto region_or_error = allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); + auto region_or_error = space().allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); if (region_or_error.is_error()) return (void*)region_or_error.error().error(); @@ -552,15 +534,15 @@ void* Process::sys$allocate_tls(size_t size) int Process::sys$msyscall(void* address) { - if (m_enforces_syscall_regions) + if (space().enforces_syscall_regions()) return -EPERM; if (!address) { - m_enforces_syscall_regions = true; + space().set_enforces_syscall_regions(true); return 0; } - auto* region = find_region_containing(Range { VirtualAddress { address }, 1 }); + auto* region = space().find_region_containing(Range { VirtualAddress { address }, 1 }); if (!region) return -EINVAL; diff --git a/Kernel/Syscalls/ptrace.cpp b/Kernel/Syscalls/ptrace.cpp index 26e91fcaae..22cb45ec49 100644 --- a/Kernel/Syscalls/ptrace.cpp +++ b/Kernel/Syscalls/ptrace.cpp @@ -73,7 +73,7 @@ KResultOr Process::peek_user_data(Userspace address) KResult Process::poke_user_data(Userspace address, u32 data) { Range range = { VirtualAddress(address), sizeof(u32) }; - auto* region = find_region_containing(range); + auto* region = space().find_region_containing(range); if (!region) return EFAULT; ProcessPagingScope scope(*this); diff --git a/Kernel/Syscalls/thread.cpp b/Kernel/Syscalls/thread.cpp index d65591833a..ff8d4b0ae3 100644 --- a/Kernel/Syscalls/thread.cpp +++ b/Kernel/Syscalls/thread.cpp @@ -80,7 +80,7 @@ int Process::sys$create_thread(void* (*entry)(void*), Userspacetss(); tss.eip = (FlatPtr)entry; tss.eflags = 0x0202; - tss.cr3 = page_directory().cr3(); + tss.cr3 = space().page_directory().cr3(); tss.esp = (u32)user_stack_address; auto tsr_result = thread->make_thread_specific_region({}); diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp index 3a0fba1b4e..2dc0c7d924 100644 --- a/Kernel/Thread.cpp +++ b/Kernel/Thread.cpp @@ -108,7 +108,7 @@ Thread::Thread(NonnullRefPtr process, NonnullOwnPtr kernel_stac m_tss.gs = GDT_SELECTOR_TLS | 3; } - m_tss.cr3 = 
m_process->page_directory().cr3(); + m_tss.cr3 = m_process->space().page_directory().cr3(); m_kernel_stack_base = m_kernel_stack_region->vaddr().get(); m_kernel_stack_top = m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u; @@ -1015,11 +1015,11 @@ KResult Thread::make_thread_specific_region(Badge) if (!process().m_master_tls_region) return KSuccess; - auto range = process().allocate_range({}, thread_specific_region_size()); + auto range = process().space().allocate_range({}, thread_specific_region_size()); if (!range.has_value()) return ENOMEM; - auto region_or_error = process().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); + auto region_or_error = process().space().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); if (region_or_error.is_error()) return region_or_error.error(); diff --git a/Kernel/VM/MemoryManager.cpp b/Kernel/VM/MemoryManager.cpp index 3ee5f39671..60c299b170 100644 --- a/Kernel/VM/MemoryManager.cpp +++ b/Kernel/VM/MemoryManager.cpp @@ -401,29 +401,29 @@ Region* MemoryManager::kernel_region_from_vaddr(VirtualAddress vaddr) return nullptr; } -Region* MemoryManager::user_region_from_vaddr(Process& process, VirtualAddress vaddr) +Region* MemoryManager::user_region_from_vaddr(Space& space, VirtualAddress vaddr) { - ScopedSpinLock lock(s_mm_lock); // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure! - for (auto& region : process.m_regions) { + ScopedSpinLock lock(space.get_lock()); + for (auto& region : space.regions()) { if (region.contains(vaddr)) return ®ion; } return nullptr; } -Region* MemoryManager::find_region_from_vaddr(Process& process, VirtualAddress vaddr) +Region* MemoryManager::find_region_from_vaddr(Space& space, VirtualAddress vaddr) { ScopedSpinLock lock(s_mm_lock); - if (auto* region = user_region_from_vaddr(process, vaddr)) + if (auto* region = user_region_from_vaddr(space, vaddr)) return region; return kernel_region_from_vaddr(vaddr); } -const Region* MemoryManager::find_region_from_vaddr(const Process& process, VirtualAddress vaddr) +const Region* MemoryManager::find_region_from_vaddr(const Space& space, VirtualAddress vaddr) { ScopedSpinLock lock(s_mm_lock); - if (auto* region = user_region_from_vaddr(const_cast(process), vaddr)) + if (auto* region = user_region_from_vaddr(const_cast(space), vaddr)) return region; return kernel_region_from_vaddr(vaddr); } @@ -436,8 +436,8 @@ Region* MemoryManager::find_region_from_vaddr(VirtualAddress vaddr) auto page_directory = PageDirectory::find_by_cr3(read_cr3()); if (!page_directory) return nullptr; - ASSERT(page_directory->process()); - return user_region_from_vaddr(*page_directory->process(), vaddr); + ASSERT(page_directory->space()); + return user_region_from_vaddr(*page_directory->space(), vaddr); } PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault) @@ -734,13 +734,18 @@ RefPtr MemoryManager::allocate_supervisor_physical_page() } void MemoryManager::enter_process_paging_scope(Process& process) +{ + enter_space(process.space()); +} + +void MemoryManager::enter_space(Space& space) { auto current_thread = Thread::current(); ASSERT(current_thread != nullptr); ScopedSpinLock lock(s_mm_lock); - current_thread->tss().cr3 = process.page_directory().cr3(); - write_cr3(process.page_directory().cr3()); + current_thread->tss().cr3 = space.page_directory().cr3(); + write_cr3(space.page_directory().cr3()); } void MemoryManager::flush_tlb_local(VirtualAddress vaddr, 
size_t page_count) @@ -846,7 +851,7 @@ bool MemoryManager::validate_user_stack(const Process& process, VirtualAddress v if (!is_user_address(vaddr)) return false; ScopedSpinLock lock(s_mm_lock); - auto* region = user_region_from_vaddr(const_cast(process), vaddr); + auto* region = user_region_from_vaddr(const_cast(process).space(), vaddr); return region && region->is_user_accessible() && region->is_stack(); } diff --git a/Kernel/VM/MemoryManager.h b/Kernel/VM/MemoryManager.h index 13c3a79d78..ceee66796e 100644 --- a/Kernel/VM/MemoryManager.h +++ b/Kernel/VM/MemoryManager.h @@ -143,7 +143,8 @@ public: PageFaultResponse handle_page_fault(const PageFault&); - void enter_process_paging_scope(Process&); + static void enter_process_paging_scope(Process&); + static void enter_space(Space&); bool validate_user_stack(const Process&, VirtualAddress) const; @@ -196,8 +197,8 @@ public: } } - static Region* find_region_from_vaddr(Process&, VirtualAddress); - static const Region* find_region_from_vaddr(const Process&, VirtualAddress); + static Region* find_region_from_vaddr(Space&, VirtualAddress); + static const Region* find_region_from_vaddr(const Space&, VirtualAddress); void dump_kernel_regions(); @@ -225,7 +226,7 @@ private: static void flush_tlb_local(VirtualAddress, size_t page_count = 1); static void flush_tlb(const PageDirectory*, VirtualAddress, size_t page_count = 1); - static Region* user_region_from_vaddr(Process&, VirtualAddress); + static Region* user_region_from_vaddr(Space&, VirtualAddress); static Region* kernel_region_from_vaddr(VirtualAddress); static Region* find_region_from_vaddr(VirtualAddress); diff --git a/Kernel/VM/PageDirectory.cpp b/Kernel/VM/PageDirectory.cpp index 939853810f..3f38942353 100644 --- a/Kernel/VM/PageDirectory.cpp +++ b/Kernel/VM/PageDirectory.cpp @@ -73,7 +73,7 @@ PageDirectory::PageDirectory() m_directory_pages[3] = PhysicalPage::create(boot_pd3_paddr, true, false); } -PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_range_allocator) +PageDirectory::PageDirectory(const RangeAllocator* parent_range_allocator) { ScopedSpinLock lock(s_mm_lock); if (parent_range_allocator) { @@ -142,8 +142,8 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang auto* new_pd = MM.quickmap_pd(*this, 0); memcpy(new_pd, &buffer, sizeof(PageDirectoryEntry)); - // If we got here, we successfully created it. Set m_process now - m_process = &process; + // If we got here, we successfully created it. 
Set m_space now + m_valid = true; cr3_map().set(cr3(), this); } @@ -151,7 +151,7 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang PageDirectory::~PageDirectory() { ScopedSpinLock lock(s_mm_lock); - if (m_process) + if (m_space) cr3_map().remove(cr3()); } diff --git a/Kernel/VM/PageDirectory.h b/Kernel/VM/PageDirectory.h index f4bd7f41ff..ba26e37f01 100644 --- a/Kernel/VM/PageDirectory.h +++ b/Kernel/VM/PageDirectory.h @@ -40,10 +40,10 @@ class PageDirectory : public RefCounted { friend class MemoryManager; public: - static RefPtr create_for_userspace(Process& process, const RangeAllocator* parent_range_allocator = nullptr) + static RefPtr create_for_userspace(const RangeAllocator* parent_range_allocator = nullptr) { - auto page_directory = adopt(*new PageDirectory(process, parent_range_allocator)); - if (!page_directory->process()) + auto page_directory = adopt(*new PageDirectory(parent_range_allocator)); + if (!page_directory->is_valid()) return {}; return page_directory; } @@ -55,24 +55,31 @@ public: u32 cr3() const { return m_directory_table->paddr().get(); } RangeAllocator& range_allocator() { return m_range_allocator; } + const RangeAllocator& range_allocator() const { return m_range_allocator; } + RangeAllocator& identity_range_allocator() { return m_identity_range_allocator; } - Process* process() { return m_process; } - const Process* process() const { return m_process; } + bool is_valid() const { return m_valid; } + + Space* space() { return m_space; } + const Space* space() const { return m_space; } + + void set_space(Badge, Space& space) { m_space = &space; } RecursiveSpinLock& get_lock() { return m_lock; } private: - PageDirectory(Process&, const RangeAllocator* parent_range_allocator); + explicit PageDirectory(const RangeAllocator* parent_range_allocator); PageDirectory(); - Process* m_process { nullptr }; + Space* m_space { nullptr }; RangeAllocator m_range_allocator; RangeAllocator m_identity_range_allocator; RefPtr m_directory_table; RefPtr m_directory_pages[4]; HashMap> m_page_tables; RecursiveSpinLock m_lock; + bool m_valid { false }; }; } diff --git a/Kernel/VM/Space.cpp b/Kernel/VM/Space.cpp new file mode 100644 index 0000000000..bbee739db1 --- /dev/null +++ b/Kernel/VM/Space.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2021, Andreas Kling + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <AK/QuickSort.h>
+#include <Kernel/Process.h>
+#include <Kernel/SpinLock.h>
+#include <Kernel/VM/AnonymousVMObject.h>
+#include <Kernel/VM/MemoryManager.h>
+#include <Kernel/VM/Space.h>
+
+namespace Kernel {
+
+OwnPtr<Space> Space::create(Process& process, const Space* parent)
+{
+    auto page_directory = PageDirectory::create_for_userspace(parent ? &parent->page_directory().range_allocator() : nullptr);
+    if (!page_directory)
+        return {};
+    auto space = adopt_own(*new Space(process, page_directory.release_nonnull()));
+    space->page_directory().set_space({}, *space);
+    return space;
+}
+
+Space::Space(Process& process, NonnullRefPtr<PageDirectory> page_directory)
+    : m_process(&process)
+    , m_page_directory(move(page_directory))
+{
+}
+
+Space::~Space()
+{
+}
+
+Optional<Range> Space::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
+{
+    vaddr.mask(PAGE_MASK);
+    size = PAGE_ROUND_UP(size);
+    if (vaddr.is_null())
+        return page_directory().range_allocator().allocate_anywhere(size, alignment);
+    return page_directory().range_allocator().allocate_specific(vaddr, size);
+}
+
+Region& Space::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject)
+{
+    auto& region = add_region(Region::create_user_accessible(
+        m_process, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared()));
+    region.set_syscall_region(source_region.is_syscall_region());
+    region.set_mmap(source_region.is_mmap());
+    region.set_stack(source_region.is_stack());
+    size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
+    for (size_t i = 0; i < region.page_count(); ++i) {
+        if (source_region.should_cow(page_offset_in_source_region + i))
+            region.set_should_cow(i, true);
+    }
+    return region;
+}
+
+KResultOr<Region*> Space::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy)
+{
+    ASSERT(range.is_valid());
+    auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy);
+    if (!vmobject)
+        return ENOMEM;
+    auto region = Region::create_user_accessible(m_process, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false);
+    if (!region->map(page_directory()))
+        return ENOMEM;
+    return &add_region(move(region));
+}
+
+KResultOr<Region*> Space::allocate_region_with_vmobject(const Range& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared)
+{
+    ASSERT(range.is_valid());
+    size_t end_in_vmobject = offset_in_vmobject + range.size();
+    if (end_in_vmobject <= offset_in_vmobject) {
+        dbgln("allocate_region_with_vmobject: Overflow (offset + size)");
+        return EINVAL;
+    }
+    if (offset_in_vmobject >= vmobject->size()) {
+        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
+        return EINVAL;
+    }
+    if (end_in_vmobject > vmobject->size()) {
+        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
+        return EINVAL;
+    }
+    offset_in_vmobject &= PAGE_MASK;
+    auto& region = add_region(Region::create_user_accessible(m_process, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared));
+    if (!region.map(page_directory())) {
+        // FIXME: What is an appropriate error code here, really?
+        return ENOMEM;
+    }
+    return &region;
+}
+
+bool Space::deallocate_region(Region& region)
+{
+    OwnPtr<Region> region_protector;
+    ScopedSpinLock lock(m_lock);
+
+    if (m_region_lookup_cache.region.unsafe_ptr() == &region)
+        m_region_lookup_cache.region = nullptr;
+    for (size_t i = 0; i < m_regions.size(); ++i) {
+        if (&m_regions[i] == &region) {
+            region_protector = m_regions.unstable_take(i);
+            return true;
+        }
+    }
+    return false;
+}
+
+Region* Space::find_region_from_range(const Range& range)
+{
+    ScopedSpinLock lock(m_lock);
+    if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region)
+        return m_region_lookup_cache.region.unsafe_ptr();
+
+    size_t size = PAGE_ROUND_UP(range.size());
+    for (auto& region : m_regions) {
+        if (region.vaddr() == range.base() && region.size() == size) {
+            m_region_lookup_cache.range = range;
+            m_region_lookup_cache.region = region;
+            return &region;
+        }
+    }
+    return nullptr;
+}
+
+Region* Space::find_region_containing(const Range& range)
+{
+    ScopedSpinLock lock(m_lock);
+    for (auto& region : m_regions) {
+        if (region.contains(range))
+            return &region;
+    }
+    return nullptr;
+}
+
+Region& Space::add_region(NonnullOwnPtr<Region> region)
+{
+    auto* ptr = region.ptr();
+    ScopedSpinLock lock(m_lock);
+    m_regions.append(move(region));
+    return *ptr;
+}
+
+// Carve out a virtual address range from a region and return the two regions on either side
+Vector<Region*, 2> Space::split_region_around_range(const Region& source_region, const Range& desired_range)
+{
+    Range old_region_range = source_region.range();
+    auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);
+
+    ASSERT(!remaining_ranges_after_unmap.is_empty());
+    auto make_replacement_region = [&](const Range& new_range) -> Region& {
+        ASSERT(old_region_range.contains(new_range));
+        size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
+        return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
+    };
+    Vector<Region*, 2> new_regions;
+    for (auto& new_range : remaining_ranges_after_unmap) {
+        new_regions.unchecked_append(&make_replacement_region(new_range));
+    }
+    return new_regions;
+}
+
+void Space::dump_regions()
+{
+    klog() << "Process regions:";
+    klog() << "BEGIN END SIZE ACCESS NAME";
+
+    ScopedSpinLock lock(m_lock);
+
+    Vector<Region*> sorted_regions;
+    sorted_regions.ensure_capacity(m_regions.size());
+    for (auto& region : m_regions)
+        sorted_regions.append(&region);
+    quick_sort(sorted_regions, [](auto& a, auto& b) {
+        return a->vaddr() < b->vaddr();
+    });
+
+    for (auto& sorted_region : sorted_regions) {
+        auto& region = *sorted_region;
+        dmesgln("{:08x} -- {:08x} {:08x} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
+            region.is_readable() ? 'R' : ' ',
+            region.is_writable() ? 'W' : ' ',
+            region.is_executable() ? 'X' : ' ',
+            region.is_shared() ? 'S' : ' ',
+            region.is_stack() ? 'T' : ' ',
+            region.is_syscall_region() ? 'C' : ' ',
+            region.name());
+    }
+    MM.dump_kernel_regions();
+}
+
+void Space::remove_all_regions(Badge<Process>)
+{
+    ScopedSpinLock lock(m_lock);
+    m_regions.clear();
+}
+
+}
diff --git a/Kernel/VM/Space.h b/Kernel/VM/Space.h
new file mode 100644
index 0000000000..4e4a76dc13
--- /dev/null
+++ b/Kernel/VM/Space.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/NonnullOwnPtrVector.h>
+#include <AK/WeakPtr.h>
+#include <Kernel/UnixTypes.h>
+#include <Kernel/VM/AllocationStrategy.h>
+#include <Kernel/VM/PageDirectory.h>
+
+namespace Kernel {
+
+class Space {
+public:
+    static OwnPtr<Space> create(Process&, const Space* parent);
+    ~Space();
+
+    PageDirectory& page_directory() { return *m_page_directory; }
+    const PageDirectory& page_directory() const { return *m_page_directory; }
+
+    Region& add_region(NonnullOwnPtr<Region>);
+
+    size_t region_count() const { return m_regions.size(); }
+
+    NonnullOwnPtrVector<Region>& regions() { return m_regions; }
+    const NonnullOwnPtrVector<Region>& regions() const { return m_regions; }
+
+    void dump_regions();
+
+    Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE);
+
+    KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, const String& name, int prot, bool shared);
+    KResultOr<Region*> allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve);
+    bool deallocate_region(Region& region);
+
+    Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject);
+    Vector<Region*, 2> split_region_around_range(const Region& source_region, const Range&);
+
+    Region* find_region_from_range(const Range&);
+    Region* find_region_containing(const Range&);
+
+    bool enforces_syscall_regions() const { return m_enforces_syscall_regions; }
+    void set_enforces_syscall_regions(bool b) { m_enforces_syscall_regions = b; }
+
+    void remove_all_regions(Badge<Process>);
+
+    SpinLock<u32>& get_lock() const { return m_lock; }
+
+private:
+    Space(Process&, NonnullRefPtr<PageDirectory>);
+
+    Process* m_process { nullptr };
+    mutable SpinLock<u32> m_lock;
+
+    RefPtr<PageDirectory> m_page_directory;
+
+    NonnullOwnPtrVector<Region> m_regions;
+
+    struct RegionLookupCache {
+        Optional<Range> range;
+        WeakPtr<Region> region;
+    };
+    RegionLookupCache m_region_lookup_cache;
+
+    bool m_enforces_syscall_regions { false };
+};
+
+}
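
Editor's note (not part of the patch): for orientation, below is a minimal sketch of how the Space API declared above is meant to be driven by an in-kernel caller. It is an illustration under stated assumptions, not code from this change: the helper name demo_map_scratch_buffer and the region name are made up, and Process::space() is assumed to be the accessor this series routes callers through. Only Space::allocate_range(), Space::allocate_region() and Space::dump_regions() are taken from Space.h as added here.

// Illustrative sketch only -- not part of the diff.
#include <Kernel/Process.h>
#include <Kernel/VM/Space.h>

namespace Kernel {

// Hypothetical helper: map four anonymous pages somewhere in a process's address space.
KResultOr<Region*> demo_map_scratch_buffer(Process& process)
{
    auto& space = process.space(); // assumed accessor introduced alongside Space

    // Ask the space's range allocator for four pages at any address.
    auto range = space.allocate_range({}, 4 * PAGE_SIZE);
    if (!range.has_value())
        return ENOMEM;

    // Back the range with anonymous memory (default AllocationStrategy::Reserve) and map it.
    auto region_or_error = space.allocate_region(range.value(), "scratch buffer (demo)");
    if (region_or_error.is_error())
        return region_or_error.error();

    // Prints the same BEGIN/END/SIZE/ACCESS table as Space::dump_regions() above.
    space.dump_regions();
    return region_or_error.value();
}

}

The point of the sketch: range allocation, region creation, and the diagnostic dump all go through one object that owns both the region list and the PageDirectory, rather than through Process itself.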