From 823186031d9250217f9a51829d34a96b74113334 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Tue, 2 Feb 2021 19:56:11 +0100 Subject: [PATCH] Kernel: Add a way to specify which memory regions can make syscalls This patch adds sys$msyscall() which is loosely based on an OpenBSD mechanism for preventing syscalls from non-blessed memory regions. It works similarly to pledge and unveil: you can call it as many times as you like, and when you're finished, you call it with a null pointer and it will stop accepting new regions from then on. If a syscall later happens and doesn't originate from one of the previously blessed regions, the kernel will simply crash the process. --- Kernel/API/Syscall.h | 3 ++- Kernel/FileSystem/ProcFS.cpp | 1 + Kernel/Process.cpp | 2 ++ Kernel/Process.h | 5 +++++ Kernel/Syscall.cpp | 6 ++++++ Kernel/Syscalls/execve.cpp | 2 ++ Kernel/Syscalls/fork.cpp | 1 + Kernel/Syscalls/mmap.cpp | 18 ++++++++++++++++++ Kernel/VM/Region.cpp | 2 ++ Kernel/VM/Region.h | 4 ++++ 10 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Kernel/API/Syscall.h b/Kernel/API/Syscall.h index 3abfd91e16..5de5ecb669 100644 --- a/Kernel/API/Syscall.h +++ b/Kernel/API/Syscall.h @@ -190,7 +190,8 @@ namespace Kernel { S(mremap) \ S(set_coredump_metadata) \ S(abort) \ - S(anon_create) + S(anon_create) \ + S(msyscall) namespace Syscall { diff --git a/Kernel/FileSystem/ProcFS.cpp b/Kernel/FileSystem/ProcFS.cpp index d377410fdb..485e299e8c 100644 --- a/Kernel/FileSystem/ProcFS.cpp +++ b/Kernel/FileSystem/ProcFS.cpp @@ -327,6 +327,7 @@ static bool procfs$pid_vm(InodeIdentifier identifier, KBufferBuilder& builder) region_object.add("executable", region.is_executable()); region_object.add("stack", region.is_stack()); region_object.add("shared", region.is_shared()); + region_object.add("syscall", region.is_syscall_region()); region_object.add("user_accessible", region.is_user_accessible()); region_object.add("purgeable", region.vmobject().is_anonymous()); if
(region.vmobject().is_anonymous()) { diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index 8e49cc4187..d9e273f129 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -129,6 +129,7 @@ Region& Process::allocate_split_region(const Region& source_region, const Range& { auto& region = add_region( Region::create_user_accessible(this, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared())); + region.set_syscall_region(source_region.is_syscall_region()); region.set_mmap(source_region.is_mmap()); region.set_stack(source_region.is_stack()); size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; @@ -423,6 +424,7 @@ void create_signal_trampolines() InterruptDisabler disabler; // NOTE: We leak this region. auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines", Region::Access::Read | Region::Access::Write | Region::Access::Execute, false).leak_ptr(); + trampoline_region->set_syscall_region(true); g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr(); u8* trampoline = (u8*)asm_signal_trampoline; diff --git a/Kernel/Process.h b/Kernel/Process.h index 62d5a71f89..2acf335f4e 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -266,6 +266,7 @@ public: int sys$set_mmap_name(Userspace); int sys$mprotect(void*, size_t, int prot); int sys$madvise(void*, size_t, int advice); + int sys$msyscall(void*); int sys$purge(int mode); int sys$select(const Syscall::SC_select_params*); int sys$poll(Userspace); @@ -510,6 +511,8 @@ public: PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; } + bool enforces_syscall_regions() const { return m_enforces_syscall_regions; } + private: friend class MemoryManager; friend class Scheduler; @@ -648,6 +651,8 @@ private: RefPtr m_alarm_timer; + bool m_enforces_syscall_regions { false }; + bool m_has_promises { false 
}; u32 m_promises { 0 }; bool m_has_execpromises { false }; diff --git a/Kernel/Syscall.cpp b/Kernel/Syscall.cpp index 279cc98f10..8eb79eca52 100644 --- a/Kernel/Syscall.cpp +++ b/Kernel/Syscall.cpp @@ -189,6 +189,12 @@ void syscall_handler(TrapFrame* trap) ASSERT_NOT_REACHED(); } + if (process.enforces_syscall_regions() && !calling_region->is_syscall_region()) { + dbgln("Syscall from non-syscall region"); + handle_crash(regs, "Syscall from non-syscall region", SIGSEGV); + ASSERT_NOT_REACHED(); + } + process.big_lock().lock(); u32 function = regs.eax; u32 arg1 = regs.edx; diff --git a/Kernel/Syscalls/execve.cpp b/Kernel/Syscalls/execve.cpp index 83ee25b8e8..e102f56970 100644 --- a/Kernel/Syscalls/execve.cpp +++ b/Kernel/Syscalls/execve.cpp @@ -549,6 +549,8 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve m_execpromises = 0; m_has_execpromises = false; + m_enforces_syscall_regions = false; + m_veil_state = VeilState::None; m_unveiled_paths.clear(); diff --git a/Kernel/Syscalls/fork.cpp b/Kernel/Syscalls/fork.cpp index 0f12b412a5..f20859dc5a 100644 --- a/Kernel/Syscalls/fork.cpp +++ b/Kernel/Syscalls/fork.cpp @@ -47,6 +47,7 @@ pid_t Process::sys$fork(RegisterState& regs) child->m_has_execpromises = m_has_execpromises; child->m_veil_state = m_veil_state; child->m_unveiled_paths = m_unveiled_paths.deep_copy(); + child->m_enforces_syscall_regions = m_enforces_syscall_regions; child->m_fds = m_fds; child->m_sid = m_sid; child->m_pg = m_pg; diff --git a/Kernel/Syscalls/mmap.cpp b/Kernel/Syscalls/mmap.cpp index 6c5fa0d2f5..543c1b0189 100644 --- a/Kernel/Syscalls/mmap.cpp +++ b/Kernel/Syscalls/mmap.cpp @@ -550,4 +550,22 @@ void* Process::sys$allocate_tls(size_t size) return m_master_tls_region.unsafe_ptr()->vaddr().as_ptr(); } +int Process::sys$msyscall(void* address) +{ + if (m_enforces_syscall_regions) + return -EPERM; + + if (!address) { + m_enforces_syscall_regions = true; + return 0; + } + + auto* region = find_region_containing(Range { VirtualAddress 
{ address }, 1 }); + if (!region) + return -EINVAL; + + region->set_syscall_region(true); + return 0; +} + } diff --git a/Kernel/VM/Region.cpp b/Kernel/VM/Region.cpp index b21b2d9cd8..f8a27b70c8 100644 --- a/Kernel/VM/Region.cpp +++ b/Kernel/VM/Region.cpp @@ -105,6 +105,7 @@ OwnPtr Region::clone(Process& new_owner) region->copy_purgeable_page_ranges(*this); region->set_mmap(m_mmap); region->set_shared(m_shared); + region->set_syscall_region(is_syscall_region()); return region; } @@ -127,6 +128,7 @@ OwnPtr Region::clone(Process& new_owner) ASSERT(vmobject().is_anonymous()); clone_region->set_stack(true); } + clone_region->set_syscall_region(is_syscall_region()); clone_region->set_mmap(m_mmap); return clone_region; } diff --git a/Kernel/VM/Region.h b/Kernel/VM/Region.h index 37e2b616c6..8627fdab0f 100644 --- a/Kernel/VM/Region.h +++ b/Kernel/VM/Region.h @@ -247,6 +247,9 @@ public: RefPtr get_owner(); + bool is_syscall_region() const { return m_syscall_region; } + void set_syscall_region(bool b) { m_syscall_region = b; } + private: bool do_remap_vmobject_page_range(size_t page_index, size_t page_count); @@ -282,6 +285,7 @@ private: bool m_stack : 1 { false }; bool m_mmap : 1 { false }; bool m_kernel : 1 { false }; + bool m_syscall_region : 1 { false }; WeakPtr m_owner; };