From f415218afe008ba2de123040781c550f3cf48352 Mon Sep 17 00:00:00 2001 From: Idan Horowitz Date: Thu, 28 Oct 2021 23:33:41 +0300 Subject: [PATCH] Kernel+LibC: Implement sigaltstack() This is required for compiling wine for serenity --- Kernel/API/POSIX/signal.h | 13 +++ Kernel/API/Syscall.h | 1 + Kernel/Memory/Region.h | 2 + Kernel/Process.h | 3 + Kernel/Syscalls/sigaction.cpp | 178 +++++++++++++++++++++++++++++ Kernel/Thread.cpp | 22 +++- Kernel/Thread.h | 5 + Userland/Libraries/LibC/signal.cpp | 6 + Userland/Libraries/LibC/signal.h | 1 + 9 files changed, 229 insertions(+), 2 deletions(-) diff --git a/Kernel/API/POSIX/signal.h b/Kernel/API/POSIX/signal.h index a95ac6ad4e..b8f3b24597 100644 --- a/Kernel/API/POSIX/signal.h +++ b/Kernel/API/POSIX/signal.h @@ -42,6 +42,19 @@ struct sigaction { int sa_flags; }; +typedef struct { + void* ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +// FIXME: These values are arbitrary, and might be platform dependent +#define MINSIGSTKSZ 4096 // Minimum allowed +#define SIGSTKSZ 32768 // Recommended size + #define SIG_DFL ((__sighandler_t)0) #define SIG_ERR ((__sighandler_t)-1) #define SIG_IGN ((__sighandler_t)1) diff --git a/Kernel/API/Syscall.h b/Kernel/API/Syscall.h index 334a5c5213..eb27f678cd 100644 --- a/Kernel/API/Syscall.h +++ b/Kernel/API/Syscall.h @@ -172,6 +172,7 @@ enum class NeedsBigProcessLock { S(setuid, NeedsBigProcessLock::Yes) \ S(shutdown, NeedsBigProcessLock::Yes) \ S(sigaction, NeedsBigProcessLock::Yes) \ + S(sigaltstack, NeedsBigProcessLock::Yes) \ S(sigpending, NeedsBigProcessLock::Yes) \ S(sigprocmask, NeedsBigProcessLock::Yes) \ S(sigreturn, NeedsBigProcessLock::Yes) \ diff --git a/Kernel/Memory/Region.h b/Kernel/Memory/Region.h index 8a86317b40..403e9b743b 100644 --- a/Kernel/Memory/Region.h +++ b/Kernel/Memory/Region.h @@ -172,6 +172,8 @@ public: void set_writable(bool b) { set_access_bit(Access::Write, b); } void set_executable(bool b) { 
set_access_bit(Access::Execute, b); } + void unsafe_clear_access() { m_access = Region::None; } + void set_page_directory(PageDirectory&); ErrorOr map(PageDirectory&, ShouldFlushTLB = ShouldFlushTLB::Yes); enum class ShouldDeallocateVirtualRange { diff --git a/Kernel/Process.h b/Kernel/Process.h index 3184fab5b4..e0ac48fecc 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -335,6 +335,7 @@ public: ErrorOr sys$execve(Userspace); ErrorOr sys$dup2(int old_fd, int new_fd); ErrorOr sys$sigaction(int signum, Userspace act, Userspace old_act); + ErrorOr sys$sigaltstack(Userspace ss, Userspace old_ss); ErrorOr sys$sigprocmask(int how, Userspace set, Userspace old_set); ErrorOr sys$sigpending(Userspace); ErrorOr sys$getgroups(size_t, Userspace); @@ -556,6 +557,8 @@ private: void setup_socket_fd(int fd, NonnullRefPtr description, int type); + ErrorOr remap_range_as_stack(FlatPtr address, size_t size); + public: NonnullRefPtr procfs_traits() const { return *m_procfs_traits; } ErrorOr procfs_get_fds_stats(KBufferBuilder& builder) const; diff --git a/Kernel/Syscalls/sigaction.cpp b/Kernel/Syscalls/sigaction.cpp index 7c19b3c249..343ff92e1e 100644 --- a/Kernel/Syscalls/sigaction.cpp +++ b/Kernel/Syscalls/sigaction.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2021, Idan Horowitz * * SPDX-License-Identifier: BSD-2-Clause */ @@ -129,4 +130,181 @@ ErrorOr Process::sys$sigreturn([[maybe_unused]] RegisterState& register #endif } +ErrorOr Process::remap_range_as_stack(FlatPtr address, size_t size) +{ + // FIXME: This duplicates a lot of logic from sys$mprotect, this should be abstracted out somehow + auto range_to_remap = TRY(Memory::expand_range_to_page_boundaries(address, size)); + if (!range_to_remap.size()) + return EINVAL; + + if (!is_user_range(range_to_remap)) + return EFAULT; + + if (auto* whole_region = address_space().find_region_from_range(range_to_remap)) { + if (!whole_region->is_mmap()) + return EPERM; + if 
(!whole_region->vmobject().is_anonymous() || whole_region->is_shared()) + return EINVAL; + whole_region->unsafe_clear_access(); + whole_region->set_readable(true); + whole_region->set_writable(true); + whole_region->set_stack(true); + whole_region->set_syscall_region(false); + whole_region->clear_to_zero(); + whole_region->remap(); + + return {}; + } + + if (auto* old_region = address_space().find_region_containing(range_to_remap)) { + if (!old_region->is_mmap()) + return EPERM; + if (!old_region->vmobject().is_anonymous() || old_region->is_shared()) + return EINVAL; + + // Remove the old region from our regions tree, since we're going to add another region + // with the exact same start address, but do not deallocate it yet + auto region = address_space().take_region(*old_region); + + // Unmap the old region here, specifying that we *don't* want the VM deallocated. + region->unmap(Memory::Region::ShouldDeallocateVirtualRange::No); + + // This vector is the region(s) adjacent to our range. + // We need to allocate a new region for the range we wanted to change permission bits on. + auto adjacent_regions = TRY(address_space().try_split_region_around_range(*region, range_to_remap)); + + size_t new_range_offset_in_vmobject = region->offset_in_vmobject() + (range_to_remap.base().get() - region->range().base().get()); + auto new_region = TRY(address_space().try_allocate_split_region(*region, range_to_remap, new_range_offset_in_vmobject)); + new_region->unsafe_clear_access(); + new_region->set_readable(true); + new_region->set_writable(true); + new_region->set_stack(true); + new_region->set_syscall_region(false); + new_region->clear_to_zero(); + + // Map the new regions using our page directory (they were just allocated and don't have one).
+ for (auto* adjacent_region : adjacent_regions) { + TRY(adjacent_region->map(address_space().page_directory())); + } + TRY(new_region->map(address_space().page_directory())); + + return {}; + } + + if (const auto& regions = address_space().find_regions_intersecting(range_to_remap); regions.size()) { + size_t full_size_found = 0; + // Check that all intersecting regions are compatible. + for (const auto* region : regions) { + if (!region->is_mmap()) + return EPERM; + if (!region->vmobject().is_anonymous() || region->is_shared()) + return EINVAL; + full_size_found += region->range().intersect(range_to_remap).size(); + } + + if (full_size_found != range_to_remap.size()) + return ENOMEM; + + // Finally, iterate over each region, either updating its access flags if the range covers it wholly, + // or carving out a new subregion with the appropriate access flags set. + for (auto* old_region : regions) { + const auto intersection_to_remap = range_to_remap.intersect(old_region->range()); + // If the region is completely covered by range, simply update the access flags + if (intersection_to_remap == old_region->range()) { + old_region->unsafe_clear_access(); + old_region->set_readable(true); + old_region->set_writable(true); + old_region->set_stack(true); + old_region->set_syscall_region(false); + old_region->clear_to_zero(); + old_region->remap(); + continue; + } + // Remove the old region from our regions tree, since we're going to add another region + // with the exact same start address, but don't deallocate it yet + auto region = address_space().take_region(*old_region); + + // Unmap the old region here, specifying that we *don't* want the VM deallocated. + region->unmap(Memory::Region::ShouldDeallocateVirtualRange::No); + + // This vector is the region(s) adjacent to our range. + // We need to allocate a new region for the range we wanted to change permission bits on.
+ auto adjacent_regions = TRY(address_space().try_split_region_around_range(*old_region, intersection_to_remap)); + + // Since the range is not contained in a single region, it can only partially cover its starting and ending region, + // therefore carving out a chunk from the region will always produce a single extra region, and not two. + VERIFY(adjacent_regions.size() == 1); + + size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (intersection_to_remap.base().get() - old_region->range().base().get()); + auto* new_region = TRY(address_space().try_allocate_split_region(*region, intersection_to_remap, new_range_offset_in_vmobject)); + + new_region->unsafe_clear_access(); + new_region->set_readable(true); + new_region->set_writable(true); + new_region->set_stack(true); + new_region->set_syscall_region(false); + new_region->clear_to_zero(); + + // Map the new region using our page directory (they were just allocated and don't have one) if any. + if (adjacent_regions.size()) + TRY(adjacent_regions[0]->map(address_space().page_directory())); + + TRY(new_region->map(address_space().page_directory())); + } + + return {}; + } + + return EINVAL; +} + +ErrorOr Process::sys$sigaltstack(Userspace ss, Userspace old_ss) +{ + VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this) + REQUIRE_PROMISE(sigaction); + + if (old_ss) { + stack_t old_ss_value {}; // Value-initialize: every named field is assigned below, but the struct is copied to userspace whole and should not carry indeterminate bytes. 
+ old_ss_value.ss_sp = (void*)Thread::current()->m_alternative_signal_stack; + old_ss_value.ss_size = Thread::current()->m_alternative_signal_stack_size; + old_ss_value.ss_flags = 0; + if (!Thread::current()->has_alternative_signal_stack()) + old_ss_value.ss_flags = SS_DISABLE; + else if (Thread::current()->is_in_alternative_signal_stack()) + old_ss_value.ss_flags = SS_ONSTACK; + TRY(copy_to_user(old_ss, &old_ss_value)); + } + + if (ss) { + stack_t ss_value; + TRY(copy_from_user(&ss_value, ss)); + + if (Thread::current()->is_in_alternative_signal_stack()) + return EPERM; + + if (ss_value.ss_flags == SS_DISABLE) { +
Thread::current()->m_alternative_signal_stack_size = 0; + Thread::current()->m_alternative_signal_stack = 0; + } else if (ss_value.ss_flags == 0) { + if (ss_value.ss_size < MINSIGSTKSZ) // POSIX: ENOMEM only when the size is less than MINSIGSTKSZ; a stack of exactly MINSIGSTKSZ bytes is valid. + return ENOMEM; + if (Checked::addition_would_overflow((FlatPtr)ss_value.ss_sp, ss_value.ss_size)) + return ENOMEM; + + // In order to preserve compatibility with our MAP_STACK, W^X and syscall region + // protections, sigaltstack ranges are carved out of their regions, zeroed, and + // turned into read/writable MAP_STACK-enabled regions. + // This is inspired by OpenBSD's solution: https://man.openbsd.org/sigaltstack.2 + TRY(remap_range_as_stack((FlatPtr)ss_value.ss_sp, ss_value.ss_size)); + + Thread::current()->m_alternative_signal_stack = (FlatPtr)ss_value.ss_sp; + Thread::current()->m_alternative_signal_stack_size = ss_value.ss_size; + } else { + return EINVAL; + } + } + + return 0; +} + } diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp index 45cd0fb34c..ea4fc3a9a3 100644 --- a/Kernel/Thread.cpp +++ b/Kernel/Thread.cpp @@ -802,6 +802,17 @@ bool Thread::has_signal_handler(u8 signal) const return !action.handler_or_sigaction.is_null(); } +bool Thread::has_alternative_signal_stack() const +{ + return m_alternative_signal_stack_size != 0; +} + +bool Thread::is_in_alternative_signal_stack() const +{ + auto sp = get_register_dump_from_stack().userspace_sp(); + return sp >= m_alternative_signal_stack && sp < m_alternative_signal_stack + m_alternative_signal_stack_size; +} + static void push_value_on_user_stack(FlatPtr& stack, FlatPtr data) { stack -= sizeof(FlatPtr); @@ -923,9 +934,16 @@ DispatchSignalResult Thread::dispatch_signal(u8 signal) m_signal_mask |= new_signal_mask; m_have_any_unmasked_pending_signals.store((m_pending_signals & ~m_signal_mask) != 0, AK::memory_order_release); + bool use_alternative_stack = ((action.flags & SA_ONSTACK) != 0) && has_alternative_signal_stack() && !is_in_alternative_signal_stack(); + auto setup_stack = [&](RegisterState& state) { - FlatPtr
stack = state.userspace_sp(); - FlatPtr old_sp = stack; + FlatPtr old_sp = state.userspace_sp(); + FlatPtr stack; + if (use_alternative_stack) + stack = m_alternative_signal_stack + m_alternative_signal_stack_size; + else + stack = old_sp; + FlatPtr ret_ip = state.ip(); FlatPtr ret_flags = state.flags(); diff --git a/Kernel/Thread.h b/Kernel/Thread.h index bde964f2d6..29a7bc784e 100644 --- a/Kernel/Thread.h +++ b/Kernel/Thread.h @@ -1026,6 +1026,9 @@ public: u32 pending_signals() const; u32 pending_signals_for_state() const; + [[nodiscard]] bool has_alternative_signal_stack() const; + [[nodiscard]] bool is_in_alternative_signal_stack() const; + FPUState& fpu_state() { return m_fpu_state; } ErrorOr make_thread_specific_region(Badge); @@ -1295,6 +1298,8 @@ private: u32 m_ticks_in_kernel { 0 }; u32 m_pending_signals { 0 }; u32 m_signal_mask { 0 }; + FlatPtr m_alternative_signal_stack { 0 }; + FlatPtr m_alternative_signal_stack_size { 0 }; FlatPtr m_kernel_stack_base { 0 }; FlatPtr m_kernel_stack_top { 0 }; OwnPtr m_kernel_stack_region; diff --git a/Userland/Libraries/LibC/signal.cpp b/Userland/Libraries/LibC/signal.cpp index c6f213268c..3f43db5496 100644 --- a/Userland/Libraries/LibC/signal.cpp +++ b/Userland/Libraries/LibC/signal.cpp @@ -75,6 +75,12 @@ int sigaddset(sigset_t* set, int sig) return 0; } +int sigaltstack(const stack_t* ss, stack_t* old_ss) +{ + int rc = syscall(SC_sigaltstack, ss, old_ss); + __RETURN_WITH_ERRNO(rc, rc, -1); +} + int sigdelset(sigset_t* set, int sig) { if (sig < 1 || sig > 32) { diff --git a/Userland/Libraries/LibC/signal.h b/Userland/Libraries/LibC/signal.h index 5284d8bc68..e11bf80b8d 100644 --- a/Userland/Libraries/LibC/signal.h +++ b/Userland/Libraries/LibC/signal.h @@ -21,6 +21,7 @@ int sigaction(int sig, const struct sigaction* act, struct sigaction* old_act); int sigemptyset(sigset_t*); int sigfillset(sigset_t*); int sigaddset(sigset_t*, int sig); +int sigaltstack(const stack_t* ss, stack_t* old_ss); int sigdelset(sigset_t*, int 
sig); int sigismember(const sigset_t*, int sig); int sigprocmask(int how, const sigset_t* set, sigset_t* old_set);