diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index be18d487b8..2c904cf789 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -735,6 +735,29 @@ void Process::sys$exit(int status) ASSERT_NOT_REACHED(); } +// The trampoline preserves the current eax, pushes the signal code and +// then calls the signal handler. We do this because, when interrupting a +// blocking syscall, that syscall may return some special error code in eax; +// This error code would likely be overwritten by the signal handler, so it's +// necessary to preserve it here. +asm( + ".intel_syntax noprefix\n" + "asm_signal_trampoline:\n" + "push ebp\n" + "mov ebp, esp\n" + "push eax\n" // we have to store eax 'cause it might be the return value from a syscall + "mov eax, [ebp+12]\n" // push the signal code + "push eax\n" + "call [ebp+8]\n" // call the signal handler + "add esp, 4\n" + "mov eax, 0x2d\n" // FIXME: We shouldn't be hardcoding this. + "int 0x82\n" // sigreturn syscall + "asm_signal_trampoline_end:\n" + ".att_syntax"); + +extern "C" void asm_signal_trampoline(void); +extern "C" void asm_signal_trampoline_end(void); + void create_signal_trampolines() { InterruptDisabler disabler; @@ -743,42 +766,12 @@ void create_signal_trampolines() auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines").leak_ref(); g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr(); + u8* trampoline = (u8*)asm_signal_trampoline; + u8* trampoline_end = (u8*)asm_signal_trampoline_end; + size_t trampoline_size = trampoline_end - trampoline; + u8* code_ptr = (u8*)trampoline_region->vaddr().as_ptr(); - *code_ptr++ = 0x58; // pop eax (Argument to signal handler (ignored here)) - *code_ptr++ = 0x5a; // pop edx (Original signal mask to restore) - *code_ptr++ = 0xb8; // mov eax, - *(u32*)code_ptr = Syscall::SC_restore_signal_mask; - code_ptr += sizeof(u32); - *code_ptr++ = 0xcd; // int 0x82 - *code_ptr++ = 0x82; - - *code_ptr++ = 0x83; // add esp, (stack 
alignment padding) - *code_ptr++ = 0xc4; - *code_ptr++ = sizeof(u32) * 3; - - *code_ptr++ = 0x61; // popa - *code_ptr++ = 0x9d; // popf - *code_ptr++ = 0xc3; // ret - *code_ptr++ = 0x0f; // ud2 - *code_ptr++ = 0x0b; - - g_return_to_ring0_from_signal_trampoline = VirtualAddress((u32)code_ptr); - *code_ptr++ = 0x58; // pop eax (Argument to signal handler (ignored here)) - *code_ptr++ = 0x5a; // pop edx (Original signal mask to restore) - *code_ptr++ = 0xb8; // mov eax, - *(u32*)code_ptr = Syscall::SC_restore_signal_mask; - code_ptr += sizeof(u32); - *code_ptr++ = 0xcd; // int 0x82 - // NOTE: Stack alignment padding doesn't matter when returning to ring0. - // Nothing matters really, as we're returning by replacing the entire TSS. - *code_ptr++ = 0x82; - *code_ptr++ = 0xb8; // mov eax, - *(u32*)code_ptr = Syscall::SC_sigreturn; - code_ptr += sizeof(u32); - *code_ptr++ = 0xcd; // int 0x82 - *code_ptr++ = 0x82; - *code_ptr++ = 0x0f; // ud2 - *code_ptr++ = 0x0b; + memcpy(code_ptr, trampoline, trampoline_size); trampoline_region->set_writable(false); MM.remap_region(*trampoline_region->page_directory(), *trampoline_region); @@ -790,21 +783,27 @@ int Process::sys$restore_signal_mask(u32 mask) return 0; } -void Process::sys$sigreturn() +int Process::sys$sigreturn(RegisterDump& registers) { - InterruptDisabler disabler; - Scheduler::prepare_to_modify_tss(*current); - current->m_tss = *current->m_tss_to_resume_kernel; - current->m_tss_to_resume_kernel.clear(); -#ifdef SIGNAL_DEBUG - kprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid()); - auto& tss = current->tss(); - kprintf(" -> resuming execution at %w:%x stack %w:%x flags %x cr3 %x\n", tss.cs, tss.eip, tss.ss, tss.esp, tss.eflags, tss.cr3); -#endif - current->set_state(Thread::State::Skip1SchedulerPass); - Scheduler::yield(); - kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid()); - ASSERT_NOT_REACHED(); + //Here, we restore the state pushed by dispatch signal and asm_signal_trampoline. 
+ u32* stack_ptr = (u32*)registers.esp_if_crossRing; + u32 smuggled_eax = *stack_ptr; + + //pop the stored eax, ebp, return address, handler and signal code + stack_ptr += 5; + + current->m_signal_mask = *stack_ptr; + stack_ptr++; + + //pop edi, esi, ebp, esp, ebx, edx, ecx, eax and eip + memcpy(&registers.edi, stack_ptr, 9 * sizeof(u32)); + stack_ptr += 9; + + registers.eflags = *stack_ptr; + stack_ptr++; + + registers.esp_if_crossRing = registers.esp; + return smuggled_eax; } void Process::crash(int signal, u32 eip) diff --git a/Kernel/Process.h b/Kernel/Process.h index 82d2063c80..e19a0d36fb 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -141,7 +141,7 @@ public: int sys$lseek(int fd, off_t, int whence); int sys$kill(pid_t pid, int sig); [[noreturn]] void sys$exit(int status); - [[noreturn]] void sys$sigreturn(); + int sys$sigreturn(RegisterDump& registers); pid_t sys$waitpid(pid_t, int* wstatus, int options); void* sys$mmap(const Syscall::SC_mmap_params*); int sys$munmap(void*, size_t size); diff --git a/Kernel/Syscall.cpp b/Kernel/Syscall.cpp index ac40386569..ab4d2c1b65 100644 --- a/Kernel/Syscall.cpp +++ b/Kernel/Syscall.cpp @@ -181,11 +181,7 @@ static u32 handle(RegisterDump& regs, u32 function, u32 arg1, u32 arg2, u32 arg3 case Syscall::SC_setgroups: return current->process().sys$setgroups((ssize_t)arg1, (const gid_t*)arg2); case Syscall::SC_sigreturn: - if (auto* tracer = current->process().tracer()) - tracer->did_syscall(function, arg1, arg2, arg3, 0); - current->process().sys$sigreturn(); - ASSERT_NOT_REACHED(); - return 0; + return current->process().sys$sigreturn(regs); case Syscall::SC_sigprocmask: return current->process().sys$sigprocmask((int)arg1, (const sigset_t*)arg2, (sigset_t*)arg3); case Syscall::SC_pipe: diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp index f9061c5930..6d3ece2a59 100644 --- a/Kernel/Thread.cpp +++ b/Kernel/Thread.cpp @@ -316,6 +316,7 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal) { 
ASSERT_INTERRUPTS_DISABLED(); ASSERT(signal > 0 && signal <= 32); + ASSERT(!process().is_ring0()); #ifdef SIGNAL_DEBUG kprintf("dispatch_signal %s(%u) <- %u\n", process().name().characters(), pid(), signal); @@ -366,6 +367,12 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal) return ShouldUnblockThread::Yes; } + ProcessPagingScope paging_scope(m_process); + // The userspace registers should be stored at the top of the stack + // We have to subtract 2 because the processor decrements the kernel + // stack before pushing the args. + auto& regs = *(RegisterDump*)(kernel_stack_top() - sizeof(RegisterDump) - 2); + u32 old_signal_mask = m_signal_mask; u32 new_signal_mask = action.mask; if (action.flags & SA_NODEFER) @@ -375,78 +382,49 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal) m_signal_mask |= new_signal_mask; - Scheduler::prepare_to_modify_tss(*this); + u32 old_esp = regs.esp_if_crossRing; + u32 ret_eip = regs.eip; + u32 ret_eflags = regs.eflags; - u16 ret_cs = m_tss.cs; - u32 ret_eip = m_tss.eip; - u32 ret_eflags = m_tss.eflags; - bool interrupting_in_kernel = (ret_cs & 3) == 0; + push_value_on_user_stack(regs, ret_eflags); - ProcessPagingScope paging_scope(m_process); - - if (interrupting_in_kernel) { -#ifdef SIGNAL_DEBUG - kprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", process().name().characters(), pid(), to_string(state()), ret_cs, ret_eip); -#endif - ASSERT(is_blocked()); - m_tss_to_resume_kernel = make(m_tss); -#ifdef SIGNAL_DEBUG - kprintf("resume tss pc: %w:%x stack: %w:%x flags: %x cr3: %x\n", m_tss_to_resume_kernel->cs, m_tss_to_resume_kernel->eip, m_tss_to_resume_kernel->ss, m_tss_to_resume_kernel->esp, m_tss_to_resume_kernel->eflags, m_tss_to_resume_kernel->cr3); -#endif - - if (!m_signal_stack_user_region) { - m_signal_stack_user_region = m_process.allocate_region(VirtualAddress(), default_userspace_stack_size, String::format("User Signal Stack (Thread %d)", m_tid)); - ASSERT(m_signal_stack_user_region); - } - 
if (!m_kernel_stack_for_signal_handler_region) - m_kernel_stack_for_signal_handler_region = MM.allocate_kernel_region(default_kernel_stack_size, String::format("Kernel Signal Stack (Thread %d)", m_tid)); - m_tss.ss = 0x23; - m_tss.esp = m_signal_stack_user_region->vaddr().offset(default_userspace_stack_size).get(); - m_tss.ss0 = 0x10; - m_tss.esp0 = m_kernel_stack_for_signal_handler_region->vaddr().offset(default_kernel_stack_size).get(); - - push_value_on_stack(0); - } else { - push_value_on_stack(ret_eip); - push_value_on_stack(ret_eflags); - - // PUSHA - u32 old_esp = m_tss.esp; - push_value_on_stack(m_tss.eax); - push_value_on_stack(m_tss.ecx); - push_value_on_stack(m_tss.edx); - push_value_on_stack(m_tss.ebx); - push_value_on_stack(old_esp); - push_value_on_stack(m_tss.ebp); - push_value_on_stack(m_tss.esi); - push_value_on_stack(m_tss.edi); - - // Align the stack. - m_tss.esp -= 12; - } + push_value_on_user_stack(regs, ret_eip); + push_value_on_user_stack(regs, regs.eax); + push_value_on_user_stack(regs, regs.ecx); + push_value_on_user_stack(regs, regs.edx); + push_value_on_user_stack(regs, regs.ebx); + push_value_on_user_stack(regs, old_esp); + push_value_on_user_stack(regs, regs.ebp); + push_value_on_user_stack(regs, regs.esi); + push_value_on_user_stack(regs, regs.edi); // PUSH old_signal_mask - push_value_on_stack(old_signal_mask); + push_value_on_user_stack(regs, old_signal_mask); - m_tss.cs = 0x1b; - m_tss.ds = 0x23; - m_tss.es = 0x23; - m_tss.fs = 0x23; - m_tss.gs = 0x23; - m_tss.eip = handler_vaddr.get(); + push_value_on_user_stack(regs, signal); + push_value_on_user_stack(regs, handler_vaddr.get()); + push_value_on_user_stack(regs, 0); //push fake return address + + regs.eip = g_return_to_ring3_from_signal_trampoline.get(); // FIXME: Should we worry about the stack being 16 byte aligned when entering a signal handler? 
- push_value_on_stack(signal); - if (interrupting_in_kernel) - push_value_on_stack(g_return_to_ring0_from_signal_trampoline.get()); - else - push_value_on_stack(g_return_to_ring3_from_signal_trampoline.get()); - - ASSERT((m_tss.esp % 16) == 0); - - // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal. - set_state(Skip1SchedulerPass); + // If we're not blocking we need to update the tss so + // that the far jump in Scheduler goes to the proper location. + // When we are blocking we don't update the TSS as we want to + // resume at the blocker and descend the stack, cleaning up nicely. + if (!in_kernel()) { + Scheduler::prepare_to_modify_tss(*this); + m_tss.cs = 0x1b; + m_tss.ds = 0x23; + m_tss.es = 0x23; + m_tss.fs = 0x23; + m_tss.gs = 0x23; + m_tss.eip = regs.eip; + m_tss.esp = regs.esp_if_crossRing; + // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal. + set_state(Skip1SchedulerPass); + } #ifdef SIGNAL_DEBUG kprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", process().name().characters(), pid(), to_string(state()), m_tss.cs, m_tss.eip); @@ -462,6 +440,13 @@ void Thread::set_default_signal_dispositions() m_signal_action_data[SIGWINCH].handler_or_sigaction = VirtualAddress((u32)SIG_IGN); } +void Thread::push_value_on_user_stack(RegisterDump& registers, u32 value) +{ + registers.esp_if_crossRing -= 4; + u32* stack_ptr = (u32*)registers.esp_if_crossRing; + *stack_ptr = value; +} + void Thread::push_value_on_stack(u32 value) { m_tss.esp -= 4; diff --git a/Kernel/Thread.h b/Kernel/Thread.h index 6c1f481212..42357e4b53 100644 --- a/Kernel/Thread.h +++ b/Kernel/Thread.h @@ -6,9 +6,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -73,6 +73,7 @@ public: virtual const char* state_string() const = 0; void set_interrupted_by_signal() { m_was_interrupted_while_blocked = true; } bool 
was_interrupted_by_signal() const { return m_was_interrupted_while_blocked; } + private: bool m_was_interrupted_while_blocked { false }; friend class Thread; @@ -220,8 +221,8 @@ public: InterruptedBySignal, }; - template - [[nodiscard]] BlockResult block(Args&& ... args) + template + [[nodiscard]] BlockResult block(Args&&... args) { // We should never be blocking a blocked (or otherwise non-active) thread. ASSERT(state() == Thread::Running); @@ -295,6 +296,7 @@ public: void set_has_used_fpu(bool b) { m_has_used_fpu = b; } void set_default_signal_dispositions(); + void push_value_on_user_stack(RegisterDump&, u32); void push_value_on_stack(u32); void make_userspace_stack_for_main_thread(Vector arguments, Vector environment); void make_userspace_stack_for_secondary_thread(void* argument);