1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-23 14:07:42 +00:00

Kernel: Use user stack for signal handlers.

This commit drastically changes how signals are handled.

When an unblocked thread is signaled, it works in much the same
way as before. However, when a blocking syscall
is interrupted, we set up the signal trampoline on the user
stack, complete the blocking syscall, return down the kernel
stack and then jump to the handler. This means that from the
kernel stack's perspective, we only ever get one system call deep.

The signal trampoline has also been changed in order to properly
store the return value from system calls. This is necessary due
to the new way we exit from signaled system calls.
This commit is contained in:
Drew Stratford 2019-09-05 01:14:54 +12:00 committed by Andreas Kling
parent 259a1d56b0
commit 81d0f96f20
5 changed files with 104 additions and 122 deletions

View file

@ -735,6 +735,29 @@ void Process::sys$exit(int status)
ASSERT_NOT_REACHED();
}
// The trampoline preserves the current eax, pushes the signal code and
// then calls the signal handler. We do this because, when interrupting a
// blocking syscall, that syscall may return some special error code in eax.
// This error code would likely be overwritten by the signal handler, so it's
// necessary to preserve it here.
//
// Expected user stack on entry (top first), as set up by Thread::dispatch_signal:
//   [esp+0] fake return address
//   [esp+4] address of the signal handler
//   [esp+8] signal code
// After "push ebp; mov ebp, esp" these sit at [ebp+4], [ebp+8] and [ebp+12].
//
// When the handler returns, the saved eax is on top of the stack again, which
// is where sys$sigreturn reads it from.
asm(
".intel_syntax noprefix\n"
"asm_signal_trampoline:\n"
"push ebp\n"
"mov ebp, esp\n"
"push eax\n" // we have to store eax 'cause it might be the return value from a syscall
"mov eax, [ebp+12]\n" // load the signal code...
"push eax\n" // ...and push it as the handler's argument
"call [ebp+8]\n" // call the signal handler
"add esp, 4\n" // drop the signal code argument; the saved eax is now on top of the stack
"mov eax, 0x2d\n" // FIXME: We shouldn't be hardcoding this.
"int 0x82\n" // sigreturn syscall
"asm_signal_trampoline_end:\n" // end marker; used to compute the size of the trampoline code
".att_syntax");
// Symbols defined by the asm block above. They are declared as functions only
// so that their addresses can be taken from C++.
extern "C" void asm_signal_trampoline(void);
extern "C" void asm_signal_trampoline_end(void);
void create_signal_trampolines()
{
InterruptDisabler disabler;
@ -743,42 +766,12 @@ void create_signal_trampolines()
auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines").leak_ref();
g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr();
u8* trampoline = (u8*)asm_signal_trampoline;
u8* trampoline_end = (u8*)asm_signal_trampoline_end;
size_t trampoline_size = trampoline_end - trampoline;
u8* code_ptr = (u8*)trampoline_region->vaddr().as_ptr();
*code_ptr++ = 0x58; // pop eax (Argument to signal handler (ignored here))
*code_ptr++ = 0x5a; // pop edx (Original signal mask to restore)
*code_ptr++ = 0xb8; // mov eax, <u32>
*(u32*)code_ptr = Syscall::SC_restore_signal_mask;
code_ptr += sizeof(u32);
*code_ptr++ = 0xcd; // int 0x82
*code_ptr++ = 0x82;
*code_ptr++ = 0x83; // add esp, (stack alignment padding)
*code_ptr++ = 0xc4;
*code_ptr++ = sizeof(u32) * 3;
*code_ptr++ = 0x61; // popa
*code_ptr++ = 0x9d; // popf
*code_ptr++ = 0xc3; // ret
*code_ptr++ = 0x0f; // ud2
*code_ptr++ = 0x0b;
g_return_to_ring0_from_signal_trampoline = VirtualAddress((u32)code_ptr);
*code_ptr++ = 0x58; // pop eax (Argument to signal handler (ignored here))
*code_ptr++ = 0x5a; // pop edx (Original signal mask to restore)
*code_ptr++ = 0xb8; // mov eax, <u32>
*(u32*)code_ptr = Syscall::SC_restore_signal_mask;
code_ptr += sizeof(u32);
*code_ptr++ = 0xcd; // int 0x82
// NOTE: Stack alignment padding doesn't matter when returning to ring0.
// Nothing matters really, as we're returning by replacing the entire TSS.
*code_ptr++ = 0x82;
*code_ptr++ = 0xb8; // mov eax, <u32>
*(u32*)code_ptr = Syscall::SC_sigreturn;
code_ptr += sizeof(u32);
*code_ptr++ = 0xcd; // int 0x82
*code_ptr++ = 0x82;
*code_ptr++ = 0x0f; // ud2
*code_ptr++ = 0x0b;
memcpy(code_ptr, trampoline, trampoline_size);
trampoline_region->set_writable(false);
MM.remap_region(*trampoline_region->page_directory(), *trampoline_region);
@ -790,21 +783,27 @@ int Process::sys$restore_signal_mask(u32 mask)
return 0;
}
void Process::sys$sigreturn()
int Process::sys$sigreturn(RegisterDump& registers)
{
InterruptDisabler disabler;
Scheduler::prepare_to_modify_tss(*current);
current->m_tss = *current->m_tss_to_resume_kernel;
current->m_tss_to_resume_kernel.clear();
#ifdef SIGNAL_DEBUG
kprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
auto& tss = current->tss();
kprintf(" -> resuming execution at %w:%x stack %w:%x flags %x cr3 %x\n", tss.cs, tss.eip, tss.ss, tss.esp, tss.eflags, tss.cr3);
#endif
current->set_state(Thread::State::Skip1SchedulerPass);
Scheduler::yield();
kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
ASSERT_NOT_REACHED();
//Here, we restore the state pushed by dispatch signal and asm_signal_trampoline.
u32* stack_ptr = (u32*)registers.esp_if_crossRing;
u32 smuggled_eax = *stack_ptr;
//pop the stored eax, ebp, return address, handler and signal code
stack_ptr += 5;
current->m_signal_mask = *stack_ptr;
stack_ptr++;
//pop edi, esi, ebp, esp, ebx, edx, ecx, eax and eip
memcpy(&registers.edi, stack_ptr, 9 * sizeof(u32));
stack_ptr += 9;
registers.eflags = *stack_ptr;
stack_ptr++;
registers.esp_if_crossRing = registers.esp;
return smuggled_eax;
}
void Process::crash(int signal, u32 eip)

View file

@ -141,7 +141,7 @@ public:
int sys$lseek(int fd, off_t, int whence);
int sys$kill(pid_t pid, int sig);
[[noreturn]] void sys$exit(int status);
[[noreturn]] void sys$sigreturn();
int sys$sigreturn(RegisterDump& registers);
pid_t sys$waitpid(pid_t, int* wstatus, int options);
void* sys$mmap(const Syscall::SC_mmap_params*);
int sys$munmap(void*, size_t size);

View file

@ -181,11 +181,7 @@ static u32 handle(RegisterDump& regs, u32 function, u32 arg1, u32 arg2, u32 arg3
case Syscall::SC_setgroups:
return current->process().sys$setgroups((ssize_t)arg1, (const gid_t*)arg2);
case Syscall::SC_sigreturn:
if (auto* tracer = current->process().tracer())
tracer->did_syscall(function, arg1, arg2, arg3, 0);
current->process().sys$sigreturn();
ASSERT_NOT_REACHED();
return 0;
return current->process().sys$sigreturn(regs);
case Syscall::SC_sigprocmask:
return current->process().sys$sigprocmask((int)arg1, (const sigset_t*)arg2, (sigset_t*)arg3);
case Syscall::SC_pipe:

View file

@ -316,6 +316,7 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
{
ASSERT_INTERRUPTS_DISABLED();
ASSERT(signal > 0 && signal <= 32);
ASSERT(!process().is_ring0());
#ifdef SIGNAL_DEBUG
kprintf("dispatch_signal %s(%u) <- %u\n", process().name().characters(), pid(), signal);
@ -366,6 +367,12 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
return ShouldUnblockThread::Yes;
}
ProcessPagingScope paging_scope(m_process);
// The userspace registers should be stored at the top of the stack
// We have to subtract 2 because the processor decrements the kernel
// stack before pushing the args.
auto& regs = *(RegisterDump*)(kernel_stack_top() - sizeof(RegisterDump) - 2);
u32 old_signal_mask = m_signal_mask;
u32 new_signal_mask = action.mask;
if (action.flags & SA_NODEFER)
@ -375,78 +382,49 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
m_signal_mask |= new_signal_mask;
Scheduler::prepare_to_modify_tss(*this);
u32 old_esp = regs.esp_if_crossRing;
u32 ret_eip = regs.eip;
u32 ret_eflags = regs.eflags;
u16 ret_cs = m_tss.cs;
u32 ret_eip = m_tss.eip;
u32 ret_eflags = m_tss.eflags;
bool interrupting_in_kernel = (ret_cs & 3) == 0;
push_value_on_user_stack(regs, ret_eflags);
ProcessPagingScope paging_scope(m_process);
if (interrupting_in_kernel) {
#ifdef SIGNAL_DEBUG
kprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", process().name().characters(), pid(), to_string(state()), ret_cs, ret_eip);
#endif
ASSERT(is_blocked());
m_tss_to_resume_kernel = make<TSS32>(m_tss);
#ifdef SIGNAL_DEBUG
kprintf("resume tss pc: %w:%x stack: %w:%x flags: %x cr3: %x\n", m_tss_to_resume_kernel->cs, m_tss_to_resume_kernel->eip, m_tss_to_resume_kernel->ss, m_tss_to_resume_kernel->esp, m_tss_to_resume_kernel->eflags, m_tss_to_resume_kernel->cr3);
#endif
if (!m_signal_stack_user_region) {
m_signal_stack_user_region = m_process.allocate_region(VirtualAddress(), default_userspace_stack_size, String::format("User Signal Stack (Thread %d)", m_tid));
ASSERT(m_signal_stack_user_region);
}
if (!m_kernel_stack_for_signal_handler_region)
m_kernel_stack_for_signal_handler_region = MM.allocate_kernel_region(default_kernel_stack_size, String::format("Kernel Signal Stack (Thread %d)", m_tid));
m_tss.ss = 0x23;
m_tss.esp = m_signal_stack_user_region->vaddr().offset(default_userspace_stack_size).get();
m_tss.ss0 = 0x10;
m_tss.esp0 = m_kernel_stack_for_signal_handler_region->vaddr().offset(default_kernel_stack_size).get();
push_value_on_stack(0);
} else {
push_value_on_stack(ret_eip);
push_value_on_stack(ret_eflags);
// PUSHA
u32 old_esp = m_tss.esp;
push_value_on_stack(m_tss.eax);
push_value_on_stack(m_tss.ecx);
push_value_on_stack(m_tss.edx);
push_value_on_stack(m_tss.ebx);
push_value_on_stack(old_esp);
push_value_on_stack(m_tss.ebp);
push_value_on_stack(m_tss.esi);
push_value_on_stack(m_tss.edi);
// Align the stack.
m_tss.esp -= 12;
}
push_value_on_user_stack(regs, ret_eip);
push_value_on_user_stack(regs, regs.eax);
push_value_on_user_stack(regs, regs.ecx);
push_value_on_user_stack(regs, regs.edx);
push_value_on_user_stack(regs, regs.ebx);
push_value_on_user_stack(regs, old_esp);
push_value_on_user_stack(regs, regs.ebp);
push_value_on_user_stack(regs, regs.esi);
push_value_on_user_stack(regs, regs.edi);
// PUSH old_signal_mask
push_value_on_stack(old_signal_mask);
push_value_on_user_stack(regs, old_signal_mask);
push_value_on_user_stack(regs, signal);
push_value_on_user_stack(regs, handler_vaddr.get());
push_value_on_user_stack(regs, 0); //push fake return address
regs.eip = g_return_to_ring3_from_signal_trampoline.get();
// FIXME: Should we worry about the stack being 16 byte aligned when entering a signal handler?
// If we're not blocking we need to update the tss so
// that the far jump in Scheduler goes to the proper location.
// When we are blocking we don't update the TSS as we want to
// resume at the blocker and descend the stack, cleaning up nicely.
if (!in_kernel()) {
Scheduler::prepare_to_modify_tss(*this);
m_tss.cs = 0x1b;
m_tss.ds = 0x23;
m_tss.es = 0x23;
m_tss.fs = 0x23;
m_tss.gs = 0x23;
m_tss.eip = handler_vaddr.get();
// FIXME: Should we worry about the stack being 16 byte aligned when entering a signal handler?
push_value_on_stack(signal);
if (interrupting_in_kernel)
push_value_on_stack(g_return_to_ring0_from_signal_trampoline.get());
else
push_value_on_stack(g_return_to_ring3_from_signal_trampoline.get());
ASSERT((m_tss.esp % 16) == 0);
m_tss.eip = regs.eip;
m_tss.esp = regs.esp_if_crossRing;
// FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
set_state(Skip1SchedulerPass);
}
#ifdef SIGNAL_DEBUG
kprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", process().name().characters(), pid(), to_string(state()), m_tss.cs, m_tss.eip);
@ -462,6 +440,13 @@ void Thread::set_default_signal_dispositions()
m_signal_action_data[SIGWINCH].handler_or_sigaction = VirtualAddress((u32)SIG_IGN);
}
// Pushes one 32-bit value onto the stack addressed by registers.esp_if_crossRing:
// the saved stack pointer is moved down by one u32 slot and `value` is stored
// at the new top.
// NOTE(review): the store goes straight through the saved stack pointer with no
// validation here; presumably callers guarantee it is a mapped, writable
// userspace address -- confirm.
void Thread::push_value_on_user_stack(RegisterDump& registers, u32 value)
{
    registers.esp_if_crossRing -= sizeof(u32);
    auto* slot = reinterpret_cast<u32*>(registers.esp_if_crossRing);
    *slot = value;
}
void Thread::push_value_on_stack(u32 value)
{
m_tss.esp -= 4;

View file

@ -6,9 +6,9 @@
#include <AK/OwnPtr.h>
#include <AK/RefPtr.h>
#include <AK/Vector.h>
#include <Kernel/Scheduler.h>
#include <Kernel/Arch/i386/CPU.h>
#include <Kernel/KResult.h>
#include <Kernel/Scheduler.h>
#include <Kernel/UnixTypes.h>
#include <Kernel/VM/Region.h>
#include <LibC/fd_set.h>
@ -73,6 +73,7 @@ public:
virtual const char* state_string() const = 0;
void set_interrupted_by_signal() { m_was_interrupted_while_blocked = true; }
bool was_interrupted_by_signal() const { return m_was_interrupted_while_blocked; }
private:
bool m_was_interrupted_while_blocked { false };
friend class Thread;
@ -220,8 +221,8 @@ public:
InterruptedBySignal,
};
template <typename T, class... Args>
[[nodiscard]] BlockResult block(Args&& ... args)
template<typename T, class... Args>
[[nodiscard]] BlockResult block(Args&&... args)
{
// We should never be blocking a blocked (or otherwise non-active) thread.
ASSERT(state() == Thread::Running);
@ -295,6 +296,7 @@ public:
void set_has_used_fpu(bool b) { m_has_used_fpu = b; }
void set_default_signal_dispositions();
void push_value_on_user_stack(RegisterDump&, u32);
void push_value_on_stack(u32);
void make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment);
void make_userspace_stack_for_secondary_thread(void* argument);