mirror of
https://github.com/RGBCube/serenity
synced 2025-07-23 13:57:35 +00:00
Kernel: Use user stack for signal handlers.
This commit drastically changes how signals are handled. When an unblocked thread is signaled, it works much the same way as before. However, when a blocking syscall is interrupted, we set up the signal trampoline on the user stack, complete the blocking syscall, return down the kernel stack and then jump to the handler. This means that, from the kernel stack's perspective, we only ever get one system call deep. The signal trampoline has also been changed so that it properly stores the return value from system calls. This is necessary due to the new way we exit from signaled system calls.
This commit is contained in:
parent
259a1d56b0
commit
81d0f96f20
5 changed files with 104 additions and 122 deletions
|
@ -735,6 +735,29 @@ void Process::sys$exit(int status)
|
||||||
ASSERT_NOT_REACHED();
|
ASSERT_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The trampoline preserves the current eax, pushes the signal code and
|
||||||
|
// then calls the signal handler. We do this because, when interrupting a
|
||||||
|
// blocking syscall, that syscall may return some special error code in eax;
|
||||||
|
// This error code would likely be overwritten by the signal handler, so it's
|
||||||
|
// necessary to preserve it here.
|
||||||
|
asm(
|
||||||
|
".intel_syntax noprefix\n"
|
||||||
|
"asm_signal_trampoline:\n"
|
||||||
|
"push ebp\n"
|
||||||
|
"mov ebp, esp\n"
|
||||||
|
"push eax\n" // we have to store eax 'cause it might be the return value from a syscall
|
||||||
|
"mov eax, [ebp+12]\n" // push the signal code
|
||||||
|
"push eax\n"
|
||||||
|
"call [ebp+8]\n" // call the signal handler
|
||||||
|
"add esp, 4\n"
|
||||||
|
"mov eax, 0x2d\n" // FIXME: We shouldn't be hardcoding this.
|
||||||
|
"int 0x82\n" // sigreturn syscall
|
||||||
|
"asm_signal_trampoline_end:\n"
|
||||||
|
".att_syntax");
|
||||||
|
|
||||||
|
extern "C" void asm_signal_trampoline(void);
|
||||||
|
extern "C" void asm_signal_trampoline_end(void);
|
||||||
|
|
||||||
void create_signal_trampolines()
|
void create_signal_trampolines()
|
||||||
{
|
{
|
||||||
InterruptDisabler disabler;
|
InterruptDisabler disabler;
|
||||||
|
@ -743,42 +766,12 @@ void create_signal_trampolines()
|
||||||
auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines").leak_ref();
|
auto* trampoline_region = MM.allocate_user_accessible_kernel_region(PAGE_SIZE, "Signal trampolines").leak_ref();
|
||||||
g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr();
|
g_return_to_ring3_from_signal_trampoline = trampoline_region->vaddr();
|
||||||
|
|
||||||
|
u8* trampoline = (u8*)asm_signal_trampoline;
|
||||||
|
u8* trampoline_end = (u8*)asm_signal_trampoline_end;
|
||||||
|
size_t trampoline_size = trampoline_end - trampoline;
|
||||||
|
|
||||||
u8* code_ptr = (u8*)trampoline_region->vaddr().as_ptr();
|
u8* code_ptr = (u8*)trampoline_region->vaddr().as_ptr();
|
||||||
*code_ptr++ = 0x58; // pop eax (Argument to signal handler (ignored here))
|
memcpy(code_ptr, trampoline, trampoline_size);
|
||||||
*code_ptr++ = 0x5a; // pop edx (Original signal mask to restore)
|
|
||||||
*code_ptr++ = 0xb8; // mov eax, <u32>
|
|
||||||
*(u32*)code_ptr = Syscall::SC_restore_signal_mask;
|
|
||||||
code_ptr += sizeof(u32);
|
|
||||||
*code_ptr++ = 0xcd; // int 0x82
|
|
||||||
*code_ptr++ = 0x82;
|
|
||||||
|
|
||||||
*code_ptr++ = 0x83; // add esp, (stack alignment padding)
|
|
||||||
*code_ptr++ = 0xc4;
|
|
||||||
*code_ptr++ = sizeof(u32) * 3;
|
|
||||||
|
|
||||||
*code_ptr++ = 0x61; // popa
|
|
||||||
*code_ptr++ = 0x9d; // popf
|
|
||||||
*code_ptr++ = 0xc3; // ret
|
|
||||||
*code_ptr++ = 0x0f; // ud2
|
|
||||||
*code_ptr++ = 0x0b;
|
|
||||||
|
|
||||||
g_return_to_ring0_from_signal_trampoline = VirtualAddress((u32)code_ptr);
|
|
||||||
*code_ptr++ = 0x58; // pop eax (Argument to signal handler (ignored here))
|
|
||||||
*code_ptr++ = 0x5a; // pop edx (Original signal mask to restore)
|
|
||||||
*code_ptr++ = 0xb8; // mov eax, <u32>
|
|
||||||
*(u32*)code_ptr = Syscall::SC_restore_signal_mask;
|
|
||||||
code_ptr += sizeof(u32);
|
|
||||||
*code_ptr++ = 0xcd; // int 0x82
|
|
||||||
// NOTE: Stack alignment padding doesn't matter when returning to ring0.
|
|
||||||
// Nothing matters really, as we're returning by replacing the entire TSS.
|
|
||||||
*code_ptr++ = 0x82;
|
|
||||||
*code_ptr++ = 0xb8; // mov eax, <u32>
|
|
||||||
*(u32*)code_ptr = Syscall::SC_sigreturn;
|
|
||||||
code_ptr += sizeof(u32);
|
|
||||||
*code_ptr++ = 0xcd; // int 0x82
|
|
||||||
*code_ptr++ = 0x82;
|
|
||||||
*code_ptr++ = 0x0f; // ud2
|
|
||||||
*code_ptr++ = 0x0b;
|
|
||||||
|
|
||||||
trampoline_region->set_writable(false);
|
trampoline_region->set_writable(false);
|
||||||
MM.remap_region(*trampoline_region->page_directory(), *trampoline_region);
|
MM.remap_region(*trampoline_region->page_directory(), *trampoline_region);
|
||||||
|
@ -790,21 +783,27 @@ int Process::sys$restore_signal_mask(u32 mask)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Process::sys$sigreturn()
|
int Process::sys$sigreturn(RegisterDump& registers)
|
||||||
{
|
{
|
||||||
InterruptDisabler disabler;
|
//Here, we restore the state pushed by dispatch signal and asm_signal_trampoline.
|
||||||
Scheduler::prepare_to_modify_tss(*current);
|
u32* stack_ptr = (u32*)registers.esp_if_crossRing;
|
||||||
current->m_tss = *current->m_tss_to_resume_kernel;
|
u32 smuggled_eax = *stack_ptr;
|
||||||
current->m_tss_to_resume_kernel.clear();
|
|
||||||
#ifdef SIGNAL_DEBUG
|
//pop the stored eax, ebp, return address, handler and signal code
|
||||||
kprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
|
stack_ptr += 5;
|
||||||
auto& tss = current->tss();
|
|
||||||
kprintf(" -> resuming execution at %w:%x stack %w:%x flags %x cr3 %x\n", tss.cs, tss.eip, tss.ss, tss.esp, tss.eflags, tss.cr3);
|
current->m_signal_mask = *stack_ptr;
|
||||||
#endif
|
stack_ptr++;
|
||||||
current->set_state(Thread::State::Skip1SchedulerPass);
|
|
||||||
Scheduler::yield();
|
//pop edi, esi, ebp, esp, ebx, edx, ecx, eax and eip
|
||||||
kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
|
memcpy(&registers.edi, stack_ptr, 9 * sizeof(u32));
|
||||||
ASSERT_NOT_REACHED();
|
stack_ptr += 9;
|
||||||
|
|
||||||
|
registers.eflags = *stack_ptr;
|
||||||
|
stack_ptr++;
|
||||||
|
|
||||||
|
registers.esp_if_crossRing = registers.esp;
|
||||||
|
return smuggled_eax;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Process::crash(int signal, u32 eip)
|
void Process::crash(int signal, u32 eip)
|
||||||
|
|
|
@ -141,7 +141,7 @@ public:
|
||||||
int sys$lseek(int fd, off_t, int whence);
|
int sys$lseek(int fd, off_t, int whence);
|
||||||
int sys$kill(pid_t pid, int sig);
|
int sys$kill(pid_t pid, int sig);
|
||||||
[[noreturn]] void sys$exit(int status);
|
[[noreturn]] void sys$exit(int status);
|
||||||
[[noreturn]] void sys$sigreturn();
|
int sys$sigreturn(RegisterDump& registers);
|
||||||
pid_t sys$waitpid(pid_t, int* wstatus, int options);
|
pid_t sys$waitpid(pid_t, int* wstatus, int options);
|
||||||
void* sys$mmap(const Syscall::SC_mmap_params*);
|
void* sys$mmap(const Syscall::SC_mmap_params*);
|
||||||
int sys$munmap(void*, size_t size);
|
int sys$munmap(void*, size_t size);
|
||||||
|
|
|
@ -181,11 +181,7 @@ static u32 handle(RegisterDump& regs, u32 function, u32 arg1, u32 arg2, u32 arg3
|
||||||
case Syscall::SC_setgroups:
|
case Syscall::SC_setgroups:
|
||||||
return current->process().sys$setgroups((ssize_t)arg1, (const gid_t*)arg2);
|
return current->process().sys$setgroups((ssize_t)arg1, (const gid_t*)arg2);
|
||||||
case Syscall::SC_sigreturn:
|
case Syscall::SC_sigreturn:
|
||||||
if (auto* tracer = current->process().tracer())
|
return current->process().sys$sigreturn(regs);
|
||||||
tracer->did_syscall(function, arg1, arg2, arg3, 0);
|
|
||||||
current->process().sys$sigreturn();
|
|
||||||
ASSERT_NOT_REACHED();
|
|
||||||
return 0;
|
|
||||||
case Syscall::SC_sigprocmask:
|
case Syscall::SC_sigprocmask:
|
||||||
return current->process().sys$sigprocmask((int)arg1, (const sigset_t*)arg2, (sigset_t*)arg3);
|
return current->process().sys$sigprocmask((int)arg1, (const sigset_t*)arg2, (sigset_t*)arg3);
|
||||||
case Syscall::SC_pipe:
|
case Syscall::SC_pipe:
|
||||||
|
|
|
@ -316,6 +316,7 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
|
||||||
{
|
{
|
||||||
ASSERT_INTERRUPTS_DISABLED();
|
ASSERT_INTERRUPTS_DISABLED();
|
||||||
ASSERT(signal > 0 && signal <= 32);
|
ASSERT(signal > 0 && signal <= 32);
|
||||||
|
ASSERT(!process().is_ring0());
|
||||||
|
|
||||||
#ifdef SIGNAL_DEBUG
|
#ifdef SIGNAL_DEBUG
|
||||||
kprintf("dispatch_signal %s(%u) <- %u\n", process().name().characters(), pid(), signal);
|
kprintf("dispatch_signal %s(%u) <- %u\n", process().name().characters(), pid(), signal);
|
||||||
|
@ -366,6 +367,12 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
|
||||||
return ShouldUnblockThread::Yes;
|
return ShouldUnblockThread::Yes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ProcessPagingScope paging_scope(m_process);
|
||||||
|
// The userspace registers should be stored at the top of the stack
|
||||||
|
// We have to subtract 2 because the processor decrements the kernel
|
||||||
|
// stack before pushing the args.
|
||||||
|
auto& regs = *(RegisterDump*)(kernel_stack_top() - sizeof(RegisterDump) - 2);
|
||||||
|
|
||||||
u32 old_signal_mask = m_signal_mask;
|
u32 old_signal_mask = m_signal_mask;
|
||||||
u32 new_signal_mask = action.mask;
|
u32 new_signal_mask = action.mask;
|
||||||
if (action.flags & SA_NODEFER)
|
if (action.flags & SA_NODEFER)
|
||||||
|
@ -375,78 +382,49 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
|
||||||
|
|
||||||
m_signal_mask |= new_signal_mask;
|
m_signal_mask |= new_signal_mask;
|
||||||
|
|
||||||
Scheduler::prepare_to_modify_tss(*this);
|
u32 old_esp = regs.esp_if_crossRing;
|
||||||
|
u32 ret_eip = regs.eip;
|
||||||
|
u32 ret_eflags = regs.eflags;
|
||||||
|
|
||||||
u16 ret_cs = m_tss.cs;
|
push_value_on_user_stack(regs, ret_eflags);
|
||||||
u32 ret_eip = m_tss.eip;
|
|
||||||
u32 ret_eflags = m_tss.eflags;
|
|
||||||
bool interrupting_in_kernel = (ret_cs & 3) == 0;
|
|
||||||
|
|
||||||
ProcessPagingScope paging_scope(m_process);
|
push_value_on_user_stack(regs, ret_eip);
|
||||||
|
push_value_on_user_stack(regs, regs.eax);
|
||||||
if (interrupting_in_kernel) {
|
push_value_on_user_stack(regs, regs.ecx);
|
||||||
#ifdef SIGNAL_DEBUG
|
push_value_on_user_stack(regs, regs.edx);
|
||||||
kprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", process().name().characters(), pid(), to_string(state()), ret_cs, ret_eip);
|
push_value_on_user_stack(regs, regs.ebx);
|
||||||
#endif
|
push_value_on_user_stack(regs, old_esp);
|
||||||
ASSERT(is_blocked());
|
push_value_on_user_stack(regs, regs.ebp);
|
||||||
m_tss_to_resume_kernel = make<TSS32>(m_tss);
|
push_value_on_user_stack(regs, regs.esi);
|
||||||
#ifdef SIGNAL_DEBUG
|
push_value_on_user_stack(regs, regs.edi);
|
||||||
kprintf("resume tss pc: %w:%x stack: %w:%x flags: %x cr3: %x\n", m_tss_to_resume_kernel->cs, m_tss_to_resume_kernel->eip, m_tss_to_resume_kernel->ss, m_tss_to_resume_kernel->esp, m_tss_to_resume_kernel->eflags, m_tss_to_resume_kernel->cr3);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!m_signal_stack_user_region) {
|
|
||||||
m_signal_stack_user_region = m_process.allocate_region(VirtualAddress(), default_userspace_stack_size, String::format("User Signal Stack (Thread %d)", m_tid));
|
|
||||||
ASSERT(m_signal_stack_user_region);
|
|
||||||
}
|
|
||||||
if (!m_kernel_stack_for_signal_handler_region)
|
|
||||||
m_kernel_stack_for_signal_handler_region = MM.allocate_kernel_region(default_kernel_stack_size, String::format("Kernel Signal Stack (Thread %d)", m_tid));
|
|
||||||
m_tss.ss = 0x23;
|
|
||||||
m_tss.esp = m_signal_stack_user_region->vaddr().offset(default_userspace_stack_size).get();
|
|
||||||
m_tss.ss0 = 0x10;
|
|
||||||
m_tss.esp0 = m_kernel_stack_for_signal_handler_region->vaddr().offset(default_kernel_stack_size).get();
|
|
||||||
|
|
||||||
push_value_on_stack(0);
|
|
||||||
} else {
|
|
||||||
push_value_on_stack(ret_eip);
|
|
||||||
push_value_on_stack(ret_eflags);
|
|
||||||
|
|
||||||
// PUSHA
|
|
||||||
u32 old_esp = m_tss.esp;
|
|
||||||
push_value_on_stack(m_tss.eax);
|
|
||||||
push_value_on_stack(m_tss.ecx);
|
|
||||||
push_value_on_stack(m_tss.edx);
|
|
||||||
push_value_on_stack(m_tss.ebx);
|
|
||||||
push_value_on_stack(old_esp);
|
|
||||||
push_value_on_stack(m_tss.ebp);
|
|
||||||
push_value_on_stack(m_tss.esi);
|
|
||||||
push_value_on_stack(m_tss.edi);
|
|
||||||
|
|
||||||
// Align the stack.
|
|
||||||
m_tss.esp -= 12;
|
|
||||||
}
|
|
||||||
|
|
||||||
// PUSH old_signal_mask
|
// PUSH old_signal_mask
|
||||||
push_value_on_stack(old_signal_mask);
|
push_value_on_user_stack(regs, old_signal_mask);
|
||||||
|
|
||||||
m_tss.cs = 0x1b;
|
push_value_on_user_stack(regs, signal);
|
||||||
m_tss.ds = 0x23;
|
push_value_on_user_stack(regs, handler_vaddr.get());
|
||||||
m_tss.es = 0x23;
|
push_value_on_user_stack(regs, 0); //push fake return address
|
||||||
m_tss.fs = 0x23;
|
|
||||||
m_tss.gs = 0x23;
|
regs.eip = g_return_to_ring3_from_signal_trampoline.get();
|
||||||
m_tss.eip = handler_vaddr.get();
|
|
||||||
|
|
||||||
// FIXME: Should we worry about the stack being 16 byte aligned when entering a signal handler?
|
// FIXME: Should we worry about the stack being 16 byte aligned when entering a signal handler?
|
||||||
push_value_on_stack(signal);
|
|
||||||
|
|
||||||
if (interrupting_in_kernel)
|
// If we're not blocking we need to update the tss so
|
||||||
push_value_on_stack(g_return_to_ring0_from_signal_trampoline.get());
|
// that the far jump in Scheduler goes to the proper location.
|
||||||
else
|
// When we are blocking we don't update the TSS as we want to
|
||||||
push_value_on_stack(g_return_to_ring3_from_signal_trampoline.get());
|
// resume at the blocker and descend the stack, cleaning up nicely.
|
||||||
|
if (!in_kernel()) {
|
||||||
ASSERT((m_tss.esp % 16) == 0);
|
Scheduler::prepare_to_modify_tss(*this);
|
||||||
|
m_tss.cs = 0x1b;
|
||||||
// FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
|
m_tss.ds = 0x23;
|
||||||
set_state(Skip1SchedulerPass);
|
m_tss.es = 0x23;
|
||||||
|
m_tss.fs = 0x23;
|
||||||
|
m_tss.gs = 0x23;
|
||||||
|
m_tss.eip = regs.eip;
|
||||||
|
m_tss.esp = regs.esp_if_crossRing;
|
||||||
|
// FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
|
||||||
|
set_state(Skip1SchedulerPass);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef SIGNAL_DEBUG
|
#ifdef SIGNAL_DEBUG
|
||||||
kprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", process().name().characters(), pid(), to_string(state()), m_tss.cs, m_tss.eip);
|
kprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", process().name().characters(), pid(), to_string(state()), m_tss.cs, m_tss.eip);
|
||||||
|
@ -462,6 +440,13 @@ void Thread::set_default_signal_dispositions()
|
||||||
m_signal_action_data[SIGWINCH].handler_or_sigaction = VirtualAddress((u32)SIG_IGN);
|
m_signal_action_data[SIGWINCH].handler_or_sigaction = VirtualAddress((u32)SIG_IGN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Thread::push_value_on_user_stack(RegisterDump& registers, u32 value)
|
||||||
|
{
|
||||||
|
registers.esp_if_crossRing -= 4;
|
||||||
|
u32* stack_ptr = (u32*)registers.esp_if_crossRing;
|
||||||
|
*stack_ptr = value;
|
||||||
|
}
|
||||||
|
|
||||||
void Thread::push_value_on_stack(u32 value)
|
void Thread::push_value_on_stack(u32 value)
|
||||||
{
|
{
|
||||||
m_tss.esp -= 4;
|
m_tss.esp -= 4;
|
||||||
|
|
|
@ -6,9 +6,9 @@
|
||||||
#include <AK/OwnPtr.h>
|
#include <AK/OwnPtr.h>
|
||||||
#include <AK/RefPtr.h>
|
#include <AK/RefPtr.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
#include <Kernel/Scheduler.h>
|
|
||||||
#include <Kernel/Arch/i386/CPU.h>
|
#include <Kernel/Arch/i386/CPU.h>
|
||||||
#include <Kernel/KResult.h>
|
#include <Kernel/KResult.h>
|
||||||
|
#include <Kernel/Scheduler.h>
|
||||||
#include <Kernel/UnixTypes.h>
|
#include <Kernel/UnixTypes.h>
|
||||||
#include <Kernel/VM/Region.h>
|
#include <Kernel/VM/Region.h>
|
||||||
#include <LibC/fd_set.h>
|
#include <LibC/fd_set.h>
|
||||||
|
@ -73,6 +73,7 @@ public:
|
||||||
virtual const char* state_string() const = 0;
|
virtual const char* state_string() const = 0;
|
||||||
void set_interrupted_by_signal() { m_was_interrupted_while_blocked = true; }
|
void set_interrupted_by_signal() { m_was_interrupted_while_blocked = true; }
|
||||||
bool was_interrupted_by_signal() const { return m_was_interrupted_while_blocked; }
|
bool was_interrupted_by_signal() const { return m_was_interrupted_while_blocked; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool m_was_interrupted_while_blocked { false };
|
bool m_was_interrupted_while_blocked { false };
|
||||||
friend class Thread;
|
friend class Thread;
|
||||||
|
@ -220,8 +221,8 @@ public:
|
||||||
InterruptedBySignal,
|
InterruptedBySignal,
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, class... Args>
|
template<typename T, class... Args>
|
||||||
[[nodiscard]] BlockResult block(Args&& ... args)
|
[[nodiscard]] BlockResult block(Args&&... args)
|
||||||
{
|
{
|
||||||
// We should never be blocking a blocked (or otherwise non-active) thread.
|
// We should never be blocking a blocked (or otherwise non-active) thread.
|
||||||
ASSERT(state() == Thread::Running);
|
ASSERT(state() == Thread::Running);
|
||||||
|
@ -295,6 +296,7 @@ public:
|
||||||
void set_has_used_fpu(bool b) { m_has_used_fpu = b; }
|
void set_has_used_fpu(bool b) { m_has_used_fpu = b; }
|
||||||
|
|
||||||
void set_default_signal_dispositions();
|
void set_default_signal_dispositions();
|
||||||
|
void push_value_on_user_stack(RegisterDump&, u32);
|
||||||
void push_value_on_stack(u32);
|
void push_value_on_stack(u32);
|
||||||
void make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment);
|
void make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment);
|
||||||
void make_userspace_stack_for_secondary_thread(void* argument);
|
void make_userspace_stack_for_secondary_thread(void* argument);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue