mirror of
https://github.com/RGBCube/serenity
synced 2025-05-15 13:54:58 +00:00

Hook the kernel page fault handler and capture page fault events when the fault has a current thread attached in TLS. We capture the eip and ebp so we can unwind the stack and locate which pieces of code are generating the most page faults. Co-authored-by: Gunnar Beutner <gbeutner@serenityos.org>
2499 lines
90 KiB
C++
/*
 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Assertions.h>
#include <AK/ScopeGuard.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Types.h>
#include <Kernel/Arch/x86/CPU.h>
#include <Kernel/Arch/x86/ISRStubs.h>
#include <Kernel/Arch/x86/ProcessorInfo.h>
#include <Kernel/Arch/x86/SafeMem.h>
#include <Kernel/Assertions.h>
#include <Kernel/Debug.h>
#include <Kernel/IO.h>
#include <Kernel/Interrupts/APIC.h>
#include <Kernel/Interrupts/GenericInterruptHandler.h>
#include <Kernel/Interrupts/SharedIRQHandler.h>
#include <Kernel/Interrupts/SpuriousInterruptHandler.h>
#include <Kernel/Interrupts/UnhandledInterruptHandler.h>
#include <Kernel/KSyms.h>
#include <Kernel/Panic.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Thread.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/ProcessPagingScope.h>
#include <LibC/mallocdefs.h>

extern FlatPtr start_of_unmap_after_init;
extern FlatPtr end_of_unmap_after_init;
extern FlatPtr start_of_ro_after_init;
extern FlatPtr end_of_ro_after_init;

namespace Kernel {

READONLY_AFTER_INIT static DescriptorTablePointer s_idtr;
READONLY_AFTER_INIT static IDTEntry s_idt[256];

static GenericInterruptHandler* s_interrupt_handler[GENERIC_INTERRUPT_HANDLERS_COUNT];

static EntropySource s_entropy_source_interrupts { EntropySource::Static::Interrupts };

// The compiler can't see the calls to these functions inside assembly.
// Declare them, to avoid dead code warnings.
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
extern "C" u32 do_init_context(Thread* thread, u32 flags) __attribute__((used));
extern "C" void exit_kernel_thread(void);
extern "C" void pre_init_finished(void) __attribute__((used));
extern "C" void post_init_finished(void) __attribute__((used));
extern "C" void handle_interrupt(TrapFrame*) __attribute__((used));

// clang-format off

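// EH_ENTRY is for exceptions where the CPU pushes an error code on the stack;
// EH_ENTRY_NO_CODE first pushes a dummy 0 so both variants build an identical
// RegisterState layout before calling into the C++ handler.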
#if ARCH(I386)
#define EH_ENTRY(ec, title) \
    extern "C" void title##_asm_entry(); \
    extern "C" void title##_handler(TrapFrame*) __attribute__((used)); \
    asm( \
        ".globl " #title "_asm_entry\n" \
        "" #title "_asm_entry: \n" \
        " pusha\n" \
        " pushl %ds\n" \
        " pushl %es\n" \
        " pushl %fs\n" \
        " pushl %gs\n" \
        " pushl %ss\n" \
        " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \
        " mov %ax, %ds\n" \
        " mov %ax, %es\n" \
        " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \
        " mov %ax, %fs\n" \
        " pushl %esp \n" /* set TrapFrame::regs */ \
        " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \
        " pushl %esp \n" \
        " cld\n" \
        " call enter_trap_no_irq \n" \
        " call " #title "_handler\n" \
        " jmp common_trap_exit \n");

#define EH_ENTRY_NO_CODE(ec, title) \
    extern "C" void title##_asm_entry(); \
    extern "C" void title##_handler(TrapFrame*) __attribute__((used)); \
    asm( \
        ".globl " #title "_asm_entry\n" \
        "" #title "_asm_entry: \n" \
        " pushl $0x0\n" \
        " pusha\n" \
        " pushl %ds\n" \
        " pushl %es\n" \
        " pushl %fs\n" \
        " pushl %gs\n" \
        " pushl %ss\n" \
        " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \
        " mov %ax, %ds\n" \
        " mov %ax, %es\n" \
        " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \
        " mov %ax, %fs\n" \
        " pushl %esp \n" /* set TrapFrame::regs */ \
        " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \
        " pushl %esp \n" \
        " cld\n" \
        " call enter_trap_no_irq \n" \
        " call " #title "_handler\n" \
        " jmp common_trap_exit \n");

#elif ARCH(X86_64)
#define EH_ENTRY(ec, title) \
    extern "C" void title##_asm_entry(); \
    extern "C" void title##_handler(TrapFrame*); \
    asm( \
        ".globl " #title "_asm_entry\n" \
        "" #title "_asm_entry: \n" \
        " cli;hlt;\n" \
    );

#define EH_ENTRY_NO_CODE(ec, title) \
    extern "C" void title##_handler(TrapFrame*); \
    extern "C" void title##_asm_entry(); \
    asm( \
        ".globl " #title "_asm_entry\n" \
        "" #title "_asm_entry: \n" \
        " cli;hlt;\n" \
    );
#endif

// clang-format on

static void dump(const RegisterState& regs)
{
    u16 ss;
    u32 esp;

    if (!(regs.cs & 3)) {
        ss = regs.ss;
        esp = regs.esp;
    } else {
        ss = regs.userspace_ss;
        esp = regs.userspace_esp;
    }

    dbgln("Exception code: {:04x} (isr: {:04x})", regs.exception_code, regs.isr_number);
    dbgln(" pc={:04x}:{:08x} eflags={:08x}", (u16)regs.cs, regs.eip, regs.eflags);
    dbgln(" stack={:04x}:{:08x}", ss, esp);
    dbgln(" ds={:04x} es={:04x} fs={:04x} gs={:04x}", (u16)regs.ds, (u16)regs.es, (u16)regs.fs, (u16)regs.gs);
    dbgln(" eax={:08x} ebx={:08x} ecx={:08x} edx={:08x}", regs.eax, regs.ebx, regs.ecx, regs.edx);
    dbgln(" ebp={:08x} esp={:08x} esi={:08x} edi={:08x}", regs.ebp, regs.esp, regs.esi, regs.edi);
    dbgln(" cr0={:08x} cr2={:08x} cr3={:08x} cr4={:08x}", read_cr0(), read_cr2(), read_cr3(), read_cr4());
}

void handle_crash(RegisterState& regs, const char* description, int signal, bool out_of_memory)
{
    auto process = Process::current();
    if (!process) {
        PANIC("{} with !current", description);
    }

    // If a process crashed while inspecting another process,
    // make sure we switch back to the right page tables.
    MM.enter_process_paging_scope(*process);

    dmesgln("CRASH: CPU #{} {} in ring {}", Processor::id(), description, (regs.cs & 3));
    dump(regs);

    if (!(regs.cs & 3)) {
        PANIC("Crash in ring 0");
    }

    process->crash(signal, regs.eip, out_of_memory);
}

EH_ENTRY_NO_CODE(6, illegal_instruction);
void illegal_instruction_handler(TrapFrame* trap)
{
    clac();
    handle_crash(*trap->regs, "Illegal instruction", SIGILL);
}

EH_ENTRY_NO_CODE(0, divide_error);
void divide_error_handler(TrapFrame* trap)
{
    clac();
    handle_crash(*trap->regs, "Divide error", SIGFPE);
}

EH_ENTRY(13, general_protection_fault);
void general_protection_fault_handler(TrapFrame* trap)
{
    clac();
    handle_crash(*trap->regs, "General protection fault", SIGSEGV);
}

// 7: FPU not available exception
EH_ENTRY_NO_CODE(7, fpu_exception);
void fpu_exception_handler(TrapFrame*)
{
    // Just clear the TS flag. We've already restored the FPU state eagerly.
    // FIXME: It would be nice if we didn't have to do this at all.
    asm volatile("clts");
}

// 14: Page Fault
EH_ENTRY(14, page_fault);
void page_fault_handler(TrapFrame* trap)
{
    clac();

    auto& regs = *trap->regs;
    auto fault_address = read_cr2();

    if constexpr (PAGE_FAULT_DEBUG) {
        u32 fault_page_directory = read_cr3();
        dbgln("CPU #{} ring {} {} page fault in PD={:#x}, {}{} {}",
            Processor::is_initialized() ? Processor::id() : 0,
            regs.cs & 3,
            regs.exception_code & 1 ? "PV" : "NP",
            fault_page_directory,
            regs.exception_code & 8 ? "reserved-bit " : "",
            regs.exception_code & 2 ? "write" : "read",
            VirtualAddress(fault_address));

        dump(regs);
    }

    bool faulted_in_kernel = !(regs.cs & 3);

    if (faulted_in_kernel && Processor::current().in_irq()) {
        // If we're faulting in an IRQ handler, first check if we failed
        // due to safe_memcpy, safe_strnlen, or safe_memset. If we did,
        // gracefully continue immediately. Because we're in an IRQ handler
        // we can't really try to resolve the page fault in a meaningful
        // way, so we need to do this before calling into
        // MemoryManager::handle_page_fault, which would just bail and
        // request a crash
        if (handle_safe_access_fault(regs, fault_address))
            return;
    }

    auto current_thread = Thread::current();

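    // Record a page fault event for profiling while the fault has a current
    // thread attached in TLS (see the commit message above): the eip/ebp saved
    // in `regs` let the profiler unwind the stack later and attribute faults
    // to the code that generated them.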
    if (current_thread) {
        current_thread->set_handling_page_fault(true);
        PerformanceManager::add_page_fault_event(*current_thread, regs);
    }

    ScopeGuard guard = [current_thread] {
        if (current_thread)
            current_thread->set_handling_page_fault(false);
    };

    if (!faulted_in_kernel && !MM.validate_user_stack(current_thread->process(), VirtualAddress(regs.userspace_esp))) {
        dbgln("Invalid stack pointer: {}", VirtualAddress(regs.userspace_esp));
        handle_crash(regs, "Bad stack on page fault", SIGSTKFLT);
    }

    if (fault_address >= (FlatPtr)&start_of_ro_after_init && fault_address < (FlatPtr)&end_of_ro_after_init) {
        dump(regs);
        PANIC("Attempt to write into READONLY_AFTER_INIT section");
    }

    if (fault_address >= (FlatPtr)&start_of_unmap_after_init && fault_address < (FlatPtr)&end_of_unmap_after_init) {
        dump(regs);
        PANIC("Attempt to access UNMAP_AFTER_INIT section");
    }

    PageFault fault { regs.exception_code, VirtualAddress { fault_address } };
    auto response = MM.handle_page_fault(fault);

    if (response == PageFaultResponse::ShouldCrash || response == PageFaultResponse::OutOfMemory) {
        if (faulted_in_kernel && handle_safe_access_fault(regs, fault_address)) {
            // If this would be a ring0 (kernel) fault and the fault was triggered by
            // safe_memcpy, safe_strnlen, or safe_memset then we resume execution at
            // the appropriate _fault label rather than crashing
            return;
        }

        if (response != PageFaultResponse::OutOfMemory && current_thread) {
            if (current_thread->has_signal_handler(SIGSEGV)) {
                current_thread->send_urgent_signal_to_self(SIGSEGV);
                return;
            }
        }

        dbgln("Unrecoverable page fault, {}{}{} address {}",
            regs.exception_code & PageFaultFlags::ReservedBitViolation ? "reserved bit violation / " : "",
            regs.exception_code & PageFaultFlags::InstructionFetch ? "instruction fetch / " : "",
            regs.exception_code & PageFaultFlags::Write ? "write to" : "read from",
            VirtualAddress(fault_address));
        u32 malloc_scrub_pattern = explode_byte(MALLOC_SCRUB_BYTE);
        u32 free_scrub_pattern = explode_byte(FREE_SCRUB_BYTE);
        u32 kmalloc_scrub_pattern = explode_byte(KMALLOC_SCRUB_BYTE);
        u32 kfree_scrub_pattern = explode_byte(KFREE_SCRUB_BYTE);
        u32 slab_alloc_scrub_pattern = explode_byte(SLAB_ALLOC_SCRUB_BYTE);
        u32 slab_dealloc_scrub_pattern = explode_byte(SLAB_DEALLOC_SCRUB_BYTE);
        if ((fault_address & 0xffff0000) == (malloc_scrub_pattern & 0xffff0000)) {
            dbgln("Note: Address {} looks like it may be uninitialized malloc() memory", VirtualAddress(fault_address));
        } else if ((fault_address & 0xffff0000) == (free_scrub_pattern & 0xffff0000)) {
            dbgln("Note: Address {} looks like it may be recently free()'d memory", VirtualAddress(fault_address));
        } else if ((fault_address & 0xffff0000) == (kmalloc_scrub_pattern & 0xffff0000)) {
            dbgln("Note: Address {} looks like it may be uninitialized kmalloc() memory", VirtualAddress(fault_address));
        } else if ((fault_address & 0xffff0000) == (kfree_scrub_pattern & 0xffff0000)) {
            dbgln("Note: Address {} looks like it may be recently kfree()'d memory", VirtualAddress(fault_address));
        } else if ((fault_address & 0xffff0000) == (slab_alloc_scrub_pattern & 0xffff0000)) {
            dbgln("Note: Address {} looks like it may be uninitialized slab_alloc() memory", VirtualAddress(fault_address));
        } else if ((fault_address & 0xffff0000) == (slab_dealloc_scrub_pattern & 0xffff0000)) {
            dbgln("Note: Address {} looks like it may be recently slab_dealloc()'d memory", VirtualAddress(fault_address));
        } else if (fault_address < 4096) {
            dbgln("Note: Address {} looks like a possible nullptr dereference", VirtualAddress(fault_address));
        }

        if (current_thread) {
            auto& current_process = current_thread->process();
            if (current_process.is_user_process()) {
                current_process.set_coredump_metadata("fault_address", String::formatted("{:p}", fault_address));
                current_process.set_coredump_metadata("fault_type", fault.type() == PageFault::Type::PageNotPresent ? "NotPresent" : "ProtectionViolation");
                String fault_access;
                if (fault.is_instruction_fetch())
                    fault_access = "Execute";
                else
                    fault_access = fault.access() == PageFault::Access::Read ? "Read" : "Write";
                current_process.set_coredump_metadata("fault_access", fault_access);
            }
        }

        handle_crash(regs, "Page Fault", SIGSEGV, response == PageFaultResponse::OutOfMemory);
    } else if (response == PageFaultResponse::Continue) {
        dbgln_if(PAGE_FAULT_DEBUG, "Continuing after resolved page fault");
    } else {
        VERIFY_NOT_REACHED();
    }
}

EH_ENTRY_NO_CODE(1, debug);
void debug_handler(TrapFrame* trap)
{
    clac();
    auto& regs = *trap->regs;
    auto current_thread = Thread::current();
    auto& process = current_thread->process();
    if ((regs.cs & 3) == 0) {
        PANIC("Debug exception in ring 0");
    }
    constexpr u8 REASON_SINGLESTEP = 14;
    auto debug_status = read_dr6();
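    // In DR6, bits 0-3 (B0-B3) flag which hardware breakpoint fired and
    // bit 14 (BS) flags a single-step trap; anything else is not a trap
    // we care about here.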
    auto should_trap_mask = (1 << REASON_SINGLESTEP) | 0b1111;
    if ((debug_status & should_trap_mask) == 0)
        return;
    if (auto tracer = process.tracer()) {
        tracer->set_regs(regs);
    }
    current_thread->send_urgent_signal_to_self(SIGTRAP);
    write_dr6(debug_status & ~(should_trap_mask));
}

EH_ENTRY_NO_CODE(3, breakpoint);
void breakpoint_handler(TrapFrame* trap)
{
    clac();
    auto& regs = *trap->regs;
    auto current_thread = Thread::current();
    auto& process = current_thread->process();
    if ((regs.cs & 3) == 0) {
        PANIC("Breakpoint trap in ring 0");
    }
    if (auto tracer = process.tracer()) {
        tracer->set_regs(regs);
    }
    current_thread->send_urgent_signal_to_self(SIGTRAP);
}

#define EH(i, msg) \
    static void _exception##i() \
    { \
        dbgln("{}", msg); \
        PANIC("cr0={:08x} cr2={:08x} cr3={:08x} cr4={:08x}", read_cr0(), read_cr2(), read_cr3(), read_cr4()); \
    }

EH(2, "Unknown error")
EH(4, "Overflow")
EH(5, "Bounds check")
EH(8, "Double fault")
EH(9, "Coprocessor segment overrun")
EH(10, "Invalid TSS")
EH(11, "Segment not present")
EH(12, "Stack exception")
EH(15, "Unknown error")
EH(16, "Coprocessor error")

const DescriptorTablePointer& get_idtr()
{
    return s_idtr;
}

static void unimp_trap()
{
    PANIC("Unhandled IRQ");
}

GenericInterruptHandler& get_interrupt_handler(u8 interrupt_number)
{
    auto*& handler_slot = s_interrupt_handler[interrupt_number];
    VERIFY(handler_slot != nullptr);
    return *handler_slot;
}

static void revert_to_unused_handler(u8 interrupt_number)
{
    auto handler = new UnhandledInterruptHandler(interrupt_number);
    handler->register_interrupt_handler();
}

void register_generic_interrupt_handler(u8 interrupt_number, GenericInterruptHandler& handler)
{
    VERIFY(interrupt_number < GENERIC_INTERRUPT_HANDLERS_COUNT);
    auto*& handler_slot = s_interrupt_handler[interrupt_number];
    if (handler_slot != nullptr) {
        if (handler_slot->type() == HandlerType::UnhandledInterruptHandler) {
            if (handler_slot) {
                auto* unhandled_handler = static_cast<UnhandledInterruptHandler*>(handler_slot);
                unhandled_handler->unregister_interrupt_handler();
                delete unhandled_handler;
            }
            handler_slot = &handler;
            return;
        }
        if (handler_slot->is_shared_handler() && !handler_slot->is_sharing_with_others()) {
            VERIFY(handler_slot->type() == HandlerType::SharedIRQHandler);
            static_cast<SharedIRQHandler*>(handler_slot)->register_handler(handler);
            return;
        }
        if (!handler_slot->is_shared_handler()) {
            if (handler_slot->type() == HandlerType::SpuriousInterruptHandler) {
                static_cast<SpuriousInterruptHandler*>(handler_slot)->register_handler(handler);
                return;
            }
            VERIFY(handler_slot->type() == HandlerType::IRQHandler);
            auto& previous_handler = *handler_slot;
            handler_slot = nullptr;
            SharedIRQHandler::initialize(interrupt_number);
            VERIFY(handler_slot);
            static_cast<SharedIRQHandler*>(handler_slot)->register_handler(previous_handler);
            static_cast<SharedIRQHandler*>(handler_slot)->register_handler(handler);
            return;
        }
        VERIFY_NOT_REACHED();
    } else {
        handler_slot = &handler;
    }
}

void unregister_generic_interrupt_handler(u8 interrupt_number, GenericInterruptHandler& handler)
{
    auto*& handler_slot = s_interrupt_handler[interrupt_number];
    VERIFY(handler_slot != nullptr);
    if (handler_slot->type() == HandlerType::UnhandledInterruptHandler) {
        dbgln("Trying to unregister unused handler (?)");
        return;
    }
    if (handler_slot->is_shared_handler() && !handler_slot->is_sharing_with_others()) {
        VERIFY(handler_slot->type() == HandlerType::SharedIRQHandler);
        auto* shared_handler = static_cast<SharedIRQHandler*>(handler_slot);
        shared_handler->unregister_handler(handler);
        if (!shared_handler->sharing_devices_count()) {
            handler_slot = nullptr;
            revert_to_unused_handler(interrupt_number);
        }
        return;
    }
    if (!handler_slot->is_shared_handler()) {
        VERIFY(handler_slot->type() == HandlerType::IRQHandler);
        handler_slot = nullptr;
        revert_to_unused_handler(interrupt_number);
        return;
    }
    VERIFY_NOT_REACHED();
}

UNMAP_AFTER_INIT void register_interrupt_handler(u8 index, void (*handler)())
{
    // FIXME: Why is that with selector 8?
    // FIXME: Is the Gate Type really required to be an Interrupt
    // FIXME: What's up with that storage segment 0?
    s_idt[index] = IDTEntry((FlatPtr)handler, 8, IDTEntryType::InterruptGate32, 0, 0);
}

UNMAP_AFTER_INIT void register_user_callable_interrupt_handler(u8 index, void (*handler)())
{
    // FIXME: Why is that with selector 8?
    // FIXME: Is the Gate Type really required to be a Trap
    // FIXME: What's up with that storage segment 0?
    s_idt[index] = IDTEntry((FlatPtr)handler, 8, IDTEntryType::TrapGate32, 0, 3);
}

UNMAP_AFTER_INIT void flush_idt()
{
    asm("lidt %0" ::"m"(s_idtr));
}

UNMAP_AFTER_INIT static void idt_init()
{
    s_idtr.address = s_idt;
    s_idtr.limit = 256 * 8 - 1;

    register_interrupt_handler(0x00, divide_error_asm_entry);
    register_user_callable_interrupt_handler(0x01, debug_asm_entry);
    register_interrupt_handler(0x02, _exception2);
    register_user_callable_interrupt_handler(0x03, breakpoint_asm_entry);
    register_interrupt_handler(0x04, _exception4);
    register_interrupt_handler(0x05, _exception5);
    register_interrupt_handler(0x06, illegal_instruction_asm_entry);
    register_interrupt_handler(0x07, fpu_exception_asm_entry);
    register_interrupt_handler(0x08, _exception8);
    register_interrupt_handler(0x09, _exception9);
    register_interrupt_handler(0x0a, _exception10);
    register_interrupt_handler(0x0b, _exception11);
    register_interrupt_handler(0x0c, _exception12);
    register_interrupt_handler(0x0d, general_protection_fault_asm_entry);
    register_interrupt_handler(0x0e, page_fault_asm_entry);
    register_interrupt_handler(0x0f, _exception15);
    register_interrupt_handler(0x10, _exception16);

    for (u8 i = 0x11; i < 0x50; i++)
        register_interrupt_handler(i, unimp_trap);

    register_interrupt_handler(0x50, interrupt_80_asm_entry);
    register_interrupt_handler(0x51, interrupt_81_asm_entry);
    register_interrupt_handler(0x52, interrupt_82_asm_entry);
    register_interrupt_handler(0x53, interrupt_83_asm_entry);
    register_interrupt_handler(0x54, interrupt_84_asm_entry);
    register_interrupt_handler(0x55, interrupt_85_asm_entry);
    register_interrupt_handler(0x56, interrupt_86_asm_entry);
    register_interrupt_handler(0x57, interrupt_87_asm_entry);
    register_interrupt_handler(0x58, interrupt_88_asm_entry);
    register_interrupt_handler(0x59, interrupt_89_asm_entry);
    register_interrupt_handler(0x5a, interrupt_90_asm_entry);
    register_interrupt_handler(0x5b, interrupt_91_asm_entry);
    register_interrupt_handler(0x5c, interrupt_92_asm_entry);
    register_interrupt_handler(0x5d, interrupt_93_asm_entry);
    register_interrupt_handler(0x5e, interrupt_94_asm_entry);
    register_interrupt_handler(0x5f, interrupt_95_asm_entry);
    register_interrupt_handler(0x60, interrupt_96_asm_entry);
    register_interrupt_handler(0x61, interrupt_97_asm_entry);
    register_interrupt_handler(0x62, interrupt_98_asm_entry);
    register_interrupt_handler(0x63, interrupt_99_asm_entry);
    register_interrupt_handler(0x64, interrupt_100_asm_entry);
    register_interrupt_handler(0x65, interrupt_101_asm_entry);
    register_interrupt_handler(0x66, interrupt_102_asm_entry);
    register_interrupt_handler(0x67, interrupt_103_asm_entry);
    register_interrupt_handler(0x68, interrupt_104_asm_entry);
    register_interrupt_handler(0x69, interrupt_105_asm_entry);
    register_interrupt_handler(0x6a, interrupt_106_asm_entry);
    register_interrupt_handler(0x6b, interrupt_107_asm_entry);
    register_interrupt_handler(0x6c, interrupt_108_asm_entry);
    register_interrupt_handler(0x6d, interrupt_109_asm_entry);
    register_interrupt_handler(0x6e, interrupt_110_asm_entry);
    register_interrupt_handler(0x6f, interrupt_111_asm_entry);
    register_interrupt_handler(0x70, interrupt_112_asm_entry);
    register_interrupt_handler(0x71, interrupt_113_asm_entry);
    register_interrupt_handler(0x72, interrupt_114_asm_entry);
    register_interrupt_handler(0x73, interrupt_115_asm_entry);
    register_interrupt_handler(0x74, interrupt_116_asm_entry);
    register_interrupt_handler(0x75, interrupt_117_asm_entry);
    register_interrupt_handler(0x76, interrupt_118_asm_entry);
    register_interrupt_handler(0x77, interrupt_119_asm_entry);
    register_interrupt_handler(0x78, interrupt_120_asm_entry);
    register_interrupt_handler(0x79, interrupt_121_asm_entry);
    register_interrupt_handler(0x7a, interrupt_122_asm_entry);
    register_interrupt_handler(0x7b, interrupt_123_asm_entry);
    register_interrupt_handler(0x7c, interrupt_124_asm_entry);
    register_interrupt_handler(0x7d, interrupt_125_asm_entry);
    register_interrupt_handler(0x7e, interrupt_126_asm_entry);
    register_interrupt_handler(0x7f, interrupt_127_asm_entry);
    register_interrupt_handler(0x80, interrupt_128_asm_entry);
    register_interrupt_handler(0x81, interrupt_129_asm_entry);
    register_interrupt_handler(0x82, interrupt_130_asm_entry);
    register_interrupt_handler(0x83, interrupt_131_asm_entry);
    register_interrupt_handler(0x84, interrupt_132_asm_entry);
    register_interrupt_handler(0x85, interrupt_133_asm_entry);
    register_interrupt_handler(0x86, interrupt_134_asm_entry);
    register_interrupt_handler(0x87, interrupt_135_asm_entry);
    register_interrupt_handler(0x88, interrupt_136_asm_entry);
    register_interrupt_handler(0x89, interrupt_137_asm_entry);
    register_interrupt_handler(0x8a, interrupt_138_asm_entry);
    register_interrupt_handler(0x8b, interrupt_139_asm_entry);
    register_interrupt_handler(0x8c, interrupt_140_asm_entry);
    register_interrupt_handler(0x8d, interrupt_141_asm_entry);
    register_interrupt_handler(0x8e, interrupt_142_asm_entry);
    register_interrupt_handler(0x8f, interrupt_143_asm_entry);
    register_interrupt_handler(0x90, interrupt_144_asm_entry);
    register_interrupt_handler(0x91, interrupt_145_asm_entry);
    register_interrupt_handler(0x92, interrupt_146_asm_entry);
    register_interrupt_handler(0x93, interrupt_147_asm_entry);
    register_interrupt_handler(0x94, interrupt_148_asm_entry);
    register_interrupt_handler(0x95, interrupt_149_asm_entry);
    register_interrupt_handler(0x96, interrupt_150_asm_entry);
    register_interrupt_handler(0x97, interrupt_151_asm_entry);
    register_interrupt_handler(0x98, interrupt_152_asm_entry);
    register_interrupt_handler(0x99, interrupt_153_asm_entry);
    register_interrupt_handler(0x9a, interrupt_154_asm_entry);
    register_interrupt_handler(0x9b, interrupt_155_asm_entry);
    register_interrupt_handler(0x9c, interrupt_156_asm_entry);
    register_interrupt_handler(0x9d, interrupt_157_asm_entry);
    register_interrupt_handler(0x9e, interrupt_158_asm_entry);
    register_interrupt_handler(0x9f, interrupt_159_asm_entry);
    register_interrupt_handler(0xa0, interrupt_160_asm_entry);
    register_interrupt_handler(0xa1, interrupt_161_asm_entry);
    register_interrupt_handler(0xa2, interrupt_162_asm_entry);
    register_interrupt_handler(0xa3, interrupt_163_asm_entry);
    register_interrupt_handler(0xa4, interrupt_164_asm_entry);
    register_interrupt_handler(0xa5, interrupt_165_asm_entry);
    register_interrupt_handler(0xa6, interrupt_166_asm_entry);
    register_interrupt_handler(0xa7, interrupt_167_asm_entry);
    register_interrupt_handler(0xa8, interrupt_168_asm_entry);
    register_interrupt_handler(0xa9, interrupt_169_asm_entry);
    register_interrupt_handler(0xaa, interrupt_170_asm_entry);
    register_interrupt_handler(0xab, interrupt_171_asm_entry);
    register_interrupt_handler(0xac, interrupt_172_asm_entry);
    register_interrupt_handler(0xad, interrupt_173_asm_entry);
    register_interrupt_handler(0xae, interrupt_174_asm_entry);
    register_interrupt_handler(0xaf, interrupt_175_asm_entry);
    register_interrupt_handler(0xb0, interrupt_176_asm_entry);
    register_interrupt_handler(0xb1, interrupt_177_asm_entry);
    register_interrupt_handler(0xb2, interrupt_178_asm_entry);
    register_interrupt_handler(0xb3, interrupt_179_asm_entry);
    register_interrupt_handler(0xb4, interrupt_180_asm_entry);
    register_interrupt_handler(0xb5, interrupt_181_asm_entry);
    register_interrupt_handler(0xb6, interrupt_182_asm_entry);
    register_interrupt_handler(0xb7, interrupt_183_asm_entry);
    register_interrupt_handler(0xb8, interrupt_184_asm_entry);
    register_interrupt_handler(0xb9, interrupt_185_asm_entry);
    register_interrupt_handler(0xba, interrupt_186_asm_entry);
    register_interrupt_handler(0xbb, interrupt_187_asm_entry);
    register_interrupt_handler(0xbc, interrupt_188_asm_entry);
    register_interrupt_handler(0xbd, interrupt_189_asm_entry);
    register_interrupt_handler(0xbe, interrupt_190_asm_entry);
    register_interrupt_handler(0xbf, interrupt_191_asm_entry);
    register_interrupt_handler(0xc0, interrupt_192_asm_entry);
    register_interrupt_handler(0xc1, interrupt_193_asm_entry);
    register_interrupt_handler(0xc2, interrupt_194_asm_entry);
    register_interrupt_handler(0xc3, interrupt_195_asm_entry);
    register_interrupt_handler(0xc4, interrupt_196_asm_entry);
    register_interrupt_handler(0xc5, interrupt_197_asm_entry);
    register_interrupt_handler(0xc6, interrupt_198_asm_entry);
    register_interrupt_handler(0xc7, interrupt_199_asm_entry);
    register_interrupt_handler(0xc8, interrupt_200_asm_entry);
    register_interrupt_handler(0xc9, interrupt_201_asm_entry);
    register_interrupt_handler(0xca, interrupt_202_asm_entry);
    register_interrupt_handler(0xcb, interrupt_203_asm_entry);
    register_interrupt_handler(0xcc, interrupt_204_asm_entry);
    register_interrupt_handler(0xcd, interrupt_205_asm_entry);
    register_interrupt_handler(0xce, interrupt_206_asm_entry);
    register_interrupt_handler(0xcf, interrupt_207_asm_entry);
    register_interrupt_handler(0xd0, interrupt_208_asm_entry);
    register_interrupt_handler(0xd1, interrupt_209_asm_entry);
    register_interrupt_handler(0xd2, interrupt_210_asm_entry);
    register_interrupt_handler(0xd3, interrupt_211_asm_entry);
    register_interrupt_handler(0xd4, interrupt_212_asm_entry);
    register_interrupt_handler(0xd5, interrupt_213_asm_entry);
    register_interrupt_handler(0xd6, interrupt_214_asm_entry);
    register_interrupt_handler(0xd7, interrupt_215_asm_entry);
    register_interrupt_handler(0xd8, interrupt_216_asm_entry);
    register_interrupt_handler(0xd9, interrupt_217_asm_entry);
    register_interrupt_handler(0xda, interrupt_218_asm_entry);
    register_interrupt_handler(0xdb, interrupt_219_asm_entry);
    register_interrupt_handler(0xdc, interrupt_220_asm_entry);
    register_interrupt_handler(0xdd, interrupt_221_asm_entry);
    register_interrupt_handler(0xde, interrupt_222_asm_entry);
    register_interrupt_handler(0xdf, interrupt_223_asm_entry);
    register_interrupt_handler(0xe0, interrupt_224_asm_entry);
    register_interrupt_handler(0xe1, interrupt_225_asm_entry);
    register_interrupt_handler(0xe2, interrupt_226_asm_entry);
    register_interrupt_handler(0xe3, interrupt_227_asm_entry);
    register_interrupt_handler(0xe4, interrupt_228_asm_entry);
    register_interrupt_handler(0xe5, interrupt_229_asm_entry);
    register_interrupt_handler(0xe6, interrupt_230_asm_entry);
    register_interrupt_handler(0xe7, interrupt_231_asm_entry);
    register_interrupt_handler(0xe8, interrupt_232_asm_entry);
    register_interrupt_handler(0xe9, interrupt_233_asm_entry);
    register_interrupt_handler(0xea, interrupt_234_asm_entry);
    register_interrupt_handler(0xeb, interrupt_235_asm_entry);
    register_interrupt_handler(0xec, interrupt_236_asm_entry);
    register_interrupt_handler(0xed, interrupt_237_asm_entry);
    register_interrupt_handler(0xee, interrupt_238_asm_entry);
    register_interrupt_handler(0xef, interrupt_239_asm_entry);
    register_interrupt_handler(0xf0, interrupt_240_asm_entry);
    register_interrupt_handler(0xf1, interrupt_241_asm_entry);
    register_interrupt_handler(0xf2, interrupt_242_asm_entry);
    register_interrupt_handler(0xf3, interrupt_243_asm_entry);
    register_interrupt_handler(0xf4, interrupt_244_asm_entry);
    register_interrupt_handler(0xf5, interrupt_245_asm_entry);
    register_interrupt_handler(0xf6, interrupt_246_asm_entry);
    register_interrupt_handler(0xf7, interrupt_247_asm_entry);
    register_interrupt_handler(0xf8, interrupt_248_asm_entry);
    register_interrupt_handler(0xf9, interrupt_249_asm_entry);
    register_interrupt_handler(0xfa, interrupt_250_asm_entry);
    register_interrupt_handler(0xfb, interrupt_251_asm_entry);
    register_interrupt_handler(0xfc, interrupt_252_asm_entry);
    register_interrupt_handler(0xfd, interrupt_253_asm_entry);
    register_interrupt_handler(0xfe, interrupt_254_asm_entry);
    register_interrupt_handler(0xff, interrupt_255_asm_entry);

dbgln("Installing Unhandled Handlers");
|
|
|
|
for (u8 i = 0; i < GENERIC_INTERRUPT_HANDLERS_COUNT; ++i) {
|
|
auto* handler = new UnhandledInterruptHandler(i);
|
|
handler->register_interrupt_handler();
|
|
}
|
|
|
|
flush_idt();
|
|
}
|
|
|
|
void load_task_register(u16 selector)
|
|
{
|
|
asm("ltr %0" ::"r"(selector));
|
|
}
|
|
|
|
void handle_interrupt(TrapFrame* trap)
|
|
{
|
|
clac();
|
|
auto& regs = *trap->regs;
|
|
VERIFY(regs.isr_number >= IRQ_VECTOR_BASE && regs.isr_number <= (IRQ_VECTOR_BASE + GENERIC_INTERRUPT_HANDLERS_COUNT));
|
|
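    // IRQ vectors are remapped to start at IRQ_VECTOR_BASE (0x50), so
    // subtracting it yields the IRQ number used to index the handler table.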
    u8 irq = (u8)(regs.isr_number - 0x50);
    s_entropy_source_interrupts.add_random_event(irq);
    auto* handler = s_interrupt_handler[irq];
    VERIFY(handler);
    handler->increment_invoking_counter();
    handler->handle_interrupt(regs);
    handler->eoi();
}

void enter_trap_no_irq(TrapFrame* trap)
{
    InterruptDisabler disable;
    Processor::current().enter_trap(*trap, false);
}

void enter_trap(TrapFrame* trap)
{
    InterruptDisabler disable;
    Processor::current().enter_trap(*trap, true);
}

void exit_trap(TrapFrame* trap)
{
    InterruptDisabler disable;
    return Processor::current().exit_trap(*trap);
}

UNMAP_AFTER_INIT void write_cr0(FlatPtr value)
{
#if ARCH(I386)
    asm volatile("mov %%eax, %%cr0" ::"a"(value));
#else
    asm volatile("mov %%rax, %%cr0" ::"a"(value));
#endif
}

UNMAP_AFTER_INIT void write_cr4(FlatPtr value)
{
#if ARCH(I386)
    asm volatile("mov %%eax, %%cr4" ::"a"(value));
#else
    asm volatile("mov %%rax, %%cr4" ::"a"(value));
#endif
}

UNMAP_AFTER_INIT static void sse_init()
{
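    // Clear CR0.EM (bit 2) and set CR0.MP (bit 1) so FPU/SSE instructions run
    // natively, then set CR4.OSFXSR and CR4.OSXMMEXCPT (bits 9-10, i.e. 0x600)
    // to enable FXSAVE/FXRSTOR and SIMD floating-point exceptions.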
    write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
    write_cr4(read_cr4() | 0x600);
}

FlatPtr read_cr0()
{
    FlatPtr cr0;
#if ARCH(I386)
    asm("mov %%cr0, %%eax"
        : "=a"(cr0));
#else
    asm("mov %%cr0, %%rax"
        : "=a"(cr0));
#endif
    return cr0;
}

FlatPtr read_cr2()
{
    FlatPtr cr2;
#if ARCH(I386)
    asm("mov %%cr2, %%eax"
        : "=a"(cr2));
#else
    asm("mov %%cr2, %%rax"
        : "=a"(cr2));
#endif
    return cr2;
}

FlatPtr read_cr3()
{
    FlatPtr cr3;
#if ARCH(I386)
    asm("mov %%cr3, %%eax"
        : "=a"(cr3));
#else
    asm("mov %%cr3, %%rax"
        : "=a"(cr3));
#endif
    return cr3;
}

void write_cr3(FlatPtr cr3)
{
    // NOTE: If you're here from a GPF crash, it's very likely that a PDPT entry is incorrect, not this!
#if ARCH(I386)
    asm volatile("mov %%eax, %%cr3" ::"a"(cr3)
                 : "memory");
#else
    asm volatile("mov %%rax, %%cr3" ::"a"(cr3)
                 : "memory");
#endif
}

FlatPtr read_cr4()
{
    FlatPtr cr4;
#if ARCH(I386)
    asm("mov %%cr4, %%eax"
        : "=a"(cr4));
#else
    asm("mov %%cr4, %%rax"
        : "=a"(cr4));
#endif
    return cr4;
}

void read_debug_registers_into(DebugRegisterState& state)
{
    state.dr0 = read_dr0();
    state.dr1 = read_dr1();
    state.dr2 = read_dr2();
    state.dr3 = read_dr3();
    state.dr6 = read_dr6();
    state.dr7 = read_dr7();
}

void write_debug_registers_from(const DebugRegisterState& state)
{
    write_dr0(state.dr0);
    write_dr1(state.dr1);
    write_dr2(state.dr2);
    write_dr3(state.dr3);
    write_dr6(state.dr6);
    write_dr7(state.dr7);
}

void clear_debug_registers()
{
    write_dr0(0);
    write_dr1(0);
    write_dr2(0);
    write_dr3(0);
    write_dr7(1 << 10); // Bit 10 is reserved and must be set to 1.
}

#if ARCH(I386)
# define DEFINE_DEBUG_REGISTER(index) \
    FlatPtr read_dr##index() \
    { \
        FlatPtr value; \
        asm("mov %%dr" #index ", %%eax" \
            : "=a"(value)); \
        return value; \
    } \
    void write_dr##index(FlatPtr value) \
    { \
        asm volatile("mov %%eax, %%dr" #index ::"a"(value)); \
    }
#else
# define DEFINE_DEBUG_REGISTER(index) \
    FlatPtr read_dr##index() \
    { \
        FlatPtr value; \
        asm("mov %%dr" #index ", %%rax" \
            : "=a"(value)); \
        return value; \
    } \
    void write_dr##index(FlatPtr value) \
    { \
        asm volatile("mov %%rax, %%dr" #index ::"a"(value)); \
    }
#endif

DEFINE_DEBUG_REGISTER(0);
DEFINE_DEBUG_REGISTER(1);
DEFINE_DEBUG_REGISTER(2);
DEFINE_DEBUG_REGISTER(3);
DEFINE_DEBUG_REGISTER(6);
DEFINE_DEBUG_REGISTER(7);

#define XCR_XFEATURE_ENABLED_MASK 0

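// XGETBV/XSETBV access the extended control register selected by ECX;
// index 0 is XCR0, the XSAVE feature-enable mask.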
UNMAP_AFTER_INIT u64 read_xcr0()
{
    u32 eax, edx;
    asm volatile("xgetbv"
                 : "=a"(eax), "=d"(edx)
                 : "c"(XCR_XFEATURE_ENABLED_MASK));
    return eax + ((u64)edx << 32);
}

UNMAP_AFTER_INIT void write_xcr0(u64 value)
{
    u32 eax = value;
    u32 edx = value >> 32;
    asm volatile("xsetbv" ::"a"(eax), "d"(edx), "c"(XCR_XFEATURE_ENABLED_MASK));
}

READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;

READONLY_AFTER_INIT static Vector<Processor*>* s_processors;
static SpinLock s_processor_lock;
READONLY_AFTER_INIT volatile u32 Processor::g_total_processors;
static volatile bool s_smp_enabled;

Vector<Processor*>& Processor::processors()
{
    VERIFY(s_processors);
    return *s_processors;
}

Processor& Processor::by_id(u32 cpu)
{
    // s_processors does not need to be protected by a lock of any kind.
    // It is populated early in the boot process, and the BSP is waiting
    // for all APs to finish, after which this array never gets modified
    // again, so it's safe to not protect access to it here
    auto& procs = processors();
    VERIFY(procs.size() > cpu);
    VERIFY(procs[cpu] != nullptr);
    return *procs[cpu];
}

[[noreturn]] static inline void halt_this()
{
    for (;;) {
        asm volatile("cli; hlt");
    }
}

UNMAP_AFTER_INIT void Processor::cpu_detect()
{
    // NOTE: This is called during Processor::early_initialize, we cannot
    // safely log at this point because we don't have kmalloc
    // initialized yet!
    auto set_feature =
        [&](CPUFeature f) {
            m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f));
        };
    m_features = static_cast<CPUFeature>(0);

    CPUID processor_info(0x1);
    if (processor_info.edx() & (1 << 4))
        set_feature(CPUFeature::TSC);
    if (processor_info.edx() & (1 << 6))
        set_feature(CPUFeature::PAE);
    if (processor_info.edx() & (1 << 13))
        set_feature(CPUFeature::PGE);
    if (processor_info.edx() & (1 << 23))
        set_feature(CPUFeature::MMX);
    if (processor_info.edx() & (1 << 25))
        set_feature(CPUFeature::SSE);
    if (processor_info.edx() & (1 << 26))
        set_feature(CPUFeature::SSE2);
    if (processor_info.ecx() & (1 << 0))
        set_feature(CPUFeature::SSE3);
    if (processor_info.ecx() & (1 << 9))
        set_feature(CPUFeature::SSSE3);
    if (processor_info.ecx() & (1 << 19))
        set_feature(CPUFeature::SSE4_1);
    if (processor_info.ecx() & (1 << 20))
        set_feature(CPUFeature::SSE4_2);
    if (processor_info.ecx() & (1 << 26))
        set_feature(CPUFeature::XSAVE);
    if (processor_info.ecx() & (1 << 28))
        set_feature(CPUFeature::AVX);
    if (processor_info.ecx() & (1 << 30))
        set_feature(CPUFeature::RDRAND);
    if (processor_info.edx() & (1 << 11)) {
        u32 stepping = processor_info.eax() & 0xf;
        u32 model = (processor_info.eax() >> 4) & 0xf;
        u32 family = (processor_info.eax() >> 8) & 0xf;
        if (!(family == 6 && model < 3 && stepping < 3))
            set_feature(CPUFeature::SEP);
        if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
            set_feature(CPUFeature::CONSTANT_TSC);
    }

    u32 max_extended_leaf = CPUID(0x80000000).eax();

    VERIFY(max_extended_leaf >= 0x80000001);
    CPUID extended_processor_info(0x80000001);
    if (extended_processor_info.edx() & (1 << 20))
        set_feature(CPUFeature::NX);
    if (extended_processor_info.edx() & (1 << 27))
        set_feature(CPUFeature::RDTSCP);
    if (extended_processor_info.edx() & (1 << 11)) {
        // Only available in 64 bit mode
        set_feature(CPUFeature::SYSCALL);
    }

    if (max_extended_leaf >= 0x80000007) {
        CPUID cpuid(0x80000007);
        if (cpuid.edx() & (1 << 8)) {
            set_feature(CPUFeature::CONSTANT_TSC);
            set_feature(CPUFeature::NONSTOP_TSC);
        }
    }

    if (max_extended_leaf >= 0x80000008) {
        // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
        CPUID cpuid(0x80000008);
        m_physical_address_bit_width = cpuid.eax() & 0xff;
    } else {
        // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
        m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
    }

    CPUID extended_features(0x7);
    if (extended_features.ebx() & (1 << 20))
        set_feature(CPUFeature::SMAP);
    if (extended_features.ebx() & (1 << 7))
        set_feature(CPUFeature::SMEP);
    if (extended_features.ecx() & (1 << 2))
        set_feature(CPUFeature::UMIP);
    if (extended_features.ebx() & (1 << 18))
        set_feature(CPUFeature::RDSEED);
}

UNMAP_AFTER_INIT void Processor::cpu_setup()
{
    // NOTE: This is called during Processor::early_initialize, we cannot
    // safely log at this point because we don't have kmalloc
    // initialized yet!
    cpu_detect();

    if (has_feature(CPUFeature::SSE))
        sse_init();

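    // Turn on CR0.WP (bit 16) so ring 0 also respects read-only pages.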
    write_cr0(read_cr0() | 0x00010000);

    if (has_feature(CPUFeature::PGE)) {
        // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
        write_cr4(read_cr4() | 0x80);
    }

    if (has_feature(CPUFeature::NX)) {
        // Turn on IA32_EFER.NXE
        asm volatile(
            "movl $0xc0000080, %ecx\n"
            "rdmsr\n"
            "orl $0x800, %eax\n"
            "wrmsr\n");
    }

    if (has_feature(CPUFeature::SMEP)) {
        // Turn on CR4.SMEP
        write_cr4(read_cr4() | 0x100000);
    }

    if (has_feature(CPUFeature::SMAP)) {
        // Turn on CR4.SMAP
        write_cr4(read_cr4() | 0x200000);
    }

    if (has_feature(CPUFeature::UMIP)) {
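        // Turn on CR4.UMIP (bit 11) to block SGDT/SIDT and similar
        // descriptor-table reads from ring 3.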
        write_cr4(read_cr4() | 0x800);
    }

    if (has_feature(CPUFeature::TSC)) {
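        // Turn on CR4.TSD (bit 2) so RDTSC is only usable from ring 0.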
        write_cr4(read_cr4() | 0x4);
    }

    if (has_feature(CPUFeature::XSAVE)) {
        // Turn on CR4.OSXSAVE
        write_cr4(read_cr4() | 0x40000);

        // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
        // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
        write_xcr0(0x1);

        if (has_feature(CPUFeature::AVX)) {
            // Turn on SSE, AVX and x87 flags
            write_xcr0(read_xcr0() | 0x7);
        }
    }
}

String Processor::features_string() const
{
    StringBuilder builder;
    auto feature_to_str =
        [](CPUFeature f) -> const char* {
            switch (f) {
            case CPUFeature::NX:
                return "nx";
            case CPUFeature::PAE:
                return "pae";
            case CPUFeature::PGE:
                return "pge";
            case CPUFeature::RDRAND:
                return "rdrand";
            case CPUFeature::RDSEED:
                return "rdseed";
            case CPUFeature::SMAP:
                return "smap";
            case CPUFeature::SMEP:
                return "smep";
            case CPUFeature::SSE:
                return "sse";
            case CPUFeature::TSC:
                return "tsc";
            case CPUFeature::RDTSCP:
                return "rdtscp";
            case CPUFeature::CONSTANT_TSC:
                return "constant_tsc";
            case CPUFeature::NONSTOP_TSC:
                return "nonstop_tsc";
            case CPUFeature::UMIP:
                return "umip";
            case CPUFeature::SEP:
                return "sep";
            case CPUFeature::SYSCALL:
                return "syscall";
            case CPUFeature::MMX:
                return "mmx";
            case CPUFeature::SSE2:
                return "sse2";
            case CPUFeature::SSE3:
                return "sse3";
            case CPUFeature::SSSE3:
                return "ssse3";
            case CPUFeature::SSE4_1:
                return "sse4.1";
            case CPUFeature::SSE4_2:
                return "sse4.2";
            case CPUFeature::XSAVE:
                return "xsave";
            case CPUFeature::AVX:
                return "avx";
                // no default statement here intentionally so that we get
                // a warning if a new feature is forgotten to be added here
            }
            // Shouldn't ever happen
            return "???";
        };
    bool first = true;
    for (u32 flag = 1; flag != 0; flag <<= 1) {
        if ((static_cast<u32>(m_features) & flag) != 0) {
            if (first)
                first = false;
            else
                builder.append(' ');
            auto str = feature_to_str(static_cast<CPUFeature>(flag));
            builder.append(str, strlen(str));
        }
    }
    return builder.build();
}

String Processor::platform_string() const
{
    return "i386";
}

UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
{
    m_self = this;

    m_cpu = cpu;
    m_in_irq = 0;
    m_in_critical = 0;

    m_invoke_scheduler_async = false;
    m_scheduler_initialized = false;

    m_message_queue = nullptr;
    m_idle_thread = nullptr;
    m_current_thread = nullptr;
    m_scheduler_data = nullptr;
    m_mm_data = nullptr;
    m_info = nullptr;

    m_halt_requested = false;
    if (cpu == 0) {
        s_smp_enabled = false;
        atomic_store(&g_total_processors, 1u, AK::MemoryOrder::memory_order_release);
    } else {
        atomic_fetch_add(&g_total_processors, 1u, AK::MemoryOrder::memory_order_acq_rel);
    }

    deferred_call_pool_init();

    cpu_setup();
    gdt_init();

    VERIFY(is_initialized());   // sanity check
    VERIFY(&current() == this); // sanity check
}

UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
{
    VERIFY(m_self == this);
    VERIFY(&current() == this); // sanity check

    dmesgln("CPU[{}]: Supported features: {}", id(), features_string());
    if (!has_feature(CPUFeature::RDRAND))
        dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", id());
    dmesgln("CPU[{}]: Physical address bit width: {}", id(), m_physical_address_bit_width);

    if (cpu == 0)
        idt_init();
    else
        flush_idt();

    if (cpu == 0) {
        VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
        asm volatile("fninit");
        asm volatile("fxsave %0"
            : "=m"(s_clean_fpu_state));
    }

    m_info = new ProcessorInfo(*this);

    {
        ScopedSpinLock lock(s_processor_lock);
        // We need to prevent races between APs starting up at the same time
        if (!s_processors)
            s_processors = new Vector<Processor*>();
        if (cpu >= s_processors->size())
            s_processors->resize(cpu + 1);
        (*s_processors)[cpu] = this;
    }
}

void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
{
    u16 i = (selector & 0xfffc) >> 3;
    u32 prev_gdt_length = m_gdt_length;

    if (i > m_gdt_length) {
        m_gdt_length = i + 1;
        VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
        m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
    }
    m_gdt[i].low = low;
    m_gdt[i].high = high;

    // clear selectors we may have skipped
    while (i < prev_gdt_length) {
        m_gdt[i].low = 0;
        m_gdt[i].high = 0;
        i++;
    }
}

void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
{
    write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
}

Descriptor& Processor::get_gdt_entry(u16 selector)
{
    u16 i = (selector & 0xfffc) >> 3;
    return *(Descriptor*)(&m_gdt[i]);
}

void Processor::flush_gdt()
{
    m_gdtr.address = m_gdt;
    m_gdtr.limit = (m_gdt_length * 8) - 1;
    asm volatile("lgdt %0" ::"m"(m_gdtr)
                 : "memory");
}

const DescriptorTablePointer& Processor::get_gdtr()
{
    return m_gdtr;
}

Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
{
    FlatPtr frame_ptr = 0, eip = 0;
    Vector<FlatPtr, 32> stack_trace;

    auto walk_stack = [&](FlatPtr stack_ptr) {
        static constexpr size_t max_stack_frames = 4096;
        stack_trace.append(eip);
        size_t count = 1;
        while (stack_ptr && stack_trace.size() < max_stack_frames) {
            FlatPtr retaddr;

            count++;
            if (max_frames != 0 && count > max_frames)
                break;

            if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
                if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr)
                    break;
                stack_trace.append(retaddr);
                if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr))
                    break;
            } else {
                void* fault_at;
                if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
                    break;
                stack_trace.append(retaddr);
                if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
                    break;
            }
        }
    };
    auto capture_current_thread = [&]() {
        frame_ptr = (FlatPtr)__builtin_frame_address(0);
        eip = (FlatPtr)__builtin_return_address(0);

        walk_stack(frame_ptr);
    };

    // Since the thread may be running on another processor, there
    // is a chance a context switch may happen while we're trying
    // to get it. It also won't be entirely accurate and merely
    // reflect the status at the last context switch.
    ScopedSpinLock lock(g_scheduler_lock);
    if (&thread == Processor::current_thread()) {
        VERIFY(thread.state() == Thread::Running);
        // Leave the scheduler lock. If we trigger page faults we may
        // need to be preempted. Since this is our own thread it won't
        // cause any problems as the stack won't change below this frame.
        lock.unlock();
        capture_current_thread();
    } else if (thread.is_active()) {
        VERIFY(thread.cpu() != Processor::id());
        // If this is the case, the thread is currently running
        // on another processor. We can't trust the kernel stack as
        // it may be changing at any time. We need to probably send
        // an IPI to that processor, have it walk the stack and wait
        // until it returns the data back to us
        auto& proc = Processor::current();
        smp_unicast(
            thread.cpu(),
            [&]() {
                dbgln("CPU[{}] getting stack for cpu #{}", Processor::id(), proc.get_id());
                ProcessPagingScope paging_scope(thread.process());
                VERIFY(&Processor::current() != &proc);
                VERIFY(&thread == Processor::current_thread());
                // NOTE: Because the other processor is still holding the
                // scheduler lock while waiting for this callback to finish,
                // the current thread on the target processor cannot change

                // TODO: What to do about page faults here? We might deadlock
                // because the other processor is still holding the
                // scheduler lock...
                capture_current_thread();
            },
            false);
    } else {
        switch (thread.state()) {
        case Thread::Running:
            VERIFY_NOT_REACHED(); // should have been handled above
        case Thread::Runnable:
        case Thread::Stopped:
        case Thread::Blocked:
        case Thread::Dying:
        case Thread::Dead: {
            // We need to retrieve ebp from what was last pushed to the kernel
            // stack. Before switching out of that thread, switch_context
            // pushed the callee-saved registers, and the last of them happens
            // to be ebp.
            ProcessPagingScope paging_scope(thread.process());
            auto& tss = thread.tss();
            u32* stack_top = reinterpret_cast<u32*>(tss.esp);
            if (is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
                if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]))
                    frame_ptr = 0;
            } else {
                void* fault_at;
                if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
                    frame_ptr = 0;
            }
            eip = tss.eip;
            // TODO: We need to leave the scheduler lock here, but we also
            // need to prevent the target thread from being run while
            // we walk the stack
            lock.unlock();
            walk_stack(frame_ptr);
            break;
        }
        default:
            dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
            break;
        }
    }
    return stack_trace;
}

extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
|
|
{
|
|
VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running);
|
|
VERIFY(to_thread->state() == Thread::Running);
|
|
|
|
Processor::set_current_thread(*to_thread);
|
|
|
|
auto& from_tss = from_thread->tss();
|
|
auto& to_tss = to_thread->tss();
|
|
asm volatile("fxsave %0"
|
|
: "=m"(from_thread->fpu_state()));
|
|
|
|
from_tss.fs = get_fs();
|
|
from_tss.gs = get_gs();
|
|
set_fs(to_tss.fs);
|
|
set_gs(to_tss.gs);
|
|
|
|
if (from_thread->process().is_traced())
|
|
read_debug_registers_into(from_thread->debug_register_state());
|
|
|
|
if (to_thread->process().is_traced()) {
|
|
write_debug_registers_from(to_thread->debug_register_state());
|
|
} else {
|
|
clear_debug_registers();
|
|
}
|
|
|
|
auto& processor = Processor::current();
|
|
auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
|
|
tls_descriptor.set_base(to_thread->thread_specific_data());
|
|
tls_descriptor.set_limit(to_thread->thread_specific_region_size());
|
|
|
|
if (from_tss.cr3 != to_tss.cr3)
|
|
write_cr3(to_tss.cr3);
|
|
|
|
to_thread->set_cpu(processor.get_id());
|
|
processor.restore_in_critical(to_thread->saved_critical());
|
|
|
|
asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
|
|
|
|
// TODO: ioperm?
|
|
}
|
|
|
|
#define ENTER_THREAD_CONTEXT_ARGS_SIZE (2 * 4) // to_thread, from_thread
|
|
|
|
void Processor::switch_context(Thread*& from_thread, Thread*& to_thread)
|
|
{
|
|
VERIFY(!in_irq());
|
|
VERIFY(m_in_critical == 1);
|
|
VERIFY(is_kernel_mode());
|
|
|
|
dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context --> switching out of: {} {}", VirtualAddress(from_thread), *from_thread);
|
|
from_thread->save_critical(m_in_critical);
|
|
|
|
#if ARCH(I386)
|
|
// clang-format off
|
|
// Switch to new thread context, passing from_thread and to_thread
|
|
// through to the new context using registers edx and eax
|
|
asm volatile(
|
|
// NOTE: changing how much we push to the stack affects
|
|
// SWITCH_CONTEXT_TO_STACK_SIZE and thread_context_first_enter()!
|
|
"pushfl \n"
|
|
"pushl %%ebx \n"
|
|
"pushl %%esi \n"
|
|
"pushl %%edi \n"
|
|
"pushl %%ebp \n"
|
|
"movl %%esp, %[from_esp] \n"
|
|
"movl $1f, %[from_eip] \n"
|
|
"movl %[to_esp0], %%ebx \n"
|
|
"movl %%ebx, %[tss_esp0] \n"
|
|
"movl %[to_esp], %%esp \n"
|
|
"pushl %[to_thread] \n"
|
|
"pushl %[from_thread] \n"
|
|
"pushl %[to_eip] \n"
|
|
"cld \n"
|
|
"jmp enter_thread_context \n"
|
|
"1: \n"
|
|
"popl %%edx \n"
|
|
"popl %%eax \n"
|
|
"popl %%ebp \n"
|
|
"popl %%edi \n"
|
|
"popl %%esi \n"
|
|
"popl %%ebx \n"
|
|
"popfl \n"
|
|
: [from_esp] "=m" (from_thread->tss().esp),
|
|
[from_eip] "=m" (from_thread->tss().eip),
|
|
[tss_esp0] "=m" (m_tss.esp0),
|
|
"=d" (from_thread), // needed so that from_thread retains the correct value
|
|
"=a" (to_thread) // needed so that to_thread retains the correct value
|
|
: [to_esp] "g" (to_thread->tss().esp),
|
|
[to_esp0] "g" (to_thread->tss().esp0),
|
|
[to_eip] "c" (to_thread->tss().eip),
|
|
[from_thread] "d" (from_thread),
|
|
[to_thread] "a" (to_thread)
|
|
: "memory"
|
|
);
|
|
// clang-format on
|
|
#else
|
|
PANIC("Context switching not implemented.");
|
|
#endif
|
|
|
|
dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {}", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
|
|
|
|
Processor::current().restore_in_critical(to_thread->saved_critical());
|
|
}
|
|
|
|
extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
|
|
{
|
|
VERIFY(!are_interrupts_enabled());
|
|
VERIFY(is_kernel_mode());
|
|
|
|
dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
|
|
|
|
VERIFY(to_thread == Thread::current());
|
|
|
|
Scheduler::enter_current(*from_thread, true);
|
|
|
|
// Since we got here and don't have Scheduler::context_switch in the
|
|
// call stack (because this is the first time we switched into this
|
|
// context), we need to notify the scheduler so that it can release
|
|
// the scheduler lock. We don't want to enable interrupts at this point
|
|
// as we're still in the middle of a context switch. Doing so could
|
|
// trigger a context switch within a context switch, leading to a crash.
|
|
Scheduler::leave_on_first_switch(trap->regs->eflags & ~0x200);
|
|
}
|
|
|
|
extern "C" void thread_context_first_enter(void);
|
|
|
|
// clang-format off
asm(
// enter_thread_context returns to here first time a thread is executing
".globl thread_context_first_enter \n"
"thread_context_first_enter: \n"
// switch_context will have pushed from_thread and to_thread to our new
// stack prior to thread_context_first_enter() being called, and the
// pointer to TrapFrame was the top of the stack before that
" movl 8(%esp), %ebx \n" // save pointer to TrapFrame
" cld \n"
" call context_first_init \n"
" addl $" __STRINGIFY(ENTER_THREAD_CONTEXT_ARGS_SIZE) ", %esp \n"
" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
" jmp common_trap_exit \n"
);
// clang-format on

void exit_kernel_thread(void)
{
    Thread::current()->exit();
}

u32 Processor::init_context(Thread& thread, bool leave_crit)
{
    VERIFY(is_kernel_mode());
    VERIFY(g_scheduler_lock.is_locked());
    if (leave_crit) {
        // Leave the critical section we set up in Process::exec,
// but because we still have the scheduler lock we should end up with 1
|
|
m_in_critical--; // leave it without triggering anything or restoring flags
|
|
VERIFY(in_critical() == 1);
|
|
}

    u32 kernel_stack_top = thread.kernel_stack_top();

    // Add a random offset between 0-256 (16-byte aligned)
    kernel_stack_top -= round_up_to_power_of_two(get_fast_random<u8>(), 16);

    u32 stack_top = kernel_stack_top;

    // TODO: handle NT?
    VERIFY((cpu_flags() & 0x24000) == 0); // Assume !(NT | VM)

    auto& tss = thread.tss();
    bool return_to_user = (tss.cs & 3) != 0;

    // make room for an interrupt frame
    if (!return_to_user) {
        // userspace_esp and userspace_ss are not popped off by iret
        // unless we're switching back to user mode
        stack_top -= sizeof(RegisterState) - 2 * sizeof(u32);

        // For kernel threads we'll push the thread function argument
        // which should be in tss.esp and exit_kernel_thread as return
        // address.
        stack_top -= 2 * sizeof(u32);
        *reinterpret_cast<u32*>(kernel_stack_top - 2 * sizeof(u32)) = tss.esp;
        *reinterpret_cast<u32*>(kernel_stack_top - 3 * sizeof(u32)) = FlatPtr(&exit_kernel_thread);
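        // When the thread function returns, that "ret" lands in
        // exit_kernel_thread(), which calls Thread::current()->exit().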
    } else {
        stack_top -= sizeof(RegisterState);
    }

    // we want to end up 16-byte aligned, %esp + 4 should be aligned
    stack_top -= sizeof(u32);
    *reinterpret_cast<u32*>(kernel_stack_top - sizeof(u32)) = 0;

    // set up the stack so that after returning from thread_context_first_enter()
    // we will end up either in kernel mode or user mode, depending on how the thread is set up
    // However, the first step is to always start in kernel mode with thread_context_first_enter
    RegisterState& iretframe = *reinterpret_cast<RegisterState*>(stack_top);
    iretframe.ss = tss.ss;
    iretframe.gs = tss.gs;
    iretframe.fs = tss.fs;
    iretframe.es = tss.es;
    iretframe.ds = tss.ds;
    iretframe.edi = tss.edi;
    iretframe.esi = tss.esi;
    iretframe.ebp = tss.ebp;
    iretframe.esp = 0;
    iretframe.ebx = tss.ebx;
    iretframe.edx = tss.edx;
    iretframe.ecx = tss.ecx;
    iretframe.eax = tss.eax;
    iretframe.eflags = tss.eflags;
    iretframe.eip = tss.eip;
    iretframe.cs = tss.cs;
    if (return_to_user) {
        iretframe.userspace_esp = tss.esp;
        iretframe.userspace_ss = tss.ss;
    }

    // make space for a trap frame
    stack_top -= sizeof(TrapFrame);
    TrapFrame& trap = *reinterpret_cast<TrapFrame*>(stack_top);
    trap.regs = &iretframe;
    trap.prev_irq_level = 0;
    trap.next_trap = nullptr;

    stack_top -= sizeof(u32); // pointer to TrapFrame
    *reinterpret_cast<u32*>(stack_top) = stack_top + 4;
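    // This u32 is the TrapFrame* argument: thread_context_first_enter() reads
    // it at 8(%esp), underneath the from_thread/to_thread values that
    // switch_context() pushes on top of it.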

    if constexpr (CONTEXT_SWITCH_DEBUG) {
        if (return_to_user) {
            dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}, user_top={}:{}",
                thread,
                VirtualAddress(&thread),
                iretframe.cs, tss.eip,
                VirtualAddress(tss.esp),
                VirtualAddress(stack_top),
                iretframe.userspace_ss,
                iretframe.userspace_esp);
        } else {
            dbgln("init_context {} ({}) set up to execute at eip={}:{}, esp={}, stack_top={}",
                thread,
                VirtualAddress(&thread),
                iretframe.cs, tss.eip,
                VirtualAddress(tss.esp),
                VirtualAddress(stack_top));
        }
    }

    // make switch_context() always first return to thread_context_first_enter()
    // in kernel mode, so set up these values so that we end up popping iretframe
    // off the stack right after the context switch completed, at which point
    // control is transferred to what iretframe is pointing to.
    tss.eip = FlatPtr(&thread_context_first_enter);
    tss.esp0 = kernel_stack_top;
    tss.esp = stack_top;
    tss.cs = GDT_SELECTOR_CODE0;
    tss.ds = GDT_SELECTOR_DATA0;
    tss.es = GDT_SELECTOR_DATA0;
    tss.gs = GDT_SELECTOR_DATA0;
    tss.ss = GDT_SELECTOR_DATA0;
    tss.fs = GDT_SELECTOR_PROC;
    return stack_top;
}

extern "C" u32 do_init_context(Thread* thread, u32 flags)
{
    VERIFY_INTERRUPTS_DISABLED();
    thread->tss().eflags = flags;
    return Processor::current().init_context(*thread, true);
}

extern "C" void do_assume_context(Thread* thread, u32 flags);

#if ARCH(I386)
// clang-format off
asm(
".global do_assume_context \n"
"do_assume_context: \n"
"    movl 4(%esp), %ebx \n"
"    movl 8(%esp), %esi \n"
// We're going to call Processor::init_context, so just make sure
// we have enough stack space so we don't stomp over it
"    subl $(" __STRINGIFY(4 + REGISTER_STATE_SIZE + TRAP_FRAME_SIZE + 4) "), %esp \n"
"    pushl %esi \n"
"    pushl %ebx \n"
"    cld \n"
"    call do_init_context \n"
"    addl $8, %esp \n"
"    movl %eax, %esp \n" // move stack pointer to what Processor::init_context set up for us
"    pushl %ebx \n" // push to_thread
"    pushl %ebx \n" // push from_thread
"    pushl $thread_context_first_enter \n" // should be same as tss.eip
"    jmp enter_thread_context \n"
);
// clang-format on
#endif

void Processor::assume_context(Thread& thread, FlatPtr flags)
{
    dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);

    VERIFY_INTERRUPTS_DISABLED();
    Scheduler::prepare_after_exec();
    // in_critical() should be 2 here. The critical section in Process::exec
    // and then the scheduler lock
    VERIFY(Processor::current().in_critical() == 2);
#if ARCH(I386)
    do_assume_context(&thread, flags);
#elif ARCH(X86_64)
    (void)flags;
    TODO();
#endif
    VERIFY_NOT_REACHED();
}

extern "C" UNMAP_AFTER_INIT void pre_init_finished(void)
{
    VERIFY(g_scheduler_lock.own_lock());

    // Because init_finished() will wait on the other APs, we need
    // to release the scheduler lock so that the other APs can also get
    // to this point

    // The target flags will get restored upon leaving the trap
    u32 prev_flags = cpu_flags();
    Scheduler::leave_on_first_switch(prev_flags);
}

extern "C" UNMAP_AFTER_INIT void post_init_finished(void)
{
    // We need to re-acquire the scheduler lock before a context switch
    // transfers control into the idle loop, which needs the lock held
    Scheduler::prepare_for_idle_loop();
}

UNMAP_AFTER_INIT void Processor::initialize_context_switching(Thread& initial_thread)
{
    VERIFY(initial_thread.process().is_kernel_process());

    auto& tss = initial_thread.tss();
    m_tss = tss;
    m_tss.esp0 = tss.esp0;
    m_tss.ss0 = GDT_SELECTOR_DATA0;
    // user mode needs to be able to switch to kernel mode:
    m_tss.cs = m_tss.ds = m_tss.es = m_tss.gs = m_tss.ss = GDT_SELECTOR_CODE0 | 3;
    m_tss.fs = GDT_SELECTOR_PROC | 3;

    m_scheduler_initialized = true;

#if ARCH(I386)
    // clang-format off
    asm volatile(
        "movl %[new_esp], %%esp \n" // switch to new stack
        "pushl %[from_to_thread] \n" // to_thread
        "pushl %[from_to_thread] \n" // from_thread
        "pushl $" __STRINGIFY(GDT_SELECTOR_CODE0) " \n"
        "pushl %[new_eip] \n" // save the entry eip to the stack
        "movl %%esp, %%ebx \n"
        "addl $20, %%ebx \n" // calculate pointer to TrapFrame
        "pushl %%ebx \n"
        "cld \n"
        "pushl %[cpu] \n" // push argument for init_finished before register is clobbered
        "call pre_init_finished \n"
        "call init_finished \n"
        "addl $4, %%esp \n"
        "call post_init_finished \n"
        "call enter_trap_no_irq \n"
        "addl $4, %%esp \n"
        "lret \n"
        :: [new_esp] "g" (tss.esp),
           [new_eip] "a" (tss.eip),
           [from_to_thread] "b" (&initial_thread),
           [cpu] "c" (id())
    );
    // clang-format on
#endif

    VERIFY_NOT_REACHED();
}

void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(&Processor::current() == this);
    trap.prev_irq_level = m_in_irq;
    if (raise_irq)
        m_in_irq++;
    auto* current_thread = Processor::current_thread();
    if (current_thread) {
        auto& current_trap = current_thread->current_trap();
        trap.next_trap = current_trap;
        current_trap = &trap;
        // The cs register of this trap tells us where we will return back to
        current_thread->set_previous_mode(((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode);
    } else {
        trap.next_trap = nullptr;
    }
}

void Processor::exit_trap(TrapFrame& trap)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(&Processor::current() == this);
    VERIFY(m_in_irq >= trap.prev_irq_level);
    m_in_irq = trap.prev_irq_level;

    smp_process_pending_messages();

    if (!m_in_irq && !m_in_critical)
        check_invoke_scheduler();

    auto* current_thread = Processor::current_thread();
    if (current_thread) {
        auto& current_trap = current_thread->current_trap();
        current_trap = trap.next_trap;
        if (current_trap) {
            VERIFY(current_trap->regs);
            // If we have another higher level trap then we probably returned
            // from an interrupt or irq handler. The cs register of the
            // new/higher level trap tells us what the mode prior to it was
            current_thread->set_previous_mode(((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode);
        } else {
            // If we don't have a higher level trap then we're back in user mode.
            // Unless we're a kernel process, in which case we're always in kernel mode
            current_thread->set_previous_mode(current_thread->process().is_kernel_process() ? Thread::PreviousMode::KernelMode : Thread::PreviousMode::UserMode);
        }
    }
}

void Processor::check_invoke_scheduler()
{
    VERIFY(!m_in_irq);
    VERIFY(!m_in_critical);
    if (m_invoke_scheduler_async && m_scheduler_initialized) {
        m_invoke_scheduler_async = false;
        Scheduler::invoke_async();
    }
}

void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
{
    auto ptr = vaddr.as_ptr();
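    // invlpg only invalidates the TLB entry for the single page containing
    // its operand, so issue one per page in the range.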
    while (page_count > 0) {
        // clang-format off
        asm volatile("invlpg %0"
             :
             : "m"(*ptr)
             : "memory");
        // clang-format on
        ptr += PAGE_SIZE;
        page_count--;
    }
}

void Processor::flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count)
{
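    // Kernel mappings are shared by all processors; user mappings can only be
    // cached on other cores if the process has more than one thread. Only in
    // those cases is a broadcast shootdown worth the cost.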
    if (s_smp_enabled && (!is_user_address(vaddr) || Process::current()->thread_count() > 1))
        smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
    else
        flush_tlb_local(vaddr, page_count);
}

static volatile ProcessorMessage* s_message_pool;

void Processor::smp_return_to_pool(ProcessorMessage& msg)
{
    ProcessorMessage* next = nullptr;
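    // Lock-free push onto the global freelist: link the message to the current
    // head and publish it with a CAS. On failure, compare_exchange reloads
    // "next" with the new head and we simply retry.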
    do {
        msg.next = next;
    } while (!atomic_compare_exchange_strong(&s_message_pool, next, &msg, AK::MemoryOrder::memory_order_acq_rel));
}

ProcessorMessage& Processor::smp_get_from_pool()
{
    ProcessorMessage* msg;

    // The assumption is that messages are never removed from the pool!
    for (;;) {
        msg = atomic_load(&s_message_pool, AK::MemoryOrder::memory_order_consume);
        if (!msg) {
            if (!Processor::current().smp_process_pending_messages()) {
                // TODO: pause for a bit?
            }
            continue;
        }
        // If another processor were to use this message in the meanwhile,
        // "msg" is still valid (because it never gets freed). We'd detect
        // this because the expected value "msg" and pool would
        // no longer match, and the compare_exchange will fail. But accessing
        // "msg->next" is always safe here.
        if (atomic_compare_exchange_strong(&s_message_pool, msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
            // We successfully "popped" this available message
            break;
        }
    }

    VERIFY(msg != nullptr);
    return *msg;
}

Atomic<u32> Processor::s_idle_cpu_mask { 0 };

u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
{
    VERIFY(Processor::current().in_critical());
    VERIFY(wake_count > 0);
    if (!s_smp_enabled)
        return 0;

    // Wake at most N - 1 processors
    if (wake_count >= Processor::count()) {
        wake_count = Processor::count() - 1;
        VERIFY(wake_count > 0);
    }

    u32 current_id = Processor::current().id();

    u32 did_wake_count = 0;
    auto& apic = APIC::the();
    while (did_wake_count < wake_count) {
        // Try to get a set of idle CPUs and flip them to busy
        u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
        u32 idle_count = __builtin_popcountl(idle_mask);
        if (idle_count == 0)
            break; // No (more) idle processor available

        u32 found_mask = 0;
        for (u32 i = 0; i < idle_count; i++) {
            u32 cpu = __builtin_ffsl(idle_mask) - 1;
            idle_mask &= ~(1u << cpu);
            found_mask |= 1u << cpu;
        }
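
        // fetch_and atomically clears the bits we found, claiming those CPUs;
        // whatever is still set in the returned value is the set we actually won.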
        idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
        if (idle_mask == 0)
            continue; // All of them were flipped to busy, try again
        idle_count = __builtin_popcountl(idle_mask);
        for (u32 i = 0; i < idle_count; i++) {
            u32 cpu = __builtin_ffsl(idle_mask) - 1;
            idle_mask &= ~(1u << cpu);

            // Send an IPI to that CPU to wake it up. There is a possibility
            // someone else woke it up as well, or that it woke up due to
            // a timer interrupt. But we tried hard to avoid this...
            apic.send_ipi(cpu);
            did_wake_count++;
        }
    }
    return did_wake_count;
}

UNMAP_AFTER_INIT void Processor::smp_enable()
{
    size_t msg_pool_size = Processor::count() * 100u;
    size_t msg_entries_cnt = Processor::count();

    auto msgs = new ProcessorMessage[msg_pool_size];
    auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
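    // Each message owns one queue entry per processor, so a single message can
    // sit on every processor's message queue at the same time.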
    size_t msg_entry_i = 0;
    for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
        auto& msg = msgs[i];
        msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
        msg.per_proc_entries = &msg_entries[msg_entry_i];
        for (size_t k = 0; k < msg_entries_cnt; k++)
            msg_entries[msg_entry_i + k].msg = &msg;
    }

    atomic_store(&s_message_pool, &msgs[0], AK::MemoryOrder::memory_order_release);

    // Start sending IPI messages
    s_smp_enabled = true;
}

void Processor::smp_cleanup_message(ProcessorMessage& msg)
{
    switch (msg.type) {
    case ProcessorMessage::CallbackWithData:
        if (msg.callback_with_data.free)
            msg.callback_with_data.free(msg.callback_with_data.data);
        break;
    default:
        break;
    }
}

bool Processor::smp_process_pending_messages()
{
    bool did_process = false;
    u32 prev_flags;
    enter_critical(prev_flags);

    if (auto pending_msgs = atomic_exchange(&m_message_queue, nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
        // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
        auto reverse_list =
            [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
                ProcessorMessageEntry* rev_list = nullptr;
                while (list) {
                    auto next = list->next;
                    list->next = rev_list;
                    rev_list = list;
                    list = next;
                }
                return rev_list;
            };

        pending_msgs = reverse_list(pending_msgs);

        // now process in the right order
        ProcessorMessageEntry* next_msg;
        for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
            next_msg = cur_msg->next;
            auto msg = cur_msg->msg;

            dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", id(), VirtualAddress(msg));

            switch (msg->type) {
            case ProcessorMessage::Callback:
                msg->callback.handler();
                break;
            case ProcessorMessage::CallbackWithData:
                msg->callback_with_data.handler(msg->callback_with_data.data);
                break;
            case ProcessorMessage::FlushTlb:
                if (is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
                    // We assume that we don't cross into kernel land!
                    VERIFY(is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
                    if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
                        // This processor isn't using this page directory right now, we can ignore this request
                        dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
                        break;
                    }
                }
                flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
                break;
            }

            bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
            auto prev_refs = atomic_fetch_sub(&msg->refs, 1u, AK::MemoryOrder::memory_order_acq_rel);
            VERIFY(prev_refs != 0);
            if (prev_refs == 1) {
                // All processors handled this. If this is an async message,
                // we need to clean it up and return it to the pool
                if (is_async) {
                    smp_cleanup_message(*msg);
                    smp_return_to_pool(*msg);
                }
            }

            if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
                halt_this();
        }
        did_process = true;
    } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
        halt_this();
    }

    leave_critical(prev_flags);
    return did_process;
}

bool Processor::smp_queue_message(ProcessorMessage& msg)
{
    // Note that it's quite possible that the other processor may pop
    // the queue at any given time. We rely on the fact that the messages
    // are pooled and never get freed!
    auto& msg_entry = msg.per_proc_entries[id()];
    VERIFY(msg_entry.msg == &msg);
    ProcessorMessageEntry* next = nullptr;
    do {
        msg_entry.next = next;
    } while (!atomic_compare_exchange_strong(&m_message_queue, next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel));
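    // If "next" is still null, the queue was empty before our push, so the
    // target processor doesn't know about this message yet and the caller
    // must send an IPI.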
    return next == nullptr;
}

void Processor::smp_broadcast_message(ProcessorMessage& msg)
{
    auto& cur_proc = Processor::current();

    dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), count(), VirtualAddress(&cur_proc));

    atomic_store(&msg.refs, count() - 1, AK::MemoryOrder::memory_order_release);
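    // Every processor except the sender has to handle the message; refs counts
    // how many of them haven't completed it yet.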
    VERIFY(msg.refs > 0);
    bool need_broadcast = false;
    for_each(
        [&](Processor& proc) {
            if (&proc != &cur_proc) {
                if (proc.smp_queue_message(msg))
                    need_broadcast = true;
            }
        });

    // Now trigger an IPI on all other APs (unless all targets already had messages queued)
    if (need_broadcast)
        APIC::the().broadcast_ipi();
}

void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
{
    auto& cur_proc = Processor::current();
    VERIFY(!msg.async);
    // If synchronous then we must cleanup and return the message back
    // to the pool. Otherwise, the last processor to complete it will return it
    while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) {
        // TODO: pause for a bit?

        // We need to process any messages that may have been sent to
        // us while we're waiting. This also checks if another processor
        // may have requested us to halt.
        cur_proc.smp_process_pending_messages();
    }

    smp_cleanup_message(msg);
    smp_return_to_pool(msg);
}

void Processor::smp_broadcast(void (*callback)(void*), void* data, void (*free_data)(void*), bool async)
{
    auto& msg = smp_get_from_pool();
    msg.async = async;
    msg.type = ProcessorMessage::CallbackWithData;
    msg.callback_with_data.handler = callback;
    msg.callback_with_data.data = data;
    msg.callback_with_data.free = free_data;
    smp_broadcast_message(msg);
    if (!async)
        smp_broadcast_wait_sync(msg);
}

void Processor::smp_broadcast(void (*callback)(), bool async)
{
    auto& msg = smp_get_from_pool();
    msg.async = async;
    msg.type = ProcessorMessage::Callback;
    msg.callback.handler = callback;
    smp_broadcast_message(msg);
    if (!async)
        smp_broadcast_wait_sync(msg);
}

void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
{
    auto& cur_proc = Processor::current();
    VERIFY(cpu != cur_proc.get_id());
    auto& target_proc = processors()[cpu];
    msg.async = async;

    dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_proc));

    atomic_store(&msg.refs, 1u, AK::MemoryOrder::memory_order_release);
    if (target_proc->smp_queue_message(msg)) {
        APIC::the().send_ipi(cpu);
    }

    if (!async) {
        // If synchronous then we must cleanup and return the message back
        // to the pool. Otherwise, the last processor to complete it will return it
        while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) {
            // TODO: pause for a bit?

            // We need to process any messages that may have been sent to
            // us while we're waiting. This also checks if another processor
            // may have requested us to halt.
            cur_proc.smp_process_pending_messages();
        }

        smp_cleanup_message(msg);
        smp_return_to_pool(msg);
    }
}

void Processor::smp_unicast(u32 cpu, void (*callback)(void*), void* data, void (*free_data)(void*), bool async)
{
    auto& msg = smp_get_from_pool();
    msg.type = ProcessorMessage::CallbackWithData;
    msg.callback_with_data.handler = callback;
    msg.callback_with_data.data = data;
    msg.callback_with_data.free = free_data;
    smp_unicast_message(cpu, msg, async);
}

void Processor::smp_unicast(u32 cpu, void (*callback)(), bool async)
{
    auto& msg = smp_get_from_pool();
    msg.type = ProcessorMessage::Callback;
    msg.callback.handler = callback;
    smp_unicast_message(cpu, msg, async);
}

void Processor::smp_broadcast_flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count)
{
    auto& msg = smp_get_from_pool();
    msg.async = false;
    msg.type = ProcessorMessage::FlushTlb;
    msg.flush_tlb.page_directory = page_directory;
    msg.flush_tlb.ptr = vaddr.as_ptr();
    msg.flush_tlb.page_count = page_count;
    smp_broadcast_message(msg);
    // While the other processors handle this request, we'll flush ours
    flush_tlb_local(vaddr, page_count);
    // Now wait until everybody is done as well
    smp_broadcast_wait_sync(msg);
}

void Processor::smp_broadcast_halt()
{
    // We don't want to use a message, because this could have been triggered
    // by being out of memory and we might not be able to get a message
    for_each(
        [&](Processor& proc) {
            proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
        });

    // Now trigger an IPI on all other APs
    APIC::the().broadcast_ipi();
}

void Processor::halt()
{
    if (s_smp_enabled)
        smp_broadcast_halt();

    halt_this();
}

UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
{
    size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
    for (size_t i = 0; i < pool_count; i++) {
        auto& entry = m_deferred_call_pool[i];
        entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
        entry.was_allocated = false;
    }
    m_pending_deferred_calls = nullptr;
    m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
}

void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
{
    VERIFY(m_in_critical);
    VERIFY(!entry->was_allocated);

    entry->next = m_free_deferred_call_pool_entry;
    m_free_deferred_call_pool_entry = entry;
}

DeferredCallEntry* Processor::deferred_call_get_free()
{
    VERIFY(m_in_critical);

    if (m_free_deferred_call_pool_entry) {
        // Fast path, we have an entry in our pool
        auto* entry = m_free_deferred_call_pool_entry;
        m_free_deferred_call_pool_entry = entry->next;
        VERIFY(!entry->was_allocated);
        return entry;
    }
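
    // Slow path: the pool is exhausted, so fall back to the heap. was_allocated
    // tells deferred_call_execute_pending() to delete this entry rather than
    // return it to the pool.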
    auto* entry = new DeferredCallEntry;
    entry->was_allocated = true;
    return entry;
}

void Processor::deferred_call_execute_pending()
{
    VERIFY(m_in_critical);

    if (!m_pending_deferred_calls)
        return;
    auto* pending_list = m_pending_deferred_calls;
    m_pending_deferred_calls = nullptr;

    // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
    auto reverse_list =
        [](DeferredCallEntry* list) -> DeferredCallEntry* {
            DeferredCallEntry* rev_list = nullptr;
            while (list) {
                auto next = list->next;
                list->next = rev_list;
                rev_list = list;
                list = next;
            }
            return rev_list;
        };
    pending_list = reverse_list(pending_list);

    do {
        // Call the appropriate callback handler
        if (pending_list->have_data) {
            pending_list->callback_with_data.handler(pending_list->callback_with_data.data);
            if (pending_list->callback_with_data.free)
                pending_list->callback_with_data.free(pending_list->callback_with_data.data);
        } else {
            pending_list->callback.handler();
        }

        // Return the entry back to the pool, or free it
        auto* next = pending_list->next;
        if (pending_list->was_allocated)
            delete pending_list;
        else
            deferred_call_return_to_pool(pending_list);
        pending_list = next;
    } while (pending_list);
}

void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
{
    VERIFY(m_in_critical);
    entry->next = m_pending_deferred_calls;
    m_pending_deferred_calls = entry;
}

void Processor::deferred_call_queue(void (*callback)())
{
    // NOTE: If we are called outside of a critical section and outside
    // of an irq handler, the function will be executed before we return!
    ScopedCritical critical;
    auto& cur_proc = Processor::current();

    auto* entry = cur_proc.deferred_call_get_free();
    entry->have_data = false;
    entry->callback.handler = callback;

    cur_proc.deferred_call_queue_entry(entry);
}

void Processor::deferred_call_queue(void (*callback)(void*), void* data, void (*free_data)(void*))
{
    // NOTE: If we are called outside of a critical section and outside
    // of an irq handler, the function will be executed before we return!
    ScopedCritical critical;
    auto& cur_proc = Processor::current();

    auto* entry = cur_proc.deferred_call_get_free();
    entry->have_data = true;
    entry->callback_with_data.handler = callback;
    entry->callback_with_data.data = data;
    entry->callback_with_data.free = free_data;

    cur_proc.deferred_call_queue_entry(entry);
}

UNMAP_AFTER_INIT void Processor::gdt_init()
{
    m_gdt_length = 0;
    m_gdtr.address = nullptr;
    m_gdtr.limit = 0;

    write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3

    Descriptor tls_descriptor {};
    tls_descriptor.low = tls_descriptor.high = 0;
    tls_descriptor.dpl = 3;
    tls_descriptor.segment_present = 1;
    tls_descriptor.granularity = 0;
    tls_descriptor.operation_size64 = 0;
    tls_descriptor.operation_size32 = 1;
    tls_descriptor.descriptor_type = 1;
    tls_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3
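    // The TLS descriptor's base and limit stay zero here; they are presumably
    // pointed at the current thread's TLS block later on, per thread.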

    Descriptor fs_descriptor {};
    fs_descriptor.set_base(VirtualAddress { this });
    fs_descriptor.set_limit(sizeof(Processor));
    fs_descriptor.dpl = 0;
    fs_descriptor.segment_present = 1;
    fs_descriptor.granularity = 0;
    fs_descriptor.operation_size64 = 0;
    fs_descriptor.operation_size32 = 1;
    fs_descriptor.descriptor_type = 1;
    fs_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_PROC, fs_descriptor); // fs0

    Descriptor tss_descriptor {};
    tss_descriptor.set_base(VirtualAddress { &m_tss });
    tss_descriptor.set_limit(sizeof(TSS32));
    tss_descriptor.dpl = 0;
    tss_descriptor.segment_present = 1;
    tss_descriptor.granularity = 0;
    tss_descriptor.operation_size64 = 0;
    tss_descriptor.operation_size32 = 1;
    tss_descriptor.descriptor_type = 0;
    tss_descriptor.type = 9;
    write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss

    flush_gdt();
    load_task_register(GDT_SELECTOR_TSS);

    asm volatile(
        "mov %%ax, %%ds\n"
        "mov %%ax, %%es\n"
        "mov %%ax, %%gs\n"
        "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
        : "memory");
    set_fs(GDT_SELECTOR_PROC);

#if ARCH(I386)
    // Make sure CS points to the kernel code descriptor.
    // clang-format off
    asm volatile(
        "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
        "sanity:\n");
    // clang-format on
#endif
}

void copy_kernel_registers_into_ptrace_registers(PtraceRegisters& ptrace_regs, const RegisterState& kernel_regs)
{
    ptrace_regs.eax = kernel_regs.eax;
    ptrace_regs.ecx = kernel_regs.ecx;
    ptrace_regs.edx = kernel_regs.edx;
    ptrace_regs.ebx = kernel_regs.ebx;
    ptrace_regs.esp = kernel_regs.userspace_esp;
    ptrace_regs.ebp = kernel_regs.ebp;
    ptrace_regs.esi = kernel_regs.esi;
    ptrace_regs.edi = kernel_regs.edi;
    ptrace_regs.eip = kernel_regs.eip;
    ptrace_regs.eflags = kernel_regs.eflags;
    ptrace_regs.cs = 0;
    ptrace_regs.ss = 0;
    ptrace_regs.ds = 0;
    ptrace_regs.es = 0;
    ptrace_regs.fs = 0;
    ptrace_regs.gs = 0;
}

void copy_ptrace_registers_into_kernel_registers(RegisterState& kernel_regs, const PtraceRegisters& ptrace_regs)
{
    kernel_regs.eax = ptrace_regs.eax;
    kernel_regs.ecx = ptrace_regs.ecx;
    kernel_regs.edx = ptrace_regs.edx;
    kernel_regs.ebx = ptrace_regs.ebx;
    kernel_regs.esp = ptrace_regs.esp;
    kernel_regs.ebp = ptrace_regs.ebp;
    kernel_regs.esi = ptrace_regs.esi;
    kernel_regs.edi = ptrace_regs.edi;
    kernel_regs.eip = ptrace_regs.eip;
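    // Filter through safe_eflags_mask so a tracer can only change the
    // user-modifiable flag bits; all other bits keep their kernel-maintained
    // values.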
    kernel_regs.eflags = (kernel_regs.eflags & ~safe_eflags_mask) | (ptrace_regs.eflags & safe_eflags_mask);
}
}

#ifdef DEBUG
void __assertion_failed(const char* msg, const char* file, unsigned line, const char* func)
{
    asm volatile("cli");
    critical_dmesgln("ASSERTION FAILED: {}", msg);
    critical_dmesgln("{}:{} in {}", file, line, func);

    abort();
}
#endif

[[noreturn]] void _abort();

[[noreturn]] void abort()
{
#ifdef DEBUG
    // Switch back to the current process's page tables if there are any.
    // Otherwise stack walking will be a disaster.
    auto process = Process::current();
    if (process)
        MM.enter_process_paging_scope(*process);

    Kernel::dump_backtrace();
    Processor::halt();
#endif

    _abort();
}

[[noreturn]] void _abort()
{
    asm volatile("ud2");
    __builtin_unreachable();
}

NonMaskableInterruptDisabler::NonMaskableInterruptDisabler()
{
    IO::out8(0x70, IO::in8(0x70) | 0x80);
}

NonMaskableInterruptDisabler::~NonMaskableInterruptDisabler()
{
    IO::out8(0x70, IO::in8(0x70) & 0x7F);
}