/*
 * Copyright (c) 2023, Sönke Holz
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include
#include
#include
#include
#include
#include
#include
#include

namespace Kernel {

Processor* g_current_processor;

// Saves f0-f31 and fcsr into the given FPUState.
static void store_fpu_state(FPUState* fpu_state)
{
    asm volatile(
        "fsd f0, 0*8(%0) \n"
        "fsd f1, 1*8(%0) \n"
        "fsd f2, 2*8(%0) \n"
        "fsd f3, 3*8(%0) \n"
        "fsd f4, 4*8(%0) \n"
        "fsd f5, 5*8(%0) \n"
        "fsd f6, 6*8(%0) \n"
        "fsd f7, 7*8(%0) \n"
        "fsd f8, 8*8(%0) \n"
        "fsd f9, 9*8(%0) \n"
        "fsd f10, 10*8(%0) \n"
        "fsd f11, 11*8(%0) \n"
        "fsd f12, 12*8(%0) \n"
        "fsd f13, 13*8(%0) \n"
        "fsd f14, 14*8(%0) \n"
        "fsd f15, 15*8(%0) \n"
        "fsd f16, 16*8(%0) \n"
        "fsd f17, 17*8(%0) \n"
        "fsd f18, 18*8(%0) \n"
        "fsd f19, 19*8(%0) \n"
        "fsd f20, 20*8(%0) \n"
        "fsd f21, 21*8(%0) \n"
        "fsd f22, 22*8(%0) \n"
        "fsd f23, 23*8(%0) \n"
        "fsd f24, 24*8(%0) \n"
        "fsd f25, 25*8(%0) \n"
        "fsd f26, 26*8(%0) \n"
        "fsd f27, 27*8(%0) \n"
        "fsd f28, 28*8(%0) \n"
        "fsd f29, 29*8(%0) \n"
        "fsd f30, 30*8(%0) \n"
        "fsd f31, 31*8(%0) \n"

        "csrr t0, fcsr \n"
        "sd t0, 32*8(%0) \n"
        :
        : "r"(fpu_state)
        : "t0", "memory");
}

// Restores f0-f31 and fcsr from the given FPUState.
static void load_fpu_state(FPUState* fpu_state)
{
    asm volatile(
        "fld f0, 0*8(%0) \n"
        "fld f1, 1*8(%0) \n"
        "fld f2, 2*8(%0) \n"
        "fld f3, 3*8(%0) \n"
        "fld f4, 4*8(%0) \n"
        "fld f5, 5*8(%0) \n"
        "fld f6, 6*8(%0) \n"
        "fld f7, 7*8(%0) \n"
        "fld f8, 8*8(%0) \n"
        "fld f9, 9*8(%0) \n"
        "fld f10, 10*8(%0) \n"
        "fld f11, 11*8(%0) \n"
        "fld f12, 12*8(%0) \n"
        "fld f13, 13*8(%0) \n"
        "fld f14, 14*8(%0) \n"
        "fld f15, 15*8(%0) \n"
        "fld f16, 16*8(%0) \n"
        "fld f17, 17*8(%0) \n"
        "fld f18, 18*8(%0) \n"
        "fld f19, 19*8(%0) \n"
        "fld f20, 20*8(%0) \n"
        "fld f21, 21*8(%0) \n"
        "fld f22, 22*8(%0) \n"
        "fld f23, 23*8(%0) \n"
        "fld f24, 24*8(%0) \n"
        "fld f25, 25*8(%0) \n"
        "fld f26, 26*8(%0) \n"
        "fld f27, 27*8(%0) \n"
        "fld f28, 28*8(%0) \n"
        "fld f29, 29*8(%0) \n"
        "fld f30, 30*8(%0) \n"
        "fld f31, 31*8(%0) \n"

        "ld t0, 32*8(%0) \n"
        "csrw fcsr, t0 \n"
        :
        : "r"(fpu_state)
        : "t0", "memory");
}

template<typename T>
void ProcessorBase<T>::early_initialize(u32 cpu)
{
    VERIFY(g_current_processor == nullptr);
    m_cpu = cpu;

    g_current_processor = static_cast<Processor*>(this);
}

template<typename T>
void ProcessorBase<T>::initialize(u32)
{
    m_deferred_call_pool.init();

    // Enable the FPU
    auto sstatus = RISCV64::CSR::SSTATUS::read();
    sstatus.FS = RISCV64::CSR::SSTATUS::FloatingPointStatus::Initial;
    RISCV64::CSR::SSTATUS::write(sstatus);

    store_fpu_state(&s_clean_fpu_state);

    initialize_interrupts();
}

template<typename T>
[[noreturn]] void ProcessorBase<T>::halt()
{
    // WFI ignores the value of sstatus.SIE, so we can't use disable_interrupts().
    // Instead, disable all interrupt sources by setting sie to zero.
    RISCV64::CSR::write(RISCV64::CSR::Address::SIE, 0);
    for (;;)
        asm volatile("wfi");
}

template<typename T>
void ProcessorBase<T>::flush_tlb_local(VirtualAddress, size_t)
{
    // FIXME: Don't flush all pages
    flush_entire_tlb_local();
}

template<typename T>
void ProcessorBase<T>::flush_entire_tlb_local()
{
    asm volatile("sfence.vma");
}

template<typename T>
void ProcessorBase<T>::flush_tlb(Memory::PageDirectory const*, VirtualAddress vaddr, size_t page_count)
{
    flush_tlb_local(vaddr, page_count);
}

template<typename T>
u32 ProcessorBase<T>::clear_critical()
{
    InterruptDisabler disabler;
    auto prev_critical = in_critical();
    auto& proc = current();
    proc.m_in_critical = 0;
    if (proc.m_in_irq == 0)
        proc.check_invoke_scheduler();
    return prev_critical;
}

template<typename T>
u32 ProcessorBase<T>::smp_wake_n_idle_processors(u32)
{
    // FIXME: Actually wake up other cores when SMP is supported for riscv64.
    return 0;
}
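
// Bootstraps context switching: switch onto initial_thread's kernel stack, push the
// thread pointer twice (filling the from_thread and to_thread slots that
// thread_context_first_enter pops) and jump to the thread's saved instruction pointer.
// This function never returns.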
template<typename T>
void ProcessorBase<T>::initialize_context_switching(Thread& initial_thread)
{
    VERIFY(initial_thread.process().is_kernel_process());

    m_scheduler_initialized = true;

    // FIXME: Figure out if we need to call {pre_,post_,}init_finished once riscv64 supports SMP
    Processor::set_current_in_scheduler(true);

    auto& regs = initial_thread.regs();
    asm volatile(
        "mv sp, %[new_sp] \n"

        "addi sp, sp, -32 \n"
        "sd %[from_to_thread], 0(sp) \n"
        "sd %[from_to_thread], 8(sp) \n"

        "jr %[new_ip] \n"
        :
        : [new_sp] "r"(regs.sp()),
          [new_ip] "r"(regs.ip()),
          [from_to_thread] "r"(&initial_thread)
        : "t0");

    VERIFY_NOT_REACHED();
}

template<typename T>
void ProcessorBase<T>::switch_context(Thread*& from_thread, Thread*& to_thread)
{
    VERIFY(!m_in_irq);
    VERIFY(m_in_critical == 1);

    dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context --> switching out of: {} {}", VirtualAddress(from_thread), *from_thread);

    // m_in_critical is restored in enter_thread_context
    from_thread->save_critical(m_in_critical);

    // clang-format off
    asm volatile(
        // Store a RegisterState of from_thread on from_thread's stack
        "addi sp, sp, -(34 * 8) \n"

        "sd x1, 0*8(sp) \n"
        // sp
        "sd x3, 2*8(sp) \n"
        "sd x4, 3*8(sp) \n"
        "sd x5, 4*8(sp) \n"
        "sd x6, 5*8(sp) \n"
        "sd x7, 6*8(sp) \n"
        "sd x8, 7*8(sp) \n"
        "sd x9, 8*8(sp) \n"
        "sd x10, 9*8(sp) \n"
        "sd x11, 10*8(sp) \n"
        "sd x12, 11*8(sp) \n"
        "sd x13, 12*8(sp) \n"
        "sd x14, 13*8(sp) \n"
        "sd x15, 14*8(sp) \n"
        "sd x16, 15*8(sp) \n"
        "sd x17, 16*8(sp) \n"
        "sd x18, 17*8(sp) \n"
        "sd x19, 18*8(sp) \n"
        "sd x20, 19*8(sp) \n"
        "sd x21, 20*8(sp) \n"
        "sd x22, 21*8(sp) \n"
        "sd x23, 22*8(sp) \n"
        "sd x24, 23*8(sp) \n"
        "sd x25, 24*8(sp) \n"
        "sd x26, 25*8(sp) \n"
        "sd x27, 26*8(sp) \n"
        "sd x28, 27*8(sp) \n"
        "sd x29, 28*8(sp) \n"
        "sd x30, 29*8(sp) \n"
        "sd x31, 30*8(sp) \n"

        // Store current sp as from_thread's sp.
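        // (The x2/sp slot at 1*8 in the RegisterState above is intentionally left empty;
        // sp is instead written directly into from_thread's saved registers through the
        // %[from_sp] memory operand.)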
"sd sp, %[from_sp] \n" // Set from_thread's pc to label "1" "la t0, 1f \n" "sd t0, %[from_ip] \n" // Switch to to_thread's stack "ld sp, %[to_sp] \n" // Store from_thread, to_thread, to_ip on to_thread's stack "addi sp, sp, -(4 * 8) \n" "ld a0, %[from_thread] \n" "sd a0, 0*8(sp) \n" "ld a1, %[to_thread] \n" "sd a1, 1*8(sp) \n" "ld s1, %[to_ip] \n" "sd s1, 2*8(sp) \n" // enter_thread_context(from_thread, to_thread) "call enter_thread_context \n" // Jump to to_ip "jr s1 \n" // A thread enters here when they were already scheduled at least once "1: \n" "addi sp, sp, (4 * 8) \n" // Restore the RegisterState of to_thread "ld x1, 0*8(sp) \n" // sp "ld x3, 2*8(sp) \n" "ld x4, 3*8(sp) \n" "ld x5, 4*8(sp) \n" "ld x6, 5*8(sp) \n" "ld x7, 6*8(sp) \n" "ld x8, 7*8(sp) \n" "ld x9, 8*8(sp) \n" "ld x10, 9*8(sp) \n" "ld x11, 10*8(sp) \n" "ld x12, 11*8(sp) \n" "ld x13, 12*8(sp) \n" "ld x14, 13*8(sp) \n" "ld x15, 14*8(sp) \n" "ld x16, 15*8(sp) \n" "ld x17, 16*8(sp) \n" "ld x18, 17*8(sp) \n" "ld x19, 18*8(sp) \n" "ld x20, 19*8(sp) \n" "ld x21, 20*8(sp) \n" "ld x22, 21*8(sp) \n" "ld x23, 22*8(sp) \n" "ld x24, 23*8(sp) \n" "ld x25, 24*8(sp) \n" "ld x26, 25*8(sp) \n" "ld x27, 26*8(sp) \n" "ld x28, 27*8(sp) \n" "ld x29, 28*8(sp) \n" "ld x30, 29*8(sp) \n" "ld x31, 30*8(sp) \n" "addi sp, sp, -(4 * 8) \n" "ld t0, 0*8(sp) \n" "sd t0, %[from_thread] \n" "ld t0, 1*8(sp) \n" "sd t0, %[to_thread] \n" "addi sp, sp, (34 * 8) + (4 * 8) \n" : [from_ip] "=m"(from_thread->regs().pc), [from_sp] "=m"(from_thread->regs().x[1]), "=m"(from_thread), "=m"(to_thread) : [to_ip] "m"(to_thread->regs().pc), [to_sp] "m"(to_thread->regs().x[1]), [from_thread] "m"(from_thread), [to_thread] "m"(to_thread) : "memory", "t0", "s1", "a0", "a1"); // clang-format on dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {}", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread); } extern "C" FlatPtr do_init_context(Thread* thread, u32 new_interrupts_state) { VERIFY_INTERRUPTS_DISABLED(); thread->regs().sstatus.SPIE = (new_interrupts_state == to_underlying(InterruptsState::Enabled)); return Processor::current().init_context(*thread, true); } template void ProcessorBase::assume_context(Thread& thread, InterruptsState new_interrupts_state) { dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread); VERIFY_INTERRUPTS_DISABLED(); Scheduler::prepare_after_exec(); // in_critical() should be 2 here. The critical section in Process::exec // and then the scheduler lock VERIFY(Processor::in_critical() == 2); do_assume_context(&thread, to_underlying(new_interrupts_state)); VERIFY_NOT_REACHED(); } template FlatPtr ProcessorBase::init_context(Thread& thread, bool leave_crit) { VERIFY(g_scheduler_lock.is_locked()); if (leave_crit) { // Leave the critical section we set up in Process::exec, // but because we still have the scheduler lock we should end up with 1 VERIFY(in_critical() == 2); m_in_critical = 1; // leave it without triggering anything or restoring flags } u64 kernel_stack_top = thread.kernel_stack_top(); // Add a random offset between 0-256 (16-byte aligned) kernel_stack_top -= round_up_to_power_of_two(get_fast_random(), 16); u64 stack_top = kernel_stack_top; auto& thread_regs = thread.regs(); // Push a RegisterState and TrapFrame onto the stack, which will be popped of the stack and restored into the // state of the processor by restore_previous_context. 
    stack_top -= sizeof(RegisterState);
    RegisterState& frame = *reinterpret_cast<RegisterState*>(stack_top);

    memcpy(frame.x, thread_regs.x, sizeof(thread_regs.x));

    // We don't overwrite the return address register if it's not 0, since that means this thread's register state was already initialized with
    // an existing return address register value (e.g. it was fork()'ed), so we assume exit_kernel_thread is already saved as previous RA on the
    // stack somewhere.
    if (frame.x[0] == 0x0) {
        // x1 is the return address register for the riscv64 ABI, so this will return to exit_kernel_thread when the main thread function returns.
        frame.x[0] = FlatPtr(&exit_kernel_thread);
    }

    frame.sepc = thread_regs.pc;
    frame.set_userspace_sp(thread_regs.sp());
    frame.sstatus = thread_regs.sstatus;

    // Push a TrapFrame onto the stack
    stack_top -= sizeof(TrapFrame);
    TrapFrame& trap = *reinterpret_cast<TrapFrame*>(stack_top);
    trap.regs = &frame;
    trap.next_trap = nullptr;

    if constexpr (CONTEXT_SWITCH_DEBUG) {
        dbgln("init_context {} ({}) set up to execute at ip={}, sp={}, stack_top={}", thread, VirtualAddress(&thread), VirtualAddress(thread_regs.pc), VirtualAddress(thread_regs.sp()), VirtualAddress(stack_top));
    }

    // This makes sure the thread first executes thread_context_first_enter, which will actually call restore_previous_context,
    // which restores the context set up above.
    thread_regs.set_sp(stack_top);
    thread_regs.set_ip(FlatPtr(&thread_context_first_enter));

    return stack_top;
}

// FIXME: Figure out if we can fully share this code with x86.
template<typename T>
void ProcessorBase<T>::exit_trap(TrapFrame& trap)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(&Processor::current() == this);

    // Temporarily enter a critical section. This is to prevent critical
    // sections entered and left within e.g. smp_process_pending_messages
    // from triggering a context switch while we're executing this function.
    // See the comment at the end of the function why we don't use
    // ScopedCritical here.
    m_in_critical = m_in_critical + 1;

    // FIXME: Figure out if we need prev_irq_level, see duplicated code in Kernel/Arch/x86/common/Processor.cpp
    m_in_irq = 0;

    // Process the deferred call queue. Among other things, this ensures
    // that any pending thread unblocks happen before we enter the scheduler.
    m_deferred_call_pool.execute_pending();

    auto* current_thread = Processor::current_thread();
    if (current_thread) {
        auto& current_trap = current_thread->current_trap();
        current_trap = trap.next_trap;
        ExecutionMode new_previous_mode;
        if (current_trap) {
            VERIFY(current_trap->regs);
            new_previous_mode = current_trap->regs->previous_mode();
        } else {
            // If we don't have a higher level trap then we're back in user mode,
            // which means that the previous mode prior to being back in user mode was kernel mode.
            new_previous_mode = ExecutionMode::Kernel;
        }

        if (current_thread->set_previous_mode(new_previous_mode))
            current_thread->update_time_scheduled(TimeManagement::scheduler_current_time(), true, false);
    }

    VERIFY_INTERRUPTS_DISABLED();

    // Leave the critical section without actually enabling interrupts.
    // We don't want context switches to happen until we're explicitly
    // triggering a switch in check_invoke_scheduler.
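    // (A plain decrement is used here rather than ScopedCritical so that m_in_critical is
    // already back at its previous value by the time check_invoke_scheduler() runs below.)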
    m_in_critical = m_in_critical - 1;
    if (!m_in_irq && !m_in_critical)
        check_invoke_scheduler();
}

template<typename T>
ErrorOr<Vector<FlatPtr>> ProcessorBase<T>::capture_stack_trace(Thread&, size_t)
{
    dbgln("FIXME: Implement Processor::capture_stack_trace() for riscv64");
    return Vector<FlatPtr> {};
}

extern "C" void context_first_init(Thread* from_thread, Thread* to_thread);
extern "C" void context_first_init(Thread* from_thread, Thread* to_thread)
{
    do_context_first_init(from_thread, to_thread);
}

extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread);
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
{
    VERIFY(from_thread == to_thread || from_thread->state() != Thread::State::Running);
    VERIFY(to_thread->state() == Thread::State::Running);

    Processor::set_current_thread(*to_thread);

    store_fpu_state(&from_thread->fpu_state());

    auto& from_regs = from_thread->regs();
    auto& to_regs = to_thread->regs();
    if (from_regs.satp != to_regs.satp) {
        RISCV64::CSR::SATP::write(to_regs.satp);
        Processor::flush_entire_tlb_local();
    }

    to_thread->set_cpu(Processor::current().id());

    Processor::set_thread_specific_data(to_thread->thread_specific_data());

    auto in_critical = to_thread->saved_critical();
    VERIFY(in_critical > 0);
    Processor::restore_critical(in_critical);

    load_fpu_state(&to_thread->fpu_state());
}

NAKED void thread_context_first_enter()
{
    asm(
        "ld a0, 0(sp) \n"
        "ld a1, 8(sp) \n"
        "addi sp, sp, 32 \n"
        "call context_first_init \n"
        "mv a0, sp \n"
        "call exit_trap \n"
        "tail restore_context_and_sret \n");
}

NAKED void do_assume_context(Thread*, u32)
{
    // clang-format off
    asm(
        "mv s1, a0 \n" // save thread ptr

        // We're going to call Processor::init_context, so just make sure
        // we have enough stack space so we don't stomp over it
        "addi sp, sp, -" __STRINGIFY(8 + REGISTER_STATE_SIZE + TRAP_FRAME_SIZE + 8) " \n"

        "call do_init_context \n"
        "mv sp, a0 \n" // move stack pointer to what Processor::init_context set up for us

        "mv a0, s1 \n" // to_thread
        "mv a1, s1 \n" // from_thread

        "addi sp, sp, -32 \n"
        "sd s1, 0(sp) \n"
        "sd s1, 8(sp) \n"

        "la ra, thread_context_first_enter \n" // should be same as regs.sepc

        "tail enter_thread_context \n");
    // clang-format on
}

template<typename T>
StringView ProcessorBase<T>::platform_string()
{
    return "riscv64"sv;
}

template<typename T>
void ProcessorBase<T>::set_thread_specific_data(VirtualAddress)
{
    // FIXME: Add support for thread-local storage on RISC-V
}

template<typename T>
void ProcessorBase<T>::wait_for_interrupt() const
{
    asm("wfi");
}

template<typename T>
Processor& ProcessorBase<T>::by_id(u32)
{
    TODO_RISCV64();
}

}

#include