diff --git a/Kernel/Arch/i386/Boot/boot.S b/Kernel/Arch/i386/Boot/boot.S
index 535f13d848..5ae068c771 100644
--- a/Kernel/Arch/i386/Boot/boot.S
+++ b/Kernel/Arch/i386/Boot/boot.S
@@ -34,7 +34,7 @@ stack_top:
 .section .page_tables
 .align 4096
 page_tables_start:
-.skip 4096*3
+.skip 4096*9
 
 .section .text
 
diff --git a/Kernel/Arch/i386/CPU.h b/Kernel/Arch/i386/CPU.h
index 49bf4c9aa3..0f592e9037 100644
--- a/Kernel/Arch/i386/CPU.h
+++ b/Kernel/Arch/i386/CPU.h
@@ -9,6 +9,7 @@
 #define PAGE_MASK 0xfffff000
 
 class MemoryManager;
+class PageDirectory;
 class PageTableEntry;
 
 struct [[gnu::packed]] TSS32 {
@@ -89,12 +90,12 @@ public:
     PageTableEntry* page_table_base() { return reinterpret_cast<PageTableEntry*>(m_raw & 0xfffff000u); }
     void set_page_table_base(u32 value)
     {
-        m_raw &= 0xfff;
+        m_raw &= 0x8000000000000fffULL;
         m_raw |= value & 0xfffff000;
     }
 
-    u32 raw() const { return m_raw; }
-    void copy_from(Badge<MemoryManager>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }
+    u64 raw() const { return m_raw; }
+    void copy_from(Badge<PageDirectory>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }
 
     enum Flags {
         Present = 1 << 0,
@@ -103,6 +104,7 @@ public:
         WriteThrough = 1 << 3,
         CacheDisabled = 1 << 4,
         Global = 1 << 8,
+        NoExecute = 0x8000000000000000ULL,
     };
 
     bool is_present() const { return raw() & Present; }
@@ -123,7 +125,10 @@ public:
     bool is_global() const { return raw() & Global; }
     void set_global(bool b) { set_bit(Global, b); }
 
-    void set_bit(u32 bit, bool value)
+    bool is_execute_disabled() const { return raw() & NoExecute; }
+    void set_execute_disabled(bool b) { set_bit(NoExecute, b); }
+
+    void set_bit(u64 bit, bool value)
     {
         if (value)
             m_raw |= bit;
@@ -132,7 +137,7 @@ public:
     }
 
 private:
-    u32 m_raw;
+    u64 m_raw;
 };
 
 class PageTableEntry {
@@ -140,11 +145,11 @@ public:
     void* physical_page_base() { return reinterpret_cast<void*>(m_raw & 0xfffff000u); }
     void set_physical_page_base(u32 value)
    {
-        m_raw &= 0xfff;
+        m_raw &= 0x8000000000000fffULL;
         m_raw |= value & 0xfffff000;
     }
 
-    u32 raw() const { return m_raw; }
+    u64 raw() const { return m_raw; }
 
     enum Flags {
         Present = 1 << 0,
@@ -153,6 +158,7 @@ public:
         WriteThrough = 1 << 3,
         CacheDisabled = 1 << 4,
         Global = 1 << 8,
+        NoExecute = 0x8000000000000000ULL,
     };
 
     bool is_present() const { return raw() & Present; }
@@ -173,7 +179,10 @@ public:
     bool is_global() const { return raw() & Global; }
     void set_global(bool b) { set_bit(Global, b); }
 
-    void set_bit(u32 bit, bool value)
+    bool is_execute_disabled() const { return raw() & NoExecute; }
+    void set_execute_disabled(bool b) { set_bit(NoExecute, b); }
+
+    void set_bit(u64 bit, bool value)
     {
         if (value)
             m_raw |= bit;
@@ -182,11 +191,21 @@ public:
     }
 
 private:
-    u32 m_raw;
+    u64 m_raw;
 };
 
-static_assert(sizeof(PageDirectoryEntry) == 4);
-static_assert(sizeof(PageTableEntry) == 4);
+static_assert(sizeof(PageDirectoryEntry) == 8);
+static_assert(sizeof(PageTableEntry) == 8);
+
+class PageDirectoryPointerTable {
+public:
+    PageDirectoryEntry* directory(size_t index)
+    {
+        return (PageDirectoryEntry*)(raw[index] & ~0xfffu);
+    }
+
+    u64 raw[4];
+};
 
 class IRQHandler;
 struct RegisterDump;
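Note on the CPU.h change: widening the entries to u64 is what makes the NoExecute bit (bit 63) expressible at all, but on real hardware that bit is reserved in PAE entries until IA32_EFER.NXE is set, and NX support must be confirmed via CPUID first. Neither step appears in this diff, so the helper below is a hypothetical sketch of what that setup looks like, not part of the change:

    // Hypothetical helper (not in this diff): PAE entries may only carry
    // bit 63 (NX) once EFER.NXE is enabled; until then the CPU treats the
    // bit as reserved and raises a reserved-bit page fault.
    static bool enable_nx_if_supported()
    {
        u32 eax, ebx, ecx, edx;
        // CPUID leaf 0x80000001: EDX bit 20 reports NX support.
        asm volatile("cpuid"
                     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                     : "a"(0x80000001u));
        if (!(edx & (1u << 20)))
            return false;
        // IA32_EFER is MSR 0xc0000080; bit 11 is NXE.
        asm volatile("rdmsr" : "=a"(eax), "=d"(edx) : "c"(0xc0000080u));
        eax |= 1u << 11;
        asm volatile("wrmsr" ::"a"(eax), "d"(edx), "c"(0xc0000080u));
        return true;
    }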
diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp
index be1ceeff22..81a6612f56 100644
--- a/Kernel/Thread.cpp
+++ b/Kernel/Thread.cpp
@@ -100,7 +100,6 @@ Thread::Thread(Process& process)
         m_tss.esp0 = m_kernel_stack_top;
         kprintf("Allocated ring0 stack @ %p - %p\n", m_kernel_stack_base, m_kernel_stack_top);
     }
-    m_process.page_directory().update_kernel_mappings();
 
     // HACK: Ring2 SS in the TSS is the current PID.
     m_tss.ss2 = m_process.pid();
diff --git a/Kernel/VM/MemoryManager.cpp b/Kernel/VM/MemoryManager.cpp
index f6861136a5..a3e792d6cb 100644
--- a/Kernel/VM/MemoryManager.cpp
+++ b/Kernel/VM/MemoryManager.cpp
@@ -23,8 +23,11 @@ MemoryManager& MM
 MemoryManager::MemoryManager(u32 physical_address_for_kernel_page_tables)
 {
     m_kernel_page_directory = PageDirectory::create_at_fixed_address(PhysicalAddress(physical_address_for_kernel_page_tables));
-    m_page_table_zero = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE);
-    m_page_table_one = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE * 2);
+    for (size_t i = 0; i < 4; ++i) {
+        m_low_page_tables[i] = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE * (5 + i));
+        memset(m_low_page_tables[i], 0, PAGE_SIZE);
+    }
+
     initialize_paging();
 
     kprintf("MM initialized.\n");
@@ -34,21 +37,8 @@ MemoryManager::~MemoryManager()
 {
 }
 
-void MemoryManager::populate_page_directory(PageDirectory& page_directory)
-{
-    page_directory.m_directory_page = allocate_supervisor_physical_page();
-    page_directory.entries()[0].copy_from({}, kernel_page_directory().entries()[0]);
-    page_directory.entries()[1].copy_from({}, kernel_page_directory().entries()[1]);
-    // Defer to the kernel page tables for 0xC0000000-0xFFFFFFFF
-    for (int i = 768; i < 1024; ++i)
-        page_directory.entries()[i].copy_from({}, kernel_page_directory().entries()[i]);
-}
-
 void MemoryManager::initialize_paging()
 {
-    memset(m_page_table_zero, 0, PAGE_SIZE);
-    memset(m_page_table_one, 0, PAGE_SIZE);
-
 #ifdef MM_DEBUG
     dbgprintf("MM: Kernel page directory @ %p\n", kernel_page_directory().cr3());
 #endif
@@ -171,6 +161,12 @@ void MemoryManager::initialize_paging()
         "orl $0x80, %eax\n"
         "mov %eax, %cr4\n");
 
+    // Turn on CR4.PAE
+    asm volatile(
+        "mov %cr4, %eax\n"
+        "orl $0x20, %eax\n"
+        "mov %eax, %cr4\n");
+
     asm volatile("movl %%eax, %%cr3" ::"a"(kernel_page_directory().cr3()));
     asm volatile(
         "movl %%cr0, %%eax\n"
@@ -186,30 +182,23 @@ void MemoryManager::initialize_paging()
 PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, VirtualAddress vaddr)
 {
     ASSERT_INTERRUPTS_DISABLED();
-    u32 page_directory_index = (vaddr.get() >> 22) & 0x3ff;
-    u32 page_table_index = (vaddr.get() >> 12) & 0x3ff;
+    u32 page_directory_table_index = (vaddr.get() >> 30) & 0x3;
+    u32 page_directory_index = (vaddr.get() >> 21) & 0x1ff;
+    u32 page_table_index = (vaddr.get() >> 12) & 0x1ff;
 
-    PageDirectoryEntry& pde = page_directory.entries()[page_directory_index];
+    PageDirectoryEntry& pde = page_directory.table().directory(page_directory_table_index)[page_directory_index];
     if (!pde.is_present()) {
 #ifdef MM_DEBUG
         dbgprintf("MM: PDE %u not present (requested for V%p), allocating\n", page_directory_index, vaddr.get());
 #endif
-        if (page_directory_index == 0) {
+        if (page_directory_table_index == 0 && page_directory_index < 4) {
             ASSERT(&page_directory == m_kernel_page_directory);
-            pde.set_page_table_base((u32)m_page_table_zero);
-            pde.set_user_allowed(false);
-            pde.set_present(true);
-            pde.set_writable(true);
-            pde.set_global(true);
-        } else if (page_directory_index == 1) {
-            ASSERT(&page_directory == m_kernel_page_directory);
-            pde.set_page_table_base((u32)m_page_table_one);
+            pde.set_page_table_base((u32)m_low_page_tables[page_directory_index]);
             pde.set_user_allowed(false);
             pde.set_present(true);
             pde.set_writable(true);
             pde.set_global(true);
         } else {
-            //ASSERT(&page_directory != m_kernel_page_directory.ptr());
             auto page_table = allocate_supervisor_physical_page();
#ifdef MM_DEBUG
             dbgprintf("MM: PD K%p (%s) at P%p allocated page table #%u (for V%p) at P%p\n",
@@ -220,7 +209,6 @@ PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, Virtual
                 vaddr.get(),
                 page_table->paddr().get());
 #endif
-
             pde.set_page_table_base(page_table->paddr().get());
             pde.set_user_allowed(true);
             pde.set_present(true);
@@ -322,21 +310,6 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
     dbgprintf("MM: handle_page_fault(%w) at V%p\n", fault.code(), fault.vaddr().get());
 #endif
     ASSERT(fault.vaddr() != m_quickmap_addr);
-    if (fault.type() == PageFault::Type::PageNotPresent && fault.vaddr().get() >= 0xc0000000) {
-        auto* current_page_directory = reinterpret_cast<PageDirectoryEntry*>(cpu_cr3());
-        u32 page_directory_index = (fault.vaddr().get() >> 22) & 0x3ff;
-        auto& kernel_pde = kernel_page_directory().entries()[page_directory_index];
-        auto& current_pde = current_page_directory[page_directory_index];
-
-        if (kernel_pde.is_present() && !current_pde.is_present()) {
-#ifdef PAGE_FAULT_DEBUG
-            dbg() << "NP(kernel): Copying new kernel mapping for " << fault.vaddr() << " into current page directory";
-#endif
-            current_pde.copy_from({}, kernel_pde);
-            flush_tlb(fault.vaddr().page_base());
-            return PageFaultResponse::Continue;
-        }
-    }
     auto* region = region_from_vaddr(fault.vaddr());
     if (!region) {
         kprintf("NP(error) fault at invalid address V%p\n", fault.vaddr().get());
@@ -494,11 +467,6 @@ void MemoryManager::enter_process_paging_scope(Process& process)
     ASSERT(current);
     InterruptDisabler disabler;
 
-    // NOTE: To prevent triple-faulting here, we have to ensure that the current stack
-    //       is accessible to the incoming page directory. We achieve this by forcing
-    //       an update of the kernel VM mappings in the entered scope's page directory.
-    process.page_directory().update_kernel_mappings();
-
     current->tss().cr3 = process.page_directory().cr3();
     asm volatile("movl %%eax, %%cr3" ::"a"(process.page_directory().cr3())
                  : "memory");
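The new index arithmetic in ensure_pte() is the heart of the PAE switch: a 32-bit virtual address now splits 2/9/9/12 (PDPT slot, directory index, table index, page offset) instead of the classic 10/10/12. A minimal standalone sketch of the same shifts and masks, with invented names, purely for illustration:

    #include <cstdint>
    #include <cstdio>

    struct PaeIndices {
        uint32_t pdpt_index; // bits 31:30 -- one of 4 page directories
        uint32_t pd_index;   // bits 29:21 -- one of 512 PDEs
        uint32_t pt_index;   // bits 20:12 -- one of 512 PTEs
        uint32_t offset;     // bits 11:0  -- byte offset in the 4 KB page
    };

    static PaeIndices decompose(uint32_t vaddr)
    {
        return {
            (vaddr >> 30) & 0x3,
            (vaddr >> 21) & 0x1ff,
            (vaddr >> 12) & 0x1ff,
            vaddr & 0xfff,
        };
    }

    int main()
    {
        auto idx = decompose(0xc0000000); // kernel base address
        printf("pdpt=%u pd=%u pt=%u off=%u\n",
            idx.pdpt_index, idx.pd_index, idx.pt_index, idx.offset);
    }

Feeding it 0xc0000000 prints pdpt=3: the entire kernel half of the address space fits in one page directory (PDPT slot 3), which is what PageDirectory.cpp below exploits by sharing that directory across all processes.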
diff --git a/Kernel/VM/MemoryManager.h b/Kernel/VM/MemoryManager.h
index ce0c52ff73..ef00bee9da 100644
--- a/Kernel/VM/MemoryManager.h
+++ b/Kernel/VM/MemoryManager.h
@@ -42,8 +42,6 @@ public:
 
     PageFaultResponse handle_page_fault(const PageFault&);
 
-    void populate_page_directory(PageDirectory&);
-
     void enter_process_paging_scope(Process&);
 
     bool validate_user_stack(const Process&, VirtualAddress) const;
@@ -114,8 +112,7 @@ private:
     PageTableEntry& ensure_pte(PageDirectory&, VirtualAddress);
 
     RefPtr<PageDirectory> m_kernel_page_directory;
-    PageTableEntry* m_page_table_zero { nullptr };
-    PageTableEntry* m_page_table_one { nullptr };
+    PageTableEntry* m_low_page_tables[4] { nullptr };
 
     VirtualAddress m_quickmap_addr;
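The m_low_page_tables[4] member ties back to the `.skip 4096*9` change in boot.S: nine physically contiguous pages are now reserved at page_tables_start. The carve-up below is inferred from the `5 + i` offsets in the MemoryManager constructor and the `PAGE_SIZE * 1..4` offsets in PageDirectory's fixed-address constructor; the diff never states it in one place:

    // Inferred layout of the nine boot-time pages
    // (base = physical_address_for_kernel_page_tables):
    constexpr u32 pdpt_offset = 0 * PAGE_SIZE;                                  // CR3 points here
    constexpr u32 page_directory_offset(u32 i) { return (1 + i) * PAGE_SIZE; }  // PDPT slots 0..3
    constexpr u32 low_page_table_offset(u32 i) { return (5 + i) * PAGE_SIZE; }  // m_low_page_tables[0..3]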
diff --git a/Kernel/VM/PageDirectory.cpp b/Kernel/VM/PageDirectory.cpp
index e25c6e9119..729cf4c9be 100644
--- a/Kernel/VM/PageDirectory.cpp
+++ b/Kernel/VM/PageDirectory.cpp
@@ -24,7 +24,17 @@ RefPtr<PageDirectory> PageDirectory::find_by_cr3(u32 cr3)
 PageDirectory::PageDirectory(PhysicalAddress paddr)
     : m_range_allocator(VirtualAddress(0xc0000000), 0x3f000000)
 {
-    m_directory_page = PhysicalPage::create(paddr, true, false);
+    m_directory_table = PhysicalPage::create(paddr, true, false);
+    m_directory_pages[0] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 1), true, false);
+    m_directory_pages[1] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 2), true, false);
+    m_directory_pages[2] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 3), true, false);
+    m_directory_pages[3] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 4), true, false);
+
+    table().raw[0] = (u64)m_directory_pages[0]->paddr().as_ptr() | 1;
+    table().raw[1] = (u64)m_directory_pages[1]->paddr().as_ptr() | 1;
+    table().raw[2] = (u64)m_directory_pages[2]->paddr().as_ptr() | 1;
+    table().raw[3] = (u64)m_directory_pages[3]->paddr().as_ptr() | 1;
+
     InterruptDisabler disabler;
     cr3_map().set(cr3(), this);
 }
@@ -33,7 +43,26 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang
     : m_process(&process)
     , m_range_allocator(parent_range_allocator ? RangeAllocator(*parent_range_allocator) : RangeAllocator(VirtualAddress(userspace_range_base), kernelspace_range_base - userspace_range_base))
 {
-    MM.populate_page_directory(*this);
+    // Set up a userspace page directory
+
+    m_directory_table = MM.allocate_supervisor_physical_page();
+    m_directory_pages[0] = MM.allocate_supervisor_physical_page();
+    m_directory_pages[1] = MM.allocate_supervisor_physical_page();
+    m_directory_pages[2] = MM.allocate_supervisor_physical_page();
+    // Share the top 1 GB of kernel-only mappings (>= 0xc0000000)
+    m_directory_pages[3] = MM.kernel_page_directory().m_directory_pages[3];
+
+    table().raw[0] = (u64)m_directory_pages[0]->paddr().as_ptr() | 1;
+    table().raw[1] = (u64)m_directory_pages[1]->paddr().as_ptr() | 1;
+    table().raw[2] = (u64)m_directory_pages[2]->paddr().as_ptr() | 1;
+    table().raw[3] = (u64)m_directory_pages[3]->paddr().as_ptr() | 1;
+
+    // Clone the bottom 8 MB of mappings from kernel_page_directory
+    table().directory(0)[0].copy_from({}, MM.kernel_page_directory().table().directory(0)[0]);
+    table().directory(0)[1].copy_from({}, MM.kernel_page_directory().table().directory(0)[1]);
+    table().directory(0)[2].copy_from({}, MM.kernel_page_directory().table().directory(0)[2]);
+    table().directory(0)[3].copy_from({}, MM.kernel_page_directory().table().directory(0)[3]);
+
     InterruptDisabler disabler;
     cr3_map().set(cr3(), this);
 }
@@ -57,11 +86,3 @@ void PageDirectory::flush(VirtualAddress vaddr)
     if (this == &MM.kernel_page_directory() || &current->process().page_directory() == this)
         MM.flush_tlb(vaddr);
 }
-
-void PageDirectory::update_kernel_mappings()
-{
-    // This ensures that the kernel virtual address space is up-to-date in this page directory.
-    // This may be necessary to avoid triple faulting when entering a process's paging scope
-    // whose mappings are out-of-date.
-    memcpy(entries() + 768, MM.kernel_page_directory().entries() + 768, sizeof(PageDirectoryEntry) * 256);
-}
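Sharing m_directory_pages[3] between the kernel directory and every process directory is the payoff of this whole change: a kernel PDE installed later is visible in all address spaces at once, which is exactly what makes update_kernel_mappings() and the lazy kernel-mapping copy in handle_page_fault() removable. As for the raw PDPT writes, the `| 1` sets the Present bit; in a PAE page-directory-pointer-table entry only that bit plus the page-aligned physical address of the directory are meaningful, and the remaining low bits must stay zero. Spelled out as a hypothetical helper:

    // Hypothetical helper: what "table().raw[i] = paddr | 1" encodes.
    static u64 make_pdpte(u32 page_directory_paddr)
    {
        ASSERT((page_directory_paddr & 0xfffu) == 0); // directory must be 4 KB aligned
        return (u64)page_directory_paddr | 1;         // bit 0: Present
    }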
diff --git a/Kernel/VM/PageDirectory.h b/Kernel/VM/PageDirectory.h
index 98ac63dcab..2d70eb0fe1 100644
--- a/Kernel/VM/PageDirectory.h
+++ b/Kernel/VM/PageDirectory.h
@@ -21,8 +21,8 @@ public:
 
     ~PageDirectory();
 
-    u32 cr3() const { return m_directory_page->paddr().get(); }
-    PageDirectoryEntry* entries() { return reinterpret_cast<PageDirectoryEntry*>(cr3()); }
+    u32 cr3() const { return m_directory_table->paddr().get(); }
+    PageDirectoryPointerTable& table() { return *reinterpret_cast<PageDirectoryPointerTable*>(cr3()); }
 
     void flush(VirtualAddress);
 
@@ -31,14 +31,13 @@ public:
     Process* process() { return m_process; }
     const Process* process() const { return m_process; }
 
-    void update_kernel_mappings();
-
 private:
     PageDirectory(Process&, const RangeAllocator* parent_range_allocator);
     explicit PageDirectory(PhysicalAddress);
 
     Process* m_process { nullptr };
     RangeAllocator m_range_allocator;
-    RefPtr<PhysicalPage> m_directory_page;
+    RefPtr<PhysicalPage> m_directory_table;
+    RefPtr<PhysicalPage> m_directory_pages[4];
     HashMap<unsigned, RefPtr<PhysicalPage>> m_physical_pages;
 };
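Putting the new API together, a full walk from a PageDirectory down to a PTE with the types introduced above reads as follows. This is a sketch (lookup_pte is an invented name); ensure_pte() in MemoryManager.cpp is the allocating version of the same walk:

    // Resolve vaddr to its PTE, or null if no mapping exists yet.
    PageTableEntry* lookup_pte(PageDirectory& pd, u32 vaddr)
    {
        auto& pde = pd.table().directory((vaddr >> 30) & 0x3)[(vaddr >> 21) & 0x1ff];
        if (!pde.is_present())
            return nullptr;
        return &pde.page_table_base()[(vaddr >> 12) & 0x1ff];
    }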