
Kernel: Enable PAE (Physical Address Extension)

Introduce one more (CPU) indirection layer in the paging code: the page
directory pointer table (PDPT). Each PageDirectory now has 4 separate
PageDirectoryEntry arrays, governing 1 GB of VM each.
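For illustration, here is a minimal standalone sketch of how a 32-bit virtual address is split up under this scheme, mirroring the index math ensure_pte() uses further down in the diff (the struct, function, and program are illustrative only, not part of the commit):

#include <cstdint>
#include <cstdio>

// Illustrative only: splits a 32-bit virtual address the same way
// ensure_pte() does under PAE (2 + 9 + 9 + 12 bits).
struct PaeIndices {
    uint32_t pdpt_index; // which of the 4 page directories (1 GB each)
    uint32_t pd_index;   // which PageDirectoryEntry (2 MB each)
    uint32_t pt_index;   // which PageTableEntry (4 KB each)
    uint32_t offset;     // byte offset within the 4 KB page
};

static PaeIndices decompose(uint32_t vaddr)
{
    return {
        (vaddr >> 30) & 0x3,   // page_directory_table_index
        (vaddr >> 21) & 0x1ff, // page_directory_index
        (vaddr >> 12) & 0x1ff, // page_table_index
        vaddr & 0xfff,
    };
}

int main()
{
    // 0xc0101234 lands in the kernel-only gigabyte (PDPT slot 3).
    PaeIndices idx = decompose(0xc0101234);
    printf("pdpt=%u pd=%u pt=%u offset=0x%x\n",
           idx.pdpt_index, idx.pd_index, idx.pt_index, idx.offset);
    return 0;
}

With 2 + 9 + 9 + 12 bits, each PDPT slot covers exactly 1 GB, each PageDirectoryEntry 2 MB, and each PageTableEntry 4 KB.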

A really neat side-effect of this is that we can now share the physical
page containing the >=3GB kernel-only address space metadata between
all processes, instead of lazily cloning it on page faults.
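As a toy model of that sharing (names are illustrative, not the kernel's): the kernel PDPT and a process PDPT both keep slot 3 pointing at the same physical PageDirectoryEntry array, so a kernel mapping added once is visible in every address space without copying:

#include <cstdint>
#include <cstdio>

// Toy model, not kernel code: two PDPTs whose slot 3 references the same
// page directory array, so a mapping added through the kernel's view is
// immediately visible through the process's view as well.
int main()
{
    static uint64_t kernel_pd3[512] = {}; // page directory for the >=3GB gigabyte
    uint64_t* kernel_pdpt[4] = { nullptr, nullptr, nullptr, kernel_pd3 };
    uint64_t* process_pdpt[4] = { nullptr, nullptr, nullptr, kernel_pd3 }; // shared slot

    kernel_pdpt[3][0] = 0x1000 | 1; // map something via the kernel's view

    // No copy and no page-fault fixup is needed for the process to see it.
    printf("process sees PDE 0x%llx\n", (unsigned long long)process_pdpt[3][0]);
    return 0;
}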

This will give us access to the NX (No eXecute) bit, allowing us to
prevent execution of memory that's not supposed to be executed.
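A minimal sketch of what the wider entries buy us, assuming only that PAE entries are 64 bits wide and that bit 63 is the NX bit (this mirrors the NoExecute flag and set_bit() helper added below, but is a standalone program, not the kernel class):

#include <cstdint>
#include <cstdio>

// Standalone sketch, not the kernel's PageTableEntry: with PAE the entries
// are 64 bits wide, and bit 63 is the NX (No eXecute) bit.
constexpr uint64_t NoExecute = 0x8000000000000000ULL;
constexpr uint64_t Present = 1u << 0;

static void set_bit(uint64_t& raw, uint64_t bit, bool value)
{
    if (value)
        raw |= bit;
    else
        raw &= ~bit;
}

int main()
{
    uint64_t pte = 0;
    set_bit(pte, Present, true);
    set_bit(pte, NoExecute, true); // e.g. a writable data page that must never be executed
    printf("raw=0x%016llx execute_disabled=%d\n",
           (unsigned long long)pte, int((pte & NoExecute) != 0));
    return 0;
}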
Author: Andreas Kling
Date:   2019-12-25 11:22:16 +01:00
Commit: 52deb09382 (parent: 4883176fd8)

7 changed files with 84 additions and 81 deletions


@@ -34,7 +34,7 @@ stack_top:
 .section .page_tables
 .align 4096
 page_tables_start:
-.skip 4096*3
+.skip 4096*9
 .section .text
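The reservation grows from 3 pages to 9 because the boot-time tables now consist of one PDPT, four page directories, and four low page tables; a sketch of the assumed layout, inferred from PageDirectory(PhysicalAddress) and the (5 + i) offsets in MemoryManager's constructor later in the diff (the constant names are illustrative only):

#include <cstdint>

// Assumed layout of the 9-page boot-time reservation, in 4 KB page units.
constexpr uint32_t pdpt_page = 0;            // the PDPT that CR3 will point at
constexpr uint32_t first_page_directory = 1; // 4 PageDirectoryEntry arrays, 1 GB each
constexpr uint32_t page_directory_count = 4;
constexpr uint32_t first_low_page_table = 5; // matches PAGE_SIZE * (5 + i) below
constexpr uint32_t low_page_table_count = 4;

static_assert(pdpt_page == 0
                  && first_page_directory + page_directory_count == first_low_page_table
                  && first_low_page_table + low_page_table_count == 9,
              "accounts for the .skip 4096*9 above");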


@@ -9,6 +9,7 @@
 #define PAGE_MASK 0xfffff000

 class MemoryManager;
+class PageDirectory;
 class PageTableEntry;

 struct [[gnu::packed]] TSS32
@@ -89,12 +90,12 @@ public:
     PageTableEntry* page_table_base() { return reinterpret_cast<PageTableEntry*>(m_raw & 0xfffff000u); }
     void set_page_table_base(u32 value)
     {
-        m_raw &= 0xfff;
+        m_raw &= 0x8000000000000fffULL;
         m_raw |= value & 0xfffff000;
     }

-    u32 raw() const { return m_raw; }
-    void copy_from(Badge<MemoryManager>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }
+    u64 raw() const { return m_raw; }
+    void copy_from(Badge<PageDirectory>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }

     enum Flags {
         Present = 1 << 0,
@@ -103,6 +104,7 @@ public:
         WriteThrough = 1 << 3,
         CacheDisabled = 1 << 4,
         Global = 1 << 8,
+        NoExecute = 0x8000000000000000ULL,
     };

     bool is_present() const { return raw() & Present; }
@@ -123,7 +125,10 @@ public:
     bool is_global() const { return raw() & Global; }
     void set_global(bool b) { set_bit(Global, b); }

-    void set_bit(u32 bit, bool value)
+    bool is_execute_disabled() const { return raw() & NoExecute; }
+    void set_execute_disabled(bool b) { set_bit(NoExecute, b); }
+
+    void set_bit(u64 bit, bool value)
     {
         if (value)
             m_raw |= bit;
@@ -132,7 +137,7 @@
     }

 private:
-    u32 m_raw;
+    u64 m_raw;
 };

 class PageTableEntry {
@@ -140,11 +145,11 @@ public:
     void* physical_page_base() { return reinterpret_cast<void*>(m_raw & 0xfffff000u); }
     void set_physical_page_base(u32 value)
     {
-        m_raw &= 0xfff;
+        m_raw &= 0x8000000000000fffULL;
         m_raw |= value & 0xfffff000;
     }

-    u32 raw() const { return m_raw; }
+    u64 raw() const { return (u32)m_raw; }

     enum Flags {
         Present = 1 << 0,
@@ -153,6 +158,7 @@ public:
         WriteThrough = 1 << 3,
         CacheDisabled = 1 << 4,
         Global = 1 << 8,
+        NoExecute = 0x8000000000000000ULL,
     };

     bool is_present() const { return raw() & Present; }
@@ -173,7 +179,10 @@ public:
     bool is_global() const { return raw() & Global; }
     void set_global(bool b) { set_bit(Global, b); }

-    void set_bit(u32 bit, bool value)
+    bool is_execute_disabled() const { return raw() & NoExecute; }
+    void set_execute_disabled(bool b) { set_bit(NoExecute, b); }
+
+    void set_bit(u64 bit, bool value)
     {
         if (value)
             m_raw |= bit;
@@ -182,11 +191,21 @@
     }

 private:
-    u32 m_raw;
+    u64 m_raw;
 };

-static_assert(sizeof(PageDirectoryEntry) == 4);
-static_assert(sizeof(PageTableEntry) == 4);
+static_assert(sizeof(PageDirectoryEntry) == 8);
+static_assert(sizeof(PageTableEntry) == 8);
+
+class PageDirectoryPointerTable {
+public:
+    PageDirectoryEntry* directory(size_t index)
+    {
+        return (PageDirectoryEntry*)(raw[index] & ~0xfffu);
+    }
+
+    u64 raw[4];
+};

 class IRQHandler;
 struct RegisterDump;


@@ -100,7 +100,6 @@ Thread::Thread(Process& process)
         m_tss.esp0 = m_kernel_stack_top;
         kprintf("Allocated ring0 stack @ %p - %p\n", m_kernel_stack_base, m_kernel_stack_top);
     }
-    m_process.page_directory().update_kernel_mappings();

     // HACK: Ring2 SS in the TSS is the current PID.
     m_tss.ss2 = m_process.pid();


@@ -23,8 +23,11 @@ MemoryManager& MM
 MemoryManager::MemoryManager(u32 physical_address_for_kernel_page_tables)
 {
     m_kernel_page_directory = PageDirectory::create_at_fixed_address(PhysicalAddress(physical_address_for_kernel_page_tables));
-    m_page_table_zero = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE);
-    m_page_table_one = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE * 2);
+    for (size_t i = 0; i < 4; ++i) {
+        m_low_page_tables[i] = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE * (5 + i));
+        memset(m_low_page_tables[i], 0, PAGE_SIZE);
+    }

     initialize_paging();
     kprintf("MM initialized.\n");
@@ -34,21 +37,8 @@ MemoryManager::~MemoryManager()
 {
 }

-void MemoryManager::populate_page_directory(PageDirectory& page_directory)
-{
-    page_directory.m_directory_page = allocate_supervisor_physical_page();
-    page_directory.entries()[0].copy_from({}, kernel_page_directory().entries()[0]);
-    page_directory.entries()[1].copy_from({}, kernel_page_directory().entries()[1]);
-    // Defer to the kernel page tables for 0xC0000000-0xFFFFFFFF
-    for (int i = 768; i < 1024; ++i)
-        page_directory.entries()[i].copy_from({}, kernel_page_directory().entries()[i]);
-}
-
 void MemoryManager::initialize_paging()
 {
-    memset(m_page_table_zero, 0, PAGE_SIZE);
-    memset(m_page_table_one, 0, PAGE_SIZE);
 #ifdef MM_DEBUG
     dbgprintf("MM: Kernel page directory @ %p\n", kernel_page_directory().cr3());
 #endif
@@ -171,6 +161,12 @@ void MemoryManager::initialize_paging()
         "orl $0x80, %eax\n"
         "mov %eax, %cr4\n");

+    // Turn on CR4.PAE
+    asm volatile(
+        "mov %cr4, %eax\n"
+        "orl $0x20, %eax\n"
+        "mov %eax, %cr4\n");
+
     asm volatile("movl %%eax, %%cr3" ::"a"(kernel_page_directory().cr3()));
     asm volatile(
         "movl %%cr0, %%eax\n"
@@ -186,30 +182,23 @@
 PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, VirtualAddress vaddr)
 {
     ASSERT_INTERRUPTS_DISABLED();
-    u32 page_directory_index = (vaddr.get() >> 22) & 0x3ff;
-    u32 page_table_index = (vaddr.get() >> 12) & 0x3ff;
+    u32 page_directory_table_index = (vaddr.get() >> 30) & 0x3;
+    u32 page_directory_index = (vaddr.get() >> 21) & 0x1ff;
+    u32 page_table_index = (vaddr.get() >> 12) & 0x1ff;

-    PageDirectoryEntry& pde = page_directory.entries()[page_directory_index];
+    PageDirectoryEntry& pde = page_directory.table().directory(page_directory_table_index)[page_directory_index];
     if (!pde.is_present()) {
 #ifdef MM_DEBUG
         dbgprintf("MM: PDE %u not present (requested for V%p), allocating\n", page_directory_index, vaddr.get());
 #endif
-        if (page_directory_index == 0) {
+        if (page_directory_table_index == 0 && page_directory_index < 4) {
             ASSERT(&page_directory == m_kernel_page_directory);
-            pde.set_page_table_base((u32)m_page_table_zero);
-            pde.set_user_allowed(false);
-            pde.set_present(true);
-            pde.set_writable(true);
-            pde.set_global(true);
-        } else if (page_directory_index == 1) {
-            ASSERT(&page_directory == m_kernel_page_directory);
-            pde.set_page_table_base((u32)m_page_table_one);
+            pde.set_page_table_base((u32)m_low_page_tables[page_directory_index]);
             pde.set_user_allowed(false);
             pde.set_present(true);
             pde.set_writable(true);
             pde.set_global(true);
         } else {
-            //ASSERT(&page_directory != m_kernel_page_directory.ptr());
             auto page_table = allocate_supervisor_physical_page();
 #ifdef MM_DEBUG
             dbgprintf("MM: PD K%p (%s) at P%p allocated page table #%u (for V%p) at P%p\n",
@@ -220,7 +209,6 @@ PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, VirtualAddress vaddr)
                 vaddr.get(),
                 page_table->paddr().get());
 #endif
-
             pde.set_page_table_base(page_table->paddr().get());
             pde.set_user_allowed(true);
             pde.set_present(true);
@@ -322,21 +310,6 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
     dbgprintf("MM: handle_page_fault(%w) at V%p\n", fault.code(), fault.vaddr().get());
 #endif
     ASSERT(fault.vaddr() != m_quickmap_addr);
-    if (fault.type() == PageFault::Type::PageNotPresent && fault.vaddr().get() >= 0xc0000000) {
-        auto* current_page_directory = reinterpret_cast<PageDirectoryEntry*>(cpu_cr3());
-        u32 page_directory_index = (fault.vaddr().get() >> 22) & 0x3ff;
-        auto& kernel_pde = kernel_page_directory().entries()[page_directory_index];
-        auto& current_pde = current_page_directory[page_directory_index];
-        if (kernel_pde.is_present() && !current_pde.is_present()) {
-#ifdef PAGE_FAULT_DEBUG
-            dbg() << "NP(kernel): Copying new kernel mapping for " << fault.vaddr() << " into current page directory";
-#endif
-            current_pde.copy_from({}, kernel_pde);
-            flush_tlb(fault.vaddr().page_base());
-            return PageFaultResponse::Continue;
-        }
-    }
-
     auto* region = region_from_vaddr(fault.vaddr());
     if (!region) {
         kprintf("NP(error) fault at invalid address V%p\n", fault.vaddr().get());
@@ -494,11 +467,6 @@ void MemoryManager::enter_process_paging_scope(Process& process)
     ASSERT(current);
     InterruptDisabler disabler;

-    // NOTE: To prevent triple-faulting here, we have to ensure that the current stack
-    // is accessible to the incoming page directory. We achieve this by forcing
-    // an update of the kernel VM mappings in the entered scope's page directory.
-    process.page_directory().update_kernel_mappings();
-
     current->tss().cr3 = process.page_directory().cr3();
     asm volatile("movl %%eax, %%cr3" ::"a"(process.page_directory().cr3())
                  : "memory");


@@ -42,8 +42,6 @@ public:
     PageFaultResponse handle_page_fault(const PageFault&);

-    void populate_page_directory(PageDirectory&);
-
     void enter_process_paging_scope(Process&);

     bool validate_user_stack(const Process&, VirtualAddress) const;
@@ -114,8 +112,7 @@ private:
     PageTableEntry& ensure_pte(PageDirectory&, VirtualAddress);

     RefPtr<PageDirectory> m_kernel_page_directory;
-    PageTableEntry* m_page_table_zero { nullptr };
-    PageTableEntry* m_page_table_one { nullptr };
+    PageTableEntry* m_low_page_tables[4] { nullptr };

     VirtualAddress m_quickmap_addr;


@@ -24,7 +24,17 @@ RefPtr<PageDirectory> PageDirectory::find_by_cr3(u32 cr3)
 PageDirectory::PageDirectory(PhysicalAddress paddr)
     : m_range_allocator(VirtualAddress(0xc0000000), 0x3f000000)
 {
-    m_directory_page = PhysicalPage::create(paddr, true, false);
+    m_directory_table = PhysicalPage::create(paddr, true, false);
+    m_directory_pages[0] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 1), true, false);
+    m_directory_pages[1] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 2), true, false);
+    m_directory_pages[2] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 3), true, false);
+    m_directory_pages[3] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 4), true, false);
+
+    table().raw[0] = (u64)m_directory_pages[0]->paddr().as_ptr() | 1;
+    table().raw[1] = (u64)m_directory_pages[1]->paddr().as_ptr() | 1;
+    table().raw[2] = (u64)m_directory_pages[2]->paddr().as_ptr() | 1;
+    table().raw[3] = (u64)m_directory_pages[3]->paddr().as_ptr() | 1;
+
     InterruptDisabler disabler;
     cr3_map().set(cr3(), this);
 }
@@ -33,7 +43,26 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_range_allocator)
     : m_process(&process)
     , m_range_allocator(parent_range_allocator ? RangeAllocator(*parent_range_allocator) : RangeAllocator(VirtualAddress(userspace_range_base), kernelspace_range_base - userspace_range_base))
 {
-    MM.populate_page_directory(*this);
+    // Set up a userspace page directory
+    m_directory_table = MM.allocate_supervisor_physical_page();
+    m_directory_pages[0] = MM.allocate_supervisor_physical_page();
+    m_directory_pages[1] = MM.allocate_supervisor_physical_page();
+    m_directory_pages[2] = MM.allocate_supervisor_physical_page();
+
+    // Share the top 1 GB of kernel-only mappings (>=3GB or >=0xc0000000)
+    m_directory_pages[3] = MM.kernel_page_directory().m_directory_pages[3];
+
+    table().raw[0] = (u64)m_directory_pages[0]->paddr().as_ptr() | 1;
+    table().raw[1] = (u64)m_directory_pages[1]->paddr().as_ptr() | 1;
+    table().raw[2] = (u64)m_directory_pages[2]->paddr().as_ptr() | 1;
+    table().raw[3] = (u64)m_directory_pages[3]->paddr().as_ptr() | 1;
+
+    // Clone bottom 8 MB of mappings from kernel_page_directory
+    table().directory(0)[0].copy_from({}, MM.kernel_page_directory().table().directory(0)[0]);
+    table().directory(0)[1].copy_from({}, MM.kernel_page_directory().table().directory(0)[1]);
+    table().directory(0)[2].copy_from({}, MM.kernel_page_directory().table().directory(0)[2]);
+    table().directory(0)[3].copy_from({}, MM.kernel_page_directory().table().directory(0)[3]);
+
     InterruptDisabler disabler;
     cr3_map().set(cr3(), this);
 }
@@ -57,11 +86,3 @@ void PageDirectory::flush(VirtualAddress vaddr)
     if (this == &MM.kernel_page_directory() || &current->process().page_directory() == this)
         MM.flush_tlb(vaddr);
 }
-
-void PageDirectory::update_kernel_mappings()
-{
-    // This ensures that the kernel virtual address space is up-to-date in this page directory.
-    // This may be necessary to avoid triple faulting when entering a process's paging scope
-    // whose mappings are out-of-date.
-    memcpy(entries() + 768, MM.kernel_page_directory().entries() + 768, sizeof(PageDirectoryEntry) * 256);
-}


@@ -21,8 +21,8 @@ public:
     ~PageDirectory();

-    u32 cr3() const { return m_directory_page->paddr().get(); }
-    PageDirectoryEntry* entries() { return reinterpret_cast<PageDirectoryEntry*>(cr3()); }
+    u32 cr3() const { return m_directory_table->paddr().get(); }
+    PageDirectoryPointerTable& table() { return *reinterpret_cast<PageDirectoryPointerTable*>(cr3()); }

     void flush(VirtualAddress);
@@ -31,14 +31,13 @@ public:
     Process* process() { return m_process; }
     const Process* process() const { return m_process; }

-    void update_kernel_mappings();
-
 private:
     PageDirectory(Process&, const RangeAllocator* parent_range_allocator);
     explicit PageDirectory(PhysicalAddress);

     Process* m_process { nullptr };
     RangeAllocator m_range_allocator;
-    RefPtr<PhysicalPage> m_directory_page;
+    RefPtr<PhysicalPage> m_directory_table;
+    RefPtr<PhysicalPage> m_directory_pages[4];
     HashMap<unsigned, RefPtr<PhysicalPage>> m_physical_pages;
 };