From 03a9ee79faf0ba42776492f027154b6156d0085c Mon Sep 17 00:00:00 2001
From: Tom
Date: Fri, 22 Jan 2021 16:56:08 -0700
Subject: [PATCH] Kernel: Implement thread priority queues

Rather than walking all Thread instances and putting them into a vector
to be sorted by priority, queue them into priority-sorted linked lists
as soon as they become ready to be executed.
---
 Kernel/Process.h     |   5 --
 Kernel/Scheduler.cpp | 159 +++++++++++++++++++++++++++++--------------
 Kernel/Scheduler.h   |   3 +
 Kernel/Thread.cpp    |   9 ++-
 Kernel/Thread.h      |   6 +-
 5 files changed, 124 insertions(+), 58 deletions(-)

diff --git a/Kernel/Process.h b/Kernel/Process.h
index 3f0fd622d1..1327da2e9e 100644
--- a/Kernel/Process.h
+++ b/Kernel/Process.h
@@ -758,11 +758,6 @@ inline const LogStream& operator<<(const LogStream& stream, const Process& proce
     return stream << process.name() << '(' << process.pid().value() << ')';
 }
 
-inline u32 Thread::effective_priority() const
-{
-    return m_priority + m_extra_priority;
-}
-
 #define REQUIRE_NO_PROMISES                       \
     do {                                          \
         if (Process::current()->has_promises()) { \
diff --git a/Kernel/Scheduler.cpp b/Kernel/Scheduler.cpp
index 5c0eb58506..0ab8c2894d 100644
--- a/Kernel/Scheduler.cpp
+++ b/Kernel/Scheduler.cpp
@@ -72,6 +72,105 @@ WaitQueue* g_finalizer_wait_queue;
 Atomic<bool> g_finalizer_has_work { false };
 static Process* s_colonel_process;
 
+struct ThreadReadyQueue {
+    IntrusiveList<Thread, &Thread::m_ready_queue_node> thread_list;
+};
+static SpinLock<u8> g_ready_queues_lock;
+static u32 g_ready_queues_mask;
+static constexpr u32 g_ready_queue_buckets = sizeof(g_ready_queues_mask) * 8;
+static ThreadReadyQueue* g_ready_queues; // g_ready_queue_buckets entries
+
+static inline u32 thread_priority_to_priority_index(u32 thread_priority)
+{
+    // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
+    // to an index into g_ready_queues where 0 is the highest priority bucket
+    ASSERT(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
+    constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
+    static_assert(thread_priority_count > 0);
+    auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (g_ready_queue_buckets - 1);
+    ASSERT(priority_bucket < g_ready_queue_buckets);
+    return priority_bucket;
+}
+
+Thread& Scheduler::pull_next_runnable_thread()
+{
+    auto affinity_mask = 1u << Processor::current().id();
+
+    ScopedSpinLock lock(g_ready_queues_lock);
+    auto priority_mask = g_ready_queues_mask;
+    while (priority_mask != 0) {
+        auto priority = __builtin_ffsl(priority_mask);
+        ASSERT(priority > 0);
+        auto& ready_queue = g_ready_queues[--priority];
+        for (auto& thread : ready_queue.thread_list) {
+            ASSERT(thread.m_runnable_priority == (int)priority);
+            if (thread.is_active())
+                continue;
+            if (!(thread.affinity() & affinity_mask))
+                continue;
+            thread.m_runnable_priority = -1;
+            ready_queue.thread_list.remove(thread);
+            if (ready_queue.thread_list.is_empty())
+                g_ready_queues_mask &= ~(1u << priority);
+            // Mark it as active because we are using this thread. This is similar
+            // to comparing it with Processor::current_thread, but when there are
+            // multiple processors there's no easy way to check whether the thread
+            // is actually still needed. This prevents accidental finalization when
+            // a thread is no longer in Running state, but running on another core.
+
+            // We need to mark it active here so that this thread won't be
+            // scheduled on another core if it were to be queued before actually
+            // switching to it.
+            // FIXME: Figure out a better way maybe?
+            thread.set_active(true);
+            return thread;
+        }
+        priority_mask &= ~(1u << priority);
+    }
+    return *Processor::current().idle_thread();
+}
+
+bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
+{
+    if (&thread == Processor::current().idle_thread())
+        return true;
+    ScopedSpinLock lock(g_ready_queues_lock);
+    auto priority = thread.m_runnable_priority;
+    if (priority < 0) {
+        ASSERT(!thread.m_ready_queue_node.is_in_list());
+        return false;
+    }
+
+    if (check_affinity && !(thread.affinity() & (1 << Processor::current().id())))
+        return false;
+
+    ASSERT(g_ready_queues_mask & (1u << priority));
+    auto& ready_queue = g_ready_queues[priority];
+    thread.m_runnable_priority = -1;
+    ready_queue.thread_list.remove(thread);
+    if (ready_queue.thread_list.is_empty())
+        g_ready_queues_mask &= ~(1u << priority);
+    return true;
+}
+
+void Scheduler::queue_runnable_thread(Thread& thread)
+{
+    ASSERT(g_scheduler_lock.own_lock());
+    if (&thread == Processor::current().idle_thread())
+        return;
+    auto priority = thread_priority_to_priority_index(thread.effective_priority());
+
+    ScopedSpinLock lock(g_ready_queues_lock);
+    ASSERT(thread.m_runnable_priority < 0);
+    thread.m_runnable_priority = (int)priority;
+    ASSERT(!thread.m_ready_queue_node.is_in_list());
+    auto& ready_queue = g_ready_queues[priority];
+    bool was_empty = ready_queue.thread_list.is_empty();
+    ready_queue.thread_list.append(thread);
+    if (was_empty)
+        g_ready_queues_mask |= (1u << priority);
+}
+
 void Scheduler::start()
 {
     ASSERT_INTERRUPTS_DISABLED();
@@ -169,25 +268,9 @@ bool Scheduler::pick_next()
         });
     }
 
-    Thread* thread_to_schedule = nullptr;
-
     auto pending_beneficiary = scheduler_data.m_pending_beneficiary.strong_ref();
-    Vector<Thread*> sorted_runnables;
-    for_each_runnable([&](auto& thread) {
-        if ((thread.affinity() & (1u << Processor::id())) == 0)
-            return IterationDecision::Continue;
-        if (thread.state() == Thread::Running && &thread != current_thread)
-            return IterationDecision::Continue;
-        sorted_runnables.append(&thread);
-        if (&thread == pending_beneficiary) {
-            thread_to_schedule = &thread;
-            return IterationDecision::Break;
-        }
-        return IterationDecision::Continue;
-    });
-
-    if (thread_to_schedule) {
-        // The thread we're supposed to donate to still exists
+    if (pending_beneficiary && dequeue_runnable_thread(*pending_beneficiary, true)) {
+        // The thread we're supposed to donate to still exists and we can
         const char* reason = scheduler_data.m_pending_donate_reason;
         scheduler_data.m_pending_beneficiary = nullptr;
         scheduler_data.m_pending_donate_reason = nullptr;
@@ -196,8 +279,8 @@ bool Scheduler::pick_next()
         // but since we're still holding the scheduler lock we're still in a critical section
         critical.leave();
 
-        dbgln("Processing pending donate to {} reason={}", *thread_to_schedule, reason);
-        return donate_to_and_switch(thread_to_schedule, reason);
+        dbgln("Processing pending donate to {} reason={}", *pending_beneficiary, reason);
+        return donate_to_and_switch(pending_beneficiary.ptr(), reason);
     }
 
     // Either we're not donating or the beneficiary disappeared.
@@ -205,38 +288,20 @@ bool Scheduler::pick_next()
     scheduler_data.m_pending_beneficiary = nullptr;
     scheduler_data.m_pending_donate_reason = nullptr;
 
-    quick_sort(sorted_runnables, [](auto& a, auto& b) { return a->effective_priority() >= b->effective_priority(); });
-
-    for (auto* thread : sorted_runnables) {
-        if (thread->process().exec_tid() && thread->process().exec_tid() != thread->tid())
-            continue;
-
-        ASSERT(thread->state() == Thread::Runnable || thread->state() == Thread::Running);
-
-        if (!thread_to_schedule) {
-            thread->m_extra_priority = 0;
-            thread_to_schedule = thread;
-        } else {
-            thread->m_extra_priority++;
-        }
-    }
-
-    if (!thread_to_schedule)
-        thread_to_schedule = Processor::current().idle_thread();
-
+    auto& thread_to_schedule = pull_next_runnable_thread();
     if constexpr (SCHEDULER_DEBUG) {
         dbgln("Scheduler[{}]: Switch to {} @ {:04x}:{:08x}",
             Processor::id(),
-            *thread_to_schedule,
-            thread_to_schedule->tss().cs, thread_to_schedule->tss().eip);
+            thread_to_schedule,
+            thread_to_schedule.tss().cs, thread_to_schedule.tss().eip);
     }
 
     // We need to leave our first critical section before switching context,
     // but since we're still holding the scheduler lock we're still in a critical section
     critical.leave();
 
-    thread_to_schedule->set_ticks_left(time_slice_for(*thread_to_schedule));
-    return context_switch(thread_to_schedule);
+    thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
+    return context_switch(&thread_to_schedule);
 }
 
 bool Scheduler::yield()
@@ -354,13 +419,6 @@ bool Scheduler::context_switch(Thread* thread)
     }
 
     thread->set_state(Thread::Running);
-    // Mark it as active because we are using this thread. This is similar
-    // to comparing it with Processor::current_thread, but when there are
-    // multiple processors there's no easy way to check whether the thread
-    // is actually still needed. This prevents accidental finalization when
-    // a thread is no longer in Running state, but running on another core.
-    thread->set_active(true);
-
     proc.switch_context(from_thread, thread);
 
     // NOTE: from_thread at this point reflects the thread we were
@@ -449,6 +507,7 @@ void Scheduler::initialize()
     RefPtr<Thread> idle_thread;
     g_scheduler_data = new SchedulerData;
     g_finalizer_wait_queue = new WaitQueue;
+    g_ready_queues = new ThreadReadyQueue[g_ready_queue_buckets];
 
     g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
     s_colonel_process = Process::create_kernel_process(idle_thread, "colonel", idle_loop, nullptr, 1).leak_ref();
diff --git a/Kernel/Scheduler.h b/Kernel/Scheduler.h
index 5ea17c4c25..08b8d8f79c 100644
--- a/Kernel/Scheduler.h
+++ b/Kernel/Scheduler.h
@@ -70,6 +70,9 @@ public:
     static void idle_loop(void*);
     static void invoke_async();
     static void notify_finalizer();
+    static Thread& pull_next_runnable_thread();
+    static bool dequeue_runnable_thread(Thread&, bool = false);
+    static void queue_runnable_thread(Thread&);
 
     template<typename Callback>
     static inline IterationDecision for_each_runnable(Callback);
diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp
index 0931fc11c3..8ede0661b5 100644
--- a/Kernel/Thread.cpp
+++ b/Kernel/Thread.cpp
@@ -131,6 +131,10 @@ Thread::~Thread()
         // the middle of being destroyed.
         ScopedSpinLock lock(g_scheduler_lock);
         g_scheduler_data->thread_list_for_state(m_state).remove(*this);
+
+        // We shouldn't be queued
+        ASSERT(m_runnable_priority < 0);
+        ASSERT(!m_runnable_list_node.is_in_list());
     }
 }
 
@@ -904,7 +908,9 @@ void Thread::set_state(State new_state, u8 stop_signal)
         ASSERT(g_scheduler_data->has_thread(*this));
     }
 
-    if (previous_state == Stopped) {
+    if (previous_state == Runnable) {
+        Scheduler::dequeue_runnable_thread(*this);
+    } else if (previous_state == Stopped) {
         m_stop_state = State::Invalid;
         auto& process = this->process();
         if (process.set_stopped(false) == true) {
@@ -920,6 +926,7 @@ void Thread::set_state(State new_state, u8 stop_signal)
     }
 
     if (m_state == Runnable) {
+        Scheduler::queue_runnable_thread(*this);
         Processor::smp_wake_n_idle_processors(1);
     } else if (m_state == Stopped) {
         // We don't want to restore to Running state, only Runnable!
diff --git a/Kernel/Thread.h b/Kernel/Thread.h
index 31d6a39f5f..bbbf213670 100644
--- a/Kernel/Thread.h
+++ b/Kernel/Thread.h
@@ -83,6 +83,7 @@ class Thread
 
     friend class Process;
     friend class Scheduler;
+    friend class ThreadReadyQueue;
 
 public:
     inline static Thread* current()
@@ -102,7 +103,7 @@ public:
     void set_priority(u32 p) { m_priority = p; }
     u32 priority() const { return m_priority; }
 
-    u32 effective_priority() const;
+    u32 effective_priority() const { return m_priority; }
 
     void detach()
     {
@@ -1170,6 +1171,7 @@ public:
 
 private:
     IntrusiveListNode m_runnable_list_node;
+    int m_runnable_priority { -1 };
 
 private:
     friend struct SchedulerData;
@@ -1243,6 +1245,7 @@ private:
     TSS32 m_tss;
     TrapFrame* m_current_trap { nullptr };
     u32 m_saved_critical { 1 };
+    IntrusiveListNode m_ready_queue_node;
     Atomic<u32> m_cpu { 0 };
     u32 m_cpu_affinity { THREAD_AFFINITY_DEFAULT };
     u32 m_ticks_left { 0 };
@@ -1294,7 +1297,6 @@ private:
     State m_state { Invalid };
     String m_name;
    u32 m_priority { THREAD_PRIORITY_NORMAL };
-    u32 m_extra_priority { 0 };
     State m_stop_state { Invalid };
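
The following self-contained sketch (not part of the patch) models the ready-queue technique described in the commit message in user space: one list per priority bucket plus a bitmask of non-empty buckets, so picking the next thread is a find-first-set on the mask instead of sorting every runnable thread on each pick. All names (FakeThread, priority_to_bucket, queue_runnable, pull_next_runnable) are illustrative and do not exist in the SerenityOS kernel, std::deque stands in for the kernel's IntrusiveList, and the linear priority-to-bucket mapping is an assumption for the sketch rather than the kernel's exact formula.

// Minimal user-space model of a bucketed ready queue with a bitmask of
// non-empty buckets, assuming 32 buckets (one per bit of a 32-bit mask).
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <deque>

struct FakeThread {
    const char* name;
    uint32_t priority; // higher value = more urgent, like THREAD_PRIORITY_*
};

static constexpr uint32_t bucket_count = 32;          // one bucket per mask bit
static std::deque<FakeThread*> buckets[bucket_count]; // bucket 0 = highest priority
static uint32_t nonempty_mask = 0;                    // bit n set => bucket n has threads

// Illustrative linear mapping from an assumed priority range of 1..99
// onto bucket indices, highest priority -> bucket 0.
static uint32_t priority_to_bucket(uint32_t priority)
{
    assert(priority >= 1 && priority <= 99);
    return ((99 - priority) * (bucket_count - 1)) / 98;
}

static void queue_runnable(FakeThread& thread)
{
    auto bucket = priority_to_bucket(thread.priority);
    bool was_empty = buckets[bucket].empty();
    buckets[bucket].push_back(&thread);
    if (was_empty)
        nonempty_mask |= (1u << bucket); // publish the bucket in the mask
}

static FakeThread* pull_next_runnable()
{
    // Find the lowest set bit, i.e. the highest-priority non-empty bucket,
    // in O(1) instead of sorting all runnable threads (mirrors the patch's
    // use of __builtin_ffsl on g_ready_queues_mask).
    auto bit = __builtin_ffsl(nonempty_mask);
    if (bit == 0)
        return nullptr; // nothing runnable; the kernel would fall back to the idle thread
    uint32_t bucket = bit - 1;
    auto& queue = buckets[bucket];
    FakeThread* thread = queue.front();
    queue.pop_front();
    if (queue.empty())
        nonempty_mask &= ~(1u << bucket); // last thread left, clear the bit
    return thread;
}

int main()
{
    FakeThread low { "low", 10 }, normal { "normal", 30 }, high { "high", 99 };
    queue_runnable(low);
    queue_runnable(normal);
    queue_runnable(high);
    // Threads come back highest priority first: high, normal, low.
    while (auto* t = pull_next_runnable())
        printf("%s\n", t->name);
}

Compiled as an ordinary C++ program this prints high, normal, low. The kernel version additionally guards the buckets with g_ready_queues_lock, records the bucket in each thread's m_runnable_priority so dequeueing is O(1), and skips threads whose CPU affinity excludes the current processor.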