Kernel/Scheduler.cpp at master

jcs.org / serenity
fork atom
Serenity Operating System
fork atom
serenity / Kernel / Scheduler.cpp
at master 560 lines 20 kB view raw
wrap content
Timon Kruiper Kernel: Factor our PreviousMode into RegisterState::previous_mode 3y ago
fb107748
  1/*
  2 * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
  3 *
  4 * SPDX-License-Identifier: BSD-2-Clause
  5 */
  6
  7#include <AK/BuiltinWrappers.h>
  8#include <AK/ScopeGuard.h>
  9#include <AK/Singleton.h>
 10#include <AK/Time.h>
 11#include <Kernel/Arch/TrapFrame.h>
 12#include <Kernel/Debug.h>
 13#include <Kernel/InterruptDisabler.h>
 14#include <Kernel/Panic.h>
 15#include <Kernel/PerformanceManager.h>
 16#include <Kernel/Process.h>
 17#include <Kernel/Scheduler.h>
 18#include <Kernel/Sections.h>
 19#include <Kernel/Time/TimeManagement.h>
 20#include <Kernel/kstdio.h>
 21
 22namespace Kernel {
 23
 24RecursiveSpinlock<LockRank::None> g_scheduler_lock {};
 25
 26static u32 time_slice_for(Thread const& thread)
 27{
 28    // One time slice unit == 4ms (assuming 250 ticks/second)
 29    if (thread.is_idle_thread())
 30        return 1;
 31    return 2;
 32}
 33
 34READONLY_AFTER_INIT Thread* g_finalizer;
 35READONLY_AFTER_INIT WaitQueue* g_finalizer_wait_queue;
 36Atomic<bool> g_finalizer_has_work { false };
 37READONLY_AFTER_INIT static Process* s_colonel_process;
 38
 39struct ThreadReadyQueue {
 40    IntrusiveList<&Thread::m_ready_queue_node> thread_list;
 41};
 42
 43struct ThreadReadyQueues {
 44    u32 mask {};
 45    static constexpr size_t count = sizeof(mask) * 8;
 46    Array<ThreadReadyQueue, count> queues;
 47};
 48
 49static Singleton<SpinlockProtected<ThreadReadyQueues, LockRank::None>> g_ready_queues;
 50
 51static SpinlockProtected<TotalTimeScheduled, LockRank::None> g_total_time_scheduled {};
 52
 53static void dump_thread_list(bool = false);
 54
 55static inline u32 thread_priority_to_priority_index(u32 thread_priority)
 56{
 57    // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
 58    // to a index into g_ready_queues where 0 is the highest priority bucket
 59    VERIFY(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
 60    constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
 61    static_assert(thread_priority_count > 0);
 62    auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (ThreadReadyQueues::count - 1);
 63    VERIFY(priority_bucket < ThreadReadyQueues::count);
 64    return priority_bucket;
 65}
 66
 67Thread& Scheduler::pull_next_runnable_thread()
 68{
 69    auto affinity_mask = 1u << Processor::current_id();
 70
 71    return g_ready_queues->with([&](auto& ready_queues) -> Thread& {
 72        auto priority_mask = ready_queues.mask;
 73        while (priority_mask != 0) {
 74            auto priority = bit_scan_forward(priority_mask);
 75            VERIFY(priority > 0);
 76            auto& ready_queue = ready_queues.queues[--priority];
 77            for (auto& thread : ready_queue.thread_list) {
 78                VERIFY(thread.m_runnable_priority == (int)priority);
 79                if (thread.is_active())
 80                    continue;
 81                if (!(thread.affinity() & affinity_mask))
 82                    continue;
 83                thread.m_runnable_priority = -1;
 84                ready_queue.thread_list.remove(thread);
 85                if (ready_queue.thread_list.is_empty())
 86                    ready_queues.mask &= ~(1u << priority);
 87                // Mark it as active because we are using this thread. This is similar
 88                // to comparing it with Processor::current_thread, but when there are
 89                // multiple processors there's no easy way to check whether the thread
 90                // is actually still needed. This prevents accidental finalization when
 91                // a thread is no longer in Running state, but running on another core.
 92
 93                // We need to mark it active here so that this thread won't be
 94                // scheduled on another core if it were to be queued before actually
 95                // switching to it.
 96                // FIXME: Figure out a better way maybe?
 97                thread.set_active(true);
 98                return thread;
 99            }
100            priority_mask &= ~(1u << priority);
101        }
102        return *Processor::idle_thread();
103    });
104}
105
106Thread* Scheduler::peek_next_runnable_thread()
107{
108    auto affinity_mask = 1u << Processor::current_id();
109
110    return g_ready_queues->with([&](auto& ready_queues) -> Thread* {
111        auto priority_mask = ready_queues.mask;
112        while (priority_mask != 0) {
113            auto priority = bit_scan_forward(priority_mask);
114            VERIFY(priority > 0);
115            auto& ready_queue = ready_queues.queues[--priority];
116            for (auto& thread : ready_queue.thread_list) {
117                VERIFY(thread.m_runnable_priority == (int)priority);
118                if (thread.is_active())
119                    continue;
120                if (!(thread.affinity() & affinity_mask))
121                    continue;
122                return &thread;
123            }
124            priority_mask &= ~(1u << priority);
125        }
126
127        // Unlike in pull_next_runnable_thread() we don't want to fall back to
128        // the idle thread. We just want to see if we have any other thread ready
129        // to be scheduled.
130        return nullptr;
131    });
132}
133
134bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
135{
136    if (thread.is_idle_thread())
137        return true;
138
139    return g_ready_queues->with([&](auto& ready_queues) {
140        auto priority = thread.m_runnable_priority;
141        if (priority < 0) {
142            VERIFY(!thread.m_ready_queue_node.is_in_list());
143            return false;
144        }
145
146        if (check_affinity && !(thread.affinity() & (1 << Processor::current_id())))
147            return false;
148
149        VERIFY(ready_queues.mask & (1u << priority));
150        auto& ready_queue = ready_queues.queues[priority];
151        thread.m_runnable_priority = -1;
152        ready_queue.thread_list.remove(thread);
153        if (ready_queue.thread_list.is_empty())
154            ready_queues.mask &= ~(1u << priority);
155        return true;
156    });
157}
158
159void Scheduler::enqueue_runnable_thread(Thread& thread)
160{
161    VERIFY(g_scheduler_lock.is_locked_by_current_processor());
162    if (thread.is_idle_thread())
163        return;
164    auto priority = thread_priority_to_priority_index(thread.priority());
165
166    g_ready_queues->with([&](auto& ready_queues) {
167        VERIFY(thread.m_runnable_priority < 0);
168        thread.m_runnable_priority = (int)priority;
169        VERIFY(!thread.m_ready_queue_node.is_in_list());
170        auto& ready_queue = ready_queues.queues[priority];
171        bool was_empty = ready_queue.thread_list.is_empty();
172        ready_queue.thread_list.append(thread);
173        if (was_empty)
174            ready_queues.mask |= (1u << priority);
175    });
176}
177
178UNMAP_AFTER_INIT void Scheduler::start()
179{
180    VERIFY_INTERRUPTS_DISABLED();
181
182    // We need to acquire our scheduler lock, which will be released
183    // by the idle thread once control transferred there
184    g_scheduler_lock.lock();
185
186    auto& processor = Processor::current();
187    VERIFY(processor.is_initialized());
188    auto& idle_thread = *Processor::idle_thread();
189    VERIFY(processor.current_thread() == &idle_thread);
190    idle_thread.set_ticks_left(time_slice_for(idle_thread));
191    idle_thread.did_schedule();
192    idle_thread.set_initialized(true);
193    processor.init_context(idle_thread, false);
194    idle_thread.set_state(Thread::State::Running);
195    VERIFY(idle_thread.affinity() == (1u << processor.id()));
196    processor.initialize_context_switching(idle_thread);
197    VERIFY_NOT_REACHED();
198}
199
200void Scheduler::pick_next()
201{
202    VERIFY_INTERRUPTS_DISABLED();
203
204    // Set the in_scheduler flag before acquiring the spinlock. This
205    // prevents a recursive call into Scheduler::invoke_async upon
206    // leaving the scheduler lock.
207    ScopedCritical critical;
208    Processor::set_current_in_scheduler(true);
209    ScopeGuard guard(
210        []() {
211            // We may be on a different processor after we got switched
212            // back to this thread!
213            VERIFY(Processor::current_in_scheduler());
214            Processor::set_current_in_scheduler(false);
215        });
216
217    SpinlockLocker lock(g_scheduler_lock);
218
219    if constexpr (SCHEDULER_RUNNABLE_DEBUG) {
220        dump_thread_list();
221    }
222
223    auto& thread_to_schedule = pull_next_runnable_thread();
224    if constexpr (SCHEDULER_DEBUG) {
225        dbgln("Scheduler[{}]: Switch to {} @ {:p}",
226            Processor::current_id(),
227            thread_to_schedule,
228            thread_to_schedule.regs().ip());
229    }
230
231    // We need to leave our first critical section before switching context,
232    // but since we're still holding the scheduler lock we're still in a critical section
233    critical.leave();
234
235    thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
236    context_switch(&thread_to_schedule);
237}
238
239void Scheduler::yield()
240{
241    InterruptDisabler disabler;
242
243    auto const* current_thread = Thread::current();
244    dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: yielding thread {} in_irq={}", Processor::current_id(), *current_thread, Processor::current_in_irq());
245    VERIFY(current_thread != nullptr);
246    if (Processor::current_in_irq() || Processor::in_critical()) {
247        // If we're handling an IRQ we can't switch context, or we're in
248        // a critical section where we don't want to switch contexts, then
249        // delay until exiting the trap or critical section
250        Processor::current().invoke_scheduler_async();
251        return;
252    }
253
254    Scheduler::pick_next();
255}
256
257void Scheduler::context_switch(Thread* thread)
258{
259    thread->did_schedule();
260
261    auto* from_thread = Thread::current();
262    VERIFY(from_thread);
263
264    if (from_thread == thread)
265        return;
266
267    // If the last process hasn't blocked (still marked as running),
268    // mark it as runnable for the next round.
269    if (from_thread->state() == Thread::State::Running)
270        from_thread->set_state(Thread::State::Runnable);
271
272#ifdef LOG_EVERY_CONTEXT_SWITCH
273    auto const msg = "Scheduler[{}]: {} -> {} [prio={}] {:p}";
274
275    dbgln(msg,
276        Processor::current_id(), from_thread->tid().value(),
277        thread->tid().value(), thread->priority(), thread->regs().ip());
278#endif
279
280    auto& proc = Processor::current();
281    if (!thread->is_initialized()) {
282        proc.init_context(*thread, false);
283        thread->set_initialized(true);
284    }
285    thread->set_state(Thread::State::Running);
286
287    PerformanceManager::add_context_switch_perf_event(*from_thread, *thread);
288
289    proc.switch_context(from_thread, thread);
290
291    // NOTE: from_thread at this point reflects the thread we were
292    // switched from, and thread reflects Thread::current()
293    enter_current(*from_thread);
294    VERIFY(thread == Thread::current());
295
296    {
297        SpinlockLocker lock(thread->get_lock());
298        thread->dispatch_one_pending_signal();
299    }
300}
301
302void Scheduler::enter_current(Thread& prev_thread)
303{
304    VERIFY(g_scheduler_lock.is_locked_by_current_processor());
305
306    // We already recorded the scheduled time when entering the trap, so this merely accounts for the kernel time since then
307    auto scheduler_time = TimeManagement::scheduler_current_time();
308    prev_thread.update_time_scheduled(scheduler_time, true, true);
309    auto* current_thread = Thread::current();
310    current_thread->update_time_scheduled(scheduler_time, true, false);
311
312    // NOTE: When doing an exec(), we will context switch from and to the same thread!
313    //       In that case, we must not mark the previous thread as inactive.
314    if (&prev_thread != current_thread)
315        prev_thread.set_active(false);
316
317    if (prev_thread.state() == Thread::State::Dying) {
318        // If the thread we switched from is marked as dying, then notify
319        // the finalizer. Note that as soon as we leave the scheduler lock
320        // the finalizer may free from_thread!
321        notify_finalizer();
322    }
323}
324
325void Scheduler::leave_on_first_switch(InterruptsState previous_interrupts_state)
326{
327    // This is called when a thread is switched into for the first time.
328    // At this point, enter_current has already be called, but because
329    // Scheduler::context_switch is not in the call stack we need to
330    // clean up and release locks manually here
331    g_scheduler_lock.unlock(previous_interrupts_state);
332
333    VERIFY(Processor::current_in_scheduler());
334    Processor::set_current_in_scheduler(false);
335}
336
337void Scheduler::prepare_after_exec()
338{
339    // This is called after exec() when doing a context "switch" into
340    // the new process. This is called from Processor::assume_context
341    VERIFY(g_scheduler_lock.is_locked_by_current_processor());
342
343    VERIFY(!Processor::current_in_scheduler());
344    Processor::set_current_in_scheduler(true);
345}
346
347void Scheduler::prepare_for_idle_loop()
348{
349    // This is called when the CPU finished setting up the idle loop
350    // and is about to run it. We need to acquire the scheduler lock
351    VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
352    g_scheduler_lock.lock();
353
354    VERIFY(!Processor::current_in_scheduler());
355    Processor::set_current_in_scheduler(true);
356}
357
358Process* Scheduler::colonel()
359{
360    VERIFY(s_colonel_process);
361    return s_colonel_process;
362}
363
364UNMAP_AFTER_INIT void Scheduler::initialize()
365{
366    VERIFY(Processor::is_initialized()); // sanity check
367    VERIFY(TimeManagement::is_initialized());
368
369    LockRefPtr<Thread> idle_thread;
370    g_finalizer_wait_queue = new WaitQueue;
371
372    g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
373    s_colonel_process = Process::create_kernel_process(idle_thread, KString::must_create("colonel"sv), idle_loop, nullptr, 1, Process::RegisterProcess::No).leak_ref();
374    VERIFY(s_colonel_process);
375    VERIFY(idle_thread);
376    idle_thread->set_priority(THREAD_PRIORITY_MIN);
377    idle_thread->set_name(KString::must_create("Idle Task #0"sv));
378
379    set_idle_thread(idle_thread);
380}
381
382UNMAP_AFTER_INIT void Scheduler::set_idle_thread(Thread* idle_thread)
383{
384    idle_thread->set_idle_thread();
385    Processor::current().set_idle_thread(*idle_thread);
386    Processor::set_current_thread(*idle_thread);
387}
388
389UNMAP_AFTER_INIT Thread* Scheduler::create_ap_idle_thread(u32 cpu)
390{
391    VERIFY(cpu != 0);
392    // This function is called on the bsp, but creates an idle thread for another AP
393    VERIFY(Processor::is_bootstrap_processor());
394
395    VERIFY(s_colonel_process);
396    Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, MUST(KString::formatted("idle thread #{}", cpu)), 1 << cpu, false);
397    VERIFY(idle_thread);
398    return idle_thread;
399}
400
401void Scheduler::add_time_scheduled(u64 time_to_add, bool is_kernel)
402{
403    g_total_time_scheduled.with([&](auto& total_time_scheduled) {
404        total_time_scheduled.total += time_to_add;
405        if (is_kernel)
406            total_time_scheduled.total_kernel += time_to_add;
407    });
408}
409
410void Scheduler::timer_tick(RegisterState const& regs)
411{
412    VERIFY_INTERRUPTS_DISABLED();
413    VERIFY(Processor::current_in_irq());
414
415    auto* current_thread = Processor::current_thread();
416    if (!current_thread)
417        return;
418
419    // Sanity checks
420    VERIFY(current_thread->current_trap());
421    VERIFY(current_thread->current_trap()->regs == &regs);
422
423    if (current_thread->process().is_kernel_process()) {
424        // Because the previous mode when entering/exiting kernel threads never changes
425        // we never update the time scheduled. So we need to update it manually on the
426        // timer interrupt
427        current_thread->update_time_scheduled(TimeManagement::scheduler_current_time(), true, false);
428    }
429
430    if (current_thread->previous_mode() == ExecutionMode::User && current_thread->should_die() && !current_thread->is_blocked()) {
431        SpinlockLocker scheduler_lock(g_scheduler_lock);
432        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: Terminating user mode thread {}", Processor::current_id(), *current_thread);
433        current_thread->set_state(Thread::State::Dying);
434        Processor::current().invoke_scheduler_async();
435        return;
436    }
437
438    if (current_thread->tick())
439        return;
440
441    if (!current_thread->is_idle_thread() && !peek_next_runnable_thread()) {
442        // If no other thread is ready to be scheduled we don't need to
443        // switch to the idle thread. Just give the current thread another
444        // time slice and let it run!
445        current_thread->set_ticks_left(time_slice_for(*current_thread));
446        current_thread->did_schedule();
447        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: No other threads ready, give {} another timeslice", Processor::current_id(), *current_thread);
448        return;
449    }
450
451    VERIFY_INTERRUPTS_DISABLED();
452    VERIFY(Processor::current_in_irq());
453    Processor::current().invoke_scheduler_async();
454}
455
456void Scheduler::invoke_async()
457{
458    VERIFY_INTERRUPTS_DISABLED();
459    VERIFY(!Processor::current_in_irq());
460
461    // Since this function is called when leaving critical sections (such
462    // as a Spinlock), we need to check if we're not already doing this
463    // to prevent recursion
464    if (!Processor::current_in_scheduler())
465        pick_next();
466}
467
468void Scheduler::notify_finalizer()
469{
470    if (!g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel))
471        g_finalizer_wait_queue->wake_all();
472}
473
474void Scheduler::idle_loop(void*)
475{
476    auto& proc = Processor::current();
477    dbgln("Scheduler[{}]: idle loop running", proc.id());
478    VERIFY(Processor::are_interrupts_enabled());
479
480    for (;;) {
481        proc.idle_begin();
482        proc.wait_for_interrupt();
483        proc.idle_end();
484        VERIFY_INTERRUPTS_ENABLED();
485        yield();
486    }
487}
488
489void Scheduler::dump_scheduler_state(bool with_stack_traces)
490{
491    dump_thread_list(with_stack_traces);
492}
493
494bool Scheduler::is_initialized()
495{
496    // The scheduler is initialized iff the idle thread exists
497    return Processor::idle_thread() != nullptr;
498}
499
500TotalTimeScheduled Scheduler::get_total_time_scheduled()
501{
502    return g_total_time_scheduled.with([&](auto& total_time_scheduled) { return total_time_scheduled; });
503}
504
505void dump_thread_list(bool with_stack_traces)
506{
507    dbgln("Scheduler thread list for processor {}:", Processor::current_id());
508
509    auto get_eip = [](Thread& thread) -> u32 {
510        if (!thread.current_trap())
511            return thread.regs().ip();
512        return thread.get_register_dump_from_stack().ip();
513    };
514
515    Thread::for_each([&](Thread& thread) {
516        auto color = thread.process().is_kernel_process() ? "\x1b[34;1m"sv : "\x1b[33;1m"sv;
517        switch (thread.state()) {
518        case Thread::State::Dying:
519            dmesgln("  {}{:30}\x1b[0m @ {:08x} is {:14} (Finalizable: {}, nsched: {})",
520                color,
521                thread,
522                get_eip(thread),
523                thread.state_string(),
524                thread.is_finalizable(),
525                thread.times_scheduled());
526            break;
527        default:
528            dmesgln("  {}{:30}\x1b[0m @ {:08x} is {:14} (Pr:{:2}, nsched: {})",
529                color,
530                thread,
531                get_eip(thread),
532                thread.state_string(),
533                thread.priority(),
534                thread.times_scheduled());
535            break;
536        }
537        if (thread.state() == Thread::State::Blocked && thread.blocking_mutex()) {
538            dmesgln("    Blocking on Mutex {:#x} ({})", thread.blocking_mutex(), thread.blocking_mutex()->name());
539        }
540        if (thread.state() == Thread::State::Blocked && thread.blocker()) {
541            dmesgln("    Blocking on Blocker {:#x}", thread.blocker());
542        }
543#if LOCK_DEBUG
544        thread.for_each_held_lock([](auto const& entry) {
545            dmesgln("    Holding lock {:#x} ({}) at {}", entry.lock, entry.lock->name(), entry.lock_location);
546        });
547#endif
548        if (with_stack_traces) {
549            auto trace_or_error = thread.backtrace();
550            if (!trace_or_error.is_error()) {
551                auto trace = trace_or_error.release_value();
552                dbgln("Backtrace:");
553                kernelputstr(trace->characters(), trace->length());
554            }
555        }
556        return IterationDecision::Continue;
557    });
558}
559
560}