// Serenity Operating System — kernel scheduler source
// (repository view metadata: master branch, 560 lines, 20 kB, raw view)
/*
 * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/BuiltinWrappers.h>
#include <AK/ScopeGuard.h>
#include <AK/Singleton.h>
#include <AK/Time.h>
#include <Kernel/Arch/TrapFrame.h>
#include <Kernel/Debug.h>
#include <Kernel/InterruptDisabler.h>
#include <Kernel/Panic.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Scheduler.h>
#include <Kernel/Sections.h>
#include <Kernel/Time/TimeManagement.h>
#include <Kernel/kstdio.h>

namespace Kernel {

// Global lock serializing all scheduling decisions across processors.
// Recursive so that scheduler code may re-enter while already holding it.
RecursiveSpinlock<LockRank::None> g_scheduler_lock {};

// Returns the number of timer ticks a thread gets per scheduling round.
// The idle thread gets a single tick; everything else gets two.
static u32 time_slice_for(Thread const& thread)
{
    // One time slice unit == 4ms (assuming 250 ticks/second)
    if (thread.is_idle_thread())
        return 1;
    return 2;
}

READONLY_AFTER_INIT Thread* g_finalizer;
READONLY_AFTER_INIT WaitQueue* g_finalizer_wait_queue;
Atomic<bool> g_finalizer_has_work { false };
// The "colonel" is the kernel's root process; it owns the per-CPU idle threads.
READONLY_AFTER_INIT static Process* s_colonel_process;

// One ready queue bucket: an intrusive list of runnable threads sharing a
// priority bucket (linked through Thread::m_ready_queue_node, so no allocation).
struct ThreadReadyQueue {
    IntrusiveList<&Thread::m_ready_queue_node> thread_list;
};

// All ready queue buckets plus a bitmask with one bit per non-empty bucket,
// so the highest-priority non-empty bucket can be found with a bit scan.
struct ThreadReadyQueues {
    u32 mask {};
    static constexpr size_t count = sizeof(mask) * 8;
    Array<ThreadReadyQueue, count> queues;
};

static Singleton<SpinlockProtected<ThreadReadyQueues, LockRank::None>> g_ready_queues;

static SpinlockProtected<TotalTimeScheduled, LockRank::None> g_total_time_scheduled {};

static void dump_thread_list(bool = false);

static inline u32 thread_priority_to_priority_index(u32 thread_priority)
{
    // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
    // to an index into g_ready_queues where 0 is the highest priority bucket
    VERIFY(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
    constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
    static_assert(thread_priority_count > 0);
    // Higher thread priority maps to a lower bucket index (bucket 0 is scanned first).
    auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (ThreadReadyQueues::count - 1);
    VERIFY(priority_bucket < ThreadReadyQueues::count);
    return priority_bucket;
}

// Removes and returns the best runnable thread for the current processor,
// honoring CPU affinity; falls back to this processor's idle thread when
// no other thread is eligible. The returned thread is marked active.
Thread& Scheduler::pull_next_runnable_thread()
{
    auto affinity_mask = 1u << Processor::current_id();

    return g_ready_queues->with([&](auto& ready_queues) -> Thread& {
        auto priority_mask = ready_queues.mask;
        while (priority_mask != 0) {
            // NOTE: bit_scan_forward() appears to return a 1-based bit index here
            // (hence the VERIFY and the pre-decrement below) — confirm against
            // AK/BuiltinWrappers.h.
            auto priority = bit_scan_forward(priority_mask);
            VERIFY(priority > 0);
            auto& ready_queue = ready_queues.queues[--priority];
            for (auto& thread : ready_queue.thread_list) {
                VERIFY(thread.m_runnable_priority == (int)priority);
                if (thread.is_active())
                    continue;
                if (!(thread.affinity() & affinity_mask))
                    continue;
                thread.m_runnable_priority = -1;
                ready_queue.thread_list.remove(thread);
                // Keep the non-empty-bucket bitmask in sync with the list.
                if (ready_queue.thread_list.is_empty())
                    ready_queues.mask &= ~(1u << priority);
                // Mark it as active because we are using this thread. This is similar
                // to comparing it with Processor::current_thread, but when there are
                // multiple processors there's no easy way to check whether the thread
                // is actually still needed. This prevents accidental finalization when
                // a thread is no longer in Running state, but running on another core.

                // We need to mark it active here so that this thread won't be
                // scheduled on another core if it were to be queued before actually
                // switching to it.
                // FIXME: Figure out a better way maybe?
                thread.set_active(true);
                return thread;
            }
            priority_mask &= ~(1u << priority);
        }
        return *Processor::idle_thread();
    });
}

// Like pull_next_runnable_thread(), but non-destructive: returns the thread
// that would be scheduled next on this processor, or nullptr if none.
Thread* Scheduler::peek_next_runnable_thread()
{
    auto affinity_mask = 1u << Processor::current_id();

    return g_ready_queues->with([&](auto& ready_queues) -> Thread* {
        auto priority_mask = ready_queues.mask;
        while (priority_mask != 0) {
            auto priority = bit_scan_forward(priority_mask);
            VERIFY(priority > 0);
            auto& ready_queue = ready_queues.queues[--priority];
            for (auto& thread : ready_queue.thread_list) {
                VERIFY(thread.m_runnable_priority == (int)priority);
                if (thread.is_active())
                    continue;
                if (!(thread.affinity() & affinity_mask))
                    continue;
                return &thread;
            }
            priority_mask &= ~(1u << priority);
        }

        // Unlike in pull_next_runnable_thread() we don't want to fall back to
        // the idle thread. We just want to see if we have any other thread ready
        // to be scheduled.
        return nullptr;
    });
}

// Removes a thread from the ready queues. Returns true if the thread is no
// longer queued afterwards (idle threads are never queued and report true).
// When check_affinity is set, refuses to dequeue a thread that cannot run
// on the current processor.
bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
{
    if (thread.is_idle_thread())
        return true;

    return g_ready_queues->with([&](auto& ready_queues) {
        auto priority = thread.m_runnable_priority;
        // m_runnable_priority < 0 means "not currently enqueued".
        if (priority < 0) {
            VERIFY(!thread.m_ready_queue_node.is_in_list());
            return false;
        }

        if (check_affinity && !(thread.affinity() & (1 << Processor::current_id())))
            return false;

        VERIFY(ready_queues.mask & (1u << priority));
        auto& ready_queue = ready_queues.queues[priority];
        thread.m_runnable_priority = -1;
        ready_queue.thread_list.remove(thread);
        if (ready_queue.thread_list.is_empty())
            ready_queues.mask &= ~(1u << priority);
        return true;
    });
}

// Appends a thread to the ready queue bucket matching its priority.
// Must be called with g_scheduler_lock held; idle threads are never enqueued.
void Scheduler::enqueue_runnable_thread(Thread& thread)
{
    VERIFY(g_scheduler_lock.is_locked_by_current_processor());
    if (thread.is_idle_thread())
        return;
    auto priority = thread_priority_to_priority_index(thread.priority());

    g_ready_queues->with([&](auto& ready_queues) {
        VERIFY(thread.m_runnable_priority < 0);
        thread.m_runnable_priority = (int)priority;
        VERIFY(!thread.m_ready_queue_node.is_in_list());
        auto& ready_queue = ready_queues.queues[priority];
        bool was_empty = ready_queue.thread_list.is_empty();
        ready_queue.thread_list.append(thread);
        if (was_empty)
            ready_queues.mask |= (1u << priority);
    });
}

// Hands the current processor over to the scheduler for good: sets up the
// idle thread's context and begins context switching. Never returns.
UNMAP_AFTER_INIT void Scheduler::start()
{
    VERIFY_INTERRUPTS_DISABLED();

    // We need to acquire our scheduler lock, which will be released
    // by the idle thread once control is transferred there
    g_scheduler_lock.lock();

    auto& processor = Processor::current();
    VERIFY(processor.is_initialized());
    auto& idle_thread = *Processor::idle_thread();
    VERIFY(processor.current_thread() == &idle_thread);
    idle_thread.set_ticks_left(time_slice_for(idle_thread));
    idle_thread.did_schedule();
    idle_thread.set_initialized(true);
    processor.init_context(idle_thread, false);
    idle_thread.set_state(Thread::State::Running);
    VERIFY(idle_thread.affinity() == (1u << processor.id()));
    processor.initialize_context_switching(idle_thread);
    VERIFY_NOT_REACHED();
}

// Picks the next thread to run on this processor and switches to it.
// Interrupts must be disabled; takes g_scheduler_lock for the duration.
void Scheduler::pick_next()
{
    VERIFY_INTERRUPTS_DISABLED();

    // Set the in_scheduler flag before acquiring the spinlock. This
    // prevents a recursive call into Scheduler::invoke_async upon
    // leaving the scheduler lock.
    ScopedCritical critical;
    Processor::set_current_in_scheduler(true);
    ScopeGuard guard(
        []() {
            // We may be on a different processor after we got switched
            // back to this thread!
            VERIFY(Processor::current_in_scheduler());
            Processor::set_current_in_scheduler(false);
        });

    SpinlockLocker lock(g_scheduler_lock);

    if constexpr (SCHEDULER_RUNNABLE_DEBUG) {
        dump_thread_list();
    }

    auto& thread_to_schedule = pull_next_runnable_thread();
    if constexpr (SCHEDULER_DEBUG) {
        dbgln("Scheduler[{}]: Switch to {} @ {:p}",
            Processor::current_id(),
            thread_to_schedule,
            thread_to_schedule.regs().ip());
    }

    // We need to leave our first critical section before switching context,
    // but since we're still holding the scheduler lock we're still in a critical section
    critical.leave();

    thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
    context_switch(&thread_to_schedule);
}

// Voluntarily gives up the CPU. If called from an IRQ handler or a critical
// section, defers the reschedule until it is safe to switch contexts.
void Scheduler::yield()
{
    InterruptDisabler disabler;

    auto const* current_thread = Thread::current();
    dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: yielding thread {} in_irq={}", Processor::current_id(), *current_thread, Processor::current_in_irq());
    VERIFY(current_thread != nullptr);
    if (Processor::current_in_irq() || Processor::in_critical()) {
        // If we're handling an IRQ we can't switch context, or we're in
        // a critical section where we don't want to switch contexts, then
        // delay until exiting the trap or critical section
        Processor::current().invoke_scheduler_async();
        return;
    }

    Scheduler::pick_next();
}

// Switches execution from the current thread to `thread`, initializing its
// context on first run. On return, we have been switched back to at some
// later point; any pending signal for the resumed thread is dispatched.
void Scheduler::context_switch(Thread* thread)
{
    thread->did_schedule();

    auto* from_thread = Thread::current();
    VERIFY(from_thread);

    if (from_thread == thread)
        return;

    // If the last process hasn't blocked (still marked as running),
    // mark it as runnable for the next round.
    if (from_thread->state() == Thread::State::Running)
        from_thread->set_state(Thread::State::Runnable);

#ifdef LOG_EVERY_CONTEXT_SWITCH
    auto const msg = "Scheduler[{}]: {} -> {} [prio={}] {:p}";

    dbgln(msg,
        Processor::current_id(), from_thread->tid().value(),
        thread->tid().value(), thread->priority(), thread->regs().ip());
#endif

    auto& proc = Processor::current();
    if (!thread->is_initialized()) {
        proc.init_context(*thread, false);
        thread->set_initialized(true);
    }
    thread->set_state(Thread::State::Running);

    PerformanceManager::add_context_switch_perf_event(*from_thread, *thread);

    proc.switch_context(from_thread, thread);

    // NOTE: from_thread at this point reflects the thread we were
    // switched from, and thread reflects Thread::current()
    enter_current(*from_thread);
    VERIFY(thread == Thread::current());

    {
        SpinlockLocker lock(thread->get_lock());
        thread->dispatch_one_pending_signal();
    }
}

// Bookkeeping performed right after a context switch lands in the new thread:
// updates time accounting, deactivates the previous thread, and wakes the
// finalizer if the previous thread is dying.
void Scheduler::enter_current(Thread& prev_thread)
{
    VERIFY(g_scheduler_lock.is_locked_by_current_processor());

    // We already recorded the scheduled time when entering the trap, so this merely accounts for the kernel time since then
    auto scheduler_time = TimeManagement::scheduler_current_time();
    prev_thread.update_time_scheduled(scheduler_time, true, true);
    auto* current_thread = Thread::current();
    current_thread->update_time_scheduled(scheduler_time, true, false);

    // NOTE: When doing an exec(), we will context switch from and to the same thread!
    // In that case, we must not mark the previous thread as inactive.
    if (&prev_thread != current_thread)
        prev_thread.set_active(false);

    if (prev_thread.state() == Thread::State::Dying) {
        // If the thread we switched from is marked as dying, then notify
        // the finalizer. Note that as soon as we leave the scheduler lock
        // the finalizer may free from_thread!
        notify_finalizer();
    }
}

void Scheduler::leave_on_first_switch(InterruptsState previous_interrupts_state)
{
    // This is called when a thread is switched into for the first time.
    // At this point, enter_current has already been called, but because
    // Scheduler::context_switch is not in the call stack we need to
    // clean up and release locks manually here
    g_scheduler_lock.unlock(previous_interrupts_state);

    VERIFY(Processor::current_in_scheduler());
    Processor::set_current_in_scheduler(false);
}

void Scheduler::prepare_after_exec()
{
    // This is called after exec() when doing a context "switch" into
    // the new process. This is called from Processor::assume_context
    VERIFY(g_scheduler_lock.is_locked_by_current_processor());

    VERIFY(!Processor::current_in_scheduler());
    Processor::set_current_in_scheduler(true);
}

void Scheduler::prepare_for_idle_loop()
{
    // This is called when the CPU finished setting up the idle loop
    // and is about to run it. We need to acquire the scheduler lock
    VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
    g_scheduler_lock.lock();

    VERIFY(!Processor::current_in_scheduler());
    Processor::set_current_in_scheduler(true);
}

// Returns the kernel's root ("colonel") process; must already be created.
Process* Scheduler::colonel()
{
    VERIFY(s_colonel_process);
    return s_colonel_process;
}

// One-time scheduler setup on the bootstrap processor: creates the colonel
// process together with the BSP's idle thread and installs that idle thread.
UNMAP_AFTER_INIT void Scheduler::initialize()
{
    VERIFY(Processor::is_initialized()); // sanity check
    VERIFY(TimeManagement::is_initialized());

    LockRefPtr<Thread> idle_thread;
    g_finalizer_wait_queue = new WaitQueue;

    g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
    // NOTE: leak_ref() intentionally keeps the colonel process alive forever.
    s_colonel_process = Process::create_kernel_process(idle_thread, KString::must_create("colonel"sv), idle_loop, nullptr, 1, Process::RegisterProcess::No).leak_ref();
    VERIFY(s_colonel_process);
    VERIFY(idle_thread);
    idle_thread->set_priority(THREAD_PRIORITY_MIN);
    idle_thread->set_name(KString::must_create("Idle Task #0"sv));

    set_idle_thread(idle_thread);
}

// Registers the given thread as the current processor's idle thread and
// makes it the current thread.
UNMAP_AFTER_INIT void Scheduler::set_idle_thread(Thread* idle_thread)
{
    idle_thread->set_idle_thread();
    Processor::current().set_idle_thread(*idle_thread);
    Processor::set_current_thread(*idle_thread);
}

// Creates (on the BSP) the idle thread for application processor `cpu`,
// pinned to that CPU via its affinity mask.
UNMAP_AFTER_INIT Thread* Scheduler::create_ap_idle_thread(u32 cpu)
{
    VERIFY(cpu != 0);
    // This function is called on the bsp, but creates an idle thread for another AP
    VERIFY(Processor::is_bootstrap_processor());

    VERIFY(s_colonel_process);
    Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, MUST(KString::formatted("idle thread #{}", cpu)), 1 << cpu, false);
    VERIFY(idle_thread);
    return idle_thread;
}

// Accumulates scheduled time into the global totals; kernel time is
// additionally tracked separately.
void Scheduler::add_time_scheduled(u64 time_to_add, bool is_kernel)
{
    g_total_time_scheduled.with([&](auto& total_time_scheduled) {
        total_time_scheduled.total += time_to_add;
        if (is_kernel)
            total_time_scheduled.total_kernel += time_to_add;
    });
}

// Called from the timer IRQ on every tick: accounts time, reaps dying user
// threads, decrements the current thread's time slice, and requests an async
// reschedule when the slice is used up and another thread is ready.
void Scheduler::timer_tick(RegisterState const& regs)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current_in_irq());

    auto* current_thread = Processor::current_thread();
    if (!current_thread)
        return;

    // Sanity checks
    VERIFY(current_thread->current_trap());
    VERIFY(current_thread->current_trap()->regs == &regs);

    if (current_thread->process().is_kernel_process()) {
        // Because the previous mode when entering/exiting kernel threads never changes
        // we never update the time scheduled. So we need to update it manually on the
        // timer interrupt
        current_thread->update_time_scheduled(TimeManagement::scheduler_current_time(), true, false);
    }

    if (current_thread->previous_mode() == ExecutionMode::User && current_thread->should_die() && !current_thread->is_blocked()) {
        SpinlockLocker scheduler_lock(g_scheduler_lock);
        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: Terminating user mode thread {}", Processor::current_id(), *current_thread);
        current_thread->set_state(Thread::State::Dying);
        Processor::current().invoke_scheduler_async();
        return;
    }

    // NOTE: tick() presumably returns true while the thread still has ticks
    // left in its slice — confirm against Thread::tick().
    if (current_thread->tick())
        return;

    if (!current_thread->is_idle_thread() && !peek_next_runnable_thread()) {
        // If no other thread is ready to be scheduled we don't need to
        // switch to the idle thread. Just give the current thread another
        // time slice and let it run!
        current_thread->set_ticks_left(time_slice_for(*current_thread));
        current_thread->did_schedule();
        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: No other threads ready, give {} another timeslice", Processor::current_id(), *current_thread);
        return;
    }

    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current_in_irq());
    Processor::current().invoke_scheduler_async();
}

// Runs a previously requested (deferred) reschedule, e.g. one queued by
// timer_tick() or yield() while in an IRQ or critical section.
void Scheduler::invoke_async()
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(!Processor::current_in_irq());

    // Since this function is called when leaving critical sections (such
    // as a Spinlock), we need to check if we're not already doing this
    // to prevent recursion
    if (!Processor::current_in_scheduler())
        pick_next();
}

// Wakes the finalizer task; the exchange() ensures only the first caller
// after the flag was clear actually performs the wake.
void Scheduler::notify_finalizer()
{
    if (!g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel))
        g_finalizer_wait_queue->wake_all();
}

// Body of every per-CPU idle thread: halt until an interrupt arrives, then
// yield so any newly runnable thread can be scheduled.
void Scheduler::idle_loop(void*)
{
    auto& proc = Processor::current();
    dbgln("Scheduler[{}]: idle loop running", proc.id());
    VERIFY(Processor::are_interrupts_enabled());

    for (;;) {
        proc.idle_begin();
        proc.wait_for_interrupt();
        proc.idle_end();
        VERIFY_INTERRUPTS_ENABLED();
        yield();
    }
}

void Scheduler::dump_scheduler_state(bool with_stack_traces)
{
    dump_thread_list(with_stack_traces);
}

bool Scheduler::is_initialized()
{
    // The scheduler is initialized iff the idle thread exists
    return Processor::idle_thread() != nullptr;
}

// Returns a snapshot (by value) of the global scheduled-time totals.
TotalTimeScheduled Scheduler::get_total_time_scheduled()
{
    return g_total_time_scheduled.with([&](auto& total_time_scheduled) { return total_time_scheduled; });
}

// Debug dump of every thread's state, priority and (optionally) backtrace
// to the kernel log.
void dump_thread_list(bool with_stack_traces)
{
    dbgln("Scheduler thread list for processor {}:", Processor::current_id());

    // Prefer the instruction pointer captured at trap entry when available;
    // otherwise fall back to the thread's saved register state.
    auto get_eip = [](Thread& thread) -> u32 {
        if (!thread.current_trap())
            return thread.regs().ip();
        return thread.get_register_dump_from_stack().ip();
    };

    Thread::for_each([&](Thread& thread) {
        // Kernel threads in blue, user threads in yellow (ANSI escapes).
        auto color = thread.process().is_kernel_process() ? "\x1b[34;1m"sv : "\x1b[33;1m"sv;
        switch (thread.state()) {
        case Thread::State::Dying:
            dmesgln("  {}{:30}\x1b[0m @ {:08x} is {:14} (Finalizable: {}, nsched: {})",
                color,
                thread,
                get_eip(thread),
                thread.state_string(),
                thread.is_finalizable(),
                thread.times_scheduled());
            break;
        default:
            dmesgln("  {}{:30}\x1b[0m @ {:08x} is {:14} (Pr:{:2}, nsched: {})",
                color,
                thread,
                get_eip(thread),
                thread.state_string(),
                thread.priority(),
                thread.times_scheduled());
            break;
        }
        if (thread.state() == Thread::State::Blocked && thread.blocking_mutex()) {
            dmesgln("    Blocking on Mutex {:#x} ({})", thread.blocking_mutex(), thread.blocking_mutex()->name());
        }
        if (thread.state() == Thread::State::Blocked && thread.blocker()) {
            dmesgln("    Blocking on Blocker {:#x}", thread.blocker());
        }
#if LOCK_DEBUG
        thread.for_each_held_lock([](auto const& entry) {
            dmesgln("    Holding lock {:#x} ({}) at {}", entry.lock, entry.lock->name(), entry.lock_location);
        });
#endif
        if (with_stack_traces) {
            auto trace_or_error = thread.backtrace();
            if (!trace_or_error.is_error()) {
                auto trace = trace_or_error.release_value();
                dbgln("Backtrace:");
                kernelputstr(trace->characters(), trace->length());
            }
        }
        return IterationDecision::Continue;
    });
}

}