Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched, idle: Fix the idle polling state logic

Mike reported that commit 7d1a9417 ("x86: Use generic idle loop")
regressed several workloads and caused excessive reschedule
interrupts.

The patch in question failed to notice that the x86 code had an
inverted sense of the polling state versus the new generic code (x86:
default polling, generic: default !polling).

Fix the two prominent x86 mwait based idle drivers and introduce a few
new generic polling helpers (fixing the wrong smp_mb__after_clear_bit
usage).

Also switch the idle routines to using tif_need_resched(), which is an
immediate TIF_NEED_RESCHED test, as opposed to need_resched(), which
will end up behaving slightly differently.

Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: lenb@kernel.org
Cc: tglx@linutronix.de
Link: http://lkml.kernel.org/n/tip-nc03imb0etuefmzybzj7sprf@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Peter Zijlstra; committed by Ingo Molnar.
ea811747 31503986

+91 -52
+3 -3
arch/x86/kernel/process.c
··· 391 391 * The switch back from broadcast mode needs to be 392 392 * called with interrupts disabled. 393 393 */ 394 - local_irq_disable(); 395 - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); 396 - local_irq_enable(); 394 + local_irq_disable(); 395 + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); 396 + local_irq_enable(); 397 397 } else 398 398 default_idle(); 399 399 }
+10 -36
drivers/acpi/processor_idle.c
··· 119 119 */ 120 120 static void acpi_safe_halt(void) 121 121 { 122 - current_thread_info()->status &= ~TS_POLLING; 123 - /* 124 - * TS_POLLING-cleared state must be visible before we 125 - * test NEED_RESCHED: 126 - */ 127 - smp_mb(); 128 - if (!need_resched()) { 122 + if (!tif_need_resched()) { 129 123 safe_halt(); 130 124 local_irq_disable(); 131 125 } 132 - current_thread_info()->status |= TS_POLLING; 133 126 } 134 127 135 128 #ifdef ARCH_APICTIMER_STOPS_ON_C3 ··· 730 737 if (unlikely(!pr)) 731 738 return -EINVAL; 732 739 740 + if (cx->entry_method == ACPI_CSTATE_FFH) { 741 + if (current_set_polling_and_test()) 742 + return -EINVAL; 743 + } 744 + 733 745 lapic_timer_state_broadcast(pr, cx, 1); 734 746 acpi_idle_do_entry(cx); 735 747 ··· 788 790 if (unlikely(!pr)) 789 791 return -EINVAL; 790 792 791 - if (cx->entry_method != ACPI_CSTATE_FFH) { 792 - current_thread_info()->status &= ~TS_POLLING; 793 - /* 794 - * TS_POLLING-cleared state must be visible before we test 795 - * NEED_RESCHED: 796 - */ 797 - smp_mb(); 798 - 799 - if (unlikely(need_resched())) { 800 - current_thread_info()->status |= TS_POLLING; 793 + if (cx->entry_method == ACPI_CSTATE_FFH) { 794 + if (current_set_polling_and_test()) 801 795 return -EINVAL; 802 - } 803 796 } 804 797 805 798 /* ··· 807 818 acpi_idle_do_entry(cx); 808 819 809 820 sched_clock_idle_wakeup_event(0); 810 - 811 - if (cx->entry_method != ACPI_CSTATE_FFH) 812 - current_thread_info()->status |= TS_POLLING; 813 821 814 822 lapic_timer_state_broadcast(pr, cx, 0); 815 823 return index; ··· 844 858 } 845 859 } 846 860 847 - if (cx->entry_method != ACPI_CSTATE_FFH) { 848 - current_thread_info()->status &= ~TS_POLLING; 849 - /* 850 - * TS_POLLING-cleared state must be visible before we test 851 - * NEED_RESCHED: 852 - */ 853 - smp_mb(); 854 - 855 - if (unlikely(need_resched())) { 856 - current_thread_info()->status |= TS_POLLING; 861 + if (cx->entry_method == ACPI_CSTATE_FFH) { 862 + if (current_set_polling_and_test()) 857 863 
return -EINVAL; 858 - } 859 864 } 860 865 861 866 acpi_unlazy_tlb(smp_processor_id()); ··· 891 914 } 892 915 893 916 sched_clock_idle_wakeup_event(0); 894 - 895 - if (cx->entry_method != ACPI_CSTATE_FFH) 896 - current_thread_info()->status |= TS_POLLING; 897 917 898 918 lapic_timer_state_broadcast(pr, cx, 0); 899 919 return index;
+1 -1
drivers/idle/intel_idle.c
··· 359 359 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 360 360 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); 361 361 362 - if (!need_resched()) { 362 + if (!current_set_polling_and_test()) { 363 363 364 364 __monitor((void *)&current_thread_info()->flags, 0, 0); 365 365 smp_mb();
+71 -7
include/linux/sched.h
··· 2479 2479 { 2480 2480 return task_thread_info(p)->status & TS_POLLING; 2481 2481 } 2482 - static inline void current_set_polling(void) 2482 + static inline void __current_set_polling(void) 2483 2483 { 2484 2484 current_thread_info()->status |= TS_POLLING; 2485 2485 } 2486 2486 2487 - static inline void current_clr_polling(void) 2487 + static inline bool __must_check current_set_polling_and_test(void) 2488 + { 2489 + __current_set_polling(); 2490 + 2491 + /* 2492 + * Polling state must be visible before we test NEED_RESCHED, 2493 + * paired by resched_task() 2494 + */ 2495 + smp_mb(); 2496 + 2497 + return unlikely(tif_need_resched()); 2498 + } 2499 + 2500 + static inline void __current_clr_polling(void) 2488 2501 { 2489 2502 current_thread_info()->status &= ~TS_POLLING; 2490 - smp_mb__after_clear_bit(); 2503 + } 2504 + 2505 + static inline bool __must_check current_clr_polling_and_test(void) 2506 + { 2507 + __current_clr_polling(); 2508 + 2509 + /* 2510 + * Polling state must be visible before we test NEED_RESCHED, 2511 + * paired by resched_task() 2512 + */ 2513 + smp_mb(); 2514 + 2515 + return unlikely(tif_need_resched()); 2491 2516 } 2492 2517 #elif defined(TIF_POLLING_NRFLAG) 2493 2518 static inline int tsk_is_polling(struct task_struct *p) 2494 2519 { 2495 2520 return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); 2496 2521 } 2497 - static inline void current_set_polling(void) 2522 + 2523 + static inline void __current_set_polling(void) 2498 2524 { 2499 2525 set_thread_flag(TIF_POLLING_NRFLAG); 2500 2526 } 2501 2527 2502 - static inline void current_clr_polling(void) 2528 + static inline bool __must_check current_set_polling_and_test(void) 2529 + { 2530 + __current_set_polling(); 2531 + 2532 + /* 2533 + * Polling state must be visible before we test NEED_RESCHED, 2534 + * paired by resched_task() 2535 + * 2536 + * XXX: assumes set/clear bit are identical barrier wise. 
2537 + */ 2538 + smp_mb__after_clear_bit(); 2539 + 2540 + return unlikely(tif_need_resched()); 2541 + } 2542 + 2543 + static inline void __current_clr_polling(void) 2503 2544 { 2504 2545 clear_thread_flag(TIF_POLLING_NRFLAG); 2505 2546 } 2547 + 2548 + static inline bool __must_check current_clr_polling_and_test(void) 2549 + { 2550 + __current_clr_polling(); 2551 + 2552 + /* 2553 + * Polling state must be visible before we test NEED_RESCHED, 2554 + * paired by resched_task() 2555 + */ 2556 + smp_mb__after_clear_bit(); 2557 + 2558 + return unlikely(tif_need_resched()); 2559 + } 2560 + 2506 2561 #else 2507 2562 static inline int tsk_is_polling(struct task_struct *p) { return 0; } 2508 - static inline void current_set_polling(void) { } 2509 - static inline void current_clr_polling(void) { } 2563 + static inline void __current_set_polling(void) { } 2564 + static inline void __current_clr_polling(void) { } 2565 + 2566 + static inline bool __must_check current_set_polling_and_test(void) 2567 + { 2568 + return unlikely(tif_need_resched()); 2569 + } 2570 + static inline bool __must_check current_clr_polling_and_test(void) 2571 + { 2572 + return unlikely(tif_need_resched()); 2573 + } 2510 2574 #endif 2511 2575 2512 2576 /*
+2
include/linux/thread_info.h
··· 118 118 */ 119 119 } 120 120 121 + #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) 122 + 121 123 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK 122 124 /* 123 125 * An arch can define its own version of set_restore_sigmask() to get the
+4 -5
kernel/cpu/idle.c
··· 44 44 rcu_idle_enter(); 45 45 trace_cpu_idle_rcuidle(0, smp_processor_id()); 46 46 local_irq_enable(); 47 - while (!need_resched()) 47 + while (!tif_need_resched()) 48 48 cpu_relax(); 49 49 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 50 50 rcu_idle_exit(); ··· 92 92 if (cpu_idle_force_poll || tick_check_broadcast_expired()) { 93 93 cpu_idle_poll(); 94 94 } else { 95 - current_clr_polling(); 96 - if (!need_resched()) { 95 + if (!current_clr_polling_and_test()) { 97 96 stop_critical_timings(); 98 97 rcu_idle_enter(); 99 98 arch_cpu_idle(); ··· 102 103 } else { 103 104 local_irq_enable(); 104 105 } 105 - current_set_polling(); 106 + __current_set_polling(); 106 107 } 107 108 arch_cpu_idle_exit(); 108 109 } ··· 128 129 */ 129 130 boot_init_stack_canary(); 130 131 #endif 131 - current_set_polling(); 132 + __current_set_polling(); 132 133 arch_cpu_idle_prepare(); 133 134 cpu_idle_loop(); 134 135 }