Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nohz: Allow rcu extended quiescent state handling separately from tick stop

It is assumed that rcu won't be used once we switch to tickless
mode and until we restart the tick. However this is not always
true, as in x86-64 where we dereference the idle notifiers after
the tick is stopped.

To prepare for fixing this, add two new APIs:
tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu().

If no use of RCU is made in the idle loop between
tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, the arch
must instead call the new *_norcu() versions such that the arch doesn't
need to call rcu_idle_enter() and rcu_idle_exit().

Otherwise the arch must call tick_nohz_idle_enter() and
tick_nohz_idle_exit() and also call explicitly:

- rcu_idle_enter() after its last use of RCU before the CPU is put
to sleep.
- rcu_idle_exit() before the first use of RCU after the CPU is woken
up.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: David Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

authored by

Frederic Weisbecker and committed by
Paul E. McKenney
2bbb6817 280f0677

+90 -49
+2 -2
arch/arm/kernel/process.c
··· 183 183 184 184 /* endless idle loop with no priority at all */ 185 185 while (1) { 186 - tick_nohz_idle_enter(); 186 + tick_nohz_idle_enter_norcu(); 187 187 leds_event(led_idle_start); 188 188 while (!need_resched()) { 189 189 #ifdef CONFIG_HOTPLUG_CPU ··· 213 213 } 214 214 } 215 215 leds_event(led_idle_end); 216 - tick_nohz_idle_exit(); 216 + tick_nohz_idle_exit_norcu(); 217 217 preempt_enable_no_resched(); 218 218 schedule(); 219 219 preempt_disable();
+2 -2
arch/avr32/kernel/process.c
··· 34 34 { 35 35 /* endless idle loop with no priority at all */ 36 36 while (1) { 37 - tick_nohz_idle_enter(); 37 + tick_nohz_idle_enter_norcu(); 38 38 while (!need_resched()) 39 39 cpu_idle_sleep(); 40 - tick_nohz_idle_exit(); 40 + tick_nohz_idle_exit_norcu(); 41 41 preempt_enable_no_resched(); 42 42 schedule(); 43 43 preempt_disable();
+2 -2
arch/blackfin/kernel/process.c
··· 88 88 #endif 89 89 if (!idle) 90 90 idle = default_idle; 91 - tick_nohz_idle_enter(); 91 + tick_nohz_idle_enter_norcu(); 92 92 while (!need_resched()) 93 93 idle(); 94 - tick_nohz_idle_exit(); 94 + tick_nohz_idle_exit_norcu(); 95 95 preempt_enable_no_resched(); 96 96 schedule(); 97 97 preempt_disable();
+2 -2
arch/microblaze/kernel/process.c
··· 103 103 if (!idle) 104 104 idle = default_idle; 105 105 106 - tick_nohz_idle_enter(); 106 + tick_nohz_idle_enter_norcu(); 107 107 while (!need_resched()) 108 108 idle(); 109 - tick_nohz_idle_exit(); 109 + tick_nohz_idle_exit_norcu(); 110 110 111 111 preempt_enable_no_resched(); 112 112 schedule();
+2 -2
arch/mips/kernel/process.c
··· 56 56 57 57 /* endless idle loop with no priority at all */ 58 58 while (1) { 59 - tick_nohz_idle_enter(); 59 + tick_nohz_idle_enter_norcu(); 60 60 while (!need_resched() && cpu_online(cpu)) { 61 61 #ifdef CONFIG_MIPS_MT_SMTC 62 62 extern void smtc_idle_loop_hook(void); ··· 77 77 system_state == SYSTEM_BOOTING)) 78 78 play_dead(); 79 79 #endif 80 - tick_nohz_idle_exit(); 80 + tick_nohz_idle_exit_norcu(); 81 81 preempt_enable_no_resched(); 82 82 schedule(); 83 83 preempt_disable();
+2 -2
arch/openrisc/kernel/idle.c
··· 51 51 52 52 /* endless idle loop with no priority at all */ 53 53 while (1) { 54 - tick_nohz_idle_enter(); 54 + tick_nohz_idle_enter_norcu(); 55 55 56 56 while (!need_resched()) { 57 57 check_pgt_cache(); ··· 69 69 set_thread_flag(TIF_POLLING_NRFLAG); 70 70 } 71 71 72 - tick_nohz_idle_exit(); 72 + tick_nohz_idle_exit_norcu(); 73 73 preempt_enable_no_resched(); 74 74 schedule(); 75 75 preempt_disable();
+2 -2
arch/powerpc/kernel/idle.c
··· 56 56 57 57 set_thread_flag(TIF_POLLING_NRFLAG); 58 58 while (1) { 59 - tick_nohz_idle_enter(); 59 + tick_nohz_idle_enter_norcu(); 60 60 while (!need_resched() && !cpu_should_die()) { 61 61 ppc64_runlatch_off(); 62 62 ··· 93 93 94 94 HMT_medium(); 95 95 ppc64_runlatch_on(); 96 - tick_nohz_idle_exit(); 96 + tick_nohz_idle_exit_norcu(); 97 97 preempt_enable_no_resched(); 98 98 if (cpu_should_die()) 99 99 cpu_die();
+4 -4
arch/powerpc/platforms/iseries/setup.c
··· 563 563 static void iseries_shared_idle(void) 564 564 { 565 565 while (1) { 566 - tick_nohz_idle_enter(); 566 + tick_nohz_idle_enter_norcu(); 567 567 while (!need_resched() && !hvlpevent_is_pending()) { 568 568 local_irq_disable(); 569 569 ppc64_runlatch_off(); ··· 577 577 } 578 578 579 579 ppc64_runlatch_on(); 580 - tick_nohz_idle_exit(); 580 + tick_nohz_idle_exit_norcu(); 581 581 582 582 if (hvlpevent_is_pending()) 583 583 process_iSeries_events(); ··· 593 593 set_thread_flag(TIF_POLLING_NRFLAG); 594 594 595 595 while (1) { 596 - tick_nohz_idle_enter(); 596 + tick_nohz_idle_enter_norcu(); 597 597 if (!need_resched()) { 598 598 while (!need_resched()) { 599 599 ppc64_runlatch_off(); ··· 610 610 } 611 611 612 612 ppc64_runlatch_on(); 613 - tick_nohz_idle_exit(); 613 + tick_nohz_idle_exit_norcu(); 614 614 preempt_enable_no_resched(); 615 615 schedule(); 616 616 preempt_disable();
+2 -2
arch/s390/kernel/process.c
··· 91 91 void cpu_idle(void) 92 92 { 93 93 for (;;) { 94 - tick_nohz_idle_enter(); 94 + tick_nohz_idle_enter_norcu(); 95 95 while (!need_resched()) 96 96 default_idle(); 97 - tick_nohz_idle_exit(); 97 + tick_nohz_idle_exit_norcu(); 98 98 preempt_enable_no_resched(); 99 99 schedule(); 100 100 preempt_disable();
+2 -2
arch/sh/kernel/idle.c
··· 89 89 90 90 /* endless idle loop with no priority at all */ 91 91 while (1) { 92 - tick_nohz_idle_enter(); 92 + tick_nohz_idle_enter_norcu(); 93 93 94 94 while (!need_resched()) { 95 95 check_pgt_cache(); ··· 111 111 start_critical_timings(); 112 112 } 113 113 114 - tick_nohz_idle_exit(); 114 + tick_nohz_idle_exit_norcu(); 115 115 preempt_enable_no_resched(); 116 116 schedule(); 117 117 preempt_disable();
+2 -2
arch/sparc/kernel/process_64.c
··· 95 95 set_thread_flag(TIF_POLLING_NRFLAG); 96 96 97 97 while(1) { 98 - tick_nohz_idle_enter(); 98 + tick_nohz_idle_enter_norcu(); 99 99 100 100 while (!need_resched() && !cpu_is_offline(cpu)) 101 101 sparc64_yield(cpu); 102 102 103 - tick_nohz_idle_exit(); 103 + tick_nohz_idle_exit_norcu(); 104 104 105 105 preempt_enable_no_resched(); 106 106
+2 -2
arch/tile/kernel/process.c
··· 85 85 86 86 /* endless idle loop with no priority at all */ 87 87 while (1) { 88 - tick_nohz_idle_enter(); 88 + tick_nohz_idle_enter_norcu(); 89 89 while (!need_resched()) { 90 90 if (cpu_is_offline(cpu)) 91 91 BUG(); /* no HOTPLUG_CPU */ ··· 105 105 local_irq_enable(); 106 106 current_thread_info()->status |= TS_POLLING; 107 107 } 108 - tick_nohz_idle_exit(); 108 + tick_nohz_idle_exit_norcu(); 109 109 preempt_enable_no_resched(); 110 110 schedule(); 111 111 preempt_disable();
+2 -2
arch/um/kernel/process.c
··· 246 246 if (need_resched()) 247 247 schedule(); 248 248 249 - tick_nohz_idle_enter(); 249 + tick_nohz_idle_enter_norcu(); 250 250 nsecs = disable_timer(); 251 251 idle_sleep(nsecs); 252 - tick_nohz_idle_exit(); 252 + tick_nohz_idle_exit_norcu(); 253 253 } 254 254 } 255 255
+2 -2
arch/unicore32/kernel/process.c
··· 55 55 { 56 56 /* endless idle loop with no priority at all */ 57 57 while (1) { 58 - tick_nohz_idle_enter(); 58 + tick_nohz_idle_enter_norcu(); 59 59 while (!need_resched()) { 60 60 local_irq_disable(); 61 61 stop_critical_timings(); ··· 63 63 local_irq_enable(); 64 64 start_critical_timings(); 65 65 } 66 - tick_nohz_idle_exit(); 66 + tick_nohz_idle_exit_norcu(); 67 67 preempt_enable_no_resched(); 68 68 schedule(); 69 69 preempt_disable();
+2 -2
arch/x86/kernel/process_32.c
··· 99 99 100 100 /* endless idle loop with no priority at all */ 101 101 while (1) { 102 - tick_nohz_idle_enter(); 102 + tick_nohz_idle_enter_norcu(); 103 103 while (!need_resched()) { 104 104 105 105 check_pgt_cache(); ··· 116 116 pm_idle(); 117 117 start_critical_timings(); 118 118 } 119 - tick_nohz_idle_exit(); 119 + tick_nohz_idle_exit_norcu(); 120 120 preempt_enable_no_resched(); 121 121 schedule(); 122 122 preempt_disable();
+2 -2
arch/x86/kernel/process_64.c
··· 122 122 123 123 /* endless idle loop with no priority at all */ 124 124 while (1) { 125 - tick_nohz_idle_enter(); 125 + tick_nohz_idle_enter_norcu(); 126 126 while (!need_resched()) { 127 127 128 128 rmb(); ··· 149 149 __exit_idle(); 150 150 } 151 151 152 - tick_nohz_idle_exit(); 152 + tick_nohz_idle_exit_norcu(); 153 153 preempt_enable_no_resched(); 154 154 schedule(); 155 155 preempt_disable();
+43 -3
include/linux/tick.h
··· 7 7 #define _LINUX_TICK_H 8 8 9 9 #include <linux/clockchips.h> 10 + #include <linux/irqflags.h> 10 11 11 12 #ifdef CONFIG_GENERIC_CLOCKEVENTS 12 13 ··· 122 121 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ 123 122 124 123 # ifdef CONFIG_NO_HZ 125 - extern void tick_nohz_idle_enter(void); 124 + extern void __tick_nohz_idle_enter(void); 125 + static inline void tick_nohz_idle_enter(void) 126 + { 127 + local_irq_disable(); 128 + __tick_nohz_idle_enter(); 129 + local_irq_enable(); 130 + } 126 131 extern void tick_nohz_idle_exit(void); 132 + 133 + /* 134 + * Call this pair of function if the arch doesn't make any use 135 + * of RCU in-between. You won't need to call rcu_idle_enter() and 136 + * rcu_idle_exit(). 137 + * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit() 138 + * and explicitly tell RCU about the window around the place the CPU enters low 139 + * power mode where no RCU use is made. This is done by calling rcu_idle_enter() 140 + * after the last use of RCU before the CPU is put to sleep and by calling 141 + * rcu_idle_exit() before the first use of RCU after the CPU woke up. 142 + */ 143 + static inline void tick_nohz_idle_enter_norcu(void) 144 + { 145 + /* 146 + * Also call rcu_idle_enter() in the irq disabled section even 147 + * if it disables irq itself. 148 + * Just an optimization that prevents from an interrupt happening 149 + * between it and __tick_nohz_idle_enter() to lose time to help 150 + * completing a grace period while we could be in extended grace 151 + * period already. 
152 + */ 153 + local_irq_disable(); 154 + __tick_nohz_idle_enter(); 155 + rcu_idle_enter(); 156 + local_irq_enable(); 157 + } 158 + static inline void tick_nohz_idle_exit_norcu(void) 159 + { 160 + rcu_idle_exit(); 161 + tick_nohz_idle_exit(); 162 + } 127 163 extern void tick_nohz_irq_exit(void); 128 164 extern ktime_t tick_nohz_get_sleep_length(void); 129 165 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 130 166 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); 131 167 # else 132 - static inline void tick_nohz_idle_enter(void) 168 + static inline void tick_nohz_idle_enter(void) { } 169 + static inline void tick_nohz_idle_exit(void) { } 170 + static inline void tick_nohz_idle_enter_norcu(void) 133 171 { 134 172 rcu_idle_enter(); 135 173 } 136 - static inline void tick_nohz_idle_exit(void) 174 + static inline void tick_nohz_idle_exit_norcu(void) 137 175 { 138 176 rcu_idle_exit(); 139 177 }
+13 -12
kernel/time/tick-sched.c
··· 453 453 * 454 454 * When the next event is more than a tick into the future, stop the idle tick 455 455 * Called when we start the idle loop. 456 - * This also enters into RCU extended quiescent state so that this CPU doesn't 457 - * need anymore to be part of any global grace period completion. This way 458 - * the tick can be stopped safely as we don't need to report quiescent states. 456 + * 457 + * If no use of RCU is made in the idle loop between 458 + * tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, then 459 + * tick_nohz_idle_enter_norcu() should be called instead and the arch 460 + * doesn't need to call rcu_idle_enter() and rcu_idle_exit() explicitly. 461 + * 462 + * Otherwise the arch is responsible of calling: 463 + * 464 + * - rcu_idle_enter() after its last use of RCU before the CPU is put 465 + * to sleep. 466 + * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. 459 467 */ 460 - void tick_nohz_idle_enter(void) 468 + void __tick_nohz_idle_enter(void) 461 469 { 462 470 struct tick_sched *ts; 463 - 464 - WARN_ON_ONCE(irqs_disabled()); 465 - 466 - local_irq_disable(); 467 471 468 472 ts = &__get_cpu_var(tick_cpu_sched); 469 473 /* ··· 477 473 */ 478 474 ts->inidle = 1; 479 475 tick_nohz_stop_sched_tick(ts); 480 - rcu_idle_enter(); 481 - 482 - local_irq_enable(); 483 476 } 484 477 485 478 /** ··· 552 551 ktime_t now; 553 552 554 553 local_irq_disable(); 555 - rcu_idle_exit(); 554 + 556 555 if (ts->idle_active || (ts->inidle && ts->tick_stopped)) 557 556 now = ktime_get(); 558 557