x86: Simplify cpu_idle_wait

This patch also resolves hangs on boot:
http://lkml.org/lkml/2008/2/23/263
http://bugzilla.kernel.org/show_bug.cgi?id=10093

The bug caused an occasional (once every few reboots) 10-15 second hang
during boot on certain laptops.

Earlier, commit 40d6a146629b98d8e322b6f9332b182c7cbff3df added an
smp_call_function() to cpu_idle_wait() to kick CPUs sitting in tickless
idle. Looking at the cpu_idle_wait() code at that time, it seemed
over-engineered for an operation that is rarely performed (changing the
idle handler).

Below is a simplified version of cpu_idle_wait(): it just makes a dummy
smp_call_function() to all CPUs, forcing them out of the old idle
handler so that the next pass through the idle loop reloads pm_idle and
picks up the new handler. The smp_mb() orders the caller's pm_idle
update before the cross-call, and the cross-call waits for completion
on every CPU, so the old handler cannot be entered again once
cpu_idle_wait() returns. This also eliminates the cpu_idle_state
bookkeeping from the idle loop.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Venki Pallipadi and committed by Linus Torvalds (783e391b a1e58bbd)

2 files changed, 22 insertions(+), 72 deletions(-)
arch/x86/kernel/process_32.c (+11 -36)

--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ ... @@
  */
 void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 void disable_hlt(void)
 {
@@ ... @@
 	while (!need_resched()) {
 		void (*idle)(void);
 
-		if (__get_cpu_var(cpu_idle_state))
-			__get_cpu_var(cpu_idle_state) = 0;
-
 		check_pgt_cache();
 		rmb();
 		idle = pm_idle;
@@ ... @@
 {
 }
 
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
 void cpu_idle_wait(void)
 {
-	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map, tmp = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-	put_cpu();
-
-	cpus_clear(map);
-	for_each_online_cpu(cpu) {
-		per_cpu(cpu_idle_state, cpu) = 1;
-		cpu_set(cpu, map);
-	}
-
-	__get_cpu_var(cpu_idle_state) = 0;
-
-	wmb();
-	do {
-		ssleep(1);
-		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-				cpu_clear(cpu, map);
-		}
-		cpus_and(map, map, cpu_online_map);
-		/*
-		 * We waited 1 sec, if a CPU still did not call idle
-		 * it may be because it is in idle and not waking up
-		 * because it has nothing to do.
-		 * Give all the remaining CPUS a kick.
-		 */
-		smp_call_function_mask(map, do_nothing, NULL, 0);
-	} while (!cpus_empty(map));
-
-	set_cpus_allowed(current, tmp);
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 0, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
arch/x86/kernel/process_64.c (+11 -36)

--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ ... @@
  */
 void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 
@@ ... @@
 	while (!need_resched()) {
 		void (*idle)(void);
 
-		if (__get_cpu_var(cpu_idle_state))
-			__get_cpu_var(cpu_idle_state) = 0;
-
 		rmb();
 		idle = pm_idle;
 		if (!idle)
@@ ... @@
 {
 }
 
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
 void cpu_idle_wait(void)
 {
-	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map, tmp = current->cpus_allowed;
-
-	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
-	put_cpu();
-
-	cpus_clear(map);
-	for_each_online_cpu(cpu) {
-		per_cpu(cpu_idle_state, cpu) = 1;
-		cpu_set(cpu, map);
-	}
-
-	__get_cpu_var(cpu_idle_state) = 0;
-
-	wmb();
-	do {
-		ssleep(1);
-		for_each_online_cpu(cpu) {
-			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
-				cpu_clear(cpu, map);
-		}
-		cpus_and(map, map, cpu_online_map);
-		/*
-		 * We waited 1 sec, if a CPU still did not call idle
-		 * it may be because it is in idle and not waking up
-		 * because it has nothing to do.
-		 * Give all the remaining CPUS a kick.
-		 */
-		smp_call_function_mask(map, do_nothing, 0, 0);
-	} while (!cpus_empty(map));
-
-	set_cpus_allowed(current, tmp);
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 0, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);