[PATCH] clocksource init adjustments (fix bug #7426)

This patch resolves the issue found here:
http://bugme.osdl.org/show_bug.cgi?id=7426

The basic summary is:
Currently we register most of i386/x86_64 clocksources at module_init
time. Then we enable clocksource selection at late_initcall time. This
causes some problems for drivers that use gettimeofday for init
calibration routines (specifically the es1968 driver in this case),
where during module_init, the only clocksource available is the low-res
jiffies clocksource. This may cause slight calibration errors, due to
the small sampling time used.

It should be noted that drivers that require fine grained time may not
function on architectures that do not have better than jiffies
resolution timekeeping (there are a few). However, this does not
discount the reasonable need for such fine-grained timekeeping at init
time.

Thus the solution here is to register clocksources earlier (ideally when
the hardware is being initialized), and then we enable clocksource
selection at fs_initcall (before device_initcall).

This patch should probably get some testing time in -mm, since
clocksource selection is one of the most important issues for correct
timekeeping, and I've only been able to test this on a few of my own
boxes.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by john stultz and committed by Linus Torvalds 6bb74df4 45407680

+135 -166
+37 -43
arch/i386/kernel/hpet.c
··· 201 } 202 203 /* 204 * Try to setup the HPET timer 205 */ 206 int __init hpet_enable(void) 207 { 208 unsigned long id; 209 uint64_t hpet_freq; 210 211 if (!is_hpet_capable()) 212 return 0; ··· 271 /* Start the counter */ 272 hpet_start_counter(); 273 274 if (id & HPET_ID_LEGSUP) { 275 hpet_enable_int(); 276 hpet_reserve_platform_timers(id); ··· 310 return 0; 311 } 312 313 - /* 314 - * Clock source related code 315 - */ 316 - static cycle_t read_hpet(void) 317 - { 318 - return (cycle_t)hpet_readl(HPET_COUNTER); 319 - } 320 - 321 - static struct clocksource clocksource_hpet = { 322 - .name = "hpet", 323 - .rating = 250, 324 - .read = read_hpet, 325 - .mask = HPET_MASK, 326 - .shift = HPET_SHIFT, 327 - .flags = CLOCK_SOURCE_IS_CONTINUOUS, 328 - }; 329 - 330 - static int __init init_hpet_clocksource(void) 331 - { 332 - u64 tmp; 333 - 334 - if (!hpet_virt_address) 335 - return -ENODEV; 336 - 337 - /* 338 - * hpet period is in femto seconds per cycle 339 - * so we need to convert this to ns/cyc units 340 - * aproximated by mult/2^shift 341 - * 342 - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 343 - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 344 - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 345 - * (fsec/cyc << shift)/1000000 = mult 346 - * (hpet_period << shift)/FSEC_PER_NSEC = mult 347 - */ 348 - tmp = (u64)hpet_period << HPET_SHIFT; 349 - do_div(tmp, FSEC_PER_NSEC); 350 - clocksource_hpet.mult = (u32)tmp; 351 - 352 - return clocksource_register(&clocksource_hpet); 353 - } 354 - 355 - module_init(init_hpet_clocksource); 356 357 #ifdef CONFIG_HPET_EMULATE_RTC 358
··· 201 } 202 203 /* 204 + * Clock source related code 205 + */ 206 + static cycle_t read_hpet(void) 207 + { 208 + return (cycle_t)hpet_readl(HPET_COUNTER); 209 + } 210 + 211 + static struct clocksource clocksource_hpet = { 212 + .name = "hpet", 213 + .rating = 250, 214 + .read = read_hpet, 215 + .mask = HPET_MASK, 216 + .shift = HPET_SHIFT, 217 + .flags = CLOCK_SOURCE_IS_CONTINUOUS, 218 + }; 219 + 220 + /* 221 * Try to setup the HPET timer 222 */ 223 int __init hpet_enable(void) 224 { 225 unsigned long id; 226 uint64_t hpet_freq; 227 + u64 tmp; 228 229 if (!is_hpet_capable()) 230 return 0; ··· 253 /* Start the counter */ 254 hpet_start_counter(); 255 256 + /* Initialize and register HPET clocksource 257 + * 258 + * hpet period is in femto seconds per cycle 259 + * so we need to convert this to ns/cyc units 260 + * aproximated by mult/2^shift 261 + * 262 + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 263 + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 264 + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 265 + * (fsec/cyc << shift)/1000000 = mult 266 + * (hpet_period << shift)/FSEC_PER_NSEC = mult 267 + */ 268 + tmp = (u64)hpet_period << HPET_SHIFT; 269 + do_div(tmp, FSEC_PER_NSEC); 270 + clocksource_hpet.mult = (u32)tmp; 271 + 272 + clocksource_register(&clocksource_hpet); 273 + 274 + 275 if (id & HPET_ID_LEGSUP) { 276 hpet_enable_int(); 277 hpet_reserve_platform_timers(id); ··· 273 return 0; 274 } 275 276 277 #ifdef CONFIG_HPET_EMULATE_RTC 278
+1 -1
arch/i386/kernel/i8253.c
··· 195 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); 196 return clocksource_register(&clocksource_pit); 197 } 198 - module_init(init_pit_clocksource);
··· 195 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); 196 return clocksource_register(&clocksource_pit); 197 } 198 + arch_initcall(init_pit_clocksource);
-1
arch/i386/kernel/setup.c
··· 657 conswitchp = &dummy_con; 658 #endif 659 #endif 660 - tsc_init(); 661 }
··· 657 conswitchp = &dummy_con; 658 #endif 659 #endif 660 }
+1
arch/i386/kernel/time.c
··· 279 */ 280 void __init time_init(void) 281 { 282 late_time_init = choose_time_init(); 283 }
··· 279 */ 280 void __init time_init(void) 281 { 282 + tsc_init(); 283 late_time_init = choose_time_init(); 284 }
+37 -46
arch/i386/kernel/tsc.c
··· 184 185 EXPORT_SYMBOL(recalibrate_cpu_khz); 186 187 - void __init tsc_init(void) 188 - { 189 - if (!cpu_has_tsc || tsc_disable) 190 - goto out_no_tsc; 191 - 192 - cpu_khz = calculate_cpu_khz(); 193 - tsc_khz = cpu_khz; 194 - 195 - if (!cpu_khz) 196 - goto out_no_tsc; 197 - 198 - printk("Detected %lu.%03lu MHz processor.\n", 199 - (unsigned long)cpu_khz / 1000, 200 - (unsigned long)cpu_khz % 1000); 201 - 202 - set_cyc2ns_scale(cpu_khz); 203 - use_tsc_delay(); 204 - return; 205 - 206 - out_no_tsc: 207 - /* 208 - * Set the tsc_disable flag if there's no TSC support, this 209 - * makes it a fast flag for the kernel to see whether it 210 - * should be using the TSC. 211 - */ 212 - tsc_disable = 1; 213 - } 214 - 215 #ifdef CONFIG_CPU_FREQ 216 217 /* ··· 353 static inline void check_geode_tsc_reliable(void) { } 354 #endif 355 356 - static int __init init_tsc_clocksource(void) 357 { 358 359 - if (cpu_has_tsc && tsc_khz && !tsc_disable) { 360 - /* check blacklist */ 361 - dmi_check_system(bad_tsc_dmi_table); 362 363 - unsynchronized_tsc(); 364 - check_geode_tsc_reliable(); 365 - current_tsc_khz = tsc_khz; 366 - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 367 clocksource_tsc.shift); 368 - /* lower the rating if we already know its unstable: */ 369 - if (check_tsc_unstable()) { 370 - clocksource_tsc.rating = 0; 371 - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 372 - } 373 - 374 - return clocksource_register(&clocksource_tsc); 375 } 376 377 - return 0; 378 } 379 - 380 - module_init(init_tsc_clocksource);
··· 184 185 EXPORT_SYMBOL(recalibrate_cpu_khz); 186 187 #ifdef CONFIG_CPU_FREQ 188 189 /* ··· 381 static inline void check_geode_tsc_reliable(void) { } 382 #endif 383 384 + 385 + void __init tsc_init(void) 386 { 387 + if (!cpu_has_tsc || tsc_disable) 388 + goto out_no_tsc; 389 390 + cpu_khz = calculate_cpu_khz(); 391 + tsc_khz = cpu_khz; 392 393 + if (!cpu_khz) 394 + goto out_no_tsc; 395 + 396 + printk("Detected %lu.%03lu MHz processor.\n", 397 + (unsigned long)cpu_khz / 1000, 398 + (unsigned long)cpu_khz % 1000); 399 + 400 + set_cyc2ns_scale(cpu_khz); 401 + use_tsc_delay(); 402 + 403 + /* Check and install the TSC clocksource */ 404 + dmi_check_system(bad_tsc_dmi_table); 405 + 406 + unsynchronized_tsc(); 407 + check_geode_tsc_reliable(); 408 + current_tsc_khz = tsc_khz; 409 + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 410 clocksource_tsc.shift); 411 + /* lower the rating if we already know its unstable: */ 412 + if (check_tsc_unstable()) { 413 + clocksource_tsc.rating = 0; 414 + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 415 } 416 + clocksource_register(&clocksource_tsc); 417 418 + return; 419 + 420 + out_no_tsc: 421 + /* 422 + * Set the tsc_disable flag if there's no TSC support, this 423 + * makes it a fast flag for the kernel to see whether it 424 + * should be using the TSC. 425 + */ 426 + tsc_disable = 1; 427 }
+44 -65
arch/x86_64/kernel/hpet.c
··· 12 #include <asm/timex.h> 13 #include <asm/hpet.h> 14 15 int nohpet __initdata; 16 17 unsigned long hpet_address; ··· 112 return 0; 113 } 114 115 int hpet_arch_init(void) 116 { 117 unsigned int id; 118 119 if (!hpet_address) 120 return -1; ··· 159 hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; 160 161 hpet_use_timer = (id & HPET_ID_LEGSUP); 162 163 return hpet_timer_stop_set_go(hpet_tick); 164 } ··· 488 } 489 490 __setup("nohpet", nohpet_setup); 491 - 492 - #define HPET_MASK 0xFFFFFFFF 493 - #define HPET_SHIFT 22 494 - 495 - /* FSEC = 10^-15 NSEC = 10^-9 */ 496 - #define FSEC_PER_NSEC 1000000 497 - 498 - static void *hpet_ptr; 499 - 500 - static cycle_t read_hpet(void) 501 - { 502 - return (cycle_t)readl(hpet_ptr); 503 - } 504 - 505 - static cycle_t __vsyscall_fn vread_hpet(void) 506 - { 507 - return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 508 - } 509 - 510 - struct clocksource clocksource_hpet = { 511 - .name = "hpet", 512 - .rating = 250, 513 - .read = read_hpet, 514 - .mask = (cycle_t)HPET_MASK, 515 - .mult = 0, /* set below */ 516 - .shift = HPET_SHIFT, 517 - .flags = CLOCK_SOURCE_IS_CONTINUOUS, 518 - .vread = vread_hpet, 519 - }; 520 - 521 - static int __init init_hpet_clocksource(void) 522 - { 523 - unsigned long hpet_period; 524 - void __iomem *hpet_base; 525 - u64 tmp; 526 - 527 - if (!hpet_address) 528 - return -ENODEV; 529 - 530 - /* calculate the hpet address: */ 531 - hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 532 - hpet_ptr = hpet_base + HPET_COUNTER; 533 - 534 - /* calculate the frequency: */ 535 - hpet_period = readl(hpet_base + HPET_PERIOD); 536 - 537 - /* 538 - * hpet period is in femto seconds per cycle 539 - * so we need to convert this to ns/cyc units 540 - * aproximated by mult/2^shift 541 - * 542 - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 543 - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 544 - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 545 - * (fsec/cyc << 
shift)/1000000 = mult 546 - * (hpet_period << shift)/FSEC_PER_NSEC = mult 547 - */ 548 - tmp = (u64)hpet_period << HPET_SHIFT; 549 - do_div(tmp, FSEC_PER_NSEC); 550 - clocksource_hpet.mult = (u32)tmp; 551 - 552 - return clocksource_register(&clocksource_hpet); 553 - } 554 - 555 - module_init(init_hpet_clocksource);
··· 12 #include <asm/timex.h> 13 #include <asm/hpet.h> 14 15 + #define HPET_MASK 0xFFFFFFFF 16 + #define HPET_SHIFT 22 17 + 18 + /* FSEC = 10^-15 NSEC = 10^-9 */ 19 + #define FSEC_PER_NSEC 1000000 20 + 21 int nohpet __initdata; 22 23 unsigned long hpet_address; ··· 106 return 0; 107 } 108 109 + static cycle_t read_hpet(void) 110 + { 111 + return (cycle_t)hpet_readl(HPET_COUNTER); 112 + } 113 + 114 + static cycle_t __vsyscall_fn vread_hpet(void) 115 + { 116 + return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 117 + } 118 + 119 + struct clocksource clocksource_hpet = { 120 + .name = "hpet", 121 + .rating = 250, 122 + .read = read_hpet, 123 + .mask = (cycle_t)HPET_MASK, 124 + .mult = 0, /* set below */ 125 + .shift = HPET_SHIFT, 126 + .flags = CLOCK_SOURCE_IS_CONTINUOUS, 127 + .vread = vread_hpet, 128 + }; 129 + 130 int hpet_arch_init(void) 131 { 132 unsigned int id; 133 + u64 tmp; 134 135 if (!hpet_address) 136 return -1; ··· 131 hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; 132 133 hpet_use_timer = (id & HPET_ID_LEGSUP); 134 + 135 + /* 136 + * hpet period is in femto seconds per cycle 137 + * so we need to convert this to ns/cyc units 138 + * aproximated by mult/2^shift 139 + * 140 + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 141 + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 142 + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 143 + * (fsec/cyc << shift)/1000000 = mult 144 + * (hpet_period << shift)/FSEC_PER_NSEC = mult 145 + */ 146 + tmp = (u64)hpet_period << HPET_SHIFT; 147 + do_div(tmp, FSEC_PER_NSEC); 148 + clocksource_hpet.mult = (u32)tmp; 149 + clocksource_register(&clocksource_hpet); 150 151 return hpet_timer_stop_set_go(hpet_tick); 152 } ··· 444 } 445 446 __setup("nohpet", nohpet_setup);
+2
arch/x86_64/kernel/time.c
··· 358 set_cyc2ns_scale(cpu_khz); 359 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 360 cpu_khz / 1000, cpu_khz % 1000); 361 setup_irq(0, &irq0); 362 } 363
··· 358 set_cyc2ns_scale(cpu_khz); 359 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 360 cpu_khz / 1000, cpu_khz % 1000); 361 + init_tsc_clocksource(); 362 + 363 setup_irq(0, &irq0); 364 } 365
+2 -5
arch/x86_64/kernel/tsc.c
··· 210 } 211 EXPORT_SYMBOL_GPL(mark_tsc_unstable); 212 213 - static int __init init_tsc_clocksource(void) 214 { 215 if (!notsc) { 216 clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, ··· 218 if (check_tsc_unstable()) 219 clocksource_tsc.rating = 0; 220 221 - return clocksource_register(&clocksource_tsc); 222 } 223 - return 0; 224 } 225 - 226 - module_init(init_tsc_clocksource);
··· 210 } 211 EXPORT_SYMBOL_GPL(mark_tsc_unstable); 212 213 + void __init init_tsc_clocksource(void) 214 { 215 if (!notsc) { 216 clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, ··· 218 if (check_tsc_unstable()) 219 clocksource_tsc.rating = 0; 220 221 + clocksource_register(&clocksource_tsc); 222 } 223 }
+4 -1
drivers/clocksource/acpi_pm.c
··· 214 return clocksource_register(&clocksource_acpi_pm); 215 } 216 217 - module_init(init_acpi_pm_clocksource);
··· 214 return clocksource_register(&clocksource_acpi_pm); 215 } 216 217 + /* We use fs_initcall because we want the PCI fixups to have run 218 + * but we still need to load before device_initcall 219 + */ 220 + fs_initcall(init_acpi_pm_clocksource);
+1 -1
drivers/clocksource/cyclone.c
··· 116 return clocksource_register(&clocksource_cyclone); 117 } 118 119 - module_init(init_cyclone_clocksource);
··· 116 return clocksource_register(&clocksource_cyclone); 117 } 118 119 + arch_initcall(init_cyclone_clocksource);
+1
include/asm-x86_64/tsc.h
··· 55 extern void tsc_init(void); 56 extern void mark_tsc_unstable(void); 57 extern int unsynchronized_tsc(void); 58 59 /* 60 * Boot-time check whether the TSCs are synchronized across
··· 55 extern void tsc_init(void); 56 extern void mark_tsc_unstable(void); 57 extern int unsynchronized_tsc(void); 58 + extern void init_tsc_clocksource(void); 59 60 /* 61 * Boot-time check whether the TSCs are synchronized across
+5 -3
kernel/time/clocksource.c
··· 55 static char override_name[32]; 56 static int finished_booting; 57 58 - /* clocksource_done_booting - Called near the end of bootup 59 * 60 - * Hack to avoid lots of clocksource churn at boot time 61 */ 62 static int __init clocksource_done_booting(void) 63 { 64 finished_booting = 1; 65 return 0; 66 } 67 - late_initcall(clocksource_done_booting); 68 69 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG 70 static LIST_HEAD(watchdog_list);
··· 55 static char override_name[32]; 56 static int finished_booting; 57 58 + /* clocksource_done_booting - Called near the end of core bootup 59 * 60 + * Hack to avoid lots of clocksource churn at boot time. 61 + * We use fs_initcall because we want this to start before 62 + * device_initcall but after subsys_initcall. 63 */ 64 static int __init clocksource_done_booting(void) 65 { 66 finished_booting = 1; 67 return 0; 68 } 69 + fs_initcall(clocksource_done_booting); 70 71 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG 72 static LIST_HEAD(watchdog_list);