[PATCH] clocksource init adjustments (fix bug #7426)

This patch resolves the issue found here:
http://bugme.osdl.org/show_bug.cgi?id=7426

The basic summary is:
Currently we register most of i386/x86_64 clocksources at module_init
time. Then we enable clocksource selection at late_initcall time. This
causes some problems for drivers that use gettimeofday for init
calibration routines (specifically the es1968 driver in this case),
where during module_init, the only clocksource available is the low-res
jiffies clocksource. This may cause slight calibration errors, due to
the small sampling time used.

It should be noted that drivers that require fine grained time may not
function on architectures that do not have better than jiffies
resolution timekeeping (there are a few). However, this does not
discount the reasonable need for such fine-grained timekeeping at init
time.

Thus the solution here is to register clocksources earlier (ideally when
the hardware is being initialized), and then we enable clocksource
selection at fs_initcall (before device_initcall).

This patch should probably get some testing time in -mm, since
clocksource selection is one of the most important issues for correct
timekeeping, and I've only been able to test this on a few of my own
boxes.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by john stultz and committed by Linus Torvalds 6bb74df4 45407680

+135 -166
+37 -43
arch/i386/kernel/hpet.c
··· 201 201 } 202 202 203 203 /* 204 + * Clock source related code 205 + */ 206 + static cycle_t read_hpet(void) 207 + { 208 + return (cycle_t)hpet_readl(HPET_COUNTER); 209 + } 210 + 211 + static struct clocksource clocksource_hpet = { 212 + .name = "hpet", 213 + .rating = 250, 214 + .read = read_hpet, 215 + .mask = HPET_MASK, 216 + .shift = HPET_SHIFT, 217 + .flags = CLOCK_SOURCE_IS_CONTINUOUS, 218 + }; 219 + 220 + /* 204 221 * Try to setup the HPET timer 205 222 */ 206 223 int __init hpet_enable(void) 207 224 { 208 225 unsigned long id; 209 226 uint64_t hpet_freq; 227 + u64 tmp; 210 228 211 229 if (!is_hpet_capable()) 212 230 return 0; ··· 271 253 /* Start the counter */ 272 254 hpet_start_counter(); 273 255 256 + /* Initialize and register HPET clocksource 257 + * 258 + * hpet period is in femto seconds per cycle 259 + * so we need to convert this to ns/cyc units 260 + * aproximated by mult/2^shift 261 + * 262 + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 263 + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 264 + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 265 + * (fsec/cyc << shift)/1000000 = mult 266 + * (hpet_period << shift)/FSEC_PER_NSEC = mult 267 + */ 268 + tmp = (u64)hpet_period << HPET_SHIFT; 269 + do_div(tmp, FSEC_PER_NSEC); 270 + clocksource_hpet.mult = (u32)tmp; 271 + 272 + clocksource_register(&clocksource_hpet); 273 + 274 + 274 275 if (id & HPET_ID_LEGSUP) { 275 276 hpet_enable_int(); 276 277 hpet_reserve_platform_timers(id); ··· 310 273 return 0; 311 274 } 312 275 313 - /* 314 - * Clock source related code 315 - */ 316 - static cycle_t read_hpet(void) 317 - { 318 - return (cycle_t)hpet_readl(HPET_COUNTER); 319 - } 320 - 321 - static struct clocksource clocksource_hpet = { 322 - .name = "hpet", 323 - .rating = 250, 324 - .read = read_hpet, 325 - .mask = HPET_MASK, 326 - .shift = HPET_SHIFT, 327 - .flags = CLOCK_SOURCE_IS_CONTINUOUS, 328 - }; 329 - 330 - static int __init init_hpet_clocksource(void) 331 - { 332 - u64 tmp; 333 - 334 - 
if (!hpet_virt_address) 335 - return -ENODEV; 336 - 337 - /* 338 - * hpet period is in femto seconds per cycle 339 - * so we need to convert this to ns/cyc units 340 - * aproximated by mult/2^shift 341 - * 342 - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 343 - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 344 - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 345 - * (fsec/cyc << shift)/1000000 = mult 346 - * (hpet_period << shift)/FSEC_PER_NSEC = mult 347 - */ 348 - tmp = (u64)hpet_period << HPET_SHIFT; 349 - do_div(tmp, FSEC_PER_NSEC); 350 - clocksource_hpet.mult = (u32)tmp; 351 - 352 - return clocksource_register(&clocksource_hpet); 353 - } 354 - 355 - module_init(init_hpet_clocksource); 356 276 357 277 #ifdef CONFIG_HPET_EMULATE_RTC 358 278
+1 -1
arch/i386/kernel/i8253.c
··· 195 195 clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); 196 196 return clocksource_register(&clocksource_pit); 197 197 } 198 - module_init(init_pit_clocksource); 198 + arch_initcall(init_pit_clocksource);
-1
arch/i386/kernel/setup.c
··· 657 657 conswitchp = &dummy_con; 658 658 #endif 659 659 #endif 660 - tsc_init(); 661 660 }
+1
arch/i386/kernel/time.c
··· 279 279 */ 280 280 void __init time_init(void) 281 281 { 282 + tsc_init(); 282 283 late_time_init = choose_time_init(); 283 284 }
+37 -46
arch/i386/kernel/tsc.c
··· 184 184 185 185 EXPORT_SYMBOL(recalibrate_cpu_khz); 186 186 187 - void __init tsc_init(void) 188 - { 189 - if (!cpu_has_tsc || tsc_disable) 190 - goto out_no_tsc; 191 - 192 - cpu_khz = calculate_cpu_khz(); 193 - tsc_khz = cpu_khz; 194 - 195 - if (!cpu_khz) 196 - goto out_no_tsc; 197 - 198 - printk("Detected %lu.%03lu MHz processor.\n", 199 - (unsigned long)cpu_khz / 1000, 200 - (unsigned long)cpu_khz % 1000); 201 - 202 - set_cyc2ns_scale(cpu_khz); 203 - use_tsc_delay(); 204 - return; 205 - 206 - out_no_tsc: 207 - /* 208 - * Set the tsc_disable flag if there's no TSC support, this 209 - * makes it a fast flag for the kernel to see whether it 210 - * should be using the TSC. 211 - */ 212 - tsc_disable = 1; 213 - } 214 - 215 187 #ifdef CONFIG_CPU_FREQ 216 188 217 189 /* ··· 353 381 static inline void check_geode_tsc_reliable(void) { } 354 382 #endif 355 383 356 - static int __init init_tsc_clocksource(void) 384 + 385 + void __init tsc_init(void) 357 386 { 387 + if (!cpu_has_tsc || tsc_disable) 388 + goto out_no_tsc; 358 389 359 - if (cpu_has_tsc && tsc_khz && !tsc_disable) { 360 - /* check blacklist */ 361 - dmi_check_system(bad_tsc_dmi_table); 390 + cpu_khz = calculate_cpu_khz(); 391 + tsc_khz = cpu_khz; 362 392 363 - unsynchronized_tsc(); 364 - check_geode_tsc_reliable(); 365 - current_tsc_khz = tsc_khz; 366 - clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 393 + if (!cpu_khz) 394 + goto out_no_tsc; 395 + 396 + printk("Detected %lu.%03lu MHz processor.\n", 397 + (unsigned long)cpu_khz / 1000, 398 + (unsigned long)cpu_khz % 1000); 399 + 400 + set_cyc2ns_scale(cpu_khz); 401 + use_tsc_delay(); 402 + 403 + /* Check and install the TSC clocksource */ 404 + dmi_check_system(bad_tsc_dmi_table); 405 + 406 + unsynchronized_tsc(); 407 + check_geode_tsc_reliable(); 408 + current_tsc_khz = tsc_khz; 409 + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 367 410 clocksource_tsc.shift); 368 - /* lower the rating if we already know its unstable: */ 369 
- if (check_tsc_unstable()) { 370 - clocksource_tsc.rating = 0; 371 - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 372 - } 373 - 374 - return clocksource_register(&clocksource_tsc); 411 + /* lower the rating if we already know its unstable: */ 412 + if (check_tsc_unstable()) { 413 + clocksource_tsc.rating = 0; 414 + clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 375 415 } 416 + clocksource_register(&clocksource_tsc); 376 417 377 - return 0; 418 + return; 419 + 420 + out_no_tsc: 421 + /* 422 + * Set the tsc_disable flag if there's no TSC support, this 423 + * makes it a fast flag for the kernel to see whether it 424 + * should be using the TSC. 425 + */ 426 + tsc_disable = 1; 378 427 } 379 - 380 - module_init(init_tsc_clocksource);
+44 -65
arch/x86_64/kernel/hpet.c
··· 12 12 #include <asm/timex.h> 13 13 #include <asm/hpet.h> 14 14 15 + #define HPET_MASK 0xFFFFFFFF 16 + #define HPET_SHIFT 22 17 + 18 + /* FSEC = 10^-15 NSEC = 10^-9 */ 19 + #define FSEC_PER_NSEC 1000000 20 + 15 21 int nohpet __initdata; 16 22 17 23 unsigned long hpet_address; ··· 112 106 return 0; 113 107 } 114 108 109 + static cycle_t read_hpet(void) 110 + { 111 + return (cycle_t)hpet_readl(HPET_COUNTER); 112 + } 113 + 114 + static cycle_t __vsyscall_fn vread_hpet(void) 115 + { 116 + return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 117 + } 118 + 119 + struct clocksource clocksource_hpet = { 120 + .name = "hpet", 121 + .rating = 250, 122 + .read = read_hpet, 123 + .mask = (cycle_t)HPET_MASK, 124 + .mult = 0, /* set below */ 125 + .shift = HPET_SHIFT, 126 + .flags = CLOCK_SOURCE_IS_CONTINUOUS, 127 + .vread = vread_hpet, 128 + }; 129 + 115 130 int hpet_arch_init(void) 116 131 { 117 132 unsigned int id; 133 + u64 tmp; 118 134 119 135 if (!hpet_address) 120 136 return -1; ··· 159 131 hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period; 160 132 161 133 hpet_use_timer = (id & HPET_ID_LEGSUP); 134 + 135 + /* 136 + * hpet period is in femto seconds per cycle 137 + * so we need to convert this to ns/cyc units 138 + * aproximated by mult/2^shift 139 + * 140 + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 141 + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 142 + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 143 + * (fsec/cyc << shift)/1000000 = mult 144 + * (hpet_period << shift)/FSEC_PER_NSEC = mult 145 + */ 146 + tmp = (u64)hpet_period << HPET_SHIFT; 147 + do_div(tmp, FSEC_PER_NSEC); 148 + clocksource_hpet.mult = (u32)tmp; 149 + clocksource_register(&clocksource_hpet); 162 150 163 151 return hpet_timer_stop_set_go(hpet_tick); 164 152 } ··· 488 444 } 489 445 490 446 __setup("nohpet", nohpet_setup); 491 - 492 - #define HPET_MASK 0xFFFFFFFF 493 - #define HPET_SHIFT 22 494 - 495 - /* FSEC = 10^-15 NSEC = 10^-9 */ 496 - #define 
FSEC_PER_NSEC 1000000 497 - 498 - static void *hpet_ptr; 499 - 500 - static cycle_t read_hpet(void) 501 - { 502 - return (cycle_t)readl(hpet_ptr); 503 - } 504 - 505 - static cycle_t __vsyscall_fn vread_hpet(void) 506 - { 507 - return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 508 - } 509 - 510 - struct clocksource clocksource_hpet = { 511 - .name = "hpet", 512 - .rating = 250, 513 - .read = read_hpet, 514 - .mask = (cycle_t)HPET_MASK, 515 - .mult = 0, /* set below */ 516 - .shift = HPET_SHIFT, 517 - .flags = CLOCK_SOURCE_IS_CONTINUOUS, 518 - .vread = vread_hpet, 519 - }; 520 - 521 - static int __init init_hpet_clocksource(void) 522 - { 523 - unsigned long hpet_period; 524 - void __iomem *hpet_base; 525 - u64 tmp; 526 - 527 - if (!hpet_address) 528 - return -ENODEV; 529 - 530 - /* calculate the hpet address: */ 531 - hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 532 - hpet_ptr = hpet_base + HPET_COUNTER; 533 - 534 - /* calculate the frequency: */ 535 - hpet_period = readl(hpet_base + HPET_PERIOD); 536 - 537 - /* 538 - * hpet period is in femto seconds per cycle 539 - * so we need to convert this to ns/cyc units 540 - * aproximated by mult/2^shift 541 - * 542 - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 543 - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult 544 - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult 545 - * (fsec/cyc << shift)/1000000 = mult 546 - * (hpet_period << shift)/FSEC_PER_NSEC = mult 547 - */ 548 - tmp = (u64)hpet_period << HPET_SHIFT; 549 - do_div(tmp, FSEC_PER_NSEC); 550 - clocksource_hpet.mult = (u32)tmp; 551 - 552 - return clocksource_register(&clocksource_hpet); 553 - } 554 - 555 - module_init(init_hpet_clocksource);
+2
arch/x86_64/kernel/time.c
··· 358 358 set_cyc2ns_scale(cpu_khz); 359 359 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", 360 360 cpu_khz / 1000, cpu_khz % 1000); 361 + init_tsc_clocksource(); 362 + 361 363 setup_irq(0, &irq0); 362 364 } 363 365
+2 -5
arch/x86_64/kernel/tsc.c
··· 210 210 } 211 211 EXPORT_SYMBOL_GPL(mark_tsc_unstable); 212 212 213 - static int __init init_tsc_clocksource(void) 213 + void __init init_tsc_clocksource(void) 214 214 { 215 215 if (!notsc) { 216 216 clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, ··· 218 218 if (check_tsc_unstable()) 219 219 clocksource_tsc.rating = 0; 220 220 221 - return clocksource_register(&clocksource_tsc); 221 + clocksource_register(&clocksource_tsc); 222 222 } 223 - return 0; 224 223 } 225 - 226 - module_init(init_tsc_clocksource);
+4 -1
drivers/clocksource/acpi_pm.c
··· 214 214 return clocksource_register(&clocksource_acpi_pm); 215 215 } 216 216 217 - module_init(init_acpi_pm_clocksource); 217 + /* We use fs_initcall because we want the PCI fixups to have run 218 + * but we still need to load before device_initcall 219 + */ 220 + fs_initcall(init_acpi_pm_clocksource);
+1 -1
drivers/clocksource/cyclone.c
··· 116 116 return clocksource_register(&clocksource_cyclone); 117 117 } 118 118 119 - module_init(init_cyclone_clocksource); 119 + arch_initcall(init_cyclone_clocksource);
+1
include/asm-x86_64/tsc.h
··· 55 55 extern void tsc_init(void); 56 56 extern void mark_tsc_unstable(void); 57 57 extern int unsynchronized_tsc(void); 58 + extern void init_tsc_clocksource(void); 58 59 59 60 /* 60 61 * Boot-time check whether the TSCs are synchronized across
+5 -3
kernel/time/clocksource.c
··· 55 55 static char override_name[32]; 56 56 static int finished_booting; 57 57 58 - /* clocksource_done_booting - Called near the end of bootup 58 + /* clocksource_done_booting - Called near the end of core bootup 59 59 * 60 - * Hack to avoid lots of clocksource churn at boot time 60 + * Hack to avoid lots of clocksource churn at boot time. 61 + * We use fs_initcall because we want this to start before 62 + * device_initcall but after subsys_initcall. 61 63 */ 62 64 static int __init clocksource_done_booting(void) 63 65 { 64 66 finished_booting = 1; 65 67 return 0; 66 68 } 67 - late_initcall(clocksource_done_booting); 69 + fs_initcall(clocksource_done_booting); 68 70 69 71 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG 70 72 static LIST_HEAD(watchdog_list);