Merge branch 'x86-tsc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-tsc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86: Check tsc available/disabled in the delayed init function
x86: Improve TSC calibration using a delayed workqueue
x86: Make tsc=reliable override boot time stability checks
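
The calibration commit below moves the final clocksource registration out of the
boot path and into the kernel's standard delayed-workqueue pattern: declare the
work, kick it from an initcall, and let the handler re-arm itself once before
doing the real measurement. As a minimal sketch of that pattern only (sample_work
and sample_init are illustrative names, not part of this merge):

        #include <linux/module.h>
        #include <linux/workqueue.h>
        #include <linux/jiffies.h>

        static void sample_work_fn(struct work_struct *work);
        static DECLARE_DELAYED_WORK(sample_work, sample_work_fn);

        static void sample_work_fn(struct work_struct *work)
        {
                static int armed;

                if (!armed) {
                        /* The first expiry can fire late early in boot: re-arm once. */
                        armed = 1;
                        schedule_delayed_work(&sample_work, HZ);
                        return;
                }
                pr_info("delayed work ran about one second after timers were up\n");
        }

        static int __init sample_init(void)
        {
                /* Queue immediately; the handler defers the real work itself. */
                schedule_delayed_work(&sample_work, 0);
                return 0;
        }
        device_initcall(sample_init);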

+96 -9
+5 -4
Documentation/kernel-parameters.txt
···
                         to facilitate early boot debugging.
                         See also Documentation/trace/events.txt
 
-       tsc=            Disable clocksource-must-verify flag for TSC.
+       tsc=            Disable clocksource stability checks for TSC.
                        Format: <string>
                        [x86] reliable: mark tsc clocksource as reliable, this
-                       disables clocksource verification at runtime.
-                       Used to enable high-resolution timer mode on older
-                       hardware, and in virtualized environment.
+                       disables clocksource verification at runtime, as well
+                       as the stability checks done at bootup. Used to enable
+                       high-resolution timer mode on older hardware, and in
+                       virtualized environments.
                        [x86] noirqtime: Do not use TSC to do irq accounting.
                        Used to run time disable IRQ_TIME_ACCOUNTING on any
                        platforms where RDTSC is slow and this accounting
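
In practice the flag is appended to the kernel command line. A hypothetical
GRUB stanza for a guest whose clocksource watchdog keeps demoting the TSC
might look like this (kernel image and root device are placeholders):

        kernel /boot/vmlinuz-2.6.37 root=/dev/sda1 ro tsc=reliable
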
+91 -5
arch/x86/kernel/tsc.c
···
 
        if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                return 0;
+
+       if (tsc_clocksource_reliable)
+               return 0;
        /*
         * Intel systems are normally all synchronized.
         * Exceptions must mark TSC as unstable:
···
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
                /* assume multi socket systems are not synchronized: */
                if (num_possible_cpus() > 1)
-                       tsc_unstable = 1;
+                       return 1;
        }
 
-       return tsc_unstable;
+       return 0;
 }
 
-static void __init init_tsc_clocksource(void)
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work - ignored.
+ *
+ * This function uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration is off by more than 1% from the
+ * fast early calibration, we throw out the new calibration and
+ * use the early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
 {
+       static u64 tsc_start = -1, ref_start;
+       static int hpet;
+       u64 tsc_stop, ref_stop, delta;
+       unsigned long freq;
+
+       /* Don't bother refining TSC on unstable systems */
+       if (check_tsc_unstable())
+               goto out;
+
+       /*
+        * Since the work is started early in boot, we may be
+        * delayed the first time we expire. So set the workqueue
+        * again once we know timers are working.
+        */
+       if (tsc_start == -1) {
+               /*
+                * Only set hpet once, to avoid mixing hardware
+                * if the hpet becomes enabled later.
+                */
+               hpet = is_hpet_enabled();
+               schedule_delayed_work(&tsc_irqwork, HZ);
+               tsc_start = tsc_read_refs(&ref_start, hpet);
+               return;
+       }
+
+       tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+       /* hpet or pmtimer available? */
+       if (!hpet && !ref_start && !ref_stop)
+               goto out;
+
+       /* Check whether the sampling was disturbed by an SMI */
+       if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+               goto out;
+
+       delta = tsc_stop - tsc_start;
+       delta *= 1000000LL;
+       if (hpet)
+               freq = calc_hpet_ref(delta, ref_start, ref_stop);
+       else
+               freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+       /* Make sure we're within 1% */
+       if (abs(tsc_khz - freq) > tsc_khz/100)
+               goto out;
+
+       tsc_khz = freq;
+       printk(KERN_INFO "Refined TSC clocksource calibration: "
+               "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+               (unsigned long)tsc_khz % 1000);
+
+out:
+       clocksource_register_khz(&clocksource_tsc, tsc_khz);
+}
+
+
+static int __init init_tsc_clocksource(void)
+{
+       if (!cpu_has_tsc || tsc_disabled > 0)
+               return 0;
+
        if (tsc_clocksource_reliable)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
        /* lower the rating if we already know its unstable: */
···
                clocksource_tsc.rating = 0;
                clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
        }
-       clocksource_register_khz(&clocksource_tsc, tsc_khz);
+       schedule_delayed_work(&tsc_irqwork, 0);
+       return 0;
 }
+/*
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
···
                mark_tsc_unstable("TSCs unsynchronized");
 
        check_system_tsc_reliable();
-       init_tsc_clocksource();
 }
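
For a sense of the 1% gate in tsc_refine_calibration_work(): with a
hypothetical early estimate of tsc_khz = 2400000 (a 2.4 GHz part), the refined
frequency is kept only when abs(tsc_khz - freq) is at most 24000 kHz, so a
reading of 2399100 kHz replaces the early value, while 2350000 kHz is thrown
out and the early calibration is registered unchanged.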