Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer updates from Thomas Gleixner:
"A rather largish update for everything time and timer related:

- Cache footprint optimizations for both hrtimers and timer wheel

- Lower the NOHZ impact on systems which have NOHZ or timer migration
disabled at runtime.

- Optimize run time overhead of hrtimer interrupt by making the clock
offset updates smarter

- hrtimer cleanups and removal of restrictions to tackle some
problems in sched/perf

- Some more leap second tweaks

- Another round of changes addressing the 2038 problem

- First step to change the internals of clock event devices by
introducing the necessary infrastructure

- Allow constant folding for usecs/msecs_to_jiffies()

- The usual pile of clockevent/clocksource driver updates

The hrtimer changes contain updates to sched, perf and x86 as they
depend on them plus changes all over the tree to cleanup API changes
and redundant code, which got copied all over the place. The y2038
changes touch s390 to remove the last non 2038 safe code related to
boot/persistent clock"
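The y2038 problem mentioned above is the overflow of a signed 32-bit time_t, which is why interfaces such as s390's persistent/boot clock are being moved from `struct timespec` to `struct timespec64`. A quick illustrative sketch (not kernel code) of where the 32-bit representation breaks:

```python
import datetime

# A signed 32-bit time_t counts seconds from the 1970 epoch only up to
# 2**31 - 1; one second later it wraps to a negative value.
TIME_T_MAX_32 = 2**31 - 1

last_valid = datetime.datetime.fromtimestamp(TIME_T_MAX_32,
                                             datetime.timezone.utc)
print(last_valid)  # 2038-01-19 03:14:07+00:00

# timespec64 widens the seconds field to 64 bits, pushing the wrap
# point out by billions of years.
TIME_T_MAX_64 = 2**63 - 1
```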

* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
clocksource: Increase dependencies of timer-stm32 to limit build wreckage
timer: Minimize nohz off overhead
timer: Reduce timer migration overhead if disabled
timer: Stats: Simplify the flags handling
timer: Replace timer base by a cpu index
timer: Use hlist for the timer wheel hash buckets
timer: Remove FIFO "guarantee"
timers: Sanitize catchup_timer_jiffies() usage
hrtimer: Allow hrtimer::function() to free the timer
seqcount: Introduce raw_write_seqcount_barrier()
seqcount: Rename write_seqcount_barrier()
hrtimer: Fix hrtimer_is_queued() hole
hrtimer: Remove HRTIMER_STATE_MIGRATE
selftest: Timers: Avoid signal deadlock in leap-a-day
timekeeping: Copy the shadow-timekeeper over the real timekeeper last
clockevents: Check state instead of mode in suspend/resume path
selftests: timers: Add leap-second timer edge testing to leap-a-day.c
ntp: Do leapsecond adjustment in adjtimex read path
time: Prevent early expiry of hrtimers[CLOCK_REALTIME] at the leap second edge
ntp: Introduce and use SECS_PER_DAY macro instead of 86400
...
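One of the items in this series is constant folding for usecs/msecs_to_jiffies(). The conversion itself is a round-up division by the tick period; a sketch in Python (assuming a config where HZ divides 1000, as with the common HZ=100/250/1000 choices):

```python
HZ = 250                      # assumed config; tick period = 4 ms
MSEC_PER_SEC = 1000

def msecs_to_jiffies(msecs):
    # Round up so a nonzero delay never becomes zero jiffies.
    tick_ms = MSEC_PER_SEC // HZ
    return (msecs + tick_ms - 1) // tick_ms

print(msecs_to_jiffies(1))     # 1  (1 ms still costs a full 4 ms tick)
print(msecs_to_jiffies(1000))  # 250
```

When the argument is a compile-time constant, this whole expression can now be folded at build time instead of going through an out-of-line function call.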

+2297 -1568
+26
Documentation/devicetree/bindings/arm/armv7m_systick.txt
··· 1 + * ARMv7M System Timer 2 + 3 + ARMv7-M includes a system timer, known as SysTick. Current driver only 4 + implements the clocksource feature. 5 + 6 + Required properties: 7 + - compatible : Should be "arm,armv7m-systick" 8 + - reg : The address range of the timer 9 + 10 + Required clocking property, have to be one of: 11 + - clocks : The input clock of the timer 12 + - clock-frequency : The rate in HZ in input of the ARM SysTick 13 + 14 + Examples: 15 + 16 + systick: timer@e000e010 { 17 + compatible = "arm,armv7m-systick"; 18 + reg = <0xe000e010 0x10>; 19 + clocks = <&clk_systick>; 20 + }; 21 + 22 + systick: timer@e000e010 { 23 + compatible = "arm,armv7m-systick"; 24 + reg = <0xe000e010 0x10>; 25 + clock-frequency = <90000000>; 26 + };
+26
Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt
··· 1 + * NXP LPC3220 timer 2 + 3 + The NXP LPC3220 timer is used on a wide range of NXP SoCs. This 4 + includes LPC32xx, LPC178x, LPC18xx and LPC43xx parts. 5 + 6 + Required properties: 7 + - compatible: 8 + Should be "nxp,lpc3220-timer". 9 + - reg: 10 + Address and length of the register set. 11 + - interrupts: 12 + Reference to the timer interrupt 13 + - clocks: 14 + Should contain a reference to timer clock. 15 + - clock-names: 16 + Should contain "timerclk". 17 + 18 + Example: 19 + 20 + timer1: timer@40085000 { 21 + compatible = "nxp,lpc3220-timer"; 22 + reg = <0x40085000 0x1000>; 23 + interrupts = <13>; 24 + clocks = <&ccu1 CLK_CPU_TIMER1>; 25 + clock-names = "timerclk"; 26 + };
+22
Documentation/devicetree/bindings/timer/st,stm32-timer.txt
··· 1 + . STMicroelectronics STM32 timer 2 + 3 + The STM32 MCUs family has several general-purpose 16 and 32 bits timers. 4 + 5 + Required properties: 6 + - compatible : Should be "st,stm32-timer" 7 + - reg : Address and length of the register set 8 + - clocks : Reference on the timer input clock 9 + - interrupts : Reference to the timer interrupt 10 + 11 + Optional properties: 12 + - resets: Reference to a reset controller asserting the timer 13 + 14 + Example: 15 + 16 + timer5: timer@40000c00 { 17 + compatible = "st,stm32-timer"; 18 + reg = <0x40000c00 0x400>; 19 + interrupts = <50>; 20 + resets = <&rrc 259>; 21 + clocks = <&clk_pmtr1>; 22 + };
+27 -7
Kbuild
··· 2 2 # Kbuild for top-level directory of the kernel 3 3 # This file takes care of the following: 4 4 # 1) Generate bounds.h 5 - # 2) Generate asm-offsets.h (may need bounds.h) 6 - # 3) Check for missing system calls 5 + # 2) Generate timeconst.h 6 + # 3) Generate asm-offsets.h (may need bounds.h and timeconst.h) 7 + # 4) Check for missing system calls 7 8 8 9 # Default sed regexp - multiline due to syntax constraints 9 10 define sed-y ··· 48 47 $(call filechk,offsets,__LINUX_BOUNDS_H__) 49 48 50 49 ##### 51 - # 2) Generate asm-offsets.h 50 + # 2) Generate timeconst.h 51 + 52 + timeconst-file := include/generated/timeconst.h 53 + 54 + #always += $(timeconst-file) 55 + targets += $(timeconst-file) 56 + 57 + quiet_cmd_gentimeconst = GEN $@ 58 + define cmd_gentimeconst 59 + (echo $(CONFIG_HZ) | bc -q $< ) > $@ 60 + endef 61 + define filechk_gentimeconst 62 + (echo $(CONFIG_HZ) | bc -q $< ) 63 + endef 64 + 65 + $(obj)/$(timeconst-file): kernel/time/timeconst.bc FORCE 66 + $(call filechk,gentimeconst) 67 + 68 + ##### 69 + # 3) Generate asm-offsets.h 52 70 # 53 71 54 72 offsets-file := include/generated/asm-offsets.h ··· 77 57 78 58 # We use internal kbuild rules to avoid the "is up to date" message from make 79 59 arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \ 80 - $(obj)/$(bounds-file) FORCE 60 + $(obj)/$(timeconst-file) $(obj)/$(bounds-file) FORCE 81 61 $(Q)mkdir -p $(dir $@) 82 62 $(call if_changed_dep,cc_s_c) 83 63 ··· 85 65 $(call filechk,offsets,__ASM_OFFSETS_H__) 86 66 87 67 ##### 88 - # 3) Check for missing system calls 68 + # 4) Check for missing system calls 89 69 # 90 70 91 71 always += missing-syscalls ··· 97 77 missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE 98 78 $(call cmd,syscalls) 99 79 100 - # Keep these two files during make clean 101 - no-clean-files := $(bounds-file) $(offsets-file) 80 + # Keep these three files during make clean 81 + no-clean-files := $(bounds-file) $(offsets-file) $(timeconst-file)
+3 -2
arch/s390/include/asm/timex.h
··· 10 10 #define _ASM_S390_TIMEX_H 11 11 12 12 #include <asm/lowcore.h> 13 + #include <linux/time64.h> 13 14 14 15 /* The value of the TOD clock for 1.1.1970. */ 15 16 #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL ··· 109 108 void init_cpu_timer(void); 110 109 unsigned long long monotonic_clock(void); 111 110 112 - void tod_to_timeval(__u64, struct timespec *); 111 + void tod_to_timeval(__u64 todval, struct timespec64 *xt); 113 112 114 113 static inline 115 - void stck_to_timespec(unsigned long long stck, struct timespec *ts) 114 + void stck_to_timespec64(unsigned long long stck, struct timespec64 *ts) 116 115 { 117 116 tod_to_timeval(stck - TOD_UNIX_EPOCH, ts); 118 117 }
+6 -5
arch/s390/kernel/debug.c
··· 1457 1457 debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, 1458 1458 int area, debug_entry_t * entry, char *out_buf) 1459 1459 { 1460 - struct timespec time_spec; 1460 + struct timespec64 time_spec; 1461 1461 char *except_str; 1462 1462 unsigned long caller; 1463 1463 int rc = 0; 1464 1464 unsigned int level; 1465 1465 1466 1466 level = entry->id.fields.level; 1467 - stck_to_timespec(entry->id.stck, &time_spec); 1467 + stck_to_timespec64(entry->id.stck, &time_spec); 1468 1468 1469 1469 if (entry->id.fields.exception) 1470 1470 except_str = "*"; 1471 1471 else 1472 1472 except_str = "-"; 1473 1473 caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN; 1474 - rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ", 1475 - area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level, 1476 - except_str, entry->id.fields.cpuid, (void *) caller); 1474 + rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ", 1475 + area, (long long)time_spec.tv_sec, 1476 + time_spec.tv_nsec / 1000, level, except_str, 1477 + entry->id.fields.cpuid, (void *)caller); 1477 1478 return rc; 1478 1479 } 1479 1480 EXPORT_SYMBOL(debug_dflt_header_fn);
+3 -3
arch/s390/kernel/time.c
··· 76 76 } 77 77 EXPORT_SYMBOL(monotonic_clock); 78 78 79 - void tod_to_timeval(__u64 todval, struct timespec *xt) 79 + void tod_to_timeval(__u64 todval, struct timespec64 *xt) 80 80 { 81 81 unsigned long long sec; 82 82 ··· 181 181 static void etr_reset(void); 182 182 static void stp_reset(void); 183 183 184 - void read_persistent_clock(struct timespec *ts) 184 + void read_persistent_clock64(struct timespec64 *ts) 185 185 { 186 186 tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts); 187 187 } 188 188 189 - void read_boot_clock(struct timespec *ts) 189 + void read_boot_clock64(struct timespec64 *ts) 190 190 { 191 191 tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); 192 192 }
+2 -3
arch/x86/kernel/cpu/perf_event_intel_rapl.c
··· 204 204 205 205 static void rapl_start_hrtimer(struct rapl_pmu *pmu) 206 206 { 207 - __hrtimer_start_range_ns(&pmu->hrtimer, 208 - pmu->timer_interval, 0, 209 - HRTIMER_MODE_REL_PINNED, 0); 207 + hrtimer_start(&pmu->hrtimer, pmu->timer_interval, 208 + HRTIMER_MODE_REL_PINNED); 210 209 } 211 210 212 211 static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
+2 -3
arch/x86/kernel/cpu/perf_event_intel_uncore.c
··· 233 233 234 234 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) 235 235 { 236 - __hrtimer_start_range_ns(&box->hrtimer, 237 - ns_to_ktime(box->hrtimer_duration), 0, 238 - HRTIMER_MODE_REL_PINNED, 0); 236 + hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), 237 + HRTIMER_MODE_REL_PINNED); 239 238 } 240 239 241 240 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+17
drivers/clocksource/Kconfig
··· 106 106 Support to use the timers of EFM32 SoCs as clock source and clock 107 107 event device. 108 108 109 + config CLKSRC_LPC32XX 110 + bool 111 + select CLKSRC_MMIO 112 + select CLKSRC_OF 113 + 114 + config CLKSRC_STM32 115 + bool "Clocksource for STM32 SoCs" if !ARCH_STM32 116 + depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST) 117 + select CLKSRC_MMIO 118 + 109 119 config ARM_ARCH_TIMER 110 120 bool 111 121 select CLKSRC_OF if OF ··· 148 138 default y 149 139 help 150 140 Use ARM global timer clock source as sched_clock 141 + 142 + config ARMV7M_SYSTICK 143 + bool 144 + select CLKSRC_OF if OF 145 + select CLKSRC_MMIO 146 + help 147 + This options enables support for the ARMv7M system timer unit 151 148 152 149 config ATMEL_PIT 153 150 select CLKSRC_OF if OF
+3
drivers/clocksource/Makefile
··· 36 36 obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o 37 37 obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o 38 38 obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o 39 + obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o 39 40 obj-$(CONFIG_CLKSRC_EXYNOS_MCT) += exynos_mct.o 41 + obj-$(CONFIG_CLKSRC_LPC32XX) += time-lpc32xx.o 40 42 obj-$(CONFIG_CLKSRC_SAMSUNG_PWM) += samsung_pwm_timer.o 41 43 obj-$(CONFIG_FSL_FTM_TIMER) += fsl_ftm_timer.o 42 44 obj-$(CONFIG_VF_PIT_TIMER) += vf_pit_timer.o ··· 47 45 48 46 obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o 49 47 obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o 48 + obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o 50 49 obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o 51 50 obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o 52 51 obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o
+79
drivers/clocksource/armv7m_systick.c
··· 1 + /* 2 + * Copyright (C) Maxime Coquelin 2015 3 + * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com> 4 + * License terms: GNU General Public License (GPL), version 2 5 + */ 6 + 7 + #include <linux/kernel.h> 8 + #include <linux/clocksource.h> 9 + #include <linux/clockchips.h> 10 + #include <linux/of.h> 11 + #include <linux/of_address.h> 12 + #include <linux/clk.h> 13 + #include <linux/bitops.h> 14 + 15 + #define SYST_CSR 0x00 16 + #define SYST_RVR 0x04 17 + #define SYST_CVR 0x08 18 + #define SYST_CALIB 0x0c 19 + 20 + #define SYST_CSR_ENABLE BIT(0) 21 + 22 + #define SYSTICK_LOAD_RELOAD_MASK 0x00FFFFFF 23 + 24 + static void __init system_timer_of_register(struct device_node *np) 25 + { 26 + struct clk *clk = NULL; 27 + void __iomem *base; 28 + u32 rate; 29 + int ret; 30 + 31 + base = of_iomap(np, 0); 32 + if (!base) { 33 + pr_warn("system-timer: invalid base address\n"); 34 + return; 35 + } 36 + 37 + ret = of_property_read_u32(np, "clock-frequency", &rate); 38 + if (ret) { 39 + clk = of_clk_get(np, 0); 40 + if (IS_ERR(clk)) 41 + goto out_unmap; 42 + 43 + ret = clk_prepare_enable(clk); 44 + if (ret) 45 + goto out_clk_put; 46 + 47 + rate = clk_get_rate(clk); 48 + if (!rate) 49 + goto out_clk_disable; 50 + } 51 + 52 + writel_relaxed(SYSTICK_LOAD_RELOAD_MASK, base + SYST_RVR); 53 + writel_relaxed(SYST_CSR_ENABLE, base + SYST_CSR); 54 + 55 + ret = clocksource_mmio_init(base + SYST_CVR, "arm_system_timer", rate, 56 + 200, 24, clocksource_mmio_readl_down); 57 + if (ret) { 58 + pr_err("failed to init clocksource (%d)\n", ret); 59 + if (clk) 60 + goto out_clk_disable; 61 + else 62 + goto out_unmap; 63 + } 64 + 65 + pr_info("ARM System timer initialized as clocksource\n"); 66 + 67 + return; 68 + 69 + out_clk_disable: 70 + clk_disable_unprepare(clk); 71 + out_clk_put: 72 + clk_put(clk); 73 + out_unmap: 74 + iounmap(base); 75 + pr_warn("ARM System timer register failed (%d)\n", ret); 76 + } 77 + 78 + CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick", 79 + system_timer_of_register);
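Drivers like this one register a raw counter via clocksource_mmio_init() and let the core convert cycles to nanoseconds with a precomputed multiply-and-shift instead of a division on every read. A simplified sketch of the idea (illustrative only, not the kernel's exact clocks_calc_mult_shift()):

```python
NSEC_PER_SEC = 10**9

def calc_mult_shift(rate_hz):
    # Pick the largest shift whose mult still fits in 32 bits,
    # maximizing the precision of cycles_to_ns().
    for shift in range(32, -1, -1):
        mult = (NSEC_PER_SEC << shift) // rate_hz
        if mult < 2**32:
            return mult, shift
    raise ValueError("rate too low")

def cycles_to_ns(cycles, mult, shift):
    # The hot path is one multiply and one shift, no division.
    return (cycles * mult) >> shift

mult, shift = calc_mult_shift(1_000_000)   # a hypothetical 1 MHz timer
print(cycles_to_ns(1, mult, shift))        # 1000 ns per cycle
```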
+1 -1
drivers/clocksource/asm9260_timer.c
··· 178 178 unsigned long rate; 179 179 180 180 priv.base = of_io_request_and_map(np, 0, np->name); 181 - if (!priv.base) 181 + if (IS_ERR(priv.base)) 182 182 panic("%s: unable to map resource", np->name); 183 183 184 184 clk = of_clk_get(np, 0);
+3 -19
drivers/clocksource/exynos_mct.c
··· 209 209 exynos4_mct_frc_start(); 210 210 } 211 211 212 - struct clocksource mct_frc = { 212 + static struct clocksource mct_frc = { 213 213 .name = "mct-frc", 214 214 .rating = 400, 215 215 .read = exynos4_frc_read, ··· 413 413 } 414 414 } 415 415 416 - static int exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) 416 + static void exynos4_mct_tick_clear(struct mct_clock_event_device *mevt) 417 417 { 418 418 struct clock_event_device *evt = &mevt->evt; 419 419 ··· 426 426 exynos4_mct_tick_stop(mevt); 427 427 428 428 /* Clear the MCT tick interrupt */ 429 - if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) { 429 + if (readl_relaxed(reg_base + mevt->base + MCT_L_INT_CSTAT_OFFSET) & 1) 430 430 exynos4_mct_write(0x1, mevt->base + MCT_L_INT_CSTAT_OFFSET); 431 - return 1; 432 - } else { 433 - return 0; 434 - } 435 431 } 436 432 437 433 static irqreturn_t exynos4_mct_tick_isr(int irq, void *dev_id) ··· 558 562 559 563 out_irq: 560 564 free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); 561 - } 562 - 563 - void __init mct_init(void __iomem *base, int irq_g0, int irq_l0, int irq_l1) 564 - { 565 - mct_irqs[MCT_G0_IRQ] = irq_g0; 566 - mct_irqs[MCT_L0_IRQ] = irq_l0; 567 - mct_irqs[MCT_L1_IRQ] = irq_l1; 568 - mct_int_type = MCT_INT_SPI; 569 - 570 - exynos4_timer_resources(NULL, base); 571 - exynos4_clocksource_init(); 572 - exynos4_clockevent_init(); 573 565 } 574 566 575 567 static void __init mct_init_dt(struct device_node *np, unsigned int int_type)
-59
drivers/clocksource/qcom-timer.c
··· 40 40 41 41 #define GPT_HZ 32768 42 42 43 - #define MSM_DGT_SHIFT 5 44 - 45 43 static void __iomem *event_base; 46 44 static void __iomem *sts_base; 47 45 ··· 230 232 register_current_timer_delay(&msm_delay_timer); 231 233 } 232 234 233 - #ifdef CONFIG_ARCH_QCOM 234 235 static void __init msm_dt_timer_init(struct device_node *np) 235 236 { 236 237 u32 freq; ··· 282 285 } 283 286 CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); 284 287 CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); 285 - #else 286 - 287 - static int __init msm_timer_map(phys_addr_t addr, u32 event, u32 source, 288 - u32 sts) 289 - { 290 - void __iomem *base; 291 - 292 - base = ioremap(addr, SZ_256); 293 - if (!base) { 294 - pr_err("Failed to map timer base\n"); 295 - return -ENOMEM; 296 - } 297 - event_base = base + event; 298 - source_base = base + source; 299 - if (sts) 300 - sts_base = base + sts; 301 - 302 - return 0; 303 - } 304 - 305 - static notrace cycle_t msm_read_timer_count_shift(struct clocksource *cs) 306 - { 307 - /* 308 - * Shift timer count down by a constant due to unreliable lower bits 309 - * on some targets. 310 - */ 311 - return msm_read_timer_count(cs) >> MSM_DGT_SHIFT; 312 - } 313 - 314 - void __init msm7x01_timer_init(void) 315 - { 316 - struct clocksource *cs = &msm_clocksource; 317 - 318 - if (msm_timer_map(0xc0100000, 0x0, 0x10, 0x0)) 319 - return; 320 - cs->read = msm_read_timer_count_shift; 321 - cs->mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)); 322 - /* 600 KHz */ 323 - msm_timer_init(19200000 >> MSM_DGT_SHIFT, 32 - MSM_DGT_SHIFT, 7, 324 - false); 325 - } 326 - 327 - void __init msm7x30_timer_init(void) 328 - { 329 - if (msm_timer_map(0xc0100000, 0x4, 0x24, 0x80)) 330 - return; 331 - msm_timer_init(24576000 / 4, 32, 1, false); 332 - } 333 - 334 - void __init qsd8x50_timer_init(void) 335 - { 336 - if (msm_timer_map(0xAC100000, 0x0, 0x10, 0x34)) 337 - return; 338 - msm_timer_init(19200000 / 4, 32, 7, false); 339 - } 340 - #endif
+272
drivers/clocksource/time-lpc32xx.c
··· 1 + /* 2 + * Clocksource driver for NXP LPC32xx/18xx/43xx timer 3 + * 4 + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> 5 + * 6 + * Based on: 7 + * time-efm32 Copyright (C) 2013 Pengutronix 8 + * mach-lpc32xx/timer.c Copyright (C) 2009 - 2010 NXP Semiconductors 9 + * 10 + * This file is licensed under the terms of the GNU General Public 11 + * License version 2. This program is licensed "as is" without any 12 + * warranty of any kind, whether express or implied. 13 + * 14 + */ 15 + 16 + #define pr_fmt(fmt) "%s: " fmt, __func__ 17 + 18 + #include <linux/clk.h> 19 + #include <linux/clockchips.h> 20 + #include <linux/clocksource.h> 21 + #include <linux/interrupt.h> 22 + #include <linux/irq.h> 23 + #include <linux/kernel.h> 24 + #include <linux/of.h> 25 + #include <linux/of_address.h> 26 + #include <linux/of_irq.h> 27 + #include <linux/sched_clock.h> 28 + 29 + #define LPC32XX_TIMER_IR 0x000 30 + #define LPC32XX_TIMER_IR_MR0INT BIT(0) 31 + #define LPC32XX_TIMER_TCR 0x004 32 + #define LPC32XX_TIMER_TCR_CEN BIT(0) 33 + #define LPC32XX_TIMER_TCR_CRST BIT(1) 34 + #define LPC32XX_TIMER_TC 0x008 35 + #define LPC32XX_TIMER_PR 0x00c 36 + #define LPC32XX_TIMER_MCR 0x014 37 + #define LPC32XX_TIMER_MCR_MR0I BIT(0) 38 + #define LPC32XX_TIMER_MCR_MR0R BIT(1) 39 + #define LPC32XX_TIMER_MCR_MR0S BIT(2) 40 + #define LPC32XX_TIMER_MR0 0x018 41 + #define LPC32XX_TIMER_CTCR 0x070 42 + 43 + struct lpc32xx_clock_event_ddata { 44 + struct clock_event_device evtdev; 45 + void __iomem *base; 46 + }; 47 + 48 + /* Needed for the sched clock */ 49 + static void __iomem *clocksource_timer_counter; 50 + 51 + static u64 notrace lpc32xx_read_sched_clock(void) 52 + { 53 + return readl(clocksource_timer_counter); 54 + } 55 + 56 + static int lpc32xx_clkevt_next_event(unsigned long delta, 57 + struct clock_event_device *evtdev) 58 + { 59 + struct lpc32xx_clock_event_ddata *ddata = 60 + container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev); 61 + 62 + /* 63 + * Place timer in reset and program the delta in the prescale 64 + * register (PR). When the prescale counter matches the value 65 + * in PR the counter register is incremented and the compare 66 + * match will trigger. After setup the timer is released from 67 + * reset and enabled. 68 + */ 69 + writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR); 70 + writel_relaxed(delta, ddata->base + LPC32XX_TIMER_PR); 71 + writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR); 72 + 73 + return 0; 74 + } 75 + 76 + static int lpc32xx_clkevt_shutdown(struct clock_event_device *evtdev) 77 + { 78 + struct lpc32xx_clock_event_ddata *ddata = 79 + container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev); 80 + 81 + /* Disable the timer */ 82 + writel_relaxed(0, ddata->base + LPC32XX_TIMER_TCR); 83 + 84 + return 0; 85 + } 86 + 87 + static int lpc32xx_clkevt_oneshot(struct clock_event_device *evtdev) 88 + { 89 + /* 90 + * When using oneshot, we must also disable the timer 91 + * to wait for the first call to set_next_event(). 92 + */ 93 + return lpc32xx_clkevt_shutdown(evtdev); 94 + } 95 + 96 + static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id) 97 + { 98 + struct lpc32xx_clock_event_ddata *ddata = dev_id; 99 + 100 + /* Clear match on channel 0 */ 101 + writel_relaxed(LPC32XX_TIMER_IR_MR0INT, ddata->base + LPC32XX_TIMER_IR); 102 + 103 + ddata->evtdev.event_handler(&ddata->evtdev); 104 + 105 + return IRQ_HANDLED; 106 + } 107 + 108 + static struct lpc32xx_clock_event_ddata lpc32xx_clk_event_ddata = { 109 + .evtdev = { 110 + .name = "lpc3220 clockevent", 111 + .features = CLOCK_EVT_FEAT_ONESHOT, 112 + .rating = 300, 113 + .set_next_event = lpc32xx_clkevt_next_event, 114 + .set_state_shutdown = lpc32xx_clkevt_shutdown, 115 + .set_state_oneshot = lpc32xx_clkevt_oneshot, 116 + }, 117 + }; 118 + 119 + static int __init lpc32xx_clocksource_init(struct device_node *np) 120 + { 121 + void __iomem *base; 122 + unsigned long rate; 123 + struct clk *clk; 124 + int ret; 125 + 126 + clk = of_clk_get_by_name(np, "timerclk"); 127 + if (IS_ERR(clk)) { 128 + pr_err("clock get failed (%lu)\n", PTR_ERR(clk)); 129 + return PTR_ERR(clk); 130 + } 131 + 132 + ret = clk_prepare_enable(clk); 133 + if (ret) { 134 + pr_err("clock enable failed (%d)\n", ret); 135 + goto err_clk_enable; 136 + } 137 + 138 + base = of_iomap(np, 0); 139 + if (!base) { 140 + pr_err("unable to map registers\n"); 141 + ret = -EADDRNOTAVAIL; 142 + goto err_iomap; 143 + } 144 + 145 + /* 146 + * Disable and reset timer then set it to free running timer 147 + * mode (CTCR) with no prescaler (PR) or match operations (MCR). 148 + * After setup the timer is released from reset and enabled. 149 + */ 150 + writel_relaxed(LPC32XX_TIMER_TCR_CRST, base + LPC32XX_TIMER_TCR); 151 + writel_relaxed(0, base + LPC32XX_TIMER_PR); 152 + writel_relaxed(0, base + LPC32XX_TIMER_MCR); 153 + writel_relaxed(0, base + LPC32XX_TIMER_CTCR); 154 + writel_relaxed(LPC32XX_TIMER_TCR_CEN, base + LPC32XX_TIMER_TCR); 155 + 156 + rate = clk_get_rate(clk); 157 + ret = clocksource_mmio_init(base + LPC32XX_TIMER_TC, "lpc3220 timer", 158 + rate, 300, 32, clocksource_mmio_readl_up); 159 + if (ret) { 160 + pr_err("failed to init clocksource (%d)\n", ret); 161 + goto err_clocksource_init; 162 + } 163 + 164 + clocksource_timer_counter = base + LPC32XX_TIMER_TC; 165 + sched_clock_register(lpc32xx_read_sched_clock, 32, rate); 166 + 167 + return 0; 168 + 169 + err_clocksource_init: 170 + iounmap(base); 171 + err_iomap: 172 + clk_disable_unprepare(clk); 173 + err_clk_enable: 174 + clk_put(clk); 175 + return ret; 176 + } 177 + 178 + static int __init lpc32xx_clockevent_init(struct device_node *np) 179 + { 180 + void __iomem *base; 181 + unsigned long rate; 182 + struct clk *clk; 183 + int ret, irq; 184 + 185 + clk = of_clk_get_by_name(np, "timerclk"); 186 + if (IS_ERR(clk)) { 187 + pr_err("clock get failed (%lu)\n", PTR_ERR(clk)); 188 + return PTR_ERR(clk); 189 + } 190 + 191 + ret = clk_prepare_enable(clk); 192 + if (ret) { 193 + pr_err("clock enable failed (%d)\n", ret); 194 + goto err_clk_enable; 195 + } 196 + 197 + base = of_iomap(np, 0); 198 + if (!base) { 199 + pr_err("unable to map registers\n"); 200 + ret = -EADDRNOTAVAIL; 201 + goto err_iomap; 202 + } 203 + 204 + irq = irq_of_parse_and_map(np, 0); 205 + if (!irq) { 206 + pr_err("get irq failed\n"); 207 + ret = -ENOENT; 208 + goto err_irq; 209 + } 210 + 211 + /* 212 + * Disable timer and clear any pending interrupt (IR) on match 213 + * channel 0 (MR0). Configure a compare match value of 1 on MR0 214 + * and enable interrupt, reset on match and stop on match (MCR). 215 + */ 216 + writel_relaxed(0, base + LPC32XX_TIMER_TCR); 217 + writel_relaxed(0, base + LPC32XX_TIMER_CTCR); 218 + writel_relaxed(LPC32XX_TIMER_IR_MR0INT, base + LPC32XX_TIMER_IR); 219 + writel_relaxed(1, base + LPC32XX_TIMER_MR0); 220 + writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R | 221 + LPC32XX_TIMER_MCR_MR0S, base + LPC32XX_TIMER_MCR); 222 + 223 + rate = clk_get_rate(clk); 224 + lpc32xx_clk_event_ddata.base = base; 225 + clockevents_config_and_register(&lpc32xx_clk_event_ddata.evtdev, 226 + rate, 1, -1); 227 + 228 + ret = request_irq(irq, lpc32xx_clock_event_handler, 229 + IRQF_TIMER | IRQF_IRQPOLL, "lpc3220 clockevent", 230 + &lpc32xx_clk_event_ddata); 231 + if (ret) { 232 + pr_err("request irq failed\n"); 233 + goto err_irq; 234 + } 235 + 236 + return 0; 237 + 238 + err_irq: 239 + iounmap(base); 240 + err_iomap: 241 + clk_disable_unprepare(clk); 242 + err_clk_enable: 243 + clk_put(clk); 244 + return ret; 245 + } 246 + 247 + /* 248 + * This function asserts that we have exactly one clocksource and one 249 + * clock_event_device in the end. 250 + */ 251 + static void __init lpc32xx_timer_init(struct device_node *np) 252 + { 253 + static int has_clocksource, has_clockevent; 254 + int ret; 255 + 256 + if (!has_clocksource) { 257 + ret = lpc32xx_clocksource_init(np); 258 + if (!ret) { 259 + has_clocksource = 1; 260 + return; 261 + } 262 + } 263 + 264 + if (!has_clockevent) { 265 + ret = lpc32xx_clockevent_init(np); 266 + if (!ret) { 267 + has_clockevent = 1; 268 + return; 269 + } 270 + } 271 + } 272 + CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init);
+1 -1
drivers/clocksource/timer-integrator-ap.c
··· 166 166 struct device_node *sec_node; 167 167 168 168 base = of_io_request_and_map(node, 0, "integrator-timer"); 169 - if (!base) 169 + if (IS_ERR(base)) 170 170 return; 171 171 172 172 clk = of_clk_get(node, 0);
+184
drivers/clocksource/timer-stm32.c
··· 1 + /* 2 + * Copyright (C) Maxime Coquelin 2015 3 + * Author: Maxime Coquelin <mcoquelin.stm32@gmail.com> 4 + * License terms: GNU General Public License (GPL), version 2 5 + * 6 + * Inspired by time-efm32.c from Uwe Kleine-Koenig 7 + */ 8 + 9 + #include <linux/kernel.h> 10 + #include <linux/clocksource.h> 11 + #include <linux/clockchips.h> 12 + #include <linux/irq.h> 13 + #include <linux/interrupt.h> 14 + #include <linux/of.h> 15 + #include <linux/of_address.h> 16 + #include <linux/of_irq.h> 17 + #include <linux/clk.h> 18 + #include <linux/reset.h> 19 + 20 + #define TIM_CR1 0x00 21 + #define TIM_DIER 0x0c 22 + #define TIM_SR 0x10 23 + #define TIM_EGR 0x14 24 + #define TIM_PSC 0x28 25 + #define TIM_ARR 0x2c 26 + 27 + #define TIM_CR1_CEN BIT(0) 28 + #define TIM_CR1_OPM BIT(3) 29 + #define TIM_CR1_ARPE BIT(7) 30 + 31 + #define TIM_DIER_UIE BIT(0) 32 + 33 + #define TIM_SR_UIF BIT(0) 34 + 35 + #define TIM_EGR_UG BIT(0) 36 + 37 + struct stm32_clock_event_ddata { 38 + struct clock_event_device evtdev; 39 + unsigned periodic_top; 40 + void __iomem *base; 41 + }; 42 + 43 + static void stm32_clock_event_set_mode(enum clock_event_mode mode, 44 + struct clock_event_device *evtdev) 45 + { 46 + struct stm32_clock_event_ddata *data = 47 + container_of(evtdev, struct stm32_clock_event_ddata, evtdev); 48 + void *base = data->base; 49 + 50 + switch (mode) { 51 + case CLOCK_EVT_MODE_PERIODIC: 52 + writel_relaxed(data->periodic_top, base + TIM_ARR); 53 + writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, base + TIM_CR1); 54 + break; 55 + 56 + case CLOCK_EVT_MODE_ONESHOT: 57 + default: 58 + writel_relaxed(0, base + TIM_CR1); 59 + break; 60 + } 61 + } 62 + 63 + static int stm32_clock_event_set_next_event(unsigned long evt, 64 + struct clock_event_device *evtdev) 65 + { 66 + struct stm32_clock_event_ddata *data = 67 + container_of(evtdev, struct stm32_clock_event_ddata, evtdev); 68 + 69 + writel_relaxed(evt, data->base + TIM_ARR); 70 + writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN, 71 + data->base + TIM_CR1); 72 + 73 + return 0; 74 + } 75 + 76 + static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) 77 + { 78 + struct stm32_clock_event_ddata *data = dev_id; 79 + 80 + writel_relaxed(0, data->base + TIM_SR); 81 + 82 + data->evtdev.event_handler(&data->evtdev); 83 + 84 + return IRQ_HANDLED; 85 + } 86 + 87 + static struct stm32_clock_event_ddata clock_event_ddata = { 88 + .evtdev = { 89 + .name = "stm32 clockevent", 90 + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, 91 + .set_mode = stm32_clock_event_set_mode, 92 + .set_next_event = stm32_clock_event_set_next_event, 93 + .rating = 200, 94 + }, 95 + }; 96 + 97 + static void __init stm32_clockevent_init(struct device_node *np) 98 + { 99 + struct stm32_clock_event_ddata *data = &clock_event_ddata; 100 + struct clk *clk; 101 + struct reset_control *rstc; 102 + unsigned long rate, max_delta; 103 + int irq, ret, bits, prescaler = 1; 104 + 105 + clk = of_clk_get(np, 0); 106 + if (IS_ERR(clk)) { 107 + ret = PTR_ERR(clk); 108 + pr_err("failed to get clock for clockevent (%d)\n", ret); 109 + goto err_clk_get; 110 + } 111 + 112 + ret = clk_prepare_enable(clk); 113 + if (ret) { 114 + pr_err("failed to enable timer clock for clockevent (%d)\n", 115 + ret); 116 + goto err_clk_enable; 117 + } 118 + 119 + rate = clk_get_rate(clk); 120 + 121 + rstc = of_reset_control_get(np, NULL); 122 + if (!IS_ERR(rstc)) { 123 + reset_control_assert(rstc); 124 + reset_control_deassert(rstc); 125 + } 126 + 127 + data->base = of_iomap(np, 0); 128 + if (!data->base) { 129 + pr_err("failed to map registers for clockevent\n"); 130 + goto err_iomap; 131 + } 132 + 133 + irq = irq_of_parse_and_map(np, 0); 134 + if (!irq) { 135 + pr_err("%s: failed to get irq.\n", np->full_name); 136 + goto err_get_irq; 137 + } 138 + 139 + /* Detect whether the timer is 16 or 32 bits */ 140 + writel_relaxed(~0U, data->base + TIM_ARR); 141 + max_delta = readl_relaxed(data->base + TIM_ARR); 142 + if (max_delta == ~0U) { 143 + prescaler = 1; 144 + bits = 32; 145 + } else { 146 + prescaler = 1024; 147 + bits = 16; 148 + } 149 + writel_relaxed(0, data->base + TIM_ARR); 150 + 151 + writel_relaxed(prescaler - 1, data->base + TIM_PSC); 152 + writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); 153 + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); 154 + writel_relaxed(0, data->base + TIM_SR); 155 + 156 + data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); 157 + 158 + clockevents_config_and_register(&data->evtdev, 159 + DIV_ROUND_CLOSEST(rate, prescaler), 160 + 0x1, max_delta); 161 + 162 + ret = request_irq(irq, stm32_clock_event_handler, IRQF_TIMER, 163 + "stm32 clockevent", data); 164 + if (ret) { 165 + pr_err("%s: failed to request irq.\n", np->full_name); 166 + goto err_get_irq; 167 + } 168 + 169 + pr_info("%s: STM32 clockevent driver initialized (%d bits)\n", 170 + np->full_name, bits); 171 + 172 + return; 173 + 174 + err_get_irq: 175 + iounmap(data->base); 176 + err_iomap: 177 + clk_disable_unprepare(clk); 178 + err_clk_enable: 179 + clk_put(clk); 180 + err_clk_get: 181 + return; 182 + } 183 + 184 + CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init);
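The STM32 driver probes the counter width by writing all-ones to the auto-reload register (TIM_ARR) and reading it back: a 16-bit timer silently truncates the value. The probe logic, modelled in Python (the mask stands in for the hardware register width; names are illustrative):

```python
def detect_timer_bits(write_val, arr_mask):
    # Hardware retains only the bits the register physically has;
    # reading back a truncated value reveals a 16-bit timer.
    readback = write_val & arr_mask
    return 32 if readback == 0xffffffff else 16

# Typical STM32 parts have a mix of 16-bit and 32-bit general-purpose timers.
print(detect_timer_bits(0xffffffff, 0x0000ffff))  # 16
print(detect_timer_bits(0xffffffff, 0xffffffff))  # 32
```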
+1 -1
drivers/clocksource/timer-sun5i.c
··· 324 324 int irq; 325 325 326 326 timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); 327 - if (!timer_base) 327 + if (IS_ERR(timer_base)) 328 328 panic("Can't map registers"); 329 329 330 330 irq = irq_of_parse_and_map(node, 0);
-1
drivers/power/reset/ltc2952-poweroff.c
··· 158 158 HRTIMER_MODE_REL); 159 159 } else { 160 160 hrtimer_cancel(&data->timer_trigger); 161 - /* omitting return value check, timer should have been valid */ 162 161 } 163 162 return IRQ_HANDLED; 164 163 }
+8 -8
fs/dcache.c
··· 322 322 } 323 323 324 324 /** 325 - * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups 325 + * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups 326 326 * @dentry: the target dentry 327 327 * After this call, in-progress rcu-walk path lookup will fail. This 328 328 * should be called after unhashing, and after changing d_inode (if 329 329 * the dentry has not already been unhashed). 330 330 */ 331 - static inline void dentry_rcuwalk_barrier(struct dentry *dentry) 331 + static inline void dentry_rcuwalk_invalidate(struct dentry *dentry) 332 332 { 333 - assert_spin_locked(&dentry->d_lock); 334 - /* Go through a barrier */ 335 - write_seqcount_barrier(&dentry->d_seq); 333 + lockdep_assert_held(&dentry->d_lock); 334 + /* Go through an invalidation barrier */ 335 + write_seqcount_invalidate(&dentry->d_seq); 336 336 } 337 337 338 338 /* ··· 372 372 struct inode *inode = dentry->d_inode; 373 373 __d_clear_type_and_inode(dentry); 374 374 hlist_del_init(&dentry->d_u.d_alias); 375 - dentry_rcuwalk_barrier(dentry); 375 + dentry_rcuwalk_invalidate(dentry); 376 376 spin_unlock(&dentry->d_lock); 377 377 spin_unlock(&inode->i_lock); 378 378 if (!inode->i_nlink) ··· 494 494 __hlist_bl_del(&dentry->d_hash); 495 495 dentry->d_hash.pprev = NULL; 496 496 hlist_bl_unlock(b); 497 - dentry_rcuwalk_barrier(dentry); 497 + dentry_rcuwalk_invalidate(dentry); 498 498 } 499 499 } 500 500 EXPORT_SYMBOL(__d_drop); ··· 1752 1752 if (inode) 1753 1753 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1754 1754 __d_set_inode_and_type(dentry, inode, add_flags); 1755 - dentry_rcuwalk_barrier(dentry); 1755 + dentry_rcuwalk_invalidate(dentry); 1756 1756 spin_unlock(&dentry->d_lock); 1757 1757 fsnotify_d_instantiate(dentry, inode); 1758 1758 }
+2 -2
include/linux/alarmtimer.h
··· 43 43 44 44 void alarm_init(struct alarm *alarm, enum alarmtimer_type type, 45 45 enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); 46 - int alarm_start(struct alarm *alarm, ktime_t start); 47 - int alarm_start_relative(struct alarm *alarm, ktime_t start); 46 + void alarm_start(struct alarm *alarm, ktime_t start); 47 + void alarm_start_relative(struct alarm *alarm, ktime_t start); 48 48 void alarm_restart(struct alarm *alarm); 49 49 int alarm_try_to_cancel(struct alarm *alarm); 50 50 int alarm_cancel(struct alarm *alarm);
+34 -3
include/linux/clockchips.h
··· 37 37 * reached from DETACHED or SHUTDOWN. 38 38 * ONESHOT: Device is programmed to generate event only once. Can be reached 39 39 * from DETACHED or SHUTDOWN. 40 + * ONESHOT_STOPPED: Device was programmed in ONESHOT mode and is temporarily 41 + * stopped. 40 42 */ 41 43 enum clock_event_state { 42 44 CLOCK_EVT_STATE_DETACHED, 43 45 CLOCK_EVT_STATE_SHUTDOWN, 44 46 CLOCK_EVT_STATE_PERIODIC, 45 47 CLOCK_EVT_STATE_ONESHOT, 48 + CLOCK_EVT_STATE_ONESHOT_STOPPED, 46 49 }; 47 50 48 51 /* ··· 87 84 * @mult: nanosecond to cycles multiplier 88 85 * @shift: nanoseconds to cycles divisor (power of two) 89 86 * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE 90 - * @state: current state of the device, assigned by the core code 87 + * @state_use_accessors:current state of the device, assigned by the core code 91 88 * @features: features 92 89 * @retries: number of forced programming retries 93 90 * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. 94 91 * @set_state_periodic: switch state to periodic, if !set_mode 95 92 * @set_state_oneshot: switch state to oneshot, if !set_mode 93 + * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode 96 94 * @set_state_shutdown: switch state to shutdown, if !set_mode 97 95 * @tick_resume: resume clkevt device, if !set_mode 98 96 * @broadcast: function to broadcast events ··· 117 113 u32 mult; 118 114 u32 shift; 119 115 enum clock_event_mode mode; 120 - enum clock_event_state state; 116 + enum clock_event_state state_use_accessors; 121 117 unsigned int features; 122 118 unsigned long retries; 123 119 ··· 125 121 * State transition callback(s): Only one of the two groups should be 126 122 * defined: 127 123 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. 128 - * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). 124 + * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). 
129 125 */ 130 126 void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); 131 127 int (*set_state_periodic)(struct clock_event_device *); 132 128 int (*set_state_oneshot)(struct clock_event_device *); 129 + int (*set_state_oneshot_stopped)(struct clock_event_device *); 133 130 int (*set_state_shutdown)(struct clock_event_device *); 134 131 int (*tick_resume)(struct clock_event_device *); 135 132 ··· 148 143 struct list_head list; 149 144 struct module *owner; 150 145 } ____cacheline_aligned; 146 + 147 + /* Helpers to verify state of a clockevent device */ 148 + static inline bool clockevent_state_detached(struct clock_event_device *dev) 149 + { 150 + return dev->state_use_accessors == CLOCK_EVT_STATE_DETACHED; 151 + } 152 + 153 + static inline bool clockevent_state_shutdown(struct clock_event_device *dev) 154 + { 155 + return dev->state_use_accessors == CLOCK_EVT_STATE_SHUTDOWN; 156 + } 157 + 158 + static inline bool clockevent_state_periodic(struct clock_event_device *dev) 159 + { 160 + return dev->state_use_accessors == CLOCK_EVT_STATE_PERIODIC; 161 + } 162 + 163 + static inline bool clockevent_state_oneshot(struct clock_event_device *dev) 164 + { 165 + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT; 166 + } 167 + 168 + static inline bool clockevent_state_oneshot_stopped(struct clock_event_device *dev) 169 + { 170 + return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED; 171 + } 151 172 152 173 /* 153 174 * Calculate a multiplication factor for scaled math, which is used to convert
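The new accessor helpers are plain equality checks against the renamed state_use_accessors field. A compilable sketch, with the enum and two of the helpers taken from the hunk above; the struct is reduced to the single field needed here:

```c
#include <assert.h>

/* State machine values as introduced in clockchips.h above, including
 * the new ONESHOT_STOPPED state. */
enum clock_event_state {
	CLOCK_EVT_STATE_DETACHED,
	CLOCK_EVT_STATE_SHUTDOWN,
	CLOCK_EVT_STATE_PERIODIC,
	CLOCK_EVT_STATE_ONESHOT,
	CLOCK_EVT_STATE_ONESHOT_STOPPED,
};

/* Reduced to the one field relevant to the accessors; the real struct
 * carries many more members. The awkward field name deliberately
 * discourages direct access outside the helpers. */
struct clock_event_device {
	enum clock_event_state state_use_accessors;
};

static inline int clockevent_state_oneshot(struct clock_event_device *dev)
{
	return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT;
}

static inline int clockevent_state_oneshot_stopped(struct clock_event_device *dev)
{
	return dev->state_use_accessors == CLOCK_EVT_STATE_ONESHOT_STOPPED;
}
```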
-1
include/linux/clocksource.h
··· 181 181 182 182 extern int clocksource_unregister(struct clocksource*); 183 183 extern void clocksource_touch_watchdog(void); 184 - extern struct clocksource* clocksource_get_next(void); 185 184 extern void clocksource_change_rating(struct clocksource *cs, int rating); 186 185 extern void clocksource_suspend(void); 187 186 extern void clocksource_resume(void);
+90 -77
include/linux/hrtimer.h
··· 53 53 * 54 54 * 0x00 inactive 55 55 * 0x01 enqueued into rbtree 56 - * 0x02 callback function running 57 - * 0x04 timer is migrated to another cpu 58 56 * 59 - * Special cases: 60 - * 0x03 callback function running and enqueued 61 - * (was requeued on another CPU) 62 - * 0x05 timer was migrated on CPU hotunplug 57 + * The callback state is not part of the timer->state because clearing it would 58 + * mean touching the timer after the callback, this makes it impossible to free 59 + * the timer from the callback function. 63 60 * 64 - * The "callback function running and enqueued" status is only possible on 65 - * SMP. It happens for example when a posix timer expired and the callback 61 + * Therefore we track the callback state in: 62 + * 63 + * timer->base->cpu_base->running == timer 64 + * 65 + * On SMP it is possible to have a "callback function running and enqueued" 66 + * status. It happens for example when a posix timer expired and the callback 66 67 * queued a signal. Between dropping the lock which protects the posix timer 67 68 * and reacquiring the base lock of the hrtimer, another CPU can deliver the 68 - * signal and rearm the timer. We have to preserve the callback running state, 69 - * as otherwise the timer could be removed before the softirq code finishes the 70 - * the handling of the timer. 71 - * 72 - * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state 73 - * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This 74 - * also affects HRTIMER_STATE_MIGRATE where the preservation is not 75 - * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is 76 - * enqueued on the new cpu. 69 + * signal and rearm the timer. 77 70 * 78 71 * All state transitions are protected by cpu_base->lock. 
79 72 */ 80 73 #define HRTIMER_STATE_INACTIVE 0x00 81 74 #define HRTIMER_STATE_ENQUEUED 0x01 82 - #define HRTIMER_STATE_CALLBACK 0x02 83 - #define HRTIMER_STATE_MIGRATE 0x04 84 75 85 76 /** 86 77 * struct hrtimer - the basic hrtimer structure ··· 121 130 struct task_struct *task; 122 131 }; 123 132 133 + #ifdef CONFIG_64BIT 134 + # define HRTIMER_CLOCK_BASE_ALIGN 64 135 + #else 136 + # define HRTIMER_CLOCK_BASE_ALIGN 32 137 + #endif 138 + 124 139 /** 125 140 * struct hrtimer_clock_base - the timer base for a specific clock 126 141 * @cpu_base: per cpu clock base ··· 134 137 * timer to a base on another cpu. 135 138 * @clockid: clock id for per_cpu support 136 139 * @active: red black tree root node for the active timers 137 - * @resolution: the resolution of the clock, in nanoseconds 138 140 * @get_time: function to retrieve the current time of the clock 139 - * @softirq_time: the time when running the hrtimer queue in the softirq 140 141 * @offset: offset of this clock to the monotonic base 141 142 */ 142 143 struct hrtimer_clock_base { ··· 142 147 int index; 143 148 clockid_t clockid; 144 149 struct timerqueue_head active; 145 - ktime_t resolution; 146 150 ktime_t (*get_time)(void); 147 - ktime_t softirq_time; 148 151 ktime_t offset; 149 - }; 152 + } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); 150 153 151 154 enum hrtimer_base_type { 152 155 HRTIMER_BASE_MONOTONIC, ··· 158 165 * struct hrtimer_cpu_base - the per cpu clock bases 159 166 * @lock: lock protecting the base and associated clock bases 160 167 * and timers 168 + * @seq: seqcount around __run_hrtimer 169 + * @running: pointer to the currently running hrtimer 161 170 * @cpu: cpu number 162 171 * @active_bases: Bitfield to mark bases with active timers 163 - * @clock_was_set: Indicates that clock was set from irq context. 
172 + * @clock_was_set_seq: Sequence counter of clock was set events 173 + * @migration_enabled: The migration of hrtimers to other cpus is enabled 174 + * @nohz_active: The nohz functionality is enabled 164 175 * @expires_next: absolute time of the next event which was scheduled 165 176 * via clock_set_next_event() 177 + * @next_timer: Pointer to the first expiring timer 166 178 * @in_hrtirq: hrtimer_interrupt() is currently executing 167 179 * @hres_active: State of high resolution mode 168 180 * @hang_detected: The last hrtimer interrupt detected a hang ··· 176 178 * @nr_hangs: Total number of hrtimer interrupt hangs 177 179 * @max_hang_time: Maximum time spent in hrtimer_interrupt 178 180 * @clock_base: array of clock bases for this cpu 181 + * 182 + * Note: next_timer is just an optimization for __remove_hrtimer(). 183 + * Do not dereference the pointer because it is not reliable on 184 + * cross cpu removals. 179 185 */ 180 186 struct hrtimer_cpu_base { 181 187 raw_spinlock_t lock; 188 + seqcount_t seq; 189 + struct hrtimer *running; 182 190 unsigned int cpu; 183 191 unsigned int active_bases; 184 - unsigned int clock_was_set; 192 + unsigned int clock_was_set_seq; 193 + bool migration_enabled; 194 + bool nohz_active; 185 195 #ifdef CONFIG_HIGH_RES_TIMERS 196 + unsigned int in_hrtirq : 1, 197 + hres_active : 1, 198 + hang_detected : 1; 186 199 ktime_t expires_next; 187 - int in_hrtirq; 188 - int hres_active; 189 - int hang_detected; 190 - unsigned long nr_events; 191 - unsigned long nr_retries; 192 - unsigned long nr_hangs; 193 - ktime_t max_hang_time; 200 + struct hrtimer *next_timer; 201 + unsigned int nr_events; 202 + unsigned int nr_retries; 203 + unsigned int nr_hangs; 204 + unsigned int max_hang_time; 194 205 #endif 195 206 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; 196 - }; 207 + } ____cacheline_aligned; 197 208 198 209 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) 199 210 { 211 + 
BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); 212 + 200 213 timer->node.expires = time; 201 214 timer->_softexpires = time; 202 215 } ··· 271 262 return ktime_sub(timer->node.expires, timer->base->get_time()); 272 263 } 273 264 274 - #ifdef CONFIG_HIGH_RES_TIMERS 275 - struct clock_event_device; 276 - 277 - extern void hrtimer_interrupt(struct clock_event_device *dev); 278 - 279 - /* 280 - * In high resolution mode the time reference must be read accurate 281 - */ 282 265 static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) 283 266 { 284 267 return timer->base->get_time(); 285 268 } 269 + 270 + #ifdef CONFIG_HIGH_RES_TIMERS 271 + struct clock_event_device; 272 + 273 + extern void hrtimer_interrupt(struct clock_event_device *dev); 286 274 287 275 static inline int hrtimer_is_hres_active(struct hrtimer *timer) 288 276 { ··· 301 295 302 296 extern void clock_was_set_delayed(void); 303 297 298 + extern unsigned int hrtimer_resolution; 299 + 304 300 #else 305 301 306 302 # define MONOTONIC_RES_NSEC LOW_RES_NSEC 307 303 # define KTIME_MONOTONIC_RES KTIME_LOW_RES 308 304 309 - static inline void hrtimer_peek_ahead_timers(void) { } 305 + #define hrtimer_resolution (unsigned int)LOW_RES_NSEC 310 306 311 - /* 312 - * In non high resolution mode the time reference is taken from 313 - * the base softirq time variable. 
314 - */ 315 - static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) 316 - { 317 - return timer->base->softirq_time; 318 - } 307 + static inline void hrtimer_peek_ahead_timers(void) { } 319 308 320 309 static inline int hrtimer_is_hres_active(struct hrtimer *timer) 321 310 { ··· 354 353 #endif 355 354 356 355 /* Basic timer operations: */ 357 - extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, 358 - const enum hrtimer_mode mode); 359 - extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 356 + extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 360 357 unsigned long range_ns, const enum hrtimer_mode mode); 361 - extern int 362 - __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 363 - unsigned long delta_ns, 364 - const enum hrtimer_mode mode, int wakeup); 358 + 359 + /** 360 + * hrtimer_start - (re)start an hrtimer on the current CPU 361 + * @timer: the timer to be added 362 + * @tim: expiry time 363 + * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or 364 + * relative (HRTIMER_MODE_REL) 365 + */ 366 + static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, 367 + const enum hrtimer_mode mode) 368 + { 369 + hrtimer_start_range_ns(timer, tim, 0, mode); 370 + } 365 371 366 372 extern int hrtimer_cancel(struct hrtimer *timer); 367 373 extern int hrtimer_try_to_cancel(struct hrtimer *timer); 368 374 369 - static inline int hrtimer_start_expires(struct hrtimer *timer, 370 - enum hrtimer_mode mode) 375 + static inline void hrtimer_start_expires(struct hrtimer *timer, 376 + enum hrtimer_mode mode) 371 377 { 372 378 unsigned long delta; 373 379 ktime_t soft, hard; 374 380 soft = hrtimer_get_softexpires(timer); 375 381 hard = hrtimer_get_expires(timer); 376 382 delta = ktime_to_ns(ktime_sub(hard, soft)); 377 - return hrtimer_start_range_ns(timer, soft, delta, mode); 383 + hrtimer_start_range_ns(timer, soft, delta, mode); 378 384 } 379 385 380 - static inline int hrtimer_restart(struct hrtimer 
*timer) 386 + static inline void hrtimer_restart(struct hrtimer *timer) 381 387 { 382 - return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 388 + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 383 389 } 384 390 385 391 /* Query timers: */ 386 392 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); 387 - extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); 388 393 389 - extern ktime_t hrtimer_get_next_event(void); 394 + extern u64 hrtimer_get_next_event(void); 390 395 391 - /* 392 - * A timer is active, when it is enqueued into the rbtree or the 393 - * callback function is running or it's in the state of being migrated 394 - * to another cpu. 395 - */ 396 - static inline int hrtimer_active(const struct hrtimer *timer) 397 - { 398 - return timer->state != HRTIMER_STATE_INACTIVE; 399 - } 396 + extern bool hrtimer_active(const struct hrtimer *timer); 400 397 401 398 /* 402 399 * Helper function to check, whether the timer is on one of the queues ··· 410 411 */ 411 412 static inline int hrtimer_callback_running(struct hrtimer *timer) 412 413 { 413 - return timer->state & HRTIMER_STATE_CALLBACK; 414 + return timer->base->cpu_base->running == timer; 414 415 } 415 416 416 417 /* Forward a hrtimer so it expires after now: */ 417 418 extern u64 418 419 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); 419 420 420 - /* Forward a hrtimer so it expires after the hrtimer's current now */ 421 + /** 422 + * hrtimer_forward_now - forward the timer expiry so it expires after now 423 + * @timer: hrtimer to forward 424 + * @interval: the interval to forward 425 + * 426 + * Forward the timer expiry so it will expire after the current time 427 + * of the hrtimer clock base. Returns the number of overruns. 428 + * 429 + * Can be safely called from the callback function of @timer. 
If 430 + * called from other contexts @timer must neither be enqueued nor 431 + * running the callback and the caller needs to take care of 432 + * serialization. 433 + * 434 + * Note: This only updates the timer expiry value and does not requeue 435 + * the timer. 436 + */ 421 437 static inline u64 hrtimer_forward_now(struct hrtimer *timer, 422 438 ktime_t interval) 423 439 { ··· 457 443 458 444 /* Soft interrupt function to run the hrtimer queues: */ 459 445 extern void hrtimer_run_queues(void); 460 - extern void hrtimer_run_pending(void); 461 446 462 447 /* Bootup initialization: */ 463 448 extern void __init hrtimers_init(void);
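hrtimer_forward(), whose overrun semantics hrtimer_forward_now() inherits, pushes the expiry forward by whole intervals until it lies strictly after now, and returns the number of intervals skipped. A simplified userspace sketch of that arithmetic, ignoring slack and the ktime_divns() internals; forward_expiry() is a hypothetical stand-in with times modeled as plain nanosecond counters:

```c
#include <assert.h>
#include <stdint.h>

/* Simplified model of the hrtimer_forward() overrun computation:
 * advance *expires past 'now' in whole multiples of 'interval' and
 * report how many intervals were skipped (the overrun count). */
static uint64_t forward_expiry(uint64_t *expires, uint64_t now,
			       uint64_t interval)
{
	uint64_t orun;

	/* Already in the future: nothing to do, no overruns. */
	if (*expires > now)
		return 0;

	/* Number of whole intervals between the stale expiry and now,
	 * plus one so the new expiry lands strictly after now. */
	orun = (now - *expires) / interval + 1;
	*expires += orun * interval;

	return orun;
}
```

As the kernel-doc above notes, this only updates the expiry value; the caller still has to requeue the timer.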
+5 -4
include/linux/interrupt.h
··· 413 413 BLOCK_IOPOLL_SOFTIRQ, 414 414 TASKLET_SOFTIRQ, 415 415 SCHED_SOFTIRQ, 416 - HRTIMER_SOFTIRQ, 416 + HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the 417 + numbering. Sigh! */ 417 418 RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ 418 419 419 420 NR_SOFTIRQS ··· 593 592 clockid_t which_clock, enum hrtimer_mode mode); 594 593 595 594 static inline 596 - int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, 597 - const enum hrtimer_mode mode) 595 + void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, 596 + const enum hrtimer_mode mode) 598 597 { 599 - return hrtimer_start(&ttimer->timer, time, mode); 598 + hrtimer_start(&ttimer->timer, time, mode); 600 599 } 601 600 602 601 static inline
+128 -2
include/linux/jiffies.h
··· 7 7 #include <linux/time.h> 8 8 #include <linux/timex.h> 9 9 #include <asm/param.h> /* for HZ */ 10 + #include <generated/timeconst.h> 10 11 11 12 /* 12 13 * The following defines establish the engineering parameters of the PLL ··· 289 288 return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; 290 289 } 291 290 292 - extern unsigned long msecs_to_jiffies(const unsigned int m); 293 - extern unsigned long usecs_to_jiffies(const unsigned int u); 291 + extern unsigned long __msecs_to_jiffies(const unsigned int m); 292 + #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) 293 + /* 294 + * HZ is equal to or smaller than 1000, and 1000 is a nice round 295 + * multiple of HZ, divide with the factor between them, but round 296 + * upwards: 297 + */ 298 + static inline unsigned long _msecs_to_jiffies(const unsigned int m) 299 + { 300 + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); 301 + } 302 + #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) 303 + /* 304 + * HZ is larger than 1000, and HZ is a nice round multiple of 1000 - 305 + * simply multiply with the factor between them. 306 + * 307 + * But first make sure the multiplication result cannot overflow: 308 + */ 309 + static inline unsigned long _msecs_to_jiffies(const unsigned int m) 310 + { 311 + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) 312 + return MAX_JIFFY_OFFSET; 313 + return m * (HZ / MSEC_PER_SEC); 314 + } 315 + #else 316 + /* 317 + * Generic case - multiply, round and divide. 
But first check that if 318 + * we are doing a net multiplication, that we wouldn't overflow: 319 + */ 320 + static inline unsigned long _msecs_to_jiffies(const unsigned int m) 321 + { 322 + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) 323 + return MAX_JIFFY_OFFSET; 324 + 325 + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; 326 + } 327 + #endif 328 + /** 329 + * msecs_to_jiffies: - convert milliseconds to jiffies 330 + * @m: time in milliseconds 331 + * 332 + * conversion is done as follows: 333 + * 334 + * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) 335 + * 336 + * - 'too large' values [that would result in larger than 337 + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. 338 + * 339 + * - all other values are converted to jiffies by either multiplying 340 + * the input value by a factor or dividing it with a factor and 341 + * handling any 32-bit overflows. 342 + * for the details see __msecs_to_jiffies() 343 + * 344 + * msecs_to_jiffies() checks for the passed in value being a constant 345 + * via __builtin_constant_p() allowing gcc to eliminate most of the 346 + * code, __msecs_to_jiffies() is called if the value passed does not 347 + * allow constant folding and the actual conversion must be done at 348 + * runtime. 349 + * the HZ range specific helpers _msecs_to_jiffies() are called both 350 + * directly here and from __msecs_to_jiffies() in the case where 351 + * constant folding is not possible. 
352 + */ 353 + static inline unsigned long msecs_to_jiffies(const unsigned int m) 354 + { 355 + if (__builtin_constant_p(m)) { 356 + if ((int)m < 0) 357 + return MAX_JIFFY_OFFSET; 358 + return _msecs_to_jiffies(m); 359 + } else { 360 + return __msecs_to_jiffies(m); 361 + } 362 + } 363 + 364 + extern unsigned long __usecs_to_jiffies(const unsigned int u); 365 + #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) 366 + static inline unsigned long _usecs_to_jiffies(const unsigned int u) 367 + { 368 + return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); 369 + } 370 + #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) 371 + static inline unsigned long _usecs_to_jiffies(const unsigned int u) 372 + { 373 + return u * (HZ / USEC_PER_SEC); 374 + } 375 + #else 378 + static inline unsigned long _usecs_to_jiffies(const unsigned int u) 379 + { 380 + return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) 381 + >> USEC_TO_HZ_SHR32; 382 + } 383 + #endif 384 + 385 + /** 386 + * usecs_to_jiffies: - convert microseconds to jiffies 387 + * @u: time in microseconds 388 + * 389 + * conversion is done as follows: 390 + * 391 + * - 'too large' values [that would result in larger than 392 + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. 393 + * 394 + * - all other values are converted to jiffies by either multiplying 395 + * the input value by a factor or dividing it with a factor and 396 + * handling any 32-bit overflows as for msecs_to_jiffies. 397 + * 398 + * usecs_to_jiffies() checks for the passed in value being a constant 399 + * via __builtin_constant_p() allowing gcc to eliminate most of the 400 + * code, __usecs_to_jiffies() is called if the value passed does not 401 + * allow constant folding and the actual conversion must be done at 402 + * runtime. 
403 + * the HZ range specific helpers _usecs_to_jiffies() are called both 404 + * directly here and from __usecs_to_jiffies() in the case where 405 + * constant folding is not possible. 406 + */ 407 + static inline unsigned long usecs_to_jiffies(const unsigned int u) 408 + { 409 + if (__builtin_constant_p(u)) { 410 + if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) 411 + return MAX_JIFFY_OFFSET; 412 + return _usecs_to_jiffies(u); 413 + } else { 414 + return __usecs_to_jiffies(u); 415 + } 416 + } 417 + 294 418 extern unsigned long timespec_to_jiffies(const struct timespec *value); 295 419 extern void jiffies_to_timespec(const unsigned long jiffies, 296 420 struct timespec *value);
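The constant-foldable fast path above, for the common case where HZ divides 1000, is a round-up division by the milliseconds-per-jiffy factor. A userspace sketch, assuming HZ = 250 for illustration (the kernel selects the branch at compile time via the #if/#elif chain and lets gcc fold constant arguments):

```c
#include <assert.h>

/* Illustrative values only: HZ = 250 gives 4 ms per jiffy. */
#define SKETCH_HZ		250U
#define SKETCH_MSEC_PER_SEC	1000U

/* Mirrors _msecs_to_jiffies() for HZ <= 1000 with 1000 % HZ == 0:
 * divide by the factor between 1000 and HZ, rounding upwards so a
 * nonzero timeout never truncates to zero jiffies. */
static unsigned long sketch_msecs_to_jiffies(unsigned int m)
{
	return (m + (SKETCH_MSEC_PER_SEC / SKETCH_HZ) - 1) /
	       (SKETCH_MSEC_PER_SEC / SKETCH_HZ);
}
```

At HZ = 250 one jiffy is 4 ms, so 1 ms rounds up to 1 jiffy, 4 ms is exactly 1 jiffy, and 5 ms rounds up to 2 jiffies.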
+4
include/linux/perf_event.h
··· 562 562 struct perf_event_context *task_ctx; 563 563 int active_oncpu; 564 564 int exclusive; 565 + 566 + raw_spinlock_t hrtimer_lock; 565 567 struct hrtimer hrtimer; 566 568 ktime_t hrtimer_interval; 569 + unsigned int hrtimer_active; 570 + 567 571 struct pmu *unique_pmu; 568 572 struct perf_cgroup *cgrp; 569 573 };
+4 -2
include/linux/rcupdate.h
··· 44 44 #include <linux/debugobjects.h> 45 45 #include <linux/bug.h> 46 46 #include <linux/compiler.h> 47 + #include <linux/ktime.h> 48 + 47 49 #include <asm/barrier.h> 48 50 49 51 extern int rcu_expedited; /* for sysctl */ ··· 1102 1100 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 1103 1101 1104 1102 #ifdef CONFIG_TINY_RCU 1105 - static inline int rcu_needs_cpu(unsigned long *delta_jiffies) 1103 + static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) 1106 1104 { 1107 - *delta_jiffies = ULONG_MAX; 1105 + *nextevt = KTIME_MAX; 1108 1106 return 0; 1109 1107 } 1110 1108 #endif /* #ifdef CONFIG_TINY_RCU */
+1 -1
include/linux/rcutree.h
··· 31 31 #define __LINUX_RCUTREE_H 32 32 33 33 void rcu_note_context_switch(void); 34 - int rcu_needs_cpu(unsigned long *delta_jiffies); 34 + int rcu_needs_cpu(u64 basem, u64 *nextevt); 35 35 void rcu_cpu_stall_reset(void); 36 36 37 37 /*
+1 -5
include/linux/sched.h
··· 345 345 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 346 346 extern void nohz_balance_enter_idle(int cpu); 347 347 extern void set_cpu_sd_state_idle(void); 348 - extern int get_nohz_timer_target(int pinned); 348 + extern int get_nohz_timer_target(void); 349 349 #else 350 350 static inline void nohz_balance_enter_idle(int cpu) { } 351 351 static inline void set_cpu_sd_state_idle(void) { } 352 - static inline int get_nohz_timer_target(int pinned) 353 - { 354 - return smp_processor_id(); 355 - } 356 352 #endif 357 353 358 354 /*
-12
include/linux/sched/sysctl.h
··· 57 57 extern unsigned int sysctl_sched_migration_cost; 58 58 extern unsigned int sysctl_sched_nr_migrate; 59 59 extern unsigned int sysctl_sched_time_avg; 60 - extern unsigned int sysctl_timer_migration; 61 60 extern unsigned int sysctl_sched_shares_window; 62 61 63 62 int sched_proc_update_handler(struct ctl_table *table, int write, 64 63 void __user *buffer, size_t *length, 65 64 loff_t *ppos); 66 - #endif 67 - #ifdef CONFIG_SCHED_DEBUG 68 - static inline unsigned int get_sysctl_timer_migration(void) 69 - { 70 - return sysctl_timer_migration; 71 - } 72 - #else 73 - static inline unsigned int get_sysctl_timer_migration(void) 74 - { 75 - return 1; 76 - } 77 65 #endif 78 66 79 67 /*
+44 -3
include/linux/seqlock.h
··· 233 233 s->sequence++; 234 234 } 235 235 236 + /** 237 + * raw_write_seqcount_barrier - do a seq write barrier 238 + * @s: pointer to seqcount_t 239 + * 240 + * This can be used to provide an ordering guarantee instead of the 241 + * usual consistency guarantee. It is one wmb cheaper, because we can 242 + * collapse the two back-to-back wmb()s. 243 + * 244 + * seqcount_t seq; 245 + * bool X = true, Y = false; 246 + * 247 + * void read(void) 248 + * { 249 + * bool x, y; 250 + * 251 + * do { 252 + * int s = read_seqcount_begin(&seq); 253 + * 254 + * x = X; y = Y; 255 + * 256 + * } while (read_seqcount_retry(&seq, s)); 257 + * 258 + * BUG_ON(!x && !y); 259 + * } 260 + * 261 + * void write(void) 262 + * { 263 + * Y = true; 264 + * 265 + * raw_write_seqcount_barrier(seq); 266 + * 267 + * X = false; 268 + * } 269 + */ 270 + static inline void raw_write_seqcount_barrier(seqcount_t *s) 271 + { 272 + s->sequence++; 273 + smp_wmb(); 274 + s->sequence++; 275 + } 276 + 236 277 /* 237 278 * raw_write_seqcount_latch - redirect readers to even/odd copy 238 279 * @s: pointer to seqcount_t ··· 307 266 } 308 267 309 268 /** 310 - * write_seqcount_barrier - invalidate in-progress read-side seq operations 269 + * write_seqcount_invalidate - invalidate in-progress read-side seq operations 311 270 * @s: pointer to seqcount_t 312 271 * 313 - * After write_seqcount_barrier, no read-side seq operations will complete 272 + * After write_seqcount_invalidate, no read-side seq operations will complete 314 273 * successfully and see data older than this. 315 274 */ 316 - static inline void write_seqcount_barrier(seqcount_t *s) 275 + static inline void write_seqcount_invalidate(seqcount_t *s) 317 276 { 318 277 smp_wmb(); 319 278 s->sequence+=2;
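The read/write example in the raw_write_seqcount_barrier() kernel-doc above can be turned into a self-contained, single-threaded sketch. There are no real memory barriers here (smp_wmb() is elided), so this only demonstrates the sequence-number protocol, not the SMP ordering guarantee:

```c
#include <assert.h>

/* Shared state from the kernel-doc example: X starts true, Y false,
 * and the writer flips them with a seqcount barrier in between so a
 * reader never observes both as false. */
static unsigned int seq;	/* even = stable, odd = write in flight */
static int X = 1, Y = 0;

static unsigned int read_begin(void)
{
	return seq;
}

static int read_retry(unsigned int start)
{
	/* Retry if the sequence moved or a write was in progress. */
	return seq != start || (seq & 1);
}

/* Models the write() side of the example: one wmb collapsed into the
 * two back-to-back sequence increments. */
static void write_barrier_style(void)
{
	Y = 1;
	seq++;			/* readers in flight will now retry */
	/* smp_wmb() would sit here in the kernel */
	seq++;			/* sequence is even again */
	X = 0;
}

static void read_xy(int *x, int *y)
{
	unsigned int s;

	do {
		s = read_begin();
		*x = X;
		*y = Y;
	} while (read_retry(s));
}
```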
+2
include/linux/time64.h
··· 2 2 #define _LINUX_TIME64_H 3 3 4 4 #include <uapi/linux/time.h> 5 + #include <linux/math64.h> 5 6 6 7 typedef __s64 time64_t; 7 8 ··· 29 28 #define FSEC_PER_SEC 1000000000000000LL 30 29 31 30 /* Located here for timespec[64]_valid_strict */ 31 + #define TIME64_MAX ((s64)~((u64)1 << 63)) 32 32 #define KTIME_MAX ((s64)~((u64)1 << 63)) 33 33 #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) 34 34
+19
include/linux/timekeeper_internal.h
··· 49 49 * @offs_boot: Offset clock monotonic -> clock boottime 50 50 * @offs_tai: Offset clock monotonic -> clock tai 51 51 * @tai_offset: The current UTC to TAI offset in seconds 52 + * @clock_was_set_seq: The sequence number of clock was set events 53 + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second 52 54 * @raw_time: Monotonic raw base time in timespec64 format 53 55 * @cycle_interval: Number of clock cycles in one NTP interval 54 56 * @xtime_interval: Number of clock shifted nano seconds in one NTP ··· 62 60 * shifted nano seconds. 63 61 * @ntp_error_shift: Shift conversion between clock shifted nano seconds and 64 62 * ntp shifted nano seconds. 63 + * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) 64 + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) 65 + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) 65 66 * 66 67 * Note: For timespec(64) based interfaces wall_to_monotonic is what 67 68 * we need to add to xtime (or xtime corrected for sub jiffie times) ··· 90 85 ktime_t offs_boot; 91 86 ktime_t offs_tai; 92 87 s32 tai_offset; 88 + unsigned int clock_was_set_seq; 89 + ktime_t next_leap_ktime; 93 90 struct timespec64 raw_time; 94 91 95 92 /* The following members are for timekeeping internal use */ ··· 111 104 s64 ntp_error; 112 105 u32 ntp_error_shift; 113 106 u32 ntp_err_mult; 107 + #ifdef CONFIG_DEBUG_TIMEKEEPING 108 + long last_warning; 109 + /* 110 + * These simple flag variables are managed 111 + * without locks, which is racy, but they are 112 + * ok since we don't really care about being 113 + * super precise about how many events were 114 + * seen, just that a problem was observed. 115 + */ 116 + int underflow_seen; 117 + int overflow_seen; 118 + #endif 114 119 }; 115 120 116 121 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
+1 -1
include/linux/timekeeping.h
··· 163 163 extern ktime_t ktime_get_with_offset(enum tk_offsets offs); 164 164 extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); 165 165 extern ktime_t ktime_get_raw(void); 166 + extern u32 ktime_get_resolution_ns(void); 166 167 167 168 /** 168 169 * ktime_get_real - get the real (wall-) time in ktime_t format ··· 267 266 268 267 extern void read_persistent_clock(struct timespec *ts); 269 268 extern void read_persistent_clock64(struct timespec64 *ts); 270 - extern void read_boot_clock(struct timespec *ts); 271 269 extern void read_boot_clock64(struct timespec64 *ts); 272 270 extern int update_persistent_clock(struct timespec now); 273 271 extern int update_persistent_clock64(struct timespec64 now);
+28 -35
include/linux/timer.h
··· 14 14 * All fields that change during normal runtime grouped to the 15 15 * same cacheline 16 16 */ 17 - struct list_head entry; 18 - unsigned long expires; 19 - struct tvec_base *base; 20 - 21 - void (*function)(unsigned long); 22 - unsigned long data; 23 - 24 - int slack; 17 + struct hlist_node entry; 18 + unsigned long expires; 19 + void (*function)(unsigned long); 20 + unsigned long data; 21 + u32 flags; 22 + int slack; 25 23 26 24 #ifdef CONFIG_TIMER_STATS 27 - int start_pid; 28 - void *start_site; 29 - char start_comm[16]; 25 + int start_pid; 26 + void *start_site; 27 + char start_comm[16]; 30 28 #endif 31 29 #ifdef CONFIG_LOCKDEP 32 - struct lockdep_map lockdep_map; 30 + struct lockdep_map lockdep_map; 33 31 #endif 34 32 }; 35 - 36 - extern struct tvec_base boot_tvec_bases; 37 33 38 34 #ifdef CONFIG_LOCKDEP 39 35 /* ··· 45 49 #endif 46 50 47 51 /* 48 - * Note that all tvec_bases are at least 4 byte aligned and lower two bits 49 - * of base in timer_list is guaranteed to be zero. Use them for flags. 50 - * 51 52 * A deferrable timer will work normally when the system is busy, but 52 53 * will not cause a CPU to come out of idle just to service it; instead, 53 54 * the timer will be serviced when the CPU eventually wakes up with a ··· 58 65 * workqueue locking issues. It's not meant for executing random crap 59 66 * with interrupts disabled. Abuse is monitored! 
60 67 */ 61 - #define TIMER_DEFERRABLE 0x1LU 62 - #define TIMER_IRQSAFE 0x2LU 63 - 64 - #define TIMER_FLAG_MASK 0x3LU 68 + #define TIMER_CPUMASK 0x0007FFFF 69 + #define TIMER_MIGRATING 0x00080000 70 + #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) 71 + #define TIMER_DEFERRABLE 0x00100000 72 + #define TIMER_IRQSAFE 0x00200000 65 73 66 74 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ 67 - .entry = { .prev = TIMER_ENTRY_STATIC }, \ 75 + .entry = { .next = TIMER_ENTRY_STATIC }, \ 68 76 .function = (_function), \ 69 77 .expires = (_expires), \ 70 78 .data = (_data), \ 71 - .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \ 79 + .flags = (_flags), \ 72 80 .slack = -1, \ 73 81 __TIMER_LOCKDEP_MAP_INITIALIZER( \ 74 82 __FILE__ ":" __stringify(__LINE__)) \ ··· 162 168 */ 163 169 static inline int timer_pending(const struct timer_list * timer) 164 170 { 165 - return timer->entry.next != NULL; 171 + return timer->entry.pprev != NULL; 166 172 } 167 173 168 174 extern void add_timer_on(struct timer_list *timer, int cpu); ··· 182 188 #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) 183 189 184 190 /* 185 - * Return when the next timer-wheel timeout occurs (in absolute jiffies), 186 - * locks the timer base and does the comparison against the given 187 - * jiffie. 
188 - */ 189 - extern unsigned long get_next_timer_interrupt(unsigned long now); 190 - 191 - /* 192 191 * Timer-statistics info: 193 192 */ 194 193 #ifdef CONFIG_TIMER_STATS 195 194 196 195 extern int timer_stats_active; 197 196 198 - #define TIMER_STATS_FLAG_DEFERRABLE 0x1 199 - 200 197 extern void init_timer_stats(void); 201 198 202 199 extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 203 - void *timerf, char *comm, 204 - unsigned int timer_flag); 200 + void *timerf, char *comm, u32 flags); 205 201 206 202 extern void __timer_stats_timer_set_start_info(struct timer_list *timer, 207 203 void *addr); ··· 237 253 extern void run_local_timers(void); 238 254 struct hrtimer; 239 255 extern enum hrtimer_restart it_real_fn(struct hrtimer *); 256 + 257 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 258 + #include <linux/sysctl.h> 259 + 260 + extern unsigned int sysctl_timer_migration; 261 + int timer_migration_handler(struct ctl_table *table, int write, 262 + void __user *buffer, size_t *lenp, 263 + loff_t *ppos); 264 + #endif 240 265 241 266 unsigned long __round_jiffies(unsigned long j, int cpu); 242 267 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
include/linux/timerqueue.h (+4 -4)
··· 16 16 }; 17 17 18 18 19 - extern void timerqueue_add(struct timerqueue_head *head, 20 - struct timerqueue_node *node); 21 - extern void timerqueue_del(struct timerqueue_head *head, 22 - struct timerqueue_node *node); 19 + extern bool timerqueue_add(struct timerqueue_head *head, 20 + struct timerqueue_node *node); 21 + extern bool timerqueue_del(struct timerqueue_head *head, 22 + struct timerqueue_node *node); 23 23 extern struct timerqueue_node *timerqueue_iterate_next( 24 24 struct timerqueue_node *node); 25 25
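Changing `timerqueue_add()`/`timerqueue_del()` from `void` to `bool` lets callers learn whether the earliest-expiring node changed, so they only reprogram timer hardware when that is actually the case. A toy, list-based analogue of the add path (the kernel version is rbtree-backed; the `tq_*` names here are made up for illustration):

```c
#include <assert.h>
#include <stddef.h>

struct tq_node { unsigned long expires; struct tq_node *next; };
struct tq_head { struct tq_node *head; };

/* Sorted insert by expiry; returns nonzero when the new node became the
 * head, i.e. the earliest deadline changed and hardware would need
 * reprogramming. */
static int tq_add(struct tq_head *h, struct tq_node *n)
{
	struct tq_node **p = &h->head;

	while (*p && (*p)->expires <= n->expires)
		p = &(*p)->next;
	n->next = *p;
	*p = n;
	return h->head == n;
}
```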
include/trace/events/timer.h (+8 -4)
··· 43 43 */ 44 44 TRACE_EVENT(timer_start, 45 45 46 - TP_PROTO(struct timer_list *timer, unsigned long expires), 46 + TP_PROTO(struct timer_list *timer, 47 + unsigned long expires, 48 + unsigned int flags), 47 49 48 - TP_ARGS(timer, expires), 50 + TP_ARGS(timer, expires, flags), 49 51 50 52 TP_STRUCT__entry( 51 53 __field( void *, timer ) 52 54 __field( void *, function ) 53 55 __field( unsigned long, expires ) 54 56 __field( unsigned long, now ) 57 + __field( unsigned int, flags ) 55 58 ), 56 59 57 60 TP_fast_assign( ··· 62 59 __entry->function = timer->function; 63 60 __entry->expires = expires; 64 61 __entry->now = jiffies; 62 + __entry->flags = flags; 65 63 ), 66 64 67 - TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", 65 + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x", 68 66 __entry->timer, __entry->function, __entry->expires, 69 - (long)__entry->expires - __entry->now) 67 + (long)__entry->expires - __entry->now, __entry->flags) 70 68 ); 71 69 72 70 /**
kernel/events/core.c (+49 -68)
··· 51 51 52 52 static struct workqueue_struct *perf_wq; 53 53 54 + typedef int (*remote_function_f)(void *); 55 + 54 56 struct remote_function_call { 55 57 struct task_struct *p; 56 - int (*func)(void *info); 58 + remote_function_f func; 57 59 void *info; 58 60 int ret; 59 61 }; ··· 88 86 * -EAGAIN - when the process moved away 89 87 */ 90 88 static int 91 - task_function_call(struct task_struct *p, int (*func) (void *info), void *info) 89 + task_function_call(struct task_struct *p, remote_function_f func, void *info) 92 90 { 93 91 struct remote_function_call data = { 94 92 .p = p, ··· 112 110 * 113 111 * returns: @func return value or -ENXIO when the cpu is offline 114 112 */ 115 - static int cpu_function_call(int cpu, int (*func) (void *info), void *info) 113 + static int cpu_function_call(int cpu, remote_function_f func, void *info) 116 114 { 117 115 struct remote_function_call data = { 118 116 .p = NULL, ··· 749 747 /* 750 748 * function must be called with interrupts disbled 751 749 */ 752 - static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr) 750 + static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) 753 751 { 754 752 struct perf_cpu_context *cpuctx; 755 - enum hrtimer_restart ret = HRTIMER_NORESTART; 756 753 int rotations = 0; 757 754 758 755 WARN_ON(!irqs_disabled()); 759 756 760 757 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer); 761 - 762 758 rotations = perf_rotate_context(cpuctx); 763 759 764 - /* 765 - * arm timer if needed 766 - */ 767 - if (rotations) { 760 + raw_spin_lock(&cpuctx->hrtimer_lock); 761 + if (rotations) 768 762 hrtimer_forward_now(hr, cpuctx->hrtimer_interval); 769 - ret = HRTIMER_RESTART; 770 - } 763 + else 764 + cpuctx->hrtimer_active = 0; 765 + raw_spin_unlock(&cpuctx->hrtimer_lock); 771 766 772 - return ret; 767 + return rotations ? 
HRTIMER_RESTART : HRTIMER_NORESTART; 773 768 } 774 769 775 - /* CPU is going down */ 776 - void perf_cpu_hrtimer_cancel(int cpu) 770 + static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) 777 771 { 778 - struct perf_cpu_context *cpuctx; 779 - struct pmu *pmu; 780 - unsigned long flags; 781 - 782 - if (WARN_ON(cpu != smp_processor_id())) 783 - return; 784 - 785 - local_irq_save(flags); 786 - 787 - rcu_read_lock(); 788 - 789 - list_for_each_entry_rcu(pmu, &pmus, entry) { 790 - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 791 - 792 - if (pmu->task_ctx_nr == perf_sw_context) 793 - continue; 794 - 795 - hrtimer_cancel(&cpuctx->hrtimer); 796 - } 797 - 798 - rcu_read_unlock(); 799 - 800 - local_irq_restore(flags); 801 - } 802 - 803 - static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) 804 - { 805 - struct hrtimer *hr = &cpuctx->hrtimer; 772 + struct hrtimer *timer = &cpuctx->hrtimer; 806 773 struct pmu *pmu = cpuctx->ctx.pmu; 807 - int timer; 774 + u64 interval; 808 775 809 776 /* no multiplexing needed for SW PMU */ 810 777 if (pmu->task_ctx_nr == perf_sw_context) ··· 783 812 * check default is sane, if not set then force to 784 813 * default interval (1/tick) 785 814 */ 786 - timer = pmu->hrtimer_interval_ms; 787 - if (timer < 1) 788 - timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; 815 + interval = pmu->hrtimer_interval_ms; 816 + if (interval < 1) 817 + interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; 789 818 790 - cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); 819 + cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); 791 820 792 - hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); 793 - hr->function = perf_cpu_hrtimer_handler; 821 + raw_spin_lock_init(&cpuctx->hrtimer_lock); 822 + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); 823 + timer->function = perf_mux_hrtimer_handler; 794 824 } 795 825 796 - static void perf_cpu_hrtimer_restart(struct 
perf_cpu_context *cpuctx) 826 + static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) 797 827 { 798 - struct hrtimer *hr = &cpuctx->hrtimer; 828 + struct hrtimer *timer = &cpuctx->hrtimer; 799 829 struct pmu *pmu = cpuctx->ctx.pmu; 830 + unsigned long flags; 800 831 801 832 /* not for SW PMU */ 802 833 if (pmu->task_ctx_nr == perf_sw_context) 803 - return; 834 + return 0; 804 835 805 - if (hrtimer_active(hr)) 806 - return; 836 + raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags); 837 + if (!cpuctx->hrtimer_active) { 838 + cpuctx->hrtimer_active = 1; 839 + hrtimer_forward_now(timer, cpuctx->hrtimer_interval); 840 + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); 841 + } 842 + raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags); 807 843 808 - if (!hrtimer_callback_running(hr)) 809 - __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval, 810 - 0, HRTIMER_MODE_REL_PINNED, 0); 844 + return 0; 811 845 } 812 846 813 847 void perf_pmu_disable(struct pmu *pmu) ··· 1911 1935 1912 1936 if (event_sched_in(group_event, cpuctx, ctx)) { 1913 1937 pmu->cancel_txn(pmu); 1914 - perf_cpu_hrtimer_restart(cpuctx); 1938 + perf_mux_hrtimer_restart(cpuctx); 1915 1939 return -EAGAIN; 1916 1940 } 1917 1941 ··· 1958 1982 1959 1983 pmu->cancel_txn(pmu); 1960 1984 1961 - perf_cpu_hrtimer_restart(cpuctx); 1985 + perf_mux_hrtimer_restart(cpuctx); 1962 1986 1963 1987 return -EAGAIN; 1964 1988 } ··· 2231 2255 */ 2232 2256 if (leader != event) { 2233 2257 group_sched_out(leader, cpuctx, ctx); 2234 - perf_cpu_hrtimer_restart(cpuctx); 2258 + perf_mux_hrtimer_restart(cpuctx); 2235 2259 } 2236 2260 if (leader->attr.pinned) { 2237 2261 update_group_times(leader); ··· 6873 6897 } else { 6874 6898 period = max_t(u64, 10000, hwc->sample_period); 6875 6899 } 6876 - __hrtimer_start_range_ns(&hwc->hrtimer, 6877 - ns_to_ktime(period), 0, 6878 - HRTIMER_MODE_REL_PINNED, 0); 6900 + hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), 6901 + HRTIMER_MODE_REL_PINNED); 6879 6902 } 6880 6903 
6881 6904 static void perf_swevent_cancel_hrtimer(struct perf_event *event) ··· 7175 7200 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms); 7176 7201 } 7177 7202 7203 + static DEFINE_MUTEX(mux_interval_mutex); 7204 + 7178 7205 static ssize_t 7179 7206 perf_event_mux_interval_ms_store(struct device *dev, 7180 7207 struct device_attribute *attr, ··· 7196 7219 if (timer == pmu->hrtimer_interval_ms) 7197 7220 return count; 7198 7221 7222 + mutex_lock(&mux_interval_mutex); 7199 7223 pmu->hrtimer_interval_ms = timer; 7200 7224 7201 7225 /* update all cpuctx for this PMU */ 7202 - for_each_possible_cpu(cpu) { 7226 + get_online_cpus(); 7227 + for_each_online_cpu(cpu) { 7203 7228 struct perf_cpu_context *cpuctx; 7204 7229 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 7205 7230 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); 7206 7231 7207 - if (hrtimer_active(&cpuctx->hrtimer)) 7208 - hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval); 7232 + cpu_function_call(cpu, 7233 + (remote_function_f)perf_mux_hrtimer_restart, cpuctx); 7209 7234 } 7235 + put_online_cpus(); 7236 + mutex_unlock(&mux_interval_mutex); 7210 7237 7211 7238 return count; 7212 7239 } ··· 7315 7334 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock); 7316 7335 cpuctx->ctx.pmu = pmu; 7317 7336 7318 - __perf_cpu_hrtimer_init(cpuctx, cpu); 7337 + __perf_mux_hrtimer_init(cpuctx, cpu); 7319 7338 7320 7339 cpuctx->unique_pmu = pmu; 7321 7340 }
kernel/futex.c (+1 -4)
··· 2064 2064 queue_me(q, hb); 2065 2065 2066 2066 /* Arm the timer */ 2067 - if (timeout) { 2067 + if (timeout) 2068 2068 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 2069 - if (!hrtimer_active(&timeout->timer)) 2070 - timeout->task = NULL; 2071 - } 2072 2069 2073 2070 /* 2074 2071 * If we have been removed from the hash list, then another task
kernel/locking/rtmutex.c (+1 -4)
··· 1182 1182 set_current_state(state); 1183 1183 1184 1184 /* Setup the timer, when timeout != NULL */ 1185 - if (unlikely(timeout)) { 1185 + if (unlikely(timeout)) 1186 1186 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); 1187 - if (!hrtimer_active(&timeout->timer)) 1188 - timeout->task = NULL; 1189 - } 1190 1187 1191 1188 ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); 1192 1189
kernel/rcu/tree_plugin.h (+9 -9)
··· 1375 1375 * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs 1376 1376 * any flavor of RCU. 1377 1377 */ 1378 - int rcu_needs_cpu(unsigned long *delta_jiffies) 1378 + int rcu_needs_cpu(u64 basemono, u64 *nextevt) 1379 1379 { 1380 - *delta_jiffies = ULONG_MAX; 1380 + *nextevt = KTIME_MAX; 1381 1381 return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) 1382 1382 ? 0 : rcu_cpu_has_callbacks(NULL); 1383 1383 } ··· 1439 1439 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; 1440 1440 module_param(rcu_idle_lazy_gp_delay, int, 0644); 1441 1441 1442 - extern int tick_nohz_active; 1443 - 1444 1442 /* 1445 1443 * Try to advance callbacks for all flavors of RCU on the current CPU, but 1446 1444 * only if it has been awhile since the last time we did so. Afterwards, ··· 1485 1487 * 1486 1488 * The caller must have disabled interrupts. 1487 1489 */ 1488 - int rcu_needs_cpu(unsigned long *dj) 1490 + int rcu_needs_cpu(u64 basemono, u64 *nextevt) 1489 1491 { 1490 1492 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 1493 + unsigned long dj; 1491 1494 1492 1495 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { 1493 - *dj = ULONG_MAX; 1496 + *nextevt = KTIME_MAX; 1494 1497 return 0; 1495 1498 } 1496 1499 ··· 1500 1501 1501 1502 /* If no callbacks, RCU doesn't need the CPU. */ 1502 1503 if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) { 1503 - *dj = ULONG_MAX; 1504 + *nextevt = KTIME_MAX; 1504 1505 return 0; 1505 1506 } 1506 1507 ··· 1514 1515 1515 1516 /* Request timer delay depending on laziness, and round. */ 1516 1517 if (!rdtp->all_lazy) { 1517 - *dj = round_up(rcu_idle_gp_delay + jiffies, 1518 + dj = round_up(rcu_idle_gp_delay + jiffies, 1518 1519 rcu_idle_gp_delay) - jiffies; 1519 1520 } else { 1520 - *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; 1521 + dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; 1521 1522 } 1523 + *nextevt = basemono + dj * TICK_NSEC; 1522 1524 return 0; 1523 1525 } 1524 1526
kernel/sched/core.c (+9 -35)
··· 90 90 #define CREATE_TRACE_POINTS 91 91 #include <trace/events/sched.h> 92 92 93 - void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) 94 - { 95 - unsigned long delta; 96 - ktime_t soft, hard, now; 97 - 98 - for (;;) { 99 - if (hrtimer_active(period_timer)) 100 - break; 101 - 102 - now = hrtimer_cb_get_time(period_timer); 103 - hrtimer_forward(period_timer, now, period); 104 - 105 - soft = hrtimer_get_softexpires(period_timer); 106 - hard = hrtimer_get_expires(period_timer); 107 - delta = ktime_to_ns(ktime_sub(hard, soft)); 108 - __hrtimer_start_range_ns(period_timer, soft, delta, 109 - HRTIMER_MODE_ABS_PINNED, 0); 110 - } 111 - } 112 - 113 93 DEFINE_MUTEX(sched_domains_mutex); 114 94 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 115 95 ··· 335 355 336 356 #ifdef CONFIG_SMP 337 357 338 - static int __hrtick_restart(struct rq *rq) 358 + static void __hrtick_restart(struct rq *rq) 339 359 { 340 360 struct hrtimer *timer = &rq->hrtick_timer; 341 - ktime_t time = hrtimer_get_softexpires(timer); 342 361 343 - return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0); 362 + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); 344 363 } 345 364 346 365 /* ··· 419 440 * doesn't make sense. Rely on vruntime for fairness. 420 441 */ 421 442 delay = max_t(u64, delay, 10000LL); 422 - __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, 423 - HRTIMER_MODE_REL_PINNED, 0); 443 + hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), 444 + HRTIMER_MODE_REL_PINNED); 424 445 } 425 446 426 447 static inline void init_hrtick(void) ··· 618 639 * selecting an idle cpu will add more delays to the timers than intended 619 640 * (as that cpu's timer base may not be uptodate wrt jiffies etc). 
620 641 */ 621 - int get_nohz_timer_target(int pinned) 642 + int get_nohz_timer_target(void) 622 643 { 623 - int cpu = smp_processor_id(); 624 - int i; 644 + int i, cpu = smp_processor_id(); 625 645 struct sched_domain *sd; 626 646 627 - if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu)) 647 + if (!idle_cpu(cpu)) 628 648 return cpu; 629 649 630 650 rcu_read_lock(); ··· 7104 7126 } 7105 7127 #endif /* CONFIG_SMP */ 7106 7128 7107 - const_debug unsigned int sysctl_timer_migration = 1; 7108 - 7109 7129 int in_sched_functions(unsigned long addr) 7110 7130 { 7111 7131 return in_lock_functions(addr) || ··· 8139 8163 8140 8164 __refill_cfs_bandwidth_runtime(cfs_b); 8141 8165 /* restart the period timer (if active) to handle new period expiry */ 8142 - if (runtime_enabled && cfs_b->timer_active) { 8143 - /* force a reprogram */ 8144 - __start_cfs_bandwidth(cfs_b, true); 8145 - } 8166 + if (runtime_enabled) 8167 + start_cfs_bandwidth(cfs_b); 8146 8168 raw_spin_unlock_irq(&cfs_b->lock); 8147 8169 8148 8170 for_each_online_cpu(i) {
kernel/sched/deadline.c (+2 -10)
··· 503 503 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 504 504 struct rq *rq = rq_of_dl_rq(dl_rq); 505 505 ktime_t now, act; 506 - ktime_t soft, hard; 507 - unsigned long range; 508 506 s64 delta; 509 507 510 508 if (boosted) ··· 525 527 if (ktime_us_delta(act, now) < 0) 526 528 return 0; 527 529 528 - hrtimer_set_expires(&dl_se->dl_timer, act); 530 + hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS); 529 531 530 - soft = hrtimer_get_softexpires(&dl_se->dl_timer); 531 - hard = hrtimer_get_expires(&dl_se->dl_timer); 532 - range = ktime_to_ns(ktime_sub(hard, soft)); 533 - __hrtimer_start_range_ns(&dl_se->dl_timer, soft, 534 - range, HRTIMER_MODE_ABS, 0); 535 - 536 - return hrtimer_active(&dl_se->dl_timer); 532 + return 1; 537 533 } 538 534 539 535 /*
kernel/sched/debug.c (-2)
··· 232 232 #endif 233 233 #endif 234 234 #ifdef CONFIG_CFS_BANDWIDTH 235 - SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active", 236 - cfs_rq->tg->cfs_bandwidth.timer_active); 237 235 SEQ_printf(m, " .%-30s: %d\n", "throttled", 238 236 cfs_rq->throttled); 239 237 SEQ_printf(m, " .%-30s: %d\n", "throttle_count",
kernel/sched/fair.c (+28 -48)
··· 3504 3504 if (cfs_b->quota == RUNTIME_INF) 3505 3505 amount = min_amount; 3506 3506 else { 3507 - /* 3508 - * If the bandwidth pool has become inactive, then at least one 3509 - * period must have elapsed since the last consumption. 3510 - * Refresh the global state and ensure bandwidth timer becomes 3511 - * active. 3512 - */ 3513 - if (!cfs_b->timer_active) { 3514 - __refill_cfs_bandwidth_runtime(cfs_b); 3515 - __start_cfs_bandwidth(cfs_b, false); 3516 - } 3507 + start_cfs_bandwidth(cfs_b); 3517 3508 3518 3509 if (cfs_b->runtime > 0) { 3519 3510 amount = min(cfs_b->runtime, min_amount); ··· 3653 3662 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); 3654 3663 struct sched_entity *se; 3655 3664 long task_delta, dequeue = 1; 3665 + bool empty; 3656 3666 3657 3667 se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))]; 3658 3668 ··· 3683 3691 cfs_rq->throttled = 1; 3684 3692 cfs_rq->throttled_clock = rq_clock(rq); 3685 3693 raw_spin_lock(&cfs_b->lock); 3694 + empty = list_empty(&cfs_rq->throttled_list); 3695 + 3686 3696 /* 3687 3697 * Add to the _head_ of the list, so that an already-started 3688 3698 * distribute_cfs_runtime will not see us 3689 3699 */ 3690 3700 list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); 3691 - if (!cfs_b->timer_active) 3692 - __start_cfs_bandwidth(cfs_b, false); 3701 + 3702 + /* 3703 + * If we're the first throttled task, make sure the bandwidth 3704 + * timer is running. 3705 + */ 3706 + if (empty) 3707 + start_cfs_bandwidth(cfs_b); 3708 + 3693 3709 raw_spin_unlock(&cfs_b->lock); 3694 3710 } 3695 3711 ··· 3812 3812 if (cfs_b->idle && !throttled) 3813 3813 goto out_deactivate; 3814 3814 3815 - /* 3816 - * if we have relooped after returning idle once, we need to update our 3817 - * status as actually running, so that other cpus doing 3818 - * __start_cfs_bandwidth will stop trying to cancel us. 
3819 - */ 3820 - cfs_b->timer_active = 1; 3821 - 3822 3815 __refill_cfs_bandwidth_runtime(cfs_b); 3823 3816 3824 3817 if (!throttled) { ··· 3856 3863 return 0; 3857 3864 3858 3865 out_deactivate: 3859 - cfs_b->timer_active = 0; 3860 3866 return 1; 3861 3867 } 3862 3868 ··· 3870 3878 * Are we near the end of the current quota period? 3871 3879 * 3872 3880 * Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the 3873 - * hrtimer base being cleared by __hrtimer_start_range_ns. In the case of 3881 + * hrtimer base being cleared by hrtimer_start. In the case of 3874 3882 * migrate_hrtimers, base is never cleared, so we are fine. 3875 3883 */ 3876 3884 static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire) ··· 3898 3906 if (runtime_refresh_within(cfs_b, min_left)) 3899 3907 return; 3900 3908 3901 - start_bandwidth_timer(&cfs_b->slack_timer, 3902 - ns_to_ktime(cfs_bandwidth_slack_period)); 3909 + hrtimer_start(&cfs_b->slack_timer, 3910 + ns_to_ktime(cfs_bandwidth_slack_period), 3911 + HRTIMER_MODE_REL); 3903 3912 } 3904 3913 3905 3914 /* we know any runtime found here is valid as update_curr() precedes return */ ··· 4020 4027 { 4021 4028 struct cfs_bandwidth *cfs_b = 4022 4029 container_of(timer, struct cfs_bandwidth, slack_timer); 4030 + 4023 4031 do_sched_cfs_slack_timer(cfs_b); 4024 4032 4025 4033 return HRTIMER_NORESTART; ··· 4030 4036 { 4031 4037 struct cfs_bandwidth *cfs_b = 4032 4038 container_of(timer, struct cfs_bandwidth, period_timer); 4033 - ktime_t now; 4034 4039 int overrun; 4035 4040 int idle = 0; 4036 4041 4037 4042 raw_spin_lock(&cfs_b->lock); 4038 4043 for (;;) { 4039 - now = hrtimer_cb_get_time(timer); 4040 - overrun = hrtimer_forward(timer, now, cfs_b->period); 4041 - 4044 + overrun = hrtimer_forward_now(timer, cfs_b->period); 4042 4045 if (!overrun) 4043 4046 break; 4044 4047 4045 4048 idle = do_sched_cfs_period_timer(cfs_b, overrun); 4046 4049 } 4050 + if (idle) 4051 + cfs_b->period_active = 0; 4047 4052 
raw_spin_unlock(&cfs_b->lock); 4048 4053 4049 4054 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; ··· 4056 4063 cfs_b->period = ns_to_ktime(default_cfs_period()); 4057 4064 4058 4065 INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); 4059 - hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 4066 + hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); 4060 4067 cfs_b->period_timer.function = sched_cfs_period_timer; 4061 4068 hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 4062 4069 cfs_b->slack_timer.function = sched_cfs_slack_timer; ··· 4068 4075 INIT_LIST_HEAD(&cfs_rq->throttled_list); 4069 4076 } 4070 4077 4071 - /* requires cfs_b->lock, may release to reprogram timer */ 4072 - void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force) 4078 + void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) 4073 4079 { 4074 - /* 4075 - * The timer may be active because we're trying to set a new bandwidth 4076 - * period or because we're racing with the tear-down path 4077 - * (timer_active==0 becomes visible before the hrtimer call-back 4078 - * terminates). 
In either case we ensure that it's re-programmed 4079 - */ 4080 - while (unlikely(hrtimer_active(&cfs_b->period_timer)) && 4081 - hrtimer_try_to_cancel(&cfs_b->period_timer) < 0) { 4082 - /* bounce the lock to allow do_sched_cfs_period_timer to run */ 4083 - raw_spin_unlock(&cfs_b->lock); 4084 - cpu_relax(); 4085 - raw_spin_lock(&cfs_b->lock); 4086 - /* if someone else restarted the timer then we're done */ 4087 - if (!force && cfs_b->timer_active) 4088 - return; 4089 - } 4080 + lockdep_assert_held(&cfs_b->lock); 4090 4081 4091 - cfs_b->timer_active = 1; 4092 - start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); 4082 + if (!cfs_b->period_active) { 4083 + cfs_b->period_active = 1; 4084 + hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); 4085 + hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); 4086 + } 4093 4087 } 4094 4088 4095 4089 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
kernel/sched/rt.c (+13 -9)
··· 18 18 { 19 19 struct rt_bandwidth *rt_b = 20 20 container_of(timer, struct rt_bandwidth, rt_period_timer); 21 - ktime_t now; 22 - int overrun; 23 21 int idle = 0; 22 + int overrun; 24 23 24 + raw_spin_lock(&rt_b->rt_runtime_lock); 25 25 for (;;) { 26 - now = hrtimer_cb_get_time(timer); 27 - overrun = hrtimer_forward(timer, now, rt_b->rt_period); 28 - 26 + overrun = hrtimer_forward_now(timer, rt_b->rt_period); 29 27 if (!overrun) 30 28 break; 31 29 30 + raw_spin_unlock(&rt_b->rt_runtime_lock); 32 31 idle = do_sched_rt_period_timer(rt_b, overrun); 32 + raw_spin_lock(&rt_b->rt_runtime_lock); 33 33 } 34 + if (idle) 35 + rt_b->rt_period_active = 0; 36 + raw_spin_unlock(&rt_b->rt_runtime_lock); 34 37 35 38 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; 36 39 } ··· 55 52 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 56 53 return; 57 54 58 - if (hrtimer_active(&rt_b->rt_period_timer)) 59 - return; 60 - 61 55 raw_spin_lock(&rt_b->rt_runtime_lock); 62 - start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); 56 + if (!rt_b->rt_period_active) { 57 + rt_b->rt_period_active = 1; 58 + hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period); 59 + hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED); 60 + } 63 61 raw_spin_unlock(&rt_b->rt_runtime_lock); 64 62 } 65 63
kernel/sched/sched.h (+3 -4)
··· 137 137 ktime_t rt_period; 138 138 u64 rt_runtime; 139 139 struct hrtimer rt_period_timer; 140 + unsigned int rt_period_active; 140 141 }; 141 142 142 143 void __dl_clear_params(struct task_struct *p); ··· 222 221 s64 hierarchical_quota; 223 222 u64 runtime_expires; 224 223 225 - int idle, timer_active; 224 + int idle, period_active; 226 225 struct hrtimer period_timer, slack_timer; 227 226 struct list_head throttled_cfs_rq; 228 227 ··· 313 312 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); 314 313 315 314 extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); 316 - extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force); 315 + extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); 317 316 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); 318 317 319 318 extern void free_rt_sched_group(struct task_group *tg); ··· 1409 1408 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } 1410 1409 static inline void sched_avg_update(struct rq *rq) { } 1411 1410 #endif 1412 - 1413 - extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); 1414 1411 1415 1412 /* 1416 1413 * __task_rq_lock - lock the rq @p resides on.
kernel/sysctl.c (+9 -9)
··· 349 349 .mode = 0644, 350 350 .proc_handler = proc_dointvec, 351 351 }, 352 - { 353 - .procname = "timer_migration", 354 - .data = &sysctl_timer_migration, 355 - .maxlen = sizeof(unsigned int), 356 - .mode = 0644, 357 - .proc_handler = proc_dointvec_minmax, 358 - .extra1 = &zero, 359 - .extra2 = &one, 360 - }, 361 352 #endif /* CONFIG_SMP */ 362 353 #ifdef CONFIG_NUMA_BALANCING 363 354 { ··· 1123 1132 .extra1 = &zero, 1124 1133 .extra2 = &one, 1125 1134 }, 1135 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 1136 + { 1137 + .procname = "timer_migration", 1138 + .data = &sysctl_timer_migration, 1139 + .maxlen = sizeof(unsigned int), 1140 + .mode = 0644, 1141 + .proc_handler = timer_migration_handler, 1142 + }, 1143 + #endif 1126 1144 { } 1127 1145 }; 1128 1146
kernel/time/Makefile (+1 -16)
··· 13 13 obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o 14 14 obj-$(CONFIG_TEST_UDELAY) += test_udelay.o 15 15 16 - $(obj)/time.o: $(obj)/timeconst.h 17 - 18 - quiet_cmd_hzfile = HZFILE $@ 19 - cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@ 20 - 21 - targets += hz.bc 22 - $(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE 23 - $(call if_changed,hzfile) 24 - 25 - quiet_cmd_bc = BC $@ 26 - cmd_bc = bc -q $(filter-out FORCE,$^) > $@ 27 - 28 - targets += timeconst.h 29 - $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE 30 - $(call if_changed,bc) 31 - 16 + $(obj)/time.o: $(objtree)/include/config/
kernel/time/alarmtimer.c (+7 -10)
··· 317 317 * @alarm: ptr to alarm to set 318 318 * @start: time to run the alarm 319 319 */ 320 - int alarm_start(struct alarm *alarm, ktime_t start) 320 + void alarm_start(struct alarm *alarm, ktime_t start) 321 321 { 322 322 struct alarm_base *base = &alarm_bases[alarm->type]; 323 323 unsigned long flags; 324 - int ret; 325 324 326 325 spin_lock_irqsave(&base->lock, flags); 327 326 alarm->node.expires = start; 328 327 alarmtimer_enqueue(base, alarm); 329 - ret = hrtimer_start(&alarm->timer, alarm->node.expires, 330 - HRTIMER_MODE_ABS); 328 + hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS); 331 329 spin_unlock_irqrestore(&base->lock, flags); 332 - return ret; 333 330 } 334 331 EXPORT_SYMBOL_GPL(alarm_start); 335 332 ··· 335 338 * @alarm: ptr to alarm to set 336 339 * @start: time relative to now to run the alarm 337 340 */ 338 - int alarm_start_relative(struct alarm *alarm, ktime_t start) 341 + void alarm_start_relative(struct alarm *alarm, ktime_t start) 339 342 { 340 343 struct alarm_base *base = &alarm_bases[alarm->type]; 341 344 342 345 start = ktime_add(start, base->gettime()); 343 - return alarm_start(alarm, start); 346 + alarm_start(alarm, start); 344 347 } 345 348 EXPORT_SYMBOL_GPL(alarm_start_relative); 346 349 ··· 492 495 */ 493 496 static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) 494 497 { 495 - clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; 496 - 497 498 if (!alarmtimer_get_rtcdev()) 498 499 return -EINVAL; 499 500 500 - return hrtimer_get_res(baseid, tp); 501 + tp->tv_sec = 0; 502 + tp->tv_nsec = hrtimer_resolution; 503 + return 0; 501 504 } 502 505 503 506 /**
kernel/time/clockevents.c (+42 -25)
··· 94 94 } 95 95 EXPORT_SYMBOL_GPL(clockevent_delta2ns); 96 96 97 - static int __clockevents_set_state(struct clock_event_device *dev, 98 - enum clock_event_state state) 97 + static int __clockevents_switch_state(struct clock_event_device *dev, 98 + enum clock_event_state state) 99 99 { 100 100 /* Transition with legacy set_mode() callback */ 101 101 if (dev->set_mode) { ··· 134 134 return -ENOSYS; 135 135 return dev->set_state_oneshot(dev); 136 136 137 + case CLOCK_EVT_STATE_ONESHOT_STOPPED: 138 + /* Core internal bug */ 139 + if (WARN_ONCE(!clockevent_state_oneshot(dev), 140 + "Current state: %d\n", 141 + clockevent_get_state(dev))) 142 + return -EINVAL; 143 + 144 + if (dev->set_state_oneshot_stopped) 145 + return dev->set_state_oneshot_stopped(dev); 146 + else 147 + return -ENOSYS; 148 + 137 149 default: 138 150 return -ENOSYS; 139 151 } 140 152 } 141 153 142 154 /** 143 - * clockevents_set_state - set the operating state of a clock event device 155 + * clockevents_switch_state - set the operating state of a clock event device 144 156 * @dev: device to modify 145 157 * @state: new state 146 158 * 147 159 * Must be called with interrupts disabled ! 
148 160 */ 149 - void clockevents_set_state(struct clock_event_device *dev, 150 - enum clock_event_state state) 161 + void clockevents_switch_state(struct clock_event_device *dev, 162 + enum clock_event_state state) 151 163 { 152 - if (dev->state != state) { 153 - if (__clockevents_set_state(dev, state)) 164 + if (clockevent_get_state(dev) != state) { 165 + if (__clockevents_switch_state(dev, state)) 154 166 return; 155 167 156 - dev->state = state; 168 + clockevent_set_state(dev, state); 157 169 158 170 /* 159 171 * A nsec2cyc multiplicator of 0 is invalid and we'd crash 160 172 * on it, so fix it up and emit a warning: 161 173 */ 162 - if (state == CLOCK_EVT_STATE_ONESHOT) { 174 + if (clockevent_state_oneshot(dev)) { 163 175 if (unlikely(!dev->mult)) { 164 176 dev->mult = 1; 165 177 WARN_ON(1); ··· 186 174 */ 187 175 void clockevents_shutdown(struct clock_event_device *dev) 188 176 { 189 - clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); 177 + clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); 190 178 dev->next_event.tv64 = KTIME_MAX; 191 179 } 192 180 ··· 260 248 delta = dev->min_delta_ns; 261 249 dev->next_event = ktime_add_ns(ktime_get(), delta); 262 250 263 - if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) 251 + if (clockevent_state_shutdown(dev)) 264 252 return 0; 265 253 266 254 dev->retries++; ··· 297 285 delta = dev->min_delta_ns; 298 286 dev->next_event = ktime_add_ns(ktime_get(), delta); 299 287 300 - if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) 288 + if (clockevent_state_shutdown(dev)) 301 289 return 0; 302 290 303 291 dev->retries++; ··· 329 317 330 318 dev->next_event = expires; 331 319 332 - if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) 320 + if (clockevent_state_shutdown(dev)) 333 321 return 0; 322 + 323 + /* We must be in ONESHOT state here */ 324 + WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n", 325 + clockevent_get_state(dev)); 334 326 335 327 /* Shortcut for clockevent devices that can deal with ktime. 
*/ 336 328 if (dev->features & CLOCK_EVT_FEAT_KTIME) ··· 378 362 struct clock_event_device *dev, *newdev = NULL; 379 363 380 364 list_for_each_entry(dev, &clockevent_devices, list) { 381 - if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED) 365 + if (dev == ced || !clockevent_state_detached(dev)) 382 366 continue; 383 367 384 368 if (!tick_check_replacement(newdev, dev)) ··· 404 388 static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) 405 389 { 406 390 /* Fast track. Device is unused */ 407 - if (ced->state == CLOCK_EVT_STATE_DETACHED) { 391 + if (clockevent_state_detached(ced)) { 408 392 list_del_init(&ced->list); 409 393 return 0; 410 394 } ··· 461 445 if (dev->set_mode) { 462 446 /* We shouldn't be supporting new modes now */ 463 447 WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || 464 - dev->set_state_shutdown || dev->tick_resume); 448 + dev->set_state_shutdown || dev->tick_resume || 449 + dev->set_state_oneshot_stopped); 465 450 466 451 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 467 452 return 0; ··· 497 480 BUG_ON(clockevents_sanity_check(dev)); 498 481 499 482 /* Initialize state to DETACHED */ 500 - dev->state = CLOCK_EVT_STATE_DETACHED; 483 + clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); 501 484 502 485 if (!dev->cpumask) { 503 486 WARN_ON(num_possible_cpus() > 1); ··· 562 545 { 563 546 clockevents_config(dev, freq); 564 547 565 - if (dev->state == CLOCK_EVT_STATE_ONESHOT) 548 + if (clockevent_state_oneshot(dev)) 566 549 return clockevents_program_event(dev, dev->next_event, false); 567 550 568 - if (dev->state == CLOCK_EVT_STATE_PERIODIC) 569 - return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); 551 + if (clockevent_state_periodic(dev)) 552 + return __clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); 570 553 571 554 return 0; 572 555 } ··· 620 603 */ 621 604 if (old) { 622 605 module_put(old->owner); 623 - clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED); 606 + 
clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED); 624 607 list_del(&old->list); 625 608 list_add(&old->list, &clockevents_released); 626 609 } 627 610 628 611 if (new) { 629 - BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); 612 + BUG_ON(!clockevent_state_detached(new)); 630 613 clockevents_shutdown(new); 631 614 } 632 615 } ··· 639 622 struct clock_event_device *dev; 640 623 641 624 list_for_each_entry_reverse(dev, &clockevent_devices, list) 642 - if (dev->suspend) 625 + if (dev->suspend && !clockevent_state_detached(dev)) 643 626 dev->suspend(dev); 644 627 } 645 628 ··· 651 634 struct clock_event_device *dev; 652 635 653 636 list_for_each_entry(dev, &clockevent_devices, list) 654 - if (dev->resume) 637 + if (dev->resume && !clockevent_state_detached(dev)) 655 638 dev->resume(dev); 656 639 } 657 640 ··· 682 665 if (cpumask_test_cpu(cpu, dev->cpumask) && 683 666 cpumask_weight(dev->cpumask) == 1 && 684 667 !tick_is_broadcast_device(dev)) { 685 - BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); 668 + BUG_ON(!clockevent_state_detached(dev)); 686 669 list_del(&dev->list); 687 670 } 688 671 }
+12 -12
kernel/time/clocksource.c
··· 23 23 * o Allow clocksource drivers to be unregistered 24 24 */ 25 25 26 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 27 + 26 28 #include <linux/device.h> 27 29 #include <linux/clocksource.h> 28 30 #include <linux/init.h> ··· 218 216 219 217 /* Check the deviation from the watchdog clocksource. */ 220 218 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 221 - pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); 222 - pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", 219 + pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", 220 + cs->name); 221 + pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", 223 222 watchdog->name, wdnow, wdlast, watchdog->mask); 224 - pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", 223 + pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", 225 224 cs->name, csnow, cslast, cs->mask); 226 225 __clocksource_unstable(cs); 227 226 continue; ··· 570 567 */ 571 568 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) { 572 569 /* Override clocksource cannot be used. */ 573 - printk(KERN_WARNING "Override clocksource %s is not " 574 - "HRT compatible. Cannot switch while in " 575 - "HRT/NOHZ mode\n", cs->name); 570 + pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n", 571 + cs->name); 576 572 override_name[0] = 0; 577 573 } else 578 574 /* Override clocksource can be used. 
*/ ··· 710 708 711 709 clocksource_update_max_deferment(cs); 712 710 713 - pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", 714 - cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); 711 + pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", 712 + cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); 715 713 } 716 714 EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); 717 715 ··· 1010 1008 static int __init boot_override_clock(char* str) 1011 1009 { 1012 1010 if (!strcmp(str, "pmtmr")) { 1013 - printk("Warning: clock=pmtmr is deprecated. " 1014 - "Use clocksource=acpi_pm.\n"); 1011 + pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n"); 1015 1012 return boot_override_clocksource("acpi_pm"); 1016 1013 } 1017 - printk("Warning! clock= boot option is deprecated. " 1018 - "Use clocksource=xyz\n"); 1014 + pr_warn("clock= boot option is deprecated - use clocksource=xyz\n"); 1019 1015 return boot_override_clocksource(str); 1020 1016 } 1021 1017
+327 -376
kernel/time/hrtimer.c
··· 66 66 */ 67 67 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 68 68 { 69 - 70 69 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), 70 + .seq = SEQCNT_ZERO(hrtimer_bases.seq), 71 71 .clock_base = 72 72 { 73 73 { 74 74 .index = HRTIMER_BASE_MONOTONIC, 75 75 .clockid = CLOCK_MONOTONIC, 76 76 .get_time = &ktime_get, 77 - .resolution = KTIME_LOW_RES, 78 77 }, 79 78 { 80 79 .index = HRTIMER_BASE_REALTIME, 81 80 .clockid = CLOCK_REALTIME, 82 81 .get_time = &ktime_get_real, 83 - .resolution = KTIME_LOW_RES, 84 82 }, 85 83 { 86 84 .index = HRTIMER_BASE_BOOTTIME, 87 85 .clockid = CLOCK_BOOTTIME, 88 86 .get_time = &ktime_get_boottime, 89 - .resolution = KTIME_LOW_RES, 90 87 }, 91 88 { 92 89 .index = HRTIMER_BASE_TAI, 93 90 .clockid = CLOCK_TAI, 94 91 .get_time = &ktime_get_clocktai, 95 - .resolution = KTIME_LOW_RES, 96 92 }, 97 93 } 98 94 }; ··· 105 109 return hrtimer_clock_to_base_table[clock_id]; 106 110 } 107 111 108 - 109 - /* 110 - * Get the coarse grained time at the softirq based on xtime and 111 - * wall_to_monotonic. 
112 - */ 113 - static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) 114 - { 115 - ktime_t xtim, mono, boot, tai; 116 - ktime_t off_real, off_boot, off_tai; 117 - 118 - mono = ktime_get_update_offsets_tick(&off_real, &off_boot, &off_tai); 119 - boot = ktime_add(mono, off_boot); 120 - xtim = ktime_add(mono, off_real); 121 - tai = ktime_add(mono, off_tai); 122 - 123 - base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; 124 - base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; 125 - base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; 126 - base->clock_base[HRTIMER_BASE_TAI].softirq_time = tai; 127 - } 128 - 129 112 /* 130 113 * Functions and macros which are different for UP/SMP systems are kept in a 131 114 * single place 132 115 */ 133 116 #ifdef CONFIG_SMP 117 + 118 + /* 119 + * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() 120 + * such that hrtimer_callback_running() can unconditionally dereference 121 + * timer->base->cpu_base 122 + */ 123 + static struct hrtimer_cpu_base migration_cpu_base = { 124 + .seq = SEQCNT_ZERO(migration_cpu_base), 125 + .clock_base = { { .cpu_base = &migration_cpu_base, }, }, 126 + }; 127 + 128 + #define migration_base migration_cpu_base.clock_base[0] 134 129 135 130 /* 136 131 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock ··· 132 145 * be found on the lists/queues. 133 146 * 134 147 * When the timer's base is locked, and the timer removed from list, it is 135 - * possible to set timer->base = NULL and drop the lock: the timer remains 136 - * locked. 148 + * possible to set timer->base = &migration_base and drop the lock: the timer 149 + * remains locked. 
137 150 */ 138 151 static 139 152 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, ··· 143 156 144 157 for (;;) { 145 158 base = timer->base; 146 - if (likely(base != NULL)) { 159 + if (likely(base != &migration_base)) { 147 160 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 148 161 if (likely(base == timer->base)) 149 162 return base; ··· 177 190 #endif 178 191 } 179 192 193 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 194 + static inline 195 + struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, 196 + int pinned) 197 + { 198 + if (pinned || !base->migration_enabled) 199 + return this_cpu_ptr(&hrtimer_bases); 200 + return &per_cpu(hrtimer_bases, get_nohz_timer_target()); 201 + } 202 + #else 203 + static inline 204 + struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, 205 + int pinned) 206 + { 207 + return this_cpu_ptr(&hrtimer_bases); 208 + } 209 + #endif 210 + 180 211 /* 181 212 * Switch the timer base to the current CPU when possible. 
182 213 */ ··· 202 197 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 203 198 int pinned) 204 199 { 200 + struct hrtimer_cpu_base *new_cpu_base, *this_base; 205 201 struct hrtimer_clock_base *new_base; 206 - struct hrtimer_cpu_base *new_cpu_base; 207 - int this_cpu = smp_processor_id(); 208 - int cpu = get_nohz_timer_target(pinned); 209 202 int basenum = base->index; 210 203 204 + this_base = this_cpu_ptr(&hrtimer_bases); 205 + new_cpu_base = get_target_base(this_base, pinned); 211 206 again: 212 - new_cpu_base = &per_cpu(hrtimer_bases, cpu); 213 207 new_base = &new_cpu_base->clock_base[basenum]; 214 208 215 209 if (base != new_base) { ··· 224 220 if (unlikely(hrtimer_callback_running(timer))) 225 221 return base; 226 222 227 - /* See the comment in lock_timer_base() */ 228 - timer->base = NULL; 223 + /* See the comment in lock_hrtimer_base() */ 224 + timer->base = &migration_base; 229 225 raw_spin_unlock(&base->cpu_base->lock); 230 226 raw_spin_lock(&new_base->cpu_base->lock); 231 227 232 - if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 233 - cpu = this_cpu; 228 + if (new_cpu_base != this_base && 229 + hrtimer_check_target(timer, new_base)) { 234 230 raw_spin_unlock(&new_base->cpu_base->lock); 235 231 raw_spin_lock(&base->cpu_base->lock); 232 + new_cpu_base = this_base; 236 233 timer->base = base; 237 234 goto again; 238 235 } 239 236 timer->base = new_base; 240 237 } else { 241 - if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 242 - cpu = this_cpu; 238 + if (new_cpu_base != this_base && 239 + hrtimer_check_target(timer, new_base)) { 240 + new_cpu_base = this_base; 243 241 goto again; 244 242 } 245 243 } ··· 449 443 } 450 444 451 445 #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) 446 + static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, 447 + struct hrtimer *timer) 448 + { 449 + #ifdef CONFIG_HIGH_RES_TIMERS 450 + cpu_base->next_timer = timer; 451 + 
#endif 452 + } 453 + 452 454 static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) 453 455 { 454 456 struct hrtimer_clock_base *base = cpu_base->clock_base; 455 457 ktime_t expires, expires_next = { .tv64 = KTIME_MAX }; 456 - int i; 458 + unsigned int active = cpu_base->active_bases; 457 459 458 - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { 460 + hrtimer_update_next_timer(cpu_base, NULL); 461 + for (; active; base++, active >>= 1) { 459 462 struct timerqueue_node *next; 460 463 struct hrtimer *timer; 461 464 462 - next = timerqueue_getnext(&base->active); 463 - if (!next) 465 + if (!(active & 0x01)) 464 466 continue; 465 467 468 + next = timerqueue_getnext(&base->active); 466 469 timer = container_of(next, struct hrtimer, node); 467 470 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 468 - if (expires.tv64 < expires_next.tv64) 471 + if (expires.tv64 < expires_next.tv64) { 469 472 expires_next = expires; 473 + hrtimer_update_next_timer(cpu_base, timer); 474 + } 470 475 } 471 476 /* 472 477 * clock_was_set() might have changed base->offset of any of ··· 490 473 } 491 474 #endif 492 475 476 + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 477 + { 478 + ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; 479 + ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; 480 + ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; 481 + 482 + return ktime_get_update_offsets_now(&base->clock_was_set_seq, 483 + offs_real, offs_boot, offs_tai); 484 + } 485 + 493 486 /* High resolution timer related functions */ 494 487 #ifdef CONFIG_HIGH_RES_TIMERS 495 488 ··· 507 480 * High resolution timer enabled ? 
508 481 */ 509 482 static int hrtimer_hres_enabled __read_mostly = 1; 483 + unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; 484 + EXPORT_SYMBOL_GPL(hrtimer_resolution); 510 485 511 486 /* 512 487 * Enable / Disable high resolution mode ··· 537 508 /* 538 509 * Is the high resolution mode active ? 539 510 */ 511 + static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) 512 + { 513 + return cpu_base->hres_active; 514 + } 515 + 540 516 static inline int hrtimer_hres_active(void) 541 517 { 542 - return __this_cpu_read(hrtimer_bases.hres_active); 518 + return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); 543 519 } 544 520 545 521 /* ··· 555 521 static void 556 522 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) 557 523 { 558 - ktime_t expires_next = __hrtimer_get_next_event(cpu_base); 524 + ktime_t expires_next; 525 + 526 + if (!cpu_base->hres_active) 527 + return; 528 + 529 + expires_next = __hrtimer_get_next_event(cpu_base); 559 530 560 531 if (skip_equal && expires_next.tv64 == cpu_base->expires_next.tv64) 561 532 return; ··· 584 545 if (cpu_base->hang_detected) 585 546 return; 586 547 587 - if (cpu_base->expires_next.tv64 != KTIME_MAX) 588 - tick_program_event(cpu_base->expires_next, 1); 548 + tick_program_event(cpu_base->expires_next, 1); 589 549 } 590 550 591 551 /* 592 - * Shared reprogramming for clock_realtime and clock_monotonic 593 - * 594 552 * When a timer is enqueued and expires earlier than the already enqueued 595 553 * timers, we have to check, whether it expires earlier than the timer for 596 554 * which the clock event device was armed. 597 555 * 598 - * Note, that in case the state has HRTIMER_STATE_CALLBACK set, no reprogramming 599 - * and no expiry check happens. The timer gets enqueued into the rbtree. The 600 - * reprogramming and expiry check is done in the hrtimer_interrupt or in the 601 - * softirq. 
602 - * 603 556 * Called with interrupts disabled and base->cpu_base.lock held 604 557 */ 605 - static int hrtimer_reprogram(struct hrtimer *timer, 606 - struct hrtimer_clock_base *base) 558 + static void hrtimer_reprogram(struct hrtimer *timer, 559 + struct hrtimer_clock_base *base) 607 560 { 608 561 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 609 562 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 610 - int res; 611 563 612 564 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); 613 565 614 566 /* 615 - * When the callback is running, we do not reprogram the clock event 616 - * device. The timer callback is either running on a different CPU or 617 - * the callback is executed in the hrtimer_interrupt context. The 618 - * reprogramming is handled either by the softirq, which called the 619 - * callback or at the end of the hrtimer_interrupt. 567 + * If the timer is not on the current cpu, we cannot reprogram 568 + * the other cpus clock event device. 620 569 */ 621 - if (hrtimer_callback_running(timer)) 622 - return 0; 570 + if (base->cpu_base != cpu_base) 571 + return; 572 + 573 + /* 574 + * If the hrtimer interrupt is running, then it will 575 + * reevaluate the clock bases and reprogram the clock event 576 + * device. The callbacks are always executed in hard interrupt 577 + * context so we don't need an extra check for a running 578 + * callback. 579 + */ 580 + if (cpu_base->in_hrtirq) 581 + return; 623 582 624 583 /* 625 584 * CLOCK_REALTIME timer might be requested with an absolute 626 - * expiry time which is less than base->offset. Nothing wrong 627 - * about that, just avoid to call into the tick code, which 628 - * has now objections against negative expiry values. 585 + * expiry time which is less than base->offset. Set it to 0. 
629 586 */ 630 587 if (expires.tv64 < 0) 631 - return -ETIME; 588 + expires.tv64 = 0; 632 589 633 590 if (expires.tv64 >= cpu_base->expires_next.tv64) 634 - return 0; 591 + return; 635 592 636 - /* 637 - * When the target cpu of the timer is currently executing 638 - * hrtimer_interrupt(), then we do not touch the clock event 639 - * device. hrtimer_interrupt() will reevaluate all clock bases 640 - * before reprogramming the device. 641 - */ 642 - if (cpu_base->in_hrtirq) 643 - return 0; 593 + /* Update the pointer to the next expiring timer */ 594 + cpu_base->next_timer = timer; 644 595 645 596 /* 646 597 * If a hang was detected in the last timer interrupt then we ··· 639 610 * to make progress. 640 611 */ 641 612 if (cpu_base->hang_detected) 642 - return 0; 613 + return; 643 614 644 615 /* 645 - * Clockevents returns -ETIME, when the event was in the past. 616 + * Program the timer hardware. We enforce the expiry for 617 + * events which are already in the past. 646 618 */ 647 - res = tick_program_event(expires, 0); 648 - if (!IS_ERR_VALUE(res)) 649 - cpu_base->expires_next = expires; 650 - return res; 619 + cpu_base->expires_next = expires; 620 + tick_program_event(expires, 1); 651 621 } 652 622 653 623 /* ··· 658 630 base->hres_active = 0; 659 631 } 660 632 661 - static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 662 - { 663 - ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; 664 - ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; 665 - ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; 666 - 667 - return ktime_get_update_offsets_now(offs_real, offs_boot, offs_tai); 668 - } 669 - 670 633 /* 671 634 * Retrigger next event is called after clock was set 672 635 * ··· 667 648 { 668 649 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 669 650 670 - if (!hrtimer_hres_active()) 651 + if (!base->hres_active) 671 652 return; 672 653 673 654 raw_spin_lock(&base->lock); ··· 681 662 */ 
682 663 static int hrtimer_switch_to_hres(void) 683 664 { 684 - int i, cpu = smp_processor_id(); 685 - struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); 686 - unsigned long flags; 687 - 688 - if (base->hres_active) 689 - return 1; 690 - 691 - local_irq_save(flags); 665 + struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 692 666 693 667 if (tick_init_highres()) { 694 - local_irq_restore(flags); 695 668 printk(KERN_WARNING "Could not switch to high resolution " 696 - "mode on CPU %d\n", cpu); 669 + "mode on CPU %d\n", base->cpu); 697 670 return 0; 698 671 } 699 672 base->hres_active = 1; 700 - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) 701 - base->clock_base[i].resolution = KTIME_HIGH_RES; 673 + hrtimer_resolution = HIGH_RES_NSEC; 702 674 703 675 tick_setup_sched_timer(); 704 676 /* "Retrigger" the interrupt to get things going */ 705 677 retrigger_next_event(NULL); 706 - local_irq_restore(flags); 707 678 return 1; 708 679 } 709 680 ··· 715 706 716 707 #else 717 708 709 + static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; } 718 710 static inline int hrtimer_hres_active(void) { return 0; } 719 711 static inline int hrtimer_is_hres_enabled(void) { return 0; } 720 712 static inline int hrtimer_switch_to_hres(void) { return 0; } ··· 813 803 * 814 804 * Forward the timer expiry so it will expire in the future. 815 805 * Returns the number of overruns. 806 + * 807 + * Can be safely called from the callback function of @timer. If 808 + * called from other contexts @timer must neither be enqueued nor 809 + * running the callback and the caller needs to take care of 810 + * serialization. 811 + * 812 + * Note: This only updates the timer expiry value and does not requeue 813 + * the timer. 
816 814 */ 817 815 u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) 818 816 { ··· 832 814 if (delta.tv64 < 0) 833 815 return 0; 834 816 835 - if (interval.tv64 < timer->base->resolution.tv64) 836 - interval.tv64 = timer->base->resolution.tv64; 817 + if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED)) 818 + return 0; 819 + 820 + if (interval.tv64 < hrtimer_resolution) 821 + interval.tv64 = hrtimer_resolution; 837 822 838 823 if (unlikely(delta.tv64 >= interval.tv64)) { 839 824 s64 incr = ktime_to_ns(interval); ··· 870 849 { 871 850 debug_activate(timer); 872 851 873 - timerqueue_add(&base->active, &timer->node); 874 852 base->cpu_base->active_bases |= 1 << base->index; 875 853 876 - /* 877 - * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the 878 - * state of a possibly running callback. 879 - */ 880 - timer->state |= HRTIMER_STATE_ENQUEUED; 854 + timer->state = HRTIMER_STATE_ENQUEUED; 881 855 882 - return (&timer->node == base->active.next); 856 + return timerqueue_add(&base->active, &timer->node); 883 857 } 884 858 885 859 /* ··· 891 875 struct hrtimer_clock_base *base, 892 876 unsigned long newstate, int reprogram) 893 877 { 894 - struct timerqueue_node *next_timer; 895 - if (!(timer->state & HRTIMER_STATE_ENQUEUED)) 896 - goto out; 878 + struct hrtimer_cpu_base *cpu_base = base->cpu_base; 879 + unsigned int state = timer->state; 897 880 898 - next_timer = timerqueue_getnext(&base->active); 899 - timerqueue_del(&base->active, &timer->node); 900 - if (&timer->node == next_timer) { 901 - #ifdef CONFIG_HIGH_RES_TIMERS 902 - /* Reprogram the clock event device. 
if enabled */ 903 - if (reprogram && hrtimer_hres_active()) { 904 - ktime_t expires; 905 - 906 - expires = ktime_sub(hrtimer_get_expires(timer), 907 - base->offset); 908 - if (base->cpu_base->expires_next.tv64 == expires.tv64) 909 - hrtimer_force_reprogram(base->cpu_base, 1); 910 - } 911 - #endif 912 - } 913 - if (!timerqueue_getnext(&base->active)) 914 - base->cpu_base->active_bases &= ~(1 << base->index); 915 - out: 916 881 timer->state = newstate; 882 + if (!(state & HRTIMER_STATE_ENQUEUED)) 883 + return; 884 + 885 + if (!timerqueue_del(&base->active, &timer->node)) 886 + cpu_base->active_bases &= ~(1 << base->index); 887 + 888 + #ifdef CONFIG_HIGH_RES_TIMERS 889 + /* 890 + * Note: If reprogram is false we do not update 891 + * cpu_base->next_timer. This happens when we remove the first 892 + * timer on a remote cpu. No harm as we never dereference 893 + * cpu_base->next_timer. So the worst thing what can happen is 894 + * an superflous call to hrtimer_force_reprogram() on the 895 + * remote cpu later on if the same timer gets enqueued again. 896 + */ 897 + if (reprogram && timer == cpu_base->next_timer) 898 + hrtimer_force_reprogram(cpu_base, 1); 899 + #endif 917 900 } 918 901 919 902 /* 920 903 * remove hrtimer, called with base lock held 921 904 */ 922 905 static inline int 923 - remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) 906 + remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) 924 907 { 925 908 if (hrtimer_is_queued(timer)) { 926 - unsigned long state; 909 + unsigned long state = timer->state; 927 910 int reprogram; 928 911 929 912 /* ··· 936 921 debug_deactivate(timer); 937 922 timer_stats_hrtimer_clear_start_info(timer); 938 923 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); 939 - /* 940 - * We must preserve the CALLBACK state flag here, 941 - * otherwise we could move the timer base in 942 - * switch_hrtimer_base. 
943 - */ 944 - state = timer->state & HRTIMER_STATE_CALLBACK; 924 + 925 + if (!restart) 926 + state = HRTIMER_STATE_INACTIVE; 927 + 945 928 __remove_hrtimer(timer, base, state, reprogram); 946 929 return 1; 947 930 } 948 931 return 0; 949 932 } 950 933 951 - int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 952 - unsigned long delta_ns, const enum hrtimer_mode mode, 953 - int wakeup) 934 + /** 935 + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU 936 + * @timer: the timer to be added 937 + * @tim: expiry time 938 + * @delta_ns: "slack" range for the timer 939 + * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or 940 + * relative (HRTIMER_MODE_REL) 941 + */ 942 + void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 943 + unsigned long delta_ns, const enum hrtimer_mode mode) 954 944 { 955 945 struct hrtimer_clock_base *base, *new_base; 956 946 unsigned long flags; 957 - int ret, leftmost; 947 + int leftmost; 958 948 959 949 base = lock_hrtimer_base(timer, &flags); 960 950 961 951 /* Remove an active timer from the queue: */ 962 - ret = remove_hrtimer(timer, base); 952 + remove_hrtimer(timer, base, true); 963 953 964 954 if (mode & HRTIMER_MODE_REL) { 965 955 tim = ktime_add_safe(tim, base->get_time()); ··· 976 956 * timeouts. This will go away with the GTOD framework. 977 957 */ 978 958 #ifdef CONFIG_TIME_LOW_RES 979 - tim = ktime_add_safe(tim, base->resolution); 959 + tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution)); 980 960 #endif 981 961 } 982 962 ··· 988 968 timer_stats_hrtimer_set_start_info(timer); 989 969 990 970 leftmost = enqueue_hrtimer(timer, new_base); 991 - 992 - if (!leftmost) { 993 - unlock_hrtimer_base(timer, &flags); 994 - return ret; 995 - } 971 + if (!leftmost) 972 + goto unlock; 996 973 997 974 if (!hrtimer_is_hres_active(timer)) { 998 975 /* 999 976 * Kick to reschedule the next tick to handle the new timer 1000 977 * on dynticks target. 
1001 978 */ 1002 - wake_up_nohz_cpu(new_base->cpu_base->cpu); 1003 - } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) && 1004 - hrtimer_reprogram(timer, new_base)) { 1005 - /* 1006 - * Only allow reprogramming if the new base is on this CPU. 1007 - * (it might still be on another CPU if the timer was pending) 1008 - * 1009 - * XXX send_remote_softirq() ? 1010 - */ 1011 - if (wakeup) { 1012 - /* 1013 - * We need to drop cpu_base->lock to avoid a 1014 - * lock ordering issue vs. rq->lock. 1015 - */ 1016 - raw_spin_unlock(&new_base->cpu_base->lock); 1017 - raise_softirq_irqoff(HRTIMER_SOFTIRQ); 1018 - local_irq_restore(flags); 1019 - return ret; 1020 - } else { 1021 - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); 1022 - } 979 + if (new_base->cpu_base->nohz_active) 980 + wake_up_nohz_cpu(new_base->cpu_base->cpu); 981 + } else { 982 + hrtimer_reprogram(timer, new_base); 1023 983 } 1024 - 984 + unlock: 1025 985 unlock_hrtimer_base(timer, &flags); 1026 - 1027 - return ret; 1028 - } 1029 - EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns); 1030 - 1031 - /** 1032 - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU 1033 - * @timer: the timer to be added 1034 - * @tim: expiry time 1035 - * @delta_ns: "slack" range for the timer 1036 - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or 1037 - * relative (HRTIMER_MODE_REL) 1038 - * 1039 - * Returns: 1040 - * 0 on success 1041 - * 1 when the timer was active 1042 - */ 1043 - int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 1044 - unsigned long delta_ns, const enum hrtimer_mode mode) 1045 - { 1046 - return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1); 1047 986 } 1048 987 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 1049 - 1050 - /** 1051 - * hrtimer_start - (re)start an hrtimer on the current CPU 1052 - * @timer: the timer to be added 1053 - * @tim: expiry time 1054 - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or 1055 - * relative (HRTIMER_MODE_REL) 1056 - * 1057 - * Returns: 
1058 - * 0 on success 1059 - * 1 when the timer was active 1060 - */ 1061 - int 1062 - hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) 1063 - { 1064 - return __hrtimer_start_range_ns(timer, tim, 0, mode, 1); 1065 - } 1066 - EXPORT_SYMBOL_GPL(hrtimer_start); 1067 - 1068 988 1069 989 /** 1070 990 * hrtimer_try_to_cancel - try to deactivate a timer ··· 1022 1062 unsigned long flags; 1023 1063 int ret = -1; 1024 1064 1065 + /* 1066 + * Check lockless first. If the timer is not active (neither 1067 + * enqueued nor running the callback, nothing to do here. The 1068 + * base lock does not serialize against a concurrent enqueue, 1069 + * so we can avoid taking it. 1070 + */ 1071 + if (!hrtimer_active(timer)) 1072 + return 0; 1073 + 1025 1074 base = lock_hrtimer_base(timer, &flags); 1026 1075 1027 1076 if (!hrtimer_callback_running(timer)) 1028 - ret = remove_hrtimer(timer, base); 1077 + ret = remove_hrtimer(timer, base, false); 1029 1078 1030 1079 unlock_hrtimer_base(timer, &flags); 1031 1080 ··· 1084 1115 /** 1085 1116 * hrtimer_get_next_event - get the time until next expiry event 1086 1117 * 1087 - * Returns the delta to the next expiry event or KTIME_MAX if no timer 1088 - * is pending. 1118 + * Returns the next expiry time or KTIME_MAX if no timer is pending. 
1089 1119 */ 1090 - ktime_t hrtimer_get_next_event(void) 1120 + u64 hrtimer_get_next_event(void) 1091 1121 { 1092 1122 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1093 - ktime_t mindelta = { .tv64 = KTIME_MAX }; 1123 + u64 expires = KTIME_MAX; 1094 1124 unsigned long flags; 1095 1125 1096 1126 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1097 1127 1098 - if (!hrtimer_hres_active()) 1099 - mindelta = ktime_sub(__hrtimer_get_next_event(cpu_base), 1100 - ktime_get()); 1128 + if (!__hrtimer_hres_active(cpu_base)) 1129 + expires = __hrtimer_get_next_event(cpu_base).tv64; 1101 1130 1102 1131 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1103 1132 1104 - if (mindelta.tv64 < 0) 1105 - mindelta.tv64 = 0; 1106 - return mindelta; 1133 + return expires; 1107 1134 } 1108 1135 #endif 1109 1136 ··· 1141 1176 } 1142 1177 EXPORT_SYMBOL_GPL(hrtimer_init); 1143 1178 1144 - /** 1145 - * hrtimer_get_res - get the timer resolution for a clock 1146 - * @which_clock: which clock to query 1147 - * @tp: pointer to timespec variable to store the resolution 1179 + /* 1180 + * A timer is active, when it is enqueued into the rbtree or the 1181 + * callback function is running or it's in the state of being migrated 1182 + * to another cpu. 1148 1183 * 1149 - * Store the resolution of the clock selected by @which_clock in the 1150 - * variable pointed to by @tp. 1184 + * It is important for this function to not return a false negative. 
1151 1185 */ 1152 - int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) 1186 + bool hrtimer_active(const struct hrtimer *timer) 1153 1187 { 1154 1188 struct hrtimer_cpu_base *cpu_base; 1155 - int base = hrtimer_clockid_to_base(which_clock); 1189 + unsigned int seq; 1156 1190 1157 - cpu_base = raw_cpu_ptr(&hrtimer_bases); 1158 - *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution); 1191 + do { 1192 + cpu_base = READ_ONCE(timer->base->cpu_base); 1193 + seq = raw_read_seqcount_begin(&cpu_base->seq); 1159 1194 1160 - return 0; 1195 + if (timer->state != HRTIMER_STATE_INACTIVE || 1196 + cpu_base->running == timer) 1197 + return true; 1198 + 1199 + } while (read_seqcount_retry(&cpu_base->seq, seq) || 1200 + cpu_base != READ_ONCE(timer->base->cpu_base)); 1201 + 1202 + return false; 1161 1203 } 1162 - EXPORT_SYMBOL_GPL(hrtimer_get_res); 1204 + EXPORT_SYMBOL_GPL(hrtimer_active); 1163 1205 1164 - static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) 1206 + /* 1207 + * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3 1208 + * distinct sections: 1209 + * 1210 + * - queued: the timer is queued 1211 + * - callback: the timer is being ran 1212 + * - post: the timer is inactive or (re)queued 1213 + * 1214 + * On the read side we ensure we observe timer->state and cpu_base->running 1215 + * from the same section, if anything changed while we looked at it, we retry. 1216 + * This includes timer->base changing because sequence numbers alone are 1217 + * insufficient for that. 1218 + * 1219 + * The sequence numbers are required because otherwise we could still observe 1220 + * a false negative if the read side got smeared over multiple consequtive 1221 + * __run_hrtimer() invocations. 
1222 + */ 1223 + 1224 + static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, 1225 + struct hrtimer_clock_base *base, 1226 + struct hrtimer *timer, ktime_t *now) 1165 1227 { 1166 - struct hrtimer_clock_base *base = timer->base; 1167 - struct hrtimer_cpu_base *cpu_base = base->cpu_base; 1168 1228 enum hrtimer_restart (*fn)(struct hrtimer *); 1169 1229 int restart; 1170 1230 1171 - WARN_ON(!irqs_disabled()); 1231 + lockdep_assert_held(&cpu_base->lock); 1172 1232 1173 1233 debug_deactivate(timer); 1174 - __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); 1234 + cpu_base->running = timer; 1235 + 1236 + /* 1237 + * Separate the ->running assignment from the ->state assignment. 1238 + * 1239 + * As with a regular write barrier, this ensures the read side in 1240 + * hrtimer_active() cannot observe cpu_base->running == NULL && 1241 + * timer->state == INACTIVE. 1242 + */ 1243 + raw_write_seqcount_barrier(&cpu_base->seq); 1244 + 1245 + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); 1175 1246 timer_stats_account_hrtimer(timer); 1176 1247 fn = timer->function; 1177 1248 ··· 1223 1222 raw_spin_lock(&cpu_base->lock); 1224 1223 1225 1224 /* 1226 - * Note: We clear the CALLBACK bit after enqueue_hrtimer and 1225 + * Note: We clear the running state after enqueue_hrtimer and 1227 1226 * we do not reprogram the event hardware. Happens either in 1228 1227 * hrtimer_start_range_ns() or in hrtimer_interrupt() 1228 + * 1229 + * Note: Because we dropped the cpu_base->lock above, 1230 + * hrtimer_start_range_ns() can have popped in and enqueued the timer 1231 + * for us already. 
1229 1232 */ 1230 - if (restart != HRTIMER_NORESTART) { 1231 - BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); 1233 + if (restart != HRTIMER_NORESTART && 1234 + !(timer->state & HRTIMER_STATE_ENQUEUED)) 1232 1235 enqueue_hrtimer(timer, base); 1233 - } 1234 1236 1235 - WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK)); 1237 + /* 1238 + * Separate the ->running assignment from the ->state assignment. 1239 + * 1240 + * As with a regular write barrier, this ensures the read side in 1241 + * hrtimer_active() cannot observe cpu_base->running == NULL && 1242 + * timer->state == INACTIVE. 1243 + */ 1244 + raw_write_seqcount_barrier(&cpu_base->seq); 1236 1245 1237 - timer->state &= ~HRTIMER_STATE_CALLBACK; 1246 + WARN_ON_ONCE(cpu_base->running != timer); 1247 + cpu_base->running = NULL; 1238 1248 } 1239 1249 1240 - #ifdef CONFIG_HIGH_RES_TIMERS 1241 - 1242 - /* 1243 - * High resolution timer interrupt 1244 - * Called with interrupts disabled 1245 - */ 1246 - void hrtimer_interrupt(struct clock_event_device *dev) 1250 + static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) 1247 1251 { 1248 - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1249 - ktime_t expires_next, now, entry_time, delta; 1250 - int i, retries = 0; 1252 + struct hrtimer_clock_base *base = cpu_base->clock_base; 1253 + unsigned int active = cpu_base->active_bases; 1251 1254 1252 - BUG_ON(!cpu_base->hres_active); 1253 - cpu_base->nr_events++; 1254 - dev->next_event.tv64 = KTIME_MAX; 1255 - 1256 - raw_spin_lock(&cpu_base->lock); 1257 - entry_time = now = hrtimer_update_base(cpu_base); 1258 - retry: 1259 - cpu_base->in_hrtirq = 1; 1260 - /* 1261 - * We set expires_next to KTIME_MAX here with cpu_base->lock 1262 - * held to prevent that a timer is enqueued in our queue via 1263 - * the migration code. This does not affect enqueueing of 1264 - * timers which run their callback and need to be requeued on 1265 - * this CPU. 
1266 - */ 1267 - cpu_base->expires_next.tv64 = KTIME_MAX; 1268 - 1269 - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1270 - struct hrtimer_clock_base *base; 1255 + for (; active; base++, active >>= 1) { 1271 1256 struct timerqueue_node *node; 1272 1257 ktime_t basenow; 1273 1258 1274 - if (!(cpu_base->active_bases & (1 << i))) 1259 + if (!(active & 0x01)) 1275 1260 continue; 1276 1261 1277 - base = cpu_base->clock_base + i; 1278 1262 basenow = ktime_add(now, base->offset); 1279 1263 1280 1264 while ((node = timerqueue_getnext(&base->active))) { ··· 1282 1296 if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) 1283 1297 break; 1284 1298 1285 - __run_hrtimer(timer, &basenow); 1299 + __run_hrtimer(cpu_base, base, timer, &basenow); 1286 1300 } 1287 1301 } 1302 + } 1303 + 1304 + #ifdef CONFIG_HIGH_RES_TIMERS 1305 + 1306 + /* 1307 + * High resolution timer interrupt 1308 + * Called with interrupts disabled 1309 + */ 1310 + void hrtimer_interrupt(struct clock_event_device *dev) 1311 + { 1312 + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1313 + ktime_t expires_next, now, entry_time, delta; 1314 + int retries = 0; 1315 + 1316 + BUG_ON(!cpu_base->hres_active); 1317 + cpu_base->nr_events++; 1318 + dev->next_event.tv64 = KTIME_MAX; 1319 + 1320 + raw_spin_lock(&cpu_base->lock); 1321 + entry_time = now = hrtimer_update_base(cpu_base); 1322 + retry: 1323 + cpu_base->in_hrtirq = 1; 1324 + /* 1325 + * We set expires_next to KTIME_MAX here with cpu_base->lock 1326 + * held to prevent that a timer is enqueued in our queue via 1327 + * the migration code. This does not affect enqueueing of 1328 + * timers which run their callback and need to be requeued on 1329 + * this CPU. 
1330 + */ 1331 + cpu_base->expires_next.tv64 = KTIME_MAX; 1332 + 1333 + __hrtimer_run_queues(cpu_base, now); 1334 + 1288 1335 /* Reevaluate the clock bases for the next expiry */ 1289 1336 expires_next = __hrtimer_get_next_event(cpu_base); 1290 1337 /* ··· 1329 1310 raw_spin_unlock(&cpu_base->lock); 1330 1311 1331 1312 /* Reprogramming necessary ? */ 1332 - if (expires_next.tv64 == KTIME_MAX || 1333 - !tick_program_event(expires_next, 0)) { 1313 + if (!tick_program_event(expires_next, 0)) { 1334 1314 cpu_base->hang_detected = 0; 1335 1315 return; 1336 1316 } ··· 1362 1344 cpu_base->hang_detected = 1; 1363 1345 raw_spin_unlock(&cpu_base->lock); 1364 1346 delta = ktime_sub(now, entry_time); 1365 - if (delta.tv64 > cpu_base->max_hang_time.tv64) 1366 - cpu_base->max_hang_time = delta; 1347 + if ((unsigned int)delta.tv64 > cpu_base->max_hang_time) 1348 + cpu_base->max_hang_time = (unsigned int) delta.tv64; 1367 1349 /* 1368 1350 * Limit it to a sensible value as we enforce a longer 1369 1351 * delay. Give the CPU at least 100ms to catch up. ··· 1381 1363 * local version of hrtimer_peek_ahead_timers() called with interrupts 1382 1364 * disabled. 1383 1365 */ 1384 - static void __hrtimer_peek_ahead_timers(void) 1366 + static inline void __hrtimer_peek_ahead_timers(void) 1385 1367 { 1386 1368 struct tick_device *td; 1387 1369 ··· 1393 1375 hrtimer_interrupt(td->evtdev); 1394 1376 } 1395 1377 1396 - /** 1397 - * hrtimer_peek_ahead_timers -- run soft-expired timers now 1398 - * 1399 - * hrtimer_peek_ahead_timers will peek at the timer queue of 1400 - * the current cpu and check if there are any timers for which 1401 - * the soft expires time has passed. If any such timers exist, 1402 - * they are run immediately and then removed from the timer queue. 
1403 - * 1404 - */ 1405 - void hrtimer_peek_ahead_timers(void) 1406 - { 1407 - unsigned long flags; 1408 - 1409 - local_irq_save(flags); 1410 - __hrtimer_peek_ahead_timers(); 1411 - local_irq_restore(flags); 1412 - } 1413 - 1414 - static void run_hrtimer_softirq(struct softirq_action *h) 1415 - { 1416 - hrtimer_peek_ahead_timers(); 1417 - } 1418 - 1419 1378 #else /* CONFIG_HIGH_RES_TIMERS */ 1420 1379 1421 1380 static inline void __hrtimer_peek_ahead_timers(void) { } ··· 1400 1405 #endif /* !CONFIG_HIGH_RES_TIMERS */ 1401 1406 1402 1407 /* 1403 - * Called from timer softirq every jiffy, expire hrtimers: 1404 - * 1405 - * For HRT its the fall back code to run the softirq in the timer 1406 - * softirq context in case the hrtimer initialization failed or has 1407 - * not been done yet. 1408 - */ 1409 - void hrtimer_run_pending(void) 1410 - { 1411 - if (hrtimer_hres_active()) 1412 - return; 1413 - 1414 - /* 1415 - * This _is_ ugly: We have to check in the softirq context, 1416 - * whether we can switch to highres and / or nohz mode. The 1417 - * clocksource switch happens in the timer interrupt with 1418 - * xtime_lock held. Notification from there only sets the 1419 - * check bit in the tick_oneshot code, otherwise we might 1420 - * deadlock vs. xtime_lock. 
1421 - */ 1422 - if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) 1423 - hrtimer_switch_to_hres(); 1424 - } 1425 - 1426 - /* 1427 - * Called from hardirq context every jiffy 1408 + * Called from run_local_timers in hardirq context every jiffy 1428 1409 */ 1429 1410 void hrtimer_run_queues(void) 1430 1411 { 1431 - struct timerqueue_node *node; 1432 1412 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1433 - struct hrtimer_clock_base *base; 1434 - int index, gettime = 1; 1413 + ktime_t now; 1435 1414 1436 - if (hrtimer_hres_active()) 1415 + if (__hrtimer_hres_active(cpu_base)) 1437 1416 return; 1438 1417 1439 - for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { 1440 - base = &cpu_base->clock_base[index]; 1441 - if (!timerqueue_getnext(&base->active)) 1442 - continue; 1443 - 1444 - if (gettime) { 1445 - hrtimer_get_softirq_time(cpu_base); 1446 - gettime = 0; 1447 - } 1448 - 1449 - raw_spin_lock(&cpu_base->lock); 1450 - 1451 - while ((node = timerqueue_getnext(&base->active))) { 1452 - struct hrtimer *timer; 1453 - 1454 - timer = container_of(node, struct hrtimer, node); 1455 - if (base->softirq_time.tv64 <= 1456 - hrtimer_get_expires_tv64(timer)) 1457 - break; 1458 - 1459 - __run_hrtimer(timer, &base->softirq_time); 1460 - } 1461 - raw_spin_unlock(&cpu_base->lock); 1418 + /* 1419 + * This _is_ ugly: We have to check periodically, whether we 1420 + * can switch to highres and / or nohz mode. The clocksource 1421 + * switch happens with xtime_lock held. Notification from 1422 + * there only sets the check bit in the tick_oneshot code, 1423 + * otherwise we might deadlock vs. xtime_lock. 
1424 + */ 1425 + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) { 1426 + hrtimer_switch_to_hres(); 1427 + return; 1462 1428 } 1429 + 1430 + raw_spin_lock(&cpu_base->lock); 1431 + now = hrtimer_update_base(cpu_base); 1432 + __hrtimer_run_queues(cpu_base, now); 1433 + raw_spin_unlock(&cpu_base->lock); 1463 1434 } 1464 1435 1465 1436 /* ··· 1458 1497 do { 1459 1498 set_current_state(TASK_INTERRUPTIBLE); 1460 1499 hrtimer_start_expires(&t->timer, mode); 1461 - if (!hrtimer_active(&t->timer)) 1462 - t->task = NULL; 1463 1500 1464 1501 if (likely(t->task)) 1465 1502 freezable_schedule(); ··· 1601 1642 debug_deactivate(timer); 1602 1643 1603 1644 /* 1604 - * Mark it as STATE_MIGRATE not INACTIVE otherwise the 1645 + * Mark it as ENQUEUED not INACTIVE otherwise the 1605 1646 * timer could be seen as !active and just vanish away 1606 1647 * under us on another CPU 1607 1648 */ 1608 - __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); 1649 + __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); 1609 1650 timer->base = new_base; 1610 1651 /* 1611 1652 * Enqueue the timers on the new cpu. This does not ··· 1616 1657 * event device. 1617 1658 */ 1618 1659 enqueue_hrtimer(timer, new_base); 1619 - 1620 - /* Clear the migration state bit */ 1621 - timer->state &= ~HRTIMER_STATE_MIGRATE; 1622 1660 } 1623 1661 } 1624 1662 ··· 1687 1731 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, 1688 1732 (void *)(long)smp_processor_id()); 1689 1733 register_cpu_notifier(&hrtimers_nb); 1690 - #ifdef CONFIG_HIGH_RES_TIMERS 1691 - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); 1692 - #endif 1693 1734 } 1694 1735 1695 1736 /** ··· 1725 1772 hrtimer_init_sleeper(&t, current); 1726 1773 1727 1774 hrtimer_start_expires(&t.timer, mode); 1728 - if (!hrtimer_active(&t.timer)) 1729 - t.task = NULL; 1730 1775 1731 1776 if (likely(t.task)) 1732 1777 schedule();
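The hrtimer.c hunk above replaces the old CALLBACK state bit with a `cpu_base->running` pointer guarded by a sequence counter, so that `hrtimer_active()` never returns a false negative while a callback runs or requeues the timer. The following is a minimal single-threaded userspace sketch of that seqcount pattern; all struct and function names are illustrative stand-ins, not the kernel's API.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct fake_cpu_base {
	unsigned int seq;	/* models cpu_base->seq */
	const void *running;	/* timer whose callback is executing */
};

struct fake_timer {
	struct fake_cpu_base *base;
	int enqueued;		/* stands in for HRTIMER_STATE_ENQUEUED */
};

/* update side, entering the callback: ->running is set before the
 * queued state is cleared, with a seq bump in between, modelling
 * raw_write_seqcount_barrier() */
static void begin_callback(struct fake_timer *t)
{
	struct fake_cpu_base *b = t->base;

	b->running = t;
	b->seq++;
	t->enqueued = 0;
}

/* update side, leaving the callback: a second seq bump separates a
 * possible requeue from clearing ->running */
static void end_callback(struct fake_timer *t)
{
	struct fake_cpu_base *b = t->base;

	b->seq++;
	b->running = NULL;
}

/* read side: retries if the sequence moved, so it observes ->enqueued
 * and ->running from the same section and never misses an active timer */
static bool fake_timer_active(const struct fake_timer *t)
{
	const struct fake_cpu_base *b = t->base;
	unsigned int seq;

	do {
		seq = b->seq;
		if (t->enqueued || b->running == t)
			return true;
	} while (b->seq != seq);

	return false;
}
```

The real code additionally re-reads `timer->base->cpu_base` after the retry check, because a migrating timer can change bases between two reads and sequence numbers alone cannot catch that.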
+54 -7
kernel/time/ntp.c
··· 35 35 static u64 tick_length; 36 36 static u64 tick_length_base; 37 37 38 + #define SECS_PER_DAY 86400 38 39 #define MAX_TICKADJ 500LL /* usecs */ 39 40 #define MAX_TICKADJ_SCALED \ 40 41 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) ··· 76 75 77 76 /* constant (boot-param configurable) NTP tick adjustment (upscaled) */ 78 77 static s64 ntp_tick_adj; 78 + 79 + /* second value of the next pending leapsecond, or TIME64_MAX if no leap */ 80 + static time64_t ntp_next_leap_sec = TIME64_MAX; 79 81 80 82 #ifdef CONFIG_NTP_PPS 81 83 ··· 353 349 tick_length = tick_length_base; 354 350 time_offset = 0; 355 351 352 + ntp_next_leap_sec = TIME64_MAX; 356 353 /* Clear PPS state variables */ 357 354 pps_clear(); 358 355 } ··· 364 359 return tick_length; 365 360 } 366 361 362 + /** 363 + * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime_t 364 + * 365 + * Provides the time of the next leapsecond against CLOCK_REALTIME in 366 + * a ktime_t format. Returns KTIME_MAX if no leapsecond is pending. 
367 + */ 368 + ktime_t ntp_get_next_leap(void) 369 + { 370 + ktime_t ret; 371 + 372 + if ((time_state == TIME_INS) && (time_status & STA_INS)) 373 + return ktime_set(ntp_next_leap_sec, 0); 374 + ret.tv64 = KTIME_MAX; 375 + return ret; 376 + } 367 377 368 378 /* 369 379 * this routine handles the overflow of the microsecond field ··· 402 382 */ 403 383 switch (time_state) { 404 384 case TIME_OK: 405 - if (time_status & STA_INS) 385 + if (time_status & STA_INS) { 406 386 time_state = TIME_INS; 407 - else if (time_status & STA_DEL) 387 + ntp_next_leap_sec = secs + SECS_PER_DAY - 388 + (secs % SECS_PER_DAY); 389 + } else if (time_status & STA_DEL) { 408 390 time_state = TIME_DEL; 391 + ntp_next_leap_sec = secs + SECS_PER_DAY - 392 + ((secs+1) % SECS_PER_DAY); 393 + } 409 394 break; 410 395 case TIME_INS: 411 - if (!(time_status & STA_INS)) 396 + if (!(time_status & STA_INS)) { 397 + ntp_next_leap_sec = TIME64_MAX; 412 398 time_state = TIME_OK; 413 - else if (secs % 86400 == 0) { 399 + } else if (secs % SECS_PER_DAY == 0) { 414 400 leap = -1; 415 401 time_state = TIME_OOP; 416 402 printk(KERN_NOTICE ··· 424 398 } 425 399 break; 426 400 case TIME_DEL: 427 - if (!(time_status & STA_DEL)) 401 + if (!(time_status & STA_DEL)) { 402 + ntp_next_leap_sec = TIME64_MAX; 428 403 time_state = TIME_OK; 429 - else if ((secs + 1) % 86400 == 0) { 404 + } else if ((secs + 1) % SECS_PER_DAY == 0) { 430 405 leap = 1; 406 + ntp_next_leap_sec = TIME64_MAX; 431 407 time_state = TIME_WAIT; 432 408 printk(KERN_NOTICE 433 409 "Clock: deleting leap second 23:59:59 UTC\n"); 434 410 } 435 411 break; 436 412 case TIME_OOP: 413 + ntp_next_leap_sec = TIME64_MAX; 437 414 time_state = TIME_WAIT; 438 415 break; 439 - 440 416 case TIME_WAIT: 441 417 if (!(time_status & (STA_INS | STA_DEL))) 442 418 time_state = TIME_OK; ··· 575 547 if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { 576 548 time_state = TIME_OK; 577 549 time_status = STA_UNSYNC; 550 + ntp_next_leap_sec = TIME64_MAX; 578 551 /* 
restart PPS frequency calibration */ 579 552 pps_reset_freq_interval(); 580 553 } ··· 739 710 txc->time.tv_usec = ts->tv_nsec; 740 711 if (!(time_status & STA_NANO)) 741 712 txc->time.tv_usec /= NSEC_PER_USEC; 713 + 714 + /* Handle leapsec adjustments */ 715 + if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) { 716 + if ((time_state == TIME_INS) && (time_status & STA_INS)) { 717 + result = TIME_OOP; 718 + txc->tai++; 719 + txc->time.tv_sec--; 720 + } 721 + if ((time_state == TIME_DEL) && (time_status & STA_DEL)) { 722 + result = TIME_WAIT; 723 + txc->tai--; 724 + txc->time.tv_sec++; 725 + } 726 + if ((time_state == TIME_OOP) && 727 + (ts->tv_sec == ntp_next_leap_sec)) { 728 + result = TIME_WAIT; 729 + } 730 + } 742 731 743 732 return result; 744 733 }
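The ntp.c hunk caches `ntp_next_leap_sec`, the absolute second of the pending leap edge, by rounding up to the next UTC day boundary. A small sketch of that arithmetic (function names here are illustrative, not kernel symbols): an inserted leap second takes effect at the next midnight after the current second, a deleted one at the next 23:59:59, which is why the TIME_DEL branch uses `(secs + 1)`.

```c
#include <assert.h>
#include <stdint.h>

#define SECS_PER_DAY 86400

/* next UTC midnight after "secs": edge of an inserted leap second */
static int64_t next_leap_insert(int64_t secs)
{
	return secs + SECS_PER_DAY - (secs % SECS_PER_DAY);
}

/* next 23:59:59 at or after "secs": edge of a deleted leap second */
static int64_t next_leap_delete(int64_t secs)
{
	return secs + SECS_PER_DAY - ((secs + 1) % SECS_PER_DAY);
}
```

Caching this value lets `__do_adjtimex()` report TIME_OOP/TIME_WAIT transitions (the new block at the end of the hunk) without re-deriving the boundary on every call.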
+1
kernel/time/ntp_internal.h
··· 5 5 extern void ntp_clear(void); 6 6 /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ 7 7 extern u64 ntp_tick_length(void); 8 + extern ktime_t ntp_get_next_leap(void); 8 9 extern int second_overflow(unsigned long secs); 9 10 extern int ntp_validate_timex(struct timex *); 10 11 extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
+12 -5
kernel/time/posix-timers.c
··· 272 272 return 0; 273 273 } 274 274 275 + static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec *tp) 276 + { 277 + tp->tv_sec = 0; 278 + tp->tv_nsec = hrtimer_resolution; 279 + return 0; 280 + } 281 + 275 282 /* 276 283 * Initialize everything, well, just everything in Posix clocks/timers ;) 277 284 */ 278 285 static __init int init_posix_timers(void) 279 286 { 280 287 struct k_clock clock_realtime = { 281 - .clock_getres = hrtimer_get_res, 288 + .clock_getres = posix_get_hrtimer_res, 282 289 .clock_get = posix_clock_realtime_get, 283 290 .clock_set = posix_clock_realtime_set, 284 291 .clock_adj = posix_clock_realtime_adj, ··· 297 290 .timer_del = common_timer_del, 298 291 }; 299 292 struct k_clock clock_monotonic = { 300 - .clock_getres = hrtimer_get_res, 293 + .clock_getres = posix_get_hrtimer_res, 301 294 .clock_get = posix_ktime_get_ts, 302 295 .nsleep = common_nsleep, 303 296 .nsleep_restart = hrtimer_nanosleep_restart, ··· 307 300 .timer_del = common_timer_del, 308 301 }; 309 302 struct k_clock clock_monotonic_raw = { 310 - .clock_getres = hrtimer_get_res, 303 + .clock_getres = posix_get_hrtimer_res, 311 304 .clock_get = posix_get_monotonic_raw, 312 305 }; 313 306 struct k_clock clock_realtime_coarse = { ··· 319 312 .clock_get = posix_get_monotonic_coarse, 320 313 }; 321 314 struct k_clock clock_tai = { 322 - .clock_getres = hrtimer_get_res, 315 + .clock_getres = posix_get_hrtimer_res, 323 316 .clock_get = posix_get_tai, 324 317 .nsleep = common_nsleep, 325 318 .nsleep_restart = hrtimer_nanosleep_restart, ··· 329 322 .timer_del = common_timer_del, 330 323 }; 331 324 struct k_clock clock_boottime = { 332 - .clock_getres = hrtimer_get_res, 325 + .clock_getres = posix_get_hrtimer_res, 333 326 .clock_get = posix_get_boottime, 334 327 .nsleep = common_nsleep, 335 328 .nsleep_restart = hrtimer_nanosleep_restart,
+12 -6
kernel/time/tick-broadcast-hrtimer.c
··· 22 22 struct clock_event_device *bc) 23 23 { 24 24 switch (mode) { 25 + case CLOCK_EVT_MODE_UNUSED: 25 26 case CLOCK_EVT_MODE_SHUTDOWN: 26 27 /* 27 28 * Note, we cannot cancel the timer here as we might ··· 67 66 * hrtimer_{start/cancel} functions call into tracing, 68 67 * calls to these functions must be bound within RCU_NONIDLE. 69 68 */ 70 - RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ? 71 - !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) : 72 - 0); 69 + RCU_NONIDLE({ 70 + bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0; 71 + if (bc_moved) 72 + hrtimer_start(&bctimer, expires, 73 + HRTIMER_MODE_ABS_PINNED);}); 73 74 if (bc_moved) { 74 75 /* Bind the "device" to the cpu */ 75 76 bc->bound_on = smp_processor_id(); ··· 102 99 { 103 100 ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); 104 101 105 - if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX) 102 + switch (ce_broadcast_hrtimer.mode) { 103 + case CLOCK_EVT_MODE_ONESHOT: 104 + if (ce_broadcast_hrtimer.next_event.tv64 != KTIME_MAX) 105 + return HRTIMER_RESTART; 106 + default: 106 107 return HRTIMER_NORESTART; 107 - 108 - return HRTIMER_RESTART; 108 + } 109 109 } 110 110 111 111 void tick_setup_hrtimer_broadcast(void)
+44 -55
kernel/time/tick-broadcast.c
··· 255 255 /* 256 256 * Broadcast the event to the cpus, which are set in the mask (mangled). 257 257 */ 258 - static void tick_do_broadcast(struct cpumask *mask) 258 + static bool tick_do_broadcast(struct cpumask *mask) 259 259 { 260 260 int cpu = smp_processor_id(); 261 261 struct tick_device *td; 262 + bool local = false; 262 263 263 264 /* 264 265 * Check, if the current cpu is in the mask 265 266 */ 266 267 if (cpumask_test_cpu(cpu, mask)) { 267 268 cpumask_clear_cpu(cpu, mask); 268 - td = &per_cpu(tick_cpu_device, cpu); 269 - td->evtdev->event_handler(td->evtdev); 269 + local = true; 270 270 } 271 271 272 272 if (!cpumask_empty(mask)) { ··· 279 279 td = &per_cpu(tick_cpu_device, cpumask_first(mask)); 280 280 td->evtdev->broadcast(mask); 281 281 } 282 + return local; 282 283 } 283 284 284 285 /* 285 286 * Periodic broadcast: 286 287 * - invoke the broadcast handlers 287 288 */ 288 - static void tick_do_periodic_broadcast(void) 289 + static bool tick_do_periodic_broadcast(void) 289 290 { 290 291 cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask); 291 - tick_do_broadcast(tmpmask); 292 + return tick_do_broadcast(tmpmask); 292 293 } 293 294 294 295 /* ··· 297 296 */ 298 297 static void tick_handle_periodic_broadcast(struct clock_event_device *dev) 299 298 { 300 - ktime_t next; 299 + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); 300 + bool bc_local; 301 301 302 302 raw_spin_lock(&tick_broadcast_lock); 303 + bc_local = tick_do_periodic_broadcast(); 303 304 304 - tick_do_periodic_broadcast(); 305 + if (clockevent_state_oneshot(dev)) { 306 + ktime_t next = ktime_add(dev->next_event, tick_period); 305 307 306 - /* 307 - * The device is in periodic mode. No reprogramming necessary: 308 - */ 309 - if (dev->state == CLOCK_EVT_STATE_PERIODIC) 310 - goto unlock; 311 - 312 - /* 313 - * Setup the next period for devices, which do not have 314 - * periodic mode. We read dev->next_event first and add to it 315 - * when the event already expired. 
clockevents_program_event() 316 - * sets dev->next_event only when the event is really 317 - * programmed to the device. 318 - */ 319 - for (next = dev->next_event; ;) { 320 - next = ktime_add(next, tick_period); 321 - 322 - if (!clockevents_program_event(dev, next, false)) 323 - goto unlock; 324 - tick_do_periodic_broadcast(); 308 + clockevents_program_event(dev, next, true); 325 309 } 326 - unlock: 327 310 raw_spin_unlock(&tick_broadcast_lock); 311 + 312 + /* 313 + * We run the handler of the local cpu after dropping 314 + * tick_broadcast_lock because the handler might deadlock when 315 + * trying to switch to oneshot mode. 316 + */ 317 + if (bc_local) 318 + td->evtdev->event_handler(td->evtdev); 328 319 } 329 320 330 321 /** ··· 525 532 irq_set_affinity(bc->irq, bc->cpumask); 526 533 } 527 534 528 - static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, 529 - ktime_t expires, int force) 535 + static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu, 536 + ktime_t expires) 530 537 { 531 - int ret; 538 + if (!clockevent_state_oneshot(bc)) 539 + clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); 532 540 533 - if (bc->state != CLOCK_EVT_STATE_ONESHOT) 534 - clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); 535 - 536 - ret = clockevents_program_event(bc, expires, force); 537 - if (!ret) 538 - tick_broadcast_set_affinity(bc, cpumask_of(cpu)); 539 - return ret; 541 + clockevents_program_event(bc, expires, 1); 542 + tick_broadcast_set_affinity(bc, cpumask_of(cpu)); 540 543 } 541 544 542 545 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) 543 546 { 544 - clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); 547 + clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); 545 548 } 546 549 547 550 /* ··· 555 566 * switched over, leave the device alone. 
556 567 */ 557 568 if (td->mode == TICKDEV_MODE_ONESHOT) { 558 - clockevents_set_state(td->evtdev, 569 + clockevents_switch_state(td->evtdev, 559 570 CLOCK_EVT_STATE_ONESHOT); 560 571 } 561 572 } ··· 569 580 struct tick_device *td; 570 581 ktime_t now, next_event; 571 582 int cpu, next_cpu = 0; 583 + bool bc_local; 572 584 573 585 raw_spin_lock(&tick_broadcast_lock); 574 - again: 575 586 dev->next_event.tv64 = KTIME_MAX; 576 587 next_event.tv64 = KTIME_MAX; 577 588 cpumask_clear(tmpmask); ··· 613 624 /* 614 625 * Wakeup the cpus which have an expired event. 615 626 */ 616 - tick_do_broadcast(tmpmask); 627 + bc_local = tick_do_broadcast(tmpmask); 617 628 618 629 /* 619 630 * Two reasons for reprogram: ··· 625 636 * - There are pending events on sleeping CPUs which were not 626 637 * in the event mask 627 638 */ 628 - if (next_event.tv64 != KTIME_MAX) { 629 - /* 630 - * Rearm the broadcast device. If event expired, 631 - * repeat the above 632 - */ 633 - if (tick_broadcast_set_event(dev, next_cpu, next_event, 0)) 634 - goto again; 635 - } 639 + if (next_event.tv64 != KTIME_MAX) 640 + tick_broadcast_set_event(dev, next_cpu, next_event); 641 + 636 642 raw_spin_unlock(&tick_broadcast_lock); 643 + 644 + if (bc_local) { 645 + td = this_cpu_ptr(&tick_cpu_device); 646 + td->evtdev->event_handler(td->evtdev); 647 + } 637 648 } 638 649 639 650 static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) ··· 659 670 if (dev->next_event.tv64 < bc->next_event.tv64) 660 671 return; 661 672 } 662 - clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); 673 + clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); 663 674 } 664 675 665 676 /** ··· 715 726 */ 716 727 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && 717 728 dev->next_event.tv64 < bc->next_event.tv64) 718 - tick_broadcast_set_event(bc, cpu, dev->next_event, 1); 729 + tick_broadcast_set_event(bc, cpu, dev->next_event); 719 730 } 720 731 /* 721 732 * If the current CPU owns the hrtimer broadcast ··· 
729 740 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); 730 741 } else { 731 742 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { 732 - clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 743 + clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); 733 744 /* 734 745 * The cpu which was handling the broadcast 735 746 * timer marked this cpu in the broadcast ··· 831 842 832 843 /* Set it up only once ! */ 833 844 if (bc->event_handler != tick_handle_oneshot_broadcast) { 834 - int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC; 845 + int was_periodic = clockevent_state_periodic(bc); 835 846 836 847 bc->event_handler = tick_handle_oneshot_broadcast; 837 848 ··· 847 858 tick_broadcast_oneshot_mask, tmpmask); 848 859 849 860 if (was_periodic && !cpumask_empty(tmpmask)) { 850 - clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); 861 + clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); 851 862 tick_broadcast_init_next_event(tmpmask, 852 863 tick_next_period); 853 - tick_broadcast_set_event(bc, cpu, tick_next_period, 1); 864 + tick_broadcast_set_event(bc, cpu, tick_next_period); 854 865 } else 855 866 bc->next_event.tv64 = KTIME_MAX; 856 867 } else {
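In the tick-broadcast.c hunk, `tick_do_broadcast()` now returns whether the local cpu was in the mask, and the callers invoke the local event handler only after dropping `tick_broadcast_lock`, since the handler may itself need that lock (e.g. when switching to oneshot mode). A minimal sketch of this defer-past-the-lock pattern, with a fake single-owner lock and illustrative names:

```c
#include <assert.h>
#include <stdbool.h>

static int lock_held;		/* stands in for tick_broadcast_lock */
static int handler_runs;

static void lock(void)   { assert(!lock_held); lock_held = 1; }
static void unlock(void) { lock_held = 0; }

/* the local handler re-acquires the lock: calling it with the lock
 * still held would deadlock */
static void local_handler(void)
{
	lock();
	handler_runs++;
	unlock();
}

/* returns true if the local cpu must run its handler, like the
 * reworked tick_do_broadcast() */
static bool do_broadcast(bool local_cpu_in_mask)
{
	/* remote cpus would be kicked here, under the lock */
	return local_cpu_in_mask;
}

static void handle_broadcast(bool local_cpu_in_mask)
{
	bool bc_local;

	lock();
	bc_local = do_broadcast(local_cpu_in_mask);
	unlock();

	/* invoke the local handler only after dropping the lock */
	if (bc_local)
		local_handler();
}
```

The same shape appears twice in the hunk: in `tick_handle_periodic_broadcast()` and at the end of `tick_handle_oneshot_broadcast()`.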
+14 -4
kernel/time/tick-common.c
··· 102 102 103 103 tick_periodic(cpu); 104 104 105 - if (dev->state != CLOCK_EVT_STATE_ONESHOT) 105 + #if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON) 106 + /* 107 + * The cpu might have transitioned to HIGHRES or NOHZ mode via 108 + * update_process_times() -> run_local_timers() -> 109 + * hrtimer_run_queues(). 110 + */ 111 + if (dev->event_handler != tick_handle_periodic) 112 + return; 113 + #endif 114 + 115 + if (!clockevent_state_oneshot(dev)) 106 116 return; 107 117 for (;;) { 108 118 /* ··· 150 140 151 141 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && 152 142 !tick_broadcast_oneshot_active()) { 153 - clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); 143 + clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); 154 144 } else { 155 145 unsigned long seq; 156 146 ktime_t next; ··· 160 150 next = tick_next_period; 161 151 } while (read_seqretry(&jiffies_lock, seq)); 162 152 163 - clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 153 + clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); 164 154 165 155 for (;;) { 166 156 if (!clockevents_program_event(dev, next, false)) ··· 377 367 * Prevent that the clock events layer tries to call 378 368 * the set mode function! 379 369 */ 380 - dev->state = CLOCK_EVT_STATE_DETACHED; 370 + clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); 381 371 dev->mode = CLOCK_EVT_MODE_UNUSED; 382 372 clockevents_exchange_device(dev, NULL); 383 373 dev->event_handler = clockevents_handle_noop;
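The `for (;;)` loop retained in `tick_handle_periodic()` above keeps a oneshot-mode device ticking after a long delay: programming an event with `force == false` only succeeds while the expiry is in the future, so the next event is advanced one full tick period at a time until it is. A sketch of that catch-up logic, with illustrative stand-ins for the clockevents API:

```c
#include <assert.h>
#include <stdint.h>

#define TICK_PERIOD 1000000	/* 1 ms tick, in ns; illustrative */

/* succeeds (returns 0) only if the expiry is still in the future,
 * like clockevents_program_event() with force == false */
static int program_event(int64_t expires, int64_t now)
{
	return expires > now ? 0 : -1;
}

/* advance in whole tick periods until programming succeeds */
static int64_t catch_up(int64_t next, int64_t now)
{
	for (;;) {
		next += TICK_PERIOD;
		if (!program_event(next, now))
			return next;	/* programmed successfully */
	}
}
```

Advancing by whole periods (rather than jumping straight past `now`) keeps the periodic grid aligned, which the kernel's per-iteration `tick_periodic()` accounting relies on.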
+29 -2
kernel/time/tick-internal.h
··· 36 36 return !(dev->features & CLOCK_EVT_FEAT_DUMMY); 37 37 } 38 38 39 + static inline enum clock_event_state clockevent_get_state(struct clock_event_device *dev) 40 + { 41 + return dev->state_use_accessors; 42 + } 43 + 44 + static inline void clockevent_set_state(struct clock_event_device *dev, 45 + enum clock_event_state state) 46 + { 47 + dev->state_use_accessors = state; 48 + } 49 + 39 50 extern void clockevents_shutdown(struct clock_event_device *dev); 40 51 extern void clockevents_exchange_device(struct clock_event_device *old, 41 52 struct clock_event_device *new); 42 - extern void clockevents_set_state(struct clock_event_device *dev, 43 - enum clock_event_state state); 53 + extern void clockevents_switch_state(struct clock_event_device *dev, 54 + enum clock_event_state state); 44 55 extern int clockevents_program_event(struct clock_event_device *dev, 45 56 ktime_t expires, bool force); 46 57 extern void clockevents_handle_noop(struct clock_event_device *dev); ··· 148 137 # else 149 138 static inline void tick_nohz_init(void) { } 150 139 #endif 140 + 141 + #ifdef CONFIG_NO_HZ_COMMON 142 + extern unsigned long tick_nohz_active; 143 + #else 144 + #define tick_nohz_active (0) 145 + #endif 146 + 147 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 148 + extern void timers_update_migration(bool update_nohz); 149 + #else 150 + static inline void timers_update_migration(bool update_nohz) { } 151 + #endif 152 + 153 + DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); 154 + 155 + extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+19 -3
kernel/time/tick-oneshot.c
··· 28 28 { 29 29 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 30 30 31 + if (unlikely(expires.tv64 == KTIME_MAX)) { 32 + /* 33 + * We don't need the clock event device any more, stop it. 34 + */ 35 + clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); 36 + return 0; 37 + } 38 + 39 + if (unlikely(clockevent_state_oneshot_stopped(dev))) { 40 + /* 41 + * We need the clock event again, configure it in ONESHOT mode 42 + * before using it. 43 + */ 44 + clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); 45 + } 46 + 31 47 return clockevents_program_event(dev, expires, force); 32 48 } 33 49 ··· 54 38 { 55 39 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 56 40 57 - clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 41 + clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); 58 42 clockevents_program_event(dev, ktime_get(), true); 59 43 } 60 44 ··· 66 50 ktime_t next_event) 67 51 { 68 52 newdev->event_handler = handler; 69 - clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT); 53 + clockevents_switch_state(newdev, CLOCK_EVT_STATE_ONESHOT); 70 54 clockevents_program_event(newdev, next_event, true); 71 55 } 72 56 ··· 97 81 98 82 td->mode = TICKDEV_MODE_ONESHOT; 99 83 dev->event_handler = handler; 100 - clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); 84 + clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); 101 85 tick_broadcast_switch_to_oneshot(); 102 86 return 0; 103 87 }
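The tick-oneshot.c hunk introduces the ONESHOT_STOPPED state: a KTIME_MAX expiry means no event is pending, so the device is stopped instead of being programmed, and the next real expiry switches it back to ONESHOT first. A simplified sketch of that state handling (struct and names are illustrative):

```c
#include <assert.h>
#include <stdint.h>

#define KTIME_MAX INT64_MAX

enum ce_state { CE_ONESHOT, CE_ONESHOT_STOPPED };

struct ce_dev {
	enum ce_state state;
	int64_t next_event;
};

static int program_event(struct ce_dev *dev, int64_t expires)
{
	if (expires == KTIME_MAX) {
		/* clock event device not needed any more: stop it */
		dev->state = CE_ONESHOT_STOPPED;
		return 0;
	}
	if (dev->state == CE_ONESHOT_STOPPED) {
		/* needed again: reconfigure to ONESHOT before use */
		dev->state = CE_ONESHOT;
	}
	dev->next_event = expires;
	return 0;
}
```

This is part of the clockevent state-machine infrastructure mentioned in the merge message; the `clockevent_state_oneshot_stopped()` check in the real code reads the state through the new accessors added in tick-internal.h.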
+134 -190
kernel/time/tick-sched.c
··· 399 399 * NO HZ enabled ? 400 400 */ 401 401 static int tick_nohz_enabled __read_mostly = 1; 402 - int tick_nohz_active __read_mostly; 402 + unsigned long tick_nohz_active __read_mostly; 403 403 /* 404 404 * Enable / Disable tickless mode 405 405 */ ··· 565 565 } 566 566 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); 567 567 568 + static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 569 + { 570 + hrtimer_cancel(&ts->sched_timer); 571 + hrtimer_set_expires(&ts->sched_timer, ts->last_tick); 572 + 573 + /* Forward the time to expire in the future */ 574 + hrtimer_forward(&ts->sched_timer, now, tick_period); 575 + 576 + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 577 + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 578 + else 579 + tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); 580 + } 581 + 568 582 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 569 583 ktime_t now, int cpu) 570 584 { 571 - unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; 572 - ktime_t last_update, expires, ret = { .tv64 = 0 }; 573 - unsigned long rcu_delta_jiffies; 574 585 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 575 - u64 time_delta; 576 - 577 - time_delta = timekeeping_max_deferment(); 586 + u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; 587 + unsigned long seq, basejiff; 588 + ktime_t tick; 578 589 579 590 /* Read jiffies and the time when jiffies were updated last */ 580 591 do { 581 592 seq = read_seqbegin(&jiffies_lock); 582 - last_update = last_jiffies_update; 583 - last_jiffies = jiffies; 593 + basemono = last_jiffies_update.tv64; 594 + basejiff = jiffies; 584 595 } while (read_seqretry(&jiffies_lock, seq)); 596 + ts->last_jiffies = basejiff; 585 597 586 - if (rcu_needs_cpu(&rcu_delta_jiffies) || 598 + if (rcu_needs_cpu(basemono, &next_rcu) || 587 599 arch_needs_cpu() || irq_work_needs_cpu()) { 588 - next_jiffies = last_jiffies + 1; 589 - delta_jiffies = 1; 600 + next_tick = 
basemono + TICK_NSEC; 590 601 } else { 591 - /* Get the next timer wheel timer */ 592 - next_jiffies = get_next_timer_interrupt(last_jiffies); 593 - delta_jiffies = next_jiffies - last_jiffies; 594 - if (rcu_delta_jiffies < delta_jiffies) { 595 - next_jiffies = last_jiffies + rcu_delta_jiffies; 596 - delta_jiffies = rcu_delta_jiffies; 602 + /* 603 + * Get the next pending timer. If high resolution 604 + * timers are enabled this only takes the timer wheel 605 + * timers into account. If high resolution timers are 606 + * disabled this also looks at the next expiring 607 + * hrtimer. 608 + */ 609 + next_tmr = get_next_timer_interrupt(basejiff, basemono); 610 + ts->next_timer = next_tmr; 611 + /* Take the next rcu event into account */ 612 + next_tick = next_rcu < next_tmr ? next_rcu : next_tmr; 613 + } 614 + 615 + /* 616 + * If the tick is due in the next period, keep it ticking or 617 + * restart it proper. 618 + */ 619 + delta = next_tick - basemono; 620 + if (delta <= (u64)TICK_NSEC) { 621 + tick.tv64 = 0; 622 + if (!ts->tick_stopped) 623 + goto out; 624 + if (delta == 0) { 625 + /* Tick is stopped, but required now. Enforce it */ 626 + tick_nohz_restart(ts, now); 627 + goto out; 597 628 } 598 629 } 599 630 600 631 /* 601 - * Do not stop the tick, if we are only one off (or less) 602 - * or if the cpu is required for RCU: 632 + * If this cpu is the one which updates jiffies, then give up 633 + * the assignment and let it be taken by the cpu which runs 634 + * the tick timer next, which might be this cpu as well. If we 635 + * don't drop this here the jiffies might be stale and 636 + * do_timer() never invoked. Keep track of the fact that it 637 + * was the one which had the do_timer() duty last. If this cpu 638 + * is the one which had the do_timer() duty last, we limit the 639 + * sleep time to the timekeeping max_deferement value. 640 + * Otherwise we can sleep as long as we want. 
603 641 */ 604 - if (!ts->tick_stopped && delta_jiffies <= 1) 605 - goto out; 606 - 607 - /* Schedule the tick, if we are at least one jiffie off */ 608 - if ((long)delta_jiffies >= 1) { 609 - 610 - /* 611 - * If this cpu is the one which updates jiffies, then 612 - * give up the assignment and let it be taken by the 613 - * cpu which runs the tick timer next, which might be 614 - * this cpu as well. If we don't drop this here the 615 - * jiffies might be stale and do_timer() never 616 - * invoked. Keep track of the fact that it was the one 617 - * which had the do_timer() duty last. If this cpu is 618 - * the one which had the do_timer() duty last, we 619 - * limit the sleep time to the timekeeping 620 - * max_deferement value which we retrieved 621 - * above. Otherwise we can sleep as long as we want. 622 - */ 623 - if (cpu == tick_do_timer_cpu) { 624 - tick_do_timer_cpu = TICK_DO_TIMER_NONE; 625 - ts->do_timer_last = 1; 626 - } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { 627 - time_delta = KTIME_MAX; 628 - ts->do_timer_last = 0; 629 - } else if (!ts->do_timer_last) { 630 - time_delta = KTIME_MAX; 631 - } 642 + delta = timekeeping_max_deferment(); 643 + if (cpu == tick_do_timer_cpu) { 644 + tick_do_timer_cpu = TICK_DO_TIMER_NONE; 645 + ts->do_timer_last = 1; 646 + } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { 647 + delta = KTIME_MAX; 648 + ts->do_timer_last = 0; 649 + } else if (!ts->do_timer_last) { 650 + delta = KTIME_MAX; 651 + } 632 652 633 653 #ifdef CONFIG_NO_HZ_FULL 634 - if (!ts->inidle) { 635 - time_delta = min(time_delta, 636 - scheduler_tick_max_deferment()); 637 - } 654 + /* Limit the tick delta to the maximum scheduler deferment */ 655 + if (!ts->inidle) 656 + delta = min(delta, scheduler_tick_max_deferment()); 638 657 #endif 639 658 640 - /* 641 - * calculate the expiry time for the next timer wheel 642 - * timer. 
delta_jiffies >= NEXT_TIMER_MAX_DELTA signals 643 - * that there is no timer pending or at least extremely 644 - * far into the future (12 days for HZ=1000). In this 645 - * case we set the expiry to the end of time. 646 - */ 647 - if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { 648 - /* 649 - * Calculate the time delta for the next timer event. 650 - * If the time delta exceeds the maximum time delta 651 - * permitted by the current clocksource then adjust 652 - * the time delta accordingly to ensure the 653 - * clocksource does not wrap. 654 - */ 655 - time_delta = min_t(u64, time_delta, 656 - tick_period.tv64 * delta_jiffies); 657 - } 659 + /* Calculate the next expiry time */ 660 + if (delta < (KTIME_MAX - basemono)) 661 + expires = basemono + delta; 662 + else 663 + expires = KTIME_MAX; 658 664 659 - if (time_delta < KTIME_MAX) 660 - expires = ktime_add_ns(last_update, time_delta); 661 - else 662 - expires.tv64 = KTIME_MAX; 665 + expires = min_t(u64, expires, next_tick); 666 + tick.tv64 = expires; 663 667 664 - /* Skip reprogram of event if its not changed */ 665 - if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 666 - goto out; 668 + /* Skip reprogram of event if its not changed */ 669 + if (ts->tick_stopped && (expires == dev->next_event.tv64)) 670 + goto out; 667 671 668 - ret = expires; 672 + /* 673 + * nohz_stop_sched_tick can be called several times before 674 + * the nohz_restart_sched_tick is called. This happens when 675 + * interrupts arrive which do not cause a reschedule. In the 676 + * first call we save the current tick time, so we can restart 677 + * the scheduler tick in nohz_restart_sched_tick. 678 + */ 679 + if (!ts->tick_stopped) { 680 + nohz_balance_enter_idle(cpu); 681 + calc_load_enter_idle(); 669 682 670 - /* 671 - * nohz_stop_sched_tick can be called several times before 672 - * the nohz_restart_sched_tick is called. This happens when 673 - * interrupts arrive which do not cause a reschedule. 
In the 674 - * first call we save the current tick time, so we can restart 675 - * the scheduler tick in nohz_restart_sched_tick. 676 - */ 677 - if (!ts->tick_stopped) { 678 - nohz_balance_enter_idle(cpu); 679 - calc_load_enter_idle(); 680 - 681 - ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 682 - ts->tick_stopped = 1; 683 - trace_tick_stop(1, " "); 684 - } 685 - 686 - /* 687 - * If the expiration time == KTIME_MAX, then 688 - * in this case we simply stop the tick timer. 689 - */ 690 - if (unlikely(expires.tv64 == KTIME_MAX)) { 691 - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 692 - hrtimer_cancel(&ts->sched_timer); 693 - goto out; 694 - } 695 - 696 - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 697 - hrtimer_start(&ts->sched_timer, expires, 698 - HRTIMER_MODE_ABS_PINNED); 699 - /* Check, if the timer was already in the past */ 700 - if (hrtimer_active(&ts->sched_timer)) 701 - goto out; 702 - } else if (!tick_program_event(expires, 0)) 703 - goto out; 704 - /* 705 - * We are past the event already. So we crossed a 706 - * jiffie boundary. Update jiffies and raise the 707 - * softirq. 708 - */ 709 - tick_do_update_jiffies64(ktime_get()); 683 + ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 684 + ts->tick_stopped = 1; 685 + trace_tick_stop(1, " "); 710 686 } 711 - raise_softirq_irqoff(TIMER_SOFTIRQ); 712 - out: 713 - ts->next_jiffies = next_jiffies; 714 - ts->last_jiffies = last_jiffies; 715 - ts->sleep_length = ktime_sub(dev->next_event, now); 716 687 717 - return ret; 688 + /* 689 + * If the expiration time == KTIME_MAX, then we simply stop 690 + * the tick timer. 
691 + */ 692 + if (unlikely(expires == KTIME_MAX)) { 693 + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 694 + hrtimer_cancel(&ts->sched_timer); 695 + goto out; 696 + } 697 + 698 + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 699 + hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); 700 + else 701 + tick_program_event(tick, 1); 702 + out: 703 + /* Update the estimated sleep length */ 704 + ts->sleep_length = ktime_sub(dev->next_event, now); 705 + return tick; 718 706 } 719 707 720 708 static void tick_nohz_full_stop_tick(struct tick_sched *ts) ··· 864 876 return ts->sleep_length; 865 877 } 866 878 867 - static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 868 - { 869 - hrtimer_cancel(&ts->sched_timer); 870 - hrtimer_set_expires(&ts->sched_timer, ts->last_tick); 871 - 872 - while (1) { 873 - /* Forward the time to expire in the future */ 874 - hrtimer_forward(&ts->sched_timer, now, tick_period); 875 - 876 - if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 877 - hrtimer_start_expires(&ts->sched_timer, 878 - HRTIMER_MODE_ABS_PINNED); 879 - /* Check, if the timer was already in the past */ 880 - if (hrtimer_active(&ts->sched_timer)) 881 - break; 882 - } else { 883 - if (!tick_program_event( 884 - hrtimer_get_expires(&ts->sched_timer), 0)) 885 - break; 886 - } 887 - /* Reread time and update jiffies */ 888 - now = ktime_get(); 889 - tick_do_update_jiffies64(now); 890 - } 891 - } 892 - 893 879 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) 894 880 { 895 881 /* Update jiffies first */ ··· 934 972 local_irq_enable(); 935 973 } 936 974 937 - static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) 938 - { 939 - hrtimer_forward(&ts->sched_timer, now, tick_period); 940 - return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0); 941 - } 942 - 943 975 /* 944 976 * The nohz low res interrupt handler 945 977 */ ··· 952 996 if (unlikely(ts->tick_stopped)) 953 997 return; 954 998 955 - while (tick_nohz_reprogram(ts, now)) { 
956 - now = ktime_get(); 957 - tick_do_update_jiffies64(now); 958 - } 999 + hrtimer_forward(&ts->sched_timer, now, tick_period); 1000 + tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); 1001 + } 1002 + 1003 + static inline void tick_nohz_activate(struct tick_sched *ts, int mode) 1004 + { 1005 + if (!tick_nohz_enabled) 1006 + return; 1007 + ts->nohz_mode = mode; 1008 + /* One update is enough */ 1009 + if (!test_and_set_bit(0, &tick_nohz_active)) 1010 + timers_update_migration(true); 959 1011 } 960 1012 961 1013 /** ··· 977 1013 if (!tick_nohz_enabled) 978 1014 return; 979 1015 980 - local_irq_disable(); 981 - if (tick_switch_to_oneshot(tick_nohz_handler)) { 982 - local_irq_enable(); 1016 + if (tick_switch_to_oneshot(tick_nohz_handler)) 983 1017 return; 984 - } 985 - tick_nohz_active = 1; 986 - ts->nohz_mode = NOHZ_MODE_LOWRES; 987 1018 988 1019 /* 989 1020 * Recycle the hrtimer in ts, so we can share the ··· 988 1029 /* Get the next period */ 989 1030 next = tick_init_jiffy_update(); 990 1031 991 - for (;;) { 992 - hrtimer_set_expires(&ts->sched_timer, next); 993 - if (!tick_program_event(next, 0)) 994 - break; 995 - next = ktime_add(next, tick_period); 996 - } 997 - local_irq_enable(); 1032 + hrtimer_forward_now(&ts->sched_timer, tick_period); 1033 + hrtimer_set_expires(&ts->sched_timer, next); 1034 + tick_program_event(next, 1); 1035 + tick_nohz_activate(ts, NOHZ_MODE_LOWRES); 998 1036 } 999 1037 1000 1038 /* ··· 1043 1087 1044 1088 static inline void tick_nohz_switch_to_nohz(void) { } 1045 1089 static inline void tick_nohz_irq_enter(void) { } 1090 + static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { } 1046 1091 1047 1092 #endif /* CONFIG_NO_HZ_COMMON */ 1048 1093 ··· 1124 1167 hrtimer_add_expires_ns(&ts->sched_timer, offset); 1125 1168 } 1126 1169 1127 - for (;;) { 1128 - hrtimer_forward(&ts->sched_timer, now, tick_period); 1129 - hrtimer_start_expires(&ts->sched_timer, 1130 - HRTIMER_MODE_ABS_PINNED); 1131 - /* Check, if the 
timer was already in the past */ 1132 - if (hrtimer_active(&ts->sched_timer)) 1133 - break; 1134 - now = ktime_get(); 1135 - } 1136 - 1137 - #ifdef CONFIG_NO_HZ_COMMON 1138 - if (tick_nohz_enabled) { 1139 - ts->nohz_mode = NOHZ_MODE_HIGHRES; 1140 - tick_nohz_active = 1; 1141 - } 1142 - #endif 1170 + hrtimer_forward(&ts->sched_timer, now, tick_period); 1171 + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 1172 + tick_nohz_activate(ts, NOHZ_MODE_HIGHRES); 1143 1173 } 1144 1174 #endif /* HIGH_RES_TIMERS */ 1145 1175 ··· 1171 1227 * Called cyclic from the hrtimer softirq (driven by the timer 1172 1228 * softirq) allow_nohz signals, that we can switch into low-res nohz 1173 1229 * mode, because high resolution timers are disabled (either compile 1174 - * or runtime). 1230 + * or runtime). Called with interrupts disabled. 1175 1231 */ 1176 1232 int tick_check_oneshot_change(int allow_nohz) 1177 1233 {
+1 -1
kernel/time/tick-sched.h
··· 57 57 ktime_t iowait_sleeptime; 58 58 ktime_t sleep_length; 59 59 unsigned long last_jiffies; 60 - unsigned long next_jiffies; 60 + u64 next_timer; 61 61 ktime_t idle_expires; 62 62 int do_timer_last; 63 63 };
+26 -50
kernel/time/time.c
··· 41 41 #include <asm/uaccess.h> 42 42 #include <asm/unistd.h> 43 43 44 - #include "timeconst.h" 44 + #include <generated/timeconst.h> 45 45 #include "timekeeping.h" 46 46 47 47 /* ··· 173 173 return error; 174 174 175 175 if (tz) { 176 + /* Verify we're witin the +-15 hrs range */ 177 + if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60) 178 + return -EINVAL; 179 + 176 180 sys_tz = *tz; 177 181 update_vsyscall_tz(); 178 182 if (firsttime) { ··· 487 483 } 488 484 EXPORT_SYMBOL(ns_to_timespec64); 489 485 #endif 490 - /* 491 - * When we convert to jiffies then we interpret incoming values 492 - * the following way: 486 + /** 487 + * msecs_to_jiffies: - convert milliseconds to jiffies 488 + * @m: time in milliseconds 489 + * 490 + * conversion is done as follows: 493 491 * 494 492 * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) 495 493 * ··· 499 493 * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. 500 494 * 501 495 * - all other values are converted to jiffies by either multiplying 502 - * the input value by a factor or dividing it with a factor 496 + * the input value by a factor or dividing it with a factor and 497 + * handling any 32-bit overflows. 498 + * for the details see __msecs_to_jiffies() 503 499 * 504 - * We must also be careful about 32-bit overflows. 500 + * msecs_to_jiffies() checks for the passed in value being a constant 501 + * via __builtin_constant_p() allowing gcc to eliminate most of the 502 + * code, __msecs_to_jiffies() is called if the value passed does not 503 + * allow constant folding and the actual conversion must be done at 504 + * runtime. 
505 + * the _msecs_to_jiffies helpers are the HZ dependent conversion 506 + * routines found in include/linux/jiffies.h 505 507 */ 506 - unsigned long msecs_to_jiffies(const unsigned int m) 508 + unsigned long __msecs_to_jiffies(const unsigned int m) 507 509 { 508 510 /* 509 511 * Negative value, means infinite timeout: 510 512 */ 511 513 if ((int)m < 0) 512 514 return MAX_JIFFY_OFFSET; 513 - 514 - #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) 515 - /* 516 - * HZ is equal to or smaller than 1000, and 1000 is a nice 517 - * round multiple of HZ, divide with the factor between them, 518 - * but round upwards: 519 - */ 520 - return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); 521 - #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) 522 - /* 523 - * HZ is larger than 1000, and HZ is a nice round multiple of 524 - * 1000 - simply multiply with the factor between them. 525 - * 526 - * But first make sure the multiplication result cannot 527 - * overflow: 528 - */ 529 - if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) 530 - return MAX_JIFFY_OFFSET; 531 - 532 - return m * (HZ / MSEC_PER_SEC); 533 - #else 534 - /* 535 - * Generic case - multiply, round and divide. 
But first 536 - * check that if we are doing a net multiplication, that 537 - * we wouldn't overflow: 538 - */ 539 - if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) 540 - return MAX_JIFFY_OFFSET; 541 - 542 - return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) 543 - >> MSEC_TO_HZ_SHR32; 544 - #endif 515 + return _msecs_to_jiffies(m); 545 516 } 546 - EXPORT_SYMBOL(msecs_to_jiffies); 517 + EXPORT_SYMBOL(__msecs_to_jiffies); 547 518 548 - unsigned long usecs_to_jiffies(const unsigned int u) 519 + unsigned long __usecs_to_jiffies(const unsigned int u) 549 520 { 550 521 if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) 551 522 return MAX_JIFFY_OFFSET; 552 - #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) 553 - return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); 554 - #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) 555 - return u * (HZ / USEC_PER_SEC); 556 - #else 557 - return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) 558 - >> USEC_TO_HZ_SHR32; 559 - #endif 523 + return _usecs_to_jiffies(u); 560 524 } 561 - EXPORT_SYMBOL(usecs_to_jiffies); 525 + EXPORT_SYMBOL(__usecs_to_jiffies); 562 526 563 527 /* 564 528 * The TICK_NSEC - 1 rounds up the value to the next resolution. Note
+2 -1
kernel/time/timeconst.bc
··· 50 50 print "#include <linux/types.h>\n\n" 51 51 52 52 print "#if HZ != ", hz, "\n" 53 - print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n" 53 + print "#error \qinclude/generated/timeconst.h has the wrong HZ value!\q\n" 54 54 print "#endif\n\n" 55 55 56 56 if (hz < 2) { ··· 105 105 halt 106 106 } 107 107 108 + hz = read(); 108 109 timeconst(hz)
+77 -76
kernel/time/timekeeping.c
··· 118 118 119 119 #ifdef CONFIG_DEBUG_TIMEKEEPING 120 120 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ 121 - /* 122 - * These simple flag variables are managed 123 - * without locks, which is racy, but ok since 124 - * we don't really care about being super 125 - * precise about how many events were seen, 126 - * just that a problem was observed. 127 - */ 128 - static int timekeeping_underflow_seen; 129 - static int timekeeping_overflow_seen; 130 - 131 - /* last_warning is only modified under the timekeeping lock */ 132 - static long timekeeping_last_warning; 133 121 134 122 static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) 135 123 { ··· 137 149 } 138 150 } 139 151 140 - if (timekeeping_underflow_seen) { 141 - if (jiffies - timekeeping_last_warning > WARNING_FREQ) { 152 + if (tk->underflow_seen) { 153 + if (jiffies - tk->last_warning > WARNING_FREQ) { 142 154 printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); 143 155 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); 144 156 printk_deferred(" Your kernel is probably still fine.\n"); 145 - timekeeping_last_warning = jiffies; 157 + tk->last_warning = jiffies; 146 158 } 147 - timekeeping_underflow_seen = 0; 159 + tk->underflow_seen = 0; 148 160 } 149 161 150 - if (timekeeping_overflow_seen) { 151 - if (jiffies - timekeeping_last_warning > WARNING_FREQ) { 162 + if (tk->overflow_seen) { 163 + if (jiffies - tk->last_warning > WARNING_FREQ) { 152 164 printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); 153 165 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); 154 166 printk_deferred(" Your kernel is probably still fine.\n"); 155 - timekeeping_last_warning = jiffies; 167 + tk->last_warning = jiffies; 156 168 } 157 - timekeeping_overflow_seen = 0; 169 + tk->overflow_seen = 0; 158 170 } 159 171 } 160 172 
161 173 static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) 162 174 { 175 + struct timekeeper *tk = &tk_core.timekeeper; 163 176 cycle_t now, last, mask, max, delta; 164 177 unsigned int seq; 165 178 ··· 186 197 * mask-relative negative values. 187 198 */ 188 199 if (unlikely((~delta & mask) < (mask >> 3))) { 189 - timekeeping_underflow_seen = 1; 200 + tk->underflow_seen = 1; 190 201 delta = 0; 191 202 } 192 203 193 204 /* Cap delta value to the max_cycles values to avoid mult overflows */ 194 205 if (unlikely(delta > max)) { 195 - timekeeping_overflow_seen = 1; 206 + tk->overflow_seen = 1; 196 207 delta = tkr->clock->max_cycles; 197 208 } 198 209 ··· 540 551 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); 541 552 542 553 /* 554 + * tk_update_leap_state - helper to update the next_leap_ktime 555 + */ 556 + static inline void tk_update_leap_state(struct timekeeper *tk) 557 + { 558 + tk->next_leap_ktime = ntp_get_next_leap(); 559 + if (tk->next_leap_ktime.tv64 != KTIME_MAX) 560 + /* Convert to monotonic time */ 561 + tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real); 562 + } 563 + 564 + /* 543 565 * Update the ktime_t based scalar nsec members of the timekeeper 544 566 */ 545 567 static inline void tk_update_ktime_data(struct timekeeper *tk) ··· 591 591 ntp_clear(); 592 592 } 593 593 594 + tk_update_leap_state(tk); 594 595 tk_update_ktime_data(tk); 595 596 596 597 update_vsyscall(tk); 597 598 update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); 598 599 600 + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); 601 + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); 602 + 603 + if (action & TK_CLOCK_WAS_SET) 604 + tk->clock_was_set_seq++; 605 + /* 606 + * The mirroring of the data to the shadow-timekeeper needs 607 + * to happen last here to ensure we don't over-write the 608 + * timekeeper structure on the next update with stale data 609 + */ 599 610 if (action & TK_MIRROR) 600 611 memcpy(&shadow_timekeeper, 
&tk_core.timekeeper, 601 612 sizeof(tk_core.timekeeper)); 602 - 603 - update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); 604 - update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); 605 613 } 606 614 607 615 /** ··· 706 698 return ktime_add_ns(base, nsecs); 707 699 } 708 700 EXPORT_SYMBOL_GPL(ktime_get); 701 + 702 + u32 ktime_get_resolution_ns(void) 703 + { 704 + struct timekeeper *tk = &tk_core.timekeeper; 705 + unsigned int seq; 706 + u32 nsecs; 707 + 708 + WARN_ON(timekeeping_suspended); 709 + 710 + do { 711 + seq = read_seqcount_begin(&tk_core.seq); 712 + nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift; 713 + } while (read_seqcount_retry(&tk_core.seq, seq)); 714 + 715 + return nsecs; 716 + } 717 + EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); 709 718 710 719 static ktime_t *offsets[TK_OFFS_MAX] = { 711 720 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, ··· 1204 1179 } 1205 1180 1206 1181 /** 1207 - * read_boot_clock - Return time of the system start. 1182 + * read_boot_clock64 - Return time of the system start. 1208 1183 * 1209 1184 * Weak dummy function for arches that do not yet support it. 1210 1185 * Function to read the exact time the system has been started. 1211 - * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. 1186 + * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported. 1212 1187 * 1213 1188 * XXX - Do be sure to remove it once all arches implement it. 1214 1189 */ 1215 - void __weak read_boot_clock(struct timespec *ts) 1190 + void __weak read_boot_clock64(struct timespec64 *ts) 1216 1191 { 1217 1192 ts->tv_sec = 0; 1218 1193 ts->tv_nsec = 0; 1219 - } 1220 - 1221 - void __weak read_boot_clock64(struct timespec64 *ts64) 1222 - { 1223 - struct timespec ts; 1224 - 1225 - read_boot_clock(&ts); 1226 - *ts64 = timespec_to_timespec64(ts); 1227 1194 } 1228 1195 1229 1196 /* Flag for if timekeeping_resume() has injected sleeptime */ ··· 1853 1836 * memcpy under the tk_core.seq against one before we start 1854 1837 * updating. 
1855 1838 */ 1839 + timekeeping_update(tk, clock_set); 1856 1840 memcpy(real_tk, tk, sizeof(*tk)); 1857 - timekeeping_update(real_tk, clock_set); 1841 + /* The memcpy must come last. Do not put anything here! */ 1858 1842 write_seqcount_end(&tk_core.seq); 1859 1843 out: 1860 1844 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); ··· 1944 1926 } 1945 1927 1946 1928 /** 1947 - * ktime_get_update_offsets_tick - hrtimer helper 1948 - * @offs_real: pointer to storage for monotonic -> realtime offset 1949 - * @offs_boot: pointer to storage for monotonic -> boottime offset 1950 - * @offs_tai: pointer to storage for monotonic -> clock tai offset 1951 - * 1952 - * Returns monotonic time at last tick and various offsets 1953 - */ 1954 - ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, 1955 - ktime_t *offs_tai) 1956 - { 1957 - struct timekeeper *tk = &tk_core.timekeeper; 1958 - unsigned int seq; 1959 - ktime_t base; 1960 - u64 nsecs; 1961 - 1962 - do { 1963 - seq = read_seqcount_begin(&tk_core.seq); 1964 - 1965 - base = tk->tkr_mono.base; 1966 - nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; 1967 - 1968 - *offs_real = tk->offs_real; 1969 - *offs_boot = tk->offs_boot; 1970 - *offs_tai = tk->offs_tai; 1971 - } while (read_seqcount_retry(&tk_core.seq, seq)); 1972 - 1973 - return ktime_add_ns(base, nsecs); 1974 - } 1975 - 1976 - #ifdef CONFIG_HIGH_RES_TIMERS 1977 - /** 1978 1929 * ktime_get_update_offsets_now - hrtimer helper 1930 + * @cwsseq: pointer to check and store the clock was set sequence number 1979 1931 * @offs_real: pointer to storage for monotonic -> realtime offset 1980 1932 * @offs_boot: pointer to storage for monotonic -> boottime offset 1981 1933 * @offs_tai: pointer to storage for monotonic -> clock tai offset 1982 1934 * 1983 - * Returns current monotonic time and updates the offsets 1935 + * Returns current monotonic time and updates the offsets if the 1936 + * sequence number in @cwsseq and timekeeper.clock_was_set_seq 
are 1937 + * different. 1938 + * 1984 1939 * Called from hrtimer_interrupt() or retrigger_next_event() 1985 1940 */ 1986 - ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, 1987 - ktime_t *offs_tai) 1941 + ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, 1942 + ktime_t *offs_boot, ktime_t *offs_tai) 1988 1943 { 1989 1944 struct timekeeper *tk = &tk_core.timekeeper; 1990 1945 unsigned int seq; ··· 1969 1978 1970 1979 base = tk->tkr_mono.base; 1971 1980 nsecs = timekeeping_get_ns(&tk->tkr_mono); 1981 + base = ktime_add_ns(base, nsecs); 1972 1982 1973 - *offs_real = tk->offs_real; 1974 - *offs_boot = tk->offs_boot; 1975 - *offs_tai = tk->offs_tai; 1983 + if (*cwsseq != tk->clock_was_set_seq) { 1984 + *cwsseq = tk->clock_was_set_seq; 1985 + *offs_real = tk->offs_real; 1986 + *offs_boot = tk->offs_boot; 1987 + *offs_tai = tk->offs_tai; 1988 + } 1989 + 1990 + /* Handle leapsecond insertion adjustments */ 1991 + if (unlikely(base.tv64 >= tk->next_leap_ktime.tv64)) 1992 + *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0)); 1993 + 1976 1994 } while (read_seqcount_retry(&tk_core.seq, seq)); 1977 1995 1978 - return ktime_add_ns(base, nsecs); 1996 + return base; 1979 1997 } 1980 - #endif 1981 1998 1982 1999 /** 1983 2000 * do_adjtimex() - Accessor function to NTP __do_adjtimex function ··· 2026 2027 __timekeeping_set_tai_offset(tk, tai); 2027 2028 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 2028 2029 } 2030 + tk_update_leap_state(tk); 2031 + 2029 2032 write_seqcount_end(&tk_core.seq); 2030 2033 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 2031 2034
+4 -7
kernel/time/timekeeping.h
··· 3 3 /* 4 4 * Internal interfaces for kernel/time/ 5 5 */ 6 - extern ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, 7 - ktime_t *offs_boot, 8 - ktime_t *offs_tai); 9 - extern ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, 10 - ktime_t *offs_boot, 11 - ktime_t *offs_tai); 6 + extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, 7 + ktime_t *offs_real, 8 + ktime_t *offs_boot, 9 + ktime_t *offs_tai); 12 10 13 11 extern int timekeeping_valid_for_hres(void); 14 12 extern u64 timekeeping_max_deferment(void); 15 13 extern int timekeeping_inject_offset(struct timespec *ts); 16 14 extern s32 timekeeping_get_tai_offset(void); 17 15 extern void timekeeping_set_tai_offset(s32 tai_offset); 18 - extern void timekeeping_clocktai(struct timespec *ts); 19 16 extern int timekeeping_suspend(void); 20 17 extern void timekeeping_resume(void); 21 18
+169 -195
kernel/time/timer.c
··· 49 49 #include <asm/timex.h> 50 50 #include <asm/io.h> 51 51 52 + #include "tick-internal.h" 53 + 52 54 #define CREATE_TRACE_POINTS 53 55 #include <trace/events/timer.h> 54 56 ··· 70 68 #define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) 71 69 72 70 struct tvec { 73 - struct list_head vec[TVN_SIZE]; 71 + struct hlist_head vec[TVN_SIZE]; 74 72 }; 75 73 76 74 struct tvec_root { 77 - struct list_head vec[TVR_SIZE]; 75 + struct hlist_head vec[TVR_SIZE]; 78 76 }; 79 77 80 78 struct tvec_base { ··· 85 83 unsigned long active_timers; 86 84 unsigned long all_timers; 87 85 int cpu; 86 + bool migration_enabled; 87 + bool nohz_active; 88 88 struct tvec_root tv1; 89 89 struct tvec tv2; 90 90 struct tvec tv3; ··· 94 90 struct tvec tv5; 95 91 } ____cacheline_aligned; 96 92 97 - /* 98 - * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've 99 - * made NULL special, hint: lock_timer_base()) and we cannot get a compile time 100 - * pointer to per-cpu entries because we don't know where we'll map the section, 101 - * even for the boot cpu. 102 - * 103 - * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the 104 - * rest of them. 
105 - */ 106 - struct tvec_base boot_tvec_bases; 107 - EXPORT_SYMBOL(boot_tvec_bases); 108 93 109 - static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; 94 + static DEFINE_PER_CPU(struct tvec_base, tvec_bases); 110 95 111 - /* Functions below help us manage 'deferrable' flag */ 112 - static inline unsigned int tbase_get_deferrable(struct tvec_base *base) 96 + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 97 + unsigned int sysctl_timer_migration = 1; 98 + 99 + void timers_update_migration(bool update_nohz) 113 100 { 114 - return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE); 101 + bool on = sysctl_timer_migration && tick_nohz_active; 102 + unsigned int cpu; 103 + 104 + /* Avoid the loop, if nothing to update */ 105 + if (this_cpu_read(tvec_bases.migration_enabled) == on) 106 + return; 107 + 108 + for_each_possible_cpu(cpu) { 109 + per_cpu(tvec_bases.migration_enabled, cpu) = on; 110 + per_cpu(hrtimer_bases.migration_enabled, cpu) = on; 111 + if (!update_nohz) 112 + continue; 113 + per_cpu(tvec_bases.nohz_active, cpu) = true; 114 + per_cpu(hrtimer_bases.nohz_active, cpu) = true; 115 + } 115 116 } 116 117 117 - static inline unsigned int tbase_get_irqsafe(struct tvec_base *base) 118 + int timer_migration_handler(struct ctl_table *table, int write, 119 + void __user *buffer, size_t *lenp, 120 + loff_t *ppos) 118 121 { 119 - return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE); 122 + static DEFINE_MUTEX(mutex); 123 + int ret; 124 + 125 + mutex_lock(&mutex); 126 + ret = proc_dointvec(table, write, buffer, lenp, ppos); 127 + if (!ret && write) 128 + timers_update_migration(false); 129 + mutex_unlock(&mutex); 130 + return ret; 120 131 } 121 132 122 - static inline struct tvec_base *tbase_get_base(struct tvec_base *base) 133 + static inline struct tvec_base *get_target_base(struct tvec_base *base, 134 + int pinned) 123 135 { 124 - return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK)); 136 + if (pinned || 
!base->migration_enabled) 137 + return this_cpu_ptr(&tvec_bases); 138 + return per_cpu_ptr(&tvec_bases, get_nohz_timer_target()); 125 139 } 126 - 127 - static inline void 128 - timer_set_base(struct timer_list *timer, struct tvec_base *new_base) 140 + #else 141 + static inline struct tvec_base *get_target_base(struct tvec_base *base, 142 + int pinned) 129 143 { 130 - unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK; 131 - 132 - timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags); 144 + return this_cpu_ptr(&tvec_bases); 133 145 } 146 + #endif 134 147 135 148 static unsigned long round_jiffies_common(unsigned long j, int cpu, 136 149 bool force_up) ··· 370 349 } 371 350 EXPORT_SYMBOL_GPL(set_timer_slack); 372 351 373 - /* 374 - * If the list is empty, catch up ->timer_jiffies to the current time. 375 - * The caller must hold the tvec_base lock. Returns true if the list 376 - * was empty and therefore ->timer_jiffies was updated. 377 - */ 378 - static bool catchup_timer_jiffies(struct tvec_base *base) 379 - { 380 - if (!base->all_timers) { 381 - base->timer_jiffies = jiffies; 382 - return true; 383 - } 384 - return false; 385 - } 386 - 387 352 static void 388 353 __internal_add_timer(struct tvec_base *base, struct timer_list *timer) 389 354 { 390 355 unsigned long expires = timer->expires; 391 356 unsigned long idx = expires - base->timer_jiffies; 392 - struct list_head *vec; 357 + struct hlist_head *vec; 393 358 394 359 if (idx < TVR_SIZE) { 395 360 int i = expires & TVR_MASK; ··· 408 401 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 409 402 vec = base->tv5.vec + i; 410 403 } 411 - /* 412 - * Timers are FIFO: 413 - */ 414 - list_add_tail(&timer->entry, vec); 404 + 405 + hlist_add_head(&timer->entry, vec); 415 406 } 416 407 417 408 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) 418 409 { 419 - (void)catchup_timer_jiffies(base); 410 + /* Advance base->jiffies, if the base is empty */ 411 + 
if (!base->all_timers++) 412 + base->timer_jiffies = jiffies; 413 + 420 414 __internal_add_timer(base, timer); 421 415 /* 422 416 * Update base->active_timers and base->next_timer 423 417 */ 424 - if (!tbase_get_deferrable(timer->base)) { 418 + if (!(timer->flags & TIMER_DEFERRABLE)) { 425 419 if (!base->active_timers++ || 426 420 time_before(timer->expires, base->next_timer)) 427 421 base->next_timer = timer->expires; 428 422 } 429 - base->all_timers++; 430 423 431 424 /* 432 425 * Check whether the other CPU is in dynticks mode and needs ··· 441 434 * require special care against races with idle_cpu(), lets deal 442 435 * with that later. 443 436 */ 444 - if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu)) 445 - wake_up_nohz_cpu(base->cpu); 437 + if (base->nohz_active) { 438 + if (!(timer->flags & TIMER_DEFERRABLE) || 439 + tick_nohz_full_cpu(base->cpu)) 440 + wake_up_nohz_cpu(base->cpu); 441 + } 446 442 } 447 443 448 444 #ifdef CONFIG_TIMER_STATS ··· 461 451 462 452 static void timer_stats_account_timer(struct timer_list *timer) 463 453 { 464 - unsigned int flag = 0; 465 - 466 454 if (likely(!timer->start_site)) 467 455 return; 468 - if (unlikely(tbase_get_deferrable(timer->base))) 469 - flag |= TIMER_STATS_FLAG_DEFERRABLE; 470 456 471 457 timer_stats_update_stats(timer, timer->start_pid, timer->start_site, 472 - timer->function, timer->start_comm, flag); 458 + timer->function, timer->start_comm, 459 + timer->flags); 473 460 } 474 461 475 462 #else ··· 523 516 * statically initialized. We just make sure that it 524 517 * is tracked in the object tracker. 
525 518 */ 526 - if (timer->entry.next == NULL && 527 - timer->entry.prev == TIMER_ENTRY_STATIC) { 519 + if (timer->entry.pprev == NULL && 520 + timer->entry.next == TIMER_ENTRY_STATIC) { 528 521 debug_object_init(timer, &timer_debug_descr); 529 522 debug_object_activate(timer, &timer_debug_descr); 530 523 return 0; ··· 570 563 571 564 switch (state) { 572 565 case ODEBUG_STATE_NOTAVAILABLE: 573 - if (timer->entry.prev == TIMER_ENTRY_STATIC) { 566 + if (timer->entry.next == TIMER_ENTRY_STATIC) { 574 567 /* 575 568 * This is not really a fixup. The timer was 576 569 * statically initialized. We just make sure that it ··· 655 648 debug_activate(struct timer_list *timer, unsigned long expires) 656 649 { 657 650 debug_timer_activate(timer); 658 - trace_timer_start(timer, expires); 651 + trace_timer_start(timer, expires, timer->flags); 659 652 } 660 653 661 654 static inline void debug_deactivate(struct timer_list *timer) ··· 672 665 static void do_init_timer(struct timer_list *timer, unsigned int flags, 673 666 const char *name, struct lock_class_key *key) 674 667 { 675 - struct tvec_base *base = raw_cpu_read(tvec_bases); 676 - 677 - timer->entry.next = NULL; 678 - timer->base = (void *)((unsigned long)base | flags); 668 + timer->entry.pprev = NULL; 669 + timer->flags = flags | raw_smp_processor_id(); 679 670 timer->slack = -1; 680 671 #ifdef CONFIG_TIMER_STATS 681 672 timer->start_site = NULL; ··· 704 699 705 700 static inline void detach_timer(struct timer_list *timer, bool clear_pending) 706 701 { 707 - struct list_head *entry = &timer->entry; 702 + struct hlist_node *entry = &timer->entry; 708 703 709 704 debug_deactivate(timer); 710 705 711 - __list_del(entry->prev, entry->next); 706 + __hlist_del(entry); 712 707 if (clear_pending) 713 - entry->next = NULL; 714 - entry->prev = LIST_POISON2; 708 + entry->pprev = NULL; 709 + entry->next = LIST_POISON2; 715 710 } 716 711 717 712 static inline void 718 713 detach_expired_timer(struct timer_list *timer, struct 
tvec_base *base) 719 714 { 720 715 detach_timer(timer, true); 721 - if (!tbase_get_deferrable(timer->base)) 716 + if (!(timer->flags & TIMER_DEFERRABLE)) 722 717 base->active_timers--; 723 718 base->all_timers--; 724 - (void)catchup_timer_jiffies(base); 725 719 } 726 720 727 721 static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, ··· 730 726 return 0; 731 727 732 728 detach_timer(timer, clear_pending); 733 - if (!tbase_get_deferrable(timer->base)) { 729 + if (!(timer->flags & TIMER_DEFERRABLE)) { 734 730 base->active_timers--; 735 731 if (timer->expires == base->next_timer) 736 732 base->next_timer = base->timer_jiffies; 737 733 } 738 - base->all_timers--; 739 - (void)catchup_timer_jiffies(base); 734 + /* If this was the last timer, advance base->jiffies */ 735 + if (!--base->all_timers) 736 + base->timer_jiffies = jiffies; 740 737 return 1; 741 738 } 742 739 ··· 749 744 * So __run_timers/migrate_timers can safely modify all timers which could 750 745 * be found on ->tvX lists. 751 746 * 752 - * When the timer's base is locked, and the timer removed from list, it is 753 - * possible to set timer->base = NULL and drop the lock: the timer remains 754 - * locked. 
747 + * When a timer is migrating then the TIMER_MIGRATING flag is set and we 748 + * need to wait until the migration is done. 755 749 */ 756 750 static struct tvec_base *lock_timer_base(struct timer_list *timer, 757 751 unsigned long *flags) 758 752 __acquires(timer->base->lock) 759 753 { 760 - struct tvec_base *base; 761 - 762 754 for (;;) { 763 - struct tvec_base *prelock_base = timer->base; 764 - base = tbase_get_base(prelock_base); 765 - if (likely(base != NULL)) { 755 + u32 tf = timer->flags; 756 + struct tvec_base *base; 757 + 758 + if (!(tf & TIMER_MIGRATING)) { 759 + base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK); 766 760 spin_lock_irqsave(&base->lock, *flags); 767 - if (likely(prelock_base == timer->base)) 761 + if (timer->flags == tf) 768 762 return base; 769 - /* The timer has migrated to another CPU */ 770 763 spin_unlock_irqrestore(&base->lock, *flags); 771 764 } 772 765 cpu_relax(); ··· 773 770 774 771 static inline int 775 772 __mod_timer(struct timer_list *timer, unsigned long expires, 776 - bool pending_only, int pinned) 773 + bool pending_only, int pinned) 777 774 { 778 775 struct tvec_base *base, *new_base; 779 776 unsigned long flags; 780 - int ret = 0 , cpu; 777 + int ret = 0; 781 778 782 779 timer_stats_timer_set_start_info(timer); 783 780 BUG_ON(!timer->function); ··· 790 787 791 788 debug_activate(timer, expires); 792 789 793 - cpu = get_nohz_timer_target(pinned); 794 - new_base = per_cpu(tvec_bases, cpu); 790 + new_base = get_target_base(base, pinned); 795 791 796 792 if (base != new_base) { 797 793 /* ··· 802 800 */ 803 801 if (likely(base->running_timer != timer)) { 804 802 /* See the comment in lock_timer_base() */ 805 - timer_set_base(timer, NULL); 803 + timer->flags |= TIMER_MIGRATING; 804 + 806 805 spin_unlock(&base->lock); 807 806 base = new_base; 808 807 spin_lock(&base->lock); 809 - timer_set_base(timer, base); 808 + timer->flags &= ~TIMER_BASEMASK; 809 + timer->flags |= base->cpu; 810 810 } 811 811 } 812 812 ··· 970 966 */ 971 967 void
add_timer_on(struct timer_list *timer, int cpu) 972 968 { 973 - struct tvec_base *base = per_cpu(tvec_bases, cpu); 969 + struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu); 974 970 unsigned long flags; 975 971 976 972 timer_stats_timer_set_start_info(timer); 977 973 BUG_ON(timer_pending(timer) || !timer->function); 978 974 spin_lock_irqsave(&base->lock, flags); 979 - timer_set_base(timer, base); 975 + timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; 980 976 debug_activate(timer, timer->expires); 981 977 internal_add_timer(base, timer); 982 978 spin_unlock_irqrestore(&base->lock, flags); ··· 1041 1037 EXPORT_SYMBOL(try_to_del_timer_sync); 1042 1038 1043 1039 #ifdef CONFIG_SMP 1044 - static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); 1045 - 1046 1040 /** 1047 1041 * del_timer_sync - deactivate a timer and wait for the handler to finish. 1048 1042 * @timer: the timer to be deactivated ··· 1095 1093 * don't use it in hardirq context, because it 1096 1094 * could lead to deadlock. 1097 1095 */ 1098 - WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base)); 1096 + WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); 1099 1097 for (;;) { 1100 1098 int ret = try_to_del_timer_sync(timer); 1101 1099 if (ret >= 0) ··· 1109 1107 static int cascade(struct tvec_base *base, struct tvec *tv, int index) 1110 1108 { 1111 1109 /* cascade all the timers from tv up one level */ 1112 - struct timer_list *timer, *tmp; 1113 - struct list_head tv_list; 1110 + struct timer_list *timer; 1111 + struct hlist_node *tmp; 1112 + struct hlist_head tv_list; 1114 1113 1115 - list_replace_init(tv->vec + index, &tv_list); 1114 + hlist_move_list(tv->vec + index, &tv_list); 1116 1115 1117 1116 /* 1118 1117 * We are removing _all_ timers from the list, so we 1119 1118 * don't have to detach them individually. 
1120 1119 */ 1121 - list_for_each_entry_safe(timer, tmp, &tv_list, entry) { 1122 - BUG_ON(tbase_get_base(timer->base) != base); 1120 + hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) { 1123 1121 /* No accounting, while moving them */ 1124 1122 __internal_add_timer(base, timer); 1125 1123 } ··· 1184 1182 struct timer_list *timer; 1185 1183 1186 1184 spin_lock_irq(&base->lock); 1187 - if (catchup_timer_jiffies(base)) { 1188 - spin_unlock_irq(&base->lock); 1189 - return; 1190 - } 1185 + 1191 1186 while (time_after_eq(jiffies, base->timer_jiffies)) { 1192 - struct list_head work_list; 1193 - struct list_head *head = &work_list; 1194 - int index = base->timer_jiffies & TVR_MASK; 1187 + struct hlist_head work_list; 1188 + struct hlist_head *head = &work_list; 1189 + int index; 1190 + 1191 + if (!base->all_timers) { 1192 + base->timer_jiffies = jiffies; 1193 + break; 1194 + } 1195 + 1196 + index = base->timer_jiffies & TVR_MASK; 1195 1197 1196 1198 /* 1197 1199 * Cascade timers: ··· 1206 1200 !cascade(base, &base->tv4, INDEX(2))) 1207 1201 cascade(base, &base->tv5, INDEX(3)); 1208 1202 ++base->timer_jiffies; 1209 - list_replace_init(base->tv1.vec + index, head); 1210 - while (!list_empty(head)) { 1203 + hlist_move_list(base->tv1.vec + index, head); 1204 + while (!hlist_empty(head)) { 1211 1205 void (*fn)(unsigned long); 1212 1206 unsigned long data; 1213 1207 bool irqsafe; 1214 1208 1215 - timer = list_first_entry(head, struct timer_list,entry); 1209 + timer = hlist_entry(head->first, struct timer_list, entry); 1216 1210 fn = timer->function; 1217 1211 data = timer->data; 1218 - irqsafe = tbase_get_irqsafe(timer->base); 1212 + irqsafe = timer->flags & TIMER_IRQSAFE; 1219 1213 1220 1214 timer_stats_account_timer(timer); 1221 1215 ··· 1254 1248 /* Look for timer events in tv1. 
*/ 1255 1249 index = slot = timer_jiffies & TVR_MASK; 1256 1250 do { 1257 - list_for_each_entry(nte, base->tv1.vec + slot, entry) { 1258 - if (tbase_get_deferrable(nte->base)) 1251 + hlist_for_each_entry(nte, base->tv1.vec + slot, entry) { 1252 + if (nte->flags & TIMER_DEFERRABLE) 1259 1253 continue; 1260 1254 1261 1255 found = 1; ··· 1285 1279 1286 1280 index = slot = timer_jiffies & TVN_MASK; 1287 1281 do { 1288 - list_for_each_entry(nte, varp->vec + slot, entry) { 1289 - if (tbase_get_deferrable(nte->base)) 1282 + hlist_for_each_entry(nte, varp->vec + slot, entry) { 1283 + if (nte->flags & TIMER_DEFERRABLE) 1290 1284 continue; 1291 1285 1292 1286 found = 1; ··· 1317 1311 * Check, if the next hrtimer event is before the next timer wheel 1318 1312 * event: 1319 1313 */ 1320 - static unsigned long cmp_next_hrtimer_event(unsigned long now, 1321 - unsigned long expires) 1314 + static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) 1322 1315 { 1323 - ktime_t hr_delta = hrtimer_get_next_event(); 1324 - struct timespec tsdelta; 1325 - unsigned long delta; 1316 + u64 nextevt = hrtimer_get_next_event(); 1326 1317 1327 - if (hr_delta.tv64 == KTIME_MAX) 1318 + /* 1319 + * If high resolution timers are enabled 1320 + * hrtimer_get_next_event() returns KTIME_MAX. 1321 + */ 1322 + if (expires <= nextevt) 1328 1323 return expires; 1329 1324 1330 1325 /* 1331 - * Expired timer available, let it expire in the next tick 1326 + * If the next timer is already expired, return the tick base 1327 + * time so the tick is fired immediately. 1332 1328 */ 1333 - if (hr_delta.tv64 <= 0) 1334 - return now + 1; 1335 - 1336 - tsdelta = ktime_to_timespec(hr_delta); 1337 - delta = timespec_to_jiffies(&tsdelta); 1329 + if (nextevt <= basem) 1330 + return basem; 1338 1331 1339 1332 /* 1340 - * Limit the delta to the max value, which is checked in 1341 - * tick_nohz_stop_sched_tick(): 1333 + * Round up to the next jiffie. 
High resolution timers are 1334 + * off, so the hrtimers are expired in the tick and we need to 1335 + * make sure that this tick really expires the timer to avoid 1336 + * a ping pong of the nohz stop code. 1337 + * 1338 + * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3 1342 1339 */ 1343 - if (delta > NEXT_TIMER_MAX_DELTA) 1344 - delta = NEXT_TIMER_MAX_DELTA; 1345 - 1346 - /* 1347 - * Take rounding errors in to account and make sure, that it 1348 - * expires in the next tick. Otherwise we go into an endless 1349 - * ping pong due to tick_nohz_stop_sched_tick() retriggering 1350 - * the timer softirq 1351 - */ 1352 - if (delta < 1) 1353 - delta = 1; 1354 - now += delta; 1355 - if (time_before(now, expires)) 1356 - return now; 1357 - return expires; 1340 + return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; 1358 1341 } 1359 1342 1360 1343 /** 1361 - * get_next_timer_interrupt - return the jiffy of the next pending timer 1362 - * @now: current time (in jiffies) 1344 + * get_next_timer_interrupt - return the time (clock mono) of the next timer 1345 + * @basej: base time jiffies 1346 + * @basem: base time clock monotonic 1347 + * 1348 + * Returns the tick aligned clock monotonic time of the next pending 1349 + * timer or KTIME_MAX if no timer is pending. 1363 1350 */ 1364 - unsigned long get_next_timer_interrupt(unsigned long now) 1351 + u64 get_next_timer_interrupt(unsigned long basej, u64 basem) 1365 1352 { 1366 - struct tvec_base *base = __this_cpu_read(tvec_bases); 1367 - unsigned long expires = now + NEXT_TIMER_MAX_DELTA; 1353 + struct tvec_base *base = this_cpu_ptr(&tvec_bases); 1354 + u64 expires = KTIME_MAX; 1355 + unsigned long nextevt; 1368 1356 1369 1357 /* 1370 1358 * Pretend that there is no timer pending if the cpu is offline. 
··· 1371 1371 if (base->active_timers) { 1372 1372 if (time_before_eq(base->next_timer, base->timer_jiffies)) 1373 1373 base->next_timer = __next_timer_interrupt(base); 1374 - expires = base->next_timer; 1374 + nextevt = base->next_timer; 1375 + if (time_before_eq(nextevt, basej)) 1376 + expires = basem; 1377 + else 1378 + expires = basem + (nextevt - basej) * TICK_NSEC; 1375 1379 } 1376 1380 spin_unlock(&base->lock); 1377 1381 1378 - if (time_before_eq(expires, now)) 1379 - return now; 1380 - 1381 - return cmp_next_hrtimer_event(now, expires); 1382 + return cmp_next_hrtimer_event(basem, expires); 1382 1383 } 1383 1384 #endif 1384 1385 ··· 1408 1407 */ 1409 1408 static void run_timer_softirq(struct softirq_action *h) 1410 1409 { 1411 - struct tvec_base *base = __this_cpu_read(tvec_bases); 1412 - 1413 - hrtimer_run_pending(); 1410 + struct tvec_base *base = this_cpu_ptr(&tvec_bases); 1414 1411 1415 1412 if (time_after_eq(jiffies, base->timer_jiffies)) 1416 1413 __run_timers(base); ··· 1544 1545 EXPORT_SYMBOL(schedule_timeout_uninterruptible); 1545 1546 1546 1547 #ifdef CONFIG_HOTPLUG_CPU 1547 - static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) 1548 + static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) 1548 1549 { 1549 1550 struct timer_list *timer; 1551 + int cpu = new_base->cpu; 1550 1552 1551 - while (!list_empty(head)) { 1552 - timer = list_first_entry(head, struct timer_list, entry); 1553 + while (!hlist_empty(head)) { 1554 + timer = hlist_entry(head->first, struct timer_list, entry); 1553 1555 /* We ignore the accounting on the dying cpu */ 1554 1556 detach_timer(timer, false); 1555 - timer_set_base(timer, new_base); 1557 + timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; 1556 1558 internal_add_timer(new_base, timer); 1557 1559 } 1558 1560 } ··· 1565 1565 int i; 1566 1566 1567 1567 BUG_ON(cpu_online(cpu)); 1568 - old_base = per_cpu(tvec_bases, cpu); 1569 - new_base = get_cpu_var(tvec_bases); 
1568 + old_base = per_cpu_ptr(&tvec_bases, cpu); 1569 + new_base = this_cpu_ptr(&tvec_bases); 1570 1570 /* 1571 1571 * The caller is globally serialized and nobody else 1572 1572 * takes two locks at once, deadlock is not possible. ··· 1590 1590 1591 1591 spin_unlock(&old_base->lock); 1592 1592 spin_unlock_irq(&new_base->lock); 1593 - put_cpu_var(tvec_bases); 1594 1593 } 1595 1594 1596 1595 static int timer_cpu_notify(struct notifier_block *self, ··· 1615 1616 static inline void timer_register_cpu_notifier(void) { } 1616 1617 #endif /* CONFIG_HOTPLUG_CPU */ 1617 1618 1618 - static void __init init_timer_cpu(struct tvec_base *base, int cpu) 1619 + static void __init init_timer_cpu(int cpu) 1619 1620 { 1620 - int j; 1621 - 1622 - BUG_ON(base != tbase_get_base(base)); 1621 + struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu); 1623 1622 1624 1623 base->cpu = cpu; 1625 - per_cpu(tvec_bases, cpu) = base; 1626 1624 spin_lock_init(&base->lock); 1627 - 1628 - for (j = 0; j < TVN_SIZE; j++) { 1629 - INIT_LIST_HEAD(base->tv5.vec + j); 1630 - INIT_LIST_HEAD(base->tv4.vec + j); 1631 - INIT_LIST_HEAD(base->tv3.vec + j); 1632 - INIT_LIST_HEAD(base->tv2.vec + j); 1633 - } 1634 - for (j = 0; j < TVR_SIZE; j++) 1635 - INIT_LIST_HEAD(base->tv1.vec + j); 1636 1625 1637 1626 base->timer_jiffies = jiffies; 1638 1627 base->next_timer = base->timer_jiffies; ··· 1628 1641 1629 1642 static void __init init_timer_cpus(void) 1630 1643 { 1631 - struct tvec_base *base; 1632 - int local_cpu = smp_processor_id(); 1633 1644 int cpu; 1634 1645 1635 - for_each_possible_cpu(cpu) { 1636 - if (cpu == local_cpu) 1637 - base = &boot_tvec_bases; 1638 - #ifdef CONFIG_SMP 1639 - else 1640 - base = per_cpu_ptr(&__tvec_bases, cpu); 1641 - #endif 1642 - 1643 - init_timer_cpu(base, cpu); 1644 - } 1646 + for_each_possible_cpu(cpu) 1647 + init_timer_cpu(cpu); 1645 1648 } 1646 1649 1647 1650 void __init init_timers(void) 1648 1651 { 1649 - /* ensure there are enough low bits for flags in timer->base pointer */ 
1650 - BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); 1651 - 1652 1652 init_timer_cpus(); 1653 1653 init_timer_stats(); 1654 1654 timer_register_cpu_notifier(); ··· 1671 1697 1672 1698 EXPORT_SYMBOL(msleep_interruptible); 1673 1699 1674 - static int __sched do_usleep_range(unsigned long min, unsigned long max) 1700 + static void __sched do_usleep_range(unsigned long min, unsigned long max) 1675 1701 { 1676 1702 ktime_t kmin; 1677 1703 unsigned long delta; 1678 1704 1679 1705 kmin = ktime_set(0, min * NSEC_PER_USEC); 1680 1706 delta = (max - min) * NSEC_PER_USEC; 1681 - return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); 1707 + schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); 1682 1708 } 1683 1709 1684 1710 /** ··· 1686 1712 * @min: Minimum time in usecs to sleep 1687 1713 * @max: Maximum time in usecs to sleep 1688 1714 */ 1689 - void usleep_range(unsigned long min, unsigned long max) 1715 + void __sched usleep_range(unsigned long min, unsigned long max) 1690 1716 { 1691 1717 __set_current_state(TASK_UNINTERRUPTIBLE); 1692 1718 do_usleep_range(min, max);
+31 -20
kernel/time/timer_list.c
··· 29 29 30 30 typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); 31 31 32 - DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); 33 - 34 32 /* 35 33 * This allows printing both to /proc/timer_list and 36 34 * to the console (on SysRq-Q): 37 35 */ 38 - #define SEQ_printf(m, x...) \ 39 - do { \ 40 - if (m) \ 41 - seq_printf(m, x); \ 42 - else \ 43 - printk(x); \ 44 - } while (0) 36 + __printf(2, 3) 37 + static void SEQ_printf(struct seq_file *m, const char *fmt, ...) 38 + { 39 + va_list args; 40 + 41 + va_start(args, fmt); 42 + 43 + if (m) 44 + seq_vprintf(m, fmt, args); 45 + else 46 + vprintk(fmt, args); 47 + 48 + va_end(args); 49 + } 45 50 46 51 static void print_name_offset(struct seq_file *m, void *sym) 47 52 { ··· 125 120 print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) 126 121 { 127 122 SEQ_printf(m, " .base: %pK\n", base); 128 - SEQ_printf(m, " .index: %d\n", 129 - base->index); 130 - SEQ_printf(m, " .resolution: %Lu nsecs\n", 131 - (unsigned long long)ktime_to_ns(base->resolution)); 123 + SEQ_printf(m, " .index: %d\n", base->index); 124 + 125 + SEQ_printf(m, " .resolution: %u nsecs\n", (unsigned) hrtimer_resolution); 126 + 132 127 SEQ_printf(m, " .get_time: "); 133 128 print_name_offset(m, base->get_time); 134 129 SEQ_printf(m, "\n"); ··· 163 158 P(nr_events); 164 159 P(nr_retries); 165 160 P(nr_hangs); 166 - P_ns(max_hang_time); 161 + P(max_hang_time); 167 162 #endif 168 163 #undef P 169 164 #undef P_ns ··· 189 184 P_ns(idle_sleeptime); 190 185 P_ns(iowait_sleeptime); 191 186 P(last_jiffies); 192 - P(next_jiffies); 187 + P(next_timer); 193 188 P_ns(idle_expires); 194 189 SEQ_printf(m, "jiffies: %Lu\n", 195 190 (unsigned long long)jiffies); ··· 256 251 SEQ_printf(m, "\n"); 257 252 } 258 253 254 + if (dev->set_state_oneshot_stopped) { 255 + SEQ_printf(m, " oneshot stopped: "); 256 + print_name_offset(m, dev->set_state_oneshot_stopped); 257 + SEQ_printf(m, "\n"); 258 + } 259 + 259 260 if (dev->tick_resume) { 260 
261 SEQ_printf(m, " resume: "); 261 262 print_name_offset(m, dev->tick_resume); ··· 280 269 { 281 270 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 282 271 print_tickdevice(m, tick_get_broadcast_device(), -1); 283 - SEQ_printf(m, "tick_broadcast_mask: %08lx\n", 284 - cpumask_bits(tick_get_broadcast_mask())[0]); 272 + SEQ_printf(m, "tick_broadcast_mask: %*pb\n", 273 + cpumask_pr_args(tick_get_broadcast_mask())); 285 274 #ifdef CONFIG_TICK_ONESHOT 286 - SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", 287 - cpumask_bits(tick_get_broadcast_oneshot_mask())[0]); 275 + SEQ_printf(m, "tick_broadcast_oneshot_mask: %*pb\n", 276 + cpumask_pr_args(tick_get_broadcast_oneshot_mask())); 288 277 #endif 289 278 SEQ_printf(m, "\n"); 290 279 #endif ··· 293 282 294 283 static inline void timer_list_header(struct seq_file *m, u64 now) 295 284 { 296 - SEQ_printf(m, "Timer List Version: v0.7\n"); 285 + SEQ_printf(m, "Timer List Version: v0.8\n"); 297 286 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 298 287 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 299 288 SEQ_printf(m, "\n");
+5 -5
kernel/time/timer_stats.c
··· 68 68 * Number of timeout events: 69 69 */ 70 70 unsigned long count; 71 - unsigned int timer_flag; 71 + u32 flags; 72 72 73 73 /* 74 74 * We save the command-line string to preserve ··· 227 227 * @startf: pointer to the function which did the timer setup 228 228 * @timerf: pointer to the timer callback function of the timer 229 229 * @comm: name of the process which set up the timer 230 + * @tflags: The flags field of the timer 230 231 * 231 232 * When the timer is already registered, then the event counter is 232 233 * incremented. Otherwise the timer is registered in a free slot. 233 234 */ 234 235 void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 235 - void *timerf, char *comm, 236 - unsigned int timer_flag) 236 + void *timerf, char *comm, u32 tflags) 237 237 { 238 238 /* 239 239 * It doesn't matter which lock we take: ··· 251 251 input.start_func = startf; 252 252 input.expire_func = timerf; 253 253 input.pid = pid; 254 - input.timer_flag = timer_flag; 254 + input.flags = tflags; 255 255 256 256 raw_spin_lock_irqsave(lock, flags); 257 257 if (!timer_stats_active) ··· 306 306 307 307 for (i = 0; i < nr_entries; i++) { 308 308 entry = entries + i; 309 - if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { 309 + if (entry->flags & TIMER_DEFERRABLE) { 310 310 seq_printf(m, "%4luD, %5d %-16s ", 311 311 entry->count, entry->pid, entry->comm); 312 312 } else {
+7 -3
lib/timerqueue.c
··· 36 36 * Adds the timer node to the timerqueue, sorted by the 37 37 * node's expires value. 38 38 */ 39 - void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) 39 + bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) 40 40 { 41 41 struct rb_node **p = &head->head.rb_node; 42 42 struct rb_node *parent = NULL; ··· 56 56 rb_link_node(&node->node, parent, p); 57 57 rb_insert_color(&node->node, &head->head); 58 58 59 - if (!head->next || node->expires.tv64 < head->next->expires.tv64) 59 + if (!head->next || node->expires.tv64 < head->next->expires.tv64) { 60 60 head->next = node; 61 + return true; 62 + } 63 + return false; 61 64 } 62 65 EXPORT_SYMBOL_GPL(timerqueue_add); 63 66 ··· 72 69 * 73 70 * Removes the timer node from the timerqueue. 74 71 */ 75 - void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) 72 + bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) 76 73 { 77 74 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node)); 78 75 ··· 85 82 } 86 83 rb_erase(&node->node, &head->head); 87 84 RB_CLEAR_NODE(&node->node); 85 + return head->next != NULL; 88 86 } 89 87 EXPORT_SYMBOL_GPL(timerqueue_del); 90 88
-2
net/core/pktgen.c
··· 2212 2212 do { 2213 2213 set_current_state(TASK_INTERRUPTIBLE); 2214 2214 hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); 2215 - if (!hrtimer_active(&t.timer)) 2216 - t.task = NULL; 2217 2215 2218 2216 if (likely(t.task)) 2219 2217 schedule();
+1 -4
net/sched/sch_api.c
··· 1885 1885 #ifdef CONFIG_PROC_FS 1886 1886 static int psched_show(struct seq_file *seq, void *v) 1887 1887 { 1888 - struct timespec ts; 1889 - 1890 - hrtimer_get_res(CLOCK_MONOTONIC, &ts); 1891 1888 seq_printf(seq, "%08x %08x %08x %08x\n", 1892 1889 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 1893 1890 1000000, 1894 - (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); 1891 + (u32)NSEC_PER_SEC / hrtimer_resolution); 1895 1892 1896 1893 return 0; 1897 1894 }
+1 -8
sound/core/hrtimer.c
··· 121 121 static int __init snd_hrtimer_init(void) 122 122 { 123 123 struct snd_timer *timer; 124 - struct timespec tp; 125 124 int err; 126 125 127 - hrtimer_get_res(CLOCK_MONOTONIC, &tp); 128 - if (tp.tv_sec > 0 || !tp.tv_nsec) { 129 - pr_err("snd-hrtimer: Invalid resolution %u.%09u", 130 - (unsigned)tp.tv_sec, (unsigned)tp.tv_nsec); 131 - return -EINVAL; 132 - } 133 - resolution = tp.tv_nsec; 126 + resolution = hrtimer_resolution; 134 127 135 128 /* Create a new timer and set up the fields */ 136 129 err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER,
+7 -10
sound/drivers/pcsp/pcsp.c
··· 42 42 static int snd_pcsp_create(struct snd_card *card) 43 43 { 44 44 static struct snd_device_ops ops = { }; 45 - struct timespec tp; 46 - int err; 47 - int div, min_div, order; 48 - 49 - hrtimer_get_res(CLOCK_MONOTONIC, &tp); 45 + unsigned int resolution = hrtimer_resolution; 46 + int err, div, min_div, order; 50 47 51 48 if (!nopcm) { 52 - if (tp.tv_sec || tp.tv_nsec > PCSP_MAX_PERIOD_NS) { 49 + if (resolution > PCSP_MAX_PERIOD_NS) { 53 50 printk(KERN_ERR "PCSP: Timer resolution is not sufficient " 54 - "(%linS)\n", tp.tv_nsec); 51 + "(%unS)\n", resolution); 55 52 printk(KERN_ERR "PCSP: Make sure you have HPET and ACPI " 56 53 "enabled.\n"); 57 54 printk(KERN_ERR "PCSP: Turned into nopcm mode.\n"); ··· 56 59 } 57 60 } 58 61 59 - if (loops_per_jiffy >= PCSP_MIN_LPJ && tp.tv_nsec <= PCSP_MIN_PERIOD_NS) 62 + if (loops_per_jiffy >= PCSP_MIN_LPJ && resolution <= PCSP_MIN_PERIOD_NS) 60 63 min_div = MIN_DIV; 61 64 else 62 65 min_div = MAX_DIV; 63 66 #if PCSP_DEBUG 64 - printk(KERN_DEBUG "PCSP: lpj=%li, min_div=%i, res=%li\n", 65 - loops_per_jiffy, min_div, tp.tv_nsec); 67 + printk(KERN_DEBUG "PCSP: lpj=%li, min_div=%i, res=%u\n", 68 + loops_per_jiffy, min_div, resolution); 66 69 #endif 67 70 68 71 div = MAX_DIV / min_div;
+73 -4
tools/testing/selftests/timers/leap-a-day.c
··· 44 44 #include <time.h> 45 45 #include <sys/time.h> 46 46 #include <sys/timex.h> 47 + #include <sys/errno.h> 47 48 #include <string.h> 48 49 #include <signal.h> 49 50 #include <unistd.h> ··· 63 62 64 63 #define NSEC_PER_SEC 1000000000ULL 65 64 #define CLOCK_TAI 11 65 + 66 + time_t next_leap; 67 + int error_found; 66 68 67 69 /* returns 1 if a <= b, 0 otherwise */ 68 70 static inline int in_order(struct timespec a, struct timespec b) ··· 138 134 exit(0); 139 135 } 140 136 137 + void sigalarm(int signo) 138 + { 139 + struct timex tx; 140 + int ret; 141 + 142 + tx.modes = 0; 143 + ret = adjtimex(&tx); 144 + 145 + if (tx.time.tv_sec < next_leap) { 146 + printf("Error: Early timer expiration! (Should be %ld)\n", next_leap); 147 + error_found = 1; 148 + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", 149 + tx.time.tv_sec, 150 + tx.time.tv_usec, 151 + tx.tai, 152 + time_state_str(ret)); 153 + } 154 + if (ret != TIME_WAIT) { 155 + printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n"); 156 + error_found = 1; 157 + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", 158 + tx.time.tv_sec, 159 + tx.time.tv_usec, 160 + tx.tai, 161 + time_state_str(ret)); 162 + } 163 + } 164 + 165 + 141 166 /* Test for known hrtimer failure */ 142 167 void test_hrtimer_failure(void) 143 168 { ··· 177 144 clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); 178 145 clock_gettime(CLOCK_REALTIME, &now); 179 146 180 - if (!in_order(target, now)) 147 + if (!in_order(target, now)) { 181 148 printf("ERROR: hrtimer early expiration failure observed.\n"); 149 + error_found = 1; 150 + } 182 151 } 183 152 184 153 int main(int argc, char **argv) 185 154 { 155 + timer_t tm1; 156 + struct itimerspec its1; 157 + struct sigevent se; 158 + struct sigaction act; 159 + int signum = SIGRTMAX; 186 160 int settime = 0; 187 161 int tai_time = 0; 188 162 int insert = 1; ··· 231 191 signal(SIGINT, handler); 232 192 signal(SIGKILL, handler); 233 193 194 + /* Set up timer signal 
handler: */ 195 + sigfillset(&act.sa_mask); 196 + act.sa_flags = 0; 197 + act.sa_handler = sigalarm; 198 + sigaction(signum, &act, NULL); 199 + 234 200 if (iterations < 0) 235 201 printf("This runs continuously. Press ctrl-c to stop\n"); 236 202 else ··· 247 201 int ret; 248 202 struct timespec ts; 249 203 struct timex tx; 250 - time_t now, next_leap; 204 + time_t now; 251 205 252 206 /* Get the current time */ 253 207 clock_gettime(CLOCK_REALTIME, &ts); ··· 297 251 298 252 printf("Scheduling leap second for %s", ctime(&next_leap)); 299 253 254 + /* Set up timer */ 255 + printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap)); 256 + memset(&se, 0, sizeof(se)); 257 + se.sigev_notify = SIGEV_SIGNAL; 258 + se.sigev_signo = signum; 259 + se.sigev_value.sival_int = 0; 260 + if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) { 261 + printf("Error: timer_create failed\n"); 262 + return ksft_exit_fail(); 263 + } 264 + its1.it_value.tv_sec = next_leap; 265 + its1.it_value.tv_nsec = 0; 266 + its1.it_interval.tv_sec = 0; 267 + its1.it_interval.tv_nsec = 0; 268 + timer_settime(tm1, TIMER_ABSTIME, &its1, NULL); 269 + 300 270 /* Wake up 3 seconds before leap */ 301 271 ts.tv_sec = next_leap - 3; 302 272 ts.tv_nsec = 0; 273 + 303 274 304 275 while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) 305 276 printf("Something woke us up, returning to sleep\n"); ··· 339 276 while (now < next_leap + 2) { 340 277 char buf[26]; 341 278 struct timespec tai; 279 + int ret; 342 280 343 281 tx.modes = 0; 344 282 ret = adjtimex(&tx); ··· 372 308 /* Note if kernel has known hrtimer failure */ 373 309 test_hrtimer_failure(); 374 310 375 - printf("Leap complete\n\n"); 376 - 311 + printf("Leap complete\n"); 312 + if (error_found) { 313 + printf("Errors observed\n"); 314 + clear_time_state(); 315 + return ksft_exit_fail(); 316 + } 317 + printf("\n"); 377 318 if ((iterations != -1) && !(--iterations)) 378 319 break; 379 320 }