Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timers/nohz changes from Ingo Molnar:
"It mostly contains fixes and full dynticks off-case optimizations, by
Frederic Weisbecker"

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
nohz: Include local CPU in full dynticks global kick
nohz: Optimize full dynticks's sched hooks with static keys
nohz: Optimize full dynticks state checks with static keys
nohz: Rename a few state variables
vtime: Always debug check snapshot source _before_ updating it
vtime: Always scale generic vtime accounting results
vtime: Optimize full dynticks accounting off case with static keys
vtime: Describe overridden functions in dedicated arch headers
m68k: hardirq_count() only need preempt_mask.h
hardirq: Split preempt count mask definitions
context_tracking: Split low level state headers
vtime: Fix racy cputime delta update
vtime: Remove a few unneeded generic vtime state checks
context_tracking: User/kernel boundary cross trace events
context_tracking: Optimize context switch off case with static keys
context_tracking: Optimize guest APIs off case with static key
context_tracking: Optimize main APIs off case with static key
context_tracking: Ground setup for static key use
context_tracking: Remove full dynticks' hacky dependency on wide context tracking
nohz: Only enable context tracking on full dynticks CPUs
...

+549 -331
+1
arch/ia64/include/asm/Kbuild
··· 3 3 generic-y += exec.h 4 4 generic-y += kvm_para.h 5 5 generic-y += trace_clock.h 6 + generic-y += vtime.h
+1 -1
arch/m68k/include/asm/irqflags.h
··· 3 3 4 4 #include <linux/types.h> 5 5 #ifdef CONFIG_MMU 6 - #include <linux/hardirq.h> 6 + #include <linux/preempt_mask.h> 7 7 #endif 8 8 #include <linux/preempt.h> 9 9 #include <asm/thread_info.h>
+1
arch/powerpc/include/asm/Kbuild
··· 2 2 generic-y += clkdev.h 3 3 generic-y += rwsem.h 4 4 generic-y += trace_clock.h 5 + generic-y += vtime.h
-3
arch/s390/include/asm/cputime.h
··· 13 13 #include <asm/div64.h> 14 14 15 15 16 - #define __ARCH_HAS_VTIME_ACCOUNT 17 - #define __ARCH_HAS_VTIME_TASK_SWITCH 18 - 19 16 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ 20 17 21 18 typedef unsigned long long __nocast cputime_t;
+7
arch/s390/include/asm/vtime.h
··· 1 + #ifndef _S390_VTIME_H 2 + #define _S390_VTIME_H 3 + 4 + #define __ARCH_HAS_VTIME_ACCOUNT 5 + #define __ARCH_HAS_VTIME_TASK_SWITCH 6 + 7 + #endif /* _S390_VTIME_H */
+1
arch/s390/kernel/vtime.c
··· 19 19 #include <asm/irq_regs.h> 20 20 #include <asm/cputime.h> 21 21 #include <asm/vtimer.h> 22 + #include <asm/vtime.h> 22 23 #include <asm/irq.h> 23 24 #include "entry.h" 24 25
include/asm-generic/vtime.h
+73 -63
include/linux/context_tracking.h
··· 2 2 #define _LINUX_CONTEXT_TRACKING_H 3 3 4 4 #include <linux/sched.h> 5 - #include <linux/percpu.h> 6 5 #include <linux/vtime.h> 6 + #include <linux/context_tracking_state.h> 7 7 #include <asm/ptrace.h> 8 8 9 - struct context_tracking { 10 - /* 11 - * When active is false, probes are unset in order 12 - * to minimize overhead: TIF flags are cleared 13 - * and calls to user_enter/exit are ignored. This 14 - * may be further optimized using static keys. 15 - */ 16 - bool active; 17 - enum ctx_state { 18 - IN_KERNEL = 0, 19 - IN_USER, 20 - } state; 21 - }; 22 - 23 - static inline void __guest_enter(void) 24 - { 25 - /* 26 - * This is running in ioctl context so we can avoid 27 - * the call to vtime_account() with its unnecessary idle check. 28 - */ 29 - vtime_account_system(current); 30 - current->flags |= PF_VCPU; 31 - } 32 - 33 - static inline void __guest_exit(void) 34 - { 35 - /* 36 - * This is running in ioctl context so we can avoid 37 - * the call to vtime_account() with its unnecessary idle check. 
38 - */ 39 - vtime_account_system(current); 40 - current->flags &= ~PF_VCPU; 41 - } 42 9 43 10 #ifdef CONFIG_CONTEXT_TRACKING 44 - DECLARE_PER_CPU(struct context_tracking, context_tracking); 11 + extern void context_tracking_cpu_set(int cpu); 45 12 46 - static inline bool context_tracking_in_user(void) 13 + extern void context_tracking_user_enter(void); 14 + extern void context_tracking_user_exit(void); 15 + extern void __context_tracking_task_switch(struct task_struct *prev, 16 + struct task_struct *next); 17 + 18 + static inline void user_enter(void) 47 19 { 48 - return __this_cpu_read(context_tracking.state) == IN_USER; 49 - } 20 + if (static_key_false(&context_tracking_enabled)) 21 + context_tracking_user_enter(); 50 22 51 - static inline bool context_tracking_active(void) 23 + } 24 + static inline void user_exit(void) 52 25 { 53 - return __this_cpu_read(context_tracking.active); 26 + if (static_key_false(&context_tracking_enabled)) 27 + context_tracking_user_exit(); 54 28 } 55 - 56 - extern void user_enter(void); 57 - extern void user_exit(void); 58 - 59 - extern void guest_enter(void); 60 - extern void guest_exit(void); 61 29 62 30 static inline enum ctx_state exception_enter(void) 63 31 { 64 32 enum ctx_state prev_ctx; 65 33 34 + if (!static_key_false(&context_tracking_enabled)) 35 + return 0; 36 + 66 37 prev_ctx = this_cpu_read(context_tracking.state); 67 - user_exit(); 38 + context_tracking_user_exit(); 68 39 69 40 return prev_ctx; 70 41 } 71 42 72 43 static inline void exception_exit(enum ctx_state prev_ctx) 73 44 { 74 - if (prev_ctx == IN_USER) 75 - user_enter(); 45 + if (static_key_false(&context_tracking_enabled)) { 46 + if (prev_ctx == IN_USER) 47 + context_tracking_user_enter(); 48 + } 76 49 } 77 50 78 - extern void context_tracking_task_switch(struct task_struct *prev, 79 - struct task_struct *next); 51 + static inline void context_tracking_task_switch(struct task_struct *prev, 52 + struct task_struct *next) 53 + { 54 + if 
(static_key_false(&context_tracking_enabled)) 55 + __context_tracking_task_switch(prev, next); 56 + } 80 57 #else 81 - static inline bool context_tracking_in_user(void) { return false; } 82 58 static inline void user_enter(void) { } 83 59 static inline void user_exit(void) { } 84 - 85 - static inline void guest_enter(void) 86 - { 87 - __guest_enter(); 88 - } 89 - 90 - static inline void guest_exit(void) 91 - { 92 - __guest_exit(); 93 - } 94 - 95 60 static inline enum ctx_state exception_enter(void) { return 0; } 96 61 static inline void exception_exit(enum ctx_state prev_ctx) { } 97 62 static inline void context_tracking_task_switch(struct task_struct *prev, 98 63 struct task_struct *next) { } 99 64 #endif /* !CONFIG_CONTEXT_TRACKING */ 65 + 66 + 67 + #ifdef CONFIG_CONTEXT_TRACKING_FORCE 68 + extern void context_tracking_init(void); 69 + #else 70 + static inline void context_tracking_init(void) { } 71 + #endif /* CONFIG_CONTEXT_TRACKING_FORCE */ 72 + 73 + 74 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 75 + static inline void guest_enter(void) 76 + { 77 + if (vtime_accounting_enabled()) 78 + vtime_guest_enter(current); 79 + else 80 + current->flags |= PF_VCPU; 81 + } 82 + 83 + static inline void guest_exit(void) 84 + { 85 + if (vtime_accounting_enabled()) 86 + vtime_guest_exit(current); 87 + else 88 + current->flags &= ~PF_VCPU; 89 + } 90 + 91 + #else 92 + static inline void guest_enter(void) 93 + { 94 + /* 95 + * This is running in ioctl context so its safe 96 + * to assume that it's the stime pending cputime 97 + * to flush. 98 + */ 99 + vtime_account_system(current); 100 + current->flags |= PF_VCPU; 101 + } 102 + 103 + static inline void guest_exit(void) 104 + { 105 + /* Flush the guest cputime we spent on the guest */ 106 + vtime_account_system(current); 107 + current->flags &= ~PF_VCPU; 108 + } 109 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ 100 110 101 111 #endif
+39
include/linux/context_tracking_state.h
··· 1 + #ifndef _LINUX_CONTEXT_TRACKING_STATE_H 2 + #define _LINUX_CONTEXT_TRACKING_STATE_H 3 + 4 + #include <linux/percpu.h> 5 + #include <linux/static_key.h> 6 + 7 + struct context_tracking { 8 + /* 9 + * When active is false, probes are unset in order 10 + * to minimize overhead: TIF flags are cleared 11 + * and calls to user_enter/exit are ignored. This 12 + * may be further optimized using static keys. 13 + */ 14 + bool active; 15 + enum ctx_state { 16 + IN_KERNEL = 0, 17 + IN_USER, 18 + } state; 19 + }; 20 + 21 + #ifdef CONFIG_CONTEXT_TRACKING 22 + extern struct static_key context_tracking_enabled; 23 + DECLARE_PER_CPU(struct context_tracking, context_tracking); 24 + 25 + static inline bool context_tracking_in_user(void) 26 + { 27 + return __this_cpu_read(context_tracking.state) == IN_USER; 28 + } 29 + 30 + static inline bool context_tracking_active(void) 31 + { 32 + return __this_cpu_read(context_tracking.active); 33 + } 34 + #else 35 + static inline bool context_tracking_in_user(void) { return false; } 36 + static inline bool context_tracking_active(void) { return false; } 37 + #endif /* CONFIG_CONTEXT_TRACKING */ 38 + 39 + #endif
+1 -116
include/linux/hardirq.h
··· 1 1 #ifndef LINUX_HARDIRQ_H 2 2 #define LINUX_HARDIRQ_H 3 3 4 - #include <linux/preempt.h> 4 + #include <linux/preempt_mask.h> 5 5 #include <linux/lockdep.h> 6 6 #include <linux/ftrace_irq.h> 7 7 #include <linux/vtime.h> 8 - #include <asm/hardirq.h> 9 8 10 - /* 11 - * We put the hardirq and softirq counter into the preemption 12 - * counter. The bitmask has the following meaning: 13 - * 14 - * - bits 0-7 are the preemption count (max preemption depth: 256) 15 - * - bits 8-15 are the softirq count (max # of softirqs: 256) 16 - * 17 - * The hardirq count can in theory reach the same as NR_IRQS. 18 - * In reality, the number of nested IRQS is limited to the stack 19 - * size as well. For archs with over 1000 IRQS it is not practical 20 - * to expect that they will all nest. We give a max of 10 bits for 21 - * hardirq nesting. An arch may choose to give less than 10 bits. 22 - * m68k expects it to be 8. 23 - * 24 - * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) 25 - * - bit 26 is the NMI_MASK 26 - * - bit 27 is the PREEMPT_ACTIVE flag 27 - * 28 - * PREEMPT_MASK: 0x000000ff 29 - * SOFTIRQ_MASK: 0x0000ff00 30 - * HARDIRQ_MASK: 0x03ff0000 31 - * NMI_MASK: 0x04000000 32 - */ 33 - #define PREEMPT_BITS 8 34 - #define SOFTIRQ_BITS 8 35 - #define NMI_BITS 1 36 - 37 - #define MAX_HARDIRQ_BITS 10 38 - 39 - #ifndef HARDIRQ_BITS 40 - # define HARDIRQ_BITS MAX_HARDIRQ_BITS 41 - #endif 42 - 43 - #if HARDIRQ_BITS > MAX_HARDIRQ_BITS 44 - #error HARDIRQ_BITS too high! 
45 - #endif 46 - 47 - #define PREEMPT_SHIFT 0 48 - #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) 49 - #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) 50 - #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) 51 - 52 - #define __IRQ_MASK(x) ((1UL << (x))-1) 53 - 54 - #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) 55 - #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) 56 - #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) 57 - #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) 58 - 59 - #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) 60 - #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) 61 - #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) 62 - #define NMI_OFFSET (1UL << NMI_SHIFT) 63 - 64 - #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) 65 - 66 - #ifndef PREEMPT_ACTIVE 67 - #define PREEMPT_ACTIVE_BITS 1 68 - #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) 69 - #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) 70 - #endif 71 - 72 - #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) 73 - #error PREEMPT_ACTIVE is too low! 74 - #endif 75 - 76 - #define hardirq_count() (preempt_count() & HARDIRQ_MASK) 77 - #define softirq_count() (preempt_count() & SOFTIRQ_MASK) 78 - #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ 79 - | NMI_MASK)) 80 - 81 - /* 82 - * Are we doing bottom half or hardware interrupt processing? 83 - * Are we in a softirq context? Interrupt context? 84 - * in_softirq - Are we currently processing softirq or have bh disabled? 85 - * in_serving_softirq - Are we currently processing softirq? 86 - */ 87 - #define in_irq() (hardirq_count()) 88 - #define in_softirq() (softirq_count()) 89 - #define in_interrupt() (irq_count()) 90 - #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) 91 - 92 - /* 93 - * Are we in NMI context? 
94 - */ 95 - #define in_nmi() (preempt_count() & NMI_MASK) 96 - 97 - #if defined(CONFIG_PREEMPT_COUNT) 98 - # define PREEMPT_CHECK_OFFSET 1 99 - #else 100 - # define PREEMPT_CHECK_OFFSET 0 101 - #endif 102 - 103 - /* 104 - * Are we running in atomic context? WARNING: this macro cannot 105 - * always detect atomic context; in particular, it cannot know about 106 - * held spinlocks in non-preemptible kernels. Thus it should not be 107 - * used in the general case to determine whether sleeping is possible. 108 - * Do not use in_atomic() in driver code. 109 - */ 110 - #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) 111 - 112 - /* 113 - * Check whether we were atomic before we did preempt_disable(): 114 - * (used by the scheduler, *after* releasing the kernel lock) 115 - */ 116 - #define in_atomic_preempt_off() \ 117 - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) 118 - 119 - #ifdef CONFIG_PREEMPT_COUNT 120 - # define preemptible() (preempt_count() == 0 && !irqs_disabled()) 121 - #else 122 - # define preemptible() 0 123 - #endif 124 9 125 10 #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS) 126 11 extern void synchronize_irq(unsigned int irq);
+122
include/linux/preempt_mask.h
··· 1 + #ifndef LINUX_PREEMPT_MASK_H 2 + #define LINUX_PREEMPT_MASK_H 3 + 4 + #include <linux/preempt.h> 5 + #include <asm/hardirq.h> 6 + 7 + /* 8 + * We put the hardirq and softirq counter into the preemption 9 + * counter. The bitmask has the following meaning: 10 + * 11 + * - bits 0-7 are the preemption count (max preemption depth: 256) 12 + * - bits 8-15 are the softirq count (max # of softirqs: 256) 13 + * 14 + * The hardirq count can in theory reach the same as NR_IRQS. 15 + * In reality, the number of nested IRQS is limited to the stack 16 + * size as well. For archs with over 1000 IRQS it is not practical 17 + * to expect that they will all nest. We give a max of 10 bits for 18 + * hardirq nesting. An arch may choose to give less than 10 bits. 19 + * m68k expects it to be 8. 20 + * 21 + * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) 22 + * - bit 26 is the NMI_MASK 23 + * - bit 27 is the PREEMPT_ACTIVE flag 24 + * 25 + * PREEMPT_MASK: 0x000000ff 26 + * SOFTIRQ_MASK: 0x0000ff00 27 + * HARDIRQ_MASK: 0x03ff0000 28 + * NMI_MASK: 0x04000000 29 + */ 30 + #define PREEMPT_BITS 8 31 + #define SOFTIRQ_BITS 8 32 + #define NMI_BITS 1 33 + 34 + #define MAX_HARDIRQ_BITS 10 35 + 36 + #ifndef HARDIRQ_BITS 37 + # define HARDIRQ_BITS MAX_HARDIRQ_BITS 38 + #endif 39 + 40 + #if HARDIRQ_BITS > MAX_HARDIRQ_BITS 41 + #error HARDIRQ_BITS too high! 
42 + #endif 43 + 44 + #define PREEMPT_SHIFT 0 45 + #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) 46 + #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) 47 + #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) 48 + 49 + #define __IRQ_MASK(x) ((1UL << (x))-1) 50 + 51 + #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) 52 + #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) 53 + #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) 54 + #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) 55 + 56 + #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) 57 + #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) 58 + #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) 59 + #define NMI_OFFSET (1UL << NMI_SHIFT) 60 + 61 + #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) 62 + 63 + #ifndef PREEMPT_ACTIVE 64 + #define PREEMPT_ACTIVE_BITS 1 65 + #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) 66 + #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) 67 + #endif 68 + 69 + #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) 70 + #error PREEMPT_ACTIVE is too low! 71 + #endif 72 + 73 + #define hardirq_count() (preempt_count() & HARDIRQ_MASK) 74 + #define softirq_count() (preempt_count() & SOFTIRQ_MASK) 75 + #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ 76 + | NMI_MASK)) 77 + 78 + /* 79 + * Are we doing bottom half or hardware interrupt processing? 80 + * Are we in a softirq context? Interrupt context? 81 + * in_softirq - Are we currently processing softirq or have bh disabled? 82 + * in_serving_softirq - Are we currently processing softirq? 83 + */ 84 + #define in_irq() (hardirq_count()) 85 + #define in_softirq() (softirq_count()) 86 + #define in_interrupt() (irq_count()) 87 + #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) 88 + 89 + /* 90 + * Are we in NMI context? 
91 + */ 92 + #define in_nmi() (preempt_count() & NMI_MASK) 93 + 94 + #if defined(CONFIG_PREEMPT_COUNT) 95 + # define PREEMPT_CHECK_OFFSET 1 96 + #else 97 + # define PREEMPT_CHECK_OFFSET 0 98 + #endif 99 + 100 + /* 101 + * Are we running in atomic context? WARNING: this macro cannot 102 + * always detect atomic context; in particular, it cannot know about 103 + * held spinlocks in non-preemptible kernels. Thus it should not be 104 + * used in the general case to determine whether sleeping is possible. 105 + * Do not use in_atomic() in driver code. 106 + */ 107 + #define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) 108 + 109 + /* 110 + * Check whether we were atomic before we did preempt_disable(): 111 + * (used by the scheduler, *after* releasing the kernel lock) 112 + */ 113 + #define in_atomic_preempt_off() \ 114 + ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) 115 + 116 + #ifdef CONFIG_PREEMPT_COUNT 117 + # define preemptible() (preempt_count() == 0 && !irqs_disabled()) 118 + #else 119 + # define preemptible() 0 120 + #endif 121 + 122 + #endif /* LINUX_PREEMPT_MASK_H */
+39 -6
include/linux/tick.h
··· 10 10 #include <linux/irqflags.h> 11 11 #include <linux/percpu.h> 12 12 #include <linux/hrtimer.h> 13 + #include <linux/context_tracking_state.h> 14 + #include <linux/cpumask.h> 13 15 14 16 #ifdef CONFIG_GENERIC_CLOCKEVENTS 15 17 ··· 160 158 # endif /* !CONFIG_NO_HZ_COMMON */ 161 159 162 160 #ifdef CONFIG_NO_HZ_FULL 161 + extern bool tick_nohz_full_running; 162 + extern cpumask_var_t tick_nohz_full_mask; 163 + 164 + static inline bool tick_nohz_full_enabled(void) 165 + { 166 + if (!static_key_false(&context_tracking_enabled)) 167 + return false; 168 + 169 + return tick_nohz_full_running; 170 + } 171 + 172 + static inline bool tick_nohz_full_cpu(int cpu) 173 + { 174 + if (!tick_nohz_full_enabled()) 175 + return false; 176 + 177 + return cpumask_test_cpu(cpu, tick_nohz_full_mask); 178 + } 179 + 163 180 extern void tick_nohz_init(void); 164 - extern int tick_nohz_full_cpu(int cpu); 165 - extern void tick_nohz_full_check(void); 181 + extern void __tick_nohz_full_check(void); 166 182 extern void tick_nohz_full_kick(void); 167 183 extern void tick_nohz_full_kick_all(void); 168 - extern void tick_nohz_task_switch(struct task_struct *tsk); 184 + extern void __tick_nohz_task_switch(struct task_struct *tsk); 169 185 #else 170 186 static inline void tick_nohz_init(void) { } 171 - static inline int tick_nohz_full_cpu(int cpu) { return 0; } 172 - static inline void tick_nohz_full_check(void) { } 187 + static inline bool tick_nohz_full_enabled(void) { return false; } 188 + static inline bool tick_nohz_full_cpu(int cpu) { return false; } 189 + static inline void __tick_nohz_full_check(void) { } 173 190 static inline void tick_nohz_full_kick(void) { } 174 191 static inline void tick_nohz_full_kick_all(void) { } 175 - static inline void tick_nohz_task_switch(struct task_struct *tsk) { } 192 + static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } 176 193 #endif 194 + 195 + static inline void tick_nohz_full_check(void) 196 + { 197 + if (tick_nohz_full_enabled()) 
198 + __tick_nohz_full_check(); 199 + } 200 + 201 + static inline void tick_nohz_task_switch(struct task_struct *tsk) 202 + { 203 + if (tick_nohz_full_enabled()) 204 + __tick_nohz_task_switch(tsk); 205 + } 177 206 178 207 179 208 #endif
+65 -9
include/linux/vtime.h
··· 1 1 #ifndef _LINUX_KERNEL_VTIME_H 2 2 #define _LINUX_KERNEL_VTIME_H 3 3 4 + #include <linux/context_tracking_state.h> 5 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 6 + #include <asm/vtime.h> 7 + #endif 8 + 9 + 4 10 struct task_struct; 5 11 12 + /* 13 + * vtime_accounting_enabled() definitions/declarations 14 + */ 15 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 16 + static inline bool vtime_accounting_enabled(void) { return true; } 17 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ 18 + 19 + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 20 + static inline bool vtime_accounting_enabled(void) 21 + { 22 + if (static_key_false(&context_tracking_enabled)) { 23 + if (context_tracking_active()) 24 + return true; 25 + } 26 + 27 + return false; 28 + } 29 + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ 30 + 31 + #ifndef CONFIG_VIRT_CPU_ACCOUNTING 32 + static inline bool vtime_accounting_enabled(void) { return false; } 33 + #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ 34 + 35 + 36 + /* 37 + * Common vtime APIs 38 + */ 6 39 #ifdef CONFIG_VIRT_CPU_ACCOUNTING 40 + 41 + #ifdef __ARCH_HAS_VTIME_TASK_SWITCH 7 42 extern void vtime_task_switch(struct task_struct *prev); 43 + #else 44 + extern void vtime_common_task_switch(struct task_struct *prev); 45 + static inline void vtime_task_switch(struct task_struct *prev) 46 + { 47 + if (vtime_accounting_enabled()) 48 + vtime_common_task_switch(prev); 49 + } 50 + #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ 51 + 8 52 extern void vtime_account_system(struct task_struct *tsk); 9 53 extern void vtime_account_idle(struct task_struct *tsk); 10 54 extern void vtime_account_user(struct task_struct *tsk); 11 - extern void vtime_account_irq_enter(struct task_struct *tsk); 12 55 13 - #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE 14 - static inline bool vtime_accounting_enabled(void) { return true; } 15 - #endif 56 + #ifdef __ARCH_HAS_VTIME_ACCOUNT 57 + extern void vtime_account_irq_enter(struct task_struct *tsk); 58 + #else 59 + extern void 
vtime_common_account_irq_enter(struct task_struct *tsk); 60 + static inline void vtime_account_irq_enter(struct task_struct *tsk) 61 + { 62 + if (vtime_accounting_enabled()) 63 + vtime_common_account_irq_enter(tsk); 64 + } 65 + #endif /* __ARCH_HAS_VTIME_ACCOUNT */ 16 66 17 67 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ 18 68 ··· 70 20 static inline void vtime_account_system(struct task_struct *tsk) { } 71 21 static inline void vtime_account_user(struct task_struct *tsk) { } 72 22 static inline void vtime_account_irq_enter(struct task_struct *tsk) { } 73 - static inline bool vtime_accounting_enabled(void) { return false; } 74 - #endif 23 + #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ 75 24 76 25 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 77 26 extern void arch_vtime_task_switch(struct task_struct *tsk); 78 - extern void vtime_account_irq_exit(struct task_struct *tsk); 79 - extern bool vtime_accounting_enabled(void); 27 + extern void vtime_gen_account_irq_exit(struct task_struct *tsk); 28 + 29 + static inline void vtime_account_irq_exit(struct task_struct *tsk) 30 + { 31 + if (vtime_accounting_enabled()) 32 + vtime_gen_account_irq_exit(tsk); 33 + } 34 + 80 35 extern void vtime_user_enter(struct task_struct *tsk); 36 + 81 37 static inline void vtime_user_exit(struct task_struct *tsk) 82 38 { 83 39 vtime_account_user(tsk); ··· 91 35 extern void vtime_guest_enter(struct task_struct *tsk); 92 36 extern void vtime_guest_exit(struct task_struct *tsk); 93 37 extern void vtime_init_idle(struct task_struct *tsk, int cpu); 94 - #else 38 + #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ 95 39 static inline void vtime_account_irq_exit(struct task_struct *tsk) 96 40 { 97 41 /* On hard|softirq exit we always account to hard|softirq cputime */
+58
include/trace/events/context_tracking.h
··· 1 + #undef TRACE_SYSTEM 2 + #define TRACE_SYSTEM context_tracking 3 + 4 + #if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ) 5 + #define _TRACE_CONTEXT_TRACKING_H 6 + 7 + #include <linux/tracepoint.h> 8 + 9 + DECLARE_EVENT_CLASS(context_tracking_user, 10 + 11 + TP_PROTO(int dummy), 12 + 13 + TP_ARGS(dummy), 14 + 15 + TP_STRUCT__entry( 16 + __field( int, dummy ) 17 + ), 18 + 19 + TP_fast_assign( 20 + __entry->dummy = dummy; 21 + ), 22 + 23 + TP_printk("%s", "") 24 + ); 25 + 26 + /** 27 + * user_enter - called when the kernel resumes to userspace 28 + * @dummy: dummy arg to make trace event macro happy 29 + * 30 + * This event occurs when the kernel resumes to userspace after 31 + * an exception or a syscall. 32 + */ 33 + DEFINE_EVENT(context_tracking_user, user_enter, 34 + 35 + TP_PROTO(int dummy), 36 + 37 + TP_ARGS(dummy) 38 + ); 39 + 40 + /** 41 + * user_exit - called when userspace enters the kernel 42 + * @dummy: dummy arg to make trace event macro happy 43 + * 44 + * This event occurs when userspace enters the kernel through 45 + * an exception or a syscall. 46 + */ 47 + DEFINE_EVENT(context_tracking_user, user_exit, 48 + 49 + TP_PROTO(int dummy), 50 + 51 + TP_ARGS(dummy) 52 + ); 53 + 54 + 55 + #endif /* _TRACE_CONTEXT_TRACKING_H */ 56 + 57 + /* This part must be outside protection */ 58 + #include <trace/define_trace.h>
+22 -6
init/Kconfig
··· 528 528 config CONTEXT_TRACKING_FORCE 529 529 bool "Force context tracking" 530 530 depends on CONTEXT_TRACKING 531 - default CONTEXT_TRACKING 531 + default y if !NO_HZ_FULL 532 532 help 533 - Probe on user/kernel boundaries by default in order to 534 - test the features that rely on it such as userspace RCU extended 535 - quiescent states. 536 - This test is there for debugging until we have a real user like the 537 - full dynticks mode. 533 + The major pre-requirement for full dynticks to work is to 534 + support the context tracking subsystem. But there are also 535 + other dependencies to provide in order to make the full 536 + dynticks working. 537 + 538 + This option stands for testing when an arch implements the 539 + context tracking backend but doesn't yet fullfill all the 540 + requirements to make the full dynticks feature working. 541 + Without the full dynticks, there is no way to test the support 542 + for context tracking and the subsystems that rely on it: RCU 543 + userspace extended quiescent state and tickless cputime 544 + accounting. This option copes with the absence of the full 545 + dynticks subsystem by forcing the context tracking on all 546 + CPUs in the system. 547 + 548 + Say Y only if you're working on the developpement of an 549 + architecture backend for the context tracking. 550 + 551 + Say N otherwise, this option brings an overhead that you 552 + don't want in production. 553 + 538 554 539 555 config RCU_FANOUT 540 556 int "Tree-based hierarchical RCU fanout value"
+2
init/main.c
··· 75 75 #include <linux/blkdev.h> 76 76 #include <linux/elevator.h> 77 77 #include <linux/sched_clock.h> 78 + #include <linux/context_tracking.h> 78 79 79 80 #include <asm/io.h> 80 81 #include <asm/bugs.h> ··· 546 545 idr_init_cache(); 547 546 rcu_init(); 548 547 tick_nohz_init(); 548 + context_tracking_init(); 549 549 radix_tree_init(); 550 550 /* init some links before init_ISA_irqs() */ 551 551 early_irq_init();
+71 -54
kernel/context_tracking.c
··· 20 20 #include <linux/hardirq.h> 21 21 #include <linux/export.h> 22 22 23 - DEFINE_PER_CPU(struct context_tracking, context_tracking) = { 24 - #ifdef CONFIG_CONTEXT_TRACKING_FORCE 25 - .active = true, 26 - #endif 27 - }; 23 + #define CREATE_TRACE_POINTS 24 + #include <trace/events/context_tracking.h> 25 + 26 + struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE; 27 + EXPORT_SYMBOL_GPL(context_tracking_enabled); 28 + 29 + DEFINE_PER_CPU(struct context_tracking, context_tracking); 30 + EXPORT_SYMBOL_GPL(context_tracking); 31 + 32 + void context_tracking_cpu_set(int cpu) 33 + { 34 + if (!per_cpu(context_tracking.active, cpu)) { 35 + per_cpu(context_tracking.active, cpu) = true; 36 + static_key_slow_inc(&context_tracking_enabled); 37 + } 38 + } 28 39 29 40 /** 30 - * user_enter - Inform the context tracking that the CPU is going to 31 - * enter userspace mode. 41 + * context_tracking_user_enter - Inform the context tracking that the CPU is going to 42 + * enter userspace mode. 32 43 * 33 44 * This function must be called right before we switch from the kernel 34 45 * to userspace, when it's guaranteed the remaining kernel instructions 35 46 * to execute won't use any RCU read side critical section because this 36 47 * function sets RCU in extended quiescent state. 37 48 */ 38 - void user_enter(void) 49 + void context_tracking_user_enter(void) 39 50 { 40 51 unsigned long flags; 41 52 ··· 65 54 WARN_ON_ONCE(!current->mm); 66 55 67 56 local_irq_save(flags); 68 - if (__this_cpu_read(context_tracking.active) && 69 - __this_cpu_read(context_tracking.state) != IN_USER) { 57 + if ( __this_cpu_read(context_tracking.state) != IN_USER) { 58 + if (__this_cpu_read(context_tracking.active)) { 59 + trace_user_enter(0); 60 + /* 61 + * At this stage, only low level arch entry code remains and 62 + * then we'll run in userspace. We can assume there won't be 63 + * any RCU read-side critical section until the next call to 64 + * user_exit() or rcu_irq_enter(). 
Let's remove RCU's dependency 65 + * on the tick. 66 + */ 67 + vtime_user_enter(current); 68 + rcu_user_enter(); 69 + } 70 70 /* 71 - * At this stage, only low level arch entry code remains and 72 - * then we'll run in userspace. We can assume there won't be 73 - * any RCU read-side critical section until the next call to 74 - * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency 75 - * on the tick. 71 + * Even if context tracking is disabled on this CPU, because it's outside 72 + * the full dynticks mask for example, we still have to keep track of the 73 + * context transitions and states to prevent inconsistency on those of 74 + * other CPUs. 75 + * If a task triggers an exception in userspace, sleep on the exception 76 + * handler and then migrate to another CPU, that new CPU must know where 77 + * the exception returns by the time we call exception_exit(). 78 + * This information can only be provided by the previous CPU when it called 79 + * exception_enter(). 80 + * OTOH we can spare the calls to vtime and RCU when context_tracking.active 81 + * is false because we know that CPU is not tickless. 76 82 */ 77 - vtime_user_enter(current); 78 - rcu_user_enter(); 79 83 __this_cpu_write(context_tracking.state, IN_USER); 80 84 } 81 85 local_irq_restore(flags); ··· 113 87 */ 114 88 void __sched notrace preempt_schedule_context(void) 115 89 { 116 - struct thread_info *ti = current_thread_info(); 117 90 enum ctx_state prev_ctx; 118 91 119 - if (likely(ti->preempt_count || irqs_disabled())) 92 + if (likely(!preemptible())) 120 93 return; 121 94 122 95 /* ··· 137 112 #endif /* CONFIG_PREEMPT */ 138 113 139 114 /** 140 - * user_exit - Inform the context tracking that the CPU is 141 - * exiting userspace mode and entering the kernel. 115 + * context_tracking_user_exit - Inform the context tracking that the CPU is 116 + * exiting userspace mode and entering the kernel. 
142 117 * 143 118 * This function must be called after we entered the kernel from userspace 144 119 * before any use of RCU read side critical section. This potentially include ··· 147 122 * This call supports re-entrancy. This way it can be called from any exception 148 123 * handler without needing to know if we came from userspace or not. 149 124 */ 150 - void user_exit(void) 125 + void context_tracking_user_exit(void) 151 126 { 152 127 unsigned long flags; 153 128 ··· 156 131 157 132 local_irq_save(flags); 158 133 if (__this_cpu_read(context_tracking.state) == IN_USER) { 159 - /* 160 - * We are going to run code that may use RCU. Inform 161 - * RCU core about that (ie: we may need the tick again). 162 - */ 163 - rcu_user_exit(); 164 - vtime_user_exit(current); 134 + if (__this_cpu_read(context_tracking.active)) { 135 + /* 136 + * We are going to run code that may use RCU. Inform 137 + * RCU core about that (ie: we may need the tick again). 138 + */ 139 + rcu_user_exit(); 140 + vtime_user_exit(current); 141 + trace_user_exit(0); 142 + } 165 143 __this_cpu_write(context_tracking.state, IN_KERNEL); 166 144 } 167 145 local_irq_restore(flags); 168 146 } 169 147 170 - void guest_enter(void) 171 - { 172 - if (vtime_accounting_enabled()) 173 - vtime_guest_enter(current); 174 - else 175 - __guest_enter(); 176 - } 177 - EXPORT_SYMBOL_GPL(guest_enter); 178 - 179 - void guest_exit(void) 180 - { 181 - if (vtime_accounting_enabled()) 182 - vtime_guest_exit(current); 183 - else 184 - __guest_exit(); 185 - } 186 - EXPORT_SYMBOL_GPL(guest_exit); 187 - 188 - 189 148 /** 190 - * context_tracking_task_switch - context switch the syscall callbacks 149 + * __context_tracking_task_switch - context switch the syscall callbacks 191 150 * @prev: the task that is being switched out 192 151 * @next: the task that is being switched in 193 152 * ··· 183 174 * migrate to some CPU that doesn't do the context tracking. As such the TIF 184 175 * flag may not be desired there. 
185 176 */ 186 - void context_tracking_task_switch(struct task_struct *prev, 187 - struct task_struct *next) 177 + void __context_tracking_task_switch(struct task_struct *prev, 178 + struct task_struct *next) 188 179 { 189 - if (__this_cpu_read(context_tracking.active)) { 190 - clear_tsk_thread_flag(prev, TIF_NOHZ); 191 - set_tsk_thread_flag(next, TIF_NOHZ); 192 - } 180 + clear_tsk_thread_flag(prev, TIF_NOHZ); 181 + set_tsk_thread_flag(next, TIF_NOHZ); 193 182 } 183 + 184 + #ifdef CONFIG_CONTEXT_TRACKING_FORCE 185 + void __init context_tracking_init(void) 186 + { 187 + int cpu; 188 + 189 + for_each_possible_cpu(cpu) 190 + context_tracking_cpu_set(cpu); 191 + } 192 + #endif
+1 -3
kernel/sched/core.c
··· 2527 2527 */ 2528 2528 asmlinkage void __sched notrace preempt_schedule(void) 2529 2529 { 2530 - struct thread_info *ti = current_thread_info(); 2531 - 2532 2530 /* 2533 2531 * If there is a non-zero preempt_count or interrupts are disabled, 2534 2532 * we do not want to preempt the current task. Just return.. 2535 2533 */ 2536 - if (likely(ti->preempt_count || irqs_disabled())) 2534 + if (likely(!preemptible())) 2537 2535 return; 2538 2536 2539 2537 do {
+16 -37
kernel/sched/cputime.c
··· 378 378 #ifdef CONFIG_VIRT_CPU_ACCOUNTING 379 379 380 380 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH 381 - void vtime_task_switch(struct task_struct *prev) 381 + void vtime_common_task_switch(struct task_struct *prev) 382 382 { 383 - if (!vtime_accounting_enabled()) 384 - return; 385 - 386 383 if (is_idle_task(prev)) 387 384 vtime_account_idle(prev); 388 385 else ··· 401 404 * vtime_account(). 402 405 */ 403 406 #ifndef __ARCH_HAS_VTIME_ACCOUNT 404 - void vtime_account_irq_enter(struct task_struct *tsk) 407 + void vtime_common_account_irq_enter(struct task_struct *tsk) 405 408 { 406 - if (!vtime_accounting_enabled()) 407 - return; 408 - 409 409 if (!in_interrupt()) { 410 410 /* 411 411 * If we interrupted user, context_tracking_in_user() ··· 422 428 } 423 429 vtime_account_system(tsk); 424 430 } 425 - EXPORT_SYMBOL_GPL(vtime_account_irq_enter); 431 + EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter); 426 432 #endif /* __ARCH_HAS_VTIME_ACCOUNT */ 427 433 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ 428 434 ··· 553 559 { 554 560 cputime_t rtime, stime, utime, total; 555 561 556 - if (vtime_accounting_enabled()) { 557 - *ut = curr->utime; 558 - *st = curr->stime; 559 - return; 560 - } 561 - 562 562 stime = curr->stime; 563 563 total = stime + curr->utime; 564 564 ··· 652 664 653 665 void vtime_account_system(struct task_struct *tsk) 654 666 { 655 - if (!vtime_accounting_enabled()) 656 - return; 657 - 658 667 write_seqlock(&tsk->vtime_seqlock); 659 668 __vtime_account_system(tsk); 660 669 write_sequnlock(&tsk->vtime_seqlock); 661 670 } 662 671 663 - void vtime_account_irq_exit(struct task_struct *tsk) 672 + void vtime_gen_account_irq_exit(struct task_struct *tsk) 664 673 { 665 - if (!vtime_accounting_enabled()) 666 - return; 667 - 668 674 write_seqlock(&tsk->vtime_seqlock); 675 + __vtime_account_system(tsk); 669 676 if (context_tracking_in_user()) 670 677 tsk->vtime_snap_whence = VTIME_USER; 671 - __vtime_account_system(tsk); 672 678 write_sequnlock(&tsk->vtime_seqlock); 673 
679 } 674 680 ··· 670 688 { 671 689 cputime_t delta_cpu; 672 690 673 - if (!vtime_accounting_enabled()) 674 - return; 675 - 676 - delta_cpu = get_vtime_delta(tsk); 677 - 678 691 write_seqlock(&tsk->vtime_seqlock); 692 + delta_cpu = get_vtime_delta(tsk); 679 693 tsk->vtime_snap_whence = VTIME_SYS; 680 694 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); 681 695 write_sequnlock(&tsk->vtime_seqlock); ··· 679 701 680 702 void vtime_user_enter(struct task_struct *tsk) 681 703 { 682 - if (!vtime_accounting_enabled()) 683 - return; 684 - 685 704 write_seqlock(&tsk->vtime_seqlock); 686 - tsk->vtime_snap_whence = VTIME_USER; 687 705 __vtime_account_system(tsk); 706 + tsk->vtime_snap_whence = VTIME_USER; 688 707 write_sequnlock(&tsk->vtime_seqlock); 689 708 } 690 709 691 710 void vtime_guest_enter(struct task_struct *tsk) 692 711 { 712 + /* 713 + * The flags must be updated under the lock with 714 + * the vtime_snap flush and update. 715 + * That enforces a right ordering and update sequence 716 + * synchronization against the reader (task_gtime()) 717 + * that can thus safely catch up with a tickless delta. 718 + */ 693 719 write_seqlock(&tsk->vtime_seqlock); 694 720 __vtime_account_system(tsk); 695 721 current->flags |= PF_VCPU; 696 722 write_sequnlock(&tsk->vtime_seqlock); 697 723 } 724 + EXPORT_SYMBOL_GPL(vtime_guest_enter); 698 725 699 726 void vtime_guest_exit(struct task_struct *tsk) 700 727 { ··· 708 725 current->flags &= ~PF_VCPU; 709 726 write_sequnlock(&tsk->vtime_seqlock); 710 727 } 728 + EXPORT_SYMBOL_GPL(vtime_guest_exit); 711 729 712 730 void vtime_account_idle(struct task_struct *tsk) 713 731 { 714 732 cputime_t delta_cpu = get_vtime_delta(tsk); 715 733 716 734 account_idle_time(delta_cpu); 717 - } 718 - 719 - bool vtime_accounting_enabled(void) 720 - { 721 - return context_tracking_active(); 722 735 } 723 736 724 737 void arch_vtime_task_switch(struct task_struct *prev)
-1
kernel/time/Kconfig
··· 105 105 select RCU_USER_QS 106 106 select RCU_NOCB_CPU 107 107 select VIRT_CPU_ACCOUNTING_GEN 108 - select CONTEXT_TRACKING_FORCE 109 108 select IRQ_WORK 110 109 help 111 110 Adaptively try to shutdown the tick whenever possible, even when
+29 -32
kernel/time/tick-sched.c
··· 23 23 #include <linux/irq_work.h> 24 24 #include <linux/posix-timers.h> 25 25 #include <linux/perf_event.h> 26 + #include <linux/context_tracking.h> 26 27 27 28 #include <asm/irq_regs.h> 28 29 ··· 149 148 } 150 149 151 150 #ifdef CONFIG_NO_HZ_FULL 152 - static cpumask_var_t nohz_full_mask; 153 - bool have_nohz_full_mask; 151 + cpumask_var_t tick_nohz_full_mask; 152 + bool tick_nohz_full_running; 154 153 155 154 static bool can_stop_full_tick(void) 156 155 { ··· 183 182 * Don't allow the user to think they can get 184 183 * full NO_HZ with this machine. 185 184 */ 186 - WARN_ONCE(have_nohz_full_mask, 185 + WARN_ONCE(tick_nohz_full_running, 187 186 "NO_HZ FULL will not work with unstable sched clock"); 188 187 return false; 189 188 } ··· 198 197 * Re-evaluate the need for the tick on the current CPU 199 198 * and restart it if necessary. 200 199 */ 201 - void tick_nohz_full_check(void) 200 + void __tick_nohz_full_check(void) 202 201 { 203 202 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 204 203 ··· 212 211 213 212 static void nohz_full_kick_work_func(struct irq_work *work) 214 213 { 215 - tick_nohz_full_check(); 214 + __tick_nohz_full_check(); 216 215 } 217 216 218 217 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { ··· 231 230 232 231 static void nohz_full_kick_ipi(void *info) 233 232 { 234 - tick_nohz_full_check(); 233 + __tick_nohz_full_check(); 235 234 } 236 235 237 236 /* ··· 240 239 */ 241 240 void tick_nohz_full_kick_all(void) 242 241 { 243 - if (!have_nohz_full_mask) 242 + if (!tick_nohz_full_running) 244 243 return; 245 244 246 245 preempt_disable(); 247 - smp_call_function_many(nohz_full_mask, 246 + smp_call_function_many(tick_nohz_full_mask, 248 247 nohz_full_kick_ipi, NULL, false); 248 + tick_nohz_full_kick(); 249 249 preempt_enable(); 250 250 } 251 251 ··· 255 253 * It might need the tick due to per task/process properties: 256 254 * perf events, posix cpu timers, ... 
257 255 */ 258 - void tick_nohz_task_switch(struct task_struct *tsk) 256 + void __tick_nohz_task_switch(struct task_struct *tsk) 259 257 { 260 258 unsigned long flags; 261 259 ··· 271 269 local_irq_restore(flags); 272 270 } 273 271 274 - int tick_nohz_full_cpu(int cpu) 275 - { 276 - if (!have_nohz_full_mask) 277 - return 0; 278 - 279 - return cpumask_test_cpu(cpu, nohz_full_mask); 280 - } 281 - 282 272 /* Parse the boot-time nohz CPU list from the kernel parameters. */ 283 273 static int __init tick_nohz_full_setup(char *str) 284 274 { 285 275 int cpu; 286 276 287 - alloc_bootmem_cpumask_var(&nohz_full_mask); 288 - if (cpulist_parse(str, nohz_full_mask) < 0) { 277 + alloc_bootmem_cpumask_var(&tick_nohz_full_mask); 278 + if (cpulist_parse(str, tick_nohz_full_mask) < 0) { 289 279 pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); 290 280 return 1; 291 281 } 292 282 293 283 cpu = smp_processor_id(); 294 - if (cpumask_test_cpu(cpu, nohz_full_mask)) { 284 + if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { 295 285 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); 296 - cpumask_clear_cpu(cpu, nohz_full_mask); 286 + cpumask_clear_cpu(cpu, tick_nohz_full_mask); 297 287 } 298 - have_nohz_full_mask = true; 288 + tick_nohz_full_running = true; 299 289 300 290 return 1; 301 291 } ··· 305 311 * If we handle the timekeeping duty for full dynticks CPUs, 306 312 * we can't safely shutdown that CPU. 
307 313 */ 308 - if (have_nohz_full_mask && tick_do_timer_cpu == cpu) 314 + if (tick_nohz_full_running && tick_do_timer_cpu == cpu) 309 315 return NOTIFY_BAD; 310 316 break; 311 317 } ··· 324 330 int err = -1; 325 331 326 332 #ifdef CONFIG_NO_HZ_FULL_ALL 327 - if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { 333 + if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { 328 334 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); 329 335 return err; 330 336 } 331 337 err = 0; 332 - cpumask_setall(nohz_full_mask); 333 - cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); 334 - have_nohz_full_mask = true; 338 + cpumask_setall(tick_nohz_full_mask); 339 + cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); 340 + tick_nohz_full_running = true; 335 341 #endif 336 342 return err; 337 343 } 338 344 339 345 void __init tick_nohz_init(void) 340 346 { 341 - if (!have_nohz_full_mask) { 347 + int cpu; 348 + 349 + if (!tick_nohz_full_running) { 342 350 if (tick_nohz_init_all() < 0) 343 351 return; 344 352 } 345 353 354 + for_each_cpu(cpu, tick_nohz_full_mask) 355 + context_tracking_cpu_set(cpu); 356 + 346 357 cpu_notifier(tick_nohz_cpu_down_callback, 0); 347 - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); 358 + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); 348 359 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); 349 360 } 350 - #else 351 - #define have_nohz_full_mask (0) 352 361 #endif 353 362 354 363 /* ··· 729 732 return false; 730 733 } 731 734 732 - if (have_nohz_full_mask) { 735 + if (tick_nohz_full_enabled()) { 733 736 /* 734 737 * Keep the tick alive to guarantee timekeeping progression 735 738 * if there are full dynticks CPUs around