Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull NOHZ updates from Thomas Gleixner:
"A few updates to the nohz infrastructure:

- recursion protection for context tracking

- make the TIF_NOHZ inheritance smarter

- isolate cpus which belong to the NOHZ full set"

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
nohz: Set isolcpus when nohz_full is set
nohz: Add tick_nohz_full_add_cpus_to() API
context_tracking: Inherit TIF_NOHZ through forks instead of context switches
context_tracking: Protect against recursion

+59 -33
-10
include/linux/context_tracking.h
··· 14 14 extern void context_tracking_exit(enum ctx_state state); 15 15 extern void context_tracking_user_enter(void); 16 16 extern void context_tracking_user_exit(void); 17 - extern void __context_tracking_task_switch(struct task_struct *prev, 18 - struct task_struct *next); 19 17 20 18 static inline void user_enter(void) 21 19 { ··· 49 51 } 50 52 } 51 53 52 - static inline void context_tracking_task_switch(struct task_struct *prev, 53 - struct task_struct *next) 54 - { 55 - if (context_tracking_is_enabled()) 56 - __context_tracking_task_switch(prev, next); 57 - } 58 54 #else 59 55 static inline void user_enter(void) { } 60 56 static inline void user_exit(void) { } 61 57 static inline enum ctx_state exception_enter(void) { return 0; } 62 58 static inline void exception_exit(enum ctx_state prev_ctx) { } 63 - static inline void context_tracking_task_switch(struct task_struct *prev, 64 - struct task_struct *next) { } 65 59 #endif /* !CONFIG_CONTEXT_TRACKING */ 66 60 67 61
+1
include/linux/context_tracking_state.h
··· 12 12 * may be further optimized using static keys. 13 13 */ 14 14 bool active; 15 + int recursion; 15 16 enum ctx_state { 16 17 CONTEXT_KERNEL = 0, 17 18 CONTEXT_USER,
+3
include/linux/sched.h
··· 2599 2599 } 2600 2600 #endif 2601 2601 2602 + #define tasklist_empty() \ 2603 + list_empty(&init_task.tasks) 2604 + 2602 2605 #define next_task(p) \ 2603 2606 list_entry_rcu((p)->tasks.next, struct task_struct, tasks) 2604 2607
+7
include/linux/tick.h
··· 134 134 return cpumask_test_cpu(cpu, tick_nohz_full_mask); 135 135 } 136 136 137 + static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) 138 + { 139 + if (tick_nohz_full_enabled()) 140 + cpumask_or(mask, mask, tick_nohz_full_mask); 141 + } 142 + 137 143 extern void __tick_nohz_full_check(void); 138 144 extern void tick_nohz_full_kick(void); 139 145 extern void tick_nohz_full_kick_cpu(int cpu); ··· 148 142 #else 149 143 static inline bool tick_nohz_full_enabled(void) { return false; } 150 144 static inline bool tick_nohz_full_cpu(int cpu) { return false; } 145 + static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } 151 146 static inline void __tick_nohz_full_check(void) { } 152 147 static inline void tick_nohz_full_kick_cpu(int cpu) { } 153 148 static inline void tick_nohz_full_kick(void) { }
+45 -22
kernel/context_tracking.c
··· 30 30 DEFINE_PER_CPU(struct context_tracking, context_tracking); 31 31 EXPORT_SYMBOL_GPL(context_tracking); 32 32 33 - void context_tracking_cpu_set(int cpu) 33 + static bool context_tracking_recursion_enter(void) 34 34 { 35 - if (!per_cpu(context_tracking.active, cpu)) { 36 - per_cpu(context_tracking.active, cpu) = true; 37 - static_key_slow_inc(&context_tracking_enabled); 38 - } 35 + int recursion; 36 + 37 + recursion = __this_cpu_inc_return(context_tracking.recursion); 38 + if (recursion == 1) 39 + return true; 40 + 41 + WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion); 42 + __this_cpu_dec(context_tracking.recursion); 43 + 44 + return false; 45 + } 46 + 47 + static void context_tracking_recursion_exit(void) 48 + { 49 + __this_cpu_dec(context_tracking.recursion); 39 50 } 40 51 41 52 /** ··· 86 75 WARN_ON_ONCE(!current->mm); 87 76 88 77 local_irq_save(flags); 78 + if (!context_tracking_recursion_enter()) 79 + goto out_irq_restore; 80 + 89 81 if ( __this_cpu_read(context_tracking.state) != state) { 90 82 if (__this_cpu_read(context_tracking.active)) { 91 83 /* ··· 119 105 */ 120 106 __this_cpu_write(context_tracking.state, state); 121 107 } 108 + context_tracking_recursion_exit(); 109 + out_irq_restore: 122 110 local_irq_restore(flags); 123 111 } 124 112 NOKPROBE_SYMBOL(context_tracking_enter); ··· 155 139 return; 156 140 157 141 local_irq_save(flags); 142 + if (!context_tracking_recursion_enter()) 143 + goto out_irq_restore; 144 + 158 145 if (__this_cpu_read(context_tracking.state) == state) { 159 146 if (__this_cpu_read(context_tracking.active)) { 160 147 /* ··· 172 153 } 173 154 __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); 174 155 } 156 + context_tracking_recursion_exit(); 157 + out_irq_restore: 175 158 local_irq_restore(flags); 176 159 } 177 160 NOKPROBE_SYMBOL(context_tracking_exit); ··· 185 164 } 186 165 NOKPROBE_SYMBOL(context_tracking_user_exit); 187 166 188 - /** 189 - * __context_tracking_task_switch - context switch the syscall callbacks 190 - * @prev: the task that is being switched out 191 - * @next: the task that is being switched in 192 - * 193 - * The context tracking uses the syscall slow path to implement its user-kernel 194 - * boundaries probes on syscalls. This way it doesn't impact the syscall fast 195 - * path on CPUs that don't do context tracking. 196 - * 197 - * But we need to clear the flag on the previous task because it may later 198 - * migrate to some CPU that doesn't do the context tracking. As such the TIF 199 - * flag may not be desired there. 200 - */ 201 - void __context_tracking_task_switch(struct task_struct *prev, 202 - struct task_struct *next) 167 + void __init context_tracking_cpu_set(int cpu) 203 168 { 204 - clear_tsk_thread_flag(prev, TIF_NOHZ); 205 - set_tsk_thread_flag(next, TIF_NOHZ); 169 + static __initdata bool initialized = false; 170 + 171 + if (!per_cpu(context_tracking.active, cpu)) { 172 + per_cpu(context_tracking.active, cpu) = true; 173 + static_key_slow_inc(&context_tracking_enabled); 174 + } 175 + 176 + if (initialized) 177 + return; 178 + 179 + /* 180 + * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork 181 + * This assumes that init is the only task at this early boot stage. 182 + */ 183 + set_tsk_thread_flag(&init_task, TIF_NOHZ); 184 + WARN_ON_ONCE(!tasklist_empty()); 185 + 186 + initialized = true; 206 187 } 207 188 208 189 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
+3 -1
kernel/sched/core.c
··· 2374 2374 */ 2375 2375 spin_release(&rq->lock.dep_map, 1, _THIS_IP_); 2376 2376 2377 - context_tracking_task_switch(prev, next); 2378 2377 /* Here we just switch the register state and the stack. */ 2379 2378 switch_to(prev, next, prev); 2380 2379 barrier(); ··· 7066 7067 7067 7068 alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); 7068 7069 alloc_cpumask_var(&fallback_doms, GFP_KERNEL); 7070 + 7071 + /* nohz_full won't take effect without isolating the cpus. */ 7072 + tick_nohz_full_add_cpus_to(cpu_isolated_map); 7069 7073 7070 7074 sched_init_numa(); 7071 7075