Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

locking/static_keys: Add a new static_key interface

There are various problems and shortcomings with the current
static_key interface:

- static_key_{true,false}() read like a branch depending on the key
value, instead of the actual likely/unlikely branch depending on
init value.

- static_key_{true,false}() are, as stated above, tied to the
static_key init values STATIC_KEY_INIT_{TRUE,FALSE}.

- we're limited to the 2 (out of 4) possible options that compile to
a default NOP because that's what our arch_static_branch() assembly
emits.

So provide a new static_key interface:

DEFINE_STATIC_KEY_TRUE(name);
DEFINE_STATIC_KEY_FALSE(name);

Which define a key of different types with an initial true/false
value.

Then allow:

static_branch_likely()
static_branch_unlikely()

to take a key of either type and emit the right instruction for the
case.

This means adding a second arch_static_branch_jump() assembly helper
which emits a JMP per default.

In order to determine the right instruction for the right state,
encode the branch type in the LSB of jump_entry::key.

This is the final step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

... but it also allows new static key combinations that will give us
performance enhancements in the subsequent patches.

Tested-by: Rabin Vincent <rabin@rab.in> # arm
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> # ppc
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> # s390
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by: Peter Zijlstra
Committed by: Ingo Molnar
11276d53 706249c2

+299 -45
+18 -9
arch/arm/include/asm/jump_label.h
··· 4 4 #ifndef __ASSEMBLY__ 5 5 6 6 #include <linux/types.h> 7 + #include <asm/unified.h> 7 8 8 9 #define JUMP_LABEL_NOP_SIZE 4 9 10 10 - #ifdef CONFIG_THUMB2_KERNEL 11 - #define JUMP_LABEL_NOP "nop.w" 12 - #else 13 - #define JUMP_LABEL_NOP "nop" 14 - #endif 15 - 16 - static __always_inline bool arch_static_branch(struct static_key *key) 11 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 17 12 { 18 13 asm_volatile_goto("1:\n\t" 19 - JUMP_LABEL_NOP "\n\t" 14 + WASM(nop) "\n\t" 20 15 ".pushsection __jump_table, \"aw\"\n\t" 21 16 ".word 1b, %l[l_yes], %c0\n\t" 22 17 ".popsection\n\t" 23 - : : "i" (key) : : l_yes); 18 + : : "i" (&((char *)key)[branch]) : : l_yes); 19 + 20 + return false; 21 + l_yes: 22 + return true; 23 + } 24 + 25 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 26 + { 27 + asm_volatile_goto("1:\n\t" 28 + WASM(b) " %l[l_yes]\n\t" 29 + ".pushsection __jump_table, \"aw\"\n\t" 30 + ".word 1b, %l[l_yes], %c0\n\t" 31 + ".popsection\n\t" 32 + : : "i" (&((char *)key)[branch]) : : l_yes); 24 33 25 34 return false; 26 35 l_yes:
+16 -2
arch/arm64/include/asm/jump_label.h
··· 26 26 27 27 #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE 28 28 29 - static __always_inline bool arch_static_branch(struct static_key *key) 29 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 30 30 { 31 31 asm goto("1: nop\n\t" 32 32 ".pushsection __jump_table, \"aw\"\n\t" 33 33 ".align 3\n\t" 34 34 ".quad 1b, %l[l_yes], %c0\n\t" 35 35 ".popsection\n\t" 36 - : : "i"(key) : : l_yes); 36 + : : "i"(&((char *)key)[branch]) : : l_yes); 37 + 38 + return false; 39 + l_yes: 40 + return true; 41 + } 42 + 43 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 44 + { 45 + asm goto("1: b %l[l_yes]\n\t" 46 + ".pushsection __jump_table, \"aw\"\n\t" 47 + ".align 3\n\t" 48 + ".quad 1b, %l[l_yes], %c0\n\t" 49 + ".popsection\n\t" 50 + : : "i"(&((char *)key)[branch]) : : l_yes); 37 51 38 52 return false; 39 53 l_yes:
+17 -2
arch/mips/include/asm/jump_label.h
··· 26 26 #define NOP_INSN "nop" 27 27 #endif 28 28 29 - static __always_inline bool arch_static_branch(struct static_key *key) 29 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 30 30 { 31 31 asm_volatile_goto("1:\t" NOP_INSN "\n\t" 32 32 "nop\n\t" 33 33 ".pushsection __jump_table, \"aw\"\n\t" 34 34 WORD_INSN " 1b, %l[l_yes], %0\n\t" 35 35 ".popsection\n\t" 36 - : : "i" (key) : : l_yes); 36 + : : "i" (&((char *)key)[branch]) : : l_yes); 37 + 38 + return false; 39 + l_yes: 40 + return true; 41 + } 42 + 43 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 44 + { 45 + asm_volatile_goto("1:\tj %l[l_yes]\n\t" 46 + "nop\n\t" 47 + ".pushsection __jump_table, \"aw\"\n\t" 48 + WORD_INSN " 1b, %l[l_yes], %0\n\t" 49 + ".popsection\n\t" 50 + : : "i" (&((char *)key)[branch]) : : l_yes); 51 + 37 52 return false; 38 53 l_yes: 39 54 return true;
+17 -2
arch/powerpc/include/asm/jump_label.h
··· 18 18 #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) 19 19 #define JUMP_LABEL_NOP_SIZE 4 20 20 21 - static __always_inline bool arch_static_branch(struct static_key *key) 21 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 22 22 { 23 23 asm_volatile_goto("1:\n\t" 24 24 "nop\n\t" 25 25 ".pushsection __jump_table, \"aw\"\n\t" 26 26 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" 27 27 ".popsection \n\t" 28 - : : "i" (key) : : l_yes); 28 + : : "i" (&((char *)key)[branch]) : : l_yes); 29 + 30 + return false; 31 + l_yes: 32 + return true; 33 + } 34 + 35 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 36 + { 37 + asm_volatile_goto("1:\n\t" 38 + "b %l[l_yes]\n\t" 39 + ".pushsection __jump_table, \"aw\"\n\t" 40 + JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" 41 + ".popsection \n\t" 42 + : : "i" (&((char *)key)[branch]) : : l_yes); 43 + 29 44 return false; 30 45 l_yes: 31 46 return true;
+17 -2
arch/s390/include/asm/jump_label.h
··· 12 12 * We use a brcl 0,2 instruction for jump labels at compile time so it 13 13 * can be easily distinguished from a hotpatch generated instruction. 14 14 */ 15 - static __always_inline bool arch_static_branch(struct static_key *key) 15 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 16 16 { 17 17 asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" 18 18 ".pushsection __jump_table, \"aw\"\n" 19 19 ".balign 8\n" 20 20 ".quad 0b, %l[label], %0\n" 21 21 ".popsection\n" 22 - : : "X" (key) : : label); 22 + : : "X" (&((char *)key)[branch]) : : label); 23 + 24 + return false; 25 + label: 26 + return true; 27 + } 28 + 29 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 30 + { 31 + asm_volatile_goto("0: brcl 15, %l[label]\n" 32 + ".pushsection __jump_table, \"aw\"\n" 33 + ".balign 8\n" 34 + ".quad 0b, %l[label], %0\n" 35 + ".popsection\n" 36 + : : "X" (&((char *)key)[branch]) : : label); 37 + 23 38 return false; 24 39 label: 25 40 return true;
+26 -9
arch/sparc/include/asm/jump_label.h
··· 7 7 8 8 #define JUMP_LABEL_NOP_SIZE 4 9 9 10 - static __always_inline bool arch_static_branch(struct static_key *key) 10 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 11 11 { 12 - asm_volatile_goto("1:\n\t" 13 - "nop\n\t" 14 - "nop\n\t" 15 - ".pushsection __jump_table, \"aw\"\n\t" 16 - ".align 4\n\t" 17 - ".word 1b, %l[l_yes], %c0\n\t" 18 - ".popsection \n\t" 19 - : : "i" (key) : : l_yes); 12 + asm_volatile_goto("1:\n\t" 13 + "nop\n\t" 14 + "nop\n\t" 15 + ".pushsection __jump_table, \"aw\"\n\t" 16 + ".align 4\n\t" 17 + ".word 1b, %l[l_yes], %c0\n\t" 18 + ".popsection \n\t" 19 + : : "i" (&((char *)key)[branch]) : : l_yes); 20 + 21 + return false; 22 + l_yes: 23 + return true; 24 + } 25 + 26 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 27 + { 28 + asm_volatile_goto("1:\n\t" 29 + "b %l[l_yes]\n\t" 30 + "nop\n\t" 31 + ".pushsection __jump_table, \"aw\"\n\t" 32 + ".align 4\n\t" 33 + ".word 1b, %l[l_yes], %c0\n\t" 34 + ".popsection \n\t" 35 + : : "i" (&((char *)key)[branch]) : : l_yes); 36 + 20 37 return false; 21 38 l_yes: 22 39 return true;
+19 -2
arch/x86/include/asm/jump_label.h
··· 16 16 # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC 17 17 #endif 18 18 19 - static __always_inline bool arch_static_branch(struct static_key *key) 19 + static __always_inline bool arch_static_branch(struct static_key *key, bool branch) 20 20 { 21 21 asm_volatile_goto("1:" 22 22 ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ··· 24 24 _ASM_ALIGN "\n\t" 25 25 _ASM_PTR "1b, %l[l_yes], %c0 \n\t" 26 26 ".popsection \n\t" 27 - : : "i" (key) : : l_yes); 27 + : : "i" (&((char *)key)[branch]) : : l_yes); 28 + 29 + return false; 30 + l_yes: 31 + return true; 32 + } 33 + 34 + static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) 35 + { 36 + asm_volatile_goto("1:" 37 + ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" 38 + "2:\n\t" 39 + ".pushsection __jump_table, \"aw\" \n\t" 40 + _ASM_ALIGN "\n\t" 41 + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" 42 + ".popsection \n\t" 43 + : : "i" (&((char *)key)[branch]) : : l_yes); 44 + 28 45 return false; 29 46 l_yes: 30 47 return true;
+139 -10
include/linux/jump_label.h
··· 107 107 108 108 static __always_inline bool static_key_false(struct static_key *key) 109 109 { 110 - return arch_static_branch(key); 110 + return arch_static_branch(key, false); 111 111 } 112 112 113 113 static __always_inline bool static_key_true(struct static_key *key) 114 114 { 115 - return !static_key_false(key); 115 + return !arch_static_branch(key, true); 116 116 } 117 117 118 118 extern struct jump_entry __start___jump_table[]; ··· 130 130 extern void static_key_slow_dec(struct static_key *key); 131 131 extern void jump_label_apply_nops(struct module *mod); 132 132 133 - #define STATIC_KEY_INIT_TRUE ((struct static_key) \ 133 + #define STATIC_KEY_INIT_TRUE \ 134 134 { .enabled = ATOMIC_INIT(1), \ 135 - .entries = (void *)JUMP_TYPE_TRUE }) 136 - #define STATIC_KEY_INIT_FALSE ((struct static_key) \ 135 + .entries = (void *)JUMP_TYPE_TRUE } 136 + #define STATIC_KEY_INIT_FALSE \ 137 137 { .enabled = ATOMIC_INIT(0), \ 138 - .entries = (void *)JUMP_TYPE_FALSE }) 138 + .entries = (void *)JUMP_TYPE_FALSE } 139 139 140 140 #else /* !HAVE_JUMP_LABEL */ 141 141 ··· 183 183 return 0; 184 184 } 185 185 186 - #define STATIC_KEY_INIT_TRUE ((struct static_key) \ 187 - { .enabled = ATOMIC_INIT(1) }) 188 - #define STATIC_KEY_INIT_FALSE ((struct static_key) \ 189 - { .enabled = ATOMIC_INIT(0) }) 186 + #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } 187 + #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } 190 188 191 189 #endif /* HAVE_JUMP_LABEL */ 192 190 ··· 215 217 if (count) 216 218 static_key_slow_dec(key); 217 219 } 220 + 221 + /* -------------------------------------------------------------------------- */ 222 + 223 + /* 224 + * Two type wrappers around static_key, such that we can use compile time 225 + * type differentiation to emit the right code. 226 + * 227 + * All the below code is macros in order to play type games. 
228 + */ 229 + 230 + struct static_key_true { 231 + struct static_key key; 232 + }; 233 + 234 + struct static_key_false { 235 + struct static_key key; 236 + }; 237 + 238 + #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } 239 + #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } 240 + 241 + #define DEFINE_STATIC_KEY_TRUE(name) \ 242 + struct static_key_true name = STATIC_KEY_TRUE_INIT 243 + 244 + #define DEFINE_STATIC_KEY_FALSE(name) \ 245 + struct static_key_false name = STATIC_KEY_FALSE_INIT 246 + 247 + #ifdef HAVE_JUMP_LABEL 248 + 249 + /* 250 + * Combine the right initial value (type) with the right branch order 251 + * to generate the desired result. 252 + * 253 + * 254 + * type\branch| likely (1) | unlikely (0) 255 + * -----------+-----------------------+------------------ 256 + * | | 257 + * true (1) | ... | ... 258 + * | NOP | JMP L 259 + * | <br-stmts> | 1: ... 260 + * | L: ... | 261 + * | | 262 + * | | L: <br-stmts> 263 + * | | jmp 1b 264 + * | | 265 + * -----------+-----------------------+------------------ 266 + * | | 267 + * false (0) | ... | ... 268 + * | JMP L | NOP 269 + * | <br-stmts> | 1: ... 270 + * | L: ... | 271 + * | | 272 + * | | L: <br-stmts> 273 + * | | jmp 1b 274 + * | | 275 + * -----------+-----------------------+------------------ 276 + * 277 + * The initial value is encoded in the LSB of static_key::entries, 278 + * type: 0 = false, 1 = true. 279 + * 280 + * The branch type is encoded in the LSB of jump_entry::key, 281 + * branch: 0 = unlikely, 1 = likely. 
282 + * 283 + * This gives the following logic table: 284 + * 285 + * enabled type branch instruction 286 + * -----------------------------+----------- 287 + * 0 0 0 | NOP 288 + * 0 0 1 | JMP 289 + * 0 1 0 | NOP 290 + * 0 1 1 | JMP 291 + * 292 + * 1 0 0 | JMP 293 + * 1 0 1 | NOP 294 + * 1 1 0 | JMP 295 + * 1 1 1 | NOP 296 + * 297 + * Which gives the following functions: 298 + * 299 + * dynamic: instruction = enabled ^ branch 300 + * static: instruction = type ^ branch 301 + * 302 + * See jump_label_type() / jump_label_init_type(). 303 + */ 304 + 305 + extern bool ____wrong_branch_error(void); 306 + 307 + #define static_branch_likely(x) \ 308 + ({ \ 309 + bool branch; \ 310 + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ 311 + branch = !arch_static_branch(&(x)->key, true); \ 312 + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ 313 + branch = !arch_static_branch_jump(&(x)->key, true); \ 314 + else \ 315 + branch = ____wrong_branch_error(); \ 316 + branch; \ 317 + }) 318 + 319 + #define static_branch_unlikely(x) \ 320 + ({ \ 321 + bool branch; \ 322 + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ 323 + branch = arch_static_branch_jump(&(x)->key, false); \ 324 + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ 325 + branch = arch_static_branch(&(x)->key, false); \ 326 + else \ 327 + branch = ____wrong_branch_error(); \ 328 + branch; \ 329 + }) 330 + 331 + #else /* !HAVE_JUMP_LABEL */ 332 + 333 + #define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) 334 + #define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) 335 + 336 + #endif /* HAVE_JUMP_LABEL */ 337 + 338 + /* 339 + * Advanced usage; refcount, branch is enabled when: count != 0 340 + */ 341 + 342 + #define static_branch_inc(x) static_key_slow_inc(&(x)->key) 343 + #define static_branch_dec(x) static_key_slow_dec(&(x)->key) 344 + 345 + /* 346 + * Normal usage; boolean 
enable/disable. 347 + */ 348 + 349 + #define static_branch_enable(x) static_key_enable(&(x)->key) 350 + #define static_branch_disable(x) static_key_disable(&(x)->key) 218 351 219 352 #endif /* _LINUX_JUMP_LABEL_H */ 220 353
+30 -7
kernel/jump_label.c
··· 165 165 166 166 static inline struct static_key *jump_entry_key(struct jump_entry *entry) 167 167 { 168 - return (struct static_key *)((unsigned long)entry->key); 168 + return (struct static_key *)((unsigned long)entry->key & ~1UL); 169 + } 170 + 171 + static bool jump_entry_branch(struct jump_entry *entry) 172 + { 173 + return (unsigned long)entry->key & 1UL; 169 174 } 170 175 171 176 static enum jump_label_type jump_label_type(struct jump_entry *entry) 172 177 { 173 178 struct static_key *key = jump_entry_key(entry); 174 179 bool enabled = static_key_enabled(key); 175 - bool type = static_key_type(key); 180 + bool branch = jump_entry_branch(entry); 176 181 177 - return enabled ^ type; 182 + /* See the comment in linux/jump_label.h */ 183 + return enabled ^ branch; 178 184 } 179 185 180 186 static void __jump_label_update(struct static_key *key, ··· 211 205 for (iter = iter_start; iter < iter_stop; iter++) { 212 206 struct static_key *iterk; 213 207 214 - arch_jump_label_transform_static(iter, jump_label_type(iter)); 208 + /* rewrite NOPs */ 209 + if (jump_label_type(iter) == JUMP_LABEL_NOP) 210 + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); 211 + 215 212 iterk = jump_entry_key(iter); 216 213 if (iterk == key) 217 214 continue; ··· 233 224 } 234 225 235 226 #ifdef CONFIG_MODULES 227 + 228 + static enum jump_label_type jump_label_init_type(struct jump_entry *entry) 229 + { 230 + struct static_key *key = jump_entry_key(entry); 231 + bool type = static_key_type(key); 232 + bool branch = jump_entry_branch(entry); 233 + 234 + /* See the comment in linux/jump_label.h */ 235 + return type ^ branch; 236 + } 236 237 237 238 struct static_key_mod { 238 239 struct static_key_mod *next; ··· 295 276 if (iter_start == iter_stop) 296 277 return; 297 278 298 - for (iter = iter_start; iter < iter_stop; iter++) 299 - arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); 279 + for (iter = iter_start; iter < iter_stop; iter++) { 280 + /* Only write NOPs for 
arch_branch_static(). */ 281 + if (jump_label_init_type(iter) == JUMP_LABEL_NOP) 282 + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); 283 + } 300 284 } 301 285 302 286 static int jump_label_add_module(struct module *mod) ··· 340 318 jlm->next = key->next; 341 319 key->next = jlm; 342 320 343 - if (jump_label_type(iter) == JUMP_LABEL_JMP) 321 + /* Only update if we've changed from our initial state */ 322 + if (jump_label_type(iter) != jump_label_init_type(iter)) 344 323 __jump_label_update(key, iter, iter_stop); 345 324 } 346 325