Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/percpu: Clean up percpu_cmpxchg_op()

The core percpu macros already have a switch on the data size, so the switch
in the x86 code is redundant and produces more dead code.

Also use appropriate types for the width of the instructions. This avoids
errors when compiling with Clang.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Dennis Zhou <dennis@kernel.org>
Link: https://lkml.kernel.org/r/20200720204925.3654302-9-ndesaulniers@google.com

Authored by Brian Gerst and committed by Thomas Gleixner
ebcd580b 73ca542f

+18 -40
+18 -40
arch/x86/include/asm/percpu.h
···
236 236 * cmpxchg has no such implied lock semantics as a result it is much
237 237 * more efficient for cpu local operations.
238 238 */
239 - #define percpu_cmpxchg_op(qual, var, oval, nval) \
239 + #define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \
240 240 ({ \
241 - typeof(var) pco_ret__; \
242 - typeof(var) pco_old__ = (oval); \
243 - typeof(var) pco_new__ = (nval); \
244 - switch (sizeof(var)) { \
245 - case 1: \
246 - asm qual ("cmpxchgb %2, "__percpu_arg(1) \
247 - : "=a" (pco_ret__), "+m" (var) \
248 - : "q" (pco_new__), "0" (pco_old__) \
249 - : "memory"); \
250 - break; \
251 - case 2: \
252 - asm qual ("cmpxchgw %2, "__percpu_arg(1) \
253 - : "=a" (pco_ret__), "+m" (var) \
254 - : "r" (pco_new__), "0" (pco_old__) \
255 - : "memory"); \
256 - break; \
257 - case 4: \
258 - asm qual ("cmpxchgl %2, "__percpu_arg(1) \
259 - : "=a" (pco_ret__), "+m" (var) \
260 - : "r" (pco_new__), "0" (pco_old__) \
261 - : "memory"); \
262 - break; \
263 - case 8: \
264 - asm qual ("cmpxchgq %2, "__percpu_arg(1) \
265 - : "=a" (pco_ret__), "+m" (var) \
266 - : "r" (pco_new__), "0" (pco_old__) \
267 - : "memory"); \
268 - break; \
269 - default: __bad_percpu_size(); \
270 - } \
271 - pco_ret__; \
241 + __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
242 + __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
243 + asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
244 + __percpu_arg([var])) \
245 + : [oval] "+a" (pco_old__), \
246 + [var] "+m" (_var) \
247 + : [nval] __pcpu_reg_##size(, pco_new__) \
248 + : "memory"); \
249 + (typeof(_var))(unsigned long) pco_old__; \
272 250 })
273 251
274 252 /*
···
314 336 #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
315 337 #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
316 338 #define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
317 - #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
318 - #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
319 - #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
339 + #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
340 + #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
341 + #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
320 342
321 343 #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
322 344 #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
323 345 #define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
324 - #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
325 - #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
326 - #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
346 + #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
347 + #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
348 + #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
327 349
328 350 #ifdef CONFIG_X86_CMPXCHG64
329 351 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
···
354 376 #define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
355 377 #define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
356 378 #define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
357 - #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
379 + #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
358 380
359 381 #define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
360 382 #define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
···
363 385 #define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
364 386 #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
365 387 #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
366 - #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
388 + #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
367 389
368 390 /*
369 391 * Pretty complex macro to generate cmpxchg16 instruction. The instruction