Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Fix and improve percpu_cmpxchg{8,16}b_double()

They had several problems/shortcomings:

Only the first memory operand was mentioned in the 2x32-bit asm()
operands, and the 2x64-bit version had a memory clobber. The first
allowed the compiler to not recognize the need to re-load the
data in case it had it cached in some register, and the second
was overly destructive.

The memory operand in the 2x32-bit asm() was declared to only be
an output.

The types of the local copies of the old and new values were
incorrect (as in other per-CPU ops, the types of the per-CPU
variables accessed should be used here, to make sure the
respective types are compatible).

The __dummy variable was pointless (and needlessly initialized
in the 2x32-bit case), given that local copies of the inputs
already exist.

The 2x64-bit variant forced the address of the first object into
%rsi, even though this is needed only for the call to the
emulation function. The real cmpxchg16b can operate on a memory
operand directly.

At once also change the return value type to what it really is -
'bool'.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4EE86D6502000078000679FE@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by

Jan Beulich and committed by
Ingo Molnar
cebef5be 969df4b8

+21 -32
+21 -32
arch/x86/include/asm/percpu.h
··· 451 451 #endif /* !CONFIG_M386 */ 452 452 453 453 #ifdef CONFIG_X86_CMPXCHG64 454 - #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ 454 + #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ 455 455 ({ \ 456 - char __ret; \ 457 - typeof(o1) __o1 = o1; \ 458 - typeof(o1) __n1 = n1; \ 459 - typeof(o2) __o2 = o2; \ 460 - typeof(o2) __n2 = n2; \ 461 - typeof(o2) __dummy = n2; \ 456 + bool __ret; \ 457 + typeof(pcp1) __o1 = (o1), __n1 = (n1); \ 458 + typeof(pcp2) __o2 = (o2), __n2 = (n2); \ 462 459 asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ 463 - : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ 464 - : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ 460 + : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ 461 + : "b" (__n1), "c" (__n2), "a" (__o1)); \ 465 462 __ret; \ 466 463 }) 467 464 468 - #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 469 - #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 470 - #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) 465 + #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 466 + #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 467 + #define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double 471 468 #endif /* CONFIG_X86_CMPXCHG64 */ 472 469 473 470 /* ··· 505 508 * it in software. The address used in the cmpxchg16 instruction must be 506 509 * aligned to a 16 byte boundary. 
507 510 */ 508 - #ifdef CONFIG_SMP 509 - #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3 510 - #else 511 - #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2 512 - #endif 513 - #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ 511 + #define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ 514 512 ({ \ 515 - char __ret; \ 516 - typeof(o1) __o1 = o1; \ 517 - typeof(o1) __n1 = n1; \ 518 - typeof(o2) __o2 = o2; \ 519 - typeof(o2) __n2 = n2; \ 520 - typeof(o2) __dummy; \ 521 - alternative_io(CMPXCHG16B_EMU_CALL, \ 522 - "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ 513 + bool __ret; \ 514 + typeof(pcp1) __o1 = (o1), __n1 = (n1); \ 515 + typeof(pcp2) __o2 = (o2), __n2 = (n2); \ 516 + alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ 517 + "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ 523 518 X86_FEATURE_CX16, \ 524 - ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ 525 - "S" (&pcp1), "b"(__n1), "c"(__n2), \ 526 - "a"(__o1), "d"(__o2) : "memory"); \ 519 + ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ 520 + "+m" (pcp2), "+d" (__o2)), \ 521 + "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ 527 522 __ret; \ 528 523 }) 529 524 530 - #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 531 - #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 532 - #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) 525 + #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 526 + #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 527 + #define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double 533 528 534 529 #endif 535 530