Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

riscv: Implement xchg8/16() using Zabha

This adds runtime support for Zabha in xchg8/16() operations.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-9-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
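For context, xchg8()/xchg16() atomically store a new 8- or 16-bit value and return the previous one. Below is a minimal user-space sketch of those semantics using the GCC/Clang __atomic builtins rather than the kernel macros (illustrative only, not the kernel API); a compiler targeting a Zabha-capable -march string may lower this to the same single amoswap.h the kernel fast path emits, while cores without Zabha need a word-sized lr.w/sc.w loop.

/*
 * Illustrative sketch only (not kernel code): the semantics xchg16()
 * provides, expressed with GCC/Clang __atomic builtins in user space.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t lock_word = 0x0001;

	/* Atomically store the new value and fetch the old one. */
	uint16_t old = __atomic_exchange_n(&lock_word, 0xbeef,
					   __ATOMIC_SEQ_CST);

	printf("old=0x%04x new=0x%04x\n", old, lock_word);
	return 0;
}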

Authored by Alexandre Ghiti, committed by Palmer Dabbelt
97ddab7f f7bd2be7

+41 -24
arch/riscv/include/asm/cmpxchg.h
···
 #include <asm/insn-def.h>
 #include <asm/cpufeature-macros.h>
 
-#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n)		\
-({									\
-	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
-	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
-	ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
-			<< __s;						\
-	ulong __newx = (ulong)(n) << __s;				\
-	ulong __retx;							\
-	ulong __rc;							\
-									\
-	__asm__ __volatile__ (						\
-	       prepend							\
-	       "0:	lr.w %0, %2\n"					\
-	       "	and  %1, %0, %z4\n"				\
-	       "	or   %1, %1, %z3\n"				\
-	       "	sc.w" sc_sfx " %1, %1, %2\n"			\
-	       "	bnez %1, 0b\n"					\
-	       append							\
-	       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
-	       : "rJ" (__newx), "rJ" (~__mask)				\
-	       : "memory");						\
-									\
-	r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
+#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
+			   swap_append, r, p, n)			\
+({									\
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
+	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
+		__asm__ __volatile__ (					\
+			prepend						\
+			"	amoswap" swap_sfx " %0, %z2, %1\n"	\
+			swap_append					\
+			: "=&r" (r), "+A" (*(p))			\
+			: "rJ" (n)					\
+			: "memory");					\
+	} else {							\
+		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
+		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
+		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
+				<< __s;					\
+		ulong __newx = (ulong)(n) << __s;			\
+		ulong __retx;						\
+		ulong __rc;						\
+									\
+		__asm__ __volatile__ (					\
+			prepend						\
+			"0:	lr.w %0, %2\n"				\
+			"	and  %1, %0, %z4\n"			\
+			"	or   %1, %1, %z3\n"			\
+			"	sc.w" sc_sfx " %1, %1, %2\n"		\
+			"	bnez %1, 0b\n"				\
+			sc_append					\
+			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
+			: "rJ" (__newx), "rJ" (~__mask)			\
+			: "memory");					\
+									\
+		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
+	}								\
 })
 
 #define __arch_xchg(sfx, prepend, append, r, p, n)			\
···
 									\
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
+		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
+				   prepend, sc_append, swap_append,	\
+				   __ret, __ptr, __new);		\
+		break;							\
 	case 2:								\
-		__arch_xchg_masked(sc_sfx, prepend, sc_append,		\
+		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
+				   prepend, sc_append, swap_append,	\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 4:								\
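The else branch above is the pre-Zabha fallback: the byte or halfword is exchanged by operating on its aligned 32-bit container with lr.w/sc.w, shifting and masking the sub-word lane. Below is a rough user-space analogue of that technique, with a word-wide compare-and-swap standing in for lr.w/sc.w; the function and variable names are illustrative, not kernel code.

/*
 * Illustrative sketch only: emulate an atomic byte exchange with a
 * 32-bit compare-and-swap loop, the same mask/shift idea as the
 * lr.w/sc.w fallback above.
 */
#include <stdint.h>
#include <stdio.h>

static uint8_t xchg8_emulated(uint8_t *p, uint8_t newval)
{
	/* Aligned 32-bit word containing the byte, and the byte's bit offset. */
	uint32_t *word = (uint32_t *)((uintptr_t)p & ~(uintptr_t)0x3);
	unsigned int shift = ((uintptr_t)p & 0x3) * 8;	/* little-endian */
	uint32_t mask = (uint32_t)0xff << shift;
	uint32_t old, desired;

	old = __atomic_load_n(word, __ATOMIC_RELAXED);
	do {
		/* Splice the new byte into its containing word. */
		desired = (old & ~mask) | ((uint32_t)newval << shift);
		/* Retry if another thread changed the word meanwhile. */
	} while (!__atomic_compare_exchange_n(word, &old, desired, 0,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_RELAXED));

	return (uint8_t)((old & mask) >> shift);
}

int main(void)
{
	uint32_t backing = 0x11223344;
	uint8_t *bytes = (uint8_t *)&backing;

	uint8_t prev = xchg8_emulated(&bytes[1], 0xaa);
	printf("previous byte 0x%02x, word now 0x%08x\n", prev,
	       (unsigned)backing);
	return 0;
}

The shift comes from the low address bits, which matches RISC-V's little-endian byte order, and retrying on CAS failure plays the role of the bnez-on-sc.w-failure loop in the kernel macro.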