Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

riscv: Implement arch_cmpxchg128() using Zacas

Now that Zacas is supported in the kernel, let's use the double-word
(128-bit) atomic version of amocas, amocas.q, to improve the SLUB allocator.

Note that we have to select fixed registers: amocas.q operates on
even-numbered register pairs, and otherwise gcc fails to pick even registers
and then produces a reserved encoding which fails to assemble.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-8-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

authored by

Alexandre Ghiti and committed by
Palmer Dabbelt
f7bd2be7 6116e22e

+39
+1
arch/riscv/Kconfig
··· 115 115 select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO 116 116 select HARDIRQS_SW_RESEND 117 117 select HAS_IOPORT if MMU 118 + select HAVE_ALIGNED_STRUCT_PAGE 118 119 select HAVE_ARCH_AUDITSYSCALL 119 120 select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP 120 121 select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
+38
arch/riscv/include/asm/cmpxchg.h
··· 296 296 arch_cmpxchg_release((ptr), (o), (n)); \ 297 297 }) 298 298 299 + #if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) 300 + 301 + #define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) 302 + 303 + union __u128_halves { 304 + u128 full; 305 + struct { 306 + u64 low, high; 307 + }; 308 + }; 309 + 310 + #define __arch_cmpxchg128(p, o, n, cas_sfx) \ 311 + ({ \ 312 + __typeof__(*(p)) __o = (o); \ 313 + union __u128_halves __hn = { .full = (n) }; \ 314 + union __u128_halves __ho = { .full = (__o) }; \ 315 + register unsigned long t1 asm ("t1") = __hn.low; \ 316 + register unsigned long t2 asm ("t2") = __hn.high; \ 317 + register unsigned long t3 asm ("t3") = __ho.low; \ 318 + register unsigned long t4 asm ("t4") = __ho.high; \ 319 + \ 320 + __asm__ __volatile__ ( \ 321 + " amocas.q" cas_sfx " %0, %z3, %2" \ 322 + : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \ 323 + : "rJ" (t1), "rJ" (t2) \ 324 + : "memory"); \ 325 + \ 326 + ((u128)t4 << 64) | t3; \ 327 + }) 328 + 329 + #define arch_cmpxchg128(ptr, o, n) \ 330 + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") 331 + 332 + #define arch_cmpxchg128_local(ptr, o, n) \ 333 + __arch_cmpxchg128((ptr), (o), (n), "") 334 + 335 + #endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */ 336 + 299 337 #ifdef CONFIG_RISCV_ISA_ZAWRS 300 338 /* 301 339 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to