Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RISC-V: vDSO: Wire up getrandom() vDSO implementation

Hook up the RISC-V vDSO to the generic vDSO getrandom implementation by
providing the required __arch_chacha20_blocks_nostack and
getrandom_syscall implementations. Also wire up the selftests.

The benchmark result:

vdso: 25000000 times in 2.466341333 seconds
libc: 25000000 times in 41.447720005 seconds
syscall: 25000000 times in 41.043926672 seconds

vdso: 25000000 x 256 times in 162.286219353 seconds
libc: 25000000 x 256 times in 2953.855018685 seconds
syscall: 25000000 x 256 times in 2796.268546000 seconds

[ alex: - Fix dynamic relocation
- Squash Nathan's fix https://lore.kernel.org/all/20250423-riscv-fix-compat_vdso-lld-v2-1-b7bbbc244501@kernel.org/
- Add comment from LoongArch ]

Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Link: https://lore.kernel.org/r/20250411024600.16045-1-xry111@xry111.site
Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>

authored by

Xi Ruoyao and committed by
Palmer Dabbelt
ee0d0305 a869b8c2

+308
+1
arch/riscv/Kconfig
··· 219 219 select THREAD_INFO_IN_TASK 220 220 select TRACE_IRQFLAGS_SUPPORT 221 221 select UACCESS_MEMCPY if !MMU 222 + select VDSO_GETRANDOM if HAVE_GENERIC_VDSO 222 223 select USER_STACKTRACE_SUPPORT 223 224 select ZONE_DMA32 if 64BIT 224 225
+30
arch/riscv/include/asm/vdso/getrandom.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved. 4 + */ 5 + #ifndef __ASM_VDSO_GETRANDOM_H 6 + #define __ASM_VDSO_GETRANDOM_H 7 + 8 + #ifndef __ASSEMBLY__ 9 + 10 + #include <asm/unistd.h> 11 + 12 + static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) 13 + { 14 + register long ret asm("a0"); 15 + register long nr asm("a7") = __NR_getrandom; 16 + register void *buffer asm("a0") = _buffer; 17 + register size_t len asm("a1") = _len; 18 + register unsigned int flags asm("a2") = _flags; 19 + 20 + asm volatile ("ecall\n" 21 + : "+r" (ret) 22 + : "r" (nr), "r" (buffer), "r" (len), "r" (flags) 23 + : "memory"); 24 + 25 + return ret; 26 + } 27 + 28 + #endif /* !__ASSEMBLY__ */ 29 + 30 + #endif /* __ASM_VDSO_GETRANDOM_H */
+13
arch/riscv/kernel/vdso/Makefile
··· 13 13 vdso-syms += hwprobe 14 14 vdso-syms += sys_hwprobe 15 15 16 + ifdef CONFIG_VDSO_GETRANDOM 17 + vdso-syms += getrandom 18 + endif 19 + 16 20 # Files to link into the vdso 17 21 obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o 22 + 23 + ifdef CONFIG_VDSO_GETRANDOM 24 + obj-vdso += vgetrandom-chacha.o 25 + endif 18 26 19 27 ccflags-y := -fno-stack-protector 20 28 ccflags-y += -DDISABLE_BRANCH_PROFILING ··· 30 22 31 23 ifneq ($(c-gettimeofday-y),) 32 24 CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) 25 + endif 26 + 27 + ifneq ($(c-getrandom-y),) 28 + CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y) 33 29 endif 34 30 35 31 CFLAGS_hwprobe.o += -fPIC ··· 50 38 51 39 # Disable -pg to prevent insert call site 52 40 CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) 41 + CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) 53 42 CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) 54 43 55 44 # Force dependency
+10
arch/riscv/kernel/vdso/getrandom.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
 */
#include <linux/types.h>

/*
 * vDSO entry point for getrandom: forwards every argument unchanged to
 * __cvdso_getrandom() and hands its result back to the caller.
 *
 * NOTE(review): __cvdso_getrandom() is not declared in this file; it is
 * presumably supplied by the "-include $(c-getrandom-y)" flag that the
 * vDSO Makefile adds for getrandom.o -- confirm against the build rules.
 */
ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags,
			 void *opaque_state, size_t opaque_len)
{
	ssize_t ret;

	ret = __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);

	return ret;
}
+3
arch/riscv/kernel/vdso/vdso.lds.S
··· 80 80 #ifndef COMPAT_VDSO 81 81 __vdso_riscv_hwprobe; 82 82 #endif 83 + #if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO) 84 + __vdso_getrandom; 85 + #endif 83 86 local: *; 84 87 }; 85 88 }
+249
arch/riscv/kernel/vdso/vgetrandom-chacha.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2025 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
 *
 * Based on arch/loongarch/vdso/vgetrandom-chacha.S.
 */

#include <asm/asm.h>
#include <linux/linkage.h>

.text

/*
 * Rotate the low 32 bits of \rs right by \imm bits into \rd.
 * Clobbers t0: on exit t0 holds \rs shifted left by (32 - \imm).
 */
.macro	ROTRI	rd rs imm
	slliw	t0, \rs, 32 - \imm
	srliw	\rd, \rs, \imm
	or	\rd, \rd, t0
.endm

/* Apply "\op dN, dN, sN" to four destination/source pairs. */
.macro	OP_4REG	op d0 d1 d2 d3 s0 s1 s2 s3
	\op	\d0, \d0, \s0
	\op	\d1, \d1, \s1
	\op	\d2, \d2, \s2
	\op	\d3, \d3, \s3
.endm

/*
 *	a0: output bytes
 *	a1: 32-byte key input
 *	a2: 8-byte counter input/output
 *	a3: number of 64-byte blocks to write to output
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25

	/*
	 * s0-s11 are used below for state[0..11], so the caller's values are
	 * saved here and restored in the epilogue.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	ld		cnt, (counter)

	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7,  12(key)
	lw		state8,  16(key)
	lw		state9,  20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/* state[12,13] = counter */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again. At this point copy[0,1,2,3] only contain "expand 32-byte k"
	 * and state[0,...,11] live in s0-s11, which are overwritten by the
	 * restore in the epilogue, so state[12,...,15] must be zeroed
	 * explicitly. t0 must be zeroed as well: ROTRI uses it as scratch, so
	 * it still holds a shifted copy of a state word from the last round.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+2
tools/testing/selftests/vDSO/vgetrandom-chacha.S
··· 11 11 #include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S" 12 12 #elif defined(__powerpc__) || defined(__powerpc64__) 13 13 #include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S" 14 + #elif defined(__riscv) && __riscv_xlen == 64 15 + #include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S" 14 16 #elif defined(__s390x__) 15 17 #include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S" 16 18 #elif defined(__x86_64__)