Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/vdso: Wire up getrandom() vdso implementation

Provide the s390 specific vdso getrandom() architecture backend.

The data required for _vdso_rng_data is placed within the _vdso_data vvar
page, using a hardcoded offset larger than the vdso_data structure.

As required, the chacha20 implementation does not write to the stack.

The implementation more or less follows the arm64 implementation and
makes use of vector instructions. It falls back to the getrandom()
system call on machines where the vector facility is not installed.

The check if the vector facility is installed, as well as an
optimization for machines with the vector-enhancements facility 2, is
implemented with alternatives, avoiding runtime checks.

Note that __kernel_getrandom() is implemented without the vdso user
wrapper, which would set up a stack frame for odd cases (aka very old
glibc variants) where the caller has not done that. All callers of
__kernel_getrandom() are required to set up a stack frame, as the C ABI
requires.

The vdso testcases vdso_test_getrandom and vdso_test_chacha pass.

Benchmark on a z16:

$ ./vdso_test_getrandom bench-single
vdso: 25000000 times in 0.493703559 seconds
syscall: 25000000 times in 6.584025337 seconds

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

authored by

Heiko Carstens and committed by
Jason A. Donenfeld
b920aa77 c1ae1b4e

+290 -9
+1
arch/s390/Kconfig
··· 243 243 select TRACE_IRQFLAGS_SUPPORT 244 244 select TTY 245 245 select USER_STACKTRACE_SUPPORT 246 + select VDSO_GETRANDOM 246 247 select VIRT_CPU_ACCOUNTING 247 248 select ZONE_DMA 248 249 # Note: keep the above list sorted alphabetically
+22
arch/s390/include/asm/fpu-insn-asm.h
··· 407 407 MRXBOPC 0, 0x0E, v1 408 408 .endm 409 409 410 + /* VECTOR STORE BYTE REVERSED ELEMENTS */ 411 + .macro VSTBR vr1, disp, index="%r0", base, m 412 + VX_NUM v1, \vr1 413 + GR_NUM x2, \index 414 + GR_NUM b2, \base 415 + .word 0xE600 | ((v1&15) << 4) | (x2&15) 416 + .word (b2 << 12) | (\disp) 417 + MRXBOPC \m, 0x0E, v1 418 + .endm 419 + .macro VSTBRH vr1, disp, index="%r0", base 420 + VSTBR \vr1, \disp, \index, \base, 1 421 + .endm 422 + .macro VSTBRF vr1, disp, index="%r0", base 423 + VSTBR \vr1, \disp, \index, \base, 2 424 + .endm 425 + .macro VSTBRG vr1, disp, index="%r0", base 426 + VSTBR \vr1, \disp, \index, \base, 3 427 + .endm 428 + .macro VSTBRQ vr1, disp, index="%r0", base 429 + VSTBR \vr1, \disp, \index, \base, 4 430 + .endm 431 + 410 432 /* VECTOR STORE MULTIPLE */ 411 433 .macro VSTM vfrom, vto, disp, base, hint=3 412 434 VX_NUM v1, \vfrom
+40
arch/s390/include/asm/vdso/getrandom.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef __ASM_VDSO_GETRANDOM_H 4 + #define __ASM_VDSO_GETRANDOM_H 5 + 6 + #ifndef __ASSEMBLY__ 7 + 8 + #include <vdso/datapage.h> 9 + #include <asm/vdso/vsyscall.h> 10 + #include <asm/syscall.h> 11 + #include <asm/unistd.h> 12 + #include <asm/page.h> 13 + 14 + /** 15 + * getrandom_syscall - Invoke the getrandom() syscall. 16 + * @buffer: Destination buffer to fill with random bytes. 17 + * @len: Size of @buffer in bytes. 18 + * @flags: Zero or more GRND_* flags. 19 + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. 20 + */ 21 + static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) 22 + { 23 + return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); 24 + } 25 + 26 + static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) 27 + { 28 + /* 29 + * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace 30 + * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ 31 + * PAGE_OFFSET points to the real VVAR page. 32 + */ 33 + if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) 34 + return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; 35 + return &_vdso_rng_data; 36 + } 37 + 38 + #endif /* !__ASSEMBLY__ */ 39 + 40 + #endif /* __ASM_VDSO_GETRANDOM_H */
+15
arch/s390/include/asm/vdso/vsyscall.h
··· 2 2 #ifndef __ASM_VDSO_VSYSCALL_H 3 3 #define __ASM_VDSO_VSYSCALL_H 4 4 5 + #define __VDSO_RND_DATA_OFFSET 768 6 + 5 7 #ifndef __ASSEMBLY__ 6 8 7 9 #include <linux/hrtimer.h> 8 10 #include <linux/timekeeper_internal.h> 9 11 #include <vdso/datapage.h> 10 12 #include <asm/vdso.h> 13 + 14 + enum vvar_pages { 15 + VVAR_DATA_PAGE_OFFSET, 16 + VVAR_TIMENS_PAGE_OFFSET, 17 + VVAR_NR_PAGES 18 + }; 19 + 11 20 /* 12 21 * Update the vDSO data page to keep in sync with kernel timekeeping. 13 22 */ ··· 26 17 return vdso_data; 27 18 } 28 19 #define __arch_get_k_vdso_data __s390_get_k_vdso_data 20 + 21 + static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) 22 + { 23 + return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; 24 + } 25 + #define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data 29 26 30 27 /* The asm-generic header needs to be included after the definitions above */ 31 28 #include <asm-generic/vdso/vsyscall.h>
+1 -6
arch/s390/kernel/vdso.c
··· 19 19 #include <linux/time_namespace.h> 20 20 #include <linux/random.h> 21 21 #include <vdso/datapage.h> 22 + #include <asm/vdso/vsyscall.h> 22 23 #include <asm/alternative.h> 23 24 #include <asm/vdso.h> 24 25 ··· 31 30 static union vdso_data_store vdso_data_store __page_aligned_data; 32 31 33 32 struct vdso_data *vdso_data = vdso_data_store.data; 34 - 35 - enum vvar_pages { 36 - VVAR_DATA_PAGE_OFFSET, 37 - VVAR_TIMENS_PAGE_OFFSET, 38 - VVAR_NR_PAGES, 39 - }; 40 33 41 34 #ifdef CONFIG_TIME_NS 42 35 struct vdso_data *arch_get_vdso_data(void *vvar_page)
+7 -2
arch/s390/kernel/vdso64/Makefile
··· 3 3 4 4 # Include the generic Makefile to check the built vdso. 5 5 include $(srctree)/lib/vdso/Makefile 6 - obj-vdso64 = vdso_user_wrapper.o note.o 7 - obj-cvdso64 = vdso64_generic.o getcpu.o 6 + obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o 7 + obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o 8 8 VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) 9 9 CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) 10 + CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) 10 11 CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) 12 + 13 + ifneq ($(c-getrandom-y),) 14 + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) 15 + endif 11 16 12 17 # Build rules 13 18
+1
arch/s390/kernel/vdso64/vdso.h
··· 10 10 int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); 11 11 int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); 12 12 int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); 13 + ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); 13 14 14 15 #endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
+3
arch/s390/kernel/vdso64/vdso64.lds.S
··· 4 4 * library 5 5 */ 6 6 7 + #include <asm/vdso/vsyscall.h> 7 8 #include <asm/page.h> 8 9 #include <asm/vdso.h> 9 10 ··· 14 13 SECTIONS 15 14 { 16 15 PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); 16 + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); 17 17 #ifdef CONFIG_TIME_NS 18 18 PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); 19 19 #endif ··· 146 144 __kernel_restart_syscall; 147 145 __kernel_rt_sigreturn; 148 146 __kernel_sigreturn; 147 + __kernel_getrandom; 149 148 local: *; 150 149 }; 151 150 }
+185
arch/s390/kernel/vdso64/vgetrandom-chacha.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #include <linux/linkage.h> 4 + #include <asm/alternative.h> 5 + #include <asm/fpu-insn.h> 6 + 7 + #define STATE0 %v0 8 + #define STATE1 %v1 9 + #define STATE2 %v2 10 + #define STATE3 %v3 11 + #define COPY0 %v4 12 + #define COPY1 %v5 13 + #define COPY2 %v6 14 + #define COPY3 %v7 15 + #define PERM4 %v16 16 + #define PERM8 %v17 17 + #define PERM12 %v18 18 + #define BEPERM %v19 19 + #define TMP0 %v20 20 + #define TMP1 %v21 21 + #define TMP2 %v22 22 + #define TMP3 %v23 23 + 24 + .section .rodata 25 + 26 + .balign 128 27 + .Lconstants: 28 + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral 29 + .long 0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl 4 bytes 30 + .long 0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl 8 bytes 31 + .long 0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes 32 + .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap 33 + 34 + .text 35 + /* 36 + * s390 ChaCha20 implementation meant for vDSO. Produces a given positive 37 + * number of blocks of output with nonce 0, taking an input key and 8-bytes 38 + * counter. Does not spill to the stack. 
39 + * 40 + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, 41 + * const uint8_t *key, 42 + * uint32_t *counter, 43 + * size_t nblocks) 44 + */ 45 + SYM_FUNC_START(__arch_chacha20_blocks_nostack) 46 + larl %r1,.Lconstants 47 + 48 + /* COPY0 = "expand 32-byte k" */ 49 + VL COPY0,0,,%r1 50 + 51 + /* PERM4-PERM12,BEPERM = byte selectors for VPERM */ 52 + VLM PERM4,BEPERM,16,%r1 53 + 54 + /* COPY1,COPY2 = key */ 55 + VLM COPY1,COPY2,0,%r3 56 + 57 + /* COPY3 = counter || zero nonce */ 58 + lg %r3,0(%r4) 59 + VZERO COPY3 60 + VLVGG COPY3,%r3,0 61 + 62 + lghi %r1,0 63 + .Lblock: 64 + VLR STATE0,COPY0 65 + VLR STATE1,COPY1 66 + VLR STATE2,COPY2 67 + VLR STATE3,COPY3 68 + 69 + lghi %r0,10 70 + .Ldoubleround: 71 + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ 72 + VAF STATE0,STATE0,STATE1 73 + VX STATE3,STATE3,STATE0 74 + VERLLF STATE3,STATE3,16 75 + 76 + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */ 77 + VAF STATE2,STATE2,STATE3 78 + VX STATE1,STATE1,STATE2 79 + VERLLF STATE1,STATE1,12 80 + 81 + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */ 82 + VAF STATE0,STATE0,STATE1 83 + VX STATE3,STATE3,STATE0 84 + VERLLF STATE3,STATE3,8 85 + 86 + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */ 87 + VAF STATE2,STATE2,STATE3 88 + VX STATE1,STATE1,STATE2 89 + VERLLF STATE1,STATE1,7 90 + 91 + /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */ 92 + VPERM STATE1,STATE1,STATE1,PERM4 93 + /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ 94 + VPERM STATE2,STATE2,STATE2,PERM8 95 + /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */ 96 + VPERM STATE3,STATE3,STATE3,PERM12 97 + 98 + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ 99 + VAF STATE0,STATE0,STATE1 100 + VX STATE3,STATE3,STATE0 101 + VERLLF STATE3,STATE3,16 102 + 103 + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */ 104 + VAF STATE2,STATE2,STATE3 105 + VX STATE1,STATE1,STATE2 106 + VERLLF STATE1,STATE1,12 107 + 108 + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */ 
109 + VAF STATE0,STATE0,STATE1 110 + VX STATE3,STATE3,STATE0 111 + VERLLF STATE3,STATE3,8 112 + 113 + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */ 114 + VAF STATE2,STATE2,STATE3 115 + VX STATE1,STATE1,STATE2 116 + VERLLF STATE1,STATE1,7 117 + 118 + /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */ 119 + VPERM STATE1,STATE1,STATE1,PERM12 120 + /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ 121 + VPERM STATE2,STATE2,STATE2,PERM8 122 + /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */ 123 + VPERM STATE3,STATE3,STATE3,PERM4 124 + brctg %r0,.Ldoubleround 125 + 126 + /* OUTPUT0 = STATE0 + COPY0 */ 127 + VAF STATE0,STATE0,COPY0 128 + /* OUTPUT1 = STATE1 + COPY1 */ 129 + VAF STATE1,STATE1,COPY1 130 + /* OUTPUT2 = STATE2 + COPY2 */ 131 + VAF STATE2,STATE2,COPY2 132 + /* OUTPUT3 = STATE3 + COPY3 */ 133 + VAF STATE3,STATE3,COPY3 134 + 135 + /* 136 + * 32 bit wise little endian store to OUTPUT. If the vector 137 + * enhancement facility 2 is not installed use the slow path. 138 + */ 139 + ALTERNATIVE "brc 0xf,.Lstoreslow", "nop", ALT_FACILITY(148) 140 + VSTBRF STATE0,0,,%r2 141 + VSTBRF STATE1,16,,%r2 142 + VSTBRF STATE2,32,,%r2 143 + VSTBRF STATE3,48,,%r2 144 + .Lstoredone: 145 + 146 + /* ++COPY3.COUNTER */ 147 + /* alsih %r3,1 */ 148 + .insn rilu,0xcc0a00000000,%r3,1 149 + alcr %r3,%r1 150 + VLVGG COPY3,%r3,0 151 + 152 + /* OUTPUT += 64, --NBLOCKS */ 153 + aghi %r2,64 154 + brctg %r5,.Lblock 155 + 156 + /* COUNTER = COPY3.COUNTER */ 157 + stg %r3,0(%r4) 158 + 159 + /* Zero out potentially sensitive regs */ 160 + VZERO STATE0 161 + VZERO STATE1 162 + VZERO STATE2 163 + VZERO STATE3 164 + VZERO COPY1 165 + VZERO COPY2 166 + 167 + /* Early exit if TMP0-TMP3 have not been used */ 168 + ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148) 169 + 170 + VZERO TMP0 171 + VZERO TMP1 172 + VZERO TMP2 173 + VZERO TMP3 174 + 175 + br %r14 176 + 177 + .Lstoreslow: 178 + /* Convert STATE to little endian format and store to OUTPUT */ 179 + VPERM TMP0,STATE0,STATE0,BEPERM 180 + VPERM
TMP1,STATE1,STATE1,BEPERM 181 + VPERM TMP2,STATE2,STATE2,BEPERM 182 + VPERM TMP3,STATE3,STATE3,BEPERM 183 + VSTM TMP0,TMP3,0,%r2 184 + j .Lstoredone 185 + SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+14
arch/s390/kernel/vdso64/vgetrandom.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <asm/facility.h> 4 + #include <uapi/asm-generic/errno.h> 5 + #include "vdso.h" 6 + 7 + ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) 8 + { 9 + if (test_facility(129)) 10 + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); 11 + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) 12 + return -ENOSYS; 13 + return getrandom_syscall(buffer, len, flags); 14 + }
+1 -1
tools/testing/selftests/vDSO/Makefile
··· 9 9 TEST_GEN_PROGS += vdso_standalone_test_x86 10 10 endif 11 11 TEST_GEN_PROGS += vdso_test_correctness 12 - ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc)) 12 + ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390)) 13 13 TEST_GEN_PROGS += vdso_test_getrandom 14 14 TEST_GEN_PROGS += vdso_test_chacha 15 15 endif