Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/fpu: improve kernel_fpu_[begin|end]

In case of nested use of the FPU or vector registers in the kernel
the current code uses the mask of the FPU/vector registers of the
previous contexts to decide which registers to save and restore.
E.g. if the previous context used KERNEL_VXR_V0V7 and the next
context wants to use KERNEL_VXR_V24V31 the first 8 vector registers
are stored to the FPU state structure. But this is not necessary
as the next context does not use these registers.

Rework the FPU/vector register save and restore code. The new code
does a few things differently:
1) A lowcore field is used instead of a per-cpu variable.
2) The kernel_fpu_end function now has two parameters just like
kernel_fpu_begin. The register flags are required by both
functions to save / restore the minimal register set.
3) The inline functions kernel_fpu_begin/kernel_fpu_end now do the
update of the register masks. If the user space FPU registers
have already been stored neither save_fpu_regs nor the
__kernel_fpu_begin/__kernel_fpu_end functions have to be called
for the first context. In this case kernel_fpu_begin adds 7
instructions and kernel_fpu_end adds 4 instructions.
4) The inline assemblies in __kernel_fpu_begin / __kernel_fpu_end
to save / restore the vector registers are simplified a bit.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

+148 -210
+1 -1
arch/s390/crypto/crc32-vx.c
··· 67 67 \ 68 68 kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ 69 69 crc = ___crc32_vx(crc, data, aligned); \ 70 - kernel_fpu_end(&vxstate); \ 70 + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ 71 71 \ 72 72 if (remaining) \ 73 73 crc = ___crc32_sw(crc, data + aligned, remaining); \
+21 -11
arch/s390/include/asm/fpu/api.h
··· 64 64 return rc; 65 65 } 66 66 67 - #define KERNEL_VXR_V0V7 1 68 - #define KERNEL_VXR_V8V15 2 69 - #define KERNEL_VXR_V16V23 4 70 - #define KERNEL_VXR_V24V31 8 71 - #define KERNEL_FPR 16 72 - #define KERNEL_FPC 256 67 + #define KERNEL_FPC 1 68 + #define KERNEL_VXR_V0V7 2 69 + #define KERNEL_VXR_V8V15 4 70 + #define KERNEL_VXR_V16V23 8 71 + #define KERNEL_VXR_V24V31 16 73 72 74 73 #define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15) 75 74 #define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23) 76 75 #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) 77 76 78 - #define KERNEL_FPU_MASK (KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR) 77 + #define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) 78 + #define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) 79 79 80 80 struct kernel_fpu; 81 81 ··· 87 87 * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. 88 88 */ 89 89 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); 90 - void __kernel_fpu_end(struct kernel_fpu *state); 90 + void __kernel_fpu_end(struct kernel_fpu *state, u32 flags); 91 91 92 92 93 93 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) 94 94 { 95 95 preempt_disable(); 96 - __kernel_fpu_begin(state, flags); 96 + state->mask = S390_lowcore.fpu_flags; 97 + if (!test_cpu_flag(CIF_FPU)) 98 + /* Save user space FPU state and register contents */ 99 + save_fpu_regs(); 100 + else if (state->mask & flags) 101 + /* Save FPU/vector register in-use by the kernel */ 102 + __kernel_fpu_begin(state, flags); 103 + S390_lowcore.fpu_flags |= flags; 97 104 } 98 105 99 - static inline void kernel_fpu_end(struct kernel_fpu *state) 106 + static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags) 100 107 { 101 - __kernel_fpu_end(state); 108 + S390_lowcore.fpu_flags = state->mask; 109 + if (state->mask & flags) 110 + /* Restore FPU/vector register in-use by the kernel */ 111 + __kernel_fpu_end(state, flags); 102 112 preempt_enable(); 103 113 } 104 114
+2 -1
arch/s390/include/asm/lowcore.h
··· 129 129 __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ 130 130 __u64 gmap; /* 0x0398 */ 131 131 __u32 spinlock_lockval; /* 0x03a0 */ 132 - __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ 132 + __u32 fpu_flags; /* 0x03a4 */ 133 + __u8 pad_0x03a8[0x0400-0x03a8]; /* 0x03a8 */ 133 134 134 135 /* Per cpu primary space access list */ 135 136 __u32 paste[16]; /* 0x0400 */
+123 -196
arch/s390/kernel/fpu.c
··· 10 10 #include <asm/fpu/types.h> 11 11 #include <asm/fpu/api.h> 12 12 13 - /* 14 - * Per-CPU variable to maintain FPU register ranges that are in use 15 - * by the kernel. 16 - */ 17 - static DEFINE_PER_CPU(u32, kernel_fpu_state); 18 - 19 - #define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC) 20 - 13 + asm(".include \"asm/vx-insn.h\"\n"); 21 14 22 15 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) 23 16 { 24 - if (!__this_cpu_read(kernel_fpu_state)) { 25 - /* 26 - * Save user space FPU state and register contents. Multiple 27 - * calls because of interruptions do not matter and return 28 - * immediately. This also sets CIF_FPU to lazy restore FP/VX 29 - * register contents when returning to user space. 30 - */ 31 - save_fpu_regs(); 32 - } 33 - 34 - /* Update flags to use the vector facility for KERNEL_FPR */ 35 - if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { 36 - flags |= KERNEL_VXR_LOW | KERNEL_FPC; 37 - flags &= ~KERNEL_FPR; 38 - } 39 - 40 - /* Save and update current kernel VX state */ 41 - state->mask = __this_cpu_read(kernel_fpu_state); 42 - __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); 43 - 44 17 /* 45 - * If this is the first call to __kernel_fpu_begin(), no additional 46 - * work is required. 18 + * Limit the save to the FPU/vector registers already 19 + * in use by the previous context 47 20 */ 48 - if (!(state->mask & KERNEL_FPU_STATE_MASK)) 49 - return; 21 + flags &= state->mask; 50 22 51 - /* 52 - * If KERNEL_FPR is still set, the vector facility is not available 53 - * and, thus, save floating-point control and registers only. 
54 - */ 55 - if (state->mask & KERNEL_FPR) { 56 - asm volatile("stfpc %0" : "=Q" (state->fpc)); 57 - asm volatile("std 0,%0" : "=Q" (state->fprs[0])); 58 - asm volatile("std 1,%0" : "=Q" (state->fprs[1])); 59 - asm volatile("std 2,%0" : "=Q" (state->fprs[2])); 60 - asm volatile("std 3,%0" : "=Q" (state->fprs[3])); 61 - asm volatile("std 4,%0" : "=Q" (state->fprs[4])); 62 - asm volatile("std 5,%0" : "=Q" (state->fprs[5])); 63 - asm volatile("std 6,%0" : "=Q" (state->fprs[6])); 64 - asm volatile("std 7,%0" : "=Q" (state->fprs[7])); 65 - asm volatile("std 8,%0" : "=Q" (state->fprs[8])); 66 - asm volatile("std 9,%0" : "=Q" (state->fprs[9])); 67 - asm volatile("std 10,%0" : "=Q" (state->fprs[10])); 68 - asm volatile("std 11,%0" : "=Q" (state->fprs[11])); 69 - asm volatile("std 12,%0" : "=Q" (state->fprs[12])); 70 - asm volatile("std 13,%0" : "=Q" (state->fprs[13])); 71 - asm volatile("std 14,%0" : "=Q" (state->fprs[14])); 72 - asm volatile("std 15,%0" : "=Q" (state->fprs[15])); 73 - return; 74 - } 75 - 76 - /* 77 - * If this is a nested call to __kernel_fpu_begin(), check the saved 78 - * state mask to save and later restore the vector registers that 79 - * are already in use. Let's start with checking floating-point 80 - * controls. 
81 - */ 82 - if (state->mask & KERNEL_FPC) 23 + if (flags & KERNEL_FPC) 24 + /* Save floating point control */ 83 25 asm volatile("stfpc %0" : "=m" (state->fpc)); 26 + 27 + if (!MACHINE_HAS_VX) { 28 + if (flags & KERNEL_VXR_V0V7) { 29 + /* Save floating-point registers */ 30 + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); 31 + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); 32 + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); 33 + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); 34 + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); 35 + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); 36 + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); 37 + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); 38 + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); 39 + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); 40 + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); 41 + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); 42 + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); 43 + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); 44 + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); 45 + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); 46 + } 47 + return; 48 + } 84 49 85 50 /* Test and save vector registers */ 86 51 asm volatile ( ··· 53 88 * Test if any vector register must be saved and, if so, 54 89 * test if all register can be saved. 55 90 */ 56 - " tmll %[m],15\n" /* KERNEL_VXR_MASK */ 57 - " jz 20f\n" /* no work -> done */ 58 91 " la 1,%[vxrs]\n" /* load save area */ 59 - " jo 18f\n" /* -> save V0..V31 */ 60 - 92 + " tmll %[m],30\n" /* KERNEL_VXR */ 93 + " jz 7f\n" /* no work -> done */ 94 + " jo 5f\n" /* -> save V0..V31 */ 61 95 /* 62 - * Test if V8..V23 can be saved at once... this speeds up 63 - * for KERNEL_fpu_MID only. Otherwise continue to split the 64 - * range of vector registers into two halves and test them 65 - * separately. 96 + * Test for special case KERNEL_FPU_MID only. 
In this 97 + * case a vstm V8..V23 is the best instruction 66 98 */ 67 - " tmll %[m],6\n" /* KERNEL_VXR_MID */ 68 - " jo 17f\n" /* -> save V8..V23 */ 69 - 99 + " chi %[m],12\n" /* KERNEL_VXR_MID */ 100 + " jne 0f\n" /* -> save V8..V23 */ 101 + " VSTM 8,23,128,1\n" /* vstm %v8,%v23,128(%r1) */ 102 + " j 7f\n" 70 103 /* Test and save the first half of 16 vector registers */ 71 - "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ 72 - " jz 10f\n" /* -> KERNEL_VXR_HIGH */ 104 + "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */ 105 + " jz 3f\n" /* -> KERNEL_VXR_HIGH */ 73 106 " jo 2f\n" /* 11 -> save V0..V15 */ 74 - " brc 4,3f\n" /* 01 -> save V0..V7 */ 75 - " brc 2,4f\n" /* 10 -> save V8..V15 */ 76 - 107 + " brc 2,1f\n" /* 10 -> save V8..V15 */ 108 + " VSTM 0,7,0,1\n" /* vstm %v0,%v7,0(%r1) */ 109 + " j 3f\n" 110 + "1: VSTM 8,15,128,1\n" /* vstm %v8,%v15,128(%r1) */ 111 + " j 3f\n" 112 + "2: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */ 77 113 /* Test and save the second half of 16 vector registers */ 78 - "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ 79 - " jo 19f\n" /* 11 -> save V16..V31 */ 80 - " brc 4,11f\n" /* 01 -> save V16..V23 */ 81 - " brc 2,12f\n" /* 10 -> save V24..V31 */ 82 - " j 20f\n" /* 00 -> done */ 83 - 84 - /* 85 - * Below are the vstm combinations to save multiple vector 86 - * registers at once. 
87 - */ 88 - "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ 89 - " j 10b\n" /* -> VXR_HIGH */ 90 - "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ 91 - " j 10b\n" /* -> VXR_HIGH */ 92 - "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ 93 - " j 10b\n" /* -> VXR_HIGH */ 94 - "\n" 95 - "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ 96 - " j 20f\n" /* -> done */ 97 - "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ 98 - " j 20f\n" /* -> done */ 99 - "\n" 100 - "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ 101 - " nill %[m],249\n" /* m &= ~VXR_MID */ 102 - " j 1b\n" /* -> VXR_LOW */ 103 - "\n" 104 - "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ 105 - "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ 106 - "20:" 114 + "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */ 115 + " jz 7f\n" 116 + " jo 6f\n" /* 11 -> save V16..V31 */ 117 + " brc 2,4f\n" /* 10 -> save V24..V31 */ 118 + " VSTM 16,23,256,1\n" /* vstm %v16,%v23,256(%r1) */ 119 + " j 7f\n" 120 + "4: VSTM 24,31,384,1\n" /* vstm %v24,%v31,384(%r1) */ 121 + " j 7f\n" 122 + "5: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */ 123 + "6: VSTM 16,31,256,1\n" /* vstm %v16,%v31,256(%r1) */ 124 + "7:" 107 125 : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) 108 - : [m] "d" (state->mask) 126 + : [m] "d" (flags) 109 127 : "1", "cc"); 110 128 } 111 129 EXPORT_SYMBOL(__kernel_fpu_begin); 112 130 113 - void __kernel_fpu_end(struct kernel_fpu *state) 131 + void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) 114 132 { 115 - /* Just update the per-CPU state if there is nothing to restore */ 116 - if (!(state->mask & KERNEL_FPU_STATE_MASK)) 117 - goto update_fpu_state; 118 - 119 133 /* 120 - * If KERNEL_FPR is specified, the vector facility is not available 121 - * and, thus, restore floating-point control and registers only. 
134 + * Limit the restore to the FPU/vector registers of the 135 + * previous context that have been overwritte by the 136 + * current context 122 137 */ 123 - if (state->mask & KERNEL_FPR) { 124 - asm volatile("lfpc %0" : : "Q" (state->fpc)); 125 - asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); 126 - asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); 127 - asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); 128 - asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); 129 - asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); 130 - asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); 131 - asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); 132 - asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); 133 - asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); 134 - asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); 135 - asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); 136 - asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); 137 - asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); 138 - asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); 139 - asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); 140 - asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); 141 - goto update_fpu_state; 142 - } 138 + flags &= state->mask; 143 139 144 - /* Test and restore floating-point controls */ 145 - if (state->mask & KERNEL_FPC) 140 + if (flags & KERNEL_FPC) 141 + /* Restore floating-point controls */ 146 142 asm volatile("lfpc %0" : : "Q" (state->fpc)); 143 + 144 + if (!MACHINE_HAS_VX) { 145 + if (flags & KERNEL_VXR_V0V7) { 146 + /* Restore floating-point registers */ 147 + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); 148 + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); 149 + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); 150 + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); 151 + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); 152 + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); 153 + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); 154 + asm volatile("ld 7,%0" : : "Q" 
(state->fprs[7])); 155 + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); 156 + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); 157 + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); 158 + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); 159 + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); 160 + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); 161 + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); 162 + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); 163 + } 164 + return; 165 + } 147 166 148 167 /* Test and restore (load) vector registers */ 149 168 asm volatile ( 150 169 /* 151 - * Test if any vector registers must be loaded and, if so, 170 + * Test if any vector register must be loaded and, if so, 152 171 * test if all registers can be loaded at once. 153 172 */ 154 - " tmll %[m],15\n" /* KERNEL_VXR_MASK */ 155 - " jz 20f\n" /* no work -> done */ 156 - " la 1,%[vxrs]\n" /* load load area */ 157 - " jo 18f\n" /* -> load V0..V31 */ 158 - 173 + " la 1,%[vxrs]\n" /* load restore area */ 174 + " tmll %[m],30\n" /* KERNEL_VXR */ 175 + " jz 7f\n" /* no work -> done */ 176 + " jo 5f\n" /* -> restore V0..V31 */ 159 177 /* 160 - * Test if V8..V23 can be restored at once... this speeds up 161 - * for KERNEL_VXR_MID only. Otherwise continue to split the 162 - * range of vector registers into two halves and test them 163 - * separately. 178 + * Test for special case KERNEL_FPU_MID only. 
In this 179 + * case a vlm V8..V23 is the best instruction 164 180 */ 165 - " tmll %[m],6\n" /* KERNEL_VXR_MID */ 166 - " jo 17f\n" /* -> load V8..V23 */ 167 - 168 - /* Test and load the first half of 16 vector registers */ 169 - "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ 170 - " jz 10f\n" /* -> KERNEL_VXR_HIGH */ 171 - " jo 2f\n" /* 11 -> load V0..V15 */ 172 - " brc 4,3f\n" /* 01 -> load V0..V7 */ 173 - " brc 2,4f\n" /* 10 -> load V8..V15 */ 174 - 175 - /* Test and load the second half of 16 vector registers */ 176 - "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ 177 - " jo 19f\n" /* 11 -> load V16..V31 */ 178 - " brc 4,11f\n" /* 01 -> load V16..V23 */ 179 - " brc 2,12f\n" /* 10 -> load V24..V31 */ 180 - " j 20f\n" /* 00 -> done */ 181 - 182 - /* 183 - * Below are the vstm combinations to load multiple vector 184 - * registers at once. 185 - */ 186 - "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ 187 - " j 10b\n" /* -> VXR_HIGH */ 188 - "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ 189 - " j 10b\n" /* -> VXR_HIGH */ 190 - "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ 191 - " j 10b\n" /* -> VXR_HIGH */ 192 - "\n" 193 - "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ 194 - " j 20f\n" /* -> done */ 195 - "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ 196 - " j 20f\n" /* -> done */ 197 - "\n" 198 - "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ 199 - " nill %[m],249\n" /* m &= ~VXR_MID */ 200 - " j 1b\n" /* -> VXR_LOW */ 201 - "\n" 202 - "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ 203 - "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ 204 - "20:" 205 - : 206 - : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), 207 - [m] "d" (state->mask) 181 + " chi %[m],12\n" /* KERNEL_VXR_MID */ 182 + " jne 0f\n" /* -> restore V8..V23 */ 183 + " VLM 8,23,128,1\n" /* vlm %v8,%v23,128(%r1) */ 184 + " j 7f\n" 185 + /* Test and restore the first half of 16 vector registers */ 186 + "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */ 
187 + " jz 3f\n" /* -> KERNEL_VXR_HIGH */ 188 + " jo 2f\n" /* 11 -> restore V0..V15 */ 189 + " brc 2,1f\n" /* 10 -> restore V8..V15 */ 190 + " VLM 0,7,0,1\n" /* vlm %v0,%v7,0(%r1) */ 191 + " j 3f\n" 192 + "1: VLM 8,15,128,1\n" /* vlm %v8,%v15,128(%r1) */ 193 + " j 3f\n" 194 + "2: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */ 195 + /* Test and restore the second half of 16 vector registers */ 196 + "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */ 197 + " jz 7f\n" 198 + " jo 6f\n" /* 11 -> restore V16..V31 */ 199 + " brc 2,4f\n" /* 10 -> restore V24..V31 */ 200 + " VLM 16,23,256,1\n" /* vlm %v16,%v23,256(%r1) */ 201 + " j 7f\n" 202 + "4: VLM 24,31,384,1\n" /* vlm %v24,%v31,384(%r1) */ 203 + " j 7f\n" 204 + "5: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */ 205 + "6: VLM 16,31,256,1\n" /* vlm %v16,%v31,256(%r1) */ 206 + "7:" 207 + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) 208 + : [m] "d" (flags) 208 209 : "1", "cc"); 209 - 210 - update_fpu_state: 211 - /* Update current kernel VX state */ 212 - __this_cpu_write(kernel_fpu_state, state->mask); 213 210 } 214 211 EXPORT_SYMBOL(__kernel_fpu_end);
+1 -1
arch/s390/kernel/sysinfo.c
··· 454 454 : "Q" (info->capability), "d" (10000000), "d" (0) 455 455 : "cc" 456 456 ); 457 - kernel_fpu_end(&fpu); 457 + kernel_fpu_end(&fpu, KERNEL_FPR); 458 458 } else 459 459 /* 460 460 * Really old machine without stsi block for basic