Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID-6 syndrome calculation using RISC-V vector instructions
 *
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 *
 * Based on neon.uc:
 * Copyright 2002-2004 H. Peter Anvin
 */

#include "rvv.h"

#ifdef __riscv_vector
#error "This code must be built without compiler support for vector"
#endif

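/*
 * All variants below follow the same pattern, unrolled for 1, 2, 4 or 8
 * vector registers per iteration.  For each group of vl bytes, the running
 * Q syndrome is multiplied by the GF(2^8) generator before the next data
 * disk is folded in:
 *
 *   vsra.vi vX, vQ, 7      - replicate each byte's top bit into a
 *                            0x00/0xff mask (MASK)
 *   vsll.vi vY, vQ, 1      - byte-wise shift left, i.e. multiply by x
 *                            (SHLBYTE)
 *   vand.vx vX, vX, 0x1d   - keep the reduction polynomial 0x1d where the
 *                            top bit was set (NBYTES(0x1d))
 *   vxor.vv vY, vY, vX     - fold the reduction back in
 *
 * P parity is accumulated with plain vxor.vv.
 */
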
static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += nsize * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0])
		);
	}
}

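/*
 * The *_xor_syndrome variants update P/Q for data disks start..stop only.
 * Disks in that range are folded into both the partial P and Q ("right
 * side"); for disks below start only the multiply-by-2 step is applied, so
 * the partial Q ends up scaled by the correct power of the generator ("left
 * side").  The partial results are then XORed into the existing P/Q pages.
 */
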
static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += nsize * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0])
		);
	}
}

static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += nsize * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vmv.v.v v5, v4\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize]),
			      [wp1]"r"(&dptr[z0][d + 1 * nsize])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [wd1]"r"(&dptr[z][d + 1 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      "vse8.v v4, (%[wp1])\n"
			      "vse8.v v5, (%[wq1])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0]),
			      [wp1]"r"(&p[d + nsize * 1]),
			      [wq1]"r"(&q[d + nsize * 1])
		);
	}
}

static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += nsize * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vmv.v.v v5, v4\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize]),
			      [wp1]"r"(&dptr[z0][d + 1 * nsize])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [wd1]"r"(&dptr[z][d + 1 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v5, v7, v6\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"

			      "vle8.v v6, (%[wp1])\n"
			      "vle8.v v7, (%[wq1])\n"
			      "vxor.vv v6, v6, v4\n"
			      "vxor.vv v7, v7, v5\n"
			      "vse8.v v6, (%[wp1])\n"
			      "vse8.v v7, (%[wq1])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0]),
			      [wp1]"r"(&p[d + nsize * 1]),
			      [wq1]"r"(&q[d + nsize * 1])
		);
	}
}

static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += nsize * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vmv.v.v v5, v4\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vmv.v.v v9, v8\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vmv.v.v v13, v12\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize]),
			      [wp1]"r"(&dptr[z0][d + 1 * nsize]),
			      [wp2]"r"(&dptr[z0][d + 2 * nsize]),
			      [wp3]"r"(&dptr[z0][d + 3 * nsize])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [wd1]"r"(&dptr[z][d + 1 * nsize]),
				      [wd2]"r"(&dptr[z][d + 2 * nsize]),
				      [wd3]"r"(&dptr[z][d + 3 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      "vse8.v v4, (%[wp1])\n"
			      "vse8.v v5, (%[wq1])\n"
			      "vse8.v v8, (%[wp2])\n"
			      "vse8.v v9, (%[wq2])\n"
			      "vse8.v v12, (%[wp3])\n"
			      "vse8.v v13, (%[wq3])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0]),
			      [wp1]"r"(&p[d + nsize * 1]),
			      [wq1]"r"(&q[d + nsize * 1]),
			      [wp2]"r"(&p[d + nsize * 2]),
			      [wq2]"r"(&q[d + nsize * 2]),
			      [wp3]"r"(&p[d + nsize * 3]),
			      [wq3]"r"(&q[d + nsize * 3])
		);
	}
}

static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += nsize * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vmv.v.v v5, v4\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vmv.v.v v9, v8\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vmv.v.v v13, v12\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize]),
			      [wp1]"r"(&dptr[z0][d + 1 * nsize]),
			      [wp2]"r"(&dptr[z0][d + 2 * nsize]),
			      [wp3]"r"(&dptr[z0][d + 3 * nsize])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [wd1]"r"(&dptr[z][d + 1 * nsize]),
				      [wd2]"r"(&dptr[z][d + 2 * nsize]),
				      [wd3]"r"(&dptr[z][d + 3 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v5, v7, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v9, v11, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v13, v15, v14\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"

			      "vle8.v v6, (%[wp1])\n"
			      "vle8.v v7, (%[wq1])\n"
			      "vxor.vv v6, v6, v4\n"
			      "vxor.vv v7, v7, v5\n"
			      "vse8.v v6, (%[wp1])\n"
			      "vse8.v v7, (%[wq1])\n"

			      "vle8.v v10, (%[wp2])\n"
			      "vle8.v v11, (%[wq2])\n"
			      "vxor.vv v10, v10, v8\n"
			      "vxor.vv v11, v11, v9\n"
			      "vse8.v v10, (%[wp2])\n"
			      "vse8.v v11, (%[wq2])\n"

			      "vle8.v v14, (%[wp3])\n"
			      "vle8.v v15, (%[wq3])\n"
			      "vxor.vv v14, v14, v12\n"
			      "vxor.vv v15, v15, v13\n"
			      "vse8.v v14, (%[wp3])\n"
			      "vse8.v v15, (%[wq3])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0]),
			      [wp1]"r"(&p[d + nsize * 1]),
			      [wq1]"r"(&q[d + nsize * 1]),
			      [wp2]"r"(&p[d + nsize * 2]),
			      [wq2]"r"(&q[d + nsize * 2]),
			      [wp3]"r"(&p[d + nsize * 3]),
			      [wq3]"r"(&q[d + nsize * 3])
		);
	}
}

static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += nsize * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vmv.v.v v5, v4\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vmv.v.v v9, v8\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vmv.v.v v13, v12\n"
			      "vle8.v v16, (%[wp4])\n"
			      "vmv.v.v v17, v16\n"
			      "vle8.v v20, (%[wp5])\n"
			      "vmv.v.v v21, v20\n"
			      "vle8.v v24, (%[wp6])\n"
			      "vmv.v.v v25, v24\n"
			      "vle8.v v28, (%[wp7])\n"
			      "vmv.v.v v29, v28\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize]),
			      [wp1]"r"(&dptr[z0][d + 1 * nsize]),
			      [wp2]"r"(&dptr[z0][d + 2 * nsize]),
			      [wp3]"r"(&dptr[z0][d + 3 * nsize]),
			      [wp4]"r"(&dptr[z0][d + 4 * nsize]),
			      [wp5]"r"(&dptr[z0][d + 5 * nsize]),
			      [wp6]"r"(&dptr[z0][d + 6 * nsize]),
			      [wp7]"r"(&dptr[z0][d + 7 * nsize])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"

				      "vsra.vi v18, v17, 7\n"
				      "vsll.vi v19, v17, 1\n"
				      "vand.vx v18, v18, %[x1d]\n"
				      "vxor.vv v19, v19, v18\n"
				      "vle8.v v18, (%[wd4])\n"
				      "vxor.vv v17, v19, v18\n"
				      "vxor.vv v16, v16, v18\n"

				      "vsra.vi v22, v21, 7\n"
				      "vsll.vi v23, v21, 1\n"
				      "vand.vx v22, v22, %[x1d]\n"
				      "vxor.vv v23, v23, v22\n"
				      "vle8.v v22, (%[wd5])\n"
				      "vxor.vv v21, v23, v22\n"
				      "vxor.vv v20, v20, v22\n"

				      "vsra.vi v26, v25, 7\n"
				      "vsll.vi v27, v25, 1\n"
				      "vand.vx v26, v26, %[x1d]\n"
				      "vxor.vv v27, v27, v26\n"
				      "vle8.v v26, (%[wd6])\n"
				      "vxor.vv v25, v27, v26\n"
				      "vxor.vv v24, v24, v26\n"

				      "vsra.vi v30, v29, 7\n"
				      "vsll.vi v31, v29, 1\n"
				      "vand.vx v30, v30, %[x1d]\n"
				      "vxor.vv v31, v31, v30\n"
				      "vle8.v v30, (%[wd7])\n"
				      "vxor.vv v29, v31, v30\n"
				      "vxor.vv v28, v28, v30\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [wd1]"r"(&dptr[z][d + 1 * nsize]),
				      [wd2]"r"(&dptr[z][d + 2 * nsize]),
				      [wd3]"r"(&dptr[z][d + 3 * nsize]),
				      [wd4]"r"(&dptr[z][d + 4 * nsize]),
				      [wd5]"r"(&dptr[z][d + 5 * nsize]),
				      [wd6]"r"(&dptr[z][d + 6 * nsize]),
				      [wd7]"r"(&dptr[z][d + 7 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      "vse8.v v4, (%[wp1])\n"
			      "vse8.v v5, (%[wq1])\n"
			      "vse8.v v8, (%[wp2])\n"
			      "vse8.v v9, (%[wq2])\n"
			      "vse8.v v12, (%[wp3])\n"
			      "vse8.v v13, (%[wq3])\n"
			      "vse8.v v16, (%[wp4])\n"
			      "vse8.v v17, (%[wq4])\n"
			      "vse8.v v20, (%[wp5])\n"
			      "vse8.v v21, (%[wq5])\n"
			      "vse8.v v24, (%[wp6])\n"
			      "vse8.v v25, (%[wq6])\n"
			      "vse8.v v28, (%[wp7])\n"
			      "vse8.v v29, (%[wq7])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0]),
			      [wp1]"r"(&p[d + nsize * 1]),
			      [wq1]"r"(&q[d + nsize * 1]),
			      [wp2]"r"(&p[d + nsize * 2]),
			      [wq2]"r"(&q[d + nsize * 2]),
			      [wp3]"r"(&p[d + nsize * 3]),
			      [wq3]"r"(&q[d + nsize * 3]),
			      [wp4]"r"(&p[d + nsize * 4]),
			      [wq4]"r"(&q[d + nsize * 4]),
			      [wp5]"r"(&p[d + nsize * 5]),
			      [wq5]"r"(&q[d + nsize * 5]),
			      [wp6]"r"(&p[d + nsize * 6]),
			      [wq6]"r"(&q[d + nsize * 6]),
			      [wp7]"r"(&p[d + nsize * 7]),
			      [wq7]"r"(&q[d + nsize * 7])
		);
	}
}

static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d, nsize;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli %0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
		      : "=&r" (vl)
	);

	nsize = vl;

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += nsize * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vmv.v.v v1, v0\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vmv.v.v v5, v4\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vmv.v.v v9, v8\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vmv.v.v v13, v12\n"
			      "vle8.v v16, (%[wp4])\n"
			      "vmv.v.v v17, v16\n"
			      "vle8.v v20, (%[wp5])\n"
			      "vmv.v.v v21, v20\n"
			      "vle8.v v24, (%[wp6])\n"
			      "vmv.v.v v25, v24\n"
			      "vle8.v v28, (%[wp7])\n"
			      "vmv.v.v v29, v28\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * nsize]),
			      [wp1]"r"(&dptr[z0][d + 1 * nsize]),
			      [wp2]"r"(&dptr[z0][d + 2 * nsize]),
			      [wp3]"r"(&dptr[z0][d + 3 * nsize]),
			      [wp4]"r"(&dptr[z0][d + 4 * nsize]),
			      [wp5]"r"(&dptr[z0][d + 5 * nsize]),
			      [wp6]"r"(&dptr[z0][d + 6 * nsize]),
			      [wp7]"r"(&dptr[z0][d + 7 * nsize])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"

				      "vsra.vi v18, v17, 7\n"
				      "vsll.vi v19, v17, 1\n"
				      "vand.vx v18, v18, %[x1d]\n"
				      "vxor.vv v19, v19, v18\n"
				      "vle8.v v18, (%[wd4])\n"
				      "vxor.vv v17, v19, v18\n"
				      "vxor.vv v16, v16, v18\n"

				      "vsra.vi v22, v21, 7\n"
				      "vsll.vi v23, v21, 1\n"
				      "vand.vx v22, v22, %[x1d]\n"
				      "vxor.vv v23, v23, v22\n"
				      "vle8.v v22, (%[wd5])\n"
				      "vxor.vv v21, v23, v22\n"
				      "vxor.vv v20, v20, v22\n"

				      "vsra.vi v26, v25, 7\n"
				      "vsll.vi v27, v25, 1\n"
				      "vand.vx v26, v26, %[x1d]\n"
				      "vxor.vv v27, v27, v26\n"
				      "vle8.v v26, (%[wd6])\n"
				      "vxor.vv v25, v27, v26\n"
				      "vxor.vv v24, v24, v26\n"

				      "vsra.vi v30, v29, 7\n"
				      "vsll.vi v31, v29, 1\n"
				      "vand.vx v30, v30, %[x1d]\n"
				      "vxor.vv v31, v31, v30\n"
				      "vle8.v v30, (%[wd7])\n"
				      "vxor.vv v29, v31, v30\n"
				      "vxor.vv v28, v28, v30\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * nsize]),
				      [wd1]"r"(&dptr[z][d + 1 * nsize]),
				      [wd2]"r"(&dptr[z][d + 2 * nsize]),
				      [wd3]"r"(&dptr[z][d + 3 * nsize]),
				      [wd4]"r"(&dptr[z][d + 4 * nsize]),
				      [wd5]"r"(&dptr[z][d + 5 * nsize]),
				      [wd6]"r"(&dptr[z][d + 6 * nsize]),
				      [wd7]"r"(&dptr[z][d + 7 * nsize]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v5, v7, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v9, v11, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v13, v15, v14\n"

				      "vsra.vi v18, v17, 7\n"
				      "vsll.vi v19, v17, 1\n"
				      "vand.vx v18, v18, %[x1d]\n"
				      "vxor.vv v17, v19, v18\n"

				      "vsra.vi v22, v21, 7\n"
				      "vsll.vi v23, v21, 1\n"
				      "vand.vx v22, v22, %[x1d]\n"
				      "vxor.vv v21, v23, v22\n"

				      "vsra.vi v26, v25, 7\n"
				      "vsll.vi v27, v25, 1\n"
				      "vand.vx v26, v26, %[x1d]\n"
				      "vxor.vv v25, v27, v26\n"

				      "vsra.vi v30, v29, 7\n"
				      "vsll.vi v31, v29, 1\n"
				      "vand.vx v30, v30, %[x1d]\n"
				      "vxor.vv v29, v31, v30\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 * v16:wp4, v17:wq4, v18:p4, v19:q4
		 * v20:wp5, v21:wq5, v22:p5, v23:q5
		 * v24:wp6, v25:wq6, v26:p6, v27:q6
		 * v28:wp7, v29:wq7, v30:p7, v31:q7
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"

			      "vle8.v v6, (%[wp1])\n"
			      "vle8.v v7, (%[wq1])\n"
			      "vxor.vv v6, v6, v4\n"
			      "vxor.vv v7, v7, v5\n"
			      "vse8.v v6, (%[wp1])\n"
			      "vse8.v v7, (%[wq1])\n"

			      "vle8.v v10, (%[wp2])\n"
			      "vle8.v v11, (%[wq2])\n"
			      "vxor.vv v10, v10, v8\n"
			      "vxor.vv v11, v11, v9\n"
			      "vse8.v v10, (%[wp2])\n"
			      "vse8.v v11, (%[wq2])\n"

			      "vle8.v v14, (%[wp3])\n"
			      "vle8.v v15, (%[wq3])\n"
			      "vxor.vv v14, v14, v12\n"
			      "vxor.vv v15, v15, v13\n"
			      "vse8.v v14, (%[wp3])\n"
			      "vse8.v v15, (%[wq3])\n"

			      "vle8.v v18, (%[wp4])\n"
			      "vle8.v v19, (%[wq4])\n"
			      "vxor.vv v18, v18, v16\n"
			      "vxor.vv v19, v19, v17\n"
			      "vse8.v v18, (%[wp4])\n"
			      "vse8.v v19, (%[wq4])\n"

			      "vle8.v v22, (%[wp5])\n"
			      "vle8.v v23, (%[wq5])\n"
			      "vxor.vv v22, v22, v20\n"
			      "vxor.vv v23, v23, v21\n"
			      "vse8.v v22, (%[wp5])\n"
			      "vse8.v v23, (%[wq5])\n"

			      "vle8.v v26, (%[wp6])\n"
			      "vle8.v v27, (%[wq6])\n"
			      "vxor.vv v26, v26, v24\n"
			      "vxor.vv v27, v27, v25\n"
			      "vse8.v v26, (%[wp6])\n"
			      "vse8.v v27, (%[wq6])\n"

			      "vle8.v v30, (%[wp7])\n"
			      "vle8.v v31, (%[wq7])\n"
			      "vxor.vv v30, v30, v28\n"
			      "vxor.vv v31, v31, v29\n"
			      "vse8.v v30, (%[wp7])\n"
			      "vse8.v v31, (%[wq7])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + nsize * 0]),
			      [wq0]"r"(&q[d + nsize * 0]),
			      [wp1]"r"(&p[d + nsize * 1]),
			      [wq1]"r"(&q[d + nsize * 1]),
			      [wp2]"r"(&p[d + nsize * 2]),
			      [wq2]"r"(&q[d + nsize * 2]),
			      [wp3]"r"(&p[d + nsize * 3]),
			      [wq3]"r"(&q[d + nsize * 3]),
			      [wp4]"r"(&p[d + nsize * 4]),
			      [wq4]"r"(&q[d + nsize * 4]),
			      [wp5]"r"(&p[d + nsize * 5]),
			      [wq5]"r"(&q[d + nsize * 5]),
			      [wp6]"r"(&p[d + nsize * 6]),
			      [wq6]"r"(&q[d + nsize * 6]),
			      [wp7]"r"(&p[d + nsize * 7]),
			      [wq7]"r"(&q[d + nsize * 7])
		);
	}
}

RAID6_RVV_WRAPPER(1);
RAID6_RVV_WRAPPER(2);
RAID6_RVV_WRAPPER(4);
RAID6_RVV_WRAPPER(8);
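
For readers cross-checking the assembly against the RAID-6 maths: each unrolled block above performs the standard syndrome recurrence from the int.uc/neon.uc templates, just on vl bytes at a time per vector register group. A rough scalar equivalent is sketched below; it is illustrative only, not part of the kernel file, and the names gf_mul2 and raid6_gen_syndrome_ref are made up for this sketch.

#include <stddef.h>
#include <stdint.h>

/* Multiply by x in GF(2^8) modulo x^8 + x^4 + x^3 + x^2 + 1 (0x1d). */
static inline uint8_t gf_mul2(uint8_t v)
{
	return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}

/* dptr[0..disks-3] are data blocks, dptr[disks-2] is P, dptr[disks-1] is Q. */
static void raid6_gen_syndrome_ref(int disks, size_t bytes, uint8_t **dptr)
{
	uint8_t *p = dptr[disks - 2];	/* XOR parity */
	uint8_t *q = dptr[disks - 1];	/* RS syndrome */
	int z0 = disks - 3;		/* highest data disk */

	for (size_t d = 0; d < bytes; d++) {
		uint8_t wp = dptr[z0][d];	/* running P for this byte */
		uint8_t wq = wp;		/* running Q, Horner-style */

		for (int z = z0 - 1; z >= 0; z--) {
			wq = gf_mul2(wq) ^ dptr[z][d];	/* wq = wq*x + data */
			wp ^= dptr[z][d];		/* wp = XOR of data */
		}
		p[d] = wp;
		q[d] = wq;
	}
}

The vsra.vi/vsll.vi/vand.vx/vxor.vv sequence in the assembly is exactly gf_mul2() applied to every byte of a vector register, with the conditional 0x1d reduction expressed as a mask instead of a branch.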