Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

raid6: riscv: Allow code to be compiled in userspace

To support userspace raid6test, this patch adds a __KERNEL__ ifdef around the
kernel header inclusions, as well as userspace wrapper definitions, to allow
the code to be compiled in userspace.

This patch also drops the NSIZE macro in favor of using the vector length,
which works for both kernel and user space.

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
Link: https://patch.msgid.link/20250718072711.3865118-5-zhangchunyan@iscas.ac.cn
Signed-off-by: Paul Walmsley <pjw@kernel.org>

authored by

Chunyan Zhang and committed by
Paul Walmsley
3c58d7a5 6fcce9f0

+170 -151
+1 -6
lib/raid6/recov_rvv.c
··· 4 4 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn> 5 5 */ 6 6 7 - #include <asm/vector.h> 8 7 #include <linux/raid/pq.h> 9 - 10 - static int rvv_has_vector(void) 11 - { 12 - return has_vector(); 13 - } 8 + #include "rvv.h" 14 9 15 10 static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, 16 11 u8 *dq, const u8 *pbmul,
+152 -145
lib/raid6/rvv.c
··· 9 9 * Copyright 2002-2004 H. Peter Anvin 10 10 */ 11 11 12 - #include <asm/vector.h> 13 - #include <linux/raid/pq.h> 14 12 #include "rvv.h" 15 - 16 - #define NSIZE (riscv_v_vsize / 32) /* NSIZE = vlenb */ 17 - 18 - static int rvv_has_vector(void) 19 - { 20 - return has_vector(); 21 - } 22 13 23 14 #ifdef __riscv_vector 24 15 #error "This code must be built without compiler support for vector" ··· 19 28 { 20 29 u8 **dptr = (u8 **)ptrs; 21 30 u8 *p, *q; 22 - unsigned long vl, d; 31 + unsigned long vl, d, nsize; 23 32 int z, z0; 24 33 25 34 z0 = disks - 3; /* Highest data disk */ ··· 33 42 : "=&r" (vl) 34 43 ); 35 44 45 + nsize = vl; 46 + 36 47 /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ 37 - for (d = 0; d < bytes; d += NSIZE * 1) { 48 + for (d = 0; d < bytes; d += nsize * 1) { 38 49 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 39 50 asm volatile (".option push\n" 40 51 ".option arch,+v\n" ··· 44 51 "vmv.v.v v1, v0\n" 45 52 ".option pop\n" 46 53 : : 47 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) 54 + [wp0]"r"(&dptr[z0][d + 0 * nsize]) 48 55 ); 49 56 50 57 for (z = z0 - 1 ; z >= 0 ; z--) { ··· 68 75 "vxor.vv v0, v0, v2\n" 69 76 ".option pop\n" 70 77 : : 71 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 78 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 72 79 [x1d]"r"(0x1d) 73 80 ); 74 81 } ··· 83 90 "vse8.v v1, (%[wq0])\n" 84 91 ".option pop\n" 85 92 : : 86 - [wp0]"r"(&p[d + NSIZE * 0]), 87 - [wq0]"r"(&q[d + NSIZE * 0]) 93 + [wp0]"r"(&p[d + nsize * 0]), 94 + [wq0]"r"(&q[d + nsize * 0]) 88 95 ); 89 96 } 90 97 } ··· 94 101 { 95 102 u8 **dptr = (u8 **)ptrs; 96 103 u8 *p, *q; 97 - unsigned long vl, d; 104 + unsigned long vl, d, nsize; 98 105 int z, z0; 99 106 100 107 z0 = stop; /* P/Q right side optimization */ ··· 108 115 : "=&r" (vl) 109 116 ); 110 117 118 + nsize = vl; 119 + 111 120 /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ 112 - for (d = 0 ; d < bytes ; d += NSIZE * 1) { 121 + for (d = 0 ; d < bytes ; d += nsize * 1) { 113 122 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 
114 123 asm volatile (".option push\n" 115 124 ".option arch,+v\n" ··· 119 124 "vmv.v.v v1, v0\n" 120 125 ".option pop\n" 121 126 : : 122 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) 127 + [wp0]"r"(&dptr[z0][d + 0 * nsize]) 123 128 ); 124 129 125 130 /* P/Q data pages */ ··· 144 149 "vxor.vv v0, v0, v2\n" 145 150 ".option pop\n" 146 151 : : 147 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 152 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 148 153 [x1d]"r"(0x1d) 149 154 ); 150 155 } ··· 184 189 "vse8.v v3, (%[wq0])\n" 185 190 ".option pop\n" 186 191 : : 187 - [wp0]"r"(&p[d + NSIZE * 0]), 188 - [wq0]"r"(&q[d + NSIZE * 0]) 192 + [wp0]"r"(&p[d + nsize * 0]), 193 + [wq0]"r"(&q[d + nsize * 0]) 189 194 ); 190 195 } 191 196 } ··· 194 199 { 195 200 u8 **dptr = (u8 **)ptrs; 196 201 u8 *p, *q; 197 - unsigned long vl, d; 202 + unsigned long vl, d, nsize; 198 203 int z, z0; 199 204 200 205 z0 = disks - 3; /* Highest data disk */ ··· 208 213 : "=&r" (vl) 209 214 ); 210 215 216 + nsize = vl; 217 + 211 218 /* 212 219 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 213 220 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 214 221 */ 215 - for (d = 0; d < bytes; d += NSIZE * 2) { 222 + for (d = 0; d < bytes; d += nsize * 2) { 216 223 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 217 224 asm volatile (".option push\n" 218 225 ".option arch,+v\n" ··· 224 227 "vmv.v.v v5, v4\n" 225 228 ".option pop\n" 226 229 : : 227 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 228 - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) 230 + [wp0]"r"(&dptr[z0][d + 0 * nsize]), 231 + [wp1]"r"(&dptr[z0][d + 1 * nsize]) 229 232 ); 230 233 231 234 for (z = z0 - 1; z >= 0; z--) { ··· 257 260 "vxor.vv v4, v4, v6\n" 258 261 ".option pop\n" 259 262 : : 260 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 261 - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 263 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 264 + [wd1]"r"(&dptr[z][d + 1 * nsize]), 262 265 [x1d]"r"(0x1d) 263 266 ); 264 267 } ··· 275 278 "vse8.v v5, (%[wq1])\n" 276 279 ".option pop\n" 277 280 : : 278 - [wp0]"r"(&p[d + NSIZE * 0]), 279 - 
[wq0]"r"(&q[d + NSIZE * 0]), 280 - [wp1]"r"(&p[d + NSIZE * 1]), 281 - [wq1]"r"(&q[d + NSIZE * 1]) 281 + [wp0]"r"(&p[d + nsize * 0]), 282 + [wq0]"r"(&q[d + nsize * 0]), 283 + [wp1]"r"(&p[d + nsize * 1]), 284 + [wq1]"r"(&q[d + nsize * 1]) 282 285 ); 283 286 } 284 287 } ··· 288 291 { 289 292 u8 **dptr = (u8 **)ptrs; 290 293 u8 *p, *q; 291 - unsigned long vl, d; 294 + unsigned long vl, d, nsize; 292 295 int z, z0; 293 296 294 297 z0 = stop; /* P/Q right side optimization */ ··· 302 305 : "=&r" (vl) 303 306 ); 304 307 308 + nsize = vl; 309 + 305 310 /* 306 311 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 307 312 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 308 313 */ 309 - for (d = 0; d < bytes; d += NSIZE * 2) { 314 + for (d = 0; d < bytes; d += nsize * 2) { 310 315 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 311 316 asm volatile (".option push\n" 312 317 ".option arch,+v\n" ··· 318 319 "vmv.v.v v5, v4\n" 319 320 ".option pop\n" 320 321 : : 321 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 322 - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) 322 + [wp0]"r"(&dptr[z0][d + 0 * nsize]), 323 + [wp1]"r"(&dptr[z0][d + 1 * nsize]) 323 324 ); 324 325 325 326 /* P/Q data pages */ ··· 352 353 "vxor.vv v4, v4, v6\n" 353 354 ".option pop\n" 354 355 : : 355 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 356 - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 356 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 357 + [wd1]"r"(&dptr[z][d + 1 * nsize]), 357 358 [x1d]"r"(0x1d) 358 359 ); 359 360 } ··· 406 407 "vse8.v v7, (%[wq1])\n" 407 408 ".option pop\n" 408 409 : : 409 - [wp0]"r"(&p[d + NSIZE * 0]), 410 - [wq0]"r"(&q[d + NSIZE * 0]), 411 - [wp1]"r"(&p[d + NSIZE * 1]), 412 - [wq1]"r"(&q[d + NSIZE * 1]) 410 + [wp0]"r"(&p[d + nsize * 0]), 411 + [wq0]"r"(&q[d + nsize * 0]), 412 + [wp1]"r"(&p[d + nsize * 1]), 413 + [wq1]"r"(&q[d + nsize * 1]) 413 414 ); 414 415 } 415 416 } ··· 418 419 { 419 420 u8 **dptr = (u8 **)ptrs; 420 421 u8 *p, *q; 421 - unsigned long vl, d; 422 + unsigned long vl, d, nsize; 422 423 int z, z0; 423 424 424 425 z0 = disks - 3; 
/* Highest data disk */ ··· 432 433 : "=&r" (vl) 433 434 ); 434 435 436 + nsize = vl; 437 + 435 438 /* 436 439 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 437 440 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 438 441 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 439 442 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 440 443 */ 441 - for (d = 0; d < bytes; d += NSIZE * 4) { 444 + for (d = 0; d < bytes; d += nsize * 4) { 442 445 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 443 446 asm volatile (".option push\n" 444 447 ".option arch,+v\n" ··· 454 453 "vmv.v.v v13, v12\n" 455 454 ".option pop\n" 456 455 : : 457 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 458 - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 459 - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 460 - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) 456 + [wp0]"r"(&dptr[z0][d + 0 * nsize]), 457 + [wp1]"r"(&dptr[z0][d + 1 * nsize]), 458 + [wp2]"r"(&dptr[z0][d + 2 * nsize]), 459 + [wp3]"r"(&dptr[z0][d + 3 * nsize]) 461 460 ); 462 461 463 462 for (z = z0 - 1; z >= 0; z--) { ··· 505 504 "vxor.vv v12, v12, v14\n" 506 505 ".option pop\n" 507 506 : : 508 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 509 - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 510 - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 511 - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 507 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 508 + [wd1]"r"(&dptr[z][d + 1 * nsize]), 509 + [wd2]"r"(&dptr[z][d + 2 * nsize]), 510 + [wd3]"r"(&dptr[z][d + 3 * nsize]), 512 511 [x1d]"r"(0x1d) 513 512 ); 514 513 } ··· 529 528 "vse8.v v13, (%[wq3])\n" 530 529 ".option pop\n" 531 530 : : 532 - [wp0]"r"(&p[d + NSIZE * 0]), 533 - [wq0]"r"(&q[d + NSIZE * 0]), 534 - [wp1]"r"(&p[d + NSIZE * 1]), 535 - [wq1]"r"(&q[d + NSIZE * 1]), 536 - [wp2]"r"(&p[d + NSIZE * 2]), 537 - [wq2]"r"(&q[d + NSIZE * 2]), 538 - [wp3]"r"(&p[d + NSIZE * 3]), 539 - [wq3]"r"(&q[d + NSIZE * 3]) 531 + [wp0]"r"(&p[d + nsize * 0]), 532 + [wq0]"r"(&q[d + nsize * 0]), 533 + [wp1]"r"(&p[d + nsize * 1]), 534 + [wq1]"r"(&q[d + nsize * 1]), 535 + [wp2]"r"(&p[d + nsize * 2]), 536 + [wq2]"r"(&q[d + nsize * 2]), 537 
+ [wp3]"r"(&p[d + nsize * 3]), 538 + [wq3]"r"(&q[d + nsize * 3]) 540 539 ); 541 540 } 542 541 } ··· 546 545 { 547 546 u8 **dptr = (u8 **)ptrs; 548 547 u8 *p, *q; 549 - unsigned long vl, d; 548 + unsigned long vl, d, nsize; 550 549 int z, z0; 551 550 552 551 z0 = stop; /* P/Q right side optimization */ ··· 560 559 : "=&r" (vl) 561 560 ); 562 561 562 + nsize = vl; 563 + 563 564 /* 564 565 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 565 566 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 566 567 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 567 568 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 568 569 */ 569 - for (d = 0; d < bytes; d += NSIZE * 4) { 570 + for (d = 0; d < bytes; d += nsize * 4) { 570 571 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 571 572 asm volatile (".option push\n" 572 573 ".option arch,+v\n" ··· 582 579 "vmv.v.v v13, v12\n" 583 580 ".option pop\n" 584 581 : : 585 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 586 - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 587 - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 588 - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) 582 + [wp0]"r"(&dptr[z0][d + 0 * nsize]), 583 + [wp1]"r"(&dptr[z0][d + 1 * nsize]), 584 + [wp2]"r"(&dptr[z0][d + 2 * nsize]), 585 + [wp3]"r"(&dptr[z0][d + 3 * nsize]) 589 586 ); 590 587 591 588 /* P/Q data pages */ ··· 634 631 "vxor.vv v12, v12, v14\n" 635 632 ".option pop\n" 636 633 : : 637 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 638 - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 639 - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 640 - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 634 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 635 + [wd1]"r"(&dptr[z][d + 1 * nsize]), 636 + [wd2]"r"(&dptr[z][d + 2 * nsize]), 637 + [wd3]"r"(&dptr[z][d + 3 * nsize]), 641 638 [x1d]"r"(0x1d) 642 639 ); 643 640 } ··· 716 713 "vse8.v v15, (%[wq3])\n" 717 714 ".option pop\n" 718 715 : : 719 - [wp0]"r"(&p[d + NSIZE * 0]), 720 - [wq0]"r"(&q[d + NSIZE * 0]), 721 - [wp1]"r"(&p[d + NSIZE * 1]), 722 - [wq1]"r"(&q[d + NSIZE * 1]), 723 - [wp2]"r"(&p[d + NSIZE * 2]), 724 - [wq2]"r"(&q[d + NSIZE * 2]), 725 - 
[wp3]"r"(&p[d + NSIZE * 3]), 726 - [wq3]"r"(&q[d + NSIZE * 3]) 716 + [wp0]"r"(&p[d + nsize * 0]), 717 + [wq0]"r"(&q[d + nsize * 0]), 718 + [wp1]"r"(&p[d + nsize * 1]), 719 + [wq1]"r"(&q[d + nsize * 1]), 720 + [wp2]"r"(&p[d + nsize * 2]), 721 + [wq2]"r"(&q[d + nsize * 2]), 722 + [wp3]"r"(&p[d + nsize * 3]), 723 + [wq3]"r"(&q[d + nsize * 3]) 727 724 ); 728 725 } 729 726 } ··· 732 729 { 733 730 u8 **dptr = (u8 **)ptrs; 734 731 u8 *p, *q; 735 - unsigned long vl, d; 732 + unsigned long vl, d, nsize; 736 733 int z, z0; 737 734 738 735 z0 = disks - 3; /* Highest data disk */ ··· 746 743 : "=&r" (vl) 747 744 ); 748 745 746 + nsize = vl; 747 + 749 748 /* 750 749 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 751 750 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 ··· 758 753 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 759 754 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 760 755 */ 761 - for (d = 0; d < bytes; d += NSIZE * 8) { 756 + for (d = 0; d < bytes; d += nsize * 8) { 762 757 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 763 758 asm volatile (".option push\n" 764 759 ".option arch,+v\n" ··· 780 775 "vmv.v.v v29, v28\n" 781 776 ".option pop\n" 782 777 : : 783 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 784 - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 785 - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 786 - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), 787 - [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), 788 - [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), 789 - [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), 790 - [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) 778 + [wp0]"r"(&dptr[z0][d + 0 * nsize]), 779 + [wp1]"r"(&dptr[z0][d + 1 * nsize]), 780 + [wp2]"r"(&dptr[z0][d + 2 * nsize]), 781 + [wp3]"r"(&dptr[z0][d + 3 * nsize]), 782 + [wp4]"r"(&dptr[z0][d + 4 * nsize]), 783 + [wp5]"r"(&dptr[z0][d + 5 * nsize]), 784 + [wp6]"r"(&dptr[z0][d + 6 * nsize]), 785 + [wp7]"r"(&dptr[z0][d + 7 * nsize]) 791 786 ); 792 787 793 788 for (z = z0 - 1; z >= 0; z--) { ··· 867 862 "vxor.vv v28, v28, v30\n" 868 863 ".option pop\n" 869 864 : : 870 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 871 
- [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 872 - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 873 - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 874 - [wd4]"r"(&dptr[z][d + 4 * NSIZE]), 875 - [wd5]"r"(&dptr[z][d + 5 * NSIZE]), 876 - [wd6]"r"(&dptr[z][d + 6 * NSIZE]), 877 - [wd7]"r"(&dptr[z][d + 7 * NSIZE]), 865 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 866 + [wd1]"r"(&dptr[z][d + 1 * nsize]), 867 + [wd2]"r"(&dptr[z][d + 2 * nsize]), 868 + [wd3]"r"(&dptr[z][d + 3 * nsize]), 869 + [wd4]"r"(&dptr[z][d + 4 * nsize]), 870 + [wd5]"r"(&dptr[z][d + 5 * nsize]), 871 + [wd6]"r"(&dptr[z][d + 6 * nsize]), 872 + [wd7]"r"(&dptr[z][d + 7 * nsize]), 878 873 [x1d]"r"(0x1d) 879 874 ); 880 875 } ··· 903 898 "vse8.v v29, (%[wq7])\n" 904 899 ".option pop\n" 905 900 : : 906 - [wp0]"r"(&p[d + NSIZE * 0]), 907 - [wq0]"r"(&q[d + NSIZE * 0]), 908 - [wp1]"r"(&p[d + NSIZE * 1]), 909 - [wq1]"r"(&q[d + NSIZE * 1]), 910 - [wp2]"r"(&p[d + NSIZE * 2]), 911 - [wq2]"r"(&q[d + NSIZE * 2]), 912 - [wp3]"r"(&p[d + NSIZE * 3]), 913 - [wq3]"r"(&q[d + NSIZE * 3]), 914 - [wp4]"r"(&p[d + NSIZE * 4]), 915 - [wq4]"r"(&q[d + NSIZE * 4]), 916 - [wp5]"r"(&p[d + NSIZE * 5]), 917 - [wq5]"r"(&q[d + NSIZE * 5]), 918 - [wp6]"r"(&p[d + NSIZE * 6]), 919 - [wq6]"r"(&q[d + NSIZE * 6]), 920 - [wp7]"r"(&p[d + NSIZE * 7]), 921 - [wq7]"r"(&q[d + NSIZE * 7]) 901 + [wp0]"r"(&p[d + nsize * 0]), 902 + [wq0]"r"(&q[d + nsize * 0]), 903 + [wp1]"r"(&p[d + nsize * 1]), 904 + [wq1]"r"(&q[d + nsize * 1]), 905 + [wp2]"r"(&p[d + nsize * 2]), 906 + [wq2]"r"(&q[d + nsize * 2]), 907 + [wp3]"r"(&p[d + nsize * 3]), 908 + [wq3]"r"(&q[d + nsize * 3]), 909 + [wp4]"r"(&p[d + nsize * 4]), 910 + [wq4]"r"(&q[d + nsize * 4]), 911 + [wp5]"r"(&p[d + nsize * 5]), 912 + [wq5]"r"(&q[d + nsize * 5]), 913 + [wp6]"r"(&p[d + nsize * 6]), 914 + [wq6]"r"(&q[d + nsize * 6]), 915 + [wp7]"r"(&p[d + nsize * 7]), 916 + [wq7]"r"(&q[d + nsize * 7]) 922 917 ); 923 918 } 924 919 } ··· 928 923 { 929 924 u8 **dptr = (u8 **)ptrs; 930 925 u8 *p, *q; 931 - unsigned long vl, d; 926 + unsigned long vl, d, 
nsize; 932 927 int z, z0; 933 928 934 929 z0 = stop; /* P/Q right side optimization */ ··· 942 937 : "=&r" (vl) 943 938 ); 944 939 940 + nsize = vl; 941 + 945 942 /* 946 943 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 947 944 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 ··· 954 947 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 955 948 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 956 949 */ 957 - for (d = 0; d < bytes; d += NSIZE * 8) { 950 + for (d = 0; d < bytes; d += nsize * 8) { 958 951 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 959 952 asm volatile (".option push\n" 960 953 ".option arch,+v\n" ··· 976 969 "vmv.v.v v29, v28\n" 977 970 ".option pop\n" 978 971 : : 979 - [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 980 - [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 981 - [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 982 - [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), 983 - [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), 984 - [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), 985 - [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), 986 - [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) 972 + [wp0]"r"(&dptr[z0][d + 0 * nsize]), 973 + [wp1]"r"(&dptr[z0][d + 1 * nsize]), 974 + [wp2]"r"(&dptr[z0][d + 2 * nsize]), 975 + [wp3]"r"(&dptr[z0][d + 3 * nsize]), 976 + [wp4]"r"(&dptr[z0][d + 4 * nsize]), 977 + [wp5]"r"(&dptr[z0][d + 5 * nsize]), 978 + [wp6]"r"(&dptr[z0][d + 6 * nsize]), 979 + [wp7]"r"(&dptr[z0][d + 7 * nsize]) 987 980 ); 988 981 989 982 /* P/Q data pages */ ··· 1064 1057 "vxor.vv v28, v28, v30\n" 1065 1058 ".option pop\n" 1066 1059 : : 1067 - [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 1068 - [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 1069 - [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 1070 - [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 1071 - [wd4]"r"(&dptr[z][d + 4 * NSIZE]), 1072 - [wd5]"r"(&dptr[z][d + 5 * NSIZE]), 1073 - [wd6]"r"(&dptr[z][d + 6 * NSIZE]), 1074 - [wd7]"r"(&dptr[z][d + 7 * NSIZE]), 1060 + [wd0]"r"(&dptr[z][d + 0 * nsize]), 1061 + [wd1]"r"(&dptr[z][d + 1 * nsize]), 1062 + [wd2]"r"(&dptr[z][d + 2 * nsize]), 1063 + [wd3]"r"(&dptr[z][d + 3 * nsize]), 1064 + [wd4]"r"(&dptr[z][d + 4 * 
nsize]), 1065 + [wd5]"r"(&dptr[z][d + 5 * nsize]), 1066 + [wd6]"r"(&dptr[z][d + 6 * nsize]), 1067 + [wd7]"r"(&dptr[z][d + 7 * nsize]), 1075 1068 [x1d]"r"(0x1d) 1076 1069 ); 1077 1070 } ··· 1202 1195 "vse8.v v31, (%[wq7])\n" 1203 1196 ".option pop\n" 1204 1197 : : 1205 - [wp0]"r"(&p[d + NSIZE * 0]), 1206 - [wq0]"r"(&q[d + NSIZE * 0]), 1207 - [wp1]"r"(&p[d + NSIZE * 1]), 1208 - [wq1]"r"(&q[d + NSIZE * 1]), 1209 - [wp2]"r"(&p[d + NSIZE * 2]), 1210 - [wq2]"r"(&q[d + NSIZE * 2]), 1211 - [wp3]"r"(&p[d + NSIZE * 3]), 1212 - [wq3]"r"(&q[d + NSIZE * 3]), 1213 - [wp4]"r"(&p[d + NSIZE * 4]), 1214 - [wq4]"r"(&q[d + NSIZE * 4]), 1215 - [wp5]"r"(&p[d + NSIZE * 5]), 1216 - [wq5]"r"(&q[d + NSIZE * 5]), 1217 - [wp6]"r"(&p[d + NSIZE * 6]), 1218 - [wq6]"r"(&q[d + NSIZE * 6]), 1219 - [wp7]"r"(&p[d + NSIZE * 7]), 1220 - [wq7]"r"(&q[d + NSIZE * 7]) 1198 + [wp0]"r"(&p[d + nsize * 0]), 1199 + [wq0]"r"(&q[d + nsize * 0]), 1200 + [wp1]"r"(&p[d + nsize * 1]), 1201 + [wq1]"r"(&q[d + nsize * 1]), 1202 + [wp2]"r"(&p[d + nsize * 2]), 1203 + [wq2]"r"(&q[d + nsize * 2]), 1204 + [wp3]"r"(&p[d + nsize * 3]), 1205 + [wq3]"r"(&q[d + nsize * 3]), 1206 + [wp4]"r"(&p[d + nsize * 4]), 1207 + [wq4]"r"(&q[d + nsize * 4]), 1208 + [wp5]"r"(&p[d + nsize * 5]), 1209 + [wq5]"r"(&q[d + nsize * 5]), 1210 + [wp6]"r"(&p[d + nsize * 6]), 1211 + [wq6]"r"(&q[d + nsize * 6]), 1212 + [wp7]"r"(&p[d + nsize * 7]), 1213 + [wq7]"r"(&q[d + nsize * 7]) 1221 1214 ); 1222 1215 } 1223 1216 }
+17
lib/raid6/rvv.h
··· 7 7 * Definitions for RISC-V RAID-6 code 8 8 */ 9 9 10 + #ifdef __KERNEL__ 11 + #include <asm/vector.h> 12 + #else 13 + #define kernel_vector_begin() 14 + #define kernel_vector_end() 15 + #include <sys/auxv.h> 16 + #include <asm/hwcap.h> 17 + #define has_vector() (getauxval(AT_HWCAP) & COMPAT_HWCAP_ISA_V) 18 + #endif 19 + 20 + #include <linux/raid/pq.h> 21 + 22 + static int rvv_has_vector(void) 23 + { 24 + return has_vector(); 25 + } 26 + 10 27 #define RAID6_RVV_WRAPPER(_n) \ 11 28 static void raid6_rvv ## _n ## _gen_syndrome(int disks, \ 12 29 size_t bytes, void **ptrs) \