jcs's openbsd hax
openbsd
at rk3128 832 lines 21 kB view raw
/* $OpenBSD: softraid_raid6.c,v 1.74 2025/06/13 13:00:49 jsg Exp $ */
/*
 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/task.h>
#include <sys/conf.h>
#include <sys/uio.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>

/*
 * GF(2^8) lookup tables (polynomial 0x11D), filled in by gf_init() and
 * gf_premul().  gf_map[gn] is a lazily-allocated 256-entry multiply-by-gn
 * table; gf_pow is doubled (2 * 255 entries plus padding) so gf_mul() can
 * add two logs without a % 255 reduction.
 */
uint8_t	*gf_map[256];
uint8_t	 gf_pow[768];
int	 gf_log[256];

/* RAID 6 functions. */
int	sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid6_init(struct sr_discipline *);
int	sr_raid6_rw(struct sr_workunit *);
int	sr_raid6_openings(struct sr_discipline *);
void	sr_raid6_intr(struct buf *);
int	sr_raid6_wu_done(struct sr_workunit *);
void	sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid6_set_vol_state(struct sr_discipline *);

void	sr_raid6_xorp(void *, void *, int);
void	sr_raid6_xorq(void *, void *, int, int);
int	sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long,
	    void *, int, int, void *, void *, int);

void	gf_init(void);
uint8_t	gf_inv(uint8_t);
int	gf_premul(uint8_t);
uint8_t	gf_mul(uint8_t, uint8_t);

/* Per-stripe failure flags used by sr_raid6_rw() to pick a recovery path. */
#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)	/* addressed data chunk (Dx) failed */
#define SR_FAILY	(1L << 1)	/* second data chunk (Dy) failed */
#define SR_FAILP	(1L << 2)	/* xor parity (P) chunk failed */
#define SR_FAILQ	(1L << 3)	/* GF256 parity (Q) chunk failed */

/*
 * Attached to a ccb via ccb_opaque; tells sr_raid6_intr() to fold the
 * completed read into the running P and/or Q accumulation buffers.
 */
struct sr_raid6_opaque {
	int	 gn;		/* GF multiplier applied for the Q fold */
	void	*pbuf;		/* xor-parity accumulator, or NULL */
	void	*qbuf;		/* q-parity accumulator, or NULL */
};

/* discipline initialisation. */
void
sr_raid6_discipline_init(struct sr_discipline *sd)
{
	/* Initialize GF256 tables. */
	gf_init();

	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID6;
	strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID6_NOWU;

	/* Setup discipline specific function pointers. */
	sd->sd_assemble = sr_raid6_assemble;
	sd->sd_create = sr_raid6_create;
	sd->sd_openings = sr_raid6_openings;
	sd->sd_scsi_rw = sr_raid6_rw;
	sd->sd_scsi_intr = sr_raid6_intr;
	sd->sd_scsi_wu_done = sr_raid6_wu_done;
	sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
	sd->sd_set_vol_state = sr_raid6_set_vol_state;
}

/*
 * Create a new RAID 6 volume: validate the chunk count and derive the
 * usable volume size (no_chunk - 2 data chunks, size rounded down to a
 * strip-size multiple).  Returns 0 or EINVAL.
 */
int
sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{
	if (no_chunk < 4) {
		sr_error(sd->sd_sc, "%s requires four or more chunks",
		    sd->sd_name);
		return EINVAL;
	}

	/*
	 * XXX add variable strip size later even though MAXPHYS is really
	 * the clever value, users like * to tinker with that type of stuff.
	 */
	sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
	sd->sd_meta->ssdi.ssd_size = (coerced_size &
	    ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
	    DEV_BSHIFT) - 1)) * (no_chunk - 2);

	return sr_raid6_init(sd);
}

/* Assemble an existing volume; metadata is already loaded, just init. */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}

/* Common create/assemble initialisation of runtime discipline state. */
int
sr_raid6_init(struct sr_discipline *sd)
{
	/* Initialise runtime values. */
	sd->mds.mdd_raid6.sr6_strip_bits =
	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
	if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
		sr_error(sd->sd_sc, "invalid strip size");
		return EINVAL;
	}

	/* only if stripsize <= MAXPHYS */
	sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);

	return 0;
}

int
sr_raid6_openings(struct sr_discipline *sd)
{
	return (sd->sd_max_wu >> 1);	/* 2 wu's per IO */
}

/*
 * Transition chunk c to new_state, panicking on an illegal transition,
 * then recompute the volume state and schedule a metadata save.
 */
void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition %d -> %d",
		    DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}

/*
 * Derive the volume state from the aggregate chunk states.  RAID 6
 * survives up to two missing chunks: fewer than nd - 2 online chunks
 * means the volume is offline.
 */
void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}

/* modes:
 *   readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, qbuf, NULL, 0);
 *   readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, NULL, 0);
 *   readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, qbuf, gf_pow[i]);
 */

/*
 * Translate a SCSI read/write into per-chunk I/Os.  Walks the request
 * strip by strip; for reads it either issues a direct read or, when the
 * addressed chunk is failed, schedules the GF256 reconstruction reads
 * (Dx+P, Dx+Dy, or Dx[+Q] cases below).  Writes use read-modify-write:
 * a companion work unit (wu_r) reads old data/P/Q, the original wu then
 * writes new data and both parities; the two are collided via
 * swu_collider.  Returns 0 on success, 1 on failure.
 */
int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit	*wu_r = NULL;
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_chunk		*scp;
	int			s, fail, i, gxinv, pxinv;
	daddr_t			blkno, lba;
	int64_t			chunk_offs, lbaoffs, offset, strip_offs;
	int64_t			strip_no, strip_size, strip_bits, row_size;
	int64_t			fchunk, no_chunk, chunk, qchunk, pchunk;
	long			length, datalen;
	void			*pbuf, *data, *qbuf;

	/* blkno and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blkno, "sr_raid6_rw"))
		goto bad;

	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
	/* no_chunk counts data chunks only; P and Q take the other two. */
	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
	row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;

	data = xs->data;
	datalen = xs->datalen;
	lbaoffs = blkno << DEV_BSHIFT;

	if (xs->flags & SCSI_DATA_OUT) {
		if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
			printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
			goto bad;
		}
		wu_r->swu_state = SR_WU_INPROGRESS;
		wu_r->swu_flags |= SR_WUF_DISCIPLINE;
	}

	wu->swu_blk_start = 0;
	while (datalen != 0) {
		strip_no = lbaoffs >> strip_bits;
		strip_offs = lbaoffs & (strip_size - 1);
		chunk_offs = (strip_no / no_chunk) << strip_bits;
		offset = chunk_offs + strip_offs;

		/* get size remaining in this stripe */
		length = MIN(strip_size - strip_offs, datalen);

		/* map disk offset to parity/data drive */
		chunk = strip_no % no_chunk;

		/* Q rotates right each row; P sits immediately before Q. */
		qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
		if (qchunk == 0)
			pchunk = no_chunk + 1;
		else
			pchunk = qchunk - 1;
		/* skip over the parity slots to find the data chunk */
		if (chunk >= pchunk)
			chunk++;
		if (chunk >= qchunk)
			chunk++;

		lba = offset >> DEV_BSHIFT;

		/* XXX big hammer.. exclude I/O from entire stripe */
		if (wu->swu_blk_start == 0)
			wu->swu_blk_start = (strip_no / no_chunk) * row_size;
		wu->swu_blk_end = (strip_no / no_chunk) * row_size +
		    (row_size - 1);

		fail = 0;
		fchunk = -1;

		/* Get disk-fail flags */
		for (i=0; i< no_chunk+2; i++) {
			scp = sd->sd_vol.sv_chunks[i];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (i == qchunk)
					fail |= SR_FAILQ;
				else if (i == pchunk)
					fail |= SR_FAILP;
				else if (i == chunk)
					fail |= SR_FAILX;
				else {
					/* dual data-disk failure */
					fail |= SR_FAILY;
					fchunk = i;
				}
				break;
			}
		}
		if (xs->flags & SCSI_DATA_IN) {
			if (!(fail & SR_FAILX)) {
				/* drive is good. issue single read request */
				if (sr_raid6_addio(wu, chunk, lba, length,
				    data, xs->flags, 0, NULL, NULL, 0))
					goto bad;
			} else if (fail & SR_FAILP) {
				/* Dx, P failed */
				printf("Disk %llx offline, "
				    "regenerating Dx+P\n", chunk);

				gxinv = gf_inv(gf_pow[chunk]);

				/* Calculate:  Dx = (Q^Dz*gz)*inv(gx) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* Read Dz * gz * inv(gx) */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk || i == chunk)
						continue;

					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}

				/* data will contain correct value on completion */
			} else if (fail & SR_FAILY) {
				/* Dx, Dy failed */
				printf("Disk %llx & %llx offline, "
				    "regenerating Dx+Dy\n", chunk, fchunk);

				gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
				pxinv = gf_mul(gf_pow[fchunk], gxinv);

				/* read Q * inv(gx + gy) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* read P * gy * inv(gx + gy) */
				if (sr_raid6_addio(wu, pchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
					goto bad;

				/* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
				 *   Q:  sr_raid6_xorp(qbuf, --, length);
				 *   P:  sr_raid6_xorp(pbuf, --, length);
				 *   Dz: sr_raid6_xorp(pbuf, --, length);
				 *	 sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
				 */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk || i == fchunk)
						continue;

					/* read Dz * (gz + gy) * inv(gx + gy) */
					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    pxinv ^ gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}
			} else {
				/* Two cases: single disk (Dx) or (Dx+Q)
				 *   Dx = Dz ^ P (same as RAID5)
				 */
				printf("Disk %llx offline, "
				    "regenerating Dx%s\n", chunk,
				    fail & SR_FAILQ ? "+Q" : " single");

				/* Calculate: Dx = P^Dz
				 *   P:  sr_raid6_xorp(data, ---, length);
				 *   Dz: sr_raid6_xorp(data, ---, length);
				 */
				memset(data, 0, length);
				for (i = 0; i < no_chunk+2; i++) {
					if (i != chunk && i != qchunk) {
						/* Read Dz */
						if (sr_raid6_addio(wu, i, lba,
						    length, NULL, SCSI_DATA_IN,
						    0, data, NULL, 0))
							goto bad;
					}
				}

				/* data will contain correct value on completion */
			}
		} else {
			/* XXX handle writes to failed/offline disk? */
			if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
				goto bad;

			/*
			 * initialize pbuf with contents of new data to be
			 * written. This will be XORed with old data and old
			 * parity in the intr routine. The result in pbuf
			 * is the new parity data.
			 */
			qbuf = sr_block_get(sd, length);
			if (qbuf == NULL)
				goto bad;

			pbuf = sr_block_get(sd, length);
			if (pbuf == NULL)
				goto bad;

			/* Calculate P = Dn; Q = gn * Dn */
			if (gf_premul(gf_pow[chunk]))
				goto bad;
			sr_raid6_xorp(pbuf, data, length);
			sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);

			/* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
			if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
				goto bad;

			/* Read old xor-parity: P ^= P' */
			if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, NULL, 0))
				goto bad;

			/* Read old q-parity: Q ^= Q' */
			if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, qbuf, NULL, 0))
				goto bad;

			/* write new data */
			if (sr_raid6_addio(wu, chunk, lba, length, data,
			    xs->flags, 0, NULL, NULL, 0))
				goto bad;

			/* write new xor-parity */
			if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;

			/* write new q-parity */
			if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;
		}

		/* advance to next block */
		lbaoffs += length;
		datalen -= length;
		data += length;
	}

	s = splbio();
	if (wu_r) {
		/* collide write request with reads */
		wu_r->swu_blk_start = wu->swu_blk_start;
		wu_r->swu_blk_end = wu->swu_blk_end;

		wu->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu;
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);

		wu = wu_r;
	}
	splx(s);

	sr_schedule_wu(wu);

	return (0);
bad:
	/* XXX - can leak pbuf/qbuf on error. */
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}

/*
 * Per-ccb completion: fold the read data into the P/Q accumulation
 * buffers if requested via ccb_opaque, release temporary buffers, and
 * hand the ccb back to common work-unit completion.
 */
void
sr_raid6_intr(struct buf *bp)
{
	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
	struct sr_workunit	*wu = ccb->ccb_wu;
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_raid6_opaque	*pq = ccb->ccb_opaque;
	int			s;

	DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
	    DEVNAME(sd->sd_sc), bp, wu->swu_xs);

	s = splbio();
	sr_ccb_done(ccb);

	/* XOR data to result. */
	if (ccb->ccb_state == SR_CCB_OK && pq) {
		if (pq->pbuf)
			/* Calculate xor-parity */
			sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount);
		if (pq->qbuf)
			/* Calculate q-parity */
			sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount, pq->gn);
		free(pq, M_DEVBUF, 0);
		ccb->ccb_opaque = NULL;
	}

	/* Free allocated data buffer. */
	if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
		sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
		ccb->ccb_buf.b_data = NULL;
	}

	sr_wu_done(wu);
	splx(s);
}

/*
 * Work-unit completion: reads that failed entirely are retried once
 * through sd_scsi_rw (which will take the reconstruction paths);
 * failed writes are permanent errors.
 */
int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}

/*
 * Queue one chunk I/O on wu.  If data is NULL a scratch buffer is
 * allocated and freed at completion (SR_CCBF_FREEBUF).  Non-NULL
 * pbuf/qbuf attach an sr_raid6_opaque so sr_raid6_intr() folds the
 * read into those accumulators (qbuf scaled by gn in GF256).
 * Returns 0 on success, -1 on allocation failure.
 */
int
sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
    long len, void *data, int xsflags, int ccbflags, void *pbuf,
    void *qbuf, int gn)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;
	struct sr_raid6_opaque	*pqbuf;

	DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n",
	    (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
	    (long long)blkno, len, pbuf, qbuf);

	/* Allocate temporary buffer. */
	if (data == NULL) {
		data = sr_block_get(sd, len);
		if (data == NULL)
			return (-1);
		ccbflags |= SR_CCBF_FREEBUF;
	}

	ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
	if (ccb == NULL) {
		if (ccbflags & SR_CCBF_FREEBUF)
			sr_block_put(sd, data, len);
		return (-1);
	}
	if (pbuf || qbuf) {
		/* XXX - can leak data and ccb on failure. */
		if (qbuf && gf_premul(gn))
			return (-1);

		/* XXX - should be preallocated? */
		pqbuf = malloc(sizeof(struct sr_raid6_opaque),
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (pqbuf == NULL) {
			sr_ccb_put(ccb);
			return (-1);
		}
		pqbuf->pbuf = pbuf;
		pqbuf->qbuf = qbuf;
		pqbuf->gn = gn;
		ccb->ccb_opaque = pqbuf;
	}
	sr_wu_enqueue_ccb(wu, ccb);

	return (0);
}

/* Perform RAID6 parity calculation.
 *   P=xor parity, Q=GF256 parity, D=data, gn=disk# */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t *pbuf = p, *data = d;

	/* len is assumed to be a multiple of 4 (strip-sized buffers) */
	len >>= 2;
	while (len--)
		*pbuf++ ^= *data++;
}

/* Q ^= gn * D, four GF256 byte-multiplies per 32-bit word via gf_map. */
void
sr_raid6_xorq(void *q, void *d, int len, int gn)
{
	uint32_t	*qbuf = q, *data = d, x;
	uint8_t		*gn_map = gf_map[gn];

	len >>= 2;
	while (len--) {
		x = *data++;
		*qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) |
		    ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) |
		    ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) |
		    ((uint32_t)gn_map[(x >> 24) & 0xff] << 24));
	}
}

/* Create GF256 log/pow tables:  polynomial = 0x11D */
void
gf_init(void)
{
	int i;
	uint8_t p = 1;

	/* use 2N pow table to avoid using % in multiply */
	for (i=0; i<256; i++) {
		gf_log[p] = i;
		gf_pow[i] = gf_pow[i+255] = p;
		p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00));
	}
	/* log(0) is undefined; 512 indexes past the live pow entries */
	gf_log[0] = 512;
}

/* Multiplicative inverse: inv(a) = pow(255 - log(a)). */
uint8_t
gf_inv(uint8_t a)
{
	return gf_pow[255 - gf_log[a]];
}

/* GF256 multiply via log/pow tables (doubled pow table avoids % 255). */
uint8_t
gf_mul(uint8_t a, uint8_t b)
{
	return gf_pow[gf_log[a] + gf_log[b]];
}

/* Precalculate multiplication tables for drive gn */
int
gf_premul(uint8_t gn)
{
	int i;

	/* already built (tables are kept for the lifetime of the module) */
	if (gf_map[gn] != NULL)
		return (0);

	if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL)
		return (-1);

	for (i=0; i<256; i++)
		gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]];
	return (0);
}