/* jcs's openbsd hax -- openbsd */
1/* $OpenBSD: softraid_raid6.c,v 1.74 2025/06/13 13:00:49 jsg Exp $ */
2/*
3 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
4 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include "bio.h"
20
21#include <sys/param.h>
22#include <sys/systm.h>
23#include <sys/buf.h>
24#include <sys/device.h>
25#include <sys/ioctl.h>
26#include <sys/malloc.h>
27#include <sys/kernel.h>
28#include <sys/disk.h>
29#include <sys/rwlock.h>
30#include <sys/queue.h>
31#include <sys/fcntl.h>
32#include <sys/mount.h>
33#include <sys/sensors.h>
34#include <sys/stat.h>
35#include <sys/task.h>
36#include <sys/conf.h>
37#include <sys/uio.h>
38
39#include <scsi/scsi_all.h>
40#include <scsi/scsiconf.h>
41#include <scsi/scsi_disk.h>
42
43#include <dev/softraidvar.h>
44
/* Per-multiplier product tables (gn*x for all x), built lazily by gf_premul(). */
uint8_t *gf_map[256];
/* g^i; table is doubled (indices 0..510 valid) so gf_mul() needs no mod 255. */
uint8_t gf_pow[768];
/* log_g(x); gf_log[0] holds the sentinel 512 (see gf_init() and gf_mul()). */
int gf_log[256];
48
/* RAID 6 functions. */
int sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
    int, int64_t);
int sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
    int, void *);
int sr_raid6_init(struct sr_discipline *);
int sr_raid6_rw(struct sr_workunit *);
int sr_raid6_openings(struct sr_discipline *);
void sr_raid6_intr(struct buf *);
int sr_raid6_wu_done(struct sr_workunit *);
void sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void sr_raid6_set_vol_state(struct sr_discipline *);

/* Parity helpers: P is a plain XOR, Q is a GF(2^8)-weighted XOR. */
void sr_raid6_xorp(void *, void *, int);
void sr_raid6_xorq(void *, void *, int, int);
int sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long,
    void *, int, int, void *, void *, int);

/* GF(2^8) arithmetic over the polynomial 0x11D. */
void gf_init(void);
uint8_t gf_inv(uint8_t);
int gf_premul(uint8_t);
uint8_t gf_mul(uint8_t, uint8_t);
71
/* Per-stripe failure mask: which of Dx/Dy/P/Q sit on a bad chunk. */
#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)	/* addressed data chunk (Dx) failed */
#define SR_FAILY	(1L << 1)	/* a second data chunk (Dy) failed */
#define SR_FAILP	(1L << 2)	/* P (xor parity) chunk failed */
#define SR_FAILQ	(1L << 3)	/* Q (GF parity) chunk failed */

/* Per-CCB context: fold a completed read into P and/or Q buffers at intr. */
struct sr_raid6_opaque {
	int gn;		/* GF multiplier applied to data before XOR into qbuf */
	void *pbuf;	/* xor-parity accumulator, may be NULL */
	void *qbuf;	/* q-parity accumulator, may be NULL */
};
83
84/* discipline initialisation. */
85void
86sr_raid6_discipline_init(struct sr_discipline *sd)
87{
88 /* Initialize GF256 tables. */
89 gf_init();
90
91 /* Fill out discipline members. */
92 sd->sd_type = SR_MD_RAID6;
93 strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
94 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
95 SR_CAP_REDUNDANT;
96 sd->sd_max_wu = SR_RAID6_NOWU;
97
98 /* Setup discipline specific function pointers. */
99 sd->sd_assemble = sr_raid6_assemble;
100 sd->sd_create = sr_raid6_create;
101 sd->sd_openings = sr_raid6_openings;
102 sd->sd_scsi_rw = sr_raid6_rw;
103 sd->sd_scsi_intr = sr_raid6_intr;
104 sd->sd_scsi_wu_done = sr_raid6_wu_done;
105 sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
106 sd->sd_set_vol_state = sr_raid6_set_vol_state;
107}
108
109int
110sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
111 int no_chunk, int64_t coerced_size)
112{
113 if (no_chunk < 4) {
114 sr_error(sd->sd_sc, "%s requires four or more chunks",
115 sd->sd_name);
116 return EINVAL;
117 }
118
119 /*
120 * XXX add variable strip size later even though MAXPHYS is really
121 * the clever value, users like * to tinker with that type of stuff.
122 */
123 sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
124 sd->sd_meta->ssdi.ssd_size = (coerced_size &
125 ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
126 DEV_BSHIFT) - 1)) * (no_chunk - 2);
127
128 return sr_raid6_init(sd);
129}
130
/*
 * Assemble an existing RAID 6 volume.  The metadata has already been
 * read, so only the runtime state needs to be (re)initialised.
 */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}
137
138int
139sr_raid6_init(struct sr_discipline *sd)
140{
141 /* Initialise runtime values. */
142 sd->mds.mdd_raid6.sr6_strip_bits =
143 sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
144 if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
145 sr_error(sd->sd_sc, "invalid strip size");
146 return EINVAL;
147 }
148
149 /* only if stripsize <= MAXPHYS */
150 sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);
151
152 return 0;
153}
154
/*
 * Number of command openings advertised to SCSI: half of the work
 * units, since a write consumes two (the read phase and write phase).
 */
int
sr_raid6_openings(struct sr_discipline *sd)
{
	return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
}
160
/*
 * Move chunk c to new_state, validating the transition against the
 * chunk state machine below.  An invalid transition panics.  On a
 * successful change the volume state is re-derived and a metadata
 * flush is scheduled so the new state is persisted on disk.
 */
void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		/* An offline chunk may only come back via a rebuild. */
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition %d -> %d",
		    DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	/* Schedule a metadata save so the state change survives a reboot. */
	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}
236
/*
 * Re-derive the volume state from the per-chunk states.  RAID 6
 * survives up to two missing chunks: with fewer than nd-2 chunks
 * online the volume goes offline; otherwise scrub/rebuild/degraded
 * take precedence in that order.  An invalid transition panics.
 */
void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int states[SR_MAX_STATES];
	int new_state, i, s, nd;
	int old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	/* Count how many chunks are in each state. */
	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	/* Two chunks may be lost before the volume goes offline. */
	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	/* Validate the transition against the volume state machine. */
	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}
359
360/* modes:
361 * readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
362 * 0, qbuf, NULL, 0);
363 * readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
364 * 0, pbuf, NULL, 0);
365 * readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
366 * 0, pbuf, qbuf, gf_pow[i]);
367 */
368
/*
 * Main I/O entry point.  Splits the transfer into per-stripe pieces,
 * mapping each piece to its data chunk and the rotating P and Q
 * parity chunks.  Reads from failed chunks are reconstructed from
 * the surviving chunks using GF(2^8) arithmetic; writes use a
 * read-modify-write scheme with a second work unit (wu_r) for the
 * read phase that is collided with the deferred write phase.
 * Returns 0 on success, 1 on failure (xs error already set).
 */
int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit *wu_r = NULL;
	struct sr_discipline *sd = wu->swu_dis;
	struct scsi_xfer *xs = wu->swu_xs;
	struct sr_chunk *scp;
	int s, fail, i, gxinv, pxinv;
	daddr_t blkno, lba;
	int64_t chunk_offs, lbaoffs, offset, strip_offs;
	int64_t strip_no, strip_size, strip_bits, row_size;
	int64_t fchunk, no_chunk, chunk, qchunk, pchunk;
	long length, datalen;
	void *pbuf, *data, *qbuf;

	/* blkno and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blkno, "sr_raid6_rw"))
		goto bad;

	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
	/* Two of the chunks hold P and Q parity rather than data. */
	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
	row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;

	data = xs->data;
	datalen = xs->datalen;
	lbaoffs = blkno << DEV_BSHIFT;

	if (xs->flags & SCSI_DATA_OUT) {
		/* Writes need a second wu for the read phase of RMW. */
		if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
			printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
			goto bad;
		}
		wu_r->swu_state = SR_WU_INPROGRESS;
		wu_r->swu_flags |= SR_WUF_DISCIPLINE;
	}

	wu->swu_blk_start = 0;
	while (datalen != 0) {
		/* Locate position within the current strip. */
		strip_no = lbaoffs >> strip_bits;
		strip_offs = lbaoffs & (strip_size - 1);
		chunk_offs = (strip_no / no_chunk) << strip_bits;
		offset = chunk_offs + strip_offs;

		/* get size remaining in this stripe */
		length = MIN(strip_size - strip_offs, datalen);

		/* map disk offset to parity/data drive */
		chunk = strip_no % no_chunk;

		/* Parity chunks rotate right by one per stripe row. */
		qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
		if (qchunk == 0)
			pchunk = no_chunk + 1;
		else
			pchunk = qchunk - 1;
		/* Skip over the parity chunks to find the data chunk. */
		if (chunk >= pchunk)
			chunk++;
		if (chunk >= qchunk)
			chunk++;

		lba = offset >> DEV_BSHIFT;

		/* XXX big hammer.. exclude I/O from entire stripe */
		if (wu->swu_blk_start == 0)
			wu->swu_blk_start = (strip_no / no_chunk) * row_size;
		wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1);

		fail = 0;
		fchunk = -1;

		/* Get disk-fail flags */
		for (i=0; i< no_chunk+2; i++) {
			scp = sd->sd_vol.sv_chunks[i];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (i == qchunk)
					fail |= SR_FAILQ;
				else if (i == pchunk)
					fail |= SR_FAILP;
				else if (i == chunk)
					fail |= SR_FAILX;
				else {
					/* dual data-disk failure */
					fail |= SR_FAILY;
					fchunk = i;
				}
				break;
			}
		}
		if (xs->flags & SCSI_DATA_IN) {
			if (!(fail & SR_FAILX)) {
				/* drive is good. issue single read request */
				if (sr_raid6_addio(wu, chunk, lba, length,
				    data, xs->flags, 0, NULL, NULL, 0))
					goto bad;
			} else if (fail & SR_FAILP) {
				/* Dx, P failed */
				printf("Disk %llx offline, "
				    "regenerating Dx+P\n", chunk);

				gxinv = gf_inv(gf_pow[chunk]);

				/* Calculate: Dx = (Q^Dz*gz)*inv(gx) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* Read Dz * gz * inv(gx) */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk || i == chunk)
						continue;

					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}

				/* data will contain correct value on completion */
			} else if (fail & SR_FAILY) {
				/* Dx, Dy failed */
				printf("Disk %llx & %llx offline, "
				    "regenerating Dx+Dy\n", chunk, fchunk);

				gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
				pxinv = gf_mul(gf_pow[fchunk], gxinv);

				/* read Q * inv(gx + gy) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* read P * gy * inv(gx + gy) */
				if (sr_raid6_addio(wu, pchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
					goto bad;

				/* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
				 * Q: sr_raid6_xorp(qbuf, --, length);
				 * P: sr_raid6_xorp(pbuf, --, length);
				 * Dz: sr_raid6_xorp(pbuf, --, length);
				 *     sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
				 */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk || i == fchunk)
						continue;

					/* read Dz * (gz + gy) * inv(gx + gy) */
					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    pxinv ^ gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}
			} else {
				/* Two cases: single disk (Dx) or (Dx+Q)
				 * Dx = Dz ^ P (same as RAID5)
				 */
				printf("Disk %llx offline, "
				    "regenerating Dx%s\n", chunk,
				    fail & SR_FAILQ ? "+Q" : " single");

				/* Calculate: Dx = P^Dz
				 * P: sr_raid6_xorp(data, ---, length);
				 * Dz: sr_raid6_xorp(data, ---, length);
				 */
				memset(data, 0, length);
				for (i = 0; i < no_chunk+2; i++) {
					if (i != chunk && i != qchunk) {
						/* Read Dz */
						if (sr_raid6_addio(wu, i, lba,
						    length, NULL, SCSI_DATA_IN,
						    0, data, NULL, 0))
							goto bad;
					}
				}

				/* data will contain correct value on completion */
			}
		} else {
			/* XXX handle writes to failed/offline disk? */
			if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
				goto bad;

			/*
			 * initialize pbuf with contents of new data to be
			 * written. This will be XORed with old data and old
			 * parity in the intr routine. The result in pbuf
			 * is the new parity data.
			 */
			qbuf = sr_block_get(sd, length);
			if (qbuf == NULL)
				goto bad;

			pbuf = sr_block_get(sd, length);
			if (pbuf == NULL)
				goto bad;

			/* Calculate P = Dn; Q = gn * Dn */
			if (gf_premul(gf_pow[chunk]))
				goto bad;
			sr_raid6_xorp(pbuf, data, length);
			sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);

			/* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
			if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
				goto bad;

			/* Read old xor-parity: P ^= P' */
			if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, NULL, 0))
				goto bad;

			/* Read old q-parity: Q ^= Q' */
			if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, qbuf, NULL, 0))
				goto bad;

			/* write new data */
			if (sr_raid6_addio(wu, chunk, lba, length, data,
			    xs->flags, 0, NULL, NULL, 0))
				goto bad;

			/* write new xor-parity */
			if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;

			/* write new q-parity */
			if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;
		}

		/* advance to next block */
		lbaoffs += length;
		datalen -= length;
		data += length;
	}

	s = splbio();
	if (wu_r) {
		/* collide write request with reads */
		wu_r->swu_blk_start = wu->swu_blk_start;
		wu_r->swu_blk_end = wu->swu_blk_end;

		/* Defer the write phase until the read phase completes. */
		wu->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu;
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);

		wu = wu_r;
	}
	splx(s);

	sr_schedule_wu(wu);

	return (0);
bad:
	/* XXX - can leak pbuf/qbuf on error. */
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}
638
639void
640sr_raid6_intr(struct buf *bp)
641{
642 struct sr_ccb *ccb = (struct sr_ccb *)bp;
643 struct sr_workunit *wu = ccb->ccb_wu;
644 struct sr_discipline *sd = wu->swu_dis;
645 struct sr_raid6_opaque *pq = ccb->ccb_opaque;
646 int s;
647
648 DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
649 DEVNAME(sd->sd_sc), bp, wu->swu_xs);
650
651 s = splbio();
652 sr_ccb_done(ccb);
653
654 /* XOR data to result. */
655 if (ccb->ccb_state == SR_CCB_OK && pq) {
656 if (pq->pbuf)
657 /* Calculate xor-parity */
658 sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
659 ccb->ccb_buf.b_bcount);
660 if (pq->qbuf)
661 /* Calculate q-parity */
662 sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
663 ccb->ccb_buf.b_bcount, pq->gn);
664 free(pq, M_DEVBUF, 0);
665 ccb->ccb_opaque = NULL;
666 }
667
668 /* Free allocated data buffer. */
669 if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
670 sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
671 ccb->ccb_buf.b_data = NULL;
672 }
673
674 sr_wu_done(wu);
675 splx(s);
676}
677
/*
 * Decide the fate of a completed work unit.  Internal (discipline)
 * work units report success unconditionally; reads with no successful
 * I/O are retried once through the reconstruction path, while failed
 * writes are reported as permanent errors.
 */
int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline *sd = wu->swu_dis;
	struct scsi_xfer *xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		/* Re-issue the read; sr_raid6_rw will reconstruct if needed. */
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}
711
712int
713sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
714 long len, void *data, int xsflags, int ccbflags, void *pbuf,
715 void *qbuf, int gn)
716{
717 struct sr_discipline *sd = wu->swu_dis;
718 struct sr_ccb *ccb;
719 struct sr_raid6_opaque *pqbuf;
720
721 DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n",
722 (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
723 (long long)blkno, len, pbuf, qbuf);
724
725 /* Allocate temporary buffer. */
726 if (data == NULL) {
727 data = sr_block_get(sd, len);
728 if (data == NULL)
729 return (-1);
730 ccbflags |= SR_CCBF_FREEBUF;
731 }
732
733 ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
734 if (ccb == NULL) {
735 if (ccbflags & SR_CCBF_FREEBUF)
736 sr_block_put(sd, data, len);
737 return (-1);
738 }
739 if (pbuf || qbuf) {
740 /* XXX - can leak data and ccb on failure. */
741 if (qbuf && gf_premul(gn))
742 return (-1);
743
744 /* XXX - should be preallocated? */
745 pqbuf = malloc(sizeof(struct sr_raid6_opaque),
746 M_DEVBUF, M_ZERO | M_NOWAIT);
747 if (pqbuf == NULL) {
748 sr_ccb_put(ccb);
749 return (-1);
750 }
751 pqbuf->pbuf = pbuf;
752 pqbuf->qbuf = qbuf;
753 pqbuf->gn = gn;
754 ccb->ccb_opaque = pqbuf;
755 }
756 sr_wu_enqueue_ccb(wu, ccb);
757
758 return (0);
759}
760
/* Perform RAID6 parity calculation.
 * P=xor parity, Q=GF256 parity, D=data, gn=disk#
 *
 * XOR len bytes of d into p, a machine word (32 bits) at a time.
 * Only whole 32-bit words are processed; callers pass word-multiple
 * lengths.
 */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t *dst = p;
	const uint32_t *src = d;
	int i, words;

	words = len >> 2;
	for (i = 0; i < words; i++)
		dst[i] ^= src[i];
}
772
773void
774sr_raid6_xorq(void *q, void *d, int len, int gn)
775{
776 uint32_t *qbuf = q, *data = d, x;
777 uint8_t *gn_map = gf_map[gn];
778
779 len >>= 2;
780 while (len--) {
781 x = *data++;
782 *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) |
783 ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) |
784 ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) |
785 ((uint32_t)gn_map[(x >> 24) & 0xff] << 24));
786 }
787}
788
789/* Create GF256 log/pow tables: polynomial = 0x11D */
790void
791gf_init(void)
792{
793 int i;
794 uint8_t p = 1;
795
796 /* use 2N pow table to avoid using % in multiply */
797 for (i=0; i<256; i++) {
798 gf_log[p] = i;
799 gf_pow[i] = gf_pow[i+255] = p;
800 p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00));
801 }
802 gf_log[0] = 512;
803}
804
/*
 * Multiplicative inverse in GF(2^8): a^-1 = g^(255 - log(a)).
 * NOTE(review): a must be non-zero - gf_log[0] is the sentinel 512,
 * which would make the index here negative.
 */
uint8_t
gf_inv(uint8_t a)
{
	return gf_pow[255 - gf_log[a]];
}
810
/*
 * Multiply in GF(2^8) via the log/pow tables.  gf_pow is doubled, so
 * the sum of two logs of non-zero factors (at most 510) needs no mod
 * 255 step.  A single zero factor gives index >= 512, landing in the
 * zero-filled tail of gf_pow and correctly yielding 0.
 * NOTE(review): both factors zero would index past gf_pow (512+512);
 * callers in this file never do that - verify before reusing.
 */
uint8_t
gf_mul(uint8_t a, uint8_t b)
{
	return gf_pow[gf_log[a] + gf_log[b]];
}
816
/* Precalculate multiplication tables for drive gn */
int
gf_premul(uint8_t gn)
{
	int i;

	/* Already built; tables persist for the lifetime of the kernel. */
	if (gf_map[gn] != NULL)
		return (0);

	if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL)
		return (-1);

	/* gf_map[gn][i] = gn * i in GF(2^8), for every byte value i. */
	for (i=0; i<256; i++)
		gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]];
	return (0);
}