Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

async_tx: add support for asynchronous RAID6 recovery operations

async_raid6_2data_recov() recovers two data disk failures

async_raid6_datap_recov() recovers a data disk and the P disk

These routines are a port of the synchronous versions found in
drivers/md/raid6recov.c. The primary difference is breaking out the xor
operations into separate calls to async_xor. Two helper routines are
introduced to perform scalar multiplication where needed.
async_sum_product() multiplies two sources by scalar coefficients and
then sums (xor) the result. async_mult() simply multiplies a single
source by a scalar.

This implementation also includes, in contrast to the original
synchronous-only code, special case handling for the 4-disk and 5-disk
array cases. In these situations the default N-disk algorithm will
present 0-source or 1-source operations to dma devices. To cover for
dma devices where the minimum source count is 2 we implement 4-disk and
5-disk handling in the recovery code.

[ Impact: asynchronous raid6 recovery routines for 2data and datap cases ]

Cc: Yuri Tikhonov <yur@emcraft.com>
Cc: Ilya Yanok <yanok@emcraft.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>


+466
+4
Documentation/crypto/async-tx-api.txt
··· 67 67 pq - generate the p+q (raid6 syndrome) from a series of source buffers 68 68 pq_val - validate that a p and or q buffer are in sync with a given series of 69 69 sources 70 + datap - (raid6_datap_recov) recover a raid6 data block and the p block 71 + from the given sources 72 + 2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given 73 + sources 70 74 71 75 3.3 Descriptor management: 72 76 The return value is non-NULL and points to a 'descriptor' when the operation
+5
crypto/async_tx/Kconfig
··· 18 18 tristate 19 19 select ASYNC_CORE 20 20 21 + config ASYNC_RAID6_RECOV 22 + tristate 23 + select ASYNC_CORE 24 + select ASYNC_PQ 25 +
+1
crypto/async_tx/Makefile
··· 3 3 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o 4 4 obj-$(CONFIG_ASYNC_XOR) += async_xor.o 5 5 obj-$(CONFIG_ASYNC_PQ) += async_pq.o 6 + obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+448
crypto/async_tx/async_raid6_recov.c
··· 1 + /* 2 + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. 3 + * Copyright(c) 2009 Intel Corporation 4 + * 5 + * based on raid6recov.c: 6 + * Copyright 2002 H. Peter Anvin 7 + * 8 + * This program is free software; you can redistribute it and/or modify it 9 + * under the terms of the GNU General Public License as published by the Free 10 + * Software Foundation; either version 2 of the License, or (at your option) 11 + * any later version. 12 + * 13 + * This program is distributed in the hope that it will be useful, but WITHOUT 14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 + * more details. 17 + * 18 + * You should have received a copy of the GNU General Public License along with 19 + * this program; if not, write to the Free Software Foundation, Inc., 51 20 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 21 + * 22 + */ 23 + #include <linux/kernel.h> 24 + #include <linux/interrupt.h> 25 + #include <linux/dma-mapping.h> 26 + #include <linux/raid/pq.h> 27 + #include <linux/async_tx.h> 28 + 29 + static struct dma_async_tx_descriptor * 30 + async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, 31 + size_t len, struct async_submit_ctl *submit) 32 + { 33 + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, 34 + &dest, 1, srcs, 2, len); 35 + struct dma_device *dma = chan ? 
chan->device : NULL; 36 + const u8 *amul, *bmul; 37 + u8 ax, bx; 38 + u8 *a, *b, *c; 39 + 40 + if (dma) { 41 + dma_addr_t dma_dest[2]; 42 + dma_addr_t dma_src[2]; 43 + struct device *dev = dma->dev; 44 + struct dma_async_tx_descriptor *tx; 45 + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; 46 + 47 + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); 48 + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); 49 + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); 50 + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, 51 + len, dma_flags); 52 + if (tx) { 53 + async_tx_submit(chan, tx, submit); 54 + return tx; 55 + } 56 + } 57 + 58 + /* run the operation synchronously */ 59 + async_tx_quiesce(&submit->depend_tx); 60 + amul = raid6_gfmul[coef[0]]; 61 + bmul = raid6_gfmul[coef[1]]; 62 + a = page_address(srcs[0]); 63 + b = page_address(srcs[1]); 64 + c = page_address(dest); 65 + 66 + while (len--) { 67 + ax = amul[*a++]; 68 + bx = bmul[*b++]; 69 + *c++ = ax ^ bx; 70 + } 71 + 72 + return NULL; 73 + } 74 + 75 + static struct dma_async_tx_descriptor * 76 + async_mult(struct page *dest, struct page *src, u8 coef, size_t len, 77 + struct async_submit_ctl *submit) 78 + { 79 + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, 80 + &dest, 1, &src, 1, len); 81 + struct dma_device *dma = chan ? 
chan->device : NULL; 82 + const u8 *qmul; /* Q multiplier table */ 83 + u8 *d, *s; 84 + 85 + if (dma) { 86 + dma_addr_t dma_dest[2]; 87 + dma_addr_t dma_src[1]; 88 + struct device *dev = dma->dev; 89 + struct dma_async_tx_descriptor *tx; 90 + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; 91 + 92 + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); 93 + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); 94 + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, 95 + len, dma_flags); 96 + if (tx) { 97 + async_tx_submit(chan, tx, submit); 98 + return tx; 99 + } 100 + } 101 + 102 + /* no channel available, or failed to allocate a descriptor, so 103 + * perform the operation synchronously 104 + */ 105 + async_tx_quiesce(&submit->depend_tx); 106 + qmul = raid6_gfmul[coef]; 107 + d = page_address(dest); 108 + s = page_address(src); 109 + 110 + while (len--) 111 + *d++ = qmul[*s++]; 112 + 113 + return NULL; 114 + } 115 + 116 + static struct dma_async_tx_descriptor * 117 + __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, 118 + struct async_submit_ctl *submit) 119 + { 120 + struct dma_async_tx_descriptor *tx = NULL; 121 + struct page *p, *q, *a, *b; 122 + struct page *srcs[2]; 123 + unsigned char coef[2]; 124 + enum async_tx_flags flags = submit->flags; 125 + dma_async_tx_callback cb_fn = submit->cb_fn; 126 + void *cb_param = submit->cb_param; 127 + void *scribble = submit->scribble; 128 + 129 + p = blocks[4-2]; 130 + q = blocks[4-1]; 131 + 132 + a = blocks[faila]; 133 + b = blocks[failb]; 134 + 135 + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ 136 + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ 137 + srcs[0] = p; 138 + srcs[1] = q; 139 + coef[0] = raid6_gfexi[failb-faila]; 140 + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; 141 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 142 + tx = async_sum_product(b, srcs, coef, bytes, submit); 143 + 144 + /* Dy = P+Pxy+Dx */ 145 + srcs[0] = p; 
146 + srcs[1] = b; 147 + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, 148 + cb_param, scribble); 149 + tx = async_xor(a, srcs, 0, 2, bytes, submit); 150 + 151 + return tx; 152 + 153 + } 154 + 155 + static struct dma_async_tx_descriptor * 156 + __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, 157 + struct async_submit_ctl *submit) 158 + { 159 + struct dma_async_tx_descriptor *tx = NULL; 160 + struct page *p, *q, *g, *dp, *dq; 161 + struct page *srcs[2]; 162 + unsigned char coef[2]; 163 + enum async_tx_flags flags = submit->flags; 164 + dma_async_tx_callback cb_fn = submit->cb_fn; 165 + void *cb_param = submit->cb_param; 166 + void *scribble = submit->scribble; 167 + int uninitialized_var(good); 168 + int i; 169 + 170 + for (i = 0; i < 3; i++) { 171 + if (i == faila || i == failb) 172 + continue; 173 + else { 174 + good = i; 175 + break; 176 + } 177 + } 178 + BUG_ON(i >= 3); 179 + 180 + p = blocks[5-2]; 181 + q = blocks[5-1]; 182 + g = blocks[good]; 183 + 184 + /* Compute syndrome with zero for the missing data pages 185 + * Use the dead data pages as temporary storage for delta p and 186 + * delta q 187 + */ 188 + dp = blocks[faila]; 189 + dq = blocks[failb]; 190 + 191 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 192 + tx = async_memcpy(dp, g, 0, 0, bytes, submit); 193 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 194 + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); 195 + 196 + /* compute P + Pxy */ 197 + srcs[0] = dp; 198 + srcs[1] = p; 199 + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, 200 + scribble); 201 + tx = async_xor(dp, srcs, 0, 2, bytes, submit); 202 + 203 + /* compute Q + Qxy */ 204 + srcs[0] = dq; 205 + srcs[1] = q; 206 + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, 207 + scribble); 208 + tx = async_xor(dq, srcs, 0, 2, bytes, submit); 209 + 210 + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ 211 + srcs[0] = dp; 212 + srcs[1] = dq; 213 + coef[0] 
= raid6_gfexi[failb-faila]; 214 + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; 215 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 216 + tx = async_sum_product(dq, srcs, coef, bytes, submit); 217 + 218 + /* Dy = P+Pxy+Dx */ 219 + srcs[0] = dp; 220 + srcs[1] = dq; 221 + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, 222 + cb_param, scribble); 223 + tx = async_xor(dp, srcs, 0, 2, bytes, submit); 224 + 225 + return tx; 226 + } 227 + 228 + static struct dma_async_tx_descriptor * 229 + __2data_recov_n(int disks, size_t bytes, int faila, int failb, 230 + struct page **blocks, struct async_submit_ctl *submit) 231 + { 232 + struct dma_async_tx_descriptor *tx = NULL; 233 + struct page *p, *q, *dp, *dq; 234 + struct page *srcs[2]; 235 + unsigned char coef[2]; 236 + enum async_tx_flags flags = submit->flags; 237 + dma_async_tx_callback cb_fn = submit->cb_fn; 238 + void *cb_param = submit->cb_param; 239 + void *scribble = submit->scribble; 240 + 241 + p = blocks[disks-2]; 242 + q = blocks[disks-1]; 243 + 244 + /* Compute syndrome with zero for the missing data pages 245 + * Use the dead data pages as temporary storage for 246 + * delta p and delta q 247 + */ 248 + dp = blocks[faila]; 249 + blocks[faila] = (void *)raid6_empty_zero_page; 250 + blocks[disks-2] = dp; 251 + dq = blocks[failb]; 252 + blocks[failb] = (void *)raid6_empty_zero_page; 253 + blocks[disks-1] = dq; 254 + 255 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 256 + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); 257 + 258 + /* Restore pointer table */ 259 + blocks[faila] = dp; 260 + blocks[failb] = dq; 261 + blocks[disks-2] = p; 262 + blocks[disks-1] = q; 263 + 264 + /* compute P + Pxy */ 265 + srcs[0] = dp; 266 + srcs[1] = p; 267 + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, 268 + scribble); 269 + tx = async_xor(dp, srcs, 0, 2, bytes, submit); 270 + 271 + /* compute Q + Qxy */ 272 + srcs[0] = dq; 273 + srcs[1] = q; 274 + 
init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, 275 + scribble); 276 + tx = async_xor(dq, srcs, 0, 2, bytes, submit); 277 + 278 + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ 279 + srcs[0] = dp; 280 + srcs[1] = dq; 281 + coef[0] = raid6_gfexi[failb-faila]; 282 + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; 283 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 284 + tx = async_sum_product(dq, srcs, coef, bytes, submit); 285 + 286 + /* Dy = P+Pxy+Dx */ 287 + srcs[0] = dp; 288 + srcs[1] = dq; 289 + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, 290 + cb_param, scribble); 291 + tx = async_xor(dp, srcs, 0, 2, bytes, submit); 292 + 293 + return tx; 294 + } 295 + 296 + /** 297 + * async_raid6_2data_recov - asynchronously calculate two missing data blocks 298 + * @disks: number of disks in the RAID-6 array 299 + * @bytes: block size 300 + * @faila: first failed drive index 301 + * @failb: second failed drive index 302 + * @blocks: array of source pointers where the last two entries are p and q 303 + * @submit: submission/completion modifiers 304 + */ 305 + struct dma_async_tx_descriptor * 306 + async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, 307 + struct page **blocks, struct async_submit_ctl *submit) 308 + { 309 + BUG_ON(faila == failb); 310 + if (failb < faila) 311 + swap(faila, failb); 312 + 313 + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); 314 + 315 + /* we need to preserve the contents of 'blocks' for the async 316 + * case, so punt to synchronous if a scribble buffer is not available 317 + */ 318 + if (!submit->scribble) { 319 + void **ptrs = (void **) blocks; 320 + int i; 321 + 322 + async_tx_quiesce(&submit->depend_tx); 323 + for (i = 0; i < disks; i++) 324 + ptrs[i] = page_address(blocks[i]); 325 + 326 + raid6_2data_recov(disks, bytes, faila, failb, ptrs); 327 + 328 + async_tx_sync_epilog(submit); 329 + 330 + return NULL; 331 + } 332 + 333 + switch (disks) { 334 + case 4: 
335 + /* dma devices do not uniformly understand a zero source pq 336 + * operation (in contrast to the synchronous case), so 337 + * explicitly handle the 4 disk special case 338 + */ 339 + return __2data_recov_4(bytes, faila, failb, blocks, submit); 340 + case 5: 341 + /* dma devices do not uniformly understand a single 342 + * source pq operation (in contrast to the synchronous 343 + * case), so explicitly handle the 5 disk special case 344 + */ 345 + return __2data_recov_5(bytes, faila, failb, blocks, submit); 346 + default: 347 + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); 348 + } 349 + } 350 + EXPORT_SYMBOL_GPL(async_raid6_2data_recov); 351 + 352 + /** 353 + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block 354 + * @disks: number of disks in the RAID-6 array 355 + * @bytes: block size 356 + * @faila: failed drive index 357 + * @blocks: array of source pointers where the last two entries are p and q 358 + * @submit: submission/completion modifiers 359 + */ 360 + struct dma_async_tx_descriptor * 361 + async_raid6_datap_recov(int disks, size_t bytes, int faila, 362 + struct page **blocks, struct async_submit_ctl *submit) 363 + { 364 + struct dma_async_tx_descriptor *tx = NULL; 365 + struct page *p, *q, *dq; 366 + u8 coef; 367 + enum async_tx_flags flags = submit->flags; 368 + dma_async_tx_callback cb_fn = submit->cb_fn; 369 + void *cb_param = submit->cb_param; 370 + void *scribble = submit->scribble; 371 + struct page *srcs[2]; 372 + 373 + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); 374 + 375 + /* we need to preserve the contents of 'blocks' for the async 376 + * case, so punt to synchronous if a scribble buffer is not available 377 + */ 378 + if (!scribble) { 379 + void **ptrs = (void **) blocks; 380 + int i; 381 + 382 + async_tx_quiesce(&submit->depend_tx); 383 + for (i = 0; i < disks; i++) 384 + ptrs[i] = page_address(blocks[i]); 385 + 386 + raid6_datap_recov(disks, bytes, faila, ptrs); 387 
+ 388 + async_tx_sync_epilog(submit); 389 + 390 + return NULL; 391 + } 392 + 393 + p = blocks[disks-2]; 394 + q = blocks[disks-1]; 395 + 396 + /* Compute syndrome with zero for the missing data page 397 + * Use the dead data page as temporary storage for delta q 398 + */ 399 + dq = blocks[faila]; 400 + blocks[faila] = (void *)raid6_empty_zero_page; 401 + blocks[disks-1] = dq; 402 + 403 + /* in the 4 disk case we only need to perform a single source 404 + * multiplication 405 + */ 406 + if (disks == 4) { 407 + int good = faila == 0 ? 1 : 0; 408 + struct page *g = blocks[good]; 409 + 410 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 411 + tx = async_memcpy(p, g, 0, 0, bytes, submit); 412 + 413 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 414 + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); 415 + } else { 416 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 417 + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); 418 + } 419 + 420 + /* Restore pointer table */ 421 + blocks[faila] = dq; 422 + blocks[disks-1] = q; 423 + 424 + /* calculate g^{-faila} */ 425 + coef = raid6_gfinv[raid6_gfexp[faila]]; 426 + 427 + srcs[0] = dq; 428 + srcs[1] = q; 429 + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, 430 + scribble); 431 + tx = async_xor(dq, srcs, 0, 2, bytes, submit); 432 + 433 + init_async_submit(submit, 0, tx, NULL, NULL, scribble); 434 + tx = async_mult(dq, dq, coef, bytes, submit); 435 + 436 + srcs[0] = p; 437 + srcs[1] = dq; 438 + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, 439 + cb_param, scribble); 440 + tx = async_xor(p, srcs, 0, 2, bytes, submit); 441 + 442 + return tx; 443 + } 444 + EXPORT_SYMBOL_GPL(async_raid6_datap_recov); 445 + 446 + MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); 447 + MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); 448 + MODULE_LICENSE("GPL");
+8
include/linux/async_tx.h
··· 194 194 size_t len, enum sum_check_flags *pqres, struct page *spare, 195 195 struct async_submit_ctl *submit); 196 196 197 + struct dma_async_tx_descriptor * 198 + async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, 199 + struct page **ptrs, struct async_submit_ctl *submit); 200 + 201 + struct dma_async_tx_descriptor * 202 + async_raid6_datap_recov(int src_num, size_t bytes, int faila, 203 + struct page **ptrs, struct async_submit_ctl *submit); 204 + 197 205 void async_tx_quiesce(struct dma_async_tx_descriptor **tx); 198 206 #endif /* _ASYNC_TX_H_ */