Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md/raid6: let syndrome computer support different page offset

For now, syndrome compute functions require a common offset in the pages
array. However, we expect them to support different offsets when trying to
use shared pages in the following. Simply convert them by adding a page
offset where each page address is referred.

Since the only callers of async_gen_syndrome() and async_syndrome_val()
are in raid6, we don't want to preserve the old interface but modify the
interface directly. After that, replace the old interfaces with the new
ones for raid6 and raid6test.

Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Song Liu <songliubraving@fb.com>

authored by

Yufen Yu and committed by
Song Liu
d69454bc a7c224a8

+91 -47
+48 -24
crypto/async_tx/async_pq.c
··· 104 104 * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome 105 105 */ 106 106 static void 107 - do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, 107 + do_sync_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks, 108 108 size_t len, struct async_submit_ctl *submit) 109 109 { 110 110 void **srcs; ··· 121 121 BUG_ON(i > disks - 3); /* P or Q can't be zero */ 122 122 srcs[i] = (void*)raid6_empty_zero_page; 123 123 } else { 124 - srcs[i] = page_address(blocks[i]) + offset; 124 + srcs[i] = page_address(blocks[i]) + offsets[i]; 125 + 125 126 if (i < disks - 2) { 126 127 stop = i; 127 128 if (start == -1) ··· 139 138 async_tx_sync_epilog(submit); 140 139 } 141 140 141 + static inline bool 142 + is_dma_pq_aligned_offs(struct dma_device *dev, unsigned int *offs, 143 + int src_cnt, size_t len) 144 + { 145 + int i; 146 + 147 + for (i = 0; i < src_cnt; i++) { 148 + if (!is_dma_pq_aligned(dev, offs[i], 0, len)) 149 + return false; 150 + } 151 + return true; 152 + } 153 + 142 154 /** 143 155 * async_gen_syndrome - asynchronously calculate a raid6 syndrome 144 156 * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 145 - * @offset: common offset into each block (src and dest) to start transaction 157 + * @offsets: offset array into each block (src and dest) to start transaction 146 158 * @disks: number of blocks (including missing P or Q, see below) 147 159 * @len: length of operation in bytes 148 160 * @submit: submission/completion modifiers ··· 174 160 * path. 
175 161 */ 176 162 struct dma_async_tx_descriptor * 177 - async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, 163 + async_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks, 178 164 size_t len, struct async_submit_ctl *submit) 179 165 { 180 166 int src_cnt = disks - 2; ··· 193 179 if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) && 194 180 (src_cnt <= dma_maxpq(device, 0) || 195 181 dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && 196 - is_dma_pq_aligned(device, offset, 0, len)) { 182 + is_dma_pq_aligned_offs(device, offsets, disks, len)) { 197 183 struct dma_async_tx_descriptor *tx; 198 184 enum dma_ctrl_flags dma_flags = 0; 199 185 unsigned char coefs[MAX_DISKS]; ··· 210 196 for (i = 0, j = 0; i < src_cnt; i++) { 211 197 if (blocks[i] == NULL) 212 198 continue; 213 - unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset, 214 - len, DMA_TO_DEVICE); 199 + unmap->addr[j] = dma_map_page(device->dev, blocks[i], 200 + offsets[i], len, DMA_TO_DEVICE); 215 201 coefs[j] = raid6_gfexp[i]; 216 202 unmap->to_cnt++; 217 203 j++; ··· 224 210 unmap->bidi_cnt++; 225 211 if (P(blocks, disks)) 226 212 unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks), 227 - offset, len, DMA_BIDIRECTIONAL); 213 + P(offsets, disks), 214 + len, DMA_BIDIRECTIONAL); 228 215 else { 229 216 unmap->addr[j++] = 0; 230 217 dma_flags |= DMA_PREP_PQ_DISABLE_P; ··· 234 219 unmap->bidi_cnt++; 235 220 if (Q(blocks, disks)) 236 221 unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks), 237 - offset, len, DMA_BIDIRECTIONAL); 222 + Q(offsets, disks), 223 + len, DMA_BIDIRECTIONAL); 238 224 else { 239 225 unmap->addr[j++] = 0; 240 226 dma_flags |= DMA_PREP_PQ_DISABLE_Q; ··· 256 240 257 241 if (!P(blocks, disks)) { 258 242 P(blocks, disks) = pq_scribble_page; 259 - BUG_ON(len + offset > PAGE_SIZE); 243 + P(offsets, disks) = 0; 260 244 } 261 245 if (!Q(blocks, disks)) { 262 246 Q(blocks, disks) = pq_scribble_page; 263 - BUG_ON(len + offset > PAGE_SIZE); 
247 + Q(offsets, disks) = 0; 264 248 } 265 - do_sync_gen_syndrome(blocks, offset, disks, len, submit); 249 + do_sync_gen_syndrome(blocks, offsets, disks, len, submit); 266 250 267 251 return NULL; 268 252 } ··· 286 270 * @len: length of operation in bytes 287 271 * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set 288 272 * @spare: temporary result buffer for the synchronous case 273 + * @s_off: spare buffer page offset 289 274 * @submit: submission / completion modifiers 290 275 * 291 276 * The same notes from async_gen_syndrome apply to the 'blocks', ··· 295 278 * specified. 296 279 */ 297 280 struct dma_async_tx_descriptor * 298 - async_syndrome_val(struct page **blocks, unsigned int offset, int disks, 281 + async_syndrome_val(struct page **blocks, unsigned int *offsets, int disks, 299 282 size_t len, enum sum_check_flags *pqres, struct page *spare, 300 - struct async_submit_ctl *submit) 283 + unsigned int s_off, struct async_submit_ctl *submit) 301 284 { 302 285 struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len); 303 286 struct dma_device *device = chan ? 
chan->device : NULL; ··· 312 295 unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT); 313 296 314 297 if (unmap && disks <= dma_maxpq(device, 0) && 315 - is_dma_pq_aligned(device, offset, 0, len)) { 298 + is_dma_pq_aligned_offs(device, offsets, disks, len)) { 316 299 struct device *dev = device->dev; 317 300 dma_addr_t pq[2]; 318 301 int i, j = 0, src_cnt = 0; ··· 324 307 for (i = 0; i < disks-2; i++) 325 308 if (likely(blocks[i])) { 326 309 unmap->addr[j] = dma_map_page(dev, blocks[i], 327 - offset, len, 310 + offsets[i], len, 328 311 DMA_TO_DEVICE); 329 312 coefs[j] = raid6_gfexp[i]; 330 313 unmap->to_cnt++; ··· 337 320 dma_flags |= DMA_PREP_PQ_DISABLE_P; 338 321 } else { 339 322 pq[0] = dma_map_page(dev, P(blocks, disks), 340 - offset, len, 323 + P(offsets, disks), len, 341 324 DMA_TO_DEVICE); 342 325 unmap->addr[j++] = pq[0]; 343 326 unmap->to_cnt++; ··· 347 330 dma_flags |= DMA_PREP_PQ_DISABLE_Q; 348 331 } else { 349 332 pq[1] = dma_map_page(dev, Q(blocks, disks), 350 - offset, len, 333 + Q(offsets, disks), len, 351 334 DMA_TO_DEVICE); 352 335 unmap->addr[j++] = pq[1]; 353 336 unmap->to_cnt++; ··· 372 355 async_tx_submit(chan, tx, submit); 373 356 } else { 374 357 struct page *p_src = P(blocks, disks); 358 + unsigned int p_off = P(offsets, disks); 375 359 struct page *q_src = Q(blocks, disks); 360 + unsigned int q_off = Q(offsets, disks); 376 361 enum async_tx_flags flags_orig = submit->flags; 377 362 dma_async_tx_callback cb_fn_orig = submit->cb_fn; 378 363 void *scribble = submit->scribble; ··· 400 381 if (p_src) { 401 382 init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, 402 383 NULL, NULL, scribble); 403 - tx = async_xor(spare, blocks, offset, disks-2, len, submit); 384 + tx = async_xor_offs(spare, s_off, 385 + blocks, offsets, disks-2, len, submit); 404 386 async_tx_quiesce(&tx); 405 - p = page_address(p_src) + offset; 406 - s = page_address(spare) + offset; 387 + p = page_address(p_src) + p_off; 388 + s = page_address(spare) + s_off; 
407 389 *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; 408 390 } 409 391 410 392 if (q_src) { 411 393 P(blocks, disks) = NULL; 412 394 Q(blocks, disks) = spare; 395 + Q(offsets, disks) = s_off; 413 396 init_async_submit(submit, 0, NULL, NULL, NULL, scribble); 414 - tx = async_gen_syndrome(blocks, offset, disks, len, submit); 397 + tx = async_gen_syndrome(blocks, offsets, disks, 398 + len, submit); 415 399 async_tx_quiesce(&tx); 416 - q = page_address(q_src) + offset; 417 - s = page_address(spare) + offset; 400 + q = page_address(q_src) + q_off; 401 + s = page_address(spare) + s_off; 418 402 *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; 419 403 } 420 404 421 405 /* restore P, Q and submit */ 422 406 P(blocks, disks) = p_src; 407 + P(offsets, disks) = p_off; 423 408 Q(blocks, disks) = q_src; 409 + Q(offsets, disks) = q_off; 424 410 425 411 submit->cb_fn = cb_fn_orig; 426 412 submit->cb_param = cb_param_orig;
+16 -8
crypto/async_tx/raid6test.c
··· 18 18 #define NDISKS 64 /* Including P and Q */ 19 19 20 20 static struct page *dataptrs[NDISKS]; 21 + unsigned int dataoffs[NDISKS]; 21 22 static addr_conv_t addr_conv[NDISKS]; 22 23 static struct page *data[NDISKS+3]; 23 24 static struct page *spare; ··· 39 38 for (i = 0; i < disks; i++) { 40 39 prandom_bytes(page_address(data[i]), PAGE_SIZE); 41 40 dataptrs[i] = data[i]; 41 + dataoffs[i] = 0; 42 42 } 43 43 } 44 44 ··· 54 52 } 55 53 56 54 /* Recover two failed blocks. */ 57 - static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) 55 + static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, 56 + struct page **ptrs, unsigned int *offs) 58 57 { 59 58 struct async_submit_ctl submit; 60 59 struct completion cmp; ··· 69 66 if (faila == disks-2) { 70 67 /* P+Q failure. Just rebuild the syndrome. */ 71 68 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); 72 - tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); 69 + tx = async_gen_syndrome(ptrs, offs, 70 + disks, bytes, &submit); 73 71 } else { 74 72 struct page *blocks[NDISKS]; 75 73 struct page *dest; ··· 93 89 tx = async_xor(dest, blocks, 0, count, bytes, &submit); 94 90 95 91 init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); 96 - tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); 92 + tx = async_gen_syndrome(ptrs, offs, 93 + disks, bytes, &submit); 97 94 } 98 95 } else { 99 96 if (failb == disks-2) { 100 97 /* data+P failure. */ 101 98 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); 102 - tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); 99 + tx = async_raid6_datap_recov(disks, bytes, 100 + faila, ptrs, &submit); 103 101 } else { 104 102 /* data+data failure. 
*/ 105 103 init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); 106 - tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); 104 + tx = async_raid6_2data_recov(disks, bytes, 105 + faila, failb, ptrs, &submit); 107 106 } 108 107 } 109 108 init_completion(&cmp); 110 109 init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); 111 - tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); 110 + tx = async_syndrome_val(ptrs, offs, 111 + disks, bytes, &result, spare, 0, &submit); 112 112 async_tx_issue_pending(tx); 113 113 114 114 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) ··· 134 126 dataptrs[i] = recovi; 135 127 dataptrs[j] = recovj; 136 128 137 - raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); 129 + raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs, dataoffs); 138 130 139 131 erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); 140 132 errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); ··· 170 162 /* Generate assumed good syndrome */ 171 163 init_completion(&cmp); 172 164 init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); 173 - tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); 165 + tx = async_gen_syndrome(dataptrs, dataoffs, disks, PAGE_SIZE, &submit); 174 166 async_tx_issue_pending(tx); 175 167 176 168 if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
+24 -12
drivers/md/raid5.c
··· 1459 1459 1460 1460 /* set_syndrome_sources - populate source buffers for gen_syndrome 1461 1461 * @srcs - (struct page *) array of size sh->disks 1462 + * @offs - (unsigned int) array of offset for each page 1462 1463 * @sh - stripe_head to parse 1463 1464 * 1464 1465 * Populates srcs in proper layout order for the stripe and returns the ··· 1468 1467 * is recorded in srcs[count+1]]. 1469 1468 */ 1470 1469 static int set_syndrome_sources(struct page **srcs, 1470 + unsigned int *offs, 1471 1471 struct stripe_head *sh, 1472 1472 int srctype) 1473 1473 { ··· 1499 1497 srcs[slot] = sh->dev[i].orig_page; 1500 1498 else 1501 1499 srcs[slot] = sh->dev[i].page; 1500 + /* 1501 + * For R5_InJournal, PAGE_SIZE must be 4KB and will 1502 + * not shared page. In that case, dev[i].offset 1503 + * is 0. 1504 + */ 1505 + offs[slot] = sh->dev[i].offset; 1502 1506 } 1503 1507 i = raid6_next_disk(i, disks); 1504 1508 } while (i != d0_idx); ··· 1548 1540 atomic_inc(&sh->count); 1549 1541 1550 1542 if (target == qd_idx) { 1551 - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); 1543 + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); 1552 1544 blocks[count] = NULL; /* regenerating p is not necessary */ 1553 1545 BUG_ON(blocks[count+1] != dest); /* q should already be set */ 1554 1546 init_async_submit(&submit, ASYNC_TX_FENCE, NULL, 1555 1547 ops_complete_compute, sh, 1556 1548 to_addr_conv(sh, percpu, 0)); 1557 - tx = async_gen_syndrome(blocks, 0, count+2, 1549 + tx = async_gen_syndrome(blocks, offs, count+2, 1558 1550 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 1559 1551 } else { 1560 1552 /* Compute any data- or p-drive using XOR */ ··· 1636 1628 init_async_submit(&submit, ASYNC_TX_FENCE, NULL, 1637 1629 ops_complete_compute, sh, 1638 1630 to_addr_conv(sh, percpu, 0)); 1639 - return async_gen_syndrome(blocks, 0, syndrome_disks+2, 1631 + return async_gen_syndrome(blocks, offs, syndrome_disks+2, 1640 1632 RAID5_STRIPE_SIZE(sh->raid_conf), 1641 1633 
&submit); 1642 1634 } else { ··· 1668 1660 RAID5_STRIPE_SIZE(sh->raid_conf), 1669 1661 &submit); 1670 1662 1671 - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); 1663 + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); 1672 1664 init_async_submit(&submit, ASYNC_TX_FENCE, tx, 1673 1665 ops_complete_compute, sh, 1674 1666 to_addr_conv(sh, percpu, 0)); 1675 - return async_gen_syndrome(blocks, 0, count+2, 1667 + return async_gen_syndrome(blocks, offs, count+2, 1676 1668 RAID5_STRIPE_SIZE(sh->raid_conf), 1677 1669 &submit); 1678 1670 } ··· 1758 1750 struct dma_async_tx_descriptor *tx) 1759 1751 { 1760 1752 struct page **blocks = to_addr_page(percpu, 0); 1753 + unsigned int *offs = to_addr_offs(sh, percpu); 1761 1754 int count; 1762 1755 struct async_submit_ctl submit; 1763 1756 1764 1757 pr_debug("%s: stripe %llu\n", __func__, 1765 1758 (unsigned long long)sh->sector); 1766 1759 1767 - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN); 1760 + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_WANT_DRAIN); 1768 1761 1769 1762 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, 1770 1763 ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); 1771 - tx = async_gen_syndrome(blocks, 0, count+2, 1764 + tx = async_gen_syndrome(blocks, offs, count+2, 1772 1765 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 1773 1766 1774 1767 return tx; ··· 1998 1989 { 1999 1990 struct async_submit_ctl submit; 2000 1991 struct page **blocks; 1992 + unsigned int *offs; 2001 1993 int count, i, j = 0; 2002 1994 struct stripe_head *head_sh = sh; 2003 1995 int last_stripe; ··· 2023 2013 2024 2014 again: 2025 2015 blocks = to_addr_page(percpu, j); 2016 + offs = to_addr_offs(sh, percpu); 2026 2017 2027 2018 if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { 2028 2019 synflags = SYNDROME_SRC_WRITTEN; ··· 2033 2022 txflags = ASYNC_TX_ACK; 2034 2023 } 2035 2024 2036 - count = set_syndrome_sources(blocks, sh, synflags); 2025 
+ count = set_syndrome_sources(blocks, offs, sh, synflags); 2037 2026 last_stripe = !head_sh->batch_head || 2038 2027 list_first_entry(&sh->batch_list, 2039 2028 struct stripe_head, batch_list) == head_sh; ··· 2045 2034 } else 2046 2035 init_async_submit(&submit, 0, tx, NULL, NULL, 2047 2036 to_addr_conv(sh, percpu, j)); 2048 - tx = async_gen_syndrome(blocks, 0, count+2, 2037 + tx = async_gen_syndrome(blocks, offs, count+2, 2049 2038 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); 2050 2039 if (!last_stripe) { 2051 2040 j++; ··· 2111 2100 static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) 2112 2101 { 2113 2102 struct page **srcs = to_addr_page(percpu, 0); 2103 + unsigned int *offs = to_addr_offs(sh, percpu); 2114 2104 struct async_submit_ctl submit; 2115 2105 int count; 2116 2106 ··· 2119 2107 (unsigned long long)sh->sector, checkp); 2120 2108 2121 2109 BUG_ON(sh->batch_head); 2122 - count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL); 2110 + count = set_syndrome_sources(srcs, offs, sh, SYNDROME_SRC_ALL); 2123 2111 if (!checkp) 2124 2112 srcs[count] = NULL; 2125 2113 2126 2114 atomic_inc(&sh->count); 2127 2115 init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, 2128 2116 sh, to_addr_conv(sh, percpu, 0)); 2129 - async_syndrome_val(srcs, 0, count+2, 2117 + async_syndrome_val(srcs, offs, count+2, 2130 2118 RAID5_STRIPE_SIZE(sh->raid_conf), 2131 - &sh->ops.zero_sum_result, percpu->spare_page, &submit); 2119 + &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); 2132 2120 } 2133 2121 2134 2122 static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+3 -3
include/linux/async_tx.h
··· 186 186 struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); 187 187 188 188 struct dma_async_tx_descriptor * 189 - async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, 189 + async_gen_syndrome(struct page **blocks, unsigned int *offsets, int src_cnt, 190 190 size_t len, struct async_submit_ctl *submit); 191 191 192 192 struct dma_async_tx_descriptor * 193 - async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, 193 + async_syndrome_val(struct page **blocks, unsigned int *offsets, int src_cnt, 194 194 size_t len, enum sum_check_flags *pqres, struct page *spare, 195 - struct async_submit_ctl *submit); 195 + unsigned int s_off, struct async_submit_ctl *submit); 196 196 197 197 struct dma_async_tx_descriptor * 198 198 async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,