Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md: handle_stripe5 - add request/completion logic for async check ops

Check operations are scheduled when the array is being resynced or when an
explicit 'check/repair' command has been sent to the array. Previously, check
operations would destroy the parity block in the cache, so that even if
parity turned out to be correct the parity block would be marked
!R5_UPTODATE at the completion of the check. When the operation can be
carried out by a DMA engine, the assumption is that it can check parity as a
read-only operation. If raid5_run_ops notices that the check was handled
by hardware, it will preserve the R5_UPTODATE status of the parity disk.

When a check operation determines that the parity needs to be repaired, we
reuse the existing compute block infrastructure to carry out the operation.
Repair operations imply an immediate write-back of the data, so to
differentiate a repair from a normal compute operation the
STRIPE_OP_MOD_REPAIR_PD flag is added.

Changelog:
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: NeilBrown <neilb@suse.de>

+65 -19
+65 -19
drivers/md/raid5.c
··· 2471 2471 struct stripe_head_state *s, int disks) 2472 2472 { 2473 2473 set_bit(STRIPE_HANDLE, &sh->state); 2474 - if (s->failed == 0) { 2475 - BUG_ON(s->uptodate != disks); 2476 - compute_parity5(sh, CHECK_PARITY); 2477 - s->uptodate--; 2478 - if (page_is_zero(sh->dev[sh->pd_idx].page)) { 2479 - /* parity is correct (on disc, not in buffer any more) 2480 - */ 2481 - set_bit(STRIPE_INSYNC, &sh->state); 2482 - } else { 2483 - conf->mddev->resync_mismatches += STRIPE_SECTORS; 2484 - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 2485 - /* don't try to repair!! */ 2474 + /* Take one of the following actions: 2475 + * 1/ start a check parity operation if (uptodate == disks) 2476 + * 2/ finish a check parity operation and act on the result 2477 + * 3/ skip to the writeback section if we previously 2478 + * initiated a recovery operation 2479 + */ 2480 + if (s->failed == 0 && 2481 + !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { 2482 + if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { 2483 + BUG_ON(s->uptodate != disks); 2484 + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); 2485 + sh->ops.count++; 2486 + s->uptodate--; 2487 + } else if ( 2488 + test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { 2489 + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); 2490 + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); 2491 + 2492 + if (sh->ops.zero_sum_result == 0) 2493 + /* parity is correct (on disc, 2494 + * not in buffer any more) 2495 + */ 2486 2496 set_bit(STRIPE_INSYNC, &sh->state); 2487 2497 else { 2488 - compute_block(sh, sh->pd_idx); 2489 - s->uptodate++; 2498 + conf->mddev->resync_mismatches += 2499 + STRIPE_SECTORS; 2500 + if (test_bit( 2501 + MD_RECOVERY_CHECK, &conf->mddev->recovery)) 2502 + /* don't try to repair!! 
*/ 2503 + set_bit(STRIPE_INSYNC, &sh->state); 2504 + else { 2505 + set_bit(STRIPE_OP_COMPUTE_BLK, 2506 + &sh->ops.pending); 2507 + set_bit(STRIPE_OP_MOD_REPAIR_PD, 2508 + &sh->ops.pending); 2509 + set_bit(R5_Wantcompute, 2510 + &sh->dev[sh->pd_idx].flags); 2511 + sh->ops.target = sh->pd_idx; 2512 + sh->ops.count++; 2513 + s->uptodate++; 2514 + } 2490 2515 } 2491 2516 } 2492 2517 } 2493 - if (!test_bit(STRIPE_INSYNC, &sh->state)) { 2518 + 2519 + /* check if we can clear a parity disk reconstruct */ 2520 + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && 2521 + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { 2522 + 2523 + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); 2524 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); 2525 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); 2526 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 2527 + } 2528 + 2529 + /* Wait for check parity and compute block operations to complete 2530 + * before write-back 2531 + */ 2532 + if (!test_bit(STRIPE_INSYNC, &sh->state) && 2533 + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && 2534 + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { 2494 2535 struct r5dev *dev; 2495 2536 /* either failed parity check, or recovery is happening */ 2496 2537 if (s->failed == 0) ··· 2896 2855 handle_issuing_new_write_requests5(conf, sh, &s, disks); 2897 2856 2898 2857 /* maybe we need to check and possibly fix the parity for this stripe 2899 - * Any reads will already have been scheduled, so we just see if enough data 2900 - * is available 2858 + * Any reads will already have been scheduled, so we just see if enough 2859 + * data is available. The parity check is held off while parity 2860 + * dependent operations are in flight. 
2901 2861 */ 2902 - if (s.syncing && s.locked == 0 && 2903 - !test_bit(STRIPE_INSYNC, &sh->state)) 2862 + if ((s.syncing && s.locked == 0 && 2863 + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && 2864 + !test_bit(STRIPE_INSYNC, &sh->state)) || 2865 + test_bit(STRIPE_OP_CHECK, &sh->ops.pending) || 2866 + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) 2904 2867 handle_parity_checks5(conf, sh, &s, disks); 2868 + 2905 2869 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 2906 2870 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 2907 2871 clear_bit(STRIPE_SYNCING, &sh->state);