Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md: handle_stripe5 - add request/completion logic for async read ops

When a read bio is attached to the stripe and the corresponding block is
marked R5_UPTODATE, then a read (biofill) operation is scheduled to copy
the data from the stripe cache to the bio buffer. handle_stripe flags the
blocks to be operated on with the R5_Wantfill flag. If new read requests
arrive while raid5_run_ops is running, they will not be handled until
handle_stripe is scheduled to run again.

Changelog:
* cleanup to_read and to_fill accounting
* do not fail reads that have reached the cache

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: NeilBrown <neilb@suse.de>

+26 -29
+25 -28
drivers/md/raid5.c
··· 2049 2049 bi = bi2; 2050 2050 } 2051 2051 2052 - /* fail any reads if this device is non-operational */ 2053 - if (!test_bit(R5_Insync, &sh->dev[i].flags) || 2054 - test_bit(R5_ReadError, &sh->dev[i].flags)) { 2052 + /* fail any reads if this device is non-operational and 2053 + * the data has not reached the cache yet. 2054 + */ 2055 + if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && 2056 + (!test_bit(R5_Insync, &sh->dev[i].flags) || 2057 + test_bit(R5_ReadError, &sh->dev[i].flags))) { 2055 2058 bi = sh->dev[i].toread; 2056 2059 sh->dev[i].toread = NULL; 2057 2060 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) ··· 2743 2740 struct r5dev *dev = &sh->dev[i]; 2744 2741 clear_bit(R5_Insync, &dev->flags); 2745 2742 2746 - pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 2747 - i, dev->flags, dev->toread, dev->towrite, dev->written); 2748 - /* maybe we can reply to a read */ 2749 - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 2750 - struct bio *rbi, *rbi2; 2751 - pr_debug("Return read for disc %d\n", i); 2752 - spin_lock_irq(&conf->device_lock); 2753 - rbi = dev->toread; 2754 - dev->toread = NULL; 2755 - if (test_and_clear_bit(R5_Overlap, &dev->flags)) 2756 - wake_up(&conf->wait_for_overlap); 2757 - spin_unlock_irq(&conf->device_lock); 2758 - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { 2759 - copy_data(0, rbi, dev->page, dev->sector); 2760 - rbi2 = r5_next_bio(rbi, dev->sector); 2761 - spin_lock_irq(&conf->device_lock); 2762 - if (--rbi->bi_phys_segments == 0) { 2763 - rbi->bi_next = return_bi; 2764 - return_bi = rbi; 2765 - } 2766 - spin_unlock_irq(&conf->device_lock); 2767 - rbi = rbi2; 2768 - } 2769 - } 2743 + pr_debug("check %d: state 0x%lx toread %p read %p write %p " 2744 + "written %p\n", i, dev->flags, dev->toread, dev->read, 2745 + dev->towrite, dev->written); 2746 + 2747 + /* maybe we can request a biofill operation 2748 + * 2749 + * new wantfill requests are only permitted while 2750 + * 
STRIPE_OP_BIOFILL is clear 2751 + */ 2752 + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && 2753 + !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) 2754 + set_bit(R5_Wantfill, &dev->flags); 2770 2755 2771 2756 /* now count some things */ 2772 2757 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 2773 2758 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 2774 2759 if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; 2775 2760 2776 - if (dev->toread) 2761 + if (test_bit(R5_Wantfill, &dev->flags)) 2762 + s.to_fill++; 2763 + else if (dev->toread) 2777 2764 s.to_read++; 2778 2765 if (dev->towrite) { 2779 2766 s.to_write++; ··· 2786 2793 set_bit(R5_Insync, &dev->flags); 2787 2794 } 2788 2795 rcu_read_unlock(); 2796 + 2797 + if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) 2798 + sh->ops.count++; 2799 + 2789 2800 pr_debug("locked=%d uptodate=%d to_read=%d" 2790 2801 " to_write=%d failed=%d failed_num=%d\n", 2791 2802 s.locked, s.uptodate, s.to_read, s.to_write,
+1 -1
include/linux/raid/raid5.h
··· 200 200 struct stripe_head_state { 201 201 int syncing, expanding, expanded; 202 202 int locked, uptodate, to_read, to_write, failed, written; 203 - int compute, req_compute, non_overwrite; 203 + int to_fill, compute, req_compute, non_overwrite; 204 204 int failed_num; 205 205 }; 206 206