Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md: (24 commits)
md: clean up do_md_stop
md: fix another deadlock with removing sysfs attributes.
md: move revalidate_disk() back outside open_mutex
md/raid10: fix deadlock with unaligned read during resync
md/bitmap: separate out loading a bitmap from initialising the structures.
md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
md/bitmap: optimise scanning of empty bitmaps.
md/bitmap: clean up plugging calls.
md/bitmap: reduce dependence on sysfs.
md/bitmap: white space clean up and similar.
md/raid5: export raid5 unplugging interface.
md/plug: optionally use plugger to unplug an array during resync/recovery.
md/raid5: add simple plugging infrastructure.
md/raid5: export is_congested test
raid5: Don't set read-ahead when there is no queue
md: add support for raising dm events.
md: export various start/stop interfaces
md: split out md_rdev_init
md: be more careful setting MD_CHANGE_CLEAN
md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
...

authored by Linus Torvalds and committed by Chris Metcalf (90a9ed95, 8cbd84f2)

+748 -509
+14
crypto/async_tx/Kconfig
··· 22 22 tristate 23 23 select ASYNC_CORE 24 24 select ASYNC_PQ 25 + select ASYNC_XOR 26 + 27 + config ASYNC_RAID6_TEST 28 + tristate "Self test for hardware accelerated raid6 recovery" 29 + depends on ASYNC_RAID6_RECOV 30 + select ASYNC_MEMCPY 31 + ---help--- 32 + This is a one-shot self test that permutes through the 33 + recovery of all the possible two disk failure scenarios for a 34 + N-disk array. Recovery is performed with the asynchronous 35 + raid6 recovery routines, and will optionally use an offload 36 + engine if one is available. 37 + 38 + If unsure, say N. 25 39 26 40 config ASYNC_TX_DISABLE_PQ_VAL_DMA 27 41 bool
+1 -17
drivers/md/Kconfig
··· 121 121 config MD_RAID456 122 122 tristate "RAID-4/RAID-5/RAID-6 mode" 123 123 depends on BLK_DEV_MD 124 - select MD_RAID6_PQ 124 + select RAID6_PQ 125 125 select ASYNC_MEMCPY 126 126 select ASYNC_XOR 127 127 select ASYNC_PQ ··· 162 162 ---help--- 163 163 Enable the raid456 module to dispatch per-stripe raid operations to a 164 164 thread pool. 165 - 166 - If unsure, say N. 167 - 168 - config MD_RAID6_PQ 169 - tristate 170 - 171 - config ASYNC_RAID6_TEST 172 - tristate "Self test for hardware accelerated raid6 recovery" 173 - depends on MD_RAID6_PQ 174 - select ASYNC_RAID6_RECOV 175 - ---help--- 176 - This is a one-shot self test that permutes through the 177 - recovery of all the possible two disk failure scenarios for a 178 - N-disk array. Recovery is performed with the asynchronous 179 - raid6 recovery routines, and will optionally use an offload 180 - engine if one is available. 181 165 182 166 If unsure, say N. 183 167
-77
drivers/md/Makefile
··· 12 12 += dm-log-userspace-base.o dm-log-userspace-transfer.o 13 13 md-mod-y += md.o bitmap.o 14 14 raid456-y += raid5.o 15 - raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ 16 - raid6int1.o raid6int2.o raid6int4.o \ 17 - raid6int8.o raid6int16.o raid6int32.o \ 18 - raid6altivec1.o raid6altivec2.o raid6altivec4.o \ 19 - raid6altivec8.o \ 20 - raid6mmx.o raid6sse1.o raid6sse2.o 21 - hostprogs-y += mktables 22 15 23 16 # Note: link order is important. All raid personalities 24 17 # and must come before md.o, as they each initialise ··· 22 29 obj-$(CONFIG_MD_RAID0) += raid0.o 23 30 obj-$(CONFIG_MD_RAID1) += raid1.o 24 31 obj-$(CONFIG_MD_RAID10) += raid10.o 25 - obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o 26 32 obj-$(CONFIG_MD_RAID456) += raid456.o 27 33 obj-$(CONFIG_MD_MULTIPATH) += multipath.o 28 34 obj-$(CONFIG_MD_FAULTY) += faulty.o ··· 37 45 obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o 38 46 obj-$(CONFIG_DM_ZERO) += dm-zero.o 39 47 40 - quiet_cmd_unroll = UNROLL $@ 41 - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ 42 - < $< > $@ || ( rm -f $@ && exit 1 ) 43 - 44 - ifeq ($(CONFIG_ALTIVEC),y) 45 - altivec_flags := -maltivec -mabi=altivec 46 - endif 47 - 48 48 ifeq ($(CONFIG_DM_UEVENT),y) 49 49 dm-mod-objs += dm-uevent.o 50 50 endif 51 - 52 - targets += raid6int1.c 53 - $(obj)/raid6int1.c: UNROLL := 1 54 - $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 55 - $(call if_changed,unroll) 56 - 57 - targets += raid6int2.c 58 - $(obj)/raid6int2.c: UNROLL := 2 59 - $(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 60 - $(call if_changed,unroll) 61 - 62 - targets += raid6int4.c 63 - $(obj)/raid6int4.c: UNROLL := 4 64 - $(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 65 - $(call if_changed,unroll) 66 - 67 - targets += raid6int8.c 68 - $(obj)/raid6int8.c: UNROLL := 8 69 - $(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 70 - $(call if_changed,unroll) 71 - 72 - targets += raid6int16.c 73 - $(obj)/raid6int16.c: UNROLL := 16 74 - $(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 75 - $(call if_changed,unroll) 76 - 77 - targets += raid6int32.c 78 - $(obj)/raid6int32.c: UNROLL := 32 79 - $(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 80 - $(call if_changed,unroll) 81 - 82 - CFLAGS_raid6altivec1.o += $(altivec_flags) 83 - targets += raid6altivec1.c 84 - $(obj)/raid6altivec1.c: UNROLL := 1 85 - $(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 86 - $(call if_changed,unroll) 87 - 88 - CFLAGS_raid6altivec2.o += $(altivec_flags) 89 - targets += raid6altivec2.c 90 - $(obj)/raid6altivec2.c: UNROLL := 2 91 - $(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 92 - $(call if_changed,unroll) 93 - 94 - CFLAGS_raid6altivec4.o += $(altivec_flags) 95 - targets += raid6altivec4.c 96 - $(obj)/raid6altivec4.c: UNROLL := 4 97 - $(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 98 - $(call if_changed,unroll) 99 - 100 - CFLAGS_raid6altivec8.o += $(altivec_flags) 101 - targets += raid6altivec8.c 102 - $(obj)/raid6altivec8.c: UNROLL := 8 103 - $(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 104 - $(call if_changed,unroll) 105 - 106 - quiet_cmd_mktable = TABLE $@ 107 - cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 108 - 109 - targets += raid6tables.c 110 - $(obj)/raid6tables.c: $(obj)/mktables FORCE 111 - $(call if_changed,mktable)
+281 -231
drivers/md/bitmap.c
··· 13 13 * Still to do: 14 14 * 15 15 * flush after percent set rather than just time based. (maybe both). 16 - * wait if count gets too high, wake when it drops to half. 17 16 */ 18 17 19 18 #include <linux/blkdev.h> ··· 29 30 #include "md.h" 30 31 #include "bitmap.h" 31 32 33 + #include <linux/dm-dirty-log.h> 32 34 /* debug macros */ 33 35 34 36 #define DEBUG 0 ··· 51 51 #define INJECT_FATAL_FAULT_3 0 /* undef */ 52 52 #endif 53 53 54 - //#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ 55 - #define DPRINTK(x...) do { } while(0) 56 - 57 54 #ifndef PRINTK 58 55 # if DEBUG > 0 59 56 # define PRINTK(x...) printk(KERN_DEBUG x) ··· 59 62 # endif 60 63 #endif 61 64 62 - static inline char * bmname(struct bitmap *bitmap) 65 + static inline char *bmname(struct bitmap *bitmap) 63 66 { 64 67 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 65 68 } 66 - 67 69 68 70 /* 69 71 * just a placeholder - calls kmalloc for bitmap pages ··· 74 78 #ifdef INJECT_FAULTS_1 75 79 page = NULL; 76 80 #else 77 - page = kmalloc(PAGE_SIZE, GFP_NOIO); 81 + page = kzalloc(PAGE_SIZE, GFP_NOIO); 78 82 #endif 79 83 if (!page) 80 84 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); ··· 103 107 * if we find our page, we increment the page's refcount so that it stays 104 108 * allocated while we're using it 105 109 */ 106 - static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) 110 + static int bitmap_checkpage(struct bitmap *bitmap, 111 + unsigned long page, int create) 107 112 __releases(bitmap->lock) 108 113 __acquires(bitmap->lock) 109 114 { ··· 118 121 return -EINVAL; 119 122 } 120 123 121 - 122 124 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 123 125 return 0; 124 126 ··· 127 131 if (!create) 128 132 return -ENOENT; 129 133 130 - spin_unlock_irq(&bitmap->lock); 131 - 132 134 /* this page has not been allocated yet */ 133 135 134 - if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { 136 + spin_unlock_irq(&bitmap->lock); 137 + mappage = bitmap_alloc_page(bitmap); 138 + spin_lock_irq(&bitmap->lock); 139 + 140 + if (mappage == NULL) { 135 141 PRINTK("%s: bitmap map page allocation failed, hijacking\n", 136 142 bmname(bitmap)); 137 143 /* failed - set the hijacked flag so that we can use the 138 144 * pointer as a counter */ 139 - spin_lock_irq(&bitmap->lock); 140 145 if (!bitmap->bp[page].map) 141 146 bitmap->bp[page].hijacked = 1; 142 - goto out; 143 - } 144 - 145 - /* got a page */ 146 - 147 - spin_lock_irq(&bitmap->lock); 148 - 149 - /* recheck the page */ 150 - 151 - if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { 147 + } else if (bitmap->bp[page].map || 148 + bitmap->bp[page].hijacked) { 152 149 /* somebody beat us to getting the page */ 153 150 bitmap_free_page(bitmap, mappage); 154 151 return 0; 152 + } else { 153 + 154 + /* no page was in place and we have one, so install it */ 155 + 156 + bitmap->bp[page].map = mappage; 157 + bitmap->missing_pages--; 155 158 } 156 - 157 - /* no page was in place and we have one, so install it */ 158 - 159 - memset(mappage, 0, PAGE_SIZE); 160 - bitmap->bp[page].map = mappage; 161 - bitmap->missing_pages--; 162 - out: 163 159 return 0; 164 160 } 165 - 166 161 167 162 /* if page is completely empty, put it back on the free list, or dealloc it */ 168 163 /* if page was hijacked, unmark the flag so it might get alloced next time */ ··· 170 183 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 171 184 bitmap->bp[page].hijacked = 0; 172 185 bitmap->bp[page].map = 
NULL; 173 - return; 186 + } else { 187 + /* normal case, free the page */ 188 + ptr = bitmap->bp[page].map; 189 + bitmap->bp[page].map = NULL; 190 + bitmap->missing_pages++; 191 + bitmap_free_page(bitmap, ptr); 174 192 } 175 - 176 - /* normal case, free the page */ 177 - 178 - #if 0 179 - /* actually ... let's not. We will probably need the page again exactly when 180 - * memory is tight and we are flusing to disk 181 - */ 182 - return; 183 - #else 184 - ptr = bitmap->bp[page].map; 185 - bitmap->bp[page].map = NULL; 186 - bitmap->missing_pages++; 187 - bitmap_free_page(bitmap, ptr); 188 - return; 189 - #endif 190 193 } 191 - 192 194 193 195 /* 194 196 * bitmap file handling - read and write the bitmap file and its superblock ··· 196 220 197 221 mdk_rdev_t *rdev; 198 222 sector_t target; 223 + int did_alloc = 0; 199 224 200 - if (!page) 225 + if (!page) { 201 226 page = alloc_page(GFP_KERNEL); 202 - if (!page) 203 - return ERR_PTR(-ENOMEM); 227 + if (!page) 228 + return ERR_PTR(-ENOMEM); 229 + did_alloc = 1; 230 + } 204 231 205 232 list_for_each_entry(rdev, &mddev->disks, same_set) { 206 233 if (! test_bit(In_sync, &rdev->flags) ··· 221 242 return page; 222 243 } 223 244 } 245 + if (did_alloc) 246 + put_page(page); 224 247 return ERR_PTR(-EIO); 225 248 226 249 } ··· 267 286 mddev_t *mddev = bitmap->mddev; 268 287 269 288 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 270 - int size = PAGE_SIZE; 271 - loff_t offset = mddev->bitmap_info.offset; 272 - if (page->index == bitmap->file_pages-1) 273 - size = roundup(bitmap->last_page_size, 274 - bdev_logical_block_size(rdev->bdev)); 275 - /* Just make sure we aren't corrupting data or 276 - * metadata 277 - */ 278 - if (mddev->external) { 279 - /* Bitmap could be anywhere. */ 280 - if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > 281 - rdev->data_offset && 282 - rdev->sb_start + offset < 283 - rdev->data_offset + mddev->dev_sectors + 284 - (PAGE_SIZE/512)) 285 - goto bad_alignment; 286 - } else if (offset < 0) { 287 - /* DATA BITMAP METADATA */ 288 - if (offset 289 - + (long)(page->index * (PAGE_SIZE/512)) 290 - + size/512 > 0) 291 - /* bitmap runs in to metadata */ 292 - goto bad_alignment; 293 - if (rdev->data_offset + mddev->dev_sectors 294 - > rdev->sb_start + offset) 295 - /* data runs in to bitmap */ 296 - goto bad_alignment; 297 - } else if (rdev->sb_start < rdev->data_offset) { 298 - /* METADATA BITMAP DATA */ 299 - if (rdev->sb_start 300 - + offset 301 - + page->index*(PAGE_SIZE/512) + size/512 302 - > rdev->data_offset) 303 - /* bitmap runs in to data */ 304 - goto bad_alignment; 305 - } else { 306 - /* DATA METADATA BITMAP - no problems */ 307 - } 308 - md_super_write(mddev, rdev, 309 - rdev->sb_start + offset 310 - + page->index * (PAGE_SIZE/512), 311 - size, 312 - page); 289 + int size = PAGE_SIZE; 290 + loff_t offset = mddev->bitmap_info.offset; 291 + if (page->index == bitmap->file_pages-1) 292 + size = roundup(bitmap->last_page_size, 293 + bdev_logical_block_size(rdev->bdev)); 294 + /* Just make sure we aren't corrupting data or 295 + * metadata 296 + */ 297 + if (mddev->external) { 298 + /* Bitmap could be anywhere. 
*/ 299 + if (rdev->sb_start + offset + (page->index 300 + * (PAGE_SIZE/512)) 301 + > rdev->data_offset 302 + && 303 + rdev->sb_start + offset 304 + < (rdev->data_offset + mddev->dev_sectors 305 + + (PAGE_SIZE/512))) 306 + goto bad_alignment; 307 + } else if (offset < 0) { 308 + /* DATA BITMAP METADATA */ 309 + if (offset 310 + + (long)(page->index * (PAGE_SIZE/512)) 311 + + size/512 > 0) 312 + /* bitmap runs in to metadata */ 313 + goto bad_alignment; 314 + if (rdev->data_offset + mddev->dev_sectors 315 + > rdev->sb_start + offset) 316 + /* data runs in to bitmap */ 317 + goto bad_alignment; 318 + } else if (rdev->sb_start < rdev->data_offset) { 319 + /* METADATA BITMAP DATA */ 320 + if (rdev->sb_start 321 + + offset 322 + + page->index*(PAGE_SIZE/512) + size/512 323 + > rdev->data_offset) 324 + /* bitmap runs in to data */ 325 + goto bad_alignment; 326 + } else { 327 + /* DATA METADATA BITMAP - no problems */ 328 + } 329 + md_super_write(mddev, rdev, 330 + rdev->sb_start + offset 331 + + page->index * (PAGE_SIZE/512), 332 + size, 333 + page); 313 334 } 314 335 315 336 if (wait) ··· 347 364 bh = bh->b_this_page; 348 365 } 349 366 350 - if (wait) { 367 + if (wait) 351 368 wait_event(bitmap->write_wait, 352 369 atomic_read(&bitmap->pending_writes)==0); 353 - } 354 370 } 355 371 if (bitmap->flags & BITMAP_WRITE_ERROR) 356 372 bitmap_file_kick(bitmap); ··· 406 424 struct buffer_head *bh; 407 425 sector_t block; 408 426 409 - PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, 427 + PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, 410 428 (unsigned long long)index << PAGE_SHIFT); 411 429 412 430 page = alloc_page(GFP_KERNEL); ··· 460 478 } 461 479 out: 462 480 if (IS_ERR(page)) 463 - printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", 481 + printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", 464 482 (int)PAGE_SIZE, 465 483 (unsigned long long)index << PAGE_SHIFT, 466 484 PTR_ERR(page)); ··· 646 664 sb = kmap_atomic(bitmap->sb_page, KM_USER0); 647 665 old = le32_to_cpu(sb->state) & bits; 648 666 switch (op) { 649 - case MASK_SET: sb->state |= cpu_to_le32(bits); 650 - break; 651 - case MASK_UNSET: sb->state &= cpu_to_le32(~bits); 652 - break; 653 - default: BUG(); 667 + case MASK_SET: 668 + sb->state |= cpu_to_le32(bits); 669 + break; 670 + case MASK_UNSET: 671 + sb->state &= cpu_to_le32(~bits); 672 + break; 673 + default: 674 + BUG(); 654 675 } 655 676 kunmap_atomic(sb, KM_USER0); 656 677 return old; ··· 695 710 static inline struct page *filemap_get_page(struct bitmap *bitmap, 696 711 unsigned long chunk) 697 712 { 698 - if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; 713 + if (bitmap->filemap == NULL) 714 + return NULL; 715 + if (file_page_index(bitmap, chunk) >= bitmap->file_pages) 716 + return NULL; 699 717 return bitmap->filemap[file_page_index(bitmap, chunk) 700 718 - file_page_index(bitmap, 0)]; 701 719 } 702 - 703 720 704 721 static void bitmap_file_unmap(struct bitmap *bitmap) 705 722 { ··· 753 766 } 754 767 } 755 768 756 - 757 769 /* 758 770 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 759 771 * then it is no longer reliable, so we stop using it and we mark the file ··· 771 785 ptr = d_path(&bitmap->file->f_path, path, 772 786 PAGE_SIZE); 773 787 774 - 775 788 printk(KERN_ALERT 776 789 "%s: kicking failed bitmap file %s from array!\n", 777 790 bmname(bitmap), IS_ERR(ptr) ? 
"" : ptr); ··· 788 803 } 789 804 790 805 enum bitmap_page_attr { 791 - BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced 792 - BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared 793 - BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced 806 + BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ 807 + BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */ 808 + BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ 794 809 }; 795 810 796 811 static inline void set_page_attr(struct bitmap *bitmap, struct page *page, 797 812 enum bitmap_page_attr attr) 798 813 { 799 - __set_bit((page->index<<2) + attr, bitmap->filemap_attr); 814 + if (page) 815 + __set_bit((page->index<<2) + attr, bitmap->filemap_attr); 816 + else 817 + __set_bit(attr, &bitmap->logattrs); 800 818 } 801 819 802 820 static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, 803 821 enum bitmap_page_attr attr) 804 822 { 805 - __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); 823 + if (page) 824 + __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); 825 + else 826 + __clear_bit(attr, &bitmap->logattrs); 806 827 } 807 828 808 829 static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, 809 830 enum bitmap_page_attr attr) 810 831 { 811 - return test_bit((page->index<<2) + attr, bitmap->filemap_attr); 832 + if (page) 833 + return test_bit((page->index<<2) + attr, bitmap->filemap_attr); 834 + else 835 + return test_bit(attr, &bitmap->logattrs); 812 836 } 813 837 814 838 /* ··· 830 836 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 831 837 { 832 838 unsigned long bit; 833 - struct page *page; 839 + struct page *page = NULL; 834 840 void *kaddr; 835 841 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); 836 842 837 843 if (!bitmap->filemap) { 838 - return; 844 + struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; 845 + if (log) 846 + log->type->mark_region(log, chunk); 847 + } else { 848 + 849 + page = filemap_get_page(bitmap, chunk); 850 + if (!page) 851 + return; 852 + bit = file_page_offset(bitmap, chunk); 853 + 854 + /* set the bit */ 855 + kaddr = kmap_atomic(page, KM_USER0); 856 + if (bitmap->flags & BITMAP_HOSTENDIAN) 857 + set_bit(bit, kaddr); 858 + else 859 + ext2_set_bit(bit, kaddr); 860 + kunmap_atomic(kaddr, KM_USER0); 861 + PRINTK("set file bit %lu page %lu\n", bit, page->index); 839 862 } 840 - 841 - page = filemap_get_page(bitmap, chunk); 842 - if (!page) return; 843 - bit = file_page_offset(bitmap, chunk); 844 - 845 - /* set the bit */ 846 - kaddr = kmap_atomic(page, KM_USER0); 847 - if (bitmap->flags & BITMAP_HOSTENDIAN) 848 - set_bit(bit, kaddr); 849 - else 850 - ext2_set_bit(bit, kaddr); 851 - kunmap_atomic(kaddr, KM_USER0); 852 - PRINTK("set file bit %lu page %lu\n", bit, page->index); 853 - 854 863 /* record page number so it gets flushed to disk when unplug occurs */ 855 864 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 856 - 857 865 } 858 866 859 867 /* this gets called when the md device is ready to unplug its underlying ··· 870 874 871 875 if (!bitmap) 872 876 return; 877 + if (!bitmap->filemap) { 878 + /* Must be using a dirty_log */ 879 + struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; 880 + dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); 881 + need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); 882 + if (dirty || need_write) 883 + if 
(log->type->flush(log)) 884 + bitmap->flags |= BITMAP_WRITE_ERROR; 885 + goto out; 886 + } 873 887 874 888 /* look at each page to see if there are any set bits that need to be 875 889 * flushed out to disk */ ··· 898 892 wait = 1; 899 893 spin_unlock_irqrestore(&bitmap->lock, flags); 900 894 901 - if (dirty | need_write) 895 + if (dirty || need_write) 902 896 write_page(bitmap, page, 0); 903 897 } 904 898 if (wait) { /* if any writes were performed, we need to wait on them */ ··· 908 902 else 909 903 md_super_wait(bitmap->mddev); 910 904 } 905 + out: 911 906 if (bitmap->flags & BITMAP_WRITE_ERROR) 912 907 bitmap_file_kick(bitmap); 913 908 } 909 + EXPORT_SYMBOL(bitmap_unplug); 914 910 915 911 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 916 912 /* * bitmap_init_from_disk -- called at bitmap_create time to initialize ··· 951 943 printk(KERN_INFO "%s: bitmap file is out of date, doing full " 952 944 "recovery\n", bmname(bitmap)); 953 945 954 - bytes = (chunks + 7) / 8; 946 + bytes = DIV_ROUND_UP(bitmap->chunks, 8); 955 947 if (!bitmap->mddev->bitmap_info.external) 956 948 bytes += sizeof(bitmap_super_t); 957 949 958 - 959 - num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; 950 + num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); 960 951 961 952 if (file && i_size_read(file->f_mapping->host) < bytes) { 962 953 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", ··· 973 966 974 967 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ 975 968 bitmap->filemap_attr = kzalloc( 976 - roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 969 + roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 977 970 GFP_KERNEL); 978 971 if (!bitmap->filemap_attr) 979 972 goto err; ··· 1028 1021 if (outofdate) { 1029 1022 /* 1030 1023 * if bitmap is out of date, dirty the 1031 - * whole page and write it out 1024 + * whole page and write it out 1032 1025 */ 1033 1026 paddr = kmap_atomic(page, KM_USER0); 1034 1027 memset(paddr + offset, 0xff, ··· 1059 1052 } 1060 1053 } 1061 1054 1062 - /* everything went OK */ 1055 + /* everything went OK */ 1063 1056 ret = 0; 1064 1057 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); 1065 1058 ··· 1087 1080 */ 1088 1081 int i; 1089 1082 1090 - for (i=0; i < bitmap->file_pages; i++) 1083 + for (i = 0; i < bitmap->file_pages; i++) 1091 1084 set_page_attr(bitmap, bitmap->filemap[i], 1092 1085 BITMAP_PAGE_NEEDWRITE); 1093 1086 } 1094 - 1095 1087 1096 1088 static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) 1097 1089 { 1098 1090 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1099 1091 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1100 1092 bitmap->bp[page].count += inc; 1101 - /* 1102 - if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", 1103 - (unsigned long long)offset, inc, bitmap->bp[page].count); 1104 - */ 1105 1093 bitmap_checkfree(bitmap, page); 1106 1094 } 1107 1095 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, ··· 1116 1114 struct page *page = NULL, *lastpage = NULL; 1117 1115 int blocks; 1118 1116 void *paddr; 1117 + struct dm_dirty_log *log = mddev->bitmap_info.log; 1119 1118 1120 1119 /* Use a mutex to guard daemon_work against 1121 1120 * bitmap_destroy. 
··· 1141 1138 spin_lock_irqsave(&bitmap->lock, flags); 1142 1139 for (j = 0; j < bitmap->chunks; j++) { 1143 1140 bitmap_counter_t *bmc; 1144 - if (!bitmap->filemap) 1145 - /* error or shutdown */ 1146 - break; 1147 - 1148 - page = filemap_get_page(bitmap, j); 1141 + if (!bitmap->filemap) { 1142 + if (!log) 1143 + /* error or shutdown */ 1144 + break; 1145 + } else 1146 + page = filemap_get_page(bitmap, j); 1149 1147 1150 1148 if (page != lastpage) { 1151 1149 /* skip this page unless it's marked as needing cleaning */ ··· 1201 1197 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), 1202 1198 &blocks, 0); 1203 1199 if (bmc) { 1204 - /* 1205 - if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); 1206 - */ 1207 1200 if (*bmc) 1208 1201 bitmap->allclean = 0; 1209 1202 1210 1203 if (*bmc == 2) { 1211 - *bmc=1; /* maybe clear the bit next time */ 1204 + *bmc = 1; /* maybe clear the bit next time */ 1212 1205 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1213 1206 } else if (*bmc == 1 && !bitmap->need_sync) { 1214 1207 /* we can clear the bit */ ··· 1215 1214 -1); 1216 1215 1217 1216 /* clear the bit */ 1218 - paddr = kmap_atomic(page, KM_USER0); 1219 - if (bitmap->flags & BITMAP_HOSTENDIAN) 1220 - clear_bit(file_page_offset(bitmap, j), 1221 - paddr); 1222 - else 1223 - ext2_clear_bit(file_page_offset(bitmap, j), 1224 - paddr); 1225 - kunmap_atomic(paddr, KM_USER0); 1217 + if (page) { 1218 + paddr = kmap_atomic(page, KM_USER0); 1219 + if (bitmap->flags & BITMAP_HOSTENDIAN) 1220 + clear_bit(file_page_offset(bitmap, j), 1221 + paddr); 1222 + else 1223 + ext2_clear_bit(file_page_offset(bitmap, j), 1224 + paddr); 1225 + kunmap_atomic(paddr, KM_USER0); 1226 + } else 1227 + log->type->clear_region(log, j); 1226 1228 } 1227 1229 } else 1228 1230 j |= PAGE_COUNTER_MASK; ··· 1233 1229 spin_unlock_irqrestore(&bitmap->lock, flags); 1234 1230 1235 1231 /* now sync the final page */ 1236 - if (lastpage != NULL) { 1232 + if (lastpage != NULL || log != NULL) { 1237 1233 spin_lock_irqsave(&bitmap->lock, flags); 1238 1234 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1239 1235 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1240 1236 spin_unlock_irqrestore(&bitmap->lock, flags); 1241 - write_page(bitmap, lastpage, 0); 1237 + if (lastpage) 1238 + write_page(bitmap, lastpage, 0); 1239 + else 1240 + if (log->type->flush(log)) 1241 + bitmap->flags |= BITMAP_WRITE_ERROR; 1242 1242 } else { 1243 1243 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1244 1244 spin_unlock_irqrestore(&bitmap->lock, flags); ··· 1251 1243 1252 1244 done: 1253 1245 if (bitmap->allclean == 0) 1254 - bitmap->mddev->thread->timeout = 1246 + bitmap->mddev->thread->timeout = 1255 1247 bitmap->mddev->bitmap_info.daemon_sleep; 1256 1248 mutex_unlock(&mddev->bitmap_info.mutex); 1257 1249 } ··· 1270 1262 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1271 1263 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1272 1264 sector_t csize; 1265 + int err; 1273 1266 1274 - if (bitmap_checkpage(bitmap, page, create) < 0) { 1267 + err = bitmap_checkpage(bitmap, page, create); 1268 + 1269 + if (bitmap->bp[page].hijacked || 1270 + bitmap->bp[page].map == NULL) 1271 + csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + 1272 + PAGE_COUNTER_SHIFT - 1); 1273 + else 1275 1274 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1276 - *blocks = csize - (offset & (csize- 1)); 1275 + *blocks = csize - (offset & (csize - 1)); 1276 + 1277 + if (err < 0) 1277 1278 return NULL; 1278 - } 1279 + 1279 1280 
/* now locked ... */ 1280 1281 1281 1282 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1282 1283 /* should we use the first or second counter field 1283 1284 * of the hijacked pointer? */ 1284 1285 int hi = (pageoff > PAGE_COUNTER_MASK); 1285 - csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + 1286 - PAGE_COUNTER_SHIFT - 1); 1287 - *blocks = csize - (offset & (csize- 1)); 1288 1286 return &((bitmap_counter_t *) 1289 1287 &bitmap->bp[page].map)[hi]; 1290 - } else { /* page is allocated */ 1291 - csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1292 - *blocks = csize - (offset & (csize- 1)); 1288 + } else /* page is allocated */ 1293 1289 return (bitmap_counter_t *) 1294 1290 &(bitmap->bp[page].map[pageoff]); 1295 - } 1296 1291 } 1297 1292 1298 1293 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) 1299 1294 { 1300 - if (!bitmap) return 0; 1295 + if (!bitmap) 1296 + return 0; 1301 1297 1302 1298 if (behind) { 1303 1299 int bw; ··· 1334 1322 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1335 1323 TASK_UNINTERRUPTIBLE); 1336 1324 spin_unlock_irq(&bitmap->lock); 1337 - blk_unplug(bitmap->mddev->queue); 1325 + md_unplug(bitmap->mddev); 1338 1326 schedule(); 1339 1327 finish_wait(&bitmap->overflow_wait, &__wait); 1340 1328 continue; 1341 1329 } 1342 1330 1343 - switch(*bmc) { 1331 + switch (*bmc) { 1344 1332 case 0: 1345 1333 bitmap_file_set_bit(bitmap, offset); 1346 - bitmap_count_page(bitmap,offset, 1); 1347 - blk_plug_device_unlocked(bitmap->mddev->queue); 1334 + bitmap_count_page(bitmap, offset, 1); 1348 1335 /* fall through */ 1349 1336 case 1: 1350 1337 *bmc = 2; ··· 1356 1345 offset += blocks; 1357 1346 if (sectors > blocks) 1358 1347 sectors -= blocks; 1359 - else sectors = 0; 1348 + else 1349 + sectors = 0; 1360 1350 } 1361 1351 bitmap->allclean = 0; 1362 1352 return 0; 1363 1353 } 1354 + EXPORT_SYMBOL(bitmap_startwrite); 1364 1355 1365 1356 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, 1366 1357 int success, int behind) 1367 1358 { 1368 - if (!bitmap) return; 1359 + if (!bitmap) 1360 + return; 1369 1361 if (behind) { 1370 1362 if (atomic_dec_and_test(&bitmap->behind_writes)) 1371 1363 wake_up(&bitmap->behind_wait); ··· 1395 1381 bitmap->events_cleared < bitmap->mddev->events) { 1396 1382 bitmap->events_cleared = bitmap->mddev->events; 1397 1383 bitmap->need_sync = 1; 1398 - sysfs_notify_dirent(bitmap->sysfs_can_clear); 1384 + sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); 1399 1385 } 1400 1386 1401 1387 if (!success && ! 
(*bmc & NEEDED_MASK)) ··· 1405 1391 wake_up(&bitmap->overflow_wait); 1406 1392 1407 1393 (*bmc)--; 1408 - if (*bmc <= 2) { 1394 + if (*bmc <= 2) 1409 1395 set_page_attr(bitmap, 1410 - filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1396 + filemap_get_page( 1397 + bitmap, 1398 + offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1411 1399 BITMAP_PAGE_CLEAN); 1412 - } 1400 + 1413 1401 spin_unlock_irqrestore(&bitmap->lock, flags); 1414 1402 offset += blocks; 1415 1403 if (sectors > blocks) 1416 1404 sectors -= blocks; 1417 - else sectors = 0; 1405 + else 1406 + sectors = 0; 1418 1407 } 1419 1408 } 1409 + EXPORT_SYMBOL(bitmap_endwrite); 1420 1410 1421 1411 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, 1422 1412 int degraded) ··· 1473 1455 } 1474 1456 return rv; 1475 1457 } 1458 + EXPORT_SYMBOL(bitmap_start_sync); 1476 1459 1477 1460 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) 1478 1461 { 1479 1462 bitmap_counter_t *bmc; 1480 1463 unsigned long flags; 1481 - /* 1482 - if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); 1483 - */ if (bitmap == NULL) { 1464 + 1465 + if (bitmap == NULL) { 1484 1466 *blocks = 1024; 1485 1467 return; 1486 1468 } ··· 1489 1471 if (bmc == NULL) 1490 1472 goto unlock; 1491 1473 /* locked */ 1492 - /* 1493 - if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks); 1494 - */ 1495 1474 if (RESYNC(*bmc)) { 1496 1475 *bmc &= ~RESYNC_MASK; 1497 1476 1498 1477 if (!NEEDED(*bmc) && aborted) 1499 1478 *bmc |= NEEDED_MASK; 1500 1479 else { 1501 - if (*bmc <= 2) { 1480 + if (*bmc <= 2) 1502 1481 set_page_attr(bitmap, 1503 1482 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1504 1483 BITMAP_PAGE_CLEAN); 1505 - } 1506 1484 } 1507 1485 } 1508 1486 unlock: 1509 1487 spin_unlock_irqrestore(&bitmap->lock, flags); 1510 1488 bitmap->allclean = 0; 1511 1489 } 1490 + EXPORT_SYMBOL(bitmap_end_sync); 1512 1491 1513 1492 void bitmap_close_sync(struct bitmap *bitmap) 1514 1493 { ··· 1522 1507 sector += blocks; 1523 1508 } 1524 1509 } 1510 + EXPORT_SYMBOL(bitmap_close_sync); 1525 1511 1526 1512 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) 1527 1513 { ··· 1542 1526 atomic_read(&bitmap->mddev->recovery_active) == 0); 1543 1527 1544 1528 bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; 1545 - set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); 1529 + if (bitmap->mddev->persistent) 1530 + set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); 1546 1531 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); 1547 1532 s = 0; 1548 1533 while (s < sector && s < bitmap->mddev->resync_max_sectors) { ··· 1553 1536 bitmap->last_end_sync = jiffies; 1554 1537 sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); 1555 1538 } 1539 + EXPORT_SYMBOL(bitmap_cond_end_sync); 1556 1540 1557 1541 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1558 1542 { ··· 1570 1552 spin_unlock_irq(&bitmap->lock); 1571 1553 return; 1572 1554 } 1573 - if (! *bmc) { 1555 + if (!*bmc) { 1574 1556 struct page *page; 1575 - *bmc = 1 | (needed?NEEDED_MASK:0); 1557 + *bmc = 1 | (needed ? 
NEEDED_MASK : 0); 1576 1558 bitmap_count_page(bitmap, offset, 1); 1577 1559 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); 1578 1560 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); ··· 1681 1663 unsigned long pages; 1682 1664 struct file *file = mddev->bitmap_info.file; 1683 1665 int err; 1684 - sector_t start; 1685 - struct sysfs_dirent *bm; 1666 + struct sysfs_dirent *bm = NULL; 1686 1667 1687 1668 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1688 1669 1689 - if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ 1670 + if (!file 1671 + && !mddev->bitmap_info.offset 1672 + && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */ 1690 1673 return 0; 1691 1674 1692 1675 BUG_ON(file && mddev->bitmap_info.offset); 1676 + BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); 1693 1677 1694 1678 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 1695 1679 if (!bitmap) ··· 1705 1685 1706 1686 bitmap->mddev = mddev; 1707 1687 1708 - bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); 1688 + if (mddev->kobj.sd) 1689 + bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); 1709 1690 if (bm) { 1710 1691 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); 1711 1692 sysfs_put(bm); ··· 1740 1719 bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); 1741 1720 1742 1721 /* now that chunksize and chunkshift are set, we can use these macros */ 1743 - chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> 1722 + chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> 1744 1723 CHUNK_BLOCK_SHIFT(bitmap); 1745 - pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; 1724 + pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; 1746 1725 1747 1726 BUG_ON(!pages); 1748 1727 ··· 1762 1741 if (!bitmap->bp) 1763 1742 goto error; 1764 1743 1765 - /* now that we have some pages available, initialize the in-memory 1766 - * bitmap from the on-disk bitmap */ 1767 - start = 0; 1768 - if (mddev->degraded == 0 1769 - || bitmap->events_cleared == mddev->events) 1770 - /* no need to keep dirty bits to optimise a re-add of a missing device */ 1771 - start = mddev->recovery_cp; 1772 - err = bitmap_init_from_disk(bitmap, start); 1773 - 1774 - if (err) 1775 - goto error; 1776 - 1777 1744 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", 1778 1745 pages, bmname(bitmap)); 1779 1746 1780 1747 mddev->bitmap = bitmap; 1781 1748 1782 - mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; 1783 - md_wakeup_thread(mddev->thread); 1784 - 1785 - bitmap_update_sb(bitmap); 1786 1749 1787 1750 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; 1788 1751 ··· 1775 1770 return err; 1776 1771 } 1777 1772 1773 + int bitmap_load(mddev_t *mddev) 1774 + { 1775 + int err = 0; 1776 + sector_t sector = 0; 1777 + struct bitmap *bitmap = mddev->bitmap; 1778 + 1779 + if (!bitmap) 1780 + goto out; 1781 + 1782 + /* Clear out old bitmap info first: Either there is none, or we 1783 + * are resuming after someone else has possibly changed things, 1784 + * so we should forget old cached info. 1785 + * All chunks should be clean, but some might need_sync. 
1786 + */ 1787 + while (sector < mddev->resync_max_sectors) { 1788 + int blocks; 1789 + bitmap_start_sync(bitmap, sector, &blocks, 0); 1790 + sector += blocks; 1791 + } 1792 + bitmap_close_sync(bitmap); 1793 + 1794 + if (mddev->bitmap_info.log) { 1795 + unsigned long i; 1796 + struct dm_dirty_log *log = mddev->bitmap_info.log; 1797 + for (i = 0; i < bitmap->chunks; i++) 1798 + if (!log->type->in_sync(log, i, 1)) 1799 + bitmap_set_memory_bits(bitmap, 1800 + (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), 1801 + 1); 1802 + } else { 1803 + sector_t start = 0; 1804 + if (mddev->degraded == 0 1805 + || bitmap->events_cleared == mddev->events) 1806 + /* no need to keep dirty bits to optimise a 1807 + * re-add of a missing device */ 1808 + start = mddev->recovery_cp; 1809 + 1810 + err = bitmap_init_from_disk(bitmap, start); 1811 + } 1812 + if (err) 1813 + goto out; 1814 + 1815 + mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; 1816 + md_wakeup_thread(mddev->thread); 1817 + 1818 + bitmap_update_sb(bitmap); 1819 + 1820 + if (bitmap->flags & BITMAP_WRITE_ERROR) 1821 + err = -EIO; 1822 + out: 1823 + return err; 1824 + } 1825 + EXPORT_SYMBOL_GPL(bitmap_load); 1826 + 1778 1827 static ssize_t 1779 1828 location_show(mddev_t *mddev, char *page) 1780 1829 { 1781 1830 ssize_t len; 1782 - if (mddev->bitmap_info.file) { 1831 + if (mddev->bitmap_info.file) 1783 1832 len = sprintf(page, "file"); 1784 - } else if (mddev->bitmap_info.offset) { 1833 + else if (mddev->bitmap_info.offset) 1785 1834 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 1786 - } else 1835 + else 1787 1836 len = sprintf(page, "none"); 1788 1837 len += sprintf(page+len, "\n"); 1789 1838 return len; ··· 1926 1867 ssize_t len; 1927 1868 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 1928 1869 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 1929 - 1870 + 1930 1871 len = sprintf(page, "%lu", secs); 1931 1872 if (jifs) 1932 1873 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); ··· 2108 2049 .attrs = md_bitmap_attrs, 2109 2050 }; 2110 2051 2111 - 2112 - /* the bitmap API -- for raid personalities */ 2113 - EXPORT_SYMBOL(bitmap_startwrite); 2114 - EXPORT_SYMBOL(bitmap_endwrite); 2115 - EXPORT_SYMBOL(bitmap_start_sync); 2116 - EXPORT_SYMBOL(bitmap_end_sync); 2117 - EXPORT_SYMBOL(bitmap_unplug); 2118 - EXPORT_SYMBOL(bitmap_close_sync); 2119 - EXPORT_SYMBOL(bitmap_cond_end_sync);
+6
drivers/md/bitmap.h
··· 222 222 unsigned long file_pages; /* number of pages in the file */ 223 223 int last_page_size; /* bytes in the last page */ 224 224 225 + unsigned long logattrs; /* used when filemap_attr doesn't exist 226 + * because we are working with a dirty_log 227 + */ 228 + 225 229 unsigned long flags; 226 230 227 231 int allclean; ··· 247 243 wait_queue_head_t behind_wait; 248 244 249 245 struct sysfs_dirent *sysfs_can_clear; 246 + 250 247 }; 251 248 252 249 /* the bitmap API */ 253 250 254 251 /* these are used only by md/bitmap */ 255 252 int bitmap_create(mddev_t *mddev); 253 + int bitmap_load(mddev_t *mddev); 256 254 void bitmap_flush(mddev_t *mddev); 257 255 void bitmap_destroy(mddev_t *mddev); 258 256
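The bitmap changes above split bitmap start-up into two phases: bitmap_create() now only builds the in-memory structures, while the new bitmap_load() (declared just above, implemented in the bitmap.c hunk) reads the on-disk bitmap or queries a dm-dirty-log, arms the daemon and writes the superblock. A minimal sketch of the resulting calling convention, mirroring the pattern used by do_md_run() and the hot-add paths in the md.c hunk further down; the wrapper name is illustrative only:

	/*
	 * Sketch only: two-phase bitmap bring-up after this change.
	 * bitmap_create() builds the in-memory structures; bitmap_load()
	 * (new) pulls in on-disk/dirty-log state and starts the daemon.
	 * The wrapper name is made up for the example.
	 */
	static int example_bitmap_setup(mddev_t *mddev)
	{
		int err = bitmap_create(mddev);

		if (!err)
			err = bitmap_load(mddev);
		if (err)
			bitmap_destroy(mddev);	/* undo bitmap_create() on failure */
		return err;
	}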
+176 -110
drivers/md/md.c
··· 262 262 * Once ->stop is called and completes, the module will be completely 263 263 * unused. 264 264 */ 265 - static void mddev_suspend(mddev_t *mddev) 265 + void mddev_suspend(mddev_t *mddev) 266 266 { 267 267 BUG_ON(mddev->suspended); 268 268 mddev->suspended = 1; ··· 270 270 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); 271 271 mddev->pers->quiesce(mddev, 1); 272 272 } 273 + EXPORT_SYMBOL_GPL(mddev_suspend); 273 274 274 - static void mddev_resume(mddev_t *mddev) 275 + void mddev_resume(mddev_t *mddev) 275 276 { 276 277 mddev->suspended = 0; 277 278 wake_up(&mddev->sb_wait); 278 279 mddev->pers->quiesce(mddev, 0); 279 280 } 281 + EXPORT_SYMBOL_GPL(mddev_resume); 280 282 281 283 int mddev_congested(mddev_t *mddev, int bits) 282 284 { ··· 387 385 } 388 386 EXPORT_SYMBOL(md_barrier_request); 389 387 388 + /* Support for plugging. 389 + * This mirrors the plugging support in request_queue, but does not 390 + * require having a whole queue 391 + */ 392 + static void plugger_work(struct work_struct *work) 393 + { 394 + struct plug_handle *plug = 395 + container_of(work, struct plug_handle, unplug_work); 396 + plug->unplug_fn(plug); 397 + } 398 + static void plugger_timeout(unsigned long data) 399 + { 400 + struct plug_handle *plug = (void *)data; 401 + kblockd_schedule_work(NULL, &plug->unplug_work); 402 + } 403 + void plugger_init(struct plug_handle *plug, 404 + void (*unplug_fn)(struct plug_handle *)) 405 + { 406 + plug->unplug_flag = 0; 407 + plug->unplug_fn = unplug_fn; 408 + init_timer(&plug->unplug_timer); 409 + plug->unplug_timer.function = plugger_timeout; 410 + plug->unplug_timer.data = (unsigned long)plug; 411 + INIT_WORK(&plug->unplug_work, plugger_work); 412 + } 413 + EXPORT_SYMBOL_GPL(plugger_init); 414 + 415 + void plugger_set_plug(struct plug_handle *plug) 416 + { 417 + if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) 418 + mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); 419 + } 420 + EXPORT_SYMBOL_GPL(plugger_set_plug); 421 + 422 + int plugger_remove_plug(struct plug_handle *plug) 423 + { 424 + if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { 425 + del_timer(&plug->unplug_timer); 426 + return 1; 427 + } else 428 + return 0; 429 + } 430 + EXPORT_SYMBOL_GPL(plugger_remove_plug); 431 + 432 + 390 433 static inline mddev_t *mddev_get(mddev_t *mddev) 391 434 { 392 435 atomic_inc(&mddev->active); ··· 464 417 spin_unlock(&all_mddevs_lock); 465 418 } 466 419 467 - static void mddev_init(mddev_t *mddev) 420 + void mddev_init(mddev_t *mddev) 468 421 { 469 422 mutex_init(&mddev->open_mutex); 470 423 mutex_init(&mddev->reconfig_mutex); ··· 484 437 mddev->resync_max = MaxSector; 485 438 mddev->level = LEVEL_NONE; 486 439 } 440 + EXPORT_SYMBOL_GPL(mddev_init); 487 441 488 442 static mddev_t * mddev_find(dev_t unit) 489 443 { ··· 581 533 * an access to the files will try to take reconfig_mutex 582 534 * while holding the file unremovable, which leads to 583 535 * a deadlock. 584 - * So hold open_mutex instead - we are allowed to take 585 - * it while holding reconfig_mutex, and md_run can 586 - * use it to wait for the remove to complete. 536 + * So hold set sysfs_active while the remove in happeing, 537 + * and anything else which might set ->to_remove or my 538 + * otherwise change the sysfs namespace will fail with 539 + * -EBUSY if sysfs_active is still set. 540 + * We set sysfs_active under reconfig_mutex and elsewhere 541 + * test it under the same mutex to ensure its correct value 542 + * is seen. 
587 543 */ 588 544 struct attribute_group *to_remove = mddev->to_remove; 589 545 mddev->to_remove = NULL; 590 - mutex_lock(&mddev->open_mutex); 546 + mddev->sysfs_active = 1; 591 547 mutex_unlock(&mddev->reconfig_mutex); 592 548 593 - if (to_remove != &md_redundancy_group) 594 - sysfs_remove_group(&mddev->kobj, to_remove); 595 - if (mddev->pers == NULL || 596 - mddev->pers->sync_request == NULL) { 597 - sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 598 - if (mddev->sysfs_action) 599 - sysfs_put(mddev->sysfs_action); 600 - mddev->sysfs_action = NULL; 549 + if (mddev->kobj.sd) { 550 + if (to_remove != &md_redundancy_group) 551 + sysfs_remove_group(&mddev->kobj, to_remove); 552 + if (mddev->pers == NULL || 553 + mddev->pers->sync_request == NULL) { 554 + sysfs_remove_group(&mddev->kobj, &md_redundancy_group); 555 + if (mddev->sysfs_action) 556 + sysfs_put(mddev->sysfs_action); 557 + mddev->sysfs_action = NULL; 558 + } 601 559 } 602 - mutex_unlock(&mddev->open_mutex); 560 + mddev->sysfs_active = 0; 603 561 } else 604 562 mutex_unlock(&mddev->reconfig_mutex); 605 563 ··· 1866 1812 goto fail; 1867 1813 1868 1814 ko = &part_to_dev(rdev->bdev->bd_part)->kobj; 1869 - if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { 1870 - kobject_del(&rdev->kobj); 1871 - goto fail; 1872 - } 1873 - rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state"); 1815 + if (sysfs_create_link(&rdev->kobj, ko, "block")) 1816 + /* failure here is OK */; 1817 + rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); 1874 1818 1875 1819 list_add_rcu(&rdev->same_set, &mddev->disks); 1876 1820 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); ··· 2387 2335 set_bit(In_sync, &rdev->flags); 2388 2336 err = 0; 2389 2337 } 2390 - if (!err && rdev->sysfs_state) 2391 - sysfs_notify_dirent(rdev->sysfs_state); 2338 + if (!err) 2339 + sysfs_notify_dirent_safe(rdev->sysfs_state); 2392 2340 return err ? err : len; 2393 2341 } 2394 2342 static struct rdev_sysfs_entry rdev_state = ··· 2483 2431 rdev->raid_disk = -1; 2484 2432 return err; 2485 2433 } else 2486 - sysfs_notify_dirent(rdev->sysfs_state); 2434 + sysfs_notify_dirent_safe(rdev->sysfs_state); 2487 2435 sprintf(nm, "rd%d", rdev->raid_disk); 2488 2436 if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) 2489 - printk(KERN_WARNING 2490 - "md: cannot register " 2491 - "%s for %s\n", 2492 - nm, mdname(rdev->mddev)); 2493 - 2437 + /* failure here is OK */; 2494 2438 /* don't wakeup anyone, leave that to userspace. 
*/ 2495 2439 } else { 2496 2440 if (slot >= rdev->mddev->raid_disks) ··· 2496 2448 clear_bit(Faulty, &rdev->flags); 2497 2449 clear_bit(WriteMostly, &rdev->flags); 2498 2450 set_bit(In_sync, &rdev->flags); 2499 - sysfs_notify_dirent(rdev->sysfs_state); 2451 + sysfs_notify_dirent_safe(rdev->sysfs_state); 2500 2452 } 2501 2453 return len; 2502 2454 } ··· 2744 2696 .default_attrs = rdev_default_attrs, 2745 2697 }; 2746 2698 2699 + void md_rdev_init(mdk_rdev_t *rdev) 2700 + { 2701 + rdev->desc_nr = -1; 2702 + rdev->saved_raid_disk = -1; 2703 + rdev->raid_disk = -1; 2704 + rdev->flags = 0; 2705 + rdev->data_offset = 0; 2706 + rdev->sb_events = 0; 2707 + rdev->last_read_error.tv_sec = 0; 2708 + rdev->last_read_error.tv_nsec = 0; 2709 + atomic_set(&rdev->nr_pending, 0); 2710 + atomic_set(&rdev->read_errors, 0); 2711 + atomic_set(&rdev->corrected_errors, 0); 2712 + 2713 + INIT_LIST_HEAD(&rdev->same_set); 2714 + init_waitqueue_head(&rdev->blocked_wait); 2715 + } 2716 + EXPORT_SYMBOL_GPL(md_rdev_init); 2747 2717 /* 2748 2718 * Import a device. If 'super_format' >= 0, then sanity check the superblock 2749 2719 * ··· 2785 2719 return ERR_PTR(-ENOMEM); 2786 2720 } 2787 2721 2722 + md_rdev_init(rdev); 2788 2723 if ((err = alloc_disk_sb(rdev))) 2789 2724 goto abort_free; 2790 2725 ··· 2794 2727 goto abort_free; 2795 2728 2796 2729 kobject_init(&rdev->kobj, &rdev_ktype); 2797 - 2798 - rdev->desc_nr = -1; 2799 - rdev->saved_raid_disk = -1; 2800 - rdev->raid_disk = -1; 2801 - rdev->flags = 0; 2802 - rdev->data_offset = 0; 2803 - rdev->sb_events = 0; 2804 - rdev->last_read_error.tv_sec = 0; 2805 - rdev->last_read_error.tv_nsec = 0; 2806 - atomic_set(&rdev->nr_pending, 0); 2807 - atomic_set(&rdev->read_errors, 0); 2808 - atomic_set(&rdev->corrected_errors, 0); 2809 2730 2810 2731 size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; 2811 2732 if (!size) { ··· 2822 2767 goto abort_free; 2823 2768 } 2824 2769 } 2825 - 2826 - INIT_LIST_HEAD(&rdev->same_set); 2827 - init_waitqueue_head(&rdev->blocked_wait); 2828 2770 2829 2771 return rdev; 2830 2772 ··· 3013 2961 * - new personality will access other array. 
3014 2962 */ 3015 2963 3016 - if (mddev->sync_thread || mddev->reshape_position != MaxSector) 2964 + if (mddev->sync_thread || 2965 + mddev->reshape_position != MaxSector || 2966 + mddev->sysfs_active) 3017 2967 return -EBUSY; 3018 2968 3019 2969 if (!mddev->pers->quiesce) { ··· 3492 3438 if (err) 3493 3439 return err; 3494 3440 else { 3495 - sysfs_notify_dirent(mddev->sysfs_state); 3441 + sysfs_notify_dirent_safe(mddev->sysfs_state); 3496 3442 return len; 3497 3443 } 3498 3444 } ··· 3790 3736 } 3791 3737 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3792 3738 md_wakeup_thread(mddev->thread); 3793 - sysfs_notify_dirent(mddev->sysfs_action); 3739 + sysfs_notify_dirent_safe(mddev->sysfs_action); 3794 3740 return len; 3795 3741 } 3796 3742 ··· 4336 4282 disk->disk_name); 4337 4283 error = 0; 4338 4284 } 4339 - if (sysfs_create_group(&mddev->kobj, &md_bitmap_group)) 4285 + if (mddev->kobj.sd && 4286 + sysfs_create_group(&mddev->kobj, &md_bitmap_group)) 4340 4287 printk(KERN_DEBUG "pointless warning\n"); 4341 4288 abort: 4342 4289 mutex_unlock(&disks_mutex); 4343 - if (!error) { 4290 + if (!error && mddev->kobj.sd) { 4344 4291 kobject_uevent(&mddev->kobj, KOBJ_ADD); 4345 - mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state"); 4292 + mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); 4346 4293 } 4347 4294 mddev_put(mddev); 4348 4295 return error; ··· 4381 4326 if (!atomic_read(&mddev->writes_pending)) { 4382 4327 mddev->safemode = 1; 4383 4328 if (mddev->external) 4384 - sysfs_notify_dirent(mddev->sysfs_state); 4329 + sysfs_notify_dirent_safe(mddev->sysfs_state); 4385 4330 } 4386 4331 md_wakeup_thread(mddev->thread); 4387 4332 } 4388 4333 4389 4334 static int start_dirty_degraded; 4390 4335 4391 - static int md_run(mddev_t *mddev) 4336 + int md_run(mddev_t *mddev) 4392 4337 { 4393 4338 int err; 4394 4339 mdk_rdev_t *rdev; ··· 4400 4345 4401 4346 if (mddev->pers) 4402 4347 return -EBUSY; 4403 - 4404 - /* These two calls synchronise us with the 4405 - * sysfs_remove_group calls in mddev_unlock, 4406 - * so they must have completed. 
4407 - */ 4408 - mutex_lock(&mddev->open_mutex); 4409 - mutex_unlock(&mddev->open_mutex); 4348 + /* Cannot run until previous stop completes properly */ 4349 + if (mddev->sysfs_active) 4350 + return -EBUSY; 4410 4351 4411 4352 /* 4412 4353 * Analyze all RAID superblock(s) ··· 4449 4398 return -EINVAL; 4450 4399 } 4451 4400 } 4452 - sysfs_notify_dirent(rdev->sysfs_state); 4401 + sysfs_notify_dirent_safe(rdev->sysfs_state); 4453 4402 } 4454 4403 4455 4404 spin_lock(&pers_lock); ··· 4548 4497 return err; 4549 4498 } 4550 4499 if (mddev->pers->sync_request) { 4551 - if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 4500 + if (mddev->kobj.sd && 4501 + sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 4552 4502 printk(KERN_WARNING 4553 4503 "md: cannot register extra attributes for %s\n", 4554 4504 mdname(mddev)); 4555 - mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); 4505 + mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); 4556 4506 } else if (mddev->ro == 2) /* auto-readonly not meaningful */ 4557 4507 mddev->ro = 0; 4558 4508 ··· 4571 4519 char nm[20]; 4572 4520 sprintf(nm, "rd%d", rdev->raid_disk); 4573 4521 if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) 4574 - printk("md: cannot register %s for %s\n", 4575 - nm, mdname(mddev)); 4522 + /* failure here is OK */; 4576 4523 } 4577 4524 4578 4525 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); ··· 4583 4532 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 4584 4533 4585 4534 md_new_event(mddev); 4586 - sysfs_notify_dirent(mddev->sysfs_state); 4587 - if (mddev->sysfs_action) 4588 - sysfs_notify_dirent(mddev->sysfs_action); 4535 + sysfs_notify_dirent_safe(mddev->sysfs_state); 4536 + sysfs_notify_dirent_safe(mddev->sysfs_action); 4589 4537 sysfs_notify(&mddev->kobj, NULL, "degraded"); 4590 4538 return 0; 4591 4539 } 4540 + EXPORT_SYMBOL_GPL(md_run); 4592 4541 4593 4542 static int do_md_run(mddev_t *mddev) 4594 4543 { ··· 4597 4546 err = md_run(mddev); 4598 4547 if (err) 4599 4548 goto out; 4600 - 4549 + err = bitmap_load(mddev); 4550 + if (err) { 4551 + bitmap_destroy(mddev); 4552 + goto out; 4553 + } 4601 4554 set_capacity(mddev->gendisk, mddev->array_sectors); 4602 4555 revalidate_disk(mddev->gendisk); 4603 4556 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); ··· 4629 4574 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 4630 4575 md_wakeup_thread(mddev->thread); 4631 4576 md_wakeup_thread(mddev->sync_thread); 4632 - sysfs_notify_dirent(mddev->sysfs_state); 4577 + sysfs_notify_dirent_safe(mddev->sysfs_state); 4633 4578 return 0; 4634 4579 } 4635 4580 ··· 4700 4645 mddev->bitmap_info.chunksize = 0; 4701 4646 mddev->bitmap_info.daemon_sleep = 0; 4702 4647 mddev->bitmap_info.max_write_behind = 0; 4648 + mddev->plug = NULL; 4703 4649 } 4704 4650 4705 - static void md_stop_writes(mddev_t *mddev) 4651 + void md_stop_writes(mddev_t *mddev) 4706 4652 { 4707 4653 if (mddev->sync_thread) { 4708 4654 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); ··· 4723 4667 md_update_sb(mddev, 1); 4724 4668 } 4725 4669 } 4670 + EXPORT_SYMBOL_GPL(md_stop_writes); 4726 4671 4727 - static void md_stop(mddev_t *mddev) 4672 + void md_stop(mddev_t *mddev) 4728 4673 { 4729 - md_stop_writes(mddev); 4730 - 4731 4674 mddev->pers->stop(mddev); 4732 4675 if (mddev->pers->sync_request && mddev->to_remove == NULL) 4733 4676 mddev->to_remove = &md_redundancy_group; ··· 4734 4679 mddev->pers = NULL; 4735 4680 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4736 4681 } 4682 
+ EXPORT_SYMBOL_GPL(md_stop); 4737 4683 4738 4684 static int md_set_readonly(mddev_t *mddev, int is_open) 4739 4685 { ··· 4754 4698 mddev->ro = 1; 4755 4699 set_disk_ro(mddev->gendisk, 1); 4756 4700 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4757 - sysfs_notify_dirent(mddev->sysfs_state); 4701 + sysfs_notify_dirent_safe(mddev->sysfs_state); 4758 4702 err = 0; 4759 4703 } 4760 4704 out: ··· 4768 4712 */ 4769 4713 static int do_md_stop(mddev_t * mddev, int mode, int is_open) 4770 4714 { 4771 - int err = 0; 4772 4715 struct gendisk *disk = mddev->gendisk; 4773 4716 mdk_rdev_t *rdev; 4774 4717 4775 4718 mutex_lock(&mddev->open_mutex); 4776 - if (atomic_read(&mddev->openers) > is_open) { 4719 + if (atomic_read(&mddev->openers) > is_open || 4720 + mddev->sysfs_active) { 4777 4721 printk("md: %s still in use.\n",mdname(mddev)); 4778 - err = -EBUSY; 4779 - } else if (mddev->pers) { 4722 + mutex_unlock(&mddev->open_mutex); 4723 + return -EBUSY; 4724 + } 4780 4725 4726 + if (mddev->pers) { 4781 4727 if (mddev->ro) 4782 4728 set_disk_ro(disk, 0); 4783 4729 4730 + md_stop_writes(mddev); 4784 4731 md_stop(mddev); 4785 4732 mddev->queue->merge_bvec_fn = NULL; 4786 4733 mddev->queue->unplug_fn = NULL; 4787 4734 mddev->queue->backing_dev_info.congested_fn = NULL; 4788 4735 4789 4736 /* tell userspace to handle 'inactive' */ 4790 - sysfs_notify_dirent(mddev->sysfs_state); 4737 + sysfs_notify_dirent_safe(mddev->sysfs_state); 4791 4738 4792 4739 list_for_each_entry(rdev, &mddev->disks, same_set) 4793 4740 if (rdev->raid_disk >= 0) { ··· 4800 4741 } 4801 4742 4802 4743 set_capacity(disk, 0); 4744 + mutex_unlock(&mddev->open_mutex); 4803 4745 revalidate_disk(disk); 4804 4746 4805 4747 if (mddev->ro) 4806 4748 mddev->ro = 0; 4807 - 4808 - err = 0; 4809 - } 4810 - mutex_unlock(&mddev->open_mutex); 4811 - if (err) 4812 - return err; 4749 + } else 4750 + mutex_unlock(&mddev->open_mutex); 4813 4751 /* 4814 4752 * Free resources if final stop 4815 4753 */ 4816 4754 if (mode == 0) { 4817 - 4818 4755 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4819 4756 4820 4757 bitmap_destroy(mddev); ··· 4827 4772 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4828 4773 if (mddev->hold_active == UNTIL_STOP) 4829 4774 mddev->hold_active = 0; 4830 - 4831 4775 } 4832 - err = 0; 4833 4776 blk_integrity_unregister(disk); 4834 4777 md_new_event(mddev); 4835 - sysfs_notify_dirent(mddev->sysfs_state); 4836 - return err; 4778 + sysfs_notify_dirent_safe(mddev->sysfs_state); 4779 + return 0; 4837 4780 } 4838 4781 4839 4782 #ifndef MODULE ··· 5192 5139 if (err) 5193 5140 export_rdev(rdev); 5194 5141 else 5195 - sysfs_notify_dirent(rdev->sysfs_state); 5142 + sysfs_notify_dirent_safe(rdev->sysfs_state); 5196 5143 5197 5144 md_update_sb(mddev, 1); 5198 5145 if (mddev->degraded) ··· 5385 5332 err = 0; 5386 5333 if (mddev->pers) { 5387 5334 mddev->pers->quiesce(mddev, 1); 5388 - if (fd >= 0) 5335 + if (fd >= 0) { 5389 5336 err = bitmap_create(mddev); 5337 + if (!err) 5338 + err = bitmap_load(mddev); 5339 + } 5390 5340 if (fd < 0 || err) { 5391 5341 bitmap_destroy(mddev); 5392 5342 fd = -1; /* make sure to put the file */ ··· 5638 5582 mddev->bitmap_info.default_offset; 5639 5583 mddev->pers->quiesce(mddev, 1); 5640 5584 rv = bitmap_create(mddev); 5585 + if (!rv) 5586 + rv = bitmap_load(mddev); 5641 5587 if (rv) 5642 5588 bitmap_destroy(mddev); 5643 5589 mddev->pers->quiesce(mddev, 0); ··· 5872 5814 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { 5873 5815 if (mddev->ro == 2) { 5874 5816 mddev->ro = 0; 
5875 - sysfs_notify_dirent(mddev->sysfs_state); 5817 + sysfs_notify_dirent_safe(mddev->sysfs_state); 5876 5818 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5877 5819 md_wakeup_thread(mddev->thread); 5878 5820 } else { ··· 6123 6065 mddev->pers->error_handler(mddev,rdev); 6124 6066 if (mddev->degraded) 6125 6067 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 6126 - sysfs_notify_dirent(rdev->sysfs_state); 6068 + sysfs_notify_dirent_safe(rdev->sysfs_state); 6127 6069 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 6128 6070 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 6129 6071 md_wakeup_thread(mddev->thread); 6072 + if (mddev->event_work.func) 6073 + schedule_work(&mddev->event_work); 6130 6074 md_new_event_inintr(mddev); 6131 6075 } 6132 6076 ··· 6586 6526 spin_unlock_irq(&mddev->write_lock); 6587 6527 } 6588 6528 if (did_change) 6589 - sysfs_notify_dirent(mddev->sysfs_state); 6529 + sysfs_notify_dirent_safe(mddev->sysfs_state); 6590 6530 wait_event(mddev->sb_wait, 6591 6531 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && 6592 6532 !test_bit(MD_CHANGE_PENDING, &mddev->flags)); ··· 6629 6569 mddev->safemode = 1; 6630 6570 spin_unlock_irq(&mddev->write_lock); 6631 6571 md_update_sb(mddev, 0); 6632 - sysfs_notify_dirent(mddev->sysfs_state); 6572 + sysfs_notify_dirent_safe(mddev->sysfs_state); 6633 6573 } else 6634 6574 spin_unlock_irq(&mddev->write_lock); 6635 6575 ··· 6639 6579 return 0; 6640 6580 } 6641 6581 EXPORT_SYMBOL_GPL(md_allow_write); 6582 + 6583 + void md_unplug(mddev_t *mddev) 6584 + { 6585 + if (mddev->queue) 6586 + blk_unplug(mddev->queue); 6587 + if (mddev->plug) 6588 + mddev->plug->unplug_fn(mddev->plug); 6589 + } 6642 6590 6643 6591 #define SYNC_MARKS 10 6644 6592 #define SYNC_MARK_STEP (3*HZ) ··· 6826 6758 >= mddev->resync_max - mddev->curr_resync_completed 6827 6759 )) { 6828 6760 /* time to update curr_resync_completed */ 6829 - blk_unplug(mddev->queue); 6761 + md_unplug(mddev); 6830 6762 wait_event(mddev->recovery_wait, 6831 6763 atomic_read(&mddev->recovery_active) == 0); 6832 6764 mddev->curr_resync_completed = 6833 6765 mddev->curr_resync; 6834 - set_bit(MD_CHANGE_CLEAN, &mddev->flags); 6766 + if (mddev->persistent) 6767 + set_bit(MD_CHANGE_CLEAN, &mddev->flags); 6835 6768 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6836 6769 } 6837 6770 ··· 6904 6835 * about not overloading the IO subsystem. 
(things like an 6905 6836 * e2fsck being done on the RAID array should execute fast) 6906 6837 */ 6907 - blk_unplug(mddev->queue); 6838 + md_unplug(mddev); 6908 6839 cond_resched(); 6909 6840 6910 6841 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 ··· 6923 6854 * this also signals 'finished resyncing' to md_stop 6924 6855 */ 6925 6856 out: 6926 - blk_unplug(mddev->queue); 6857 + md_unplug(mddev); 6927 6858 6928 6859 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 6929 6860 ··· 7025 6956 sprintf(nm, "rd%d", rdev->raid_disk); 7026 6957 if (sysfs_create_link(&mddev->kobj, 7027 6958 &rdev->kobj, nm)) 7028 - printk(KERN_WARNING 7029 - "md: cannot register " 7030 - "%s for %s\n", 7031 - nm, mdname(mddev)); 6959 + /* failure here is OK */; 7032 6960 spares++; 7033 6961 md_new_event(mddev); 7034 6962 set_bit(MD_CHANGE_DEVS, &mddev->flags); ··· 7118 7052 mddev->safemode = 0; 7119 7053 spin_unlock_irq(&mddev->write_lock); 7120 7054 if (did_change) 7121 - sysfs_notify_dirent(mddev->sysfs_state); 7055 + sysfs_notify_dirent_safe(mddev->sysfs_state); 7122 7056 } 7123 7057 7124 7058 if (mddev->flags) ··· 7157 7091 mddev->recovery = 0; 7158 7092 /* flag recovery needed just to double check */ 7159 7093 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 7160 - sysfs_notify_dirent(mddev->sysfs_action); 7094 + sysfs_notify_dirent_safe(mddev->sysfs_action); 7161 7095 md_new_event(mddev); 7162 7096 goto unlock; 7163 7097 } ··· 7219 7153 mddev->recovery = 0; 7220 7154 } else 7221 7155 md_wakeup_thread(mddev->sync_thread); 7222 - sysfs_notify_dirent(mddev->sysfs_action); 7156 + sysfs_notify_dirent_safe(mddev->sysfs_action); 7223 7157 md_new_event(mddev); 7224 7158 } 7225 7159 unlock: ··· 7228 7162 if (test_and_clear_bit(MD_RECOVERY_RECOVER, 7229 7163 &mddev->recovery)) 7230 7164 if (mddev->sysfs_action) 7231 - sysfs_notify_dirent(mddev->sysfs_action); 7165 + sysfs_notify_dirent_safe(mddev->sysfs_action); 7232 7166 } 7233 7167 mddev_unlock(mddev); 7234 7168 } ··· 7236 7170 7237 7171 void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) 7238 7172 { 7239 - sysfs_notify_dirent(rdev->sysfs_state); 7173 + sysfs_notify_dirent_safe(rdev->sysfs_state); 7240 7174 wait_event_timeout(rdev->blocked_wait, 7241 7175 !test_bit(Blocked, &rdev->flags), 7242 7176 msecs_to_jiffies(5000));
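The md.c hunks above split write-intent-bitmap setup into two steps: bitmap_create() now only allocates the in-memory structures and bitmap_load() then reads the stored state, with the array quiesced around both (see the hot-add and reconfiguration paths). A minimal sketch of that calling pattern, assuming a caller that already holds the mddev lock; the wrapper name example_attach_bitmap() is illustrative and not part of md.c:

	static int example_attach_bitmap(mddev_t *mddev)
	{
		int err;

		mddev->pers->quiesce(mddev, 1);		/* pause normal IO */
		err = bitmap_create(mddev);		/* allocate structures only */
		if (!err)
			err = bitmap_load(mddev);	/* then read the stored bits */
		if (err)
			bitmap_destroy(mddev);		/* undo on any failure */
		mddev->pers->quiesce(mddev, 0);		/* resume IO */
		return err;
	}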
+54 -1
drivers/md/md.h
··· 29 29 typedef struct mddev_s mddev_t; 30 30 typedef struct mdk_rdev_s mdk_rdev_t; 31 31 32 + /* generic plugging support - like that provided with request_queue, 33 + * but does not require a request_queue 34 + */ 35 + struct plug_handle { 36 + void (*unplug_fn)(struct plug_handle *); 37 + struct timer_list unplug_timer; 38 + struct work_struct unplug_work; 39 + unsigned long unplug_flag; 40 + }; 41 + #define PLUGGED_FLAG 1 42 + void plugger_init(struct plug_handle *plug, 43 + void (*unplug_fn)(struct plug_handle *)); 44 + void plugger_set_plug(struct plug_handle *plug); 45 + int plugger_remove_plug(struct plug_handle *plug); 46 + static inline void plugger_flush(struct plug_handle *plug) 47 + { 48 + del_timer_sync(&plug->unplug_timer); 49 + cancel_work_sync(&plug->unplug_work); 50 + } 51 + 32 52 /* 33 53 * MD's 'extended' device 34 54 */ ··· 145 125 int suspended; 146 126 atomic_t active_io; 147 127 int ro; 128 + int sysfs_active; /* set when sysfs deletes 129 + * are happening, so run/ 130 + * takeover/stop are not safe 131 + */ 148 132 149 133 struct gendisk *gendisk; 150 134 ··· 321 297 * hot-adding a bitmap. It should 322 298 * eventually be settable by sysfs. 323 299 */ 300 + /* When md is serving under dm, it might use a 301 + * dirty_log to store the bits. 302 + */ 303 + struct dm_dirty_log *log; 304 + 324 305 struct mutex mutex; 325 306 unsigned long chunksize; 326 - unsigned long daemon_sleep; /* how many seconds between updates? */ 307 + unsigned long daemon_sleep; /* how many jiffies between updates? */ 327 308 unsigned long max_write_behind; /* write-behind mode */ 328 309 int external; 329 310 } bitmap_info; ··· 337 308 struct list_head all_mddevs; 338 309 339 310 struct attribute_group *to_remove; 311 + struct plug_handle *plug; /* if used by personality */ 312 + 340 313 /* Generic barrier handling. 341 314 * If there is a pending barrier request, all other 342 315 * writes are blocked while the devices are flushed. ··· 349 318 struct bio *barrier; 350 319 atomic_t flush_pending; 351 320 struct work_struct barrier_work; 321 + struct work_struct event_work; /* used by dm to report failure event */ 352 322 }; 353 323 354 324 ··· 413 381 ssize_t (*store)(mddev_t *, const char *, size_t); 414 382 }; 415 383 extern struct attribute_group md_bitmap_group; 384 + 385 + static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name) 386 + { 387 + if (sd) 388 + return sysfs_get_dirent(sd, NULL, name); 389 + return sd; 390 + } 391 + static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd) 392 + { 393 + if (sd) 394 + sysfs_notify_dirent(sd); 395 + } 416 396 417 397 static inline char * mdname (mddev_t * mddev) 418 398 { ··· 518 474 extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); 519 475 extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); 520 476 extern void restore_bitmap_write_access(struct file *file); 477 + extern void md_unplug(mddev_t *mddev); 521 478 479 + extern void mddev_init(mddev_t *mddev); 480 + extern int md_run(mddev_t *mddev); 481 + extern void md_stop(mddev_t *mddev); 482 + extern void md_stop_writes(mddev_t *mddev); 483 + extern void md_rdev_init(mdk_rdev_t *rdev); 484 + 485 + extern void mddev_suspend(mddev_t *mddev); 486 + extern void mddev_resume(mddev_t *mddev); 522 487 #endif /* _MD_MD_H */
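The plug_handle added to md.h gives a personality a plug that does not depend on a request_queue, and the new md_unplug() reaches it through mddev->plug. A hedged sketch of how a personality is expected to wire this up, modelled on the raid5 conversion later in this diff; struct example_conf, example_unplug() and example_run() are illustrative names, not part of the API:

	#include "md.h"

	struct example_conf {
		mddev_t			*mddev;
		struct plug_handle	plug;
	};

	static void example_unplug(struct plug_handle *plug)
	{
		struct example_conf *conf =
			container_of(plug, struct example_conf, plug);

		if (plugger_remove_plug(&conf->plug))	/* were we plugged? */
			md_wakeup_thread(conf->mddev->thread);
	}

	static void example_run(mddev_t *mddev, struct example_conf *conf)
	{
		plugger_init(&conf->plug, example_unplug);
		mddev->plug = &conf->plug;	/* md_unplug(mddev) can now reach us */
	}

	/* While deferring work: plugger_set_plug(&conf->plug);
	 * at teardown: plugger_flush(&conf->plug) before freeing 'conf'.
	 */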
drivers/md/mktables.c → lib/raid6/mktables.c
+18
drivers/md/raid10.c
··· 825 825 */ 826 826 bp = bio_split(bio, 827 827 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 828 + 829 + /* Each of these 'make_request' calls will call 'wait_barrier'. 830 + * If the first succeeds but the second blocks due to the resync 831 + * thread raising the barrier, we will deadlock because the 832 + * IO to the underlying device will be queued in generic_make_request 833 + * and will never complete, so will never reduce nr_pending. 834 + * So increment nr_waiting here so no new raise_barriers will 835 + * succeed, and so the second wait_barrier cannot block. 836 + */ 837 + spin_lock_irq(&conf->resync_lock); 838 + conf->nr_waiting++; 839 + spin_unlock_irq(&conf->resync_lock); 840 + 828 841 if (make_request(mddev, &bp->bio1)) 829 842 generic_make_request(&bp->bio1); 830 843 if (make_request(mddev, &bp->bio2)) 831 844 generic_make_request(&bp->bio2); 845 + 846 + spin_lock_irq(&conf->resync_lock); 847 + conf->nr_waiting--; 848 + wake_up(&conf->wait_barrier); 849 + spin_unlock_irq(&conf->resync_lock); 832 850 833 851 bio_pair_release(bp); 834 852 return 0;
+108 -72
drivers/md/raid5.c
··· 201 201 if (test_bit(STRIPE_HANDLE, &sh->state)) { 202 202 if (test_bit(STRIPE_DELAYED, &sh->state)) { 203 203 list_add_tail(&sh->lru, &conf->delayed_list); 204 - blk_plug_device(conf->mddev->queue); 204 + plugger_set_plug(&conf->plug); 205 205 } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && 206 206 sh->bm_seq - conf->seq_write > 0) { 207 207 list_add_tail(&sh->lru, &conf->bitmap_list); 208 - blk_plug_device(conf->mddev->queue); 208 + plugger_set_plug(&conf->plug); 209 209 } else { 210 210 clear_bit(STRIPE_BIT_DELAY, &sh->state); 211 211 list_add_tail(&sh->lru, &conf->handle_list); ··· 434 434 } 435 435 436 436 static void unplug_slaves(mddev_t *mddev); 437 - static void raid5_unplug_device(struct request_queue *q); 438 437 439 438 static struct stripe_head * 440 439 get_active_stripe(raid5_conf_t *conf, sector_t sector, ··· 463 464 < (conf->max_nr_stripes *3/4) 464 465 || !conf->inactive_blocked), 465 466 conf->device_lock, 466 - raid5_unplug_device(conf->mddev->queue) 467 + md_raid5_unplug_device(conf) 467 468 ); 468 469 conf->inactive_blocked = 0; 469 470 } else ··· 1336 1337 struct kmem_cache *sc; 1337 1338 int devs = max(conf->raid_disks, conf->previous_raid_disks); 1338 1339 1339 - sprintf(conf->cache_name[0], 1340 - "raid%d-%s", conf->level, mdname(conf->mddev)); 1341 - sprintf(conf->cache_name[1], 1342 - "raid%d-%s-alt", conf->level, mdname(conf->mddev)); 1340 + if (conf->mddev->gendisk) 1341 + sprintf(conf->cache_name[0], 1342 + "raid%d-%s", conf->level, mdname(conf->mddev)); 1343 + else 1344 + sprintf(conf->cache_name[0], 1345 + "raid%d-%p", conf->level, conf->mddev); 1346 + sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); 1347 + 1343 1348 conf->active_name = 0; 1344 1349 sc = kmem_cache_create(conf->cache_name[conf->active_name], 1345 1350 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), ··· 3617 3614 list_add_tail(&sh->lru, &conf->hold_list); 3618 3615 } 3619 3616 } else 3620 - blk_plug_device(conf->mddev->queue); 3617 + plugger_set_plug(&conf->plug); 3621 3618 } 3622 3619 3623 3620 static void activate_bit_delay(raid5_conf_t *conf) ··· 3658 3655 rcu_read_unlock(); 3659 3656 } 3660 3657 3661 - static void raid5_unplug_device(struct request_queue *q) 3658 + void md_raid5_unplug_device(raid5_conf_t *conf) 3662 3659 { 3663 - mddev_t *mddev = q->queuedata; 3664 - raid5_conf_t *conf = mddev->private; 3665 3660 unsigned long flags; 3666 3661 3667 3662 spin_lock_irqsave(&conf->device_lock, flags); 3668 3663 3669 - if (blk_remove_plug(q)) { 3664 + if (plugger_remove_plug(&conf->plug)) { 3670 3665 conf->seq_flush++; 3671 3666 raid5_activate_delayed(conf); 3672 3667 } 3673 - md_wakeup_thread(mddev->thread); 3668 + md_wakeup_thread(conf->mddev->thread); 3674 3669 3675 3670 spin_unlock_irqrestore(&conf->device_lock, flags); 3676 3671 3677 - unplug_slaves(mddev); 3672 + unplug_slaves(conf->mddev); 3673 + } 3674 + EXPORT_SYMBOL_GPL(md_raid5_unplug_device); 3675 + 3676 + static void raid5_unplug(struct plug_handle *plug) 3677 + { 3678 + raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); 3679 + md_raid5_unplug_device(conf); 3678 3680 } 3679 3681 3680 - static int raid5_congested(void *data, int bits) 3682 + static void raid5_unplug_queue(struct request_queue *q) 3681 3683 { 3682 - mddev_t *mddev = data; 3684 + mddev_t *mddev = q->queuedata; 3685 + md_raid5_unplug_device(mddev->private); 3686 + } 3687 + 3688 + int md_raid5_congested(mddev_t *mddev, int bits) 3689 + { 3683 3690 raid5_conf_t *conf = mddev->private; 3684 3691 3685 3692 /* No difference 
between reads and writes. Just check 3686 3693 * how busy the stripe_cache is 3687 3694 */ 3688 3695 3689 - if (mddev_congested(mddev, bits)) 3690 - return 1; 3691 3696 if (conf->inactive_blocked) 3692 3697 return 1; 3693 3698 if (conf->quiesce) ··· 3704 3693 return 1; 3705 3694 3706 3695 return 0; 3696 + } 3697 + EXPORT_SYMBOL_GPL(md_raid5_congested); 3698 + 3699 + static int raid5_congested(void *data, int bits) 3700 + { 3701 + mddev_t *mddev = data; 3702 + 3703 + return mddev_congested(mddev, bits) || 3704 + md_raid5_congested(mddev, bits); 3707 3705 } 3708 3706 3709 3707 /* We want read requests to align with chunks where possible, ··· 4095 4075 * add failed due to overlap. Flush everything 4096 4076 * and wait a while 4097 4077 */ 4098 - raid5_unplug_device(mddev->queue); 4078 + md_raid5_unplug_device(conf); 4099 4079 release_stripe(sh); 4100 4080 schedule(); 4101 4081 goto retry; ··· 4586 4566 return 0; 4587 4567 } 4588 4568 4569 + int 4570 + raid5_set_cache_size(mddev_t *mddev, int size) 4571 + { 4572 + raid5_conf_t *conf = mddev->private; 4573 + int err; 4574 + 4575 + if (size <= 16 || size > 32768) 4576 + return -EINVAL; 4577 + while (size < conf->max_nr_stripes) { 4578 + if (drop_one_stripe(conf)) 4579 + conf->max_nr_stripes--; 4580 + else 4581 + break; 4582 + } 4583 + err = md_allow_write(mddev); 4584 + if (err) 4585 + return err; 4586 + while (size > conf->max_nr_stripes) { 4587 + if (grow_one_stripe(conf)) 4588 + conf->max_nr_stripes++; 4589 + else break; 4590 + } 4591 + return 0; 4592 + } 4593 + EXPORT_SYMBOL(raid5_set_cache_size); 4594 + 4589 4595 static ssize_t 4590 4596 raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) 4591 4597 { ··· 4626 4580 4627 4581 if (strict_strtoul(page, 10, &new)) 4628 4582 return -EINVAL; 4629 - if (new <= 16 || new > 32768) 4630 - return -EINVAL; 4631 - while (new < conf->max_nr_stripes) { 4632 - if (drop_one_stripe(conf)) 4633 - conf->max_nr_stripes--; 4634 - else 4635 - break; 4636 - } 4637 - err = md_allow_write(mddev); 4583 + err = raid5_set_cache_size(mddev, new); 4638 4584 if (err) 4639 4585 return err; 4640 - while (new > conf->max_nr_stripes) { 4641 - if (grow_one_stripe(conf)) 4642 - conf->max_nr_stripes++; 4643 - else break; 4644 - } 4645 4586 return len; 4646 4587 } 4647 4588 ··· 4991 4958 static int run(mddev_t *mddev) 4992 4959 { 4993 4960 raid5_conf_t *conf; 4994 - int working_disks = 0, chunk_size; 4961 + int working_disks = 0; 4995 4962 int dirty_parity_disks = 0; 4996 4963 mdk_rdev_t *rdev; 4997 4964 sector_t reshape_offset = 0; ··· 5177 5144 "reshape"); 5178 5145 } 5179 5146 5180 - /* read-ahead size must cover two whole stripes, which is 5181 - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices 5182 - */ 5183 - { 5147 + 5148 + /* Ok, everything is just fine now */ 5149 + if (mddev->to_remove == &raid5_attrs_group) 5150 + mddev->to_remove = NULL; 5151 + else if (mddev->kobj.sd && 5152 + sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) 5153 + printk(KERN_WARNING 5154 + "raid5: failed to create sysfs attributes for %s\n", 5155 + mdname(mddev)); 5156 + md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 5157 + 5158 + plugger_init(&conf->plug, raid5_unplug); 5159 + mddev->plug = &conf->plug; 5160 + if (mddev->queue) { 5161 + int chunk_size; 5162 + /* read-ahead size must cover two whole stripes, which 5163 + * is 2 * (datadisks) * chunksize where 'n' is the 5164 + * number of raid devices 5165 + */ 5184 5166 int data_disks = conf->previous_raid_disks - conf->max_degraded; 5185 
5167 int stripe = data_disks * 5186 5168 ((mddev->chunk_sectors << 9) / PAGE_SIZE); 5187 5169 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 5188 5170 mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 5171 + 5172 + blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); 5173 + 5174 + mddev->queue->backing_dev_info.congested_data = mddev; 5175 + mddev->queue->backing_dev_info.congested_fn = raid5_congested; 5176 + mddev->queue->queue_lock = &conf->device_lock; 5177 + mddev->queue->unplug_fn = raid5_unplug_queue; 5178 + 5179 + chunk_size = mddev->chunk_sectors << 9; 5180 + blk_queue_io_min(mddev->queue, chunk_size); 5181 + blk_queue_io_opt(mddev->queue, chunk_size * 5182 + (conf->raid_disks - conf->max_degraded)); 5183 + 5184 + list_for_each_entry(rdev, &mddev->disks, same_set) 5185 + disk_stack_limits(mddev->gendisk, rdev->bdev, 5186 + rdev->data_offset << 9); 5189 5187 } 5190 - 5191 - /* Ok, everything is just fine now */ 5192 - if (mddev->to_remove == &raid5_attrs_group) 5193 - mddev->to_remove = NULL; 5194 - else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) 5195 - printk(KERN_WARNING 5196 - "md/raid:%s: failed to create sysfs attributes.\n", 5197 - mdname(mddev)); 5198 - 5199 - mddev->queue->queue_lock = &conf->device_lock; 5200 - 5201 - mddev->queue->unplug_fn = raid5_unplug_device; 5202 - mddev->queue->backing_dev_info.congested_data = mddev; 5203 - mddev->queue->backing_dev_info.congested_fn = raid5_congested; 5204 - 5205 - md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 5206 - 5207 - blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); 5208 - chunk_size = mddev->chunk_sectors << 9; 5209 - blk_queue_io_min(mddev->queue, chunk_size); 5210 - blk_queue_io_opt(mddev->queue, chunk_size * 5211 - (conf->raid_disks - conf->max_degraded)); 5212 - 5213 - list_for_each_entry(rdev, &mddev->disks, same_set) 5214 - disk_stack_limits(mddev->gendisk, rdev->bdev, 5215 - rdev->data_offset << 9); 5216 5188 5217 5189 return 0; 5218 5190 abort: ··· 5238 5200 5239 5201 md_unregister_thread(mddev->thread); 5240 5202 mddev->thread = NULL; 5241 - mddev->queue->backing_dev_info.congested_fn = NULL; 5242 - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 5203 + if (mddev->queue) 5204 + mddev->queue->backing_dev_info.congested_fn = NULL; 5205 + plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ 5243 5206 free_conf(conf); 5244 5207 mddev->private = NULL; 5245 5208 mddev->to_remove = &raid5_attrs_group; ··· 5584 5545 sprintf(nm, "rd%d", rdev->raid_disk); 5585 5546 if (sysfs_create_link(&mddev->kobj, 5586 5547 &rdev->kobj, nm)) 5587 - printk(KERN_WARNING 5588 - "md/raid:%s: failed to create " 5589 - " link %s\n", 5590 - mdname(mddev), nm); 5548 + /* Failure here is OK */; 5591 5549 } else 5592 5550 break; 5593 5551 } ··· 5639 5603 /* read-ahead size must cover two whole stripes, which is 5640 5604 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices 5641 5605 */ 5642 - { 5606 + if (conf->mddev->queue) { 5643 5607 int data_disks = conf->raid_disks - conf->max_degraded; 5644 5608 int stripe = data_disks * ((conf->chunk_sectors << 9) 5645 5609 / PAGE_SIZE);
+8 -1
drivers/md/raid5.h
··· 388 388 * two caches. 389 389 */ 390 390 int active_name; 391 - char cache_name[2][20]; 391 + char cache_name[2][32]; 392 392 struct kmem_cache *slab_cache; /* for allocating stripes */ 393 393 394 394 int seq_flush, seq_write; ··· 398 398 * (fresh device added). 399 399 * Cleared when a sync completes. 400 400 */ 401 + 402 + struct plug_handle plug; 403 + 401 404 /* per cpu variables */ 402 405 struct raid5_percpu { 403 406 struct page *spare_page; /* Used when checking P/Q in raid6 */ ··· 500 497 { 501 498 return layout >= 8 && layout <= 10; 502 499 } 500 + 501 + extern int md_raid5_congested(mddev_t *mddev, int bits); 502 + extern void md_raid5_unplug_device(raid5_conf_t *conf); 503 + extern int raid5_set_cache_size(mddev_t *mddev, int size); 503 504 #endif
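raid5.h now exposes md_raid5_congested(), md_raid5_unplug_device() and raid5_set_cache_size(), so a caller that started the array with md_run() but has no gendisk or request_queue can still poll congestion and tune the stripe cache. A hedged usage sketch; the wrapper names and the surrounding context are assumptions, not code from this series:

	#include "raid5.h"

	/* Illustrative only: resize the stripe cache of a raid456 mddev */
	static int example_tune_stripe_cache(mddev_t *mddev, int nr_stripes)
	{
		return raid5_set_cache_size(mddev, nr_stripes);	/* valid: 17..32768 */
	}

	/* Illustrative congested callback for a wrapping block device */
	static int example_congested(void *data, int bits)
	{
		mddev_t *mddev = data;

		return md_raid5_congested(mddev, bits);
	}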
drivers/md/raid6algos.c → lib/raid6/raid6algos.c
drivers/md/raid6altivec.uc → lib/raid6/raid6altivec.uc
drivers/md/raid6int.uc → lib/raid6/raid6int.uc
drivers/md/raid6mmx.c → lib/raid6/raid6mmx.c
drivers/md/raid6recov.c → lib/raid6/raid6recov.c
drivers/md/raid6sse1.c → lib/raid6/raid6sse1.c
drivers/md/raid6sse2.c → lib/raid6/raid6sse2.c
drivers/md/raid6test/Makefile → lib/raid6/raid6test/Makefile
drivers/md/raid6test/test.c → lib/raid6/raid6test/test.c
drivers/md/raid6x86.h → lib/raid6/raid6x86.h
drivers/md/unroll.awk → lib/raid6/unroll.awk
+3
lib/Kconfig
··· 7 7 8 8 menu "Library routines" 9 9 10 + config RAID6_PQ 11 + tristate 12 + 10 13 config BITREVERSE 11 14 tristate 12 15
+1
lib/Makefile
··· 69 69 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ 70 70 obj-$(CONFIG_LZO_COMPRESS) += lzo/ 71 71 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ 72 + obj-$(CONFIG_RAID6_PQ) += raid6/ 72 73 73 74 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o 74 75 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
+78
lib/raid6/Makefile
··· 1 + obj-$(CONFIG_RAID6_PQ) += raid6_pq.o 2 + 3 + raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ 4 + raid6int1.o raid6int2.o raid6int4.o \ 5 + raid6int8.o raid6int16.o raid6int32.o \ 6 + raid6altivec1.o raid6altivec2.o raid6altivec4.o \ 7 + raid6altivec8.o \ 8 + raid6mmx.o raid6sse1.o raid6sse2.o 9 + hostprogs-y += mktables 10 + 11 + quiet_cmd_unroll = UNROLL $@ 12 + cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ 13 + < $< > $@ || ( rm -f $@ && exit 1 ) 14 + 15 + ifeq ($(CONFIG_ALTIVEC),y) 16 + altivec_flags := -maltivec -mabi=altivec 17 + endif 18 + 19 + targets += raid6int1.c 20 + $(obj)/raid6int1.c: UNROLL := 1 21 + $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 22 + $(call if_changed,unroll) 23 + 24 + targets += raid6int2.c 25 + $(obj)/raid6int2.c: UNROLL := 2 26 + $(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 27 + $(call if_changed,unroll) 28 + 29 + targets += raid6int4.c 30 + $(obj)/raid6int4.c: UNROLL := 4 31 + $(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 32 + $(call if_changed,unroll) 33 + 34 + targets += raid6int8.c 35 + $(obj)/raid6int8.c: UNROLL := 8 36 + $(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 37 + $(call if_changed,unroll) 38 + 39 + targets += raid6int16.c 40 + $(obj)/raid6int16.c: UNROLL := 16 41 + $(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 42 + $(call if_changed,unroll) 43 + 44 + targets += raid6int32.c 45 + $(obj)/raid6int32.c: UNROLL := 32 46 + $(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE 47 + $(call if_changed,unroll) 48 + 49 + CFLAGS_raid6altivec1.o += $(altivec_flags) 50 + targets += raid6altivec1.c 51 + $(obj)/raid6altivec1.c: UNROLL := 1 52 + $(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 53 + $(call if_changed,unroll) 54 + 55 + CFLAGS_raid6altivec2.o += $(altivec_flags) 56 + targets += raid6altivec2.c 57 + $(obj)/raid6altivec2.c: UNROLL := 2 58 + $(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 59 + $(call if_changed,unroll) 60 + 61 + CFLAGS_raid6altivec4.o += $(altivec_flags) 62 + targets += raid6altivec4.c 63 + $(obj)/raid6altivec4.c: UNROLL := 4 64 + $(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 65 + $(call if_changed,unroll) 66 + 67 + CFLAGS_raid6altivec8.o += $(altivec_flags) 68 + targets += raid6altivec8.c 69 + $(obj)/raid6altivec8.c: UNROLL := 8 70 + $(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE 71 + $(call if_changed,unroll) 72 + 73 + quiet_cmd_mktable = TABLE $@ 74 + cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 75 + 76 + targets += raid6tables.c 77 + $(obj)/raid6tables.c: $(obj)/mktables FORCE 78 + $(call if_changed,mktable)