Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
md: remove rd%d links immediately after stopping an array.
md: remove ability to explicitly set an inactive array to 'clean'.
md: constify VFTs
md: tidy up status_resync to handle large arrays.
md: fix some (more) errors with bitmaps on devices larger than 2TB.
md/raid10: don't clear bitmap during recovery if array will still be degraded.
md: fix loading of out-of-date bitmap.

+60 -51
+15 -14
drivers/md/bitmap.c
···
 			oldindex = index;
 			oldpage = page;
 
+			bitmap->filemap[bitmap->file_pages++] = page;
+			bitmap->last_page_size = count;
+
 			if (outofdate) {
 				/*
 				 * if bitmap is out of date, dirty the
···
 				write_page(bitmap, page, 1);
 
 				ret = -EIO;
-				if (bitmap->flags & BITMAP_WRITE_ERROR) {
-					/* release, page not in filemap yet */
-					put_page(page);
+				if (bitmap->flags & BITMAP_WRITE_ERROR)
 					goto err;
-				}
 			}
-
-			bitmap->filemap[bitmap->file_pages++] = page;
-			bitmap->last_page_size = count;
 		}
···
 		kunmap_atomic(paddr, KM_USER0);
 		if (b) {
 			/* if the disk bit is set, set the memory bit */
-			bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
-					       ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
-				);
+			int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
+				      >= start);
+			bitmap_set_memory_bits(bitmap,
+					       (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
+					       needed);
 			bit_cnt++;
 			set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 		}
···
 			spin_lock_irqsave(&bitmap->lock, flags);
 			clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 		}
-		bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
-					&blocks, 0);
+		bmc = bitmap_get_counter(bitmap,
+					 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
+					 &blocks, 0);
 		if (bmc) {
 			/*
 			  if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
···
 			} else if (*bmc == 1) {
 				/* we can clear the bit */
 				*bmc = 0;
-				bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
+				bitmap_count_page(bitmap,
+						  (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
 						  -1);
 
 				/* clear the bit */
···
 	unsigned long chunk;
 
 	for (chunk = s; chunk <= e; chunk++) {
-		sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
+		sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
 		bitmap_set_memory_bits(bitmap, sec, 1);
 		bitmap_file_set_bit(bitmap, sec);
 	}
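
The recurring (sector_t) casts above are the substance of the >2TB fix: i, j and chunk are unsigned long, so on a 32-bit kernel the shift by CHUNK_BLOCK_SHIFT() is performed in 32-bit arithmetic and wraps before the result is widened to the 64-bit sector_t. A minimal userspace sketch of the failure mode (the type widths and the shift value here are illustrative, not taken from the kernel):

#include <stdio.h>
#include <stdint.h>

/* Stand-in for the kernel's 64-bit sector_t (CONFIG_LBD=y). */
typedef uint64_t sector_t;

int main(void)
{
	uint32_t chunk = 5000000;	/* bitmap chunk index; plays the
					 * role of a 32-bit unsigned long */
	int chunk_block_shift = 10;	/* hypothetical CHUNK_BLOCK_SHIFT() */

	/* Buggy form: the shift wraps in 32-bit arithmetic, and only the
	 * truncated result is widened to 64 bits. */
	sector_t bad = chunk << chunk_block_shift;

	/* Fixed form: widen first, then shift in 64-bit arithmetic. */
	sector_t good = (sector_t)chunk << chunk_block_shift;

	printf("bad  = %llu\n", (unsigned long long)bad);	/* 825032704 (wrapped) */
	printf("good = %llu\n", (unsigned long long)good);	/* 5120000000 */
	return 0;
}

The shift only misbehaves once chunk << shift crosses 2^32 sectors, i.e. beyond 2TB with 512-byte sectors, which is why these bugs went unnoticed on smaller arrays.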
+39 -31
drivers/md/md.c
···
 			} else
 				err = -EBUSY;
 			spin_unlock_irq(&mddev->write_lock);
-		} else {
-			mddev->ro = 0;
-			mddev->recovery_cp = MaxSector;
-			err = do_md_run(mddev);
-		}
+		} else
+			err = -EINVAL;
 		break;
 	case active:
 		if (mddev->pers) {
···
 {
 	int err = 0;
 	struct gendisk *disk = mddev->gendisk;
+	mdk_rdev_t *rdev;
 
 	if (atomic_read(&mddev->openers) > is_open) {
 		printk("md: %s still in use.\n",mdname(mddev));
···
 		/* tell userspace to handle 'inactive' */
 		sysfs_notify_dirent(mddev->sysfs_state);
 
+		list_for_each_entry(rdev, &mddev->disks, same_set)
+			if (rdev->raid_disk >= 0) {
+				char nm[20];
+				sprintf(nm, "rd%d", rdev->raid_disk);
+				sysfs_remove_link(&mddev->kobj, nm);
+			}
+
 		set_capacity(disk, 0);
 		mddev->changed = 1;
···
 	 * Free resources if final stop
 	 */
 	if (mode == 0) {
-		mdk_rdev_t *rdev;
 
 		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
···
 			mddev->bitmap_file = NULL;
 		}
 		mddev->bitmap_offset = 0;
-
-		list_for_each_entry(rdev, &mddev->disks, same_set)
-			if (rdev->raid_disk >= 0) {
-				char nm[20];
-				sprintf(nm, "rd%d", rdev->raid_disk);
-				sysfs_remove_link(&mddev->kobj, nm);
-			}
 
 		/* make sure all md_delayed_delete calls have finished */
 		flush_scheduled_work();
···
 
 static void status_resync(struct seq_file *seq, mddev_t * mddev)
 {
-	sector_t max_blocks, resync, res;
-	unsigned long dt, db, rt;
+	sector_t max_sectors, resync, res;
+	unsigned long dt, db;
+	sector_t rt;
 	int scale;
 	unsigned int per_milli;
 
-	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
+	resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
-		max_blocks = mddev->resync_max_sectors >> 1;
+		max_sectors = mddev->resync_max_sectors;
 	else
-		max_blocks = mddev->dev_sectors / 2;
+		max_sectors = mddev->dev_sectors;
 
 	/*
 	 * Should not happen.
 	 */
-	if (!max_blocks) {
+	if (!max_sectors) {
 		MD_BUG();
 		return;
 	}
 	/* Pick 'scale' such that (resync>>scale)*1000 will fit
-	 * in a sector_t, and (max_blocks>>scale) will fit in a
+	 * in a sector_t, and (max_sectors>>scale) will fit in a
 	 * u32, as those are the requirements for sector_div.
 	 * Thus 'scale' must be at least 10
 	 */
 	scale = 10;
 	if (sizeof(sector_t) > sizeof(unsigned long)) {
-		while ( max_blocks/2 > (1ULL<<(scale+32)))
+		while ( max_sectors/2 > (1ULL<<(scale+32)))
 			scale++;
 	}
 	res = (resync>>scale)*1000;
-	sector_div(res, (u32)((max_blocks>>scale)+1));
+	sector_div(res, (u32)((max_sectors>>scale)+1));
 
 	per_milli = res;
 	{
···
 		   (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
 		    "resync" : "recovery"))),
 		   per_milli/10, per_milli % 10,
-		   (unsigned long long) resync,
-		   (unsigned long long) max_blocks);
+		   (unsigned long long) resync/2,
+		   (unsigned long long) max_sectors/2);
 
 	/*
-	 * We do not want to overflow, so the order of operands and
-	 * the * 100 / 100 trick are important. We do a +1 to be
-	 * safe against division by zero. We only estimate anyway.
-	 *
 	 * dt: time from mark until now
 	 * db: blocks written from mark until now
 	 * rt: remaining time
+	 *
+	 * rt is a sector_t, so could be 32bit or 64bit.
+	 * So we divide before multiply in case it is 32bit and close
+	 * to the limit.
+	 * We scale the divisor (db) by 32 to avoid losing precision
+	 * near the end of resync when the number of remaining sectors
+	 * is close to 'db'.
+	 * We then divide rt by 32 after multiplying by dt to compensate.
+	 * The '+1' avoids division by zero if db is very small.
 	 */
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
 	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
 		- mddev->resync_mark_cnt;
-	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
 
-	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
+	rt = max_sectors - resync; /* number of remaining sectors */
+	sector_div(rt, db/32+1);
+	rt *= dt;
+	rt >>= 5;
+
+	seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
+		   ((unsigned long)rt % 60)/6);
 
 	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
 }
···
 	return 0;
 }
 
-static struct seq_operations md_seq_ops = {
+static const struct seq_operations md_seq_ops = {
 	.start	= md_seq_start,
 	.next	= md_seq_next,
 	.stop	= md_seq_stop,
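
The rewritten time estimate deserves a walk-through. The old formula multiplied before dividing and did everything in unsigned long, which can overflow on 32-bit machines for large arrays; the new code divides the remaining sectors by db/32+1 first, multiplies by dt, then shifts right by 5 to cancel the 32x pre-scaling of the divisor. A userspace sketch with made-up numbers (sector_div() here is a stand-in for the kernel macro, assuming a 64-bit sector_t):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* Userspace stand-in for the kernel's sector_div(): divides in place
 * and returns the remainder. */
static uint32_t sector_div(sector_t *v, uint32_t div)
{
	uint32_t rem = (uint32_t)(*v % div);
	*v /= div;
	return rem;
}

int main(void)
{
	/* Hypothetical snapshot, halfway through resyncing a 4TB device. */
	sector_t max_sectors = 8ULL << 30;	/* 2^33 sectors = 4TB */
	sector_t resync = 1ULL << 32;		/* sectors done so far */
	unsigned long dt = 100;			/* seconds since the mark */
	unsigned long db = 10000000;		/* sectors written since the mark */

	sector_t rt = max_sectors - resync;	/* remaining sectors */

	/* Divide before multiplying so the intermediate value stays small
	 * even if sector_t is only 32 bits; the divisor is pre-scaled by
	 * 32 to keep precision when rt gets close to db, and rt >>= 5
	 * compensates afterwards. */
	sector_div(&rt, db/32 + 1);
	rt *= dt;
	rt >>= 5;

	printf("finish=%lu.%lumin\n",
	       (unsigned long)rt / 60, ((unsigned long)rt % 60) / 6);
	return 0;
}

With these numbers the scaled computation prints finish=715.7min (42946s), within 0.01% of the exact remaining/db * dt of roughly 42950s.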
+6 -6
drivers/md/raid10.c
···
 			r10_bio->sector = sect;
 
 			raid10_find_phys(conf, r10_bio);
-			/* Need to check if this section will still be
+
+			/* Need to check if the array will still be
 			 * degraded
 			 */
-			for (j=0; j<conf->copies;j++) {
-				int d = r10_bio->devs[j].devnum;
-				if (conf->mirrors[d].rdev == NULL ||
-				    test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
+			for (j=0; j<conf->raid_disks; j++)
+				if (conf->mirrors[j].rdev == NULL ||
+				    test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
 					still_degraded = 1;
 					break;
 				}
-			}
+
 			must_sync = bitmap_start_sync(mddev->bitmap, sect,
 						      &sync_blocks, still_degraded);
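
The point of the raid10 change: the removed loop inspected only the conf->copies devices that hold this particular sector, so bitmap_start_sync() could be told the array was fully redundant, and bitmap bits cleared, while some other member was still missing; the eventual recovery of that member depends on exactly those bits. Scanning all raid_disks keeps the bits until no member is absent or faulty. A distilled illustration with simplified structures (not the kernel's types):

#include <stdio.h>

struct member { int present; int faulty; };

/* May bitmap bits be cleared during recovery?  Only when every member
 * of the array is present and healthy; a per-sector check of a sector's
 * own copies misses failed drives elsewhere in the array. */
static int array_still_degraded(const struct member *mirrors, int raid_disks)
{
	int j;

	for (j = 0; j < raid_disks; j++)
		if (!mirrors[j].present || mirrors[j].faulty)
			return 1;	/* more recovery to come: keep the bits */
	return 0;			/* fully redundant: safe to clear */
}

int main(void)
{
	/* Four-disk RAID10 with disk 3 absent.  A sector whose two copies
	 * live on disks 0 and 1 looks healthy in isolation, but the array
	 * as a whole is still degraded -- the case the old loop missed. */
	struct member mirrors[4] = { {1, 0}, {1, 0}, {1, 0}, {0, 0} };

	printf("still_degraded = %d\n", array_still_degraded(mirrors, 4));
	return 0;
}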