Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'md/3.14' of git://neil.brown.name/md

Pull md updates from Neil Brown:
"All bug fixes, two tagged for -stable"

* tag 'md/3.14' of git://neil.brown.name/md:
md/raid5: close recently introduced race in stripe_head management.
md/raid5: fix long-standing problem with bitmap handling on write failure.
md: check command validity early in md_ioctl().
md: ensure metadata is written after raid level change.
md/raid10: avoid fullsync when not necessary.
md: allow a partially recovered device to be hot-added to an array.
md: Change handling of saved_raid_disk and metadata update during recovery.

+68 -26
+56 -20
drivers/md/md.c
··· 1173 1173 desc->raid_disk < mddev->raid_disks */) { 1174 1174 set_bit(In_sync, &rdev->flags); 1175 1175 rdev->raid_disk = desc->raid_disk; 1176 + rdev->saved_raid_disk = desc->raid_disk; 1176 1177 } else if (desc->state & (1<<MD_DISK_ACTIVE)) { 1177 1178 /* active but not in sync implies recovery up to 1178 1179 * reshape position. We don't know exactly where ··· 1672 1671 set_bit(Faulty, &rdev->flags); 1673 1672 break; 1674 1673 default: 1674 + rdev->saved_raid_disk = role; 1675 1675 if ((le32_to_cpu(sb->feature_map) & 1676 - MD_FEATURE_RECOVERY_OFFSET)) 1676 + MD_FEATURE_RECOVERY_OFFSET)) { 1677 1677 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); 1678 - else 1678 + if (!(le32_to_cpu(sb->feature_map) & 1679 + MD_FEATURE_RECOVERY_BITMAP)) 1680 + rdev->saved_raid_disk = -1; 1681 + } else 1679 1682 set_bit(In_sync, &rdev->flags); 1680 1683 rdev->raid_disk = role; 1681 1684 break; ··· 1741 1736 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); 1742 1737 sb->recovery_offset = 1743 1738 cpu_to_le64(rdev->recovery_offset); 1739 + if (rdev->saved_raid_disk >= 0 && mddev->bitmap) 1740 + sb->feature_map |= 1741 + cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP); 1744 1742 } 1745 1743 if (test_bit(Replacement, &rdev->flags)) 1746 1744 sb->feature_map |= ··· 2485 2477 if (rdev->sb_loaded != 1) 2486 2478 continue; /* no noise on spare devices */ 2487 2479 2488 - if (!test_bit(Faulty, &rdev->flags) && 2489 - rdev->saved_raid_disk == -1) { 2480 + if (!test_bit(Faulty, &rdev->flags)) { 2490 2481 md_super_write(mddev,rdev, 2491 2482 rdev->sb_start, rdev->sb_size, 2492 2483 rdev->sb_page); ··· 2501 2494 rdev->badblocks.size = 0; 2502 2495 } 2503 2496 2504 - } else if (test_bit(Faulty, &rdev->flags)) 2497 + } else 2505 2498 pr_debug("md: %s (skipping faulty)\n", 2506 2499 bdevname(rdev->bdev, b)); 2507 - else 2508 - pr_debug("(skipping incremental s/r "); 2509 2500 2510 2501 if (mddev->level == LEVEL_MULTIPATH) 2511 2502 /* only need to write one superblock... 
*/ ··· 2619 2614 * blocked - sets the Blocked flags 2620 2615 * -blocked - clears the Blocked and possibly simulates an error 2621 2616 * insync - sets Insync providing device isn't active 2617 + * -insync - clear Insync for a device with a slot assigned, 2618 + * so that it gets rebuilt based on bitmap 2622 2619 * write_error - sets WriteErrorSeen 2623 2620 * -write_error - clears WriteErrorSeen 2624 2621 */ ··· 2668 2661 err = 0; 2669 2662 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { 2670 2663 set_bit(In_sync, &rdev->flags); 2664 + err = 0; 2665 + } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) { 2666 + clear_bit(In_sync, &rdev->flags); 2667 + rdev->saved_raid_disk = rdev->raid_disk; 2668 + rdev->raid_disk = -1; 2671 2669 err = 0; 2672 2670 } else if (cmd_match(buf, "write_error")) { 2673 2671 set_bit(WriteErrorSeen, &rdev->flags); ··· 3601 3589 pers->run(mddev); 3602 3590 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3603 3591 mddev_resume(mddev); 3592 + if (!mddev->thread) 3593 + md_update_sb(mddev, 1); 3604 3594 sysfs_notify(&mddev->kobj, NULL, "level"); 3605 3595 md_new_event(mddev); 3606 3596 return rv; ··· 5784 5770 clear_bit(Bitmap_sync, &rdev->flags); 5785 5771 } else 5786 5772 rdev->raid_disk = -1; 5773 + rdev->saved_raid_disk = rdev->raid_disk; 5787 5774 } else 5788 5775 super_types[mddev->major_version]. 
5789 5776 validate_super(mddev, rdev); ··· 5796 5781 export_rdev(rdev); 5797 5782 return -EINVAL; 5798 5783 } 5799 - 5800 - if (test_bit(In_sync, &rdev->flags)) 5801 - rdev->saved_raid_disk = rdev->raid_disk; 5802 - else 5803 - rdev->saved_raid_disk = -1; 5804 5784 5805 5785 clear_bit(In_sync, &rdev->flags); /* just to be sure */ 5806 5786 if (info->state & (1<<MD_DISK_WRITEMOSTLY)) ··· 6346 6336 return 0; 6347 6337 } 6348 6338 6339 + static inline bool md_ioctl_valid(unsigned int cmd) 6340 + { 6341 + switch (cmd) { 6342 + case ADD_NEW_DISK: 6343 + case BLKROSET: 6344 + case GET_ARRAY_INFO: 6345 + case GET_BITMAP_FILE: 6346 + case GET_DISK_INFO: 6347 + case HOT_ADD_DISK: 6348 + case HOT_REMOVE_DISK: 6349 + case PRINT_RAID_DEBUG: 6350 + case RAID_AUTORUN: 6351 + case RAID_VERSION: 6352 + case RESTART_ARRAY_RW: 6353 + case RUN_ARRAY: 6354 + case SET_ARRAY_INFO: 6355 + case SET_BITMAP_FILE: 6356 + case SET_DISK_FAULTY: 6357 + case STOP_ARRAY: 6358 + case STOP_ARRAY_RO: 6359 + return true; 6360 + default: 6361 + return false; 6362 + } 6363 + } 6364 + 6349 6365 static int md_ioctl(struct block_device *bdev, fmode_t mode, 6350 6366 unsigned int cmd, unsigned long arg) 6351 6367 { ··· 6379 6343 void __user *argp = (void __user *)arg; 6380 6344 struct mddev *mddev = NULL; 6381 6345 int ro; 6346 + 6347 + if (!md_ioctl_valid(cmd)) 6348 + return -ENOTTY; 6382 6349 6383 6350 switch (cmd) { 6384 6351 case RAID_VERSION: ··· 7757 7718 !test_bit(Bitmap_sync, &rdev->flags))) 7758 7719 continue; 7759 7720 7760 - rdev->recovery_offset = 0; 7721 + if (rdev->saved_raid_disk < 0) 7722 + rdev->recovery_offset = 0; 7761 7723 if (mddev->pers-> 7762 7724 hot_add_disk(mddev, rdev) == 0) { 7763 7725 if (sysfs_link_rdev(mddev, rdev)) ··· 7978 7938 mddev->pers->finish_reshape(mddev); 7979 7939 7980 7940 /* If array is no-longer degraded, then any saved_raid_disk 7981 - * information must be scrapped. 
Also if any device is now 7982 - * In_sync we must scrape the saved_raid_disk for that device 7983 - * do the superblock for an incrementally recovered device 7984 - * written out. 7941 + * information must be scrapped. 7985 7942 */ 7986 - rdev_for_each(rdev, mddev) 7987 - if (!mddev->degraded || 7988 - test_bit(In_sync, &rdev->flags)) 7943 + if (!mddev->degraded) 7944 + rdev_for_each(rdev, mddev) 7989 7945 rdev->saved_raid_disk = -1; 7990 7946 7991 7947 md_update_sb(mddev, 1);
+2 -1
drivers/md/raid10.c
··· 3747 3747 !test_bit(In_sync, &disk->rdev->flags)) { 3748 3748 disk->head_position = 0; 3749 3749 mddev->degraded++; 3750 - if (disk->rdev) 3750 + if (disk->rdev && 3751 + disk->rdev->saved_raid_disk < 0) 3751 3752 conf->fullsync = 1; 3752 3753 } 3753 3754 disk->recovery_disabled = mddev->recovery_disabled - 1;
+5 -4
drivers/md/raid5.c
··· 675 675 || !conf->inactive_blocked), 676 676 *(conf->hash_locks + hash)); 677 677 conf->inactive_blocked = 0; 678 - } else 678 + } else { 679 679 init_stripe(sh, sector, previous); 680 + atomic_inc(&sh->count); 681 + } 680 682 } else { 681 683 spin_lock(&conf->device_lock); 682 684 if (atomic_read(&sh->count)) { ··· 697 695 sh->group = NULL; 698 696 } 699 697 } 698 + atomic_inc(&sh->count); 700 699 spin_unlock(&conf->device_lock); 701 700 } 702 701 } while (sh == NULL); 703 - 704 - if (sh) 705 - atomic_inc(&sh->count); 706 702 707 703 spin_unlock_irq(conf->hash_locks + hash); 708 704 return sh; ··· 2111 2111 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); 2112 2112 } else { 2113 2113 if (!uptodate) { 2114 + set_bit(STRIPE_DEGRADED, &sh->state); 2114 2115 set_bit(WriteErrorSeen, &rdev->flags); 2115 2116 set_bit(R5_WriteError, &sh->dev[i].flags); 2116 2117 if (!test_and_set_bit(WantReplacement, &rdev->flags))
+5 -1
include/uapi/linux/raid/md_p.h
··· 292 292 * backwards anyway. 293 293 */ 294 294 #define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ 295 + #define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening 296 + * is guided by bitmap. 297 + */ 295 298 #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ 296 299 |MD_FEATURE_RECOVERY_OFFSET \ 297 300 |MD_FEATURE_RESHAPE_ACTIVE \ ··· 302 299 |MD_FEATURE_REPLACEMENT \ 303 300 |MD_FEATURE_RESHAPE_BACKWARDS \ 304 301 |MD_FEATURE_NEW_OFFSET \ 302 + |MD_FEATURE_RECOVERY_BITMAP \ 305 303 ) 306 304 307 - #endif 305 + #endif