Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] md: Improve locking around error handling

The error handling routines don't use proper locking, and so two concurrent
errors could trigger a problem.

So:
- use test-and-set and test-and-clear to synchonise
the In_sync bits with the ->degraded count
- use the spinlock to protect updates to the
degraded count (could use an atomic_t but that
would be a bigger change in code, and isn't
really justified)
- remove un-necessary locking in raid5

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

NeilBrown and committed by
Linus Torvalds
c04be0aa 11ce99e6

+31 -17
+11 -5
drivers/md/raid1.c
··· 959 959 * normal single drive 960 960 */ 961 961 return; 962 - if (test_bit(In_sync, &rdev->flags)) { 962 + if (test_and_clear_bit(In_sync, &rdev->flags)) { 963 + unsigned long flags; 964 + spin_lock_irqsave(&conf->device_lock, flags); 963 965 mddev->degraded++; 966 + spin_unlock_irqrestore(&conf->device_lock, flags); 964 967 /* 965 968 * if recovery is running, make sure it aborts. 966 969 */ 967 970 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 968 971 } 969 - clear_bit(In_sync, &rdev->flags); 970 972 set_bit(Faulty, &rdev->flags); 971 973 set_bit(MD_CHANGE_DEVS, &mddev->flags); 972 974 printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" ··· 1024 1022 mdk_rdev_t *rdev = conf->mirrors[i].rdev; 1025 1023 if (rdev 1026 1024 && !test_bit(Faulty, &rdev->flags) 1027 - && !test_bit(In_sync, &rdev->flags)) { 1025 + && !test_and_set_bit(In_sync, &rdev->flags)) { 1026 + unsigned long flags; 1027 + spin_lock_irqsave(&conf->device_lock, flags); 1028 1028 mddev->degraded--; 1029 - set_bit(In_sync, &rdev->flags); 1029 + spin_unlock_irqrestore(&conf->device_lock, flags); 1030 1030 } 1031 1031 } 1032 1032 ··· 2052 2048 mirror_info_t *newmirrors; 2053 2049 conf_t *conf = mddev_to_conf(mddev); 2054 2050 int cnt, raid_disks; 2055 - 2051 + unsigned long flags; 2056 2052 int d, d2; 2057 2053 2058 2054 /* Cannot change chunk_size, layout, or level */ ··· 2111 2107 kfree(conf->poolinfo); 2112 2108 conf->poolinfo = newpoolinfo; 2113 2109 2110 + spin_lock_irqsave(&conf->device_lock, flags); 2114 2111 mddev->degraded += (raid_disks - conf->raid_disks); 2112 + spin_unlock_irqrestore(&conf->device_lock, flags); 2115 2113 conf->raid_disks = mddev->raid_disks = raid_disks; 2116 2114 mddev->delta_disks = 0; 2117 2115
+8 -4
drivers/md/raid10.c
··· 950 950 * really dead" tests... 951 951 */ 952 952 return; 953 - if (test_bit(In_sync, &rdev->flags)) { 953 + if (test_and_clear_bit(In_sync, &rdev->flags)) { 954 + unsigned long flags; 955 + spin_lock_irqsave(&conf->device_lock, flags); 954 956 mddev->degraded++; 957 + spin_unlock_irqrestore(&conf->device_lock, flags); 955 958 /* 956 959 * if recovery is running, make sure it aborts. 957 960 */ 958 961 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 959 962 } 960 - clear_bit(In_sync, &rdev->flags); 961 963 set_bit(Faulty, &rdev->flags); 962 964 set_bit(MD_CHANGE_DEVS, &mddev->flags); 963 965 printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" ··· 1035 1033 tmp = conf->mirrors + i; 1036 1034 if (tmp->rdev 1037 1035 && !test_bit(Faulty, &tmp->rdev->flags) 1038 - && !test_bit(In_sync, &tmp->rdev->flags)) { 1036 + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { 1037 + unsigned long flags; 1038 + spin_lock_irqsave(&conf->device_lock, flags); 1039 1039 mddev->degraded--; 1040 - set_bit(In_sync, &tmp->rdev->flags); 1040 + spin_unlock_irqrestore(&conf->device_lock, flags); 1041 1041 } 1042 1042 } 1043 1043
+12 -8
drivers/md/raid5.c
··· 636 636 struct stripe_head *sh = bi->bi_private; 637 637 raid5_conf_t *conf = sh->raid_conf; 638 638 int disks = sh->disks, i; 639 - unsigned long flags; 640 639 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 641 640 642 641 if (bi->bi_size) ··· 653 654 return 0; 654 655 } 655 656 656 - spin_lock_irqsave(&conf->device_lock, flags); 657 657 if (!uptodate) 658 658 md_error(conf->mddev, conf->disks[i].rdev); 659 659 ··· 660 662 661 663 clear_bit(R5_LOCKED, &sh->dev[i].flags); 662 664 set_bit(STRIPE_HANDLE, &sh->state); 663 - __release_stripe(conf, sh); 664 - spin_unlock_irqrestore(&conf->device_lock, flags); 665 + release_stripe(sh); 665 666 return 0; 666 667 } 667 668 ··· 694 697 695 698 if (!test_bit(Faulty, &rdev->flags)) { 696 699 set_bit(MD_CHANGE_DEVS, &mddev->flags); 697 - if (test_bit(In_sync, &rdev->flags)) { 700 + if (test_and_clear_bit(In_sync, &rdev->flags)) { 701 + unsigned long flags; 702 + spin_lock_irqsave(&conf->device_lock, flags); 698 703 mddev->degraded++; 699 - clear_bit(In_sync, &rdev->flags); 704 + spin_unlock_irqrestore(&conf->device_lock, flags); 700 705 /* 701 706 * if recovery was running, make sure it aborts. 702 707 */ ··· 3417 3418 tmp = conf->disks + i; 3418 3419 if (tmp->rdev 3419 3420 && !test_bit(Faulty, &tmp->rdev->flags) 3420 - && !test_bit(In_sync, &tmp->rdev->flags)) { 3421 + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { 3422 + unsigned long flags; 3423 + spin_lock_irqsave(&conf->device_lock, flags); 3421 3424 mddev->degraded--; 3422 - set_bit(In_sync, &tmp->rdev->flags); 3425 + spin_unlock_irqrestore(&conf->device_lock, flags); 3423 3426 } 3424 3427 } 3425 3428 print_raid5_conf(conf); ··· 3557 3556 struct list_head *rtmp; 3558 3557 int spares = 0; 3559 3558 int added_devices = 0; 3559 + unsigned long flags; 3560 3560 3561 3561 if (mddev->degraded || 3562 3562 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) ··· 3599 3597 break; 3600 3598 } 3601 3599 3600 + spin_lock_irqsave(&conf->device_lock, flags); 3602 3601 mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; 3602 + spin_unlock_irqrestore(&conf->device_lock, flags); 3603 3603 mddev->raid_disks = conf->raid_disks; 3604 3604 mddev->reshape_position = 0; 3605 3605 set_bit(MD_CHANGE_DEVS, &mddev->flags);