md: Push down data integrity code to personalities.

This patch replaces md_integrity_check() with two new public functions,
md_integrity_register() and md_integrity_add_rdev(), both of which are
personality-independent.

md_integrity_register() is called from the ->run and ->hot_remove_disk
methods of all personalities that support data integrity. The
function iterates over the component devices of the array and
determines if all active devices are integrity capable and if their
profiles match. If this is the case, the common profile is registered
for the mddev via blk_integrity_register().

The second new function, md_integrity_add_rdev(), is called from the
->hot_add_disk methods, i.e. whenever a new device is being added
to a raid array. If the new device does not support data integrity,
or has a profile different from the one already registered, data
integrity for the mddev is disabled.

For raid0 and linear, only the call to md_integrity_register() from
the ->run method is necessary.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>

authored by Andre Noll and committed by NeilBrown ac5e7113 95fc17aa

+81 -34
+1
drivers/md/linear.c
··· 220 mddev->queue->unplug_fn = linear_unplug; 221 mddev->queue->backing_dev_info.congested_fn = linear_congested; 222 mddev->queue->backing_dev_info.congested_data = mddev; 223 return 0; 224 } 225
··· 220 mddev->queue->unplug_fn = linear_unplug; 221 mddev->queue->backing_dev_info.congested_fn = linear_congested; 222 mddev->queue->backing_dev_info.congested_data = mddev; 223 + md_integrity_register(mddev); 224 return 0; 225 } 226
+65 -31
drivers/md/md.c
··· 1487 1488 static LIST_HEAD(pending_raid_disks); 1489 1490 - static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) 1491 { 1492 - struct mdk_personality *pers = mddev->pers; 1493 - struct gendisk *disk = mddev->gendisk; 1494 - struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); 1495 - struct blk_integrity *bi_mddev = blk_get_integrity(disk); 1496 1497 - /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ 1498 - if (pers && pers->level >= 4 && pers->level <= 6) 1499 - return; 1500 - 1501 - /* If rdev is integrity capable, register profile for mddev */ 1502 - if (!bi_mddev && bi_rdev) { 1503 - if (blk_integrity_register(disk, bi_rdev)) 1504 - printk(KERN_ERR "%s: %s Could not register integrity!\n", 1505 - __func__, disk->disk_name); 1506 - else 1507 - printk(KERN_NOTICE "Enabling data integrity on %s\n", 1508 - disk->disk_name); 1509 - return; 1510 } 1511 - 1512 - /* Check that mddev and rdev have matching profiles */ 1513 - if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { 1514 - printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, 1515 - disk->disk_name, rdev->bdev->bd_disk->disk_name); 1516 - printk(KERN_NOTICE "Disabling data integrity on %s\n", 1517 - disk->disk_name); 1518 - blk_integrity_unregister(disk); 1519 } 1520 } 1521 1522 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) 1523 { ··· 1630 /* May as well allow recovery to be retried once */ 1631 mddev->recovery_disabled = 0; 1632 1633 - md_integrity_check(rdev, mddev); 1634 return 0; 1635 1636 fail: ··· 4085 mddev->new_level = pers->level; 4086 } 4087 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 4088 - 4089 - if (pers->level >= 4 && pers->level <= 6) 4090 - /* Cannot support integrity (yet) */ 4091 - blk_integrity_unregister(mddev->gendisk); 4092 4093 if (mddev->reshape_position != MaxSector && 4094 pers->start_reshape == NULL) {
··· 1487 1488 static LIST_HEAD(pending_raid_disks); 1489 1490 + /* 1491 + * Try to register data integrity profile for an mddev 1492 + * 1493 + * This is called when an array is started and after a disk has been kicked 1494 + * from the array. It only succeeds if all working and active component devices 1495 + * are integrity capable with matching profiles. 1496 + */ 1497 + int md_integrity_register(mddev_t *mddev) 1498 { 1499 + mdk_rdev_t *rdev, *reference = NULL; 1500 1501 + if (list_empty(&mddev->disks)) 1502 + return 0; /* nothing to do */ 1503 + if (blk_get_integrity(mddev->gendisk)) 1504 + return 0; /* already registered */ 1505 + list_for_each_entry(rdev, &mddev->disks, same_set) { 1506 + /* skip spares and non-functional disks */ 1507 + if (test_bit(Faulty, &rdev->flags)) 1508 + continue; 1509 + if (rdev->raid_disk < 0) 1510 + continue; 1511 + /* 1512 + * If at least one rdev is not integrity capable, we can not 1513 + * enable data integrity for the md device. 1514 + */ 1515 + if (!bdev_get_integrity(rdev->bdev)) 1516 + return -EINVAL; 1517 + if (!reference) { 1518 + /* Use the first rdev as the reference */ 1519 + reference = rdev; 1520 + continue; 1521 + } 1522 + /* does this rdev's profile match the reference profile? */ 1523 + if (blk_integrity_compare(reference->bdev->bd_disk, 1524 + rdev->bdev->bd_disk) < 0) 1525 + return -EINVAL; 1526 } 1527 + /* 1528 + * All component devices are integrity capable and have matching 1529 + * profiles, register the common profile for the md device. 
1530 + */ 1531 + if (blk_integrity_register(mddev->gendisk, 1532 + bdev_get_integrity(reference->bdev)) != 0) { 1533 + printk(KERN_ERR "md: failed to register integrity for %s\n", 1534 + mdname(mddev)); 1535 + return -EINVAL; 1536 } 1537 + printk(KERN_NOTICE "md: data integrity on %s enabled\n", 1538 + mdname(mddev)); 1539 + return 0; 1540 } 1541 + EXPORT_SYMBOL(md_integrity_register); 1542 + 1543 + /* Disable data integrity if non-capable/non-matching disk is being added */ 1544 + void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) 1545 + { 1546 + struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); 1547 + struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); 1548 + 1549 + if (!bi_mddev) /* nothing to do */ 1550 + return; 1551 + if (rdev->raid_disk < 0) /* skip spares */ 1552 + return; 1553 + if (bi_rdev && blk_integrity_compare(mddev->gendisk, 1554 + rdev->bdev->bd_disk) >= 0) 1555 + return; 1556 + printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); 1557 + blk_integrity_unregister(mddev->gendisk); 1558 + } 1559 + EXPORT_SYMBOL(md_integrity_add_rdev); 1560 1561 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) 1562 { ··· 1591 /* May as well allow recovery to be retried once */ 1592 mddev->recovery_disabled = 0; 1593 1594 return 0; 1595 1596 fail: ··· 4047 mddev->new_level = pers->level; 4048 } 4049 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 4050 4051 if (mddev->reshape_position != MaxSector && 4052 pers->start_reshape == NULL) {
+2
drivers/md/md.h
··· 431 extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); 432 extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); 433 extern int md_check_no_bitmap(mddev_t *mddev); 434 435 #endif /* _MD_MD_H */
··· 431 extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); 432 extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); 433 extern int md_check_no_bitmap(mddev_t *mddev); 434 + extern int md_integrity_register(mddev_t *mddev); 435 + void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); 436 437 #endif /* _MD_MD_H */
+4 -1
drivers/md/multipath.c
··· 313 set_bit(In_sync, &rdev->flags); 314 rcu_assign_pointer(p->rdev, rdev); 315 err = 0; 316 break; 317 } 318 ··· 346 /* lost the race, try later */ 347 err = -EBUSY; 348 p->rdev = rdev; 349 } 350 } 351 abort: 352 ··· 522 mddev->queue->unplug_fn = multipath_unplug; 523 mddev->queue->backing_dev_info.congested_fn = multipath_congested; 524 mddev->queue->backing_dev_info.congested_data = mddev; 525 - 526 return 0; 527 528 out_free_conf:
··· 313 set_bit(In_sync, &rdev->flags); 314 rcu_assign_pointer(p->rdev, rdev); 315 err = 0; 316 + md_integrity_add_rdev(rdev, mddev); 317 break; 318 } 319 ··· 345 /* lost the race, try later */ 346 err = -EBUSY; 347 p->rdev = rdev; 348 + goto abort; 349 } 350 + md_integrity_register(mddev); 351 } 352 abort: 353 ··· 519 mddev->queue->unplug_fn = multipath_unplug; 520 mddev->queue->backing_dev_info.congested_fn = multipath_congested; 521 mddev->queue->backing_dev_info.congested_data = mddev; 522 + md_integrity_register(mddev); 523 return 0; 524 525 out_free_conf:
+1
drivers/md/raid0.c
··· 351 352 blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); 353 dump_zones(mddev); 354 return 0; 355 } 356
··· 351 352 blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); 353 dump_zones(mddev); 354 + md_integrity_register(mddev); 355 return 0; 356 } 357
+4 -2
drivers/md/raid1.c
··· 1144 rcu_assign_pointer(p->rdev, rdev); 1145 break; 1146 } 1147 - 1148 print_conf(conf); 1149 return err; 1150 } ··· 1178 /* lost the race, try later */ 1179 err = -EBUSY; 1180 p->rdev = rdev; 1181 } 1182 } 1183 abort: 1184 ··· 2069 mddev->queue->unplug_fn = raid1_unplug; 2070 mddev->queue->backing_dev_info.congested_fn = raid1_congested; 2071 mddev->queue->backing_dev_info.congested_data = mddev; 2072 - 2073 return 0; 2074 2075 out_no_mem:
··· 1144 rcu_assign_pointer(p->rdev, rdev); 1145 break; 1146 } 1147 + md_integrity_add_rdev(rdev, mddev); 1148 print_conf(conf); 1149 return err; 1150 } ··· 1178 /* lost the race, try later */ 1179 err = -EBUSY; 1180 p->rdev = rdev; 1181 + goto abort; 1182 } 1183 + md_integrity_register(mddev); 1184 } 1185 abort: 1186 ··· 2067 mddev->queue->unplug_fn = raid1_unplug; 2068 mddev->queue->backing_dev_info.congested_fn = raid1_congested; 2069 mddev->queue->backing_dev_info.congested_data = mddev; 2070 + md_integrity_register(mddev); 2071 return 0; 2072 2073 out_no_mem:
+4
drivers/md/raid10.c
··· 1170 break; 1171 } 1172 1173 print_conf(conf); 1174 return err; 1175 } ··· 1204 /* lost the race, try later */ 1205 err = -EBUSY; 1206 p->rdev = rdev; 1207 } 1208 } 1209 abort: 1210 ··· 2228 2229 if (conf->near_copies < mddev->raid_disks) 2230 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); 2231 return 0; 2232 2233 out_free_conf:
··· 1170 break; 1171 } 1172 1173 + md_integrity_add_rdev(rdev, mddev); 1174 print_conf(conf); 1175 return err; 1176 } ··· 1203 /* lost the race, try later */ 1204 err = -EBUSY; 1205 p->rdev = rdev; 1206 + goto abort; 1207 } 1208 + md_integrity_register(mddev); 1209 } 1210 abort: 1211 ··· 2225 2226 if (conf->near_copies < mddev->raid_disks) 2227 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); 2228 + md_integrity_register(mddev); 2229 return 0; 2230 2231 out_free_conf: