dm: calculate queue limits during resume not load

Currently, device-mapper maintains a separate instance of 'struct
queue_limits' for each table of each device. When the configuration of
a device is to be changed, first its table is loaded and this structure
is populated, then the device is 'resumed' and the calculated
queue_limits are applied.

This places restrictions on how userspace may process related devices,
where it is often advantageous to 'load' tables for several devices
at once before 'resuming' them together. As the new queue_limits
only take effect after the 'resume', if they are changing and one
device uses another, the latter must be 'resumed' before the former
may be 'loaded'.
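
For example, a userspace sequence along these lines becomes possible
(a sketch using the dmsetup utility; the device names and table files
are hypothetical):

  # load the new tables for every device in the stack first ...
  dmsetup load lower_dev lower.table
  dmsetup load upper_dev upper.table

  # ... and only then resume them; the queue_limits are now
  # calculated at this point, not at load time
  dmsetup resume lower_dev
  dmsetup resume upper_dev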

This patch moves the calculation of these queue_limits out of
the 'load' operation into 'resume'. Since we are no longer
pre-calculating this struct, we no longer need to maintain copies
within our dm structs.
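
In outline, the resume path (dm_swap_table) now computes the limits
into a local struct and passes them down to be applied when the new
table is bound. The following is a condensed sketch of the flow added
below (locking and other error paths omitted):

  struct queue_limits limits;

  /* while the device is still suspended */
  r = dm_calculate_queue_limits(table, &limits);
  if (r)
          goto out;

  __unbind(md);
  r = __bind(md, table, &limits); /* applied via dm_table_set_restrictions() */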

dm_set_device_limits() now passes the 'start' of the device's
data area (aka pe_start) as the 'offset' to blk_stack_limits().

init_valid_queue_limits() is replaced by blk_set_default_limits().
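
Roughly, each target's limits are now derived on demand during resume
(condensed from dm_calculate_queue_limits() and dm_set_device_limits()
below):

  struct queue_limits ti_limits;

  /* start from the block layer's defaults (was init_valid_queue_limits()) */
  blk_set_default_limits(&ti_limits);

  /* stack in each underlying device's limits; the start of its data
   * area (pe_start) is passed as the offset to blk_stack_limits() */
  ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits);

  /* merge this target's limits into the table-wide limits */
  blk_stack_limits(limits, &ti_limits, 0);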

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: martin.petersen@oracle.com
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

Authored by Mike Snitzer, committed by Alasdair G Kergon
754c5fc7 18d8594d

+117 -95

drivers/md/dm-table.c  +102 -83
···
 	/* a list of devices used by this table */
 	struct list_head devices;

-	/*
-	 * These are optimistic limits taken from all the
-	 * targets, some targets will need smaller limits.
-	 */
-	struct queue_limits limits;
-
 	/* events get handed up using this callback */
 	void (*event_fn)(void *);
 	void *event_context;
···
 /*
  * If possible, this checks an area of a destination device is valid.
  */
-static int device_area_is_valid(struct dm_target *ti, struct block_device *bdev,
-				sector_t start, sector_t len)
+static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, void *data)
 {
-	sector_t dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
+	sector_t dev_size =
+		i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
 	unsigned short logical_block_size_sectors =
-		ti->limits.logical_block_size >> SECTOR_SHIFT;
+		limits->logical_block_size >> SECTOR_SHIFT;
 	char b[BDEVNAME_SIZE];

 	if (!dev_size)
 		return 1;

-	if ((start >= dev_size) || (start + len > dev_size)) {
+	if ((start >= dev_size) || (start + ti->len > dev_size)) {
 		DMWARN("%s: %s too small for target",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
 		return 0;
···
 		       "logical block size %hu of %s",
 		       dm_device_name(ti->table->md),
 		       (unsigned long long)start,
-		       ti->limits.logical_block_size, bdevname(bdev, b));
+		       limits->logical_block_size, bdevname(bdev, b));
 		return 0;
 	}

-	if (len & (logical_block_size_sectors - 1)) {
+	if (ti->len & (logical_block_size_sectors - 1)) {
 		DMWARN("%s: len=%llu not aligned to h/w "
 		       "logical block size %hu of %s",
 		       dm_device_name(ti->table->md),
-		       (unsigned long long)len,
-		       ti->limits.logical_block_size, bdevname(bdev, b));
+		       (unsigned long long)ti->len,
+		       limits->logical_block_size, bdevname(bdev, b));
 		return 0;
 	}

···
  */
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))

-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data)
 {
+	struct queue_limits *limits = data;
+	struct block_device *bdev = dev->bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
 	char b[BDEVNAME_SIZE];

 	if (unlikely(!q)) {
 		DMWARN("%s: Cannot set limits for nonexistent device %s",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
-		return;
+		return 0;
 	}

-	if (blk_stack_limits(&ti->limits, &q->limits, 0) < 0)
+	if (blk_stack_limits(limits, &q->limits, start) < 0)
 		DMWARN("%s: target device %s is misaligned",
 		       dm_device_name(ti->table->md), bdevname(bdev, b));
···
 	 */

 	if (q->merge_bvec_fn && !ti->type->merge)
-		ti->limits.max_sectors =
-			min_not_zero(ti->limits.max_sectors,
+		limits->max_sectors =
+			min_not_zero(limits->max_sectors,
 				     (unsigned int) (PAGE_SIZE >> 9));
+	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);

 int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 		  sector_t len, fmode_t mode, struct dm_dev **result)
 {
-	int r = __table_get_device(ti->table, ti, path,
-				   start, len, mode, result);
-
-	if (r)
-		return r;
-
-	dm_set_device_limits(ti, (*result)->bdev);
-
-	if (!device_area_is_valid(ti, (*result)->bdev, start, len)) {
-		dm_put_device(ti, *result);
-		*result = NULL;
-		return -EINVAL;
-	}
-
-	return r;
+	return __table_get_device(ti->table, ti, path,
+				  start, len, mode, result);
 }
+

 /*
  * Decrement a devices use count and remove it if necessary.
···
 	return 0;
 }

-static void init_valid_queue_limits(struct queue_limits *limits)
-{
-	if (!limits->max_sectors)
-		limits->max_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_hw_sectors)
-		limits->max_hw_sectors = SAFE_MAX_SECTORS;
-	if (!limits->max_phys_segments)
-		limits->max_phys_segments = MAX_PHYS_SEGMENTS;
-	if (!limits->max_hw_segments)
-		limits->max_hw_segments = MAX_HW_SEGMENTS;
-	if (!limits->logical_block_size)
-		limits->logical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->physical_block_size)
-		limits->physical_block_size = 1 << SECTOR_SHIFT;
-	if (!limits->io_min)
-		limits->io_min = 1 << SECTOR_SHIFT;
-	if (!limits->max_segment_size)
-		limits->max_segment_size = MAX_SEGMENT_SIZE;
-	if (!limits->seg_boundary_mask)
-		limits->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	if (!limits->bounce_pfn)
-		limits->bounce_pfn = -1;
-	/*
-	 * The other fields (alignment_offset, io_opt, misaligned)
-	 * hold 0 from the kzalloc().
-	 */
-}
-
 /*
  * Impose necessary and sufficient conditions on a devices's table such
  * that any incoming bio which respects its logical_block_size can be
···
  * two or more targets, the size of each piece it gets split into must
  * be compatible with the logical_block_size of the target processing it.
  */
-static int validate_hardware_logical_block_alignment(struct dm_table *table)
+static int validate_hardware_logical_block_alignment(struct dm_table *table,
+						     struct queue_limits *limits)
 {
 	/*
 	 * This function uses arithmetic modulo the logical_block_size
 	 * (in units of 512-byte sectors).
 	 */
 	unsigned short device_logical_block_size_sects =
-		table->limits.logical_block_size >> SECTOR_SHIFT;
+		limits->logical_block_size >> SECTOR_SHIFT;

 	/*
 	 * Offset of the start of the next table entry, mod logical_block_size.
···
 	unsigned short remaining = 0;

 	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
 	unsigned i = 0;

 	/*
···
 	while (i < dm_table_get_num_targets(table)) {
 		ti = dm_table_get_target(table, i++);

+		blk_set_default_limits(&ti_limits);
+
+		/* combine all target devices' limits */
+		if (ti->type->iterate_devices)
+			ti->type->iterate_devices(ti, dm_set_device_limits,
+						  &ti_limits);
+
 		/*
 		 * If the remaining sectors fall entirely within this
 		 * table entry are they compatible with its logical_block_size?
 		 */
 		if (remaining < ti->len &&
-		    remaining & ((ti->limits.logical_block_size >>
+		    remaining & ((ti_limits.logical_block_size >>
 				  SECTOR_SHIFT) - 1))
 			break;	/* Error */

···
 	if (remaining) {
 		DMWARN("%s: table line %u (start sect %llu len %llu) "
-		       "not aligned to hardware logical block size %hu",
+		       "not aligned to h/w logical block size %hu",
 		       dm_device_name(table->md), i,
 		       (unsigned long long) ti->begin,
 		       (unsigned long long) ti->len,
-		       table->limits.logical_block_size);
+		       limits->logical_block_size);
 		return -EINVAL;
 	}

···
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;

-	if (blk_stack_limits(&t->limits, &tgt->limits, 0) < 0)
-		DMWARN("%s: target device (start sect %llu len %llu) "
-		       "is misaligned",
-		       dm_device_name(t->md),
-		       (unsigned long long) tgt->begin,
-		       (unsigned long long) tgt->len);
 	return 0;

 bad:
···
 {
 	int r = 0;
 	unsigned int leaf_nodes;
-
-	init_valid_queue_limits(&t->limits);
-
-	r = validate_hardware_logical_block_alignment(t);
-	if (r)
-		return r;

 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
···
 }

 /*
+ * Establish the new table's queue_limits and validate them.
+ */
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits)
+{
+	struct dm_target *uninitialized_var(ti);
+	struct queue_limits ti_limits;
+	unsigned i = 0;
+
+	blk_set_default_limits(limits);
+
+	while (i < dm_table_get_num_targets(table)) {
+		blk_set_default_limits(&ti_limits);
+
+		ti = dm_table_get_target(table, i++);
+
+		if (!ti->type->iterate_devices)
+			goto combine_limits;
+
+		/*
+		 * Combine queue limits of all the devices this target uses.
+		 */
+		ti->type->iterate_devices(ti, dm_set_device_limits,
+					  &ti_limits);
+
+		/*
+		 * Check each device area is consistent with the target's
+		 * overall queue limits.
+		 */
+		if (!ti->type->iterate_devices(ti, device_area_is_valid,
+					       &ti_limits))
+			return -EINVAL;
+
+combine_limits:
+		/*
+		 * Merge this target's queue limits into the overall limits
+		 * for the table.
+		 */
+		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
+			DMWARN("%s: target device "
+			       "(start sect %llu len %llu) "
+			       "is misaligned",
+			       dm_device_name(table->md),
+			       (unsigned long long) ti->begin,
+			       (unsigned long long) ti->len);
+	}
+
+	return validate_hardware_logical_block_alignment(table, limits);
+}
+
+/*
  * Set the integrity profile for this device if all devices used have
  * matching profiles.
  */
···
 	return;
 }

-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits)
 {
+	/*
+	 * Each target device in the table has a data area that should normally
+	 * be aligned such that the DM device's alignment_offset is 0.
+	 * FIXME: Propagate alignment_offsets up the stack and warn of
+	 * sub-optimal or inconsistent settings.
+	 */
+	limits->alignment_offset = 0;
+	limits->misaligned = 0;
+
 	/*
 	 * Copy table's limits to the DM device's request_queue
 	 */
-	q->limits = t->limits;
+	q->limits = *limits;

-	if (t->limits.no_cluster)
+	if (limits->no_cluster)
 		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);

drivers/md/dm.c  +9 -3
···
 	mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }

-static int __bind(struct mapped_device *md, struct dm_table *t)
+static int __bind(struct mapped_device *md, struct dm_table *t,
+		  struct queue_limits *limits)
 {
 	struct request_queue *q = md->queue;
 	sector_t size;
···
 	write_lock(&md->map_lock);
 	md->map = t;
-	dm_table_set_restrictions(t, q);
+	dm_table_set_restrictions(t, q, limits);
 	write_unlock(&md->map_lock);

 	return 0;
···
  */
 int dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
+	struct queue_limits limits;
 	int r = -EINVAL;

 	mutex_lock(&md->suspend_lock);
···
 	if (!dm_suspended(md))
 		goto out;

+	r = dm_calculate_queue_limits(table, &limits);
+	if (r)
+		goto out;
+
 	__unbind(md);
-	r = __bind(md, table);
+	r = __bind(md, table, &limits);

 out:
 	mutex_unlock(&md->suspend_lock);

drivers/md/dm.h  +4 -1
···
 			void (*fn)(void *), void *context);
 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
-void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_calculate_queue_limits(struct dm_table *table,
+			      struct queue_limits *limits);
+void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
+			       struct queue_limits *limits);
 struct list_head *dm_table_get_devices(struct dm_table *t);
 void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);

include/linux/device-mapper.h  +2 -8
···
 /*
  * Combine device limits.
  */
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev);
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+			 sector_t start, void *data);

 struct dm_dev {
 	struct block_device *bdev;
···
 	sector_t begin;
 	sector_t len;

-	/* FIXME: turn this into a mask, and merge with queue_limits */
 	/* Always a power of 2 */
 	sector_t split_io;

···
 	 * to the real underlying devices.
 	 */
 	unsigned num_flush_requests;
-
-	/*
-	 * These are automatically filled in by
-	 * dm_table_get_device.
-	 */
-	struct queue_limits limits;

 	/* target specific data */
 	void *private;