Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm: add infrastructure for DAX support

Change mapped device to implement direct_access function,
dm_blk_direct_access(), which calls a target direct_access function.
'struct target_type' is extended to have target direct_access interface.
This function limits direct accessible size to the dm_target's limit
with max_io_len().

Add dm_table_supports_dax() to iterate all targets and associated block
devices to check for DAX support. To add DAX support to a DM target the
target must only implement the direct_access function.

Add a new dm type, DM_TYPE_DAX_BIO_BASED, which indicates that the mapped
device supports DAX and is bio based. This new type is used to assure
that all target devices have DAX support and remain that way after
QUEUE_FLAG_DAX is set in the mapped device.

At initial table load, QUEUE_FLAG_DAX is set on the mapped device when
setting DM_TYPE_DAX_BIO_BASED as the type. Any subsequent table load to
the mapped device must have the same type, or else it fails per the check
in table_load().

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

Authored by Toshi Kani and committed by Mike Snitzer
545ed20e e9ccb945

+92 -5
+43 -1
drivers/md/dm-table.c
··· 827 827 } 828 828 EXPORT_SYMBOL(dm_consume_args); 829 829 830 + static bool __table_type_bio_based(unsigned table_type) 831 + { 832 + return (table_type == DM_TYPE_BIO_BASED || 833 + table_type == DM_TYPE_DAX_BIO_BASED); 834 + } 835 + 830 836 static bool __table_type_request_based(unsigned table_type) 831 837 { 832 838 return (table_type == DM_TYPE_REQUEST_BASED || ··· 844 838 t->type = type; 845 839 } 846 840 EXPORT_SYMBOL_GPL(dm_table_set_type); 841 + 842 + static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, 843 + sector_t start, sector_t len, void *data) 844 + { 845 + struct request_queue *q = bdev_get_queue(dev->bdev); 846 + 847 + return q && blk_queue_dax(q); 848 + } 849 + 850 + static bool dm_table_supports_dax(struct dm_table *t) 851 + { 852 + struct dm_target *ti; 853 + unsigned i = 0; 854 + 855 + /* Ensure that all targets support DAX. */ 856 + while (i < dm_table_get_num_targets(t)) { 857 + ti = dm_table_get_target(t, i++); 858 + 859 + if (!ti->type->direct_access) 860 + return false; 861 + 862 + if (!ti->type->iterate_devices || 863 + !ti->type->iterate_devices(ti, device_supports_dax, NULL)) 864 + return false; 865 + } 866 + 867 + return true; 868 + } 847 869 848 870 static int dm_table_determine_type(struct dm_table *t) 849 871 { ··· 887 853 /* target already set the table's type */ 888 854 if (t->type == DM_TYPE_BIO_BASED) 889 855 return 0; 856 + BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED); 890 857 goto verify_rq_based; 891 858 } 892 859 ··· 922 887 if (bio_based) { 923 888 /* We must use this table as bio-based */ 924 889 t->type = DM_TYPE_BIO_BASED; 890 + if (dm_table_supports_dax(t)) 891 + t->type = DM_TYPE_DAX_BIO_BASED; 925 892 return 0; 926 893 } 927 894 ··· 1016 979 return NULL; 1017 980 } 1018 981 982 + bool dm_table_bio_based(struct dm_table *t) 983 + { 984 + return __table_type_bio_based(dm_table_get_type(t)); 985 + } 986 + 1019 987 bool dm_table_request_based(struct dm_table *t) 1020 988 { 1021 989 return 
__table_type_request_based(dm_table_get_type(t)); ··· 1043 1001 return -EINVAL; 1044 1002 } 1045 1003 1046 - if (type == DM_TYPE_BIO_BASED) 1004 + if (__table_type_bio_based(type)) 1047 1005 for (i = 0; i < t->num_targets; i++) { 1048 1006 tgt = t->targets + i; 1049 1007 per_io_data_size = max(per_io_data_size, tgt->per_io_data_size);
+36 -2
drivers/md/dm.c
··· 905 905 } 906 906 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); 907 907 908 + static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, 909 + void __pmem **kaddr, pfn_t *pfn, long size) 910 + { 911 + struct mapped_device *md = bdev->bd_disk->private_data; 912 + struct dm_table *map; 913 + struct dm_target *ti; 914 + int srcu_idx; 915 + long len, ret = -EIO; 916 + 917 + map = dm_get_live_table(md, &srcu_idx); 918 + if (!map) 919 + goto out; 920 + 921 + ti = dm_table_find_target(map, sector); 922 + if (!dm_target_is_valid(ti)) 923 + goto out; 924 + 925 + len = max_io_len(sector, ti) << SECTOR_SHIFT; 926 + size = min(len, size); 927 + 928 + if (ti->type->direct_access) 929 + ret = ti->type->direct_access(ti, sector, kaddr, pfn, size); 930 + out: 931 + dm_put_live_table(md, srcu_idx); 932 + return min(ret, size); 933 + } 934 + 908 935 /* 909 936 * A target may call dm_accept_partial_bio only from the map routine. It is 910 937 * allowed for all bio types except REQ_PREFLUSH. ··· 1575 1548 1576 1549 if (md->bs) { 1577 1550 /* The md already has necessary mempools. */ 1578 - if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { 1551 + if (dm_table_bio_based(t)) { 1579 1552 /* 1580 1553 * Reload bioset because front_pad may have changed 1581 1554 * because a different table was loaded. 
··· 1771 1744 int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) 1772 1745 { 1773 1746 int r; 1747 + unsigned type = dm_get_md_type(md); 1774 1748 1775 - switch (dm_get_md_type(md)) { 1749 + switch (type) { 1776 1750 case DM_TYPE_REQUEST_BASED: 1777 1751 r = dm_old_init_request_queue(md); 1778 1752 if (r) { ··· 1789 1761 } 1790 1762 break; 1791 1763 case DM_TYPE_BIO_BASED: 1764 + case DM_TYPE_DAX_BIO_BASED: 1792 1765 dm_init_normal_md_queue(md); 1793 1766 blk_queue_make_request(md->queue, dm_make_request); 1794 1767 /* ··· 1798 1769 */ 1799 1770 bioset_free(md->queue->bio_split); 1800 1771 md->queue->bio_split = NULL; 1772 + 1773 + if (type == DM_TYPE_DAX_BIO_BASED) 1774 + queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue); 1801 1775 break; 1802 1776 } 1803 1777 ··· 2497 2465 2498 2466 switch (type) { 2499 2467 case DM_TYPE_BIO_BASED: 2468 + case DM_TYPE_DAX_BIO_BASED: 2500 2469 cachep = _io_cache; 2501 2470 pool_size = dm_get_reserved_bio_based_ios(); 2502 2471 front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); ··· 2724 2691 .open = dm_blk_open, 2725 2692 .release = dm_blk_close, 2726 2693 .ioctl = dm_blk_ioctl, 2694 + .direct_access = dm_blk_direct_access, 2727 2695 .getgeo = dm_blk_getgeo, 2728 2696 .pr_ops = &dm_pr_ops, 2729 2697 .owner = THIS_MODULE
+1
drivers/md/dm.h
··· 68 68 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); 69 69 struct dm_target *dm_table_get_immutable_target(struct dm_table *t); 70 70 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); 71 + bool dm_table_bio_based(struct dm_table *t); 71 72 bool dm_table_request_based(struct dm_table *t); 72 73 bool dm_table_all_blk_mq_devices(struct dm_table *t); 73 74 void dm_table_free_md_mempools(struct dm_table *t);
+10
include/linux/device-mapper.h
··· 26 26 #define DM_TYPE_BIO_BASED 1 27 27 #define DM_TYPE_REQUEST_BASED 2 28 28 #define DM_TYPE_MQ_REQUEST_BASED 3 29 + #define DM_TYPE_DAX_BIO_BASED 4 29 30 30 31 typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; 31 32 ··· 125 124 */ 126 125 typedef int (*dm_busy_fn) (struct dm_target *ti); 127 126 127 + /* 128 + * Returns: 129 + * < 0 : error 130 + * >= 0 : the number of bytes accessible at the address 131 + */ 132 + typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, 133 + void __pmem **kaddr, pfn_t *pfn, long size); 134 + 128 135 void dm_error(const char *message); 129 136 130 137 struct dm_dev { ··· 179 170 dm_busy_fn busy; 180 171 dm_iterate_devices_fn iterate_devices; 181 172 dm_io_hints_fn io_hints; 173 + dm_direct_access_fn direct_access; 182 174 183 175 /* For internal device-mapper use. */ 184 176 struct list_head list;
+2 -2
include/uapi/linux/dm-ioctl.h
··· 267 267 #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 268 268 269 269 #define DM_VERSION_MAJOR 4 270 - #define DM_VERSION_MINOR 34 270 + #define DM_VERSION_MINOR 35 271 271 #define DM_VERSION_PATCHLEVEL 0 272 - #define DM_VERSION_EXTRA "-ioctl (2015-10-28)" 272 + #define DM_VERSION_EXTRA "-ioctl (2016-06-23)" 273 273 274 274 /* Status bits */ 275 275 #define DM_READONLY_FLAG (1 << 0) /* In/Out */