Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dax: remove the copy_from_iter and copy_to_iter methods

These methods indirect the actual DAX read/write path. In the end, pmem
uses the cache-flushing and machine-check-safe copy variants, fuse and
dcssblk use the plain ones, while device mapper simply redirects to the
underlying device.

Add set_dax_nocache() and set_dax_nomc() APIs to control which copy
routines are used to remove indirect call from the read/write fast path
as well as a lot of boilerplate code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com> [virtiofs]
Link: https://lore.kernel.org/r/20211215084508.435401-5-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

authored by

Christoph Hellwig and committed by
Dan Williams
7ac5360c 30c6828a

+41 -237
+2
drivers/dax/bus.c
··· 1330 1330 goto err_alloc_dax; 1331 1331 } 1332 1332 set_dax_synchronous(dax_dev); 1333 + set_dax_nocache(dax_dev); 1334 + set_dax_nomc(dax_dev); 1333 1335 1334 1336 /* a device_dax instance is dead while the driver is not attached */ 1335 1337 kill_dax(dax_dev);
+32 -4
drivers/dax/super.c
··· 105 105 DAXDEV_WRITE_CACHE, 106 106 /* flag to check if device supports synchronous flush */ 107 107 DAXDEV_SYNC, 108 + /* do not leave the caches dirty after writes */ 109 + DAXDEV_NOCACHE, 110 + /* handle CPU fetch exceptions during reads */ 111 + DAXDEV_NOMC, 108 112 }; 109 113 110 114 /** ··· 150 146 if (!dax_alive(dax_dev)) 151 147 return 0; 152 148 153 - return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); 149 + /* 150 + * The userspace address for the memory copy has already been validated 151 + * via access_ok() in vfs_write, so use the 'no check' version to bypass 152 + * the HARDENED_USERCOPY overhead. 153 + */ 154 + if (test_bit(DAXDEV_NOCACHE, &dax_dev->flags)) 155 + return _copy_from_iter_flushcache(addr, bytes, i); 156 + return _copy_from_iter(addr, bytes, i); 154 157 } 155 - EXPORT_SYMBOL_GPL(dax_copy_from_iter); 156 158 157 159 size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 158 160 size_t bytes, struct iov_iter *i) ··· 166 156 if (!dax_alive(dax_dev)) 167 157 return 0; 168 158 169 - return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); 159 + /* 160 + * The userspace address for the memory copy has already been validated 161 + * via access_ok() in vfs_read, so use the 'no check' version to bypass 162 + * the HARDENED_USERCOPY overhead. 163 + */ 164 + if (test_bit(DAXDEV_NOMC, &dax_dev->flags)) 165 + return _copy_mc_to_iter(addr, bytes, i); 166 + return _copy_to_iter(addr, bytes, i); 170 167 } 171 - EXPORT_SYMBOL_GPL(dax_copy_to_iter); 172 168 173 169 int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, 174 170 size_t nr_pages) ··· 235 219 set_bit(DAXDEV_SYNC, &dax_dev->flags); 236 220 } 237 221 EXPORT_SYMBOL_GPL(set_dax_synchronous); 222 + 223 + void set_dax_nocache(struct dax_device *dax_dev) 224 + { 225 + set_bit(DAXDEV_NOCACHE, &dax_dev->flags); 226 + } 227 + EXPORT_SYMBOL_GPL(set_dax_nocache); 228 + 229 + void set_dax_nomc(struct dax_device *dax_dev) 230 + { 231 + set_bit(DAXDEV_NOMC, &dax_dev->flags); 232 + } 233 + EXPORT_SYMBOL_GPL(set_dax_nomc); 238 234 239 235 bool dax_alive(struct dax_device *dax_dev) 240 236 {
-20
drivers/md/dm-linear.c
··· 180 180 return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); 181 181 } 182 182 183 - static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, 184 - void *addr, size_t bytes, struct iov_iter *i) 185 - { 186 - struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); 187 - 188 - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 189 - } 190 - 191 - static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, 192 - void *addr, size_t bytes, struct iov_iter *i) 193 - { 194 - struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); 195 - 196 - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); 197 - } 198 - 199 183 static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, 200 184 size_t nr_pages) 201 185 { ··· 190 206 191 207 #else 192 208 #define linear_dax_direct_access NULL 193 - #define linear_dax_copy_from_iter NULL 194 - #define linear_dax_copy_to_iter NULL 195 209 #define linear_dax_zero_page_range NULL 196 210 #endif 197 211 ··· 207 225 .prepare_ioctl = linear_prepare_ioctl, 208 226 .iterate_devices = linear_iterate_devices, 209 227 .direct_access = linear_dax_direct_access, 210 - .dax_copy_from_iter = linear_dax_copy_from_iter, 211 - .dax_copy_to_iter = linear_dax_copy_to_iter, 212 228 .dax_zero_page_range = linear_dax_zero_page_range, 213 229 }; 214 230
-80
drivers/md/dm-log-writes.c
··· 902 902 } 903 903 904 904 #if IS_ENABLED(CONFIG_FS_DAX) 905 - static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, 906 - struct iov_iter *i) 907 - { 908 - struct pending_block *block; 909 - 910 - if (!bytes) 911 - return 0; 912 - 913 - block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); 914 - if (!block) { 915 - DMERR("Error allocating dax pending block"); 916 - return -ENOMEM; 917 - } 918 - 919 - block->data = kzalloc(bytes, GFP_KERNEL); 920 - if (!block->data) { 921 - DMERR("Error allocating dax data space"); 922 - kfree(block); 923 - return -ENOMEM; 924 - } 925 - 926 - /* write data provided via the iterator */ 927 - if (!copy_from_iter(block->data, bytes, i)) { 928 - DMERR("Error copying dax data"); 929 - kfree(block->data); 930 - kfree(block); 931 - return -EIO; 932 - } 933 - 934 - /* rewind the iterator so that the block driver can use it */ 935 - iov_iter_revert(i, bytes); 936 - 937 - block->datalen = bytes; 938 - block->sector = bio_to_dev_sectors(lc, sector); 939 - block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift; 940 - 941 - atomic_inc(&lc->pending_blocks); 942 - spin_lock_irq(&lc->blocks_lock); 943 - list_add_tail(&block->list, &lc->unflushed_blocks); 944 - spin_unlock_irq(&lc->blocks_lock); 945 - wake_up_process(lc->log_kthread); 946 - 947 - return 0; 948 - } 949 - 950 905 static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti, 951 906 pgoff_t *pgoff) 952 907 { ··· 919 964 return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); 920 965 } 921 966 922 - static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, 923 - pgoff_t pgoff, void *addr, size_t bytes, 924 - struct iov_iter *i) 925 - { 926 - struct log_writes_c *lc = ti->private; 927 - sector_t sector = pgoff * PAGE_SECTORS; 928 - struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); 929 - int err; 930 - 931 - /* Don't bother doing anything if logging has been disabled */ 932 - if (!lc->logging_enabled) 933 - 
goto dax_copy; 934 - 935 - err = log_dax(lc, sector, bytes, i); 936 - if (err) { 937 - DMWARN("Error %d logging DAX write", err); 938 - return 0; 939 - } 940 - dax_copy: 941 - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 942 - } 943 - 944 - static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, 945 - pgoff_t pgoff, void *addr, size_t bytes, 946 - struct iov_iter *i) 947 - { 948 - struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); 949 - 950 - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); 951 - } 952 - 953 967 static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, 954 968 size_t nr_pages) 955 969 { ··· 929 1005 930 1006 #else 931 1007 #define log_writes_dax_direct_access NULL 932 - #define log_writes_dax_copy_from_iter NULL 933 - #define log_writes_dax_copy_to_iter NULL 934 1008 #define log_writes_dax_zero_page_range NULL 935 1009 #endif 936 1010 ··· 946 1024 .iterate_devices = log_writes_iterate_devices, 947 1025 .io_hints = log_writes_io_hints, 948 1026 .direct_access = log_writes_dax_direct_access, 949 - .dax_copy_from_iter = log_writes_dax_copy_from_iter, 950 - .dax_copy_to_iter = log_writes_dax_copy_to_iter, 951 1027 .dax_zero_page_range = log_writes_dax_zero_page_range, 952 1028 }; 953 1029
-20
drivers/md/dm-stripe.c
··· 324 324 return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); 325 325 } 326 326 327 - static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, 328 - void *addr, size_t bytes, struct iov_iter *i) 329 - { 330 - struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); 331 - 332 - return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 333 - } 334 - 335 - static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, 336 - void *addr, size_t bytes, struct iov_iter *i) 337 - { 338 - struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); 339 - 340 - return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); 341 - } 342 - 343 327 static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, 344 328 size_t nr_pages) 345 329 { ··· 334 350 335 351 #else 336 352 #define stripe_dax_direct_access NULL 337 - #define stripe_dax_copy_from_iter NULL 338 - #define stripe_dax_copy_to_iter NULL 339 353 #define stripe_dax_zero_page_range NULL 340 354 #endif 341 355 ··· 470 488 .iterate_devices = stripe_iterate_devices, 471 489 .io_hints = stripe_io_hints, 472 490 .direct_access = stripe_dax_direct_access, 473 - .dax_copy_from_iter = stripe_dax_copy_from_iter, 474 - .dax_copy_to_iter = stripe_dax_copy_to_iter, 475 491 .dax_zero_page_range = stripe_dax_zero_page_range, 476 492 }; 477 493
+2 -50
drivers/md/dm.c
··· 1027 1027 return ret; 1028 1028 } 1029 1029 1030 - static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 1031 - void *addr, size_t bytes, struct iov_iter *i) 1032 - { 1033 - struct mapped_device *md = dax_get_private(dax_dev); 1034 - sector_t sector = pgoff * PAGE_SECTORS; 1035 - struct dm_target *ti; 1036 - long ret = 0; 1037 - int srcu_idx; 1038 - 1039 - ti = dm_dax_get_live_target(md, sector, &srcu_idx); 1040 - 1041 - if (!ti) 1042 - goto out; 1043 - if (!ti->type->dax_copy_from_iter) { 1044 - ret = copy_from_iter(addr, bytes, i); 1045 - goto out; 1046 - } 1047 - ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i); 1048 - out: 1049 - dm_put_live_table(md, srcu_idx); 1050 - 1051 - return ret; 1052 - } 1053 - 1054 - static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, 1055 - void *addr, size_t bytes, struct iov_iter *i) 1056 - { 1057 - struct mapped_device *md = dax_get_private(dax_dev); 1058 - sector_t sector = pgoff * PAGE_SECTORS; 1059 - struct dm_target *ti; 1060 - long ret = 0; 1061 - int srcu_idx; 1062 - 1063 - ti = dm_dax_get_live_target(md, sector, &srcu_idx); 1064 - 1065 - if (!ti) 1066 - goto out; 1067 - if (!ti->type->dax_copy_to_iter) { 1068 - ret = copy_to_iter(addr, bytes, i); 1069 - goto out; 1070 - } 1071 - ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i); 1072 - out: 1073 - dm_put_live_table(md, srcu_idx); 1074 - 1075 - return ret; 1076 - } 1077 - 1078 1030 static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, 1079 1031 size_t nr_pages) 1080 1032 { ··· 1722 1770 md->dax_dev = NULL; 1723 1771 goto bad; 1724 1772 } 1773 + set_dax_nocache(md->dax_dev); 1774 + set_dax_nomc(md->dax_dev); 1725 1775 if (dax_add_host(md->dax_dev, md->disk)) 1726 1776 goto bad; 1727 1777 } ··· 2978 3024 2979 3025 static const struct dax_operations dm_dax_ops = { 2980 3026 .direct_access = dm_dax_direct_access, 2981 - .copy_from_iter = dm_dax_copy_from_iter, 2982 - .copy_to_iter 
= dm_dax_copy_to_iter, 2983 3027 .zero_page_range = dm_dax_zero_page_range, 2984 3028 }; 2985 3029
+2 -18
drivers/nvdimm/pmem.c
··· 301 301 return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); 302 302 } 303 303 304 - /* 305 - * Bounds checking, both file offset and device offset, is handled by 306 - * dax_iomap_actor() 307 - */ 308 - static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, 309 - void *addr, size_t bytes, struct iov_iter *i) 310 - { 311 - return _copy_from_iter_flushcache(addr, bytes, i); 312 - } 313 - 314 - static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, 315 - void *addr, size_t bytes, struct iov_iter *i) 316 - { 317 - return _copy_mc_to_iter(addr, bytes, i); 318 - } 319 - 320 304 static const struct dax_operations pmem_dax_ops = { 321 305 .direct_access = pmem_dax_direct_access, 322 - .copy_from_iter = pmem_copy_from_iter, 323 - .copy_to_iter = pmem_copy_to_iter, 324 306 .zero_page_range = pmem_dax_zero_page_range, 325 307 }; 326 308 ··· 479 497 rc = PTR_ERR(dax_dev); 480 498 goto out; 481 499 } 500 + set_dax_nocache(dax_dev); 501 + set_dax_nomc(dax_dev); 482 502 if (is_nvdimm_sync(nd_region)) 483 503 set_dax_synchronous(dax_dev); 484 504 rc = dax_add_host(dax_dev, disk);
-14
drivers/s390/block/dcssblk.c
··· 44 44 .release = dcssblk_release, 45 45 }; 46 46 47 - static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, 48 - pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) 49 - { 50 - return copy_from_iter(addr, bytes, i); 51 - } 52 - 53 - static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, 54 - pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) 55 - { 56 - return copy_to_iter(addr, bytes, i); 57 - } 58 - 59 47 static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, 60 48 pgoff_t pgoff, size_t nr_pages) 61 49 { ··· 60 72 61 73 static const struct dax_operations dcssblk_dax_ops = { 62 74 .direct_access = dcssblk_dax_direct_access, 63 - .copy_from_iter = dcssblk_dax_copy_from_iter, 64 - .copy_to_iter = dcssblk_dax_copy_to_iter, 65 75 .zero_page_range = dcssblk_dax_zero_page_range, 66 76 }; 67 77
-5
fs/dax.c
··· 1260 1260 if (map_len > end - pos) 1261 1261 map_len = end - pos; 1262 1262 1263 - /* 1264 - * The userspace address for the memory copy has already been 1265 - * validated via access_ok() in either vfs_read() or 1266 - * vfs_write(), depending on which operation we are doing. 1267 - */ 1268 1263 if (iov_iter_rw(iter) == WRITE) 1269 1264 xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, 1270 1265 map_len, iter);
-16
fs/fuse/virtio_fs.c
··· 753 753 return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; 754 754 } 755 755 756 - static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, 757 - pgoff_t pgoff, void *addr, 758 - size_t bytes, struct iov_iter *i) 759 - { 760 - return copy_from_iter(addr, bytes, i); 761 - } 762 - 763 - static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, 764 - pgoff_t pgoff, void *addr, 765 - size_t bytes, struct iov_iter *i) 766 - { 767 - return copy_to_iter(addr, bytes, i); 768 - } 769 - 770 756 static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 771 757 pgoff_t pgoff, size_t nr_pages) 772 758 { ··· 769 783 770 784 static const struct dax_operations virtio_fs_dax_ops = { 771 785 .direct_access = virtio_fs_direct_access, 772 - .copy_from_iter = virtio_fs_copy_from_iter, 773 - .copy_to_iter = virtio_fs_copy_to_iter, 774 786 .zero_page_range = virtio_fs_zero_page_range, 775 787 }; 776 788
+3 -6
include/linux/dax.h
··· 28 28 */ 29 29 bool (*dax_supported)(struct dax_device *, struct block_device *, int, 30 30 sector_t, sector_t); 31 - /* copy_from_iter: required operation for fs-dax direct-i/o */ 32 - size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, 33 - struct iov_iter *); 34 - /* copy_to_iter: required operation for fs-dax direct-i/o */ 35 - size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, 36 - struct iov_iter *); 37 31 /* zero_page_range: required operation. Zero page range */ 38 32 int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); 39 33 }; ··· 88 94 return !(vma->vm_flags & VM_SYNC); 89 95 } 90 96 #endif 97 + 98 + void set_dax_nocache(struct dax_device *dax_dev); 99 + void set_dax_nomc(struct dax_device *dax_dev); 91 100 92 101 struct writeback_control; 93 102 #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
-4
include/linux/device-mapper.h
··· 147 147 */ 148 148 typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, 149 149 long nr_pages, void **kaddr, pfn_t *pfn); 150 - typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, 151 - void *addr, size_t bytes, struct iov_iter *i); 152 150 typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, 153 151 size_t nr_pages); 154 152 ··· 198 200 dm_iterate_devices_fn iterate_devices; 199 201 dm_io_hints_fn io_hints; 200 202 dm_dax_direct_access_fn direct_access; 201 - dm_dax_copy_iter_fn dax_copy_from_iter; 202 - dm_dax_copy_iter_fn dax_copy_to_iter; 203 203 dm_dax_zero_page_range_fn dax_zero_page_range; 204 204 205 205 /* For internal device-mapper use. */