dax: remove the pmem_dax_ops->flush abstraction

Commit abebfbe2f731 ("dm: add ->flush() dax operation support") is
buggy. A DM device may be composed of multiple underlying devices and
all of them need to be flushed. That commit just routes the flush
request to the first device and ignores the other devices.

It could be fixed by adding more complex logic to the device mapper. But
there is only one implementation of the method pmem_dax_ops->flush - that
is pmem_dax_flush() - and it calls arch_wb_cache_pmem(). Consequently, we
don't need the pmem_dax_ops->flush abstraction at all; we can call
arch_wb_cache_pmem() directly from dax_flush(), because dax_dev->ops->flush
can never reach anything other than arch_wb_cache_pmem().

It should also be pointed out that some uses of persistent memory need to
flush only a very small amount of data (such as one cacheline), and it
would be overkill to go through the device mapper machinery for a single
flushed cache line.

Fix this by removing the pmem_dax_ops->flush abstraction and call
arch_wb_cache_pmem() directly from dax_flush(). Also, remove the device
mapper code that forwards the flushes.

Fixes: abebfbe2f731 ("dm: add ->flush() dax operation support")
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

authored by

Mikulas Patocka and committed by
Mike Snitzer
c3ca015f b5e8ad92

+17 -77
+14 -7
drivers/dax/super.c
··· 189 189 if (!dax_dev) 190 190 return 0; 191 191 192 - if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush) 192 + #ifndef CONFIG_ARCH_HAS_PMEM_API 193 + if (a == &dev_attr_write_cache.attr) 193 194 return 0; 195 + #endif 194 196 return a->mode; 195 197 } 196 198 ··· 257 255 } 258 256 EXPORT_SYMBOL_GPL(dax_copy_from_iter); 259 257 260 - void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 261 - size_t size) 258 + #ifdef CONFIG_ARCH_HAS_PMEM_API 259 + void arch_wb_cache_pmem(void *addr, size_t size); 260 + void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) 262 261 { 263 - if (!dax_alive(dax_dev)) 262 + if (unlikely(!dax_alive(dax_dev))) 264 263 return; 265 264 266 - if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)) 265 + if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) 267 266 return; 268 267 269 - if (dax_dev->ops->flush) 270 - dax_dev->ops->flush(dax_dev, pgoff, addr, size); 268 + arch_wb_cache_pmem(addr, size); 271 269 } 270 + #else 271 + void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) 272 + { 273 + } 274 + #endif 272 275 EXPORT_SYMBOL_GPL(dax_flush); 273 276 274 277 void dax_write_cache(struct dax_device *dax_dev, bool wc)
-15
drivers/md/dm-linear.c
··· 184 184 return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 185 185 } 186 186 187 - static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, 188 - size_t size) 189 - { 190 - struct linear_c *lc = ti->private; 191 - struct block_device *bdev = lc->dev->bdev; 192 - struct dax_device *dax_dev = lc->dev->dax_dev; 193 - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; 194 - 195 - dev_sector = linear_map_sector(ti, sector); 196 - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) 197 - return; 198 - dax_flush(dax_dev, pgoff, addr, size); 199 - } 200 - 201 187 static struct target_type linear_target = { 202 188 .name = "linear", 203 189 .version = {1, 4, 0}, ··· 198 212 .iterate_devices = linear_iterate_devices, 199 213 .direct_access = linear_dax_direct_access, 200 214 .dax_copy_from_iter = linear_dax_copy_from_iter, 201 - .dax_flush = linear_dax_flush, 202 215 }; 203 216 204 217 int __init dm_linear_init(void)
-20
drivers/md/dm-stripe.c
··· 351 351 return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); 352 352 } 353 353 354 - static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, 355 - size_t size) 356 - { 357 - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; 358 - struct stripe_c *sc = ti->private; 359 - struct dax_device *dax_dev; 360 - struct block_device *bdev; 361 - uint32_t stripe; 362 - 363 - stripe_map_sector(sc, sector, &stripe, &dev_sector); 364 - dev_sector += sc->stripe[stripe].physical_start; 365 - dax_dev = sc->stripe[stripe].dev->dax_dev; 366 - bdev = sc->stripe[stripe].dev->bdev; 367 - 368 - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) 369 - return; 370 - dax_flush(dax_dev, pgoff, addr, size); 371 - } 372 - 373 354 /* 374 355 * Stripe status: 375 356 * ··· 472 491 .io_hints = stripe_io_hints, 473 492 .direct_access = stripe_dax_direct_access, 474 493 .dax_copy_from_iter = stripe_dax_copy_from_iter, 475 - .dax_flush = stripe_dax_flush, 476 494 }; 477 495 478 496 int __init dm_stripe_init(void)
-19
drivers/md/dm.c
··· 987 987 return ret; 988 988 } 989 989 990 - static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 991 - size_t size) 992 - { 993 - struct mapped_device *md = dax_get_private(dax_dev); 994 - sector_t sector = pgoff * PAGE_SECTORS; 995 - struct dm_target *ti; 996 - int srcu_idx; 997 - 998 - ti = dm_dax_get_live_target(md, sector, &srcu_idx); 999 - 1000 - if (!ti) 1001 - goto out; 1002 - if (ti->type->dax_flush) 1003 - ti->type->dax_flush(ti, pgoff, addr, size); 1004 - out: 1005 - dm_put_live_table(md, srcu_idx); 1006 - } 1007 - 1008 990 /* 1009 991 * A target may call dm_accept_partial_bio only from the map routine. It is 1010 992 * allowed for all bio types except REQ_PREFLUSH. ··· 2974 2992 static const struct dax_operations dm_dax_ops = { 2975 2993 .direct_access = dm_dax_direct_access, 2976 2994 .copy_from_iter = dm_dax_copy_from_iter, 2977 - .flush = dm_dax_flush, 2978 2995 }; 2979 2996 2980 2997 /*
-7
drivers/nvdimm/pmem.c
··· 243 243 return copy_from_iter_flushcache(addr, bytes, i); 244 244 } 245 245 246 - static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, 247 - void *addr, size_t size) 248 - { 249 - arch_wb_cache_pmem(addr, size); 250 - } 251 - 252 246 static const struct dax_operations pmem_dax_ops = { 253 247 .direct_access = pmem_dax_direct_access, 254 248 .copy_from_iter = pmem_copy_from_iter, 255 - .flush = pmem_dax_flush, 256 249 }; 257 250 258 251 static const struct attribute_group *pmem_attribute_groups[] = {
+2 -2
fs/dax.c
··· 783 783 } 784 784 785 785 dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); 786 - dax_flush(dax_dev, pgoff, kaddr, size); 786 + dax_flush(dax_dev, kaddr, size); 787 787 /* 788 788 * After we have flushed the cache, we can clear the dirty tag. There 789 789 * cannot be new dirty data in the pfn after the flush has completed as ··· 978 978 return rc; 979 979 } 980 980 memset(kaddr + offset, 0, size); 981 - dax_flush(dax_dev, pgoff, kaddr + offset, size); 981 + dax_flush(dax_dev, kaddr + offset, size); 982 982 dax_read_unlock(id); 983 983 } 984 984 return 0;
+1 -4
include/linux/dax.h
··· 19 19 /* copy_from_iter: required operation for fs-dax direct-i/o */ 20 20 size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, 21 21 struct iov_iter *); 22 - /* flush: optional driver-specific cache management after writes */ 23 - void (*flush)(struct dax_device *, pgoff_t, void *, size_t); 24 22 }; 25 23 26 24 extern struct attribute_group dax_attribute_group; ··· 82 84 void **kaddr, pfn_t *pfn); 83 85 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 84 86 size_t bytes, struct iov_iter *i); 85 - void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, 86 - size_t size); 87 + void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); 87 88 void dax_write_cache(struct dax_device *dax_dev, bool wc); 88 89 bool dax_write_cache_enabled(struct dax_device *dax_dev); 89 90
-3
include/linux/device-mapper.h
··· 134 134 long nr_pages, void **kaddr, pfn_t *pfn); 135 135 typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, 136 136 void *addr, size_t bytes, struct iov_iter *i); 137 - typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, 138 - size_t size); 139 137 #define PAGE_SECTORS (PAGE_SIZE / 512) 140 138 141 139 void dm_error(const char *message); ··· 184 186 dm_io_hints_fn io_hints; 185 187 dm_dax_direct_access_fn direct_access; 186 188 dm_dax_copy_from_iter_fn dax_copy_from_iter; 187 - dm_dax_flush_fn dax_flush; 188 189 189 190 /* For internal device-mapper use. */ 190 191 struct list_head list;