Merge tag 'dax-fixes-4.20-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull dax fixes from Dan Williams:
"The last of the known regression fixes and fallout from the Xarray
conversion of the filesystem-dax implementation.

On the path to debugging why the dax memory-failure injection test
started failing after the Xarray conversion, a couple more fixes for
dax_lock_mapping_entry(), now called dax_lock_page(), surfaced. Those,
plus the bug that started the hunt, are now addressed. These patches
have appeared in a -next release with no issues reported.

Note the touches to mm/memory-failure.c are just the conversion to the
new function signature for dax_lock_page().

Summary:

- Fix the Xarray conversion of fsdax to properly handle
dax_lock_mapping_entry() in the presence of pmd entries

- Fix inode destruction racing a new lock request"

* tag 'dax-fixes-4.20-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
dax: Fix unlock mismatch with updated API
dax: Don't access a freed inode
dax: Check page->mapping isn't NULL
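
As context for the signature change noted above: dax_lock_page() now
returns an opaque cookie rather than a bool, and the caller must hand
that cookie back on unlock. The cookie carries the entry value observed
at lock time, so the unlock path can restore exactly what was locked
(including PMD-sized entries) instead of reconstructing it from the pfn.
Below is a minimal sketch of the calling convention, mirroring the
mm/memory-failure.c conversion further down; the caller name
poison_dax_page_example() is hypothetical, the API names are taken from
the diffs:

	#include <linux/dax.h>
	#include <linux/mm.h>

	static int poison_dax_page_example(struct page *page)
	{
		dax_entry_t cookie;

		/* 0 means the DAX entry could not be locked. */
		cookie = dax_lock_page(page);
		if (!cookie)
			return -EBUSY;

		/* ... operate on the page while its DAX entry is held ... */

		/*
		 * Pass the cookie back: it is the entry value seen at lock
		 * time, so unlock stores back exactly what was locked.
		 */
		dax_unlock_page(page, cookie);
		return 0;
	}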

Changed files (+50 -25):
fs/dax.c (+38 -17)
···
 	}
 }
 
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct xa_state *xas, void *entry)
+{
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+	prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+	xas_unlock_irq(xas);
+	schedule();
+	finish_wait(wq, &ewait.wait);
+
+	/*
+	 * Entry lock waits are exclusive. Wake up the next waiter since
+	 * we aren't sure we will acquire the entry lock and thus wake
+	 * the next waiter up on unlock.
+	 */
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_NORMAL, 1, &ewait.key);
+}
+
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
···
  * @page: The page whose entry we want to lock
  *
  * Context: Process context.
- * Return: %true if the entry was locked or does not need to be locked.
+ * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * not be locked.
  */
-bool dax_lock_mapping_entry(struct page *page)
+dax_entry_t dax_lock_page(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
-	bool locked;
 
 	/* Ensure page->mapping isn't freed while we look at it */
 	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
-		locked = false;
-		if (!dax_mapping(mapping))
+		entry = NULL;
+		if (!mapping || !dax_mapping(mapping))
 			break;
 
 		/*
···
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
-		locked = true;
+		entry = (void *)~0UL;
 		if (S_ISCHR(mapping->host->i_mode))
 			break;
 
···
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
 			rcu_read_unlock();
-			entry = get_unlocked_entry(&xas);
-			xas_unlock_irq(&xas);
-			put_unlocked_entry(&xas, entry);
+			wait_entry_unlocked(&xas, entry);
 			rcu_read_lock();
 			continue;
 		}
···
 		break;
 	}
 	rcu_read_unlock();
-	return locked;
+	return (dax_entry_t)entry;
 }
 
-void dax_unlock_mapping_entry(struct page *page)
+void dax_unlock_page(struct page *page, dax_entry_t cookie)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
-	void *entry;
 
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	rcu_read_lock();
-	entry = xas_load(&xas);
-	rcu_read_unlock();
-	entry = dax_make_entry(page_to_pfn_t(page), dax_is_pmd_entry(entry));
-	dax_unlock_entry(&xas, entry);
+	dax_unlock_entry(&xas, (void *)cookie);
 }
 
 /*

include/linux/dax.h (+8 -6)
···
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
+typedef unsigned long dax_entry_t;
+
 struct iomap_ops;
 struct dax_device;
 struct dax_operations {
···
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
-bool dax_lock_mapping_entry(struct page *page);
-void dax_unlock_mapping_entry(struct page *page);
+dax_entry_t dax_lock_page(struct page *page);
+void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
···
 	return -EOPNOTSUPP;
 }
 
-static inline bool dax_lock_mapping_entry(struct page *page)
+static inline dax_entry_t dax_lock_page(struct page *page)
 {
 	if (IS_DAX(page->mapping->host))
-		return true;
-	return false;
+		return ~0UL;
+	return 0;
 }
 
-static inline void dax_unlock_mapping_entry(struct page *page)
+static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
 {
 }
 #endif

mm/memory-failure.c (+4 -2)
···
 	LIST_HEAD(tokill);
 	int rc = -EBUSY;
 	loff_t start;
+	dax_entry_t cookie;
 
 	/*
 	 * Prevent the inode from being freed while we are interrogating
···
 	 * also prevents changes to the mapping of this pfn until
 	 * poison signaling is complete.
 	 */
-	if (!dax_lock_mapping_entry(page))
+	cookie = dax_lock_page(page);
+	if (!cookie)
 		goto out;
 
 	if (hwpoison_filter(page)) {
···
 	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
 	rc = 0;
 unlock:
-	dax_unlock_mapping_entry(page);
+	dax_unlock_page(page, cookie);
 out:
 	/* drop pgmap ref acquired in caller */
 	put_dev_pagemap(pgmap);