fs: remove inode_lock from iput_final and prune_icache

Now that inode state changes are protected by the inode->i_lock and
the inode LRU manipulations by the inode_lru_lock, we can remove the
inode_lock from prune_icache and the initial part of iput_final().

Instead of using the inode_lock to protect the inode during
iput_final, use the inode->i_lock. This protects the inode
against new references being taken while we change the inode state
to I_FREEING, as well as preventing prune_icache from grabbing the
inode while we are manipulating it. Hence we no longer need the
inode_lock in iput_final prior to setting I_FREEING on the inode.

For prune_icache, we no longer need the inode_lock to protect the
LRU list, and the inodes themselves are protected against freeing
races by the inode->i_lock. Hence we can lift the inode_lock from
prune_icache as well.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

authored by Dave Chinner and committed by Al Viro f283c86a 02afc410

+17 -22
+1 -1
Documentation/filesystems/Locking
··· 128 destroy_inode: 129 dirty_inode: (must not sleep) 130 write_inode: 131 - drop_inode: !!!inode_lock!!! 132 evict_inode: 133 put_super: write 134 write_super: read
··· 128 destroy_inode: 129 dirty_inode: (must not sleep) 130 write_inode: 131 + drop_inode: !!!inode->i_lock!!! 132 evict_inode: 133 put_super: write 134 write_super: read
+11 -5
Documentation/filesystems/porting
··· 298 remaining links or not. Caller does *not* evict the pagecache or inode-associated 299 metadata buffers; getting rid of those is responsibility of method, as it had 300 been for ->delete_inode(). 301 - ->drop_inode() returns int now; it's called on final iput() with inode_lock 302 - held and it returns true if filesystems wants the inode to be dropped. As before, 303 - generic_drop_inode() is still the default and it's been updated appropriately. 304 - generic_delete_inode() is also alive and it consists simply of return 1. Note that 305 - all actual eviction work is done by caller after ->drop_inode() returns. 306 clear_inode() is gone; use end_writeback() instead. As before, it must 307 be called exactly once on each call of ->evict_inode() (as it used to be for 308 each call of ->delete_inode()). Unlike before, if you are using inode-associated ··· 397 Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, 398 so the i_size should not change when hole punching, even when puching the end of 399 a file off. 400 401 -- 402 [mandatory]
··· 298 remaining links or not. Caller does *not* evict the pagecache or inode-associated 299 metadata buffers; getting rid of those is responsibility of method, as it had 300 been for ->delete_inode(). 301 + 302 + ->drop_inode() returns int now; it's called on final iput() with 303 + inode->i_lock held and it returns true if filesystems wants the inode to be 304 + dropped. As before, generic_drop_inode() is still the default and it's been 305 + updated appropriately. generic_delete_inode() is also alive and it consists 306 + simply of return 1. Note that all actual eviction work is done by caller after 307 + ->drop_inode() returns. 308 + 309 clear_inode() is gone; use end_writeback() instead. As before, it must 310 be called exactly once on each call of ->evict_inode() (as it used to be for 311 each call of ->delete_inode()). Unlike before, if you are using inode-associated ··· 394 Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set, 395 so the i_size should not change when hole punching, even when puching the end of 396 a file off. 397 + 398 + -- 399 + [mandatory] 400 401 -- 402 [mandatory]
+1 -1
Documentation/filesystems/vfs.txt
··· 254 should be synchronous or not, not all filesystems check this flag. 255 256 drop_inode: called when the last access to the inode is dropped, 257 - with the inode_lock spinlock held. 258 259 This method should be either NULL (normal UNIX filesystem 260 semantics) or "generic_delete_inode" (for filesystems that do not
··· 254 should be synchronous or not, not all filesystems check this flag. 255 256 drop_inode: called when the last access to the inode is dropped, 257 + with the inode->i_lock spinlock held. 258 259 This method should be either NULL (normal UNIX filesystem 260 semantics) or "generic_delete_inode" (for filesystems that do not
+3 -14
fs/inode.c
··· 650 unsigned long reap = 0; 651 652 down_read(&iprune_sem); 653 - spin_lock(&inode_lock); 654 spin_lock(&inode_lru_lock); 655 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 656 struct inode *inode; ··· 675 */ 676 if (atomic_read(&inode->i_count) || 677 (inode->i_state & ~I_REFERENCED)) { 678 - spin_unlock(&inode->i_lock); 679 list_del_init(&inode->i_lru); 680 inodes_stat.nr_unused--; 681 continue; 682 } ··· 684 /* recently referenced inodes get one more pass */ 685 if (inode->i_state & I_REFERENCED) { 686 inode->i_state &= ~I_REFERENCED; 687 - spin_unlock(&inode->i_lock); 688 list_move(&inode->i_lru, &inode_lru); 689 continue; 690 } 691 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 692 __iget(inode); 693 spin_unlock(&inode->i_lock); 694 spin_unlock(&inode_lru_lock); 695 - spin_unlock(&inode_lock); 696 if (remove_inode_buffers(inode)) 697 reap += invalidate_mapping_pages(&inode->i_data, 698 0, -1); 699 iput(inode); 700 - spin_lock(&inode_lock); 701 spin_lock(&inode_lru_lock); 702 703 if (inode != list_entry(inode_lru.next, ··· 721 else 722 __count_vm_events(PGINODESTEAL, reap); 723 spin_unlock(&inode_lru_lock); 724 - spin_unlock(&inode_lock); 725 726 dispose_list(&freeable); 727 up_read(&iprune_sem); ··· 1078 1079 struct inode *igrab(struct inode *inode) 1080 { 1081 - spin_lock(&inode_lock); 1082 spin_lock(&inode->i_lock); 1083 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { 1084 __iget(inode); ··· 1091 */ 1092 inode = NULL; 1093 } 1094 - spin_unlock(&inode_lock); 1095 return inode; 1096 } 1097 EXPORT_SYMBOL(igrab); ··· 1433 const struct super_operations *op = inode->i_sb->s_op; 1434 int drop; 1435 1436 - spin_lock(&inode->i_lock); 1437 WARN_ON(inode->i_state & I_NEW); 1438 1439 if (op && op->drop_inode) ··· 1445 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1446 inode_lru_list_add(inode); 1447 spin_unlock(&inode->i_lock); 1448 - spin_unlock(&inode_lock); 1449 return; 1450 } 1451 1452 if (!drop) { 1453 inode->i_state |= I_WILL_FREE; 1454 
spin_unlock(&inode->i_lock); 1455 - spin_unlock(&inode_lock); 1456 write_inode_now(inode, 1); 1457 - spin_lock(&inode_lock); 1458 spin_lock(&inode->i_lock); 1459 WARN_ON(inode->i_state & I_NEW); 1460 inode->i_state &= ~I_WILL_FREE; ··· 1460 inode->i_state |= I_FREEING; 1461 inode_lru_list_del(inode); 1462 spin_unlock(&inode->i_lock); 1463 - spin_unlock(&inode_lock); 1464 1465 evict(inode); 1466 } ··· 1478 if (inode) { 1479 BUG_ON(inode->i_state & I_CLEAR); 1480 1481 - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1482 iput_final(inode); 1483 } 1484 }
··· 650 unsigned long reap = 0; 651 652 down_read(&iprune_sem); 653 spin_lock(&inode_lru_lock); 654 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 655 struct inode *inode; ··· 676 */ 677 if (atomic_read(&inode->i_count) || 678 (inode->i_state & ~I_REFERENCED)) { 679 list_del_init(&inode->i_lru); 680 + spin_unlock(&inode->i_lock); 681 inodes_stat.nr_unused--; 682 continue; 683 } ··· 685 /* recently referenced inodes get one more pass */ 686 if (inode->i_state & I_REFERENCED) { 687 inode->i_state &= ~I_REFERENCED; 688 list_move(&inode->i_lru, &inode_lru); 689 + spin_unlock(&inode->i_lock); 690 continue; 691 } 692 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 693 __iget(inode); 694 spin_unlock(&inode->i_lock); 695 spin_unlock(&inode_lru_lock); 696 if (remove_inode_buffers(inode)) 697 reap += invalidate_mapping_pages(&inode->i_data, 698 0, -1); 699 iput(inode); 700 spin_lock(&inode_lru_lock); 701 702 if (inode != list_entry(inode_lru.next, ··· 724 else 725 __count_vm_events(PGINODESTEAL, reap); 726 spin_unlock(&inode_lru_lock); 727 728 dispose_list(&freeable); 729 up_read(&iprune_sem); ··· 1082 1083 struct inode *igrab(struct inode *inode) 1084 { 1085 spin_lock(&inode->i_lock); 1086 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { 1087 __iget(inode); ··· 1096 */ 1097 inode = NULL; 1098 } 1099 return inode; 1100 } 1101 EXPORT_SYMBOL(igrab); ··· 1439 const struct super_operations *op = inode->i_sb->s_op; 1440 int drop; 1441 1442 WARN_ON(inode->i_state & I_NEW); 1443 1444 if (op && op->drop_inode) ··· 1452 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1453 inode_lru_list_add(inode); 1454 spin_unlock(&inode->i_lock); 1455 return; 1456 } 1457 1458 if (!drop) { 1459 inode->i_state |= I_WILL_FREE; 1460 spin_unlock(&inode->i_lock); 1461 write_inode_now(inode, 1); 1462 spin_lock(&inode->i_lock); 1463 WARN_ON(inode->i_state & I_NEW); 1464 inode->i_state &= ~I_WILL_FREE; ··· 1470 inode->i_state |= I_FREEING; 1471 inode_lru_list_del(inode); 1472 
spin_unlock(&inode->i_lock); 1473 1474 evict(inode); 1475 } ··· 1489 if (inode) { 1490 BUG_ON(inode->i_state & I_CLEAR); 1491 1492 + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) 1493 iput_final(inode); 1494 } 1495 }
+1 -1
fs/logfs/inode.c
··· 293 return ret; 294 } 295 296 - /* called with inode_lock held */ 297 static int logfs_drop_inode(struct inode *inode) 298 { 299 struct logfs_super *super = logfs_super(inode->i_sb);
··· 293 return ret; 294 } 295 296 + /* called with inode->i_lock held */ 297 static int logfs_drop_inode(struct inode *inode) 298 { 299 struct logfs_super *super = logfs_super(inode->i_sb);