fs: move i_sb_list out from under inode_lock

Protect the per-sb inode list with a new global lock,
inode_sb_list_lock, and use it to protect list manipulations and
traversals. This lock replaces the inode_lock for that purpose: the
inodes on the list can be validity-checked while holding inode->i_lock,
so the inode_lock is no longer needed to protect the list.
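
Every s_inodes walker converted below follows the same pattern: take
inode_sb_list_lock to traverse sb->s_inodes, validate and pin each inode
under inode->i_lock, then drop both locks before doing the per-inode work
and re-take the list lock to continue. The following is a minimal sketch of
that pattern, modeled on drop_pagecache_sb(); the walk_sb_inodes() name and
the work() callback are illustrative only and not part of this patch:

#include <linux/fs.h>
#include <linux/spinlock.h>
#include "internal.h"		/* for inode_sb_list_lock */

static void walk_sb_inodes(struct super_block *sb,
			   void (*work)(struct inode *))
{
	struct inode *inode, *toput_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		/* i_lock alone is enough to validate the inode's state */
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		work(inode);

		/*
		 * The reference we hold pins the inode on s_inodes, so the
		 * traversal can resume safely.  iput() can sleep, so the
		 * previous inode is only dropped here, outside the list lock.
		 */
		iput(toput_inode);
		toput_inode = inode;
		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(toput_inode);
}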

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

commit 55fa6091 (parent f283c86a), authored by Dave Chinner, committed by Al Viro

+67 -56
+5 -4
fs/drop_caches.c
···
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
···
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
···
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(toput_inode);
 }
+11 -10
fs/fs-writeback.c
···
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
···
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
+
 		/*
-		 * We hold a reference to 'inode' so it couldn't have
-		 * been removed from s_inodes list while we dropped the
-		 * inode_lock. We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it
-		 * under inode_lock. So we keep the reference and iput
-		 * it later.
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock. We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
 		 */
 		iput(old_inode);
 		old_inode = inode;
···
 
 		cond_resched();
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 }
+23 -20
fs/inode.c
···
  *   inode->i_state, inode->i_hash, __iget()
  * inode_lru_lock protects:
  *   inode_lru, inode->i_lru
+ * inode_sb_list_lock protects:
+ *   sb->s_inodes, inode->i_sb_list
  *
  * Lock ordering:
  * inode_lock
+ *   inode->i_lock
+ *
+ * inode_sb_list_lock
  *   inode->i_lock
  *     inode_lru_lock
  */
···
  * the i_state of an inode while it is in use..
  */
 DEFINE_SPINLOCK(inode_lock);
+
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
 
 /*
  * iprune_sem provides exclusion between the icache shrinking and the
···
 	spin_unlock(&inode_lru_lock);
 }
 
-static inline void __inode_sb_list_add(struct inode *inode)
-{
-	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
-}
-
 /**
  * inode_sb_list_add - add inode to the superblock list of inodes
  * @inode: inode to add
  */
 void inode_sb_list_add(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	__inode_sb_list_add(inode);
-	spin_unlock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
+	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+	spin_unlock(&inode_sb_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
-static inline void __inode_sb_list_del(struct inode *inode)
+static inline void inode_sb_list_del(struct inode *inode)
 {
+	spin_lock(&inode_sb_list_lock);
 	list_del_init(&inode->i_sb_list);
+	spin_unlock(&inode_sb_list_lock);
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
···
 
 	spin_lock(&inode_lock);
 	list_del_init(&inode->i_wb_list);
-	__inode_sb_list_del(inode);
 	spin_unlock(&inode_lock);
+
+	inode_sb_list_del(inode);
 
 	if (op->evict_inode) {
 		op->evict_inode(inode);
···
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
···
 		spin_unlock(&inode->i_lock);
 		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 
···
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
···
 		spin_unlock(&inode->i_lock);
 		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 
···
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_lock);
+	spin_lock_prefetch(&inode_sb_list_lock);
 
 	inode = alloc_inode(sb);
 	if (inode) {
-		spin_lock(&inode_lock);
 		spin_lock(&inode->i_lock);
 		inode->i_state = 0;
 		spin_unlock(&inode->i_lock);
-		__inode_sb_list_add(inode);
-		spin_unlock(&inode_lock);
+		inode_sb_list_add(inode);
 	}
 	return inode;
 }
···
 			inode->i_state = I_NEW;
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
-			__inode_sb_list_add(inode);
+			inode_sb_list_add(inode);
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
···
 			inode->i_state = I_NEW;
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
-			__inode_sb_list_add(inode);
+			inode_sb_list_add(inode);
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
+2
fs/internal.h
···
 /*
  * inode.c
  */
+extern spinlock_t inode_sb_list_lock;
+
 extern int get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *, bool);
+10 -10
fs/notify/inode_mark.c
···
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#include "../internal.h"
+
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
···
  * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
  */
 void fsnotify_unmount_inodes(struct list_head *list)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
 		struct inode *need_iput_tmp;
···
 		}
 
 		/*
-		 * We can safely drop inode_lock here because we hold
+		 * We can safely drop inode_sb_list_lock here because we hold
 		 * references on both inode and next_i. Also no new inodes
-		 * will be added since the umount has begun. Finally,
-		 * iprune_mutex keeps shrink_icache_memory() away.
+		 * will be added since the umount has begun.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
···
 
 		iput(inode);
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 }
+16 -12
fs/quota/dquot.c
···
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include "../internal.h" /* ugh */
 
 #include <asm/uaccess.h>
 
···
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
···
 #endif
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		iput(old_inode);
 		__dquot_initialize(inode, type);
-		/* We hold a reference to 'inode' so it couldn't have been
-		 * removed from s_inodes list while we dropped the inode_lock.
-		 * We cannot iput the inode now as we can be holding the last
-		 * reference and we cannot iput it under inode_lock. So we
-		 * keep the reference and iput it later. */
+
+		/*
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock We cannot iput the inode now as we can be
+		 * holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
+		 */
 		old_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
···
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 * We have to scan also I_NEW inodes because they can already
···
 			remove_inode_dquot_ref(inode, type, tofree_head);
 		}
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"