fs: move i_sb_list out from under inode_lock

Protect the per-sb inode list with a new global lock,
inode_sb_list_lock, and use it to protect the list manipulations and
traversals. This lock replaces the inode_lock for this purpose: the
inodes on the list can be validity-checked while holding inode->i_lock,
so the inode_lock is no longer needed to protect the list.
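
To make the new protocol concrete, here is a minimal sketch of the
list-walk pattern that every s_inodes traversal is converted to. It is
illustrative rather than a hunk from the patch: walk_sb_inodes() and
do_blocking_work() are hypothetical names, while the locks, flags and
helpers (inode_sb_list_lock, inode->i_lock, __iget(), iput()) are the
real ones changed below.

/*
 * Hypothetical walker showing the locking pattern: inode_sb_list_lock
 * guards only the s_inodes list linkage; each inode is validity checked
 * under its own i_lock; and the reference taken via __iget() pins both
 * the inode and our cursor position in the list while the list lock is
 * dropped for blocking work.
 */
static void walk_sb_inodes(struct super_block *sb)
{
        struct inode *inode, *toput_inode = NULL;

        spin_lock(&inode_sb_list_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                spin_lock(&inode->i_lock);
                if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
                        /* skip inodes being torn down or still set up */
                        spin_unlock(&inode->i_lock);
                        continue;
                }
                __iget(inode);
                spin_unlock(&inode->i_lock);
                spin_unlock(&inode_sb_list_lock);

                do_blocking_work(inode);        /* may sleep: no spinlocks held */

                /*
                 * Defer iput() of the previous inode to here, so the
                 * final reference is never dropped while holding
                 * inode_sb_list_lock.
                 */
                iput(toput_inode);
                toput_inode = inode;

                spin_lock(&inode_sb_list_lock);
        }
        spin_unlock(&inode_sb_list_lock);
        iput(toput_inode);
}

Note the ordering this depends on: inode_sb_list_lock nests outside
inode->i_lock, which in turn nests outside inode_lru_lock, as the
updated fs/inode.c header comment documents.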

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

6 files changed, 67 insertions(+), 56 deletions(-)
fs/drop_caches.c | +5 -4

···
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
···
 {
         struct inode *inode, *toput_inode = NULL;
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
         list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                 spin_lock(&inode->i_lock);
                 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
···
                 }
                 __iget(inode);
                 spin_unlock(&inode->i_lock);
-                spin_unlock(&inode_lock);
+                spin_unlock(&inode_sb_list_lock);
                 invalidate_mapping_pages(inode->i_mapping, 0, -1);
                 iput(toput_inode);
                 toput_inode = inode;
-                spin_lock(&inode_lock);
+                spin_lock(&inode_sb_list_lock);
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
         iput(toput_inode);
 }
fs/fs-writeback.c | +11 -10

···
          */
         WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
 
         /*
          * Data integrity sync. Must wait for all pages under writeback,
···
                 }
                 __iget(inode);
                 spin_unlock(&inode->i_lock);
-                spin_unlock(&inode_lock);
+                spin_unlock(&inode_sb_list_lock);
+
                 /*
-                 * We hold a reference to 'inode' so it couldn't have
-                 * been removed from s_inodes list while we dropped the
-                 * inode_lock. We cannot iput the inode now as we can
-                 * be holding the last reference and we cannot iput it
-                 * under inode_lock. So we keep the reference and iput
-                 * it later.
+                 * We hold a reference to 'inode' so it couldn't have been
+                 * removed from s_inodes list while we dropped the
+                 * inode_sb_list_lock. We cannot iput the inode now as we can
+                 * be holding the last reference and we cannot iput it under
+                 * inode_sb_list_lock. So we keep the reference and iput it
+                 * later.
                  */
                 iput(old_inode);
                 old_inode = inode;
···
 
                 cond_resched();
 
-                spin_lock(&inode_lock);
+                spin_lock(&inode_sb_list_lock);
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
         iput(old_inode);
 }
fs/inode.c | +23 -20

···
  *   inode->i_state, inode->i_hash, __iget()
  * inode_lru_lock protects:
  *   inode_lru, inode->i_lru
+ * inode_sb_list_lock protects:
+ *   sb->s_inodes, inode->i_sb_list
  *
  * Lock ordering:
  * inode_lock
+ *   inode->i_lock
+ *
+ * inode_sb_list_lock
  *   inode->i_lock
  *     inode_lru_lock
  */
···
  * the i_state of an inode while it is in use..
  */
 DEFINE_SPINLOCK(inode_lock);
+
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
 
 /*
  * iprune_sem provides exclusion between the icache shrinking and the
···
         spin_unlock(&inode_lru_lock);
 }
 
-static inline void __inode_sb_list_add(struct inode *inode)
-{
-        list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
-}
-
 /**
  * inode_sb_list_add - add inode to the superblock list of inodes
  * @inode: inode to add
  */
 void inode_sb_list_add(struct inode *inode)
 {
-        spin_lock(&inode_lock);
-        __inode_sb_list_add(inode);
-        spin_unlock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
+        list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+        spin_unlock(&inode_sb_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
-static inline void __inode_sb_list_del(struct inode *inode)
+static inline void inode_sb_list_del(struct inode *inode)
 {
+        spin_lock(&inode_sb_list_lock);
         list_del_init(&inode->i_sb_list);
+        spin_unlock(&inode_sb_list_lock);
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
···
 
         spin_lock(&inode_lock);
         list_del_init(&inode->i_wb_list);
-        __inode_sb_list_del(inode);
         spin_unlock(&inode_lock);
+
+        inode_sb_list_del(inode);
 
         if (op->evict_inode) {
                 op->evict_inode(inode);
···
         struct inode *inode, *next;
         LIST_HEAD(dispose);
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
         list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
                 if (atomic_read(&inode->i_count))
                         continue;
···
                 spin_unlock(&inode->i_lock);
                 list_add(&inode->i_lru, &dispose);
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
 
         dispose_list(&dispose);
···
         struct inode *inode, *next;
         LIST_HEAD(dispose);
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
         list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
                 spin_lock(&inode->i_lock);
                 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
···
                 spin_unlock(&inode->i_lock);
                 list_add(&inode->i_lru, &dispose);
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
 
         dispose_list(&dispose);
···
 {
         struct inode *inode;
 
-        spin_lock_prefetch(&inode_lock);
+        spin_lock_prefetch(&inode_sb_list_lock);
 
         inode = alloc_inode(sb);
         if (inode) {
-                spin_lock(&inode_lock);
                 spin_lock(&inode->i_lock);
                 inode->i_state = 0;
                 spin_unlock(&inode->i_lock);
-                __inode_sb_list_add(inode);
-                spin_unlock(&inode_lock);
+                inode_sb_list_add(inode);
         }
         return inode;
 }
···
                 inode->i_state = I_NEW;
                 hlist_add_head(&inode->i_hash, head);
                 spin_unlock(&inode->i_lock);
-                __inode_sb_list_add(inode);
+                inode_sb_list_add(inode);
                 spin_unlock(&inode_lock);
 
                 /* Return the locked inode with I_NEW set, the
···
                 inode->i_state = I_NEW;
                 hlist_add_head(&inode->i_hash, head);
                 spin_unlock(&inode->i_lock);
-                __inode_sb_list_add(inode);
+                inode_sb_list_add(inode);
                 spin_unlock(&inode_lock);
 
                 /* Return the locked inode with I_NEW set, the
fs/internal.h | +2

···
 /*
  * inode.c
  */
+extern spinlock_t inode_sb_list_lock;
+
 extern int get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *, bool);
fs/notify/inode_mark.c | +10 -10

···
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#include "../internal.h"
+
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
···
  * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
  */
 void fsnotify_unmount_inodes(struct list_head *list)
 {
         struct inode *inode, *next_i, *need_iput = NULL;
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
         list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
                 struct inode *need_iput_tmp;
···
                 }
 
                 /*
-                 * We can safely drop inode_lock here because we hold
+                 * We can safely drop inode_sb_list_lock here because we hold
                  * references on both inode and next_i. Also no new inodes
-                 * will be added since the umount has begun. Finally,
-                 * iprune_mutex keeps shrink_icache_memory() away.
+                 * will be added since the umount has begun.
                  */
-                spin_unlock(&inode_lock);
+                spin_unlock(&inode_sb_list_lock);
 
                 if (need_iput_tmp)
                         iput(need_iput_tmp);
···
 
                 iput(inode);
 
-                spin_lock(&inode_lock);
+                spin_lock(&inode_sb_list_lock);
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
 }
fs/quota/dquot.c | +16 -12

···
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include "../internal.h" /* ugh */
 
 #include <asm/uaccess.h>
···
         int reserved = 0;
 #endif
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
         list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                 spin_lock(&inode->i_lock);
                 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
···
 #endif
                 __iget(inode);
                 spin_unlock(&inode->i_lock);
-                spin_unlock(&inode_lock);
+                spin_unlock(&inode_sb_list_lock);
 
                 iput(old_inode);
                 __dquot_initialize(inode, type);
-                /* We hold a reference to 'inode' so it couldn't have been
-                 * removed from s_inodes list while we dropped the inode_lock.
-                 * We cannot iput the inode now as we can be holding the last
-                 * reference and we cannot iput it under inode_lock. So we
-                 * keep the reference and iput it later. */
+
+                /*
+                 * We hold a reference to 'inode' so it couldn't have been
+                 * removed from s_inodes list while we dropped the
+                 * inode_sb_list_lock. We cannot iput the inode now as we can be
+                 * holding the last reference and we cannot iput it under
+                 * inode_sb_list_lock. So we keep the reference and iput it
+                 * later.
+                 */
                 old_inode = inode;
-                spin_lock(&inode_lock);
+                spin_lock(&inode_sb_list_lock);
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
         iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
···
         struct inode *inode;
         int reserved = 0;
 
-        spin_lock(&inode_lock);
+        spin_lock(&inode_sb_list_lock);
         list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                 /*
                  * We have to scan also I_NEW inodes because they can already
···
                         remove_inode_dquot_ref(inode, type, tofree_head);
                 }
         }
-        spin_unlock(&inode_lock);
+        spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
         if (reserved) {
                 printk(KERN_WARNING "VFS (%s): Writes happened after quota"