Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs: scale mntget/mntput

The problem that this patch aims to fix is vfsmount refcounting scalability.
We need to take a reference on the vfsmount for every successful path lookup,
and these lookups often go to the same mount point.

The fundamental difficulty is that a "simple" reference count can never be made
scalable, because any time a reference is dropped, we must check whether that
was the last reference. To do that requires communication with all other CPUs
that may have taken a reference count.

We can make refcounts more scalable in a couple of ways, involving keeping
distributed counters, and checking for the global-zero condition less
frequently.

- check the global sum once every interval (this will delay zero detection
for some interval, so it's probably a showstopper for vfsmounts).

- keep a local count and only take the global sum when the local count reaches
0 (this is difficult for vfsmounts, because we can't hold preempt off for the
life of a reference, so a counter would need to be per-thread or tied strongly
to a particular CPU, which requires more locking).

- keep a local difference of increments and decrements, which allows us to sum
the total difference and hence find the refcount when summing all CPUs. Then,
keep a single integer "long" refcount for slow and long lasting references,
and only take the global sum of local counters when the long refcount is 0.

This last scheme is what I implemented here. Attached mounts and process root
and working directory references are "long" references, and everything else is
a short reference.

This allows scalable vfsmount references during path walking over mounted
subtrees and unattached (lazy umounted) mounts with processes still running
in them.

This results in one fewer atomic op in the fastpath: mntget is now just a
per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock
and non-atomic decrement in the common case. However code is otherwise bigger
and heavier, so single threaded performance is basically a wash.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>

+285 -100
+1 -1
arch/ia64/kernel/perfmon.c
··· 1542 1542 * any operations on the root directory. However, we need a non-trivial 1543 1543 * d_name - pfm: will go nicely and kill the special-casing in procfs. 1544 1544 */ 1545 - static struct vfsmount *pfmfs_mnt; 1545 + static struct vfsmount *pfmfs_mnt __read_mostly; 1546 1546 1547 1547 static int __init 1548 1548 init_pfm_fs(void)
+1 -1
drivers/mtd/mtdchar.c
··· 1201 1201 static void __exit cleanup_mtdchar(void) 1202 1202 { 1203 1203 unregister_mtd_user(&mtdchar_notifier); 1204 - mntput(mtd_inode_mnt); 1204 + mntput_long(mtd_inode_mnt); 1205 1205 unregister_filesystem(&mtd_inodefs_type); 1206 1206 __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); 1207 1207 }
+1 -1
fs/anon_inodes.c
··· 232 232 return 0; 233 233 234 234 err_mntput: 235 - mntput(anon_inode_mnt); 235 + mntput_long(anon_inode_mnt); 236 236 err_unregister_filesystem: 237 237 unregister_filesystem(&anon_inode_fs_type); 238 238 err_exit:
+16 -10
fs/fs_struct.c
··· 17 17 write_seqcount_begin(&fs->seq); 18 18 old_root = fs->root; 19 19 fs->root = *path; 20 - path_get(path); 20 + path_get_long(path); 21 21 write_seqcount_end(&fs->seq); 22 22 spin_unlock(&fs->lock); 23 23 if (old_root.dentry) 24 - path_put(&old_root); 24 + path_put_long(&old_root); 25 25 } 26 26 27 27 /* ··· 36 36 write_seqcount_begin(&fs->seq); 37 37 old_pwd = fs->pwd; 38 38 fs->pwd = *path; 39 - path_get(path); 39 + path_get_long(path); 40 40 write_seqcount_end(&fs->seq); 41 41 spin_unlock(&fs->lock); 42 42 43 43 if (old_pwd.dentry) 44 - path_put(&old_pwd); 44 + path_put_long(&old_pwd); 45 45 } 46 46 47 47 void chroot_fs_refs(struct path *old_root, struct path *new_root) ··· 59 59 write_seqcount_begin(&fs->seq); 60 60 if (fs->root.dentry == old_root->dentry 61 61 && fs->root.mnt == old_root->mnt) { 62 - path_get(new_root); 62 + path_get_long(new_root); 63 63 fs->root = *new_root; 64 64 count++; 65 65 } 66 66 if (fs->pwd.dentry == old_root->dentry 67 67 && fs->pwd.mnt == old_root->mnt) { 68 - path_get(new_root); 68 + path_get_long(new_root); 69 69 fs->pwd = *new_root; 70 70 count++; 71 71 } ··· 76 76 } while_each_thread(g, p); 77 77 read_unlock(&tasklist_lock); 78 78 while (count--) 79 - path_put(old_root); 79 + path_put_long(old_root); 80 80 } 81 81 82 82 void free_fs_struct(struct fs_struct *fs) 83 83 { 84 - path_put(&fs->root); 85 - path_put(&fs->pwd); 84 + path_put_long(&fs->root); 85 + path_put_long(&fs->pwd); 86 86 kmem_cache_free(fs_cachep, fs); 87 87 } 88 88 ··· 115 115 spin_lock_init(&fs->lock); 116 116 seqcount_init(&fs->seq); 117 117 fs->umask = old->umask; 118 - get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 118 + 119 + spin_lock(&old->lock); 120 + fs->root = old->root; 121 + path_get_long(&fs->root); 122 + fs->pwd = old->pwd; 123 + path_get_long(&fs->pwd); 124 + spin_unlock(&old->lock); 119 125 } 120 126 return fs; 121 127 }
+1
fs/internal.h
··· 63 63 64 64 extern void free_vfsmnt(struct vfsmount *); 65 65 extern struct vfsmount *alloc_vfsmnt(const char *); 66 + extern unsigned int mnt_get_count(struct vfsmount *mnt); 66 67 extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 67 68 extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68 69 struct vfsmount *);
+24
fs/namei.c
··· 368 368 EXPORT_SYMBOL(path_get); 369 369 370 370 /** 371 + * path_get_long - get a long reference to a path 372 + * @path: path to get the reference to 373 + * 374 + * Given a path increment the reference count to the dentry and the vfsmount. 375 + */ 376 + void path_get_long(struct path *path) 377 + { 378 + mntget_long(path->mnt); 379 + dget(path->dentry); 380 + } 381 + 382 + /** 371 383 * path_put - put a reference to a path 372 384 * @path: path to put the reference to 373 385 * ··· 391 379 mntput(path->mnt); 392 380 } 393 381 EXPORT_SYMBOL(path_put); 382 + 383 + /** 384 + * path_put_long - put a long reference to a path 385 + * @path: path to put the reference to 386 + * 387 + * Given a path decrement the reference count to the dentry and the vfsmount. 388 + */ 389 + void path_put_long(struct path *path) 390 + { 391 + dput(path->dentry); 392 + mntput_long(path->mnt); 393 + } 394 394 395 395 /** 396 396 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
+201 -45
fs/namespace.c
··· 138 138 mnt->mnt_group_id = 0; 139 139 } 140 140 141 + /* 142 + * vfsmount lock must be held for read 143 + */ 144 + static inline void mnt_add_count(struct vfsmount *mnt, int n) 145 + { 146 + #ifdef CONFIG_SMP 147 + this_cpu_add(mnt->mnt_pcp->mnt_count, n); 148 + #else 149 + preempt_disable(); 150 + mnt->mnt_count += n; 151 + preempt_enable(); 152 + #endif 153 + } 154 + 155 + static inline void mnt_set_count(struct vfsmount *mnt, int n) 156 + { 157 + #ifdef CONFIG_SMP 158 + this_cpu_write(mnt->mnt_pcp->mnt_count, n); 159 + #else 160 + mnt->mnt_count = n; 161 + #endif 162 + } 163 + 164 + /* 165 + * vfsmount lock must be held for read 166 + */ 167 + static inline void mnt_inc_count(struct vfsmount *mnt) 168 + { 169 + mnt_add_count(mnt, 1); 170 + } 171 + 172 + /* 173 + * vfsmount lock must be held for read 174 + */ 175 + static inline void mnt_dec_count(struct vfsmount *mnt) 176 + { 177 + mnt_add_count(mnt, -1); 178 + } 179 + 180 + /* 181 + * vfsmount lock must be held for write 182 + */ 183 + unsigned int mnt_get_count(struct vfsmount *mnt) 184 + { 185 + #ifdef CONFIG_SMP 186 + unsigned int count = atomic_read(&mnt->mnt_longrefs); 187 + int cpu; 188 + 189 + for_each_possible_cpu(cpu) { 190 + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; 191 + } 192 + 193 + return count; 194 + #else 195 + return mnt->mnt_count; 196 + #endif 197 + } 198 + 141 199 struct vfsmount *alloc_vfsmnt(const char *name) 142 200 { 143 201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); ··· 212 154 goto out_free_id; 213 155 } 214 156 215 - atomic_set(&mnt->mnt_count, 1); 157 + #ifdef CONFIG_SMP 158 + mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); 159 + if (!mnt->mnt_pcp) 160 + goto out_free_devname; 161 + 162 + atomic_set(&mnt->mnt_longrefs, 1); 163 + #else 164 + mnt->mnt_count = 1; 165 + mnt->mnt_writers = 0; 166 + #endif 167 + 216 168 INIT_LIST_HEAD(&mnt->mnt_hash); 217 169 INIT_LIST_HEAD(&mnt->mnt_child); 218 170 INIT_LIST_HEAD(&mnt->mnt_mounts); ··· 233 165 
INIT_LIST_HEAD(&mnt->mnt_slave); 234 166 #ifdef CONFIG_FSNOTIFY 235 167 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 236 - #endif 237 - #ifdef CONFIG_SMP 238 - mnt->mnt_writers = alloc_percpu(int); 239 - if (!mnt->mnt_writers) 240 - goto out_free_devname; 241 - #else 242 - mnt->mnt_writers = 0; 243 168 #endif 244 169 } 245 170 return mnt; ··· 280 219 static inline void mnt_inc_writers(struct vfsmount *mnt) 281 220 { 282 221 #ifdef CONFIG_SMP 283 - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; 222 + this_cpu_inc(mnt->mnt_pcp->mnt_writers); 284 223 #else 285 224 mnt->mnt_writers++; 286 225 #endif ··· 289 228 static inline void mnt_dec_writers(struct vfsmount *mnt) 290 229 { 291 230 #ifdef CONFIG_SMP 292 - (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; 231 + this_cpu_dec(mnt->mnt_pcp->mnt_writers); 293 232 #else 294 233 mnt->mnt_writers--; 295 234 #endif ··· 302 241 int cpu; 303 242 304 243 for_each_possible_cpu(cpu) { 305 - count += *per_cpu_ptr(mnt->mnt_writers, cpu); 244 + count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; 306 245 } 307 246 308 247 return count; ··· 479 418 kfree(mnt->mnt_devname); 480 419 mnt_free_id(mnt); 481 420 #ifdef CONFIG_SMP 482 - free_percpu(mnt->mnt_writers); 421 + free_percpu(mnt->mnt_pcp); 483 422 #endif 484 423 kmem_cache_free(mnt_cache, mnt); 485 424 } ··· 713 652 return NULL; 714 653 } 715 654 716 - static inline void __mntput(struct vfsmount *mnt) 655 + static inline void mntfree(struct vfsmount *mnt) 717 656 { 718 657 struct super_block *sb = mnt->mnt_sb; 658 + 719 659 /* 720 660 * This probably indicates that somebody messed 721 661 * up a mnt_want/drop_write() pair. If this ··· 724 662 * to make r/w->r/o transitions. 725 663 */ 726 664 /* 727 - * atomic_dec_and_lock() used to deal with ->mnt_count decrements 728 - * provides barriers, so mnt_get_writers() below is safe. AV 665 + * The locking used to deal with mnt_count decrement provides barriers, 666 + * so mnt_get_writers() below is safe. 
729 667 */ 730 668 WARN_ON(mnt_get_writers(mnt)); 731 669 fsnotify_vfsmount_delete(mnt); ··· 734 672 deactivate_super(sb); 735 673 } 736 674 737 - void mntput_no_expire(struct vfsmount *mnt) 675 + #ifdef CONFIG_SMP 676 + static inline void __mntput(struct vfsmount *mnt, int longrefs) 738 677 { 739 - repeat: 740 - if (atomic_add_unless(&mnt->mnt_count, -1, 1)) 678 + if (!longrefs) { 679 + put_again: 680 + br_read_lock(vfsmount_lock); 681 + if (likely(atomic_read(&mnt->mnt_longrefs))) { 682 + mnt_dec_count(mnt); 683 + br_read_unlock(vfsmount_lock); 684 + return; 685 + } 686 + br_read_unlock(vfsmount_lock); 687 + } else { 688 + BUG_ON(!atomic_read(&mnt->mnt_longrefs)); 689 + if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) 690 + return; 691 + } 692 + 693 + br_write_lock(vfsmount_lock); 694 + if (!longrefs) 695 + mnt_dec_count(mnt); 696 + else 697 + atomic_dec(&mnt->mnt_longrefs); 698 + if (mnt_get_count(mnt)) { 699 + br_write_unlock(vfsmount_lock); 700 + return; 701 + } 702 + if (unlikely(mnt->mnt_pinned)) { 703 + mnt_add_count(mnt, mnt->mnt_pinned + 1); 704 + mnt->mnt_pinned = 0; 705 + br_write_unlock(vfsmount_lock); 706 + acct_auto_close_mnt(mnt); 707 + goto put_again; 708 + } 709 + br_write_unlock(vfsmount_lock); 710 + mntfree(mnt); 711 + } 712 + #else 713 + static inline void __mntput(struct vfsmount *mnt, int longrefs) 714 + { 715 + put_again: 716 + mnt_dec_count(mnt); 717 + if (likely(mnt_get_count(mnt))) 741 718 return; 742 719 br_write_lock(vfsmount_lock); 743 - if (!atomic_dec_and_test(&mnt->mnt_count)) { 720 + if (unlikely(mnt->mnt_pinned)) { 721 + mnt_add_count(mnt, mnt->mnt_pinned + 1); 722 + mnt->mnt_pinned = 0; 744 723 br_write_unlock(vfsmount_lock); 745 - return; 724 + acct_auto_close_mnt(mnt); 725 + goto put_again; 746 726 } 747 - if (likely(!mnt->mnt_pinned)) { 748 - br_write_unlock(vfsmount_lock); 749 - __mntput(mnt); 750 - return; 751 - } 752 - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); 753 - mnt->mnt_pinned = 0; 754 727 
br_write_unlock(vfsmount_lock); 755 - acct_auto_close_mnt(mnt); 756 - goto repeat; 728 + mntfree(mnt); 757 729 } 758 - EXPORT_SYMBOL(mntput_no_expire); 730 + #endif 731 + 732 + static void mntput_no_expire(struct vfsmount *mnt) 733 + { 734 + __mntput(mnt, 0); 735 + } 736 + 737 + void mntput(struct vfsmount *mnt) 738 + { 739 + if (mnt) { 740 + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ 741 + if (unlikely(mnt->mnt_expiry_mark)) 742 + mnt->mnt_expiry_mark = 0; 743 + __mntput(mnt, 0); 744 + } 745 + } 746 + EXPORT_SYMBOL(mntput); 747 + 748 + struct vfsmount *mntget(struct vfsmount *mnt) 749 + { 750 + if (mnt) 751 + mnt_inc_count(mnt); 752 + return mnt; 753 + } 754 + EXPORT_SYMBOL(mntget); 755 + 756 + void mntput_long(struct vfsmount *mnt) 757 + { 758 + #ifdef CONFIG_SMP 759 + if (mnt) { 760 + /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ 761 + if (unlikely(mnt->mnt_expiry_mark)) 762 + mnt->mnt_expiry_mark = 0; 763 + __mntput(mnt, 1); 764 + } 765 + #else 766 + mntput(mnt); 767 + #endif 768 + } 769 + EXPORT_SYMBOL(mntput_long); 770 + 771 + struct vfsmount *mntget_long(struct vfsmount *mnt) 772 + { 773 + #ifdef CONFIG_SMP 774 + if (mnt) 775 + atomic_inc(&mnt->mnt_longrefs); 776 + return mnt; 777 + #else 778 + return mntget(mnt); 779 + #endif 780 + } 781 + EXPORT_SYMBOL(mntget_long); 759 782 760 783 void mnt_pin(struct vfsmount *mnt) 761 784 { ··· 848 701 mnt->mnt_pinned++; 849 702 br_write_unlock(vfsmount_lock); 850 703 } 851 - 852 704 EXPORT_SYMBOL(mnt_pin); 853 705 854 706 void mnt_unpin(struct vfsmount *mnt) 855 707 { 856 708 br_write_lock(vfsmount_lock); 857 709 if (mnt->mnt_pinned) { 858 - atomic_inc(&mnt->mnt_count); 710 + mnt_inc_count(mnt); 859 711 mnt->mnt_pinned--; 860 712 } 861 713 br_write_unlock(vfsmount_lock); 862 714 } 863 - 864 715 EXPORT_SYMBOL(mnt_unpin); 865 716 866 717 static inline void mangle(struct seq_file *m, const char *s) ··· 1153 1008 int minimum_refs = 0; 1154 1009 struct vfsmount *p; 1155 1010 1156 - 
br_read_lock(vfsmount_lock); 1011 + /* write lock needed for mnt_get_count */ 1012 + br_write_lock(vfsmount_lock); 1157 1013 for (p = mnt; p; p = next_mnt(p, mnt)) { 1158 - actual_refs += atomic_read(&p->mnt_count); 1014 + actual_refs += mnt_get_count(p); 1159 1015 minimum_refs += 2; 1160 1016 } 1161 - br_read_unlock(vfsmount_lock); 1017 + br_write_unlock(vfsmount_lock); 1162 1018 1163 1019 if (actual_refs > minimum_refs) 1164 1020 return 0; ··· 1186 1040 { 1187 1041 int ret = 1; 1188 1042 down_read(&namespace_sem); 1189 - br_read_lock(vfsmount_lock); 1043 + br_write_lock(vfsmount_lock); 1190 1044 if (propagate_mount_busy(mnt, 2)) 1191 1045 ret = 0; 1192 - br_read_unlock(vfsmount_lock); 1046 + br_write_unlock(vfsmount_lock); 1193 1047 up_read(&namespace_sem); 1194 1048 return ret; 1195 1049 } ··· 1216 1070 dput(dentry); 1217 1071 mntput(m); 1218 1072 } 1219 - mntput(mnt); 1073 + mntput_long(mnt); 1220 1074 } 1221 1075 } 1222 1076 ··· 1271 1125 flags & (MNT_FORCE | MNT_DETACH)) 1272 1126 return -EINVAL; 1273 1127 1274 - if (atomic_read(&mnt->mnt_count) != 2) 1128 + /* 1129 + * probably don't strictly need the lock here if we examined 1130 + * all race cases, but it's a slowpath. 
 1131 + */ 1132 + br_write_lock(vfsmount_lock); 1133 + if (mnt_get_count(mnt) != 2) { 1134 + br_write_unlock(vfsmount_lock); 1275 1135 return -EBUSY; 1136 + } 1137 + br_write_unlock(vfsmount_lock); 1276 1138 1277 1139 if (!xchg(&mnt->mnt_expiry_mark, 1)) 1278 1140 return -EAGAIN; ··· 1969 1815 1970 1816 unlock: 1971 1817 up_write(&namespace_sem); 1972 - mntput(newmnt); 1818 + mntput_long(newmnt); 1973 1819 return err; 1974 1820 } ··· 2302 2148 if (fs) { 2303 2149 if (p == fs->root.mnt) { 2304 2150 rootmnt = p; 2305 - fs->root.mnt = mntget(q); 2151 + fs->root.mnt = mntget_long(q); 2306 2152 } 2307 2153 if (p == fs->pwd.mnt) { 2308 2154 pwdmnt = p; 2309 - fs->pwd.mnt = mntget(q); 2155 + fs->pwd.mnt = mntget_long(q); 2310 2156 } 2311 2157 } 2312 2158 p = next_mnt(p, mnt_ns->root); ··· 2315 2161 up_write(&namespace_sem); 2316 2162 2317 2163 if (rootmnt) 2318 - mntput(rootmnt); 2164 + mntput_long(rootmnt); 2319 2165 if (pwdmnt) 2320 - mntput(pwdmnt); 2166 + mntput_long(pwdmnt); 2321 2167 2322 2168 return new_ns; 2323 2169 } ··· 2504 2350 touch_mnt_namespace(current->nsproxy->mnt_ns); 2505 2351 br_write_unlock(vfsmount_lock); 2506 2352 chroot_fs_refs(&root, &new); 2353 + 2507 2354 error = 0; 2508 2355 path_put(&root_parent); 2509 2356 path_put(&parent_path); ··· 2531 2376 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2532 2377 if (IS_ERR(mnt)) 2533 2378 panic("Can't create rootfs"); 2379 + 2534 2380 ns = create_mnt_ns(mnt); 2535 2381 if (IS_ERR(ns)) 2536 2382 panic("Can't allocate initial namespace");
+1 -1
fs/pipe.c
··· 1292 1292 static void __exit exit_pipe_fs(void) 1293 1293 { 1294 1294 unregister_filesystem(&pipe_fs_type); 1295 - mntput(pipe_mnt); 1295 + mntput_long(pipe_mnt); 1296 1296 } 1297 1297 1298 1298 fs_initcall(init_pipe_fs);
+2 -2
fs/pnode.c
··· 288 288 */ 289 289 static inline int do_refcount_check(struct vfsmount *mnt, int count) 290 290 { 291 - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; 291 + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; 292 292 return (mycount > count); 293 293 } 294 294 ··· 300 300 * Check if any of these mounts that **do not have submounts** 301 301 * have more references than 'refcnt'. If so return busy. 302 302 * 303 - * vfsmount lock must be held for read or write 303 + * vfsmount lock must be held for write 304 304 */ 305 305 int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 306 306 {
+1 -1
fs/super.c
··· 1140 1140 return mnt; 1141 1141 1142 1142 err: 1143 - mntput(mnt); 1143 + mntput_long(mnt); 1144 1144 return ERR_PTR(err); 1145 1145 } 1146 1146
+17 -36
include/linux/mount.h
··· 13 13 #include <linux/list.h> 14 14 #include <linux/nodemask.h> 15 15 #include <linux/spinlock.h> 16 + #include <linux/seqlock.h> 16 17 #include <asm/atomic.h> 17 18 18 19 struct super_block; ··· 47 46 48 47 #define MNT_INTERNAL 0x4000 49 48 49 + struct mnt_pcp { 50 + int mnt_count; 51 + int mnt_writers; 52 + }; 53 + 50 54 struct vfsmount { 51 55 struct list_head mnt_hash; 52 56 struct vfsmount *mnt_parent; /* fs we are mounted on */ 53 57 struct dentry *mnt_mountpoint; /* dentry of mountpoint */ 54 58 struct dentry *mnt_root; /* root of the mounted tree */ 55 59 struct super_block *mnt_sb; /* pointer to superblock */ 60 + #ifdef CONFIG_SMP 61 + struct mnt_pcp __percpu *mnt_pcp; 62 + atomic_t mnt_longrefs; 63 + #else 64 + int mnt_count; 65 + int mnt_writers; 66 + #endif 56 67 struct list_head mnt_mounts; /* list of children, anchored here */ 57 68 struct list_head mnt_child; /* and going through their mnt_child */ 58 69 int mnt_flags; ··· 83 70 struct mnt_namespace *mnt_ns; /* containing namespace */ 84 71 int mnt_id; /* mount identifier */ 85 72 int mnt_group_id; /* peer group identifier */ 86 - /* 87 - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount 88 - * to let these frequently modified fields in a separate cache line 89 - * (so that reads of mnt_flags wont ping-pong on SMP machines) 90 - */ 91 - atomic_t mnt_count; 92 73 int mnt_expiry_mark; /* true if marked for expiry */ 93 74 int mnt_pinned; 94 75 int mnt_ghosts; 95 - #ifdef CONFIG_SMP 96 - int __percpu *mnt_writers; 97 - #else 98 - int mnt_writers; 99 - #endif 100 76 }; 101 - 102 - static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) 103 - { 104 - #ifdef CONFIG_SMP 105 - return mnt->mnt_writers; 106 - #else 107 - return &mnt->mnt_writers; 108 - #endif 109 - } 110 - 111 - static inline struct vfsmount *mntget(struct vfsmount *mnt) 112 - { 113 - if (mnt) 114 - atomic_inc(&mnt->mnt_count); 115 - return mnt; 116 - } 117 77 118 78 struct file; /* forward dec */ 119 79 ··· 94 108 
extern int mnt_want_write_file(struct file *file); 95 109 extern int mnt_clone_write(struct vfsmount *mnt); 96 110 extern void mnt_drop_write(struct vfsmount *mnt); 97 - extern void mntput_no_expire(struct vfsmount *mnt); 111 + extern void mntput(struct vfsmount *mnt); 112 + extern struct vfsmount *mntget(struct vfsmount *mnt); 113 + extern void mntput_long(struct vfsmount *mnt); 114 + extern struct vfsmount *mntget_long(struct vfsmount *mnt); 98 115 extern void mnt_pin(struct vfsmount *mnt); 99 116 extern void mnt_unpin(struct vfsmount *mnt); 100 117 extern int __mnt_is_readonly(struct vfsmount *mnt); 101 - 102 - static inline void mntput(struct vfsmount *mnt) 103 - { 104 - if (mnt) { 105 - mnt->mnt_expiry_mark = 0; 106 - mntput_no_expire(mnt); 107 - } 108 - } 109 118 110 119 extern struct vfsmount *do_kern_mount(const char *fstype, int flags, 111 120 const char *name, void *data);
+2
include/linux/path.h
··· 10 10 }; 11 11 12 12 extern void path_get(struct path *); 13 + extern void path_get_long(struct path *); 13 14 extern void path_put(struct path *); 15 + extern void path_put_long(struct path *); 14 16 15 17 static inline int path_equal(const struct path *path1, const struct path *path2) 16 18 {
+17 -2
net/socket.c
··· 2390 2390 2391 2391 static int __init sock_init(void) 2392 2392 { 2393 + int err; 2394 + 2393 2395 /* 2394 2396 * Initialize sock SLAB cache. 2395 2397 */ ··· 2408 2406 */ 2409 2407 2410 2408 init_inodecache(); 2411 - register_filesystem(&sock_fs_type); 2409 + 2410 + err = register_filesystem(&sock_fs_type); 2411 + if (err) 2412 + goto out_fs; 2412 2413 sock_mnt = kern_mount(&sock_fs_type); 2414 + if (IS_ERR(sock_mnt)) { 2415 + err = PTR_ERR(sock_mnt); 2416 + goto out_mount; 2417 + } 2413 2418 2414 2419 /* The real protocol initialization is performed in later initcalls. 2415 2420 */ ··· 2429 2420 skb_timestamping_init(); 2430 2421 #endif 2431 2422 2432 - return 0; 2423 + out: 2424 + return err; 2425 + 2426 + out_mount: 2427 + unregister_filesystem(&sock_fs_type); 2428 + out_fs: 2429 + goto out; 2433 2430 } 2434 2431 2435 2432 core_initcall(sock_init); /* early initcall */