Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'pull-work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs dcache updates from Al Viro:
"The main part here is making parallel lookups safe for RT - making
sure preemption is disabled in start_dir_add()/ end_dir_add() sections
(on non-RT it's automatic, on RT it needs to be done explicitly)
and moving wakeups from __d_lookup_done() inside of such to the end of
those sections.

Wakeups can be safely delayed for as long as ->d_lock on in-lookup
dentry is held; proving that has caught a bug in d_add_ci() that
allows memory corruption when sufficiently bogus ntfs (or
case-insensitive xfs) image is mounted. Easily fixed, fortunately"

* tag 'pull-work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
fs/dcache: Move wakeup out of i_seq_dir write held region.
fs/dcache: Move the wakeup from __d_lookup_done() to the caller.
fs/dcache: Disable preemption on i_dir_seq write side on PREEMPT_RT
d_add_ci(): make sure we don't miss d_lookup_done()

+46 -17
+43 -11
fs/dcache.c
··· 2240 2240 } 2241 2241 res = d_splice_alias(inode, found); 2242 2242 if (res) { 2243 + d_lookup_done(found); 2243 2244 dput(found); 2244 2245 return res; 2245 2246 } ··· 2564 2563 2565 2564 static inline unsigned start_dir_add(struct inode *dir) 2566 2565 { 2567 - 2566 + /* 2567 + * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT 2568 + * kernels spin_lock() implicitly disables preemption, but not on 2569 + * PREEMPT_RT. So for RT it has to be done explicitly to protect 2570 + * the sequence count write side critical section against a reader 2571 + * or another writer preempting, which would result in a live lock. 2572 + */ 2573 + if (IS_ENABLED(CONFIG_PREEMPT_RT)) 2574 + preempt_disable(); 2568 2575 for (;;) { 2569 2576 unsigned n = dir->i_dir_seq; 2570 2577 if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) ··· 2581 2572 } 2582 2573 } 2583 2574 2584 - static inline void end_dir_add(struct inode *dir, unsigned n) 2575 + static inline void end_dir_add(struct inode *dir, unsigned int n, 2576 + wait_queue_head_t *d_wait) 2585 2577 { 2586 2578 smp_store_release(&dir->i_dir_seq, n + 2); 2579 + if (IS_ENABLED(CONFIG_PREEMPT_RT)) 2580 + preempt_enable(); 2581 + wake_up_all(d_wait); 2587 2582 } 2588 2583 2589 2584 static void d_wait_lookup(struct dentry *dentry) ··· 2714 2701 } 2715 2702 EXPORT_SYMBOL(d_alloc_parallel); 2716 2703 2717 - void __d_lookup_done(struct dentry *dentry) 2704 + /* 2705 + * - Unhash the dentry 2706 + * - Retrieve and clear the waitqueue head in dentry 2707 + * - Return the waitqueue head 2708 + */ 2709 + static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) 2718 2710 { 2719 - struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, 2720 - dentry->d_name.hash); 2711 + wait_queue_head_t *d_wait; 2712 + struct hlist_bl_head *b; 2713 + 2714 + lockdep_assert_held(&dentry->d_lock); 2715 + 2716 + b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); 2721 2717 hlist_bl_lock(b); 2722 2718 dentry->d_flags &= 
~DCACHE_PAR_LOOKUP; 2723 2719 __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); 2724 - wake_up_all(dentry->d_wait); 2720 + d_wait = dentry->d_wait; 2725 2721 dentry->d_wait = NULL; 2726 2722 hlist_bl_unlock(b); 2727 2723 INIT_HLIST_NODE(&dentry->d_u.d_alias); 2728 2724 INIT_LIST_HEAD(&dentry->d_lru); 2725 + return d_wait; 2729 2726 } 2730 - EXPORT_SYMBOL(__d_lookup_done); 2727 + 2728 + void __d_lookup_unhash_wake(struct dentry *dentry) 2729 + { 2730 + spin_lock(&dentry->d_lock); 2731 + wake_up_all(__d_lookup_unhash(dentry)); 2732 + spin_unlock(&dentry->d_lock); 2733 + } 2734 + EXPORT_SYMBOL(__d_lookup_unhash_wake); 2731 2735 2732 2736 /* inode->i_lock held if inode is non-NULL */ 2733 2737 2734 2738 static inline void __d_add(struct dentry *dentry, struct inode *inode) 2735 2739 { 2740 + wait_queue_head_t *d_wait; 2736 2741 struct inode *dir = NULL; 2737 2742 unsigned n; 2738 2743 spin_lock(&dentry->d_lock); 2739 2744 if (unlikely(d_in_lookup(dentry))) { 2740 2745 dir = dentry->d_parent->d_inode; 2741 2746 n = start_dir_add(dir); 2742 - __d_lookup_done(dentry); 2747 + d_wait = __d_lookup_unhash(dentry); 2743 2748 } 2744 2749 if (inode) { 2745 2750 unsigned add_flags = d_flags_for_inode(inode); ··· 2769 2738 } 2770 2739 __d_rehash(dentry); 2771 2740 if (dir) 2772 - end_dir_add(dir, n); 2741 + end_dir_add(dir, n, d_wait); 2773 2742 spin_unlock(&dentry->d_lock); 2774 2743 if (inode) 2775 2744 spin_unlock(&inode->i_lock); ··· 2916 2885 bool exchange) 2917 2886 { 2918 2887 struct dentry *old_parent, *p; 2888 + wait_queue_head_t *d_wait; 2919 2889 struct inode *dir = NULL; 2920 2890 unsigned n; 2921 2891 ··· 2947 2915 if (unlikely(d_in_lookup(target))) { 2948 2916 dir = target->d_parent->d_inode; 2949 2917 n = start_dir_add(dir); 2950 - __d_lookup_done(target); 2918 + d_wait = __d_lookup_unhash(target); 2951 2919 } 2952 2920 2953 2921 write_seqcount_begin(&dentry->d_seq); ··· 2983 2951 write_seqcount_end(&dentry->d_seq); 2984 2952 2985 2953 if (dir) 2986 - end_dir_add(dir, 
n); 2954 + end_dir_add(dir, n, d_wait); 2987 2955 2988 2956 if (dentry->d_parent != old_parent) 2989 2957 spin_unlock(&dentry->d_parent->d_lock);
+3 -6
include/linux/dcache.h
··· 349 349 spin_unlock(&dentry->d_lock); 350 350 } 351 351 352 - extern void __d_lookup_done(struct dentry *); 352 + extern void __d_lookup_unhash_wake(struct dentry *dentry); 353 353 354 354 static inline int d_in_lookup(const struct dentry *dentry) 355 355 { ··· 358 358 359 359 static inline void d_lookup_done(struct dentry *dentry) 360 360 { 361 - if (unlikely(d_in_lookup(dentry))) { 362 - spin_lock(&dentry->d_lock); 363 - __d_lookup_done(dentry); 364 - spin_unlock(&dentry->d_lock); 365 - } 361 + if (unlikely(d_in_lookup(dentry))) 362 + __d_lookup_unhash_wake(dentry); 366 363 } 367 364 368 365 extern void dput(struct dentry *);