Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

debugfs: defer debugfs_fsdata allocation to first usage

Currently, __debugfs_create_file allocates one struct debugfs_fsdata
instance for every file created. However, there are potentially many
debugfs files around, most of which are never touched by userspace.

Thus, defer the allocations to the first usage, i.e. to the first
debugfs_file_get().

A dentry's ->d_fsdata starts out pointing to the "real", user-provided
fops. After a debugfs_fsdata instance has been allocated (and the real
fops pointer has been moved over into its ->real_fops member),
->d_fsdata is changed to point to it from then on. The two cases are
distinguished by setting BIT(0) for the real fops case.

struct debugfs_fsdata's foremost purpose is to track active users and to
make debugfs_remove() block until they are done. Since no debugfs_fsdata
instance means no active users, make debugfs_remove() return immediately
in this case.

Take care of possible races between debugfs_file_get() and
debugfs_remove(): either debugfs_remove() must see a debugfs_fsdata
instance and thus wait for possible active users or debugfs_file_get() must
see a dead dentry and return immediately.

Make a dentry's ->d_release(), i.e. debugfs_release_dentry(), check whether
->d_fsdata is actually a debugfs_fsdata instance before kfree()ing it.

Similarly, make debugfs_real_fops() check whether ->d_fsdata is actually
a debugfs_fsdata instance before returning it, otherwise emit a warning.

The set of possible error codes returned from debugfs_file_get() has grown
from just -EIO to -EIO and -ENOMEM. Make open_proxy_open() and full_proxy_open()
pass the -ENOMEM onwards to their callers.

Signed-off-by: Nicolai Stange <nicstange@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Nicolai Stange and committed by
Greg Kroah-Hartman
7d39bc50 154b9d75

+73 -26
+46 -9
fs/debugfs/file.c
··· 53 53 { 54 54 struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata; 55 55 56 + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) { 57 + /* 58 + * Urgh, we've been called w/o a protecting 59 + * debugfs_file_get(). 60 + */ 61 + WARN_ON(1); 62 + return NULL; 63 + } 64 + 56 65 return fsd->real_fops; 57 66 } 58 67 EXPORT_SYMBOL_GPL(debugfs_real_fops); ··· 83 74 */ 84 75 int debugfs_file_get(struct dentry *dentry) 85 76 { 86 - struct debugfs_fsdata *fsd = dentry->d_fsdata; 77 + struct debugfs_fsdata *fsd; 78 + void *d_fsd; 87 79 88 - /* Avoid starvation of removers. */ 80 + d_fsd = READ_ONCE(dentry->d_fsdata); 81 + if (!((unsigned long)d_fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) { 82 + fsd = d_fsd; 83 + } else { 84 + fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); 85 + if (!fsd) 86 + return -ENOMEM; 87 + 88 + fsd->real_fops = (void *)((unsigned long)d_fsd & 89 + ~DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); 90 + refcount_set(&fsd->active_users, 1); 91 + init_completion(&fsd->active_users_drained); 92 + if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd) { 93 + kfree(fsd); 94 + fsd = READ_ONCE(dentry->d_fsdata); 95 + } 96 + } 97 + 98 + /* 99 + * In case of a successful cmpxchg() above, this check is 100 + * strictly necessary and must follow it, see the comment in 101 + * __debugfs_remove_file(). 102 + * OTOH, if the cmpxchg() hasn't been executed or wasn't 103 + * successful, this serves the purpose of not starving 104 + * removers. 
105 + */ 89 106 if (d_unlinked(dentry)) 90 107 return -EIO; 91 108 ··· 133 98 */ 134 99 void debugfs_file_put(struct dentry *dentry) 135 100 { 136 - struct debugfs_fsdata *fsd = dentry->d_fsdata; 101 + struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata); 137 102 138 103 if (refcount_dec_and_test(&fsd->active_users)) 139 104 complete(&fsd->active_users_drained); ··· 144 109 { 145 110 struct dentry *dentry = F_DENTRY(filp); 146 111 const struct file_operations *real_fops = NULL; 147 - int r = 0; 112 + int r; 148 113 149 - if (debugfs_file_get(dentry)) 150 - return -ENOENT; 114 + r = debugfs_file_get(dentry); 115 + if (r) 116 + return r == -EIO ? -ENOENT : r; 151 117 152 118 real_fops = debugfs_real_fops(filp); 153 119 real_fops = fops_get(real_fops); ··· 269 233 struct dentry *dentry = F_DENTRY(filp); 270 234 const struct file_operations *real_fops = NULL; 271 235 struct file_operations *proxy_fops = NULL; 272 - int r = 0; 236 + int r; 273 237 274 - if (debugfs_file_get(dentry)) 275 - return -ENOENT; 238 + r = debugfs_file_get(dentry); 239 + if (r) 240 + return r == -EIO ? -ENOENT : r; 276 241 277 242 real_fops = debugfs_real_fops(filp); 278 243 real_fops = fops_get(real_fops);
+19 -17
fs/debugfs/inode.c
··· 184 184 185 185 static void debugfs_release_dentry(struct dentry *dentry) 186 186 { 187 - kfree(dentry->d_fsdata); 187 + void *fsd = dentry->d_fsdata; 188 + 189 + if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) 190 + kfree(dentry->d_fsdata); 188 191 } 189 192 190 193 static struct vfsmount *debugfs_automount(struct path *path) ··· 347 344 { 348 345 struct dentry *dentry; 349 346 struct inode *inode; 350 - struct debugfs_fsdata *fsd; 351 - 352 - fsd = kmalloc(sizeof(*fsd), GFP_KERNEL); 353 - if (!fsd) 354 - return NULL; 355 347 356 348 if (!(mode & S_IFMT)) 357 349 mode |= S_IFREG; 358 350 BUG_ON(!S_ISREG(mode)); 359 351 dentry = start_creating(name, parent); 360 352 361 - if (IS_ERR(dentry)) { 362 - kfree(fsd); 353 + if (IS_ERR(dentry)) 363 354 return NULL; 364 - } 365 355 366 356 inode = debugfs_get_inode(dentry->d_sb); 367 - if (unlikely(!inode)) { 368 - kfree(fsd); 357 + if (unlikely(!inode)) 369 358 return failed_creating(dentry); 370 - } 371 359 372 360 inode->i_mode = mode; 373 361 inode->i_private = data; 374 362 375 363 inode->i_fop = proxy_fops; 376 - fsd->real_fops = real_fops; 377 - refcount_set(&fsd->active_users, 1); 378 - dentry->d_fsdata = fsd; 364 + dentry->d_fsdata = (void *)((unsigned long)real_fops | 365 + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT); 379 366 380 367 d_instantiate(dentry, inode); 381 368 fsnotify_create(d_inode(dentry->d_parent), dentry); ··· 628 635 629 636 simple_unlink(d_inode(parent), dentry); 630 637 d_delete(dentry); 631 - fsd = dentry->d_fsdata; 632 - init_completion(&fsd->active_users_drained); 638 + 639 + /* 640 + * Paired with the closing smp_mb() implied by a successful 641 + * cmpxchg() in debugfs_file_get(): either 642 + * debugfs_file_get() must see a dead dentry or we must see a 643 + * debugfs_fsdata instance at ->d_fsdata here (or both). 
644 + */ 645 + smp_mb(); 646 + fsd = READ_ONCE(dentry->d_fsdata); 647 + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT) 648 + return; 633 649 if (!refcount_dec_and_test(&fsd->active_users)) 634 650 wait_for_completion(&fsd->active_users_drained); 635 651 }
+8
fs/debugfs/internal.h
··· 25 25 struct completion active_users_drained; 26 26 }; 27 27 28 + /* 29 + * A dentry's ->d_fsdata either points to the real fops or to a 30 + * dynamically allocated debugfs_fsdata instance. 31 + * In order to distinguish between these two cases, a real fops 32 + * pointer gets its lowest bit set. 33 + */ 34 + #define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0) 35 + 28 36 #endif /* _DEBUGFS_INTERNAL_H_ */