Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing
4 *
5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8 *
9 * eventfs is used to dynamically create inodes and dentries based on the
10 * meta data provided by the tracing system.
11 *
12 * eventfs stores the meta-data of files/dirs and holds off on creating
13 * inodes/dentries of the files. When accessed, the eventfs will create the
14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15 * and delete the inodes/dentries when they are no longer referenced.
16 */
17#include <linux/fsnotify.h>
18#include <linux/fs.h>
19#include <linux/namei.h>
20#include <linux/workqueue.h>
21#include <linux/security.h>
22#include <linux/tracefs.h>
23#include <linux/kref.h>
24#include <linux/delay.h>
25#include "internal.h"
26
/*
 * eventfs_mutex guards the dentry pointer of an eventfs_inode (ei).
 * Take this mutex before touching ei->dentry, and check ei->is_freed
 * first: once is_freed is set the dentry is only waiting for its final
 * dput() before being freed.
 */
static DEFINE_MUTEX(eventfs_mutex);

/*
 * The lifetime of the eventfs_inode (ei) itself is covered by SRCU.
 * Removal unlinks the ei from its parent's list and sets is_freed, both
 * under eventfs_mutex.  The ei is freed only after the SRCU grace period
 * has passed and the last dput() has been done.
 */
DEFINE_STATIC_SRCU(eventfs_srcu);
42
43/* Mode is unsigned short, use the upper bits for flags */
44enum {
45 EVENTFS_SAVE_MODE = BIT(16),
46 EVENTFS_SAVE_UID = BIT(17),
47 EVENTFS_SAVE_GID = BIT(18),
48};
49
50#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
51
/* Forward declarations for the callbacks used in the ops tables below */
static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry,
					  unsigned int flags);
static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
static int eventfs_release(struct inode *inode, struct file *file);
58
59static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
60{
61 unsigned int ia_valid = iattr->ia_valid;
62
63 if (ia_valid & ATTR_MODE) {
64 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
65 (iattr->ia_mode & EVENTFS_MODE_MASK) |
66 EVENTFS_SAVE_MODE;
67 }
68 if (ia_valid & ATTR_UID) {
69 attr->mode |= EVENTFS_SAVE_UID;
70 attr->uid = iattr->ia_uid;
71 }
72 if (ia_valid & ATTR_GID) {
73 attr->mode |= EVENTFS_SAVE_GID;
74 attr->gid = iattr->ia_gid;
75 }
76}
77
78static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
79 struct iattr *iattr)
80{
81 const struct eventfs_entry *entry;
82 struct eventfs_inode *ei;
83 const char *name;
84 int ret;
85
86 mutex_lock(&eventfs_mutex);
87 ei = dentry->d_fsdata;
88 if (ei->is_freed) {
89 /* Do not allow changes if the event is about to be removed. */
90 mutex_unlock(&eventfs_mutex);
91 return -ENODEV;
92 }
93
94 /* Preallocate the children mode array if necessary */
95 if (!(dentry->d_inode->i_mode & S_IFDIR)) {
96 if (!ei->entry_attrs) {
97 ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
98 GFP_NOFS);
99 if (!ei->entry_attrs) {
100 ret = -ENOMEM;
101 goto out;
102 }
103 }
104 }
105
106 ret = simple_setattr(idmap, dentry, iattr);
107 if (ret < 0)
108 goto out;
109
110 /*
111 * If this is a dir, then update the ei cache, only the file
112 * mode is saved in the ei->m_children, and the ownership is
113 * determined by the parent directory.
114 */
115 if (dentry->d_inode->i_mode & S_IFDIR) {
116 update_attr(&ei->attr, iattr);
117
118 } else {
119 name = dentry->d_name.name;
120
121 for (int i = 0; i < ei->nr_entries; i++) {
122 entry = &ei->entries[i];
123 if (strcmp(name, entry->name) == 0) {
124 update_attr(&ei->entry_attrs[i], iattr);
125 break;
126 }
127 }
128 }
129 out:
130 mutex_unlock(&eventfs_mutex);
131 return ret;
132}
133
134static const struct inode_operations eventfs_root_dir_inode_operations = {
135 .lookup = eventfs_root_lookup,
136 .setattr = eventfs_set_attr,
137};
138
139static const struct inode_operations eventfs_file_inode_operations = {
140 .setattr = eventfs_set_attr,
141};
142
143static const struct file_operations eventfs_file_operations = {
144 .open = dcache_dir_open_wrapper,
145 .read = generic_read_dir,
146 .iterate_shared = dcache_readdir_wrapper,
147 .llseek = generic_file_llseek,
148 .release = eventfs_release,
149};
150
151static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
152{
153 if (!attr) {
154 inode->i_mode = mode;
155 return;
156 }
157
158 if (attr->mode & EVENTFS_SAVE_MODE)
159 inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
160 else
161 inode->i_mode = mode;
162
163 if (attr->mode & EVENTFS_SAVE_UID)
164 inode->i_uid = attr->uid;
165
166 if (attr->mode & EVENTFS_SAVE_GID)
167 inode->i_gid = attr->gid;
168}
169
170/**
171 * create_file - create a file in the tracefs filesystem
172 * @name: the name of the file to create.
173 * @mode: the permission that the file should have.
174 * @attr: saved attributes changed by user
175 * @parent: parent dentry for this file.
176 * @data: something that the caller will want to get to later on.
177 * @fop: struct file_operations that should be used for this file.
178 *
179 * This function creates a dentry that represents a file in the eventsfs_inode
180 * directory. The inode.i_private pointer will point to @data in the open()
181 * call.
182 */
183static struct dentry *create_file(const char *name, umode_t mode,
184 struct eventfs_attr *attr,
185 struct dentry *parent, void *data,
186 const struct file_operations *fop)
187{
188 struct tracefs_inode *ti;
189 struct dentry *dentry;
190 struct inode *inode;
191
192 if (!(mode & S_IFMT))
193 mode |= S_IFREG;
194
195 if (WARN_ON_ONCE(!S_ISREG(mode)))
196 return NULL;
197
198 WARN_ON_ONCE(!parent);
199 dentry = eventfs_start_creating(name, parent);
200
201 if (IS_ERR(dentry))
202 return dentry;
203
204 inode = tracefs_get_inode(dentry->d_sb);
205 if (unlikely(!inode))
206 return eventfs_failed_creating(dentry);
207
208 /* If the user updated the directory's attributes, use them */
209 update_inode_attr(inode, attr, mode);
210
211 inode->i_op = &eventfs_file_inode_operations;
212 inode->i_fop = fop;
213 inode->i_private = data;
214
215 ti = get_tracefs(inode);
216 ti->flags |= TRACEFS_EVENT_INODE;
217 d_instantiate(dentry, inode);
218 fsnotify_create(dentry->d_parent->d_inode, dentry);
219 return eventfs_end_creating(dentry);
220};
221
222/**
223 * create_dir - create a dir in the tracefs filesystem
224 * @ei: the eventfs_inode that represents the directory to create
225 * @parent: parent dentry for this file.
226 *
227 * This function will create a dentry for a directory represented by
228 * a eventfs_inode.
229 */
230static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
231{
232 struct tracefs_inode *ti;
233 struct dentry *dentry;
234 struct inode *inode;
235
236 dentry = eventfs_start_creating(ei->name, parent);
237 if (IS_ERR(dentry))
238 return dentry;
239
240 inode = tracefs_get_inode(dentry->d_sb);
241 if (unlikely(!inode))
242 return eventfs_failed_creating(dentry);
243
244 /* If the user updated the directory's attributes, use them */
245 update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
246
247 inode->i_op = &eventfs_root_dir_inode_operations;
248 inode->i_fop = &eventfs_file_operations;
249
250 ti = get_tracefs(inode);
251 ti->flags |= TRACEFS_EVENT_INODE;
252
253 inc_nlink(inode);
254 d_instantiate(dentry, inode);
255 inc_nlink(dentry->d_parent->d_inode);
256 fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
257 return eventfs_end_creating(dentry);
258}
259
260static void free_ei(struct eventfs_inode *ei)
261{
262 kfree_const(ei->name);
263 kfree(ei->d_children);
264 kfree(ei->entry_attrs);
265 kfree(ei);
266}
267
268/**
269 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
270 * @ti: the tracefs_inode of the dentry
271 * @dentry: dentry which has the reference to remove.
272 *
273 * Remove the association between a dentry from an eventfs_inode.
274 */
275void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
276{
277 struct eventfs_inode *ei;
278 int i;
279
280 mutex_lock(&eventfs_mutex);
281
282 ei = dentry->d_fsdata;
283 if (!ei)
284 goto out;
285
286 /* This could belong to one of the files of the ei */
287 if (ei->dentry != dentry) {
288 for (i = 0; i < ei->nr_entries; i++) {
289 if (ei->d_children[i] == dentry)
290 break;
291 }
292 if (WARN_ON_ONCE(i == ei->nr_entries))
293 goto out;
294 ei->d_children[i] = NULL;
295 } else if (ei->is_freed) {
296 free_ei(ei);
297 } else {
298 ei->dentry = NULL;
299 }
300
301 dentry->d_fsdata = NULL;
302 out:
303 mutex_unlock(&eventfs_mutex);
304}
305
306/**
307 * create_file_dentry - create a dentry for a file of an eventfs_inode
308 * @ei: the eventfs_inode that the file will be created under
309 * @idx: the index into the d_children[] of the @ei
310 * @parent: The parent dentry of the created file.
311 * @name: The name of the file to create
312 * @mode: The mode of the file.
313 * @data: The data to use to set the inode of the file with on open()
314 * @fops: The fops of the file to be created.
315 * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
316 *
317 * Create a dentry for a file of an eventfs_inode @ei and place it into the
318 * address located at @e_dentry. If the @e_dentry already has a dentry, then
319 * just do a dget() on it and return. Otherwise create the dentry and attach it.
320 */
321static struct dentry *
322create_file_dentry(struct eventfs_inode *ei, int idx,
323 struct dentry *parent, const char *name, umode_t mode, void *data,
324 const struct file_operations *fops, bool lookup)
325{
326 struct eventfs_attr *attr = NULL;
327 struct dentry **e_dentry = &ei->d_children[idx];
328 struct dentry *dentry;
329
330 WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
331
332 mutex_lock(&eventfs_mutex);
333 if (ei->is_freed) {
334 mutex_unlock(&eventfs_mutex);
335 return NULL;
336 }
337 /* If the e_dentry already has a dentry, use it */
338 if (*e_dentry) {
339 /* lookup does not need to up the ref count */
340 if (!lookup)
341 dget(*e_dentry);
342 mutex_unlock(&eventfs_mutex);
343 return *e_dentry;
344 }
345
346 /* ei->entry_attrs are protected by SRCU */
347 if (ei->entry_attrs)
348 attr = &ei->entry_attrs[idx];
349
350 mutex_unlock(&eventfs_mutex);
351
352 dentry = create_file(name, mode, attr, parent, data, fops);
353
354 mutex_lock(&eventfs_mutex);
355
356 if (IS_ERR_OR_NULL(dentry)) {
357 /*
358 * When the mutex was released, something else could have
359 * created the dentry for this e_dentry. In which case
360 * use that one.
361 *
362 * If ei->is_freed is set, the e_dentry is currently on its
363 * way to being freed, don't return it. If e_dentry is NULL
364 * it means it was already freed.
365 */
366 if (ei->is_freed)
367 dentry = NULL;
368 else
369 dentry = *e_dentry;
370 /* The lookup does not need to up the dentry refcount */
371 if (dentry && !lookup)
372 dget(dentry);
373 mutex_unlock(&eventfs_mutex);
374 return dentry;
375 }
376
377 if (!*e_dentry && !ei->is_freed) {
378 *e_dentry = dentry;
379 dentry->d_fsdata = ei;
380 } else {
381 /*
382 * Should never happen unless we get here due to being freed.
383 * Otherwise it means two dentries exist with the same name.
384 */
385 WARN_ON_ONCE(!ei->is_freed);
386 dentry = NULL;
387 }
388 mutex_unlock(&eventfs_mutex);
389
390 if (lookup)
391 dput(dentry);
392
393 return dentry;
394}
395
396/**
397 * eventfs_post_create_dir - post create dir routine
398 * @ei: eventfs_inode of recently created dir
399 *
400 * Map the meta-data of files within an eventfs dir to their parent dentry
401 */
402static void eventfs_post_create_dir(struct eventfs_inode *ei)
403{
404 struct eventfs_inode *ei_child;
405 struct tracefs_inode *ti;
406
407 lockdep_assert_held(&eventfs_mutex);
408
409 /* srcu lock already held */
410 /* fill parent-child relation */
411 list_for_each_entry_srcu(ei_child, &ei->children, list,
412 srcu_read_lock_held(&eventfs_srcu)) {
413 ei_child->d_parent = ei->dentry;
414 }
415
416 ti = get_tracefs(ei->dentry->d_inode);
417 ti->private = ei;
418}
419
420/**
421 * create_dir_dentry - Create a directory dentry for the eventfs_inode
422 * @pei: The eventfs_inode parent of ei.
423 * @ei: The eventfs_inode to create the directory for
424 * @parent: The dentry of the parent of this directory
425 * @lookup: True if this is called by the lookup code
426 *
427 * This creates and attaches a directory dentry to the eventfs_inode @ei.
428 */
429static struct dentry *
430create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
431 struct dentry *parent, bool lookup)
432{
433 struct dentry *dentry = NULL;
434
435 WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
436
437 mutex_lock(&eventfs_mutex);
438 if (pei->is_freed || ei->is_freed) {
439 mutex_unlock(&eventfs_mutex);
440 return NULL;
441 }
442 if (ei->dentry) {
443 /* If the dentry already has a dentry, use it */
444 dentry = ei->dentry;
445 /* lookup does not need to up the ref count */
446 if (!lookup)
447 dget(dentry);
448 mutex_unlock(&eventfs_mutex);
449 return dentry;
450 }
451 mutex_unlock(&eventfs_mutex);
452
453 dentry = create_dir(ei, parent);
454
455 mutex_lock(&eventfs_mutex);
456
457 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
458 /*
459 * When the mutex was released, something else could have
460 * created the dentry for this e_dentry. In which case
461 * use that one.
462 *
463 * If ei->is_freed is set, the e_dentry is currently on its
464 * way to being freed.
465 */
466 dentry = ei->dentry;
467 if (dentry && !lookup)
468 dget(dentry);
469 mutex_unlock(&eventfs_mutex);
470 return dentry;
471 }
472
473 if (!ei->dentry && !ei->is_freed) {
474 ei->dentry = dentry;
475 eventfs_post_create_dir(ei);
476 dentry->d_fsdata = ei;
477 } else {
478 /*
479 * Should never happen unless we get here due to being freed.
480 * Otherwise it means two dentries exist with the same name.
481 */
482 WARN_ON_ONCE(!ei->is_freed);
483 dentry = NULL;
484 }
485 mutex_unlock(&eventfs_mutex);
486
487 if (lookup)
488 dput(dentry);
489
490 return dentry;
491}
492
493/**
494 * eventfs_root_lookup - lookup routine to create file/dir
495 * @dir: in which a lookup is being done
496 * @dentry: file/dir dentry
497 * @flags: Just passed to simple_lookup()
498 *
499 * Used to create dynamic file/dir with-in @dir, search with-in @ei
500 * list, if @dentry found go ahead and create the file/dir
501 */
502
503static struct dentry *eventfs_root_lookup(struct inode *dir,
504 struct dentry *dentry,
505 unsigned int flags)
506{
507 const struct file_operations *fops;
508 const struct eventfs_entry *entry;
509 struct eventfs_inode *ei_child;
510 struct tracefs_inode *ti;
511 struct eventfs_inode *ei;
512 struct dentry *ei_dentry = NULL;
513 struct dentry *ret = NULL;
514 const char *name = dentry->d_name.name;
515 bool created = false;
516 umode_t mode;
517 void *data;
518 int idx;
519 int i;
520 int r;
521
522 ti = get_tracefs(dir);
523 if (!(ti->flags & TRACEFS_EVENT_INODE))
524 return NULL;
525
526 /* Grab srcu to prevent the ei from going away */
527 idx = srcu_read_lock(&eventfs_srcu);
528
529 /*
530 * Grab the eventfs_mutex to consistent value from ti->private.
531 * This s
532 */
533 mutex_lock(&eventfs_mutex);
534 ei = READ_ONCE(ti->private);
535 if (ei && !ei->is_freed)
536 ei_dentry = READ_ONCE(ei->dentry);
537 mutex_unlock(&eventfs_mutex);
538
539 if (!ei || !ei_dentry)
540 goto out;
541
542 data = ei->data;
543
544 list_for_each_entry_srcu(ei_child, &ei->children, list,
545 srcu_read_lock_held(&eventfs_srcu)) {
546 if (strcmp(ei_child->name, name) != 0)
547 continue;
548 ret = simple_lookup(dir, dentry, flags);
549 create_dir_dentry(ei, ei_child, ei_dentry, true);
550 created = true;
551 break;
552 }
553
554 if (created)
555 goto out;
556
557 for (i = 0; i < ei->nr_entries; i++) {
558 entry = &ei->entries[i];
559 if (strcmp(name, entry->name) == 0) {
560 void *cdata = data;
561 mutex_lock(&eventfs_mutex);
562 /* If ei->is_freed, then the event itself may be too */
563 if (!ei->is_freed)
564 r = entry->callback(name, &mode, &cdata, &fops);
565 else
566 r = -1;
567 mutex_unlock(&eventfs_mutex);
568 if (r <= 0)
569 continue;
570 ret = simple_lookup(dir, dentry, flags);
571 create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
572 fops, true);
573 break;
574 }
575 }
576 out:
577 srcu_read_unlock(&eventfs_srcu, idx);
578 return ret;
579}
580
/*
 * Stored in file->private_data of an open eventfs directory by
 * dcache_dir_open_wrapper() and torn down by eventfs_release().
 */
struct dentry_list {
	/* the private_data value set up by dcache_dir_open() */
	void			*cursor;
	/* NULL-terminated array of dentries referenced by the open */
	struct dentry		**dentries;
};
585
586/**
587 * eventfs_release - called to release eventfs file/dir
588 * @inode: inode to be released
589 * @file: file to be released (not used)
590 */
591static int eventfs_release(struct inode *inode, struct file *file)
592{
593 struct tracefs_inode *ti;
594 struct dentry_list *dlist = file->private_data;
595 void *cursor;
596 int i;
597
598 ti = get_tracefs(inode);
599 if (!(ti->flags & TRACEFS_EVENT_INODE))
600 return -EINVAL;
601
602 if (WARN_ON_ONCE(!dlist))
603 return -EINVAL;
604
605 for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
606 dput(dlist->dentries[i]);
607 }
608
609 cursor = dlist->cursor;
610 kfree(dlist->dentries);
611 kfree(dlist);
612 file->private_data = cursor;
613 return dcache_dir_close(inode, file);
614}
615
616static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
617{
618 struct dentry **tmp;
619
620 tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
621 if (!tmp)
622 return -1;
623 tmp[cnt] = d;
624 tmp[cnt + 1] = NULL;
625 *dentries = tmp;
626 return 0;
627}
628
629/**
630 * dcache_dir_open_wrapper - eventfs open wrapper
631 * @inode: not used
632 * @file: dir to be opened (to create it's children)
633 *
634 * Used to dynamic create file/dir with-in @file, all the
635 * file/dir will be created. If already created then references
636 * will be increased
637 */
638static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
639{
640 const struct file_operations *fops;
641 const struct eventfs_entry *entry;
642 struct eventfs_inode *ei_child;
643 struct tracefs_inode *ti;
644 struct eventfs_inode *ei;
645 struct dentry_list *dlist;
646 struct dentry **dentries = NULL;
647 struct dentry *parent = file_dentry(file);
648 struct dentry *d;
649 struct inode *f_inode = file_inode(file);
650 const char *name = parent->d_name.name;
651 umode_t mode;
652 void *data;
653 int cnt = 0;
654 int idx;
655 int ret;
656 int i;
657 int r;
658
659 ti = get_tracefs(f_inode);
660 if (!(ti->flags & TRACEFS_EVENT_INODE))
661 return -EINVAL;
662
663 if (WARN_ON_ONCE(file->private_data))
664 return -EINVAL;
665
666 idx = srcu_read_lock(&eventfs_srcu);
667
668 mutex_lock(&eventfs_mutex);
669 ei = READ_ONCE(ti->private);
670 mutex_unlock(&eventfs_mutex);
671
672 if (!ei) {
673 srcu_read_unlock(&eventfs_srcu, idx);
674 return -EINVAL;
675 }
676
677
678 data = ei->data;
679
680 dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
681 if (!dlist) {
682 srcu_read_unlock(&eventfs_srcu, idx);
683 return -ENOMEM;
684 }
685
686 inode_lock(parent->d_inode);
687 list_for_each_entry_srcu(ei_child, &ei->children, list,
688 srcu_read_lock_held(&eventfs_srcu)) {
689 d = create_dir_dentry(ei, ei_child, parent, false);
690 if (d) {
691 ret = add_dentries(&dentries, d, cnt);
692 if (ret < 0)
693 break;
694 cnt++;
695 }
696 }
697
698 for (i = 0; i < ei->nr_entries; i++) {
699 void *cdata = data;
700 entry = &ei->entries[i];
701 name = entry->name;
702 mutex_lock(&eventfs_mutex);
703 /* If ei->is_freed, then the event itself may be too */
704 if (!ei->is_freed)
705 r = entry->callback(name, &mode, &cdata, &fops);
706 else
707 r = -1;
708 mutex_unlock(&eventfs_mutex);
709 if (r <= 0)
710 continue;
711 d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
712 if (d) {
713 ret = add_dentries(&dentries, d, cnt);
714 if (ret < 0)
715 break;
716 cnt++;
717 }
718 }
719 inode_unlock(parent->d_inode);
720 srcu_read_unlock(&eventfs_srcu, idx);
721 ret = dcache_dir_open(inode, file);
722
723 /*
724 * dcache_dir_open() sets file->private_data to a dentry cursor.
725 * Need to save that but also save all the dentries that were
726 * opened by this function.
727 */
728 dlist->cursor = file->private_data;
729 dlist->dentries = dentries;
730 file->private_data = dlist;
731 return ret;
732}
733
734/*
735 * This just sets the file->private_data back to the cursor and back.
736 */
737static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
738{
739 struct dentry_list *dlist = file->private_data;
740 int ret;
741
742 file->private_data = dlist->cursor;
743 ret = dcache_readdir(file, ctx);
744 dlist->cursor = file->private_data;
745 file->private_data = dlist;
746 return ret;
747}
748
749/**
750 * eventfs_create_dir - Create the eventfs_inode for this directory
751 * @name: The name of the directory to create.
752 * @parent: The eventfs_inode of the parent directory.
753 * @entries: A list of entries that represent the files under this directory
754 * @size: The number of @entries
755 * @data: The default data to pass to the files (an entry may override it).
756 *
757 * This function creates the descriptor to represent a directory in the
758 * eventfs. This descriptor is an eventfs_inode, and it is returned to be
759 * used to create other children underneath.
760 *
761 * The @entries is an array of eventfs_entry structures which has:
762 * const char *name
763 * eventfs_callback callback;
764 *
765 * The name is the name of the file, and the callback is a pointer to a function
766 * that will be called when the file is reference (either by lookup or by
767 * reading a directory). The callback is of the prototype:
768 *
769 * int callback(const char *name, umode_t *mode, void **data,
770 * const struct file_operations **fops);
771 *
772 * When a file needs to be created, this callback will be called with
773 * name = the name of the file being created (so that the same callback
774 * may be used for multiple files).
775 * mode = a place to set the file's mode
776 * data = A pointer to @data, and the callback may replace it, which will
777 * cause the file created to pass the new data to the open() call.
778 * fops = the fops to use for the created file.
779 *
780 * NB. @callback is called while holding internal locks of the eventfs
781 * system. The callback must not call any code that might also call into
782 * the tracefs or eventfs system or it will risk creating a deadlock.
783 */
784struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
785 const struct eventfs_entry *entries,
786 int size, void *data)
787{
788 struct eventfs_inode *ei;
789
790 if (!parent)
791 return ERR_PTR(-EINVAL);
792
793 ei = kzalloc(sizeof(*ei), GFP_KERNEL);
794 if (!ei)
795 return ERR_PTR(-ENOMEM);
796
797 ei->name = kstrdup_const(name, GFP_KERNEL);
798 if (!ei->name) {
799 kfree(ei);
800 return ERR_PTR(-ENOMEM);
801 }
802
803 if (size) {
804 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
805 if (!ei->d_children) {
806 kfree_const(ei->name);
807 kfree(ei);
808 return ERR_PTR(-ENOMEM);
809 }
810 }
811
812 ei->entries = entries;
813 ei->nr_entries = size;
814 ei->data = data;
815 INIT_LIST_HEAD(&ei->children);
816 INIT_LIST_HEAD(&ei->list);
817
818 mutex_lock(&eventfs_mutex);
819 if (!parent->is_freed) {
820 list_add_tail(&ei->list, &parent->children);
821 ei->d_parent = parent->dentry;
822 }
823 mutex_unlock(&eventfs_mutex);
824
825 /* Was the parent freed? */
826 if (list_empty(&ei->list)) {
827 free_ei(ei);
828 ei = NULL;
829 }
830 return ei;
831}
832
833/**
834 * eventfs_create_events_dir - create the top level events directory
835 * @name: The name of the top level directory to create.
836 * @parent: Parent dentry for this file in the tracefs directory.
837 * @entries: A list of entries that represent the files under this directory
838 * @size: The number of @entries
839 * @data: The default data to pass to the files (an entry may override it).
840 *
841 * This function creates the top of the trace event directory.
842 *
843 * See eventfs_create_dir() for use of @entries.
844 */
845struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
846 const struct eventfs_entry *entries,
847 int size, void *data)
848{
849 struct dentry *dentry = tracefs_start_creating(name, parent);
850 struct eventfs_inode *ei;
851 struct tracefs_inode *ti;
852 struct inode *inode;
853
854 if (security_locked_down(LOCKDOWN_TRACEFS))
855 return NULL;
856
857 if (IS_ERR(dentry))
858 return ERR_CAST(dentry);
859
860 ei = kzalloc(sizeof(*ei), GFP_KERNEL);
861 if (!ei)
862 goto fail_ei;
863
864 inode = tracefs_get_inode(dentry->d_sb);
865 if (unlikely(!inode))
866 goto fail;
867
868 if (size) {
869 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
870 if (!ei->d_children)
871 goto fail;
872 }
873
874 ei->dentry = dentry;
875 ei->entries = entries;
876 ei->nr_entries = size;
877 ei->data = data;
878 ei->name = kstrdup_const(name, GFP_KERNEL);
879 if (!ei->name)
880 goto fail;
881
882 INIT_LIST_HEAD(&ei->children);
883 INIT_LIST_HEAD(&ei->list);
884
885 ti = get_tracefs(inode);
886 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
887 ti->private = ei;
888
889 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
890 inode->i_op = &eventfs_root_dir_inode_operations;
891 inode->i_fop = &eventfs_file_operations;
892
893 dentry->d_fsdata = ei;
894
895 /* directory inodes start off with i_nlink == 2 (for "." entry) */
896 inc_nlink(inode);
897 d_instantiate(dentry, inode);
898 inc_nlink(dentry->d_parent->d_inode);
899 fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
900 tracefs_end_creating(dentry);
901
902 return ei;
903
904 fail:
905 kfree(ei->d_children);
906 kfree(ei);
907 fail_ei:
908 tracefs_failed_creating(dentry);
909 return ERR_PTR(-ENOMEM);
910}
911
912static LLIST_HEAD(free_list);
913
914static void eventfs_workfn(struct work_struct *work)
915{
916 struct eventfs_inode *ei, *tmp;
917 struct llist_node *llnode;
918
919 llnode = llist_del_all(&free_list);
920 llist_for_each_entry_safe(ei, tmp, llnode, llist) {
921 /* This dput() matches the dget() from unhook_dentry() */
922 for (int i = 0; i < ei->nr_entries; i++) {
923 if (ei->d_children[i])
924 dput(ei->d_children[i]);
925 }
926 /* This should only get here if it had a dentry */
927 if (!WARN_ON_ONCE(!ei->dentry))
928 dput(ei->dentry);
929 }
930}
931
932static DECLARE_WORK(eventfs_work, eventfs_workfn);
933
934static void free_rcu_ei(struct rcu_head *head)
935{
936 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
937
938 if (ei->dentry) {
939 /* Do not free the ei until all references of dentry are gone */
940 if (llist_add(&ei->llist, &free_list))
941 queue_work(system_unbound_wq, &eventfs_work);
942 return;
943 }
944
945 /* If the ei doesn't have a dentry, neither should its children */
946 for (int i = 0; i < ei->nr_entries; i++) {
947 WARN_ON_ONCE(ei->d_children[i]);
948 }
949
950 free_ei(ei);
951}
952
/*
 * Take the two references on @dentry (which may be NULL) that removing
 * an eventfs_inode relies on.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * First reference: expected by simple_recursive_removal(),
		 * which will include a dput().
		 */
		dget(dentry);

		/*
		 * Second reference: for the dput() in eventfs_workfn().
		 * That is required as that dput() will free the ei after
		 * the SRCU grace period is over.
		 */
		dget(dentry);
	}
}
970
971/**
972 * eventfs_remove_rec - remove eventfs dir or file from list
973 * @ei: eventfs_inode to be removed.
974 * @level: prevent recursion from going more than 3 levels deep.
975 *
976 * This function recursively removes eventfs_inodes which
977 * contains info of files and/or directories.
978 */
979static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
980{
981 struct eventfs_inode *ei_child;
982
983 if (!ei)
984 return;
985 /*
986 * Check recursion depth. It should never be greater than 3:
987 * 0 - events/
988 * 1 - events/group/
989 * 2 - events/group/event/
990 * 3 - events/group/event/file
991 */
992 if (WARN_ON_ONCE(level > 3))
993 return;
994
995 /* search for nested folders or files */
996 list_for_each_entry_srcu(ei_child, &ei->children, list,
997 lockdep_is_held(&eventfs_mutex)) {
998 /* Children only have dentry if parent does */
999 WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1000 eventfs_remove_rec(ei_child, level + 1);
1001 }
1002
1003
1004 ei->is_freed = 1;
1005
1006 for (int i = 0; i < ei->nr_entries; i++) {
1007 if (ei->d_children[i]) {
1008 /* Children only have dentry if parent does */
1009 WARN_ON_ONCE(!ei->dentry);
1010 unhook_dentry(ei->d_children[i]);
1011 }
1012 }
1013
1014 unhook_dentry(ei->dentry);
1015
1016 list_del_rcu(&ei->list);
1017 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1018}
1019
1020/**
1021 * eventfs_remove_dir - remove eventfs dir or file from list
1022 * @ei: eventfs_inode to be removed.
1023 *
1024 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1025 */
1026void eventfs_remove_dir(struct eventfs_inode *ei)
1027{
1028 struct dentry *dentry;
1029
1030 if (!ei)
1031 return;
1032
1033 mutex_lock(&eventfs_mutex);
1034 dentry = ei->dentry;
1035 eventfs_remove_rec(ei, 0);
1036 mutex_unlock(&eventfs_mutex);
1037
1038 /*
1039 * If any of the ei children has a dentry, then the ei itself
1040 * must have a dentry.
1041 */
1042 if (dentry)
1043 simple_recursive_removal(dentry, NULL);
1044}
1045
1046/**
1047 * eventfs_remove_events_dir - remove the top level eventfs directory
1048 * @ei: the event_inode returned by eventfs_create_events_dir().
1049 *
1050 * This function removes the events main directory
1051 */
1052void eventfs_remove_events_dir(struct eventfs_inode *ei)
1053{
1054 struct dentry *dentry;
1055
1056 dentry = ei->dentry;
1057 eventfs_remove_dir(ei);
1058
1059 /*
1060 * Matches the dget() done by tracefs_start_creating()
1061 * in eventfs_create_events_dir() when it the dentry was
1062 * created. In other words, it's a normal dentry that
1063 * sticks around while the other ei->dentry are created
1064 * and destroyed dynamically.
1065 */
1066 dput(dentry);
1067}