Merge tag 'trace-v6.7-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:
"Eventfs fixes:

- With the usage of simple_recursive_remove() recommended by Al Viro,
the code should not be calling "d_invalidate()" itself. Doing so is
causing crashes. The code was calling d_invalidate() when it detected
the race of looking up a file while the parent was being deleted:
the newly added dentry had d_invalidate() called on it, but the
deletion of the directory was also calling d_invalidate() on that
same dentry.

- A fix to not free the eventfs_inode (ei) until the last dput() was
called on its ei->dentry made the ei->dentry exist even after it
was marked for free by setting ei->is_freed. But code elsewhere
still expected ei->dentry to be NULL whenever ei->is_freed is set
and would trigger WARN_ON if it was not. That expectation no
longer holds, and there should not be any warnings in that case.

- Use GFP_NOFS for allocations done under eventfs_mutex. The
eventfs_mutex can be taken on file system reclaim, so make sure
that allocations done under that mutex do not trigger file system
reclaim.

- Clean up code by moving the taking of inode_lock out of the helper
functions and into the callers that need it, instead of using a
parameter to decide whether to take it. The lock must always be
held, but some callers of the helper functions already hold it
when they call them.

- Warn if the inode_lock is not held in the helper functions.

- Warn if eventfs_start_creating() is called without a parent. As
eventfs is underneath tracefs, all files created will have a parent
(the top one will have a tracefs parent).

Tracing update:

- Add Mathieu Desnoyers as an official reviewer of the tracing subsystem"

* tag 'trace-v6.7-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
MAINTAINERS: TRACING: Add Mathieu Desnoyers as Reviewer
eventfs: Make sure that parent->d_inode is locked in creating files/dirs
eventfs: Do not allow NULL parent to eventfs_start_creating()
eventfs: Move taking of inode_lock into dcache_dir_open_wrapper()
eventfs: Use GFP_NOFS for allocation when eventfs_mutex is held
eventfs: Do not invalidate dentry in create_file/dir_dentry()
eventfs: Remove expectation that ei->is_freed means ei->dentry == NULL

Changed files
+31 -48
fs
+1
MAINTAINERS
··· 22079 22079 TRACING 22080 22080 M: Steven Rostedt <rostedt@goodmis.org> 22081 22081 M: Masami Hiramatsu <mhiramat@kernel.org> 22082 + R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> 22082 22083 L: linux-kernel@vger.kernel.org 22083 22084 L: linux-trace-kernel@vger.kernel.org 22084 22085 S: Maintained
+26 -39
fs/tracefs/event_inode.c
··· 27 27 /* 28 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 29 * to the ei->dentry must be done under this mutex and after checking 30 - * if ei->is_freed is not set. The ei->dentry is released under the 31 - * mutex at the same time ei->is_freed is set. If ei->is_freed is set 32 - * then the ei->dentry is invalid. 30 + * if ei->is_freed is not set. When ei->is_freed is set, the dentry 31 + * is on its way to being freed after the last dput() is made on it. 33 32 */ 34 33 static DEFINE_MUTEX(eventfs_mutex); 35 34 36 35 /* 37 36 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 38 37 * its parent's list and will have is_freed set (under eventfs_mutex). 39 - * After the SRCU grace period is over, the ei may be freed. 38 + * After the SRCU grace period is over and the last dput() is called 39 + * the ei is freed. 40 40 */ 41 41 DEFINE_STATIC_SRCU(eventfs_srcu); 42 42 ··· 95 95 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 96 96 if (!ei->entry_attrs) { 97 97 ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, 98 - GFP_KERNEL); 98 + GFP_NOFS); 99 99 if (!ei->entry_attrs) { 100 100 ret = -ENOMEM; 101 101 goto out; ··· 326 326 struct eventfs_attr *attr = NULL; 327 327 struct dentry **e_dentry = &ei->d_children[idx]; 328 328 struct dentry *dentry; 329 - bool invalidate = false; 329 + 330 + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 330 331 331 332 mutex_lock(&eventfs_mutex); 332 333 if (ei->is_freed) { ··· 349 348 350 349 mutex_unlock(&eventfs_mutex); 351 350 352 - /* The lookup already has the parent->d_inode locked */ 353 - if (!lookup) 354 - inode_lock(parent->d_inode); 355 - 356 351 dentry = create_file(name, mode, attr, parent, data, fops); 357 - 358 - if (!lookup) 359 - inode_unlock(parent->d_inode); 360 352 361 353 mutex_lock(&eventfs_mutex); 362 354 ··· 359 365 * created the dentry for this e_dentry. In which case 360 366 * use that one. 
361 367 * 362 - * Note, with the mutex held, the e_dentry cannot have content 363 - * and the ei->is_freed be true at the same time. 368 + * If ei->is_freed is set, the e_dentry is currently on its 369 + * way to being freed, don't return it. If e_dentry is NULL 370 + * it means it was already freed. 364 371 */ 365 - dentry = *e_dentry; 366 - if (WARN_ON_ONCE(dentry && ei->is_freed)) 372 + if (ei->is_freed) 367 373 dentry = NULL; 374 + else 375 + dentry = *e_dentry; 368 376 /* The lookup does not need to up the dentry refcount */ 369 377 if (dentry && !lookup) 370 378 dget(dentry); ··· 383 387 * Otherwise it means two dentries exist with the same name. 384 388 */ 385 389 WARN_ON_ONCE(!ei->is_freed); 386 - invalidate = true; 390 + dentry = NULL; 387 391 } 388 392 mutex_unlock(&eventfs_mutex); 389 393 390 - if (invalidate) 391 - d_invalidate(dentry); 392 - 393 - if (lookup || invalidate) 394 + if (lookup) 394 395 dput(dentry); 395 396 396 - return invalidate ? NULL : dentry; 397 + return dentry; 397 398 } 398 399 399 400 /** ··· 430 437 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 431 438 struct dentry *parent, bool lookup) 432 439 { 433 - bool invalidate = false; 434 440 struct dentry *dentry = NULL; 441 + 442 + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 435 443 436 444 mutex_lock(&eventfs_mutex); 437 445 if (pei->is_freed || ei->is_freed) { ··· 450 456 } 451 457 mutex_unlock(&eventfs_mutex); 452 458 453 - /* The lookup already has the parent->d_inode locked */ 454 - if (!lookup) 455 - inode_lock(parent->d_inode); 456 - 457 459 dentry = create_dir(ei, parent); 458 - 459 - if (!lookup) 460 - inode_unlock(parent->d_inode); 461 460 462 461 mutex_lock(&eventfs_mutex); 463 462 ··· 460 473 * created the dentry for this e_dentry. In which case 461 474 * use that one. 462 475 * 463 - * Note, with the mutex held, the e_dentry cannot have content 464 - * and the ei->is_freed be true at the same time. 
476 + * If ei->is_freed is set, the e_dentry is currently on its 477 + * way to being freed. 465 478 */ 466 479 dentry = ei->dentry; 467 480 if (dentry && !lookup) ··· 480 493 * Otherwise it means two dentries exist with the same name. 481 494 */ 482 495 WARN_ON_ONCE(!ei->is_freed); 483 - invalidate = true; 496 + dentry = NULL; 484 497 } 485 498 mutex_unlock(&eventfs_mutex); 486 - if (invalidate) 487 - d_invalidate(dentry); 488 499 489 - if (lookup || invalidate) 500 + if (lookup) 490 501 dput(dentry); 491 502 492 - return invalidate ? NULL : dentry; 503 + return dentry; 493 504 } 494 505 495 506 /** ··· 617 632 { 618 633 struct dentry **tmp; 619 634 620 - tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); 635 + tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); 621 636 if (!tmp) 622 637 return -1; 623 638 tmp[cnt] = d; ··· 683 698 return -ENOMEM; 684 699 } 685 700 701 + inode_lock(parent->d_inode); 686 702 list_for_each_entry_srcu(ei_child, &ei->children, list, 687 703 srcu_read_lock_held(&eventfs_srcu)) { 688 704 d = create_dir_dentry(ei, ei_child, parent, false); ··· 716 730 cnt++; 717 731 } 718 732 } 733 + inode_unlock(parent->d_inode); 719 734 srcu_read_unlock(&eventfs_srcu, idx); 720 735 ret = dcache_dir_open(inode, file); 721 736
+4 -9
fs/tracefs/inode.c
··· 509 509 struct dentry *dentry; 510 510 int error; 511 511 512 + /* Must always have a parent. */ 513 + if (WARN_ON_ONCE(!parent)) 514 + return ERR_PTR(-EINVAL); 515 + 512 516 error = simple_pin_fs(&trace_fs_type, &tracefs_mount, 513 517 &tracefs_mount_count); 514 518 if (error) 515 519 return ERR_PTR(error); 516 - 517 - /* 518 - * If the parent is not specified, we create it in the root. 519 - * We need the root dentry to do this, which is in the super 520 - * block. A pointer to that is in the struct vfsmount that we 521 - * have around. 522 - */ 523 - if (!parent) 524 - parent = tracefs_mount->mnt_root; 525 520 526 521 if (unlikely(IS_DEADDIR(parent->d_inode))) 527 522 dentry = ERR_PTR(-ENOENT);