ftrace: Fix accounting of subop hashes

The function graph infrastructure uses ftrace to hook into functions. It has
a single ftrace_ops to manage all the users of function graph. Each
individual user (tracing, bpf, fprobes, etc.) has its own ftrace_ops to
track the functions from which it wants its callback to be called. These
ftrace_ops are "subops" to the main ftrace_ops of the function graph
infrastructure.
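
Roughly, the relationship looks like this (a conceptual sketch only; the
field names shown are the ones used by the subops code below, not the full
structure, and "graph_ops" is just an illustrative name for the manager):

	struct ftrace_ops graph_ops;	/* the one manager ops hooked into ftrace */
	graph_ops.subop_list;		/* list of subops: tracing, bpf, fprobes, ... */
	graph_ops.func_hash;		/* must cover every function any subops wants */
	subops->managed = &graph_ops;	/* each subops points back to its manager */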

Each ftrace_ops has a filter_hash and a notrace_hash that are defined as:

Only trace functions that are in the filter_hash but not in the
notrace_hash.

If the filter_hash is empty, it means to trace all functions.
If the notrace_hash is empty, it means no function is excluded from tracing.
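
As a rough sketch of that rule (illustrative only; the real checks are done
through the ftrace hash lookups, e.g. in ftrace_ops_test()):

	/* Illustrative: would @ops trace the function at @ip? */
	static bool ops_would_trace(struct ftrace_ops *ops, unsigned long ip)
	{
		struct ftrace_hash *filter = ops->func_hash->filter_hash;
		struct ftrace_hash *notrace = ops->func_hash->notrace_hash;

		/* An empty filter_hash means "trace every function" */
		if (!ftrace_hash_empty(filter) && !ftrace_lookup_ip(filter, ip))
			return false;

		/* An empty notrace_hash means "exclude nothing" */
		if (!ftrace_hash_empty(notrace) && ftrace_lookup_ip(notrace, ip))
			return false;

		return true;
	}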

The function graph main ftrace_ops needs a hash that is a superset containing
all the functions to be traced by all of its subops. The algorithm used to
perform this merge was incorrect.

When the first subops was added to the main ops, it simply made the main
ops a copy of the subops (same filter_hash and notrace_hash).

When a second subops was added, its filter_hash was joined with the main ops
filter_hash as the union of the two sets, and the intersection of the new
subops notrace_hash with the main ops notrace_hash became the new
notrace_hash of the main ops.
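
In set notation, the old merge was roughly:

	main filter_hash  = main filter_hash  + subops filter_hash	(union)
	main notrace_hash = main notrace_hash & subops notrace_hash	(intersection)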

The issue is that the main ops would then start tracing functions that no
subops were tracing. For example, given two subops with:

subops 1:

filter_hash = '*sched*' # trace all functions with "sched" in it
notrace_hash = '*time*' # except do not trace functions with "time"

subops 2:

filter_hash = '*lock*' # trace all functions with "lock" in it
notrace_hash = '*clock*' # except do not trace functions with "clock"

The intersection of the '*time*' functions with the '*clock*' functions may
well be the empty set. That leaves the main ops with an empty notrace_hash,
so it would trace every '*sched*' function with 'time' in it and every
'*lock*' function with 'clock' in it, functions that no subops asked for!
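
Under the rules introduced below, the merge for this example would instead
produce (same glob notation, illustrative only):

	main ops filter_hash  = ('*sched*' - '*time*') + ('*lock*' - '*clock*')
	main ops notrace_hash = (empty)

so the main ops never traces a function that no subops asked for.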

Instead, modify the algorithm to be a bit simpler and correct.

First, when adding a new subops, even if it is the first one, do not merge
its notrace_hash into the main ops if the subops filter_hash is not empty.
Instead, add to the main ops filter_hash only the functions that are in the
subops filter_hash but not in the subops notrace_hash. In that case there is
no reason to add anything to the main ops notrace_hash.

The notrace_hash of the main ops should be non-empty only if all subops
filter_hashes are empty (meaning they trace all functions) and their
notrace_hashes have functions in common.

That is, the main ops notrace_hash is empty if any subops filter_hash is
non-empty.

The main ops notrace_hash only has content in it if all subops filter_hashes
are empty, and its content is only the functions that are in every subops
notrace_hash (their intersection). If any subops notrace_hash is empty, then
so is the main ops notrace_hash.
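
Putting the rules together, with F_i and N_i the filter_hash and
notrace_hash of subops i (an empty F_i meaning "trace all functions"), the
main ops hashes become, roughly:

	if every F_i is non-empty:
		main filter_hash  = (F_1 - N_1) + (F_2 - N_2) + ...
		main notrace_hash = empty
	else if every F_i is empty:
		main filter_hash  = empty		(trace all functions)
		main notrace_hash = N_1 & N_2 & ...	(empty if any N_i is empty)
	else:
		main filter_hash  = empty
		main notrace_hash = empty		(a superset of "trace all" is "trace all")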

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Andy Chiu <andybnac@gmail.com>
Link: https://lore.kernel.org/20250409152720.216356767@goodmis.org
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Changed files:

 kernel/trace/ftrace.c | 177 insertions(+), 137 deletions(-)

--- kernel/trace/ftrace.c (before; removed lines marked '-')

···
 }

 /*
  * Add to @hash only those that are in both @new_hash1 and @new_hash2
  *
  * The notrace_hash updates uses just the intersect_hash() function
···
        *hash = EMPTY_HASH;
    }
    return 0;
-}
-
-/* Return a new hash that has a union of all @ops->filter_hash entries */
-static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
-{
-    struct ftrace_hash *new_hash = NULL;
-    struct ftrace_ops *subops;
-    int size_bits;
-    int ret;
-
-    if (ops->func_hash->filter_hash)
-        size_bits = ops->func_hash->filter_hash->size_bits;
-    else
-        size_bits = FTRACE_HASH_DEFAULT_BITS;
-
-    list_for_each_entry(subops, &ops->subop_list, list) {
-        ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
-        if (ret < 0) {
-            free_ftrace_hash(new_hash);
-            return NULL;
-        }
-        /* Nothing more to do if new_hash is empty */
-        if (ftrace_hash_empty(new_hash))
-            break;
-    }
-    /* Can't return NULL as that means this failed */
-    return new_hash ? : EMPTY_HASH;
-}
-
-/* Make @ops trace evenything except what all its subops do not trace */
-static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops)
-{
-    struct ftrace_hash *new_hash = NULL;
-    struct ftrace_ops *subops;
-    int size_bits;
-    int ret;
-
-    list_for_each_entry(subops, &ops->subop_list, list) {
-        struct ftrace_hash *next_hash;
-
-        if (!new_hash) {
-            size_bits = subops->func_hash->notrace_hash->size_bits;
-            new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash);
-            if (!new_hash)
-                return NULL;
-            continue;
-        }
-        size_bits = new_hash->size_bits;
-        next_hash = new_hash;
-        new_hash = alloc_ftrace_hash(size_bits);
-        ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash);
-        free_ftrace_hash(next_hash);
-        if (ret < 0) {
-            free_ftrace_hash(new_hash);
-            return NULL;
-        }
-        /* Nothing more to do if new_hash is empty */
-        if (ftrace_hash_empty(new_hash))
-            break;
-    }
-    return new_hash;
 }

 static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B)
···
    return 0;
 }

 /**
  * ftrace_startup_subops - enable tracing for subops of an ops
  * @ops: Manager ops (used to pick all the functions of its subops)
···
    struct ftrace_hash *notrace_hash;
    struct ftrace_hash *save_filter_hash;
    struct ftrace_hash *save_notrace_hash;
-    int size_bits;
    int ret;

    if (unlikely(ftrace_disabled))
···

    /* For the first subops to ops just enable it normally */
    if (list_empty(&ops->subop_list)) {
-        /* Just use the subops hashes */
-        filter_hash = copy_hash(subops->func_hash->filter_hash);
-        notrace_hash = copy_hash(subops->func_hash->notrace_hash);
-        if (!filter_hash || !notrace_hash) {
-            free_ftrace_hash(filter_hash);
-            free_ftrace_hash(notrace_hash);
-            return -ENOMEM;
-        }

        save_filter_hash = ops->func_hash->filter_hash;
        save_notrace_hash = ops->func_hash->notrace_hash;
···

    /*
     * Here there's already something attached. Here are the rules:
-     * o If either filter_hash is empty then the final stays empty
-     * o Otherwise, the final is a superset of both hashes
-     * o If either notrace_hash is empty then the final stays empty
-     * o Otherwise, the final is an intersection between the hashes
     */
-    if (ftrace_hash_empty(ops->func_hash->filter_hash) ||
-        ftrace_hash_empty(subops->func_hash->filter_hash)) {
-        filter_hash = EMPTY_HASH;
-    } else {
-        size_bits = max(ops->func_hash->filter_hash->size_bits,
-                subops->func_hash->filter_hash->size_bits);
-        filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
-        if (!filter_hash)
-            return -ENOMEM;
-        ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
-                  size_bits);
-        if (ret < 0) {
-            free_ftrace_hash(filter_hash);
-            return ret;
-        }
-    }

-    if (ftrace_hash_empty(ops->func_hash->notrace_hash) ||
-        ftrace_hash_empty(subops->func_hash->notrace_hash)) {
-        notrace_hash = EMPTY_HASH;
-    } else {
-        size_bits = max(ops->func_hash->notrace_hash->size_bits,
-                subops->func_hash->notrace_hash->size_bits);
-        notrace_hash = alloc_ftrace_hash(size_bits);
-        if (!notrace_hash) {
-            free_ftrace_hash(filter_hash);
-            return -ENOMEM;
-        }
-
-        ret = intersect_hash(&notrace_hash, ops->func_hash->notrace_hash,
-                     subops->func_hash->notrace_hash);
-        if (ret < 0) {
-            free_ftrace_hash(filter_hash);
-            free_ftrace_hash(notrace_hash);
-            return ret;
-        }
-    }

    list_add(&subops->list, &ops->subop_list);

···
        subops->managed = ops;
    }
    return ret;
 }

 /**
···
    }

    /* Rebuild the hashes without subops */
-    filter_hash = append_hashes(ops);
-    notrace_hash = intersect_hashes(ops);
-    if (!filter_hash || !notrace_hash) {
-        free_ftrace_hash(filter_hash);
-        free_ftrace_hash(notrace_hash);
-        list_add(&subops->list, &ops->subop_list);
-        return -ENOMEM;
-    }

    ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
    if (ret < 0) {
···

 static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
                                               struct ftrace_hash **orig_subhash,
-                                              struct ftrace_hash *hash,
-                                              int enable)
 {
    struct ftrace_ops *ops = subops->managed;
-    struct ftrace_hash **orig_hash;
    struct ftrace_hash *save_hash;
    struct ftrace_hash *new_hash;
    int ret;
···
        return -ENOMEM;
    }

-    /* Create a new_hash to hold the ops new functions */
-    if (enable) {
-        orig_hash = &ops->func_hash->filter_hash;
-        new_hash = append_hashes(ops);
-    } else {
-        orig_hash = &ops->func_hash->notrace_hash;
-        new_hash = intersect_hashes(ops);
-    }
-
-    /* Move the hash over to the new hash */
-    ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable);
-
-    free_ftrace_hash(new_hash);

    if (ret) {
        /* Put back the original hash */
-        free_ftrace_hash_rcu(*orig_subhash);
        *orig_subhash = save_hash;
    } else {
        free_ftrace_hash_rcu(save_hash);
    }
···
                                   int enable)
 {
    if (ops->flags & FTRACE_OPS_FL_SUBOP)
-        return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable);

    /*
     * If this ops is not enabled, it could be sharing its filters
···
        list_for_each_entry(subops, &op->subop_list, list) {
            if ((subops->flags & FTRACE_OPS_FL_ENABLED) &&
                subops->func_hash == ops->func_hash) {
-                return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable);
            }
        }
    } while_for_each_ftrace_op(op);

+++ kernel/trace/ftrace.c (after; added lines marked '+')

···
 }

 /*
+ * Remove functions from @hash that are in @notrace_hash
+ */
+static void remove_hash(struct ftrace_hash *hash, struct ftrace_hash *notrace_hash)
+{
+    struct ftrace_func_entry *entry;
+    struct hlist_node *tmp;
+    int size;
+    int i;
+
+    /* If the notrace hash is empty, there's nothing to do */
+    if (ftrace_hash_empty(notrace_hash))
+        return;
+
+    size = 1 << hash->size_bits;
+    for (i = 0; i < size; i++) {
+        hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) {
+            if (!__ftrace_lookup_ip(notrace_hash, entry->ip))
+                continue;
+            remove_hash_entry(hash, entry);
+            kfree(entry);
+        }
+    }
+}
+
+/*
  * Add to @hash only those that are in both @new_hash1 and @new_hash2
  *
  * The notrace_hash updates uses just the intersect_hash() function
···
        *hash = EMPTY_HASH;
    }
    return 0;
 }

 static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B)
···
    return 0;
 }

+static int add_first_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash,
+              struct ftrace_ops_hash *func_hash)
+{
+    /* If the filter hash is not empty, simply remove the nohash from it */
+    if (!ftrace_hash_empty(func_hash->filter_hash)) {
+        *filter_hash = copy_hash(func_hash->filter_hash);
+        if (!*filter_hash)
+            return -ENOMEM;
+        remove_hash(*filter_hash, func_hash->notrace_hash);
+        *notrace_hash = EMPTY_HASH;
+
+    } else {
+        *notrace_hash = copy_hash(func_hash->notrace_hash);
+        if (!*notrace_hash)
+            return -ENOMEM;
+        *filter_hash = EMPTY_HASH;
+    }
+    return 0;
+}
+
+static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash,
+             struct ftrace_ops_hash *ops_hash, struct ftrace_ops_hash *subops_hash)
+{
+    int size_bits;
+    int ret;
+
+    /* If the subops trace all functions so must the main ops */
+    if (ftrace_hash_empty(ops_hash->filter_hash) ||
+        ftrace_hash_empty(subops_hash->filter_hash)) {
+        *filter_hash = EMPTY_HASH;
+    } else {
+        /*
+         * The main ops filter hash is not empty, so its
+         * notrace_hash had better be, as the notrace hash
+         * is only used for empty main filter hashes.
+         */
+        WARN_ON_ONCE(!ftrace_hash_empty(ops_hash->notrace_hash));
+
+        size_bits = max(ops_hash->filter_hash->size_bits,
+                subops_hash->filter_hash->size_bits);
+
+        /* Copy the subops hash */
+        *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash);
+        if (!filter_hash)
+            return -ENOMEM;
+        /* Remove any notrace functions from the copy */
+        remove_hash(*filter_hash, subops_hash->notrace_hash);
+
+        ret = append_hash(filter_hash, ops_hash->filter_hash,
+                  size_bits);
+        if (ret < 0) {
+            free_ftrace_hash(*filter_hash);
+            return ret;
+        }
+    }
+
+    /*
+     * Only process notrace hashes if the main filter hash is empty
+     * (tracing all functions), otherwise the filter hash will just
+     * remove the notrace hash functions, and the notrace hash is
+     * not needed.
+     */
+    if (ftrace_hash_empty(*filter_hash)) {
+        /*
+         * Intersect the notrace functions. That is, if two
+         * subops are not tracing a set of functions, the
+         * main ops will only not trace the functions that are
+         * in both subops, but has to trace the functions that
+         * are only notrace in one of the subops, for the other
+         * subops to be able to trace them.
+         */
+        size_bits = max(ops_hash->notrace_hash->size_bits,
+                subops_hash->notrace_hash->size_bits);
+        *notrace_hash = alloc_ftrace_hash(size_bits);
+        if (!*notrace_hash)
+            return -ENOMEM;
+
+        ret = intersect_hash(notrace_hash, ops_hash->notrace_hash,
+                     subops_hash->notrace_hash);
+        if (ret < 0) {
+            free_ftrace_hash(*notrace_hash);
+            return ret;
+        }
+    }
+    return 0;
+}
+
 /**
  * ftrace_startup_subops - enable tracing for subops of an ops
  * @ops: Manager ops (used to pick all the functions of its subops)
···
    struct ftrace_hash *notrace_hash;
    struct ftrace_hash *save_filter_hash;
    struct ftrace_hash *save_notrace_hash;
    int ret;

    if (unlikely(ftrace_disabled))
···

    /* For the first subops to ops just enable it normally */
    if (list_empty(&ops->subop_list)) {
+
+        /* The ops was empty, should have empty hashes */
+        WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->filter_hash));
+        WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->notrace_hash));
+
+        ret = add_first_hash(&filter_hash, &notrace_hash, subops->func_hash);
+        if (ret < 0)
+            return ret;

        save_filter_hash = ops->func_hash->filter_hash;
        save_notrace_hash = ops->func_hash->notrace_hash;
···

    /*
     * Here there's already something attached. Here are the rules:
+     * If the new subops and main ops filter hashes are not empty:
+     *  o Make a copy of the subops filter hash
+     *  o Remove all functions in the nohash from it.
+     *  o Add in the main hash filter functions
+     *  o Remove any of these functions from the main notrace hash
     */

+    ret = add_next_hash(&filter_hash, &notrace_hash, ops->func_hash, subops->func_hash);
+    if (ret < 0)
+        return ret;

    list_add(&subops->list, &ops->subop_list);

···
        subops->managed = ops;
    }
    return ret;
+}
+
+static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash,
+              struct ftrace_ops *ops)
+{
+    struct ftrace_ops_hash temp_hash;
+    struct ftrace_ops *subops;
+    bool first = true;
+    int ret;
+
+    temp_hash.filter_hash = EMPTY_HASH;
+    temp_hash.notrace_hash = EMPTY_HASH;
+
+    list_for_each_entry(subops, &ops->subop_list, list) {
+        *filter_hash = EMPTY_HASH;
+        *notrace_hash = EMPTY_HASH;
+
+        if (first) {
+            ret = add_first_hash(filter_hash, notrace_hash, subops->func_hash);
+            if (ret < 0)
+                return ret;
+            first = false;
+        } else {
+            ret = add_next_hash(filter_hash, notrace_hash,
+                        &temp_hash, subops->func_hash);
+            if (ret < 0) {
+                free_ftrace_hash(temp_hash.filter_hash);
+                free_ftrace_hash(temp_hash.notrace_hash);
+                return ret;
+            }
+        }
+
+        temp_hash.filter_hash = *filter_hash;
+        temp_hash.notrace_hash = *notrace_hash;
+    }
+    return 0;
 }

 /**
···
    }

    /* Rebuild the hashes without subops */
+    ret = rebuild_hashes(&filter_hash, &notrace_hash, ops);
+    if (ret < 0)
+        return ret;

    ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
    if (ret < 0) {
···

 static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
                                               struct ftrace_hash **orig_subhash,
+                                              struct ftrace_hash *hash)
 {
    struct ftrace_ops *ops = subops->managed;
+    struct ftrace_hash *notrace_hash;
+    struct ftrace_hash *filter_hash;
    struct ftrace_hash *save_hash;
    struct ftrace_hash *new_hash;
    int ret;
···
        return -ENOMEM;
    }

+    ret = rebuild_hashes(&filter_hash, &notrace_hash, ops);
+    if (!ret)
+        ret = ftrace_update_ops(ops, filter_hash, notrace_hash);

    if (ret) {
        /* Put back the original hash */
+        new_hash = *orig_subhash;
        *orig_subhash = save_hash;
+        free_ftrace_hash_rcu(new_hash);
    } else {
        free_ftrace_hash_rcu(save_hash);
    }
···
                                   int enable)
 {
    if (ops->flags & FTRACE_OPS_FL_SUBOP)
+        return ftrace_hash_move_and_update_subops(ops, orig_hash, hash);

    /*
     * If this ops is not enabled, it could be sharing its filters
···
        list_for_each_entry(subops, &op->subop_list, list) {
            if ((subops->flags & FTRACE_OPS_FL_ENABLED) &&
                subops->func_hash == ops->func_hash) {
+                return ftrace_hash_move_and_update_subops(subops, orig_hash, hash);
            }
        }
    } while_for_each_ftrace_op(op);