Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'probes-fixes-v6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull probes fixes from Masami Hiramatsu:

- fprobe: remove fprobe_hlist_node when module unloading

When a fprobe target module is removed, the fprobe_hlist_node should
be removed from the fprobe's hash table to prevent it from being
accidentally reused if another module is loaded at the same address.

- fprobe: lock module while registering fprobe

The module containing the function to be probed is locked using a
reference counter until the fprobe registration is complete, which
prevents use-after-free.

- fprobe-events: fix possible UAF on modules

Basically the same as above, but in the fprobe-events layer we also
need to take the module reference count when we find the tracepoint
in the module.

* tag 'probes-fixes-v6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
tracing: fprobe: Cleanup fprobe hash when module unloading
tracing: fprobe events: Fix possible UAF on modules
tracing: fprobe: Fix to lock module while registering fprobe

+166 -30
+149 -21
kernel/trace/fprobe.c
··· 89 89 { 90 90 lockdep_assert_held(&fprobe_mutex); 91 91 92 - WRITE_ONCE(node->fp, NULL); 93 - hlist_del_rcu(&node->hlist); 92 + /* Avoid double deleting */ 93 + if (READ_ONCE(node->fp) != NULL) { 94 + WRITE_ONCE(node->fp, NULL); 95 + hlist_del_rcu(&node->hlist); 96 + } 94 97 return !!find_first_fprobe_node(node->addr); 95 98 } 96 99 ··· 414 411 ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); 415 412 } 416 413 414 + #ifdef CONFIG_MODULES 415 + 416 + #define FPROBE_IPS_BATCH_INIT 8 417 + /* instruction pointer address list */ 418 + struct fprobe_addr_list { 419 + int index; 420 + int size; 421 + unsigned long *addrs; 422 + }; 423 + 424 + static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr) 425 + { 426 + unsigned long *addrs; 427 + 428 + if (alist->index >= alist->size) 429 + return -ENOMEM; 430 + 431 + alist->addrs[alist->index++] = addr; 432 + if (alist->index < alist->size) 433 + return 0; 434 + 435 + /* Expand the address list */ 436 + addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL); 437 + if (!addrs) 438 + return -ENOMEM; 439 + 440 + memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs)); 441 + alist->size *= 2; 442 + kfree(alist->addrs); 443 + alist->addrs = addrs; 444 + 445 + return 0; 446 + } 447 + 448 + static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head, 449 + struct fprobe_addr_list *alist) 450 + { 451 + struct fprobe_hlist_node *node; 452 + int ret = 0; 453 + 454 + hlist_for_each_entry_rcu(node, head, hlist) { 455 + if (!within_module(node->addr, mod)) 456 + continue; 457 + if (delete_fprobe_node(node)) 458 + continue; 459 + /* 460 + * If failed to update alist, just continue to update hlist. 461 + * Therefore, at list user handler will not hit anymore. 462 + */ 463 + if (!ret) 464 + ret = fprobe_addr_list_add(alist, node->addr); 465 + } 466 + } 467 + 468 + /* Handle module unloading to manage fprobe_ip_table. 
*/ 469 + static int fprobe_module_callback(struct notifier_block *nb, 470 + unsigned long val, void *data) 471 + { 472 + struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT}; 473 + struct module *mod = data; 474 + int i; 475 + 476 + if (val != MODULE_STATE_GOING) 477 + return NOTIFY_DONE; 478 + 479 + alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL); 480 + /* If failed to alloc memory, we can not remove ips from hash. */ 481 + if (!alist.addrs) 482 + return NOTIFY_DONE; 483 + 484 + mutex_lock(&fprobe_mutex); 485 + for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++) 486 + fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist); 487 + 488 + if (alist.index < alist.size && alist.index > 0) 489 + ftrace_set_filter_ips(&fprobe_graph_ops.ops, 490 + alist.addrs, alist.index, 1, 0); 491 + mutex_unlock(&fprobe_mutex); 492 + 493 + kfree(alist.addrs); 494 + 495 + return NOTIFY_DONE; 496 + } 497 + 498 + static struct notifier_block fprobe_module_nb = { 499 + .notifier_call = fprobe_module_callback, 500 + .priority = 0, 501 + }; 502 + 503 + static int __init init_fprobe_module(void) 504 + { 505 + return register_module_notifier(&fprobe_module_nb); 506 + } 507 + early_initcall(init_fprobe_module); 508 + #endif 509 + 417 510 static int symbols_cmp(const void *a, const void *b) 418 511 { 419 512 const char **str_a = (const char **) a; ··· 544 445 size_t index; 545 446 size_t size; 546 447 unsigned long *addrs; 448 + struct module **mods; 547 449 }; 548 450 549 451 static int filter_match_callback(void *data, const char *name, unsigned long addr) ··· 558 458 if (!ftrace_location(addr)) 559 459 return 0; 560 460 561 - if (match->addrs) 562 - match->addrs[match->index] = addr; 461 + if (match->addrs) { 462 + struct module *mod = __module_text_address(addr); 563 463 464 + if (mod && !try_module_get(mod)) 465 + return 0; 466 + 467 + match->mods[match->index] = mod; 468 + match->addrs[match->index] = addr; 469 + } 564 470 match->index++; 565 471 return 
match->index == match->size; 566 472 } 567 473 568 474 /* 569 475 * Make IP list from the filter/no-filter glob patterns. 570 - * Return the number of matched symbols, or -ENOENT. 476 + * Return the number of matched symbols, or errno. 477 + * If @addrs == NULL, this just counts the number of matched symbols. If @addrs 478 + * is passed with an array, we need to pass the an @mods array of the same size 479 + * to increment the module refcount for each symbol. 480 + * This means we also need to call `module_put` for each element of @mods after 481 + * using the @addrs. 571 482 */ 572 - static int ip_list_from_filter(const char *filter, const char *notfilter, 573 - unsigned long *addrs, size_t size) 483 + static int get_ips_from_filter(const char *filter, const char *notfilter, 484 + unsigned long *addrs, struct module **mods, 485 + size_t size) 574 486 { 575 487 struct filter_match_data match = { .filter = filter, .notfilter = notfilter, 576 - .index = 0, .size = size, .addrs = addrs}; 488 + .index = 0, .size = size, .addrs = addrs, .mods = mods}; 577 489 int ret; 490 + 491 + if (addrs && !mods) 492 + return -EINVAL; 578 493 579 494 ret = kallsyms_on_each_symbol(filter_match_callback, &match); 580 495 if (ret < 0) 581 496 return ret; 582 - ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); 583 - if (ret < 0) 584 - return ret; 497 + if (IS_ENABLED(CONFIG_MODULES)) { 498 + ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); 499 + if (ret < 0) 500 + return ret; 501 + } 585 502 586 503 return match.index ?: -ENOENT; 587 504 } ··· 660 543 */ 661 544 int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) 662 545 { 663 - unsigned long *addrs; 664 - int ret; 546 + unsigned long *addrs __free(kfree) = NULL; 547 + struct module **mods __free(kfree) = NULL; 548 + int ret, num; 665 549 666 550 if (!fp || !filter) 667 551 return -EINVAL; 668 552 669 - ret = ip_list_from_filter(filter, notfilter, NULL, 
FPROBE_IPS_MAX); 553 + num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX); 554 + if (num < 0) 555 + return num; 556 + 557 + addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL); 558 + if (!addrs) 559 + return -ENOMEM; 560 + 561 + mods = kcalloc(num, sizeof(*mods), GFP_KERNEL); 562 + if (!mods) 563 + return -ENOMEM; 564 + 565 + ret = get_ips_from_filter(filter, notfilter, addrs, mods, num); 670 566 if (ret < 0) 671 567 return ret; 672 568 673 - addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL); 674 - if (!addrs) 675 - return -ENOMEM; 676 - ret = ip_list_from_filter(filter, notfilter, addrs, ret); 677 - if (ret > 0) 678 - ret = register_fprobe_ips(fp, addrs, ret); 569 + ret = register_fprobe_ips(fp, addrs, ret); 679 570 680 - kfree(addrs); 571 + for (int i = 0; i < num; i++) { 572 + if (mods[i]) 573 + module_put(mods[i]); 574 + } 681 575 return ret; 682 576 } 683 577 EXPORT_SYMBOL_GPL(register_fprobe);
+17 -9
kernel/trace/trace_fprobe.c
··· 919 919 struct __find_tracepoint_cb_data *data = priv; 920 920 921 921 if (!data->tpoint && !strcmp(data->tp_name, tp->name)) { 922 - data->tpoint = tp; 923 - if (!data->mod) 922 + /* If module is not specified, try getting module refcount. */ 923 + if (!data->mod && mod) { 924 + /* If failed to get refcount, ignore this tracepoint. */ 925 + if (!try_module_get(mod)) 926 + return; 927 + 924 928 data->mod = mod; 929 + } 930 + data->tpoint = tp; 925 931 } 926 932 } 927 933 ··· 939 933 data->tpoint = tp; 940 934 } 941 935 942 - /* Find a tracepoint from kernel and module. */ 936 + /* 937 + * Find a tracepoint from kernel and module. If the tracepoint is on the module, 938 + * the module's refcount is incremented and returned as *@tp_mod. Thus, if it is 939 + * not NULL, caller must call module_put(*tp_mod) after used the tracepoint. 940 + */ 943 941 static struct tracepoint *find_tracepoint(const char *tp_name, 944 942 struct module **tp_mod) 945 943 { ··· 972 962 } 973 963 } 974 964 975 - /* Find a tracepoint from specified module. */ 965 + /* 966 + * Find a tracepoint from specified module. In this case, this does not get the 967 + * module's refcount. The caller must ensure the module is not freed. 968 + */ 976 969 static struct tracepoint *find_tracepoint_in_module(struct module *mod, 977 970 const char *tp_name) 978 971 { ··· 1182 1169 if (is_tracepoint) { 1183 1170 ctx->flags |= TPARG_FL_TPOINT; 1184 1171 tpoint = find_tracepoint(symbol, &tp_mod); 1185 - /* lock module until register this tprobe. */ 1186 - if (tp_mod && !try_module_get(tp_mod)) { 1187 - tpoint = NULL; 1188 - tp_mod = NULL; 1189 - } 1190 1172 if (tpoint) { 1191 1173 ctx->funcname = kallsyms_lookup( 1192 1174 (unsigned long)tpoint->probestub,