Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ftrace: Fix unregister ftrace_ops accounting

Multiple users of the function tracer can register their functions
with the ftrace_ops structure. The accounting within ftrace will
update the counter on each function record that is being traced.
When the ftrace_ops filtering adds or removes functions, the
function records will be updated accordingly if the ftrace_ops is
still registered.

When an ftrace_ops is removed, the counters of the function records
that the ftrace_ops traces are decremented. When a counter reaches
zero, the function that the record represents is modified to stop
calling the mcount code.

When changes are made, the code is updated via stop_machine() with
a command passed to the function to tell it what to do. There is an
ENABLE and DISABLE command that tells the called function to enable
or disable the functions. But the ENABLE is really a misnomer as it
should just update the records, as records that have been enabled
and now have a count of zero should be disabled.

The DISABLE command is used to disable all functions regardless of
their counter values. This is the big off switch and is not the
complement of the ENABLE command.

To make matters worse, when an ftrace_ops is unregistered while
another ftrace_ops is still registered, neither the DISABLE nor the
ENABLE command is set when calling into the stop_machine() function,
so the records will not be updated to match their counters. A command
is passed to that function that will update the mcount code to call
the registered callback directly if it is the only one left. This
means that the ftrace_ops that is still registered will have its callback
called by all functions that have been set for it as well as the ftrace_ops
that was just unregistered.

Here's a way to trigger this bug. Compile the kernel with
CONFIG_FUNCTION_PROFILER set and with CONFIG_FUNCTION_GRAPH not set:

CONFIG_FUNCTION_PROFILER=y
# CONFIG_FUNCTION_GRAPH is not set

This will force the function profiler to use the function tracer instead
of the function graph tracer.

# cd /sys/kernel/debug/tracing
# echo schedule > set_ftrace_filter
# echo function > current_tracer
# cat set_ftrace_filter
schedule
# cat trace
# tracer: nop
#
# entries-in-buffer/entries-written: 692/68108025 #P:4
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
kworker/0:2-909 [000] .... 531.235574: schedule <-worker_thread
<idle>-0 [001] .N.. 531.235575: schedule <-cpu_idle
kworker/0:2-909 [000] .... 531.235597: schedule <-worker_thread
sshd-2563 [001] .... 531.235647: schedule <-schedule_hrtimeout_range_clock

# echo 1 > function_profile_enabled
# echo 0 > function_profile_enabled
# cat set_ftrace_filter
schedule
# cat trace
# tracer: function
#
# entries-in-buffer/entries-written: 159701/118821262 #P:4
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
<idle>-0 [002] ...1 604.870655: local_touch_nmi <-cpu_idle
<idle>-0 [002] d..1 604.870655: enter_idle <-cpu_idle
<idle>-0 [002] d..1 604.870656: atomic_notifier_call_chain <-enter_idle
<idle>-0 [002] d..1 604.870656: __atomic_notifier_call_chain <-atomic_notifier_call_chain

The same problem could have happened with the trace_probe_ops,
but they are modified with the set_ftrace_filter file, which performs
the update when the file is closed.

The simple solution is to change ENABLE to UPDATE and call it every
time an ftrace_ops is unregistered.

Link: http://lkml.kernel.org/r/1323105776-26961-3-git-send-email-jolsa@redhat.com

Cc: stable@vger.kernel.org # 3.0+
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

authored by

Jiri Olsa and committed by
Steven Rostedt
30fb6aa7 74eec26f

+13 -14
+13 -14
kernel/trace/ftrace.c
··· 948 948 }; 949 949 950 950 enum { 951 - FTRACE_ENABLE_CALLS = (1 << 0), 951 + FTRACE_UPDATE_CALLS = (1 << 0), 952 952 FTRACE_DISABLE_CALLS = (1 << 1), 953 953 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 954 954 FTRACE_START_FUNC_RET = (1 << 3), ··· 1519 1519 1520 1520 1521 1521 static int 1522 - __ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1522 + __ftrace_replace_code(struct dyn_ftrace *rec, int update) 1523 1523 { 1524 1524 unsigned long ftrace_addr; 1525 1525 unsigned long flag = 0UL; ··· 1527 1527 ftrace_addr = (unsigned long)FTRACE_ADDR; 1528 1528 1529 1529 /* 1530 - * If we are enabling tracing: 1530 + * If we are updating calls: 1531 1531 * 1532 1532 * If the record has a ref count, then we need to enable it 1533 1533 * because someone is using it. 1534 1534 * 1535 1535 * Otherwise we make sure its disabled. 1536 1536 * 1537 - * If we are disabling tracing, then disable all records that 1537 + * If we are disabling calls, then disable all records that 1538 1538 * are enabled. 
1539 1539 */ 1540 - if (enable && (rec->flags & ~FTRACE_FL_MASK)) 1540 + if (update && (rec->flags & ~FTRACE_FL_MASK)) 1541 1541 flag = FTRACE_FL_ENABLED; 1542 1542 1543 1543 /* If the state of this record hasn't changed, then do nothing */ ··· 1553 1553 return ftrace_make_nop(NULL, rec, ftrace_addr); 1554 1554 } 1555 1555 1556 - static void ftrace_replace_code(int enable) 1556 + static void ftrace_replace_code(int update) 1557 1557 { 1558 1558 struct dyn_ftrace *rec; 1559 1559 struct ftrace_page *pg; ··· 1567 1567 if (rec->flags & FTRACE_FL_FREE) 1568 1568 continue; 1569 1569 1570 - failed = __ftrace_replace_code(rec, enable); 1570 + failed = __ftrace_replace_code(rec, update); 1571 1571 if (failed) { 1572 1572 ftrace_bug(failed, rec->ip); 1573 1573 /* Stop processing */ ··· 1623 1623 */ 1624 1624 function_trace_stop++; 1625 1625 1626 - if (*command & FTRACE_ENABLE_CALLS) 1626 + if (*command & FTRACE_UPDATE_CALLS) 1627 1627 ftrace_replace_code(1); 1628 1628 else if (*command & FTRACE_DISABLE_CALLS) 1629 1629 ftrace_replace_code(0); ··· 1691 1691 return -ENODEV; 1692 1692 1693 1693 ftrace_start_up++; 1694 - command |= FTRACE_ENABLE_CALLS; 1694 + command |= FTRACE_UPDATE_CALLS; 1695 1695 1696 1696 /* ops marked global share the filter hashes */ 1697 1697 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { ··· 1743 1743 if (ops != &global_ops || !global_start_up) 1744 1744 ops->flags &= ~FTRACE_OPS_FL_ENABLED; 1745 1745 1746 - if (!ftrace_start_up) 1747 - command |= FTRACE_DISABLE_CALLS; 1746 + command |= FTRACE_UPDATE_CALLS; 1748 1747 1749 1748 if (saved_ftrace_func != ftrace_trace_function) { 1750 1749 saved_ftrace_func = ftrace_trace_function; ··· 1765 1766 saved_ftrace_func = NULL; 1766 1767 /* ftrace_start_up is true if we want ftrace running */ 1767 1768 if (ftrace_start_up) 1768 - ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1769 + ftrace_run_update_code(FTRACE_UPDATE_CALLS); 1769 1770 } 1770 1771 1771 1772 static void ftrace_shutdown_sysctl(void) ··· 2918 2919 ret = 
ftrace_hash_move(ops, enable, orig_hash, hash); 2919 2920 if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED 2920 2921 && ftrace_enabled) 2921 - ftrace_run_update_code(FTRACE_ENABLE_CALLS); 2922 + ftrace_run_update_code(FTRACE_UPDATE_CALLS); 2922 2923 2923 2924 mutex_unlock(&ftrace_lock); 2924 2925 ··· 3106 3107 orig_hash, iter->hash); 3107 3108 if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) 3108 3109 && ftrace_enabled) 3109 - ftrace_run_update_code(FTRACE_ENABLE_CALLS); 3110 + ftrace_run_update_code(FTRACE_UPDATE_CALLS); 3110 3111 3111 3112 mutex_unlock(&ftrace_lock); 3112 3113 }