Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf machine: Protect the machine->threads with a rwlock

In addition to using refcounts for the struct thread lifetime
management, we need to protect access to machine->threads from
concurrent access.

That happens in 'perf top', where a thread processes events, inserting
and deleting entries from that rb_tree while another thread decays
hist_entries, which end up dropping references and ultimately deleting
threads from the rb_tree and releasing its resources when no further
hist_entry (or other data structures, like in 'perf sched') references
it.

So the rule is the same as for refcounts + protected trees in the kernel:
get the tree lock, find object, bump the refcount, drop the tree lock,
return, use object, drop the refcount if no more use of it is needed,
keep it if storing it in some other data structure, drop when releasing
that data structure.

I.e. pair "t = machine__find(new)_thread()" with a "thread__put(t)", and
"perf_event__preprocess_sample(&al)" with "addr_location__put(&al)".

The addr_location__put() one exists because, as we return references to
several data structures, we may end up adding more reference counting
for those other data structures, and then we'll drop them all at
addr_location__put() time.

Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-bs9rt4n0jw3hi9f3zxyy3xln@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

+284 -114
+6 -4
tools/perf/builtin-annotate.c
··· 84 84 { 85 85 struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); 86 86 struct addr_location al; 87 + int ret = 0; 87 88 88 89 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 89 90 pr_warning("problem processing %d event, skipping it.\n", ··· 93 92 } 94 93 95 94 if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap)) 96 - return 0; 95 + goto out_put; 97 96 98 97 if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) { 99 98 pr_warning("problem incrementing symbol count, " 100 99 "skipping event\n"); 101 - return -1; 100 + ret = -1; 102 101 } 103 - 104 - return 0; 102 + out_put: 103 + addr_location__put(&al); 104 + return ret; 105 105 } 106 106 107 107 static int hist_entry__tty_annotate(struct hist_entry *he,
+6 -3
tools/perf/builtin-diff.c
··· 328 328 { 329 329 struct addr_location al; 330 330 struct hists *hists = evsel__hists(evsel); 331 + int ret = -1; 331 332 332 333 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 333 334 pr_warning("problem processing %d event, skipping it.\n", ··· 339 338 if (hists__add_entry(hists, &al, sample->period, 340 339 sample->weight, sample->transaction)) { 341 340 pr_warning("problem incrementing symbol period, skipping event\n"); 342 - return -1; 341 + goto out_put; 343 342 } 344 343 345 344 /* ··· 351 350 hists->stats.total_period += sample->period; 352 351 if (!al.filtered) 353 352 hists->stats.total_non_filtered_period += sample->period; 354 - 355 - return 0; 353 + ret = 0; 354 + out_put: 355 + addr_location__put(&al); 356 + return ret; 356 357 } 357 358 358 359 static struct perf_tool tool = {
+1
tools/perf/builtin-inject.c
··· 365 365 } 366 366 } 367 367 368 + thread__put(thread); 368 369 repipe: 369 370 perf_event__repipe(tool, event, sample, machine); 370 371 return 0;
+5 -2
tools/perf/builtin-kmem.c
··· 906 906 struct perf_evsel *evsel, 907 907 struct machine *machine) 908 908 { 909 + int err = 0; 909 910 struct thread *thread = machine__findnew_thread(machine, sample->pid, 910 911 sample->tid); 911 912 ··· 920 919 921 920 if (evsel->handler != NULL) { 922 921 tracepoint_handler f = evsel->handler; 923 - return f(evsel, sample); 922 + err = f(evsel, sample); 924 923 } 925 924 926 - return 0; 925 + thread__put(thread); 926 + 927 + return err; 927 928 } 928 929 929 930 static struct perf_tool perf_kmem = {
+4 -2
tools/perf/builtin-kvm.c
··· 651 651 struct perf_evsel *evsel, 652 652 struct machine *machine) 653 653 { 654 + int err = 0; 654 655 struct thread *thread; 655 656 struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, 656 657 tool); ··· 667 666 } 668 667 669 668 if (!handle_kvm_event(kvm, thread, evsel, sample)) 670 - return -1; 669 + err = -1; 671 670 672 - return 0; 671 + thread__put(thread); 672 + return err; 673 673 } 674 674 675 675 static int cpu_isa_config(struct perf_kvm_stat *kvm)
+6 -2
tools/perf/builtin-lock.c
··· 769 769 t = perf_session__findnew(session, st->tid); 770 770 pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); 771 771 node = rb_next(node); 772 + thread__put(t); 772 773 }; 773 774 } 774 775 ··· 811 810 struct perf_evsel *evsel, 812 811 struct machine *machine) 813 812 { 813 + int err = 0; 814 814 struct thread *thread = machine__findnew_thread(machine, sample->pid, 815 815 sample->tid); 816 816 ··· 823 821 824 822 if (evsel->handler != NULL) { 825 823 tracepoint_handler f = evsel->handler; 826 - return f(evsel, sample); 824 + err = f(evsel, sample); 827 825 } 828 826 829 - return 0; 827 + thread__put(thread); 828 + 829 + return err; 830 830 } 831 831 832 832 static void sort_result(void)
+3 -2
tools/perf/builtin-mem.c
··· 74 74 } 75 75 76 76 if (al.filtered || (mem->hide_unresolved && al.sym == NULL)) 77 - return 0; 77 + goto out_put; 78 78 79 79 if (al.map != NULL) 80 80 al.map->dso->hit = 1; ··· 103 103 symbol_conf.field_sep, 104 104 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", 105 105 al.sym ? al.sym->name : "???"); 106 - 106 + out_put: 107 + addr_location__put(&al); 107 108 return 0; 108 109 } 109 110
+5 -4
tools/perf/builtin-report.c
··· 142 142 .hide_unresolved = rep->hide_unresolved, 143 143 .add_entry_cb = hist_iter__report_callback, 144 144 }; 145 - int ret; 145 + int ret = 0; 146 146 147 147 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 148 148 pr_debug("problem processing %d event, skipping it.\n", ··· 151 151 } 152 152 153 153 if (rep->hide_unresolved && al.sym == NULL) 154 - return 0; 154 + goto out_put; 155 155 156 156 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 157 - return 0; 157 + goto out_put; 158 158 159 159 if (sort__mode == SORT_MODE__BRANCH) 160 160 iter.ops = &hist_iter_branch; ··· 172 172 rep); 173 173 if (ret < 0) 174 174 pr_debug("problem adding hist entry, skipping event\n"); 175 - 175 + out_put: 176 + addr_location__put(&al); 176 177 return ret; 177 178 } 178 179
+56 -26
tools/perf/builtin-sched.c
··· 770 770 if (child == NULL || parent == NULL) { 771 771 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 772 772 child, parent); 773 - return 0; 773 + goto out_put; 774 774 } 775 775 776 776 if (verbose) { ··· 781 781 782 782 register_pid(sched, parent->tid, thread__comm_str(parent)); 783 783 register_pid(sched, child->tid, thread__comm_str(child)); 784 + out_put: 785 + thread__put(child); 786 + thread__put(parent); 784 787 return 0; 785 788 } 786 789 ··· 960 957 struct work_atoms *out_events, *in_events; 961 958 struct thread *sched_out, *sched_in; 962 959 u64 timestamp0, timestamp = sample->time; 963 - int cpu = sample->cpu; 960 + int cpu = sample->cpu, err = -1; 964 961 s64 delta; 965 962 966 963 BUG_ON(cpu >= MAX_CPUS || cpu < 0); ··· 979 976 980 977 sched_out = machine__findnew_thread(machine, -1, prev_pid); 981 978 sched_in = machine__findnew_thread(machine, -1, next_pid); 979 + if (sched_out == NULL || sched_in == NULL) 980 + goto out_put; 982 981 983 982 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 984 983 if (!out_events) { 985 984 if (thread_atoms_insert(sched, sched_out)) 986 - return -1; 985 + goto out_put; 987 986 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 988 987 if (!out_events) { 989 988 pr_err("out-event: Internal tree error"); 990 - return -1; 989 + goto out_put; 991 990 } 992 991 } 993 992 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) ··· 998 993 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 999 994 if (!in_events) { 1000 995 if (thread_atoms_insert(sched, sched_in)) 1001 - return -1; 996 + goto out_put; 1002 997 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1003 998 if (!in_events) { 1004 999 pr_err("in-event: Internal tree error"); 1005 - return -1; 1000 + goto out_put; 1006 1001 } 1007 1002 /* 1008 1003 * Take came in we have not heard about yet, 1009 
1004 * add in an initial atom in runnable state: 1010 1005 */ 1011 1006 if (add_sched_out_event(in_events, 'R', timestamp)) 1012 - return -1; 1007 + goto out_put; 1013 1008 } 1014 1009 add_sched_in_event(in_events, timestamp); 1015 - 1016 - return 0; 1010 + err = 0; 1011 + out_put: 1012 + thread__put(sched_out); 1013 + thread__put(sched_in); 1014 + return err; 1017 1015 } 1018 1016 1019 1017 static int latency_runtime_event(struct perf_sched *sched, ··· 1029 1021 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1030 1022 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1031 1023 u64 timestamp = sample->time; 1032 - int cpu = sample->cpu; 1024 + int cpu = sample->cpu, err = -1; 1025 + 1026 + if (thread == NULL) 1027 + return -1; 1033 1028 1034 1029 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1035 1030 if (!atoms) { 1036 1031 if (thread_atoms_insert(sched, thread)) 1037 - return -1; 1032 + goto out_put; 1038 1033 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1039 1034 if (!atoms) { 1040 1035 pr_err("in-event: Internal tree error"); 1041 - return -1; 1036 + goto out_put; 1042 1037 } 1043 1038 if (add_sched_out_event(atoms, 'R', timestamp)) 1044 - return -1; 1039 + goto out_put; 1045 1040 } 1046 1041 1047 1042 add_runtime_event(atoms, runtime, timestamp); 1048 - return 0; 1043 + err = 0; 1044 + out_put: 1045 + thread__put(thread); 1046 + return err; 1049 1047 } 1050 1048 1051 1049 static int latency_wakeup_event(struct perf_sched *sched, ··· 1064 1050 struct work_atom *atom; 1065 1051 struct thread *wakee; 1066 1052 u64 timestamp = sample->time; 1053 + int err = -1; 1067 1054 1068 1055 wakee = machine__findnew_thread(machine, -1, pid); 1056 + if (wakee == NULL) 1057 + return -1; 1069 1058 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1070 1059 if (!atoms) { 1071 1060 if (thread_atoms_insert(sched, wakee)) 1072 - return -1; 1061 + goto out_put; 1073 1062 atoms = 
thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1074 1063 if (!atoms) { 1075 1064 pr_err("wakeup-event: Internal tree error"); 1076 - return -1; 1065 + goto out_put; 1077 1066 } 1078 1067 if (add_sched_out_event(atoms, 'S', timestamp)) 1079 - return -1; 1068 + goto out_put; 1080 1069 } 1081 1070 1082 1071 BUG_ON(list_empty(&atoms->work_list)); ··· 1098 1081 * skip in this case. 1099 1082 */ 1100 1083 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1101 - return 0; 1084 + goto out_ok; 1102 1085 1103 1086 sched->nr_timestamps++; 1104 1087 if (atom->sched_out_time > timestamp) { 1105 1088 sched->nr_unordered_timestamps++; 1106 - return 0; 1089 + goto out_ok; 1107 1090 } 1108 1091 1109 1092 atom->state = THREAD_WAIT_CPU; 1110 1093 atom->wake_up_time = timestamp; 1111 - return 0; 1094 + out_ok: 1095 + err = 0; 1096 + out_put: 1097 + thread__put(wakee); 1098 + return err; 1112 1099 } 1113 1100 1114 1101 static int latency_migrate_task_event(struct perf_sched *sched, ··· 1125 1104 struct work_atoms *atoms; 1126 1105 struct work_atom *atom; 1127 1106 struct thread *migrant; 1107 + int err = -1; 1128 1108 1129 1109 /* 1130 1110 * Only need to worry about migration when profiling one CPU. 
··· 1134 1112 return 0; 1135 1113 1136 1114 migrant = machine__findnew_thread(machine, -1, pid); 1115 + if (migrant == NULL) 1116 + return -1; 1137 1117 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1138 1118 if (!atoms) { 1139 1119 if (thread_atoms_insert(sched, migrant)) 1140 - return -1; 1120 + goto out_put; 1141 1121 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1142 1122 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1143 1123 if (!atoms) { 1144 1124 pr_err("migration-event: Internal tree error"); 1145 - return -1; 1125 + goto out_put; 1146 1126 } 1147 1127 if (add_sched_out_event(atoms, 'R', timestamp)) 1148 - return -1; 1128 + goto out_put; 1149 1129 } 1150 1130 1151 1131 BUG_ON(list_empty(&atoms->work_list)); ··· 1159 1135 1160 1136 if (atom->sched_out_time > timestamp) 1161 1137 sched->nr_unordered_timestamps++; 1162 - 1163 - return 0; 1138 + err = 0; 1139 + out_put: 1140 + thread__put(migrant); 1141 + return err; 1164 1142 } 1165 1143 1166 1144 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) ··· 1356 1330 } 1357 1331 1358 1332 sched_in = machine__findnew_thread(machine, -1, next_pid); 1333 + if (sched_in == NULL) 1334 + return -1; 1359 1335 1360 - sched->curr_thread[this_cpu] = sched_in; 1336 + sched->curr_thread[this_cpu] = thread__get(sched_in); 1361 1337 1362 1338 printf(" "); 1363 1339 ··· 1408 1380 } else { 1409 1381 printf("\n"); 1410 1382 } 1383 + 1384 + thread__put(sched_in); 1411 1385 1412 1386 return 0; 1413 1387 }
+12 -8
tools/perf/builtin-script.c
··· 607 607 } 608 608 609 609 if (al.filtered) 610 - return 0; 610 + goto out_put; 611 611 612 612 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 613 - return 0; 613 + goto out_put; 614 614 615 615 scripting_ops->process_event(event, sample, evsel, &al); 616 - 616 + out_put: 617 + addr_location__put(&al); 617 618 return 0; 618 619 } 619 620 ··· 682 681 print_sample_start(sample, thread, evsel); 683 682 perf_event__fprintf(event, stdout); 684 683 ret = 0; 685 - 686 684 out: 685 + thread__put(thread); 687 686 return ret; 688 687 } 689 688 ··· 714 713 } 715 714 print_sample_start(sample, thread, evsel); 716 715 perf_event__fprintf(event, stdout); 716 + thread__put(thread); 717 717 718 718 return 0; 719 719 } ··· 723 721 struct perf_sample *sample, 724 722 struct machine *machine) 725 723 { 724 + int err = 0; 726 725 struct thread *thread; 727 726 struct perf_script *script = container_of(tool, struct perf_script, tool); 728 727 struct perf_session *session = script->session; ··· 745 742 perf_event__fprintf(event, stdout); 746 743 747 744 if (perf_event__process_exit(tool, event, sample, machine) < 0) 748 - return -1; 745 + err = -1; 749 746 750 - return 0; 747 + thread__put(thread); 748 + return err; 751 749 } 752 750 753 751 static int process_mmap_event(struct perf_tool *tool, ··· 778 774 } 779 775 print_sample_start(sample, thread, evsel); 780 776 perf_event__fprintf(event, stdout); 781 - 777 + thread__put(thread); 782 778 return 0; 783 779 } 784 780 ··· 809 805 } 810 806 print_sample_start(sample, thread, evsel); 811 807 perf_event__fprintf(event, stdout); 812 - 808 + thread__put(thread); 813 809 return 0; 814 810 } 815 811
+3 -2
tools/perf/builtin-timechart.c
··· 523 523 * Discard all. 524 524 */ 525 525 zfree(&p); 526 - goto exit; 526 + goto exit_put; 527 527 } 528 528 continue; 529 529 } ··· 538 538 else 539 539 fprintf(f, "..... %016" PRIx64 "\n", ip); 540 540 } 541 - 541 + exit_put: 542 + addr_location__put(&al); 542 543 exit: 543 544 fclose(f); 544 545
+1 -1
tools/perf/builtin-top.c
··· 793 793 pthread_mutex_unlock(&hists->lock); 794 794 } 795 795 796 - return; 796 + addr_location__put(&al); 797 797 } 798 798 799 799 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
+23 -13
tools/perf/builtin-trace.c
··· 1712 1712 void *args; 1713 1713 size_t printed = 0; 1714 1714 struct thread *thread; 1715 - int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1715 + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1716 1716 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1717 1717 struct thread_trace *ttrace; 1718 1718 ··· 1725 1725 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1726 1726 ttrace = thread__trace(thread, trace->output); 1727 1727 if (ttrace == NULL) 1728 - return -1; 1728 + goto out_put; 1729 1729 1730 1730 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1731 1731 1732 1732 if (ttrace->entry_str == NULL) { 1733 1733 ttrace->entry_str = malloc(1024); 1734 1734 if (!ttrace->entry_str) 1735 - return -1; 1735 + goto out_put; 1736 1736 } 1737 1737 1738 1738 if (!trace->summary_only) ··· 1757 1757 thread__put(trace->current); 1758 1758 trace->current = thread__get(thread); 1759 1759 } 1760 - 1761 - return 0; 1760 + err = 0; 1761 + out_put: 1762 + thread__put(thread); 1763 + return err; 1762 1764 } 1763 1765 1764 1766 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ··· 1770 1768 long ret; 1771 1769 u64 duration = 0; 1772 1770 struct thread *thread; 1773 - int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1771 + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1774 1772 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1775 1773 struct thread_trace *ttrace; 1776 1774 ··· 1783 1781 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1784 1782 ttrace = thread__trace(thread, trace->output); 1785 1783 if (ttrace == NULL) 1786 - return -1; 1784 + goto out_put; 1787 1785 1788 1786 if (trace->summary) 1789 1787 thread__update_stats(ttrace, id, sample); ··· 1837 1835 fputc('\n', trace->output); 1838 1836 out: 1839 1837 ttrace->entry_pending = false; 1840 - 1841 - return 0; 1838 + err = 0; 1839 + out_put: 1840 + thread__put(thread); 1841 + return err; 
1842 1842 } 1843 1843 1844 1844 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ··· 1867 1863 1868 1864 ttrace->runtime_ms += runtime_ms; 1869 1865 trace->runtime_ms += runtime_ms; 1866 + thread__put(thread); 1870 1867 return 0; 1871 1868 1872 1869 out_dump: ··· 1877 1872 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1878 1873 runtime, 1879 1874 perf_evsel__intval(evsel, sample, "vruntime")); 1875 + thread__put(thread); 1880 1876 return 0; 1881 1877 } 1882 1878 ··· 1930 1924 struct addr_location al; 1931 1925 char map_type = 'd'; 1932 1926 struct thread_trace *ttrace; 1927 + int err = -1; 1933 1928 1934 1929 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1935 1930 ttrace = thread__trace(thread, trace->output); 1936 1931 if (ttrace == NULL) 1937 - return -1; 1932 + goto out_put; 1938 1933 1939 1934 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 1940 1935 ttrace->pfmaj++; ··· 1943 1936 ttrace->pfmin++; 1944 1937 1945 1938 if (trace->summary_only) 1946 - return 0; 1939 + goto out; 1947 1940 1948 1941 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 1949 1942 sample->ip, &al); ··· 1974 1967 print_location(trace->output, sample, &al, true, false); 1975 1968 1976 1969 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 1977 - 1978 - return 0; 1970 + out: 1971 + err = 0; 1972 + out_put: 1973 + thread__put(thread); 1974 + return err; 1979 1975 } 1980 1976 1981 1977 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
+14 -8
tools/perf/tests/code-reading.c
··· 248 248 struct perf_sample sample; 249 249 struct thread *thread; 250 250 u8 cpumode; 251 + int ret; 251 252 252 253 if (perf_evlist__parse_sample(evlist, event, &sample)) { 253 254 pr_debug("perf_evlist__parse_sample failed\n"); ··· 263 262 264 263 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 265 264 266 - return read_object_code(sample.ip, READLEN, cpumode, thread, state); 265 + ret = read_object_code(sample.ip, READLEN, cpumode, thread, state); 266 + thread__put(thread); 267 + return ret; 267 268 } 268 269 269 270 static int process_event(struct machine *machine, struct perf_evlist *evlist, ··· 460 457 thread = machine__findnew_thread(machine, pid, pid); 461 458 if (!thread) { 462 459 pr_debug("machine__findnew_thread failed\n"); 463 - goto out_err; 460 + goto out_put; 464 461 } 465 462 466 463 cpus = cpu_map__new(NULL); 467 464 if (!cpus) { 468 465 pr_debug("cpu_map__new failed\n"); 469 - goto out_err; 466 + goto out_put; 470 467 } 471 468 472 469 while (1) { ··· 475 472 evlist = perf_evlist__new(); 476 473 if (!evlist) { 477 474 pr_debug("perf_evlist__new failed\n"); 478 - goto out_err; 475 + goto out_put; 479 476 } 480 477 481 478 perf_evlist__set_maps(evlist, cpus, threads); ··· 488 485 ret = parse_events(evlist, str, NULL); 489 486 if (ret < 0) { 490 487 pr_debug("parse_events failed\n"); 491 - goto out_err; 488 + goto out_put; 492 489 } 493 490 494 491 perf_evlist__config(evlist, &opts); ··· 509 506 continue; 510 507 } 511 508 pr_debug("perf_evlist__open failed\n"); 512 - goto out_err; 509 + goto out_put; 513 510 } 514 511 break; 515 512 } ··· 517 514 ret = perf_evlist__mmap(evlist, UINT_MAX, false); 518 515 if (ret < 0) { 519 516 pr_debug("perf_evlist__mmap failed\n"); 520 - goto out_err; 517 + goto out_put; 521 518 } 522 519 523 520 perf_evlist__enable(evlist); ··· 528 525 529 526 ret = process_events(machine, evlist, &state); 530 527 if (ret < 0) 531 - goto out_err; 528 + goto out_put; 532 529 533 530 if (!have_vmlinux && 
!have_kcore && !try_kcore) 534 531 err = TEST_CODE_READING_NO_KERNEL_OBJ; ··· 538 535 err = TEST_CODE_READING_NO_ACCESS; 539 536 else 540 537 err = TEST_CODE_READING_OK; 538 + out_put: 539 + thread__put(thread); 541 540 out_err: 541 + 542 542 if (evlist) { 543 543 perf_evlist__delete(evlist); 544 544 } else {
+1
tools/perf/tests/dwarf-unwind.c
··· 170 170 } 171 171 172 172 err = krava_1(thread); 173 + thread__put(thread); 173 174 174 175 out: 175 176 machine__delete_threads(machine);
+1
tools/perf/tests/hists_common.c
··· 96 96 goto out; 97 97 98 98 thread__set_comm(thread, fake_threads[i].comm, 0); 99 + thread__put(thread); 99 100 } 100 101 101 102 for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+3 -1
tools/perf/tests/hists_cumulate.c
··· 105 105 goto out; 106 106 107 107 if (hist_entry_iter__add(&iter, &al, evsel, &sample, 108 - PERF_MAX_STACK_DEPTH, NULL) < 0) 108 + PERF_MAX_STACK_DEPTH, NULL) < 0) { 109 + addr_location__put(&al); 109 110 goto out; 111 + } 110 112 111 113 fake_samples[i].thread = al.thread; 112 114 fake_samples[i].map = al.map;
+3 -1
tools/perf/tests/hists_filter.c
··· 82 82 goto out; 83 83 84 84 if (hist_entry_iter__add(&iter, &al, evsel, &sample, 85 - PERF_MAX_STACK_DEPTH, NULL) < 0) 85 + PERF_MAX_STACK_DEPTH, NULL) < 0) { 86 + addr_location__put(&al); 86 87 goto out; 88 + } 87 89 88 90 fake_samples[i].thread = al.thread; 89 91 fake_samples[i].map = al.map;
+6 -2
tools/perf/tests/hists_link.c
··· 91 91 92 92 he = __hists__add_entry(hists, &al, NULL, 93 93 NULL, NULL, 1, 1, 0, true); 94 - if (he == NULL) 94 + if (he == NULL) { 95 + addr_location__put(&al); 95 96 goto out; 97 + } 96 98 97 99 fake_common_samples[k].thread = al.thread; 98 100 fake_common_samples[k].map = al.map; ··· 117 115 118 116 he = __hists__add_entry(hists, &al, NULL, 119 117 NULL, NULL, 1, 1, 0, true); 120 - if (he == NULL) 118 + if (he == NULL) { 119 + addr_location__put(&al); 121 120 goto out; 121 + } 122 122 123 123 fake_samples[i][k].thread = al.thread; 124 124 fake_samples[i][k].map = al.map;
+3 -1
tools/perf/tests/hists_output.c
··· 71 71 goto out; 72 72 73 73 if (hist_entry_iter__add(&iter, &al, evsel, &sample, 74 - PERF_MAX_STACK_DEPTH, NULL) < 0) 74 + PERF_MAX_STACK_DEPTH, NULL) < 0) { 75 + addr_location__put(&al); 75 76 goto out; 77 + } 76 78 77 79 fake_samples[i].thread = al.thread; 78 80 fake_samples[i].map = al.map;
+2
tools/perf/tests/mmap-thread-lookup.c
··· 191 191 PERF_RECORD_MISC_USER, MAP__FUNCTION, 192 192 (unsigned long) (td->map + 1), &al); 193 193 194 + thread__put(thread); 195 + 194 196 if (!al.map) { 195 197 pr_debug("failed, couldn't find map\n"); 196 198 err = -1;
+6 -6
tools/perf/tests/thread-mg-share.c
··· 64 64 TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); 65 65 66 66 /* release thread group */ 67 - thread__delete(leader); 67 + thread__put(leader); 68 68 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3); 69 69 70 - thread__delete(t1); 70 + thread__put(t1); 71 71 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2); 72 72 73 - thread__delete(t2); 73 + thread__put(t2); 74 74 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1); 75 75 76 - thread__delete(t3); 76 + thread__put(t3); 77 77 78 78 /* release other group */ 79 - thread__delete(other_leader); 79 + thread__put(other_leader); 80 80 TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1); 81 81 82 - thread__delete(other); 82 + thread__put(other); 83 83 84 84 /* 85 85 * Cannot call machine__delete_threads(machine) now,
+4 -1
tools/perf/util/build-id.c
··· 43 43 if (al.map != NULL) 44 44 al.map->dso->hit = 1; 45 45 46 + thread__put(thread); 46 47 return 0; 47 48 } 48 49 ··· 60 59 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, 61 60 event->fork.ppid, event->fork.ptid); 62 61 63 - if (thread) 62 + if (thread) { 64 63 machine__remove_thread(machine, thread); 64 + thread__put(thread); 65 + } 65 66 66 67 return 0; 67 68 }
+10 -4
tools/perf/util/db-export.c
··· 122 122 int db_export__thread(struct db_export *dbe, struct thread *thread, 123 123 struct machine *machine, struct comm *comm) 124 124 { 125 + struct thread *main_thread; 125 126 u64 main_thread_db_id = 0; 126 127 int err; 127 128 ··· 132 131 thread->db_id = ++dbe->thread_last_db_id; 133 132 134 133 if (thread->pid_ != -1) { 135 - struct thread *main_thread; 136 - 137 134 if (thread->pid_ == thread->tid) { 138 135 main_thread = thread; 139 136 } else { ··· 143 144 err = db_export__thread(dbe, main_thread, machine, 144 145 comm); 145 146 if (err) 146 - return err; 147 + goto out_put; 147 148 if (comm) { 148 149 err = db_export__comm_thread(dbe, comm, thread); 149 150 if (err) 150 - return err; 151 + goto out_put; 151 152 } 152 153 } 153 154 main_thread_db_id = main_thread->db_id; 155 + if (main_thread != thread) 156 + thread__put(main_thread); 154 157 } 155 158 156 159 if (dbe->export_thread) ··· 160 159 machine); 161 160 162 161 return 0; 162 + 163 + out_put: 164 + thread__put(main_thread); 165 + return err; 163 166 } 164 167 165 168 int db_export__comm(struct db_export *dbe, struct comm *comm, ··· 308 303 if (err) 309 304 return err; 310 305 306 + /* FIXME: check refcounting for get_main_thread, that calls machine__find_thread... */ 311 307 main_thread = get_main_thread(al->machine, thread); 312 308 if (main_thread) 313 309 comm = machine__thread_exec_comm(al->machine, main_thread);
+15
tools/perf/util/event.c
··· 919 919 al->sym = NULL; 920 920 } 921 921 922 + /* 923 + * Callers need to drop the reference to al->thread, obtained in 924 + * machine__findnew_thread() 925 + */ 922 926 int perf_event__preprocess_sample(const union perf_event *event, 923 927 struct machine *machine, 924 928 struct addr_location *al, ··· 981 977 } 982 978 983 979 return 0; 980 + } 981 + 982 + /* 983 + * The preprocess_sample method will return with reference counts for the 984 + * in it, when done using (and perhaps getting ref counts if needing to 985 + * keep a pointer to one of those entries) it must be paired with 986 + * addr_location__put(), so that the refcounts can be decremented. 987 + */ 988 + void addr_location__put(struct addr_location *al) 989 + { 990 + thread__zput(al->thread); 984 991 } 985 992 986 993 bool is_bts_event(struct perf_event_attr *attr)
+2
tools/perf/util/event.h
··· 426 426 struct addr_location *al, 427 427 struct perf_sample *sample); 428 428 429 + void addr_location__put(struct addr_location *al); 430 + 429 431 struct thread; 430 432 431 433 bool is_bts_event(struct perf_event_attr *attr);
+72 -17
tools/perf/util/machine.c
··· 14 14 #include "unwind.h" 15 15 #include "linux/hash.h" 16 16 17 + static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); 18 + 17 19 static void dsos__init(struct dsos *dsos) 18 20 { 19 21 INIT_LIST_HEAD(&dsos->head); ··· 30 28 dsos__init(&machine->kernel_dsos); 31 29 32 30 machine->threads = RB_ROOT; 31 + pthread_rwlock_init(&machine->threads_lock, NULL); 33 32 INIT_LIST_HEAD(&machine->dead_threads); 34 33 machine->last_match = NULL; 35 34 ··· 57 54 58 55 snprintf(comm, sizeof(comm), "[guest/%d]", pid); 59 56 thread__set_comm(thread, comm, 0); 57 + thread__put(thread); 60 58 } 61 59 62 60 machine->current_tid = NULL; ··· 95 91 96 92 void machine__delete_threads(struct machine *machine) 97 93 { 98 - struct rb_node *nd = rb_first(&machine->threads); 94 + struct rb_node *nd; 99 95 96 + pthread_rwlock_wrlock(&machine->threads_lock); 97 + nd = rb_first(&machine->threads); 100 98 while (nd) { 101 99 struct thread *t = rb_entry(nd, struct thread, rb_node); 102 100 103 101 nd = rb_next(nd); 104 - machine__remove_thread(machine, t); 102 + __machine__remove_thread(machine, t, false); 105 103 } 104 + pthread_rwlock_unlock(&machine->threads_lock); 106 105 } 107 106 108 107 void machine__exit(struct machine *machine) ··· 116 109 vdso__exit(machine); 117 110 zfree(&machine->root_dir); 118 111 zfree(&machine->current_tid); 112 + pthread_rwlock_destroy(&machine->threads_lock); 119 113 } 120 114 121 115 void machine__delete(struct machine *machine) ··· 311 303 if (th->pid_ == th->tid) 312 304 return; 313 305 314 - leader = machine__findnew_thread(machine, th->pid_, th->pid_); 306 + leader = __machine__findnew_thread(machine, th->pid_, th->pid_); 315 307 if (!leader) 316 308 goto out_err; 317 309 ··· 344 336 pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid); 345 337 } 346 338 347 - static struct thread *__machine__findnew_thread(struct machine *machine, 348 - pid_t pid, pid_t tid, 349 - bool create) 339 + static struct 
thread *____machine__findnew_thread(struct machine *machine, 340 + pid_t pid, pid_t tid, 341 + bool create) 350 342 { 351 343 struct rb_node **p = &machine->threads.rb_node; 352 344 struct rb_node *parent = NULL; ··· 401 393 */ 402 394 if (thread__init_map_groups(th, machine)) { 403 395 rb_erase(&th->rb_node, &machine->threads); 396 + RB_CLEAR_NODE(&th->rb_node); 404 397 thread__delete(th); 405 398 return NULL; 406 399 } ··· 415 406 return th; 416 407 } 417 408 409 + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) 410 + { 411 + return ____machine__findnew_thread(machine, pid, tid, true); 412 + } 413 + 418 414 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, 419 415 pid_t tid) 420 416 { 421 - return __machine__findnew_thread(machine, pid, tid, true); 417 + struct thread *th; 418 + 419 + pthread_rwlock_wrlock(&machine->threads_lock); 420 + th = thread__get(__machine__findnew_thread(machine, pid, tid)); 421 + pthread_rwlock_unlock(&machine->threads_lock); 422 + return th; 422 423 } 423 424 424 425 struct thread *machine__find_thread(struct machine *machine, pid_t pid, 425 426 pid_t tid) 426 427 { 427 - return __machine__findnew_thread(machine, pid, tid, false); 428 + struct thread *th; 429 + pthread_rwlock_rdlock(&machine->threads_lock); 430 + th = thread__get(____machine__findnew_thread(machine, pid, tid, false)); 431 + pthread_rwlock_unlock(&machine->threads_lock); 432 + return th; 428 433 } 429 434 430 435 struct comm *machine__thread_exec_comm(struct machine *machine, ··· 457 434 event->comm.pid, 458 435 event->comm.tid); 459 436 bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC; 437 + int err = 0; 460 438 461 439 if (exec) 462 440 machine->comm_exec = true; ··· 468 444 if (thread == NULL || 469 445 __thread__set_comm(thread, event->comm.comm, sample->time, exec)) { 470 446 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 471 - return -1; 447 + err = -1; 472 448 } 473 
449 474 - return 0; 450 + thread__put(thread); 451 + 452 + return err; 475 453 } 476 454 477 455 int machine__process_lost_event(struct machine *machine __maybe_unused, ··· 617 591 size_t ret = 0; 618 592 struct rb_node *nd; 619 593 594 + pthread_rwlock_rdlock(&machine->threads_lock); 595 + 620 596 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { 621 597 struct thread *pos = rb_entry(nd, struct thread, rb_node); 622 598 623 599 ret += thread__fprintf(pos, fp); 624 600 } 601 + 602 + pthread_rwlock_unlock(&machine->threads_lock); 625 603 626 604 return ret; 627 605 } ··· 1243 1213 event->mmap2.filename, type, thread); 1244 1214 1245 1215 if (map == NULL) 1246 - goto out_problem; 1216 + goto out_problem_map; 1247 1217 1248 1218 thread__insert_map(thread, map); 1219 + thread__put(thread); 1249 1220 return 0; 1250 1221 1222 + out_problem_map: 1223 + thread__put(thread); 1251 1224 out_problem: 1252 1225 dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n"); 1253 1226 return 0; ··· 1293 1260 type, thread); 1294 1261 1295 1262 if (map == NULL) 1296 - goto out_problem; 1263 + goto out_problem_map; 1297 1264 1298 1265 thread__insert_map(thread, map); 1266 + thread__put(thread); 1299 1267 return 0; 1300 1268 1269 + out_problem_map: 1270 + thread__put(thread); 1301 1271 out_problem: 1302 1272 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); 1303 1273 return 0; 1304 1274 } 1305 1275 1306 - void machine__remove_thread(struct machine *machine, struct thread *th) 1276 + static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) 1307 1277 { 1308 1278 if (machine->last_match == th) 1309 1279 thread__zput(machine->last_match); 1310 1280 1281 + BUG_ON(th->refcnt.counter == 0); 1282 + if (lock) 1283 + pthread_rwlock_wrlock(&machine->threads_lock); 1311 1284 rb_erase(&th->rb_node, &machine->threads); 1285 + RB_CLEAR_NODE(&th->rb_node); 1312 1286 /* 1313 1287 * Move it first to the dead_threads list, 
then drop the reference, 1314 1288 * if this is the last reference, then the thread__delete destructor 1315 1289 * will be called and we will remove it from the dead_threads list. 1316 1290 */ 1317 1291 list_add_tail(&th->node, &machine->dead_threads); 1292 + if (lock) 1293 + pthread_rwlock_unlock(&machine->threads_lock); 1318 1294 thread__put(th); 1295 + } 1296 + 1297 + void machine__remove_thread(struct machine *machine, struct thread *th) 1298 + { 1299 + return __machine__remove_thread(machine, th, true); 1319 1300 } 1320 1301 1321 1302 int machine__process_fork_event(struct machine *machine, union perf_event *event, ··· 1341 1294 struct thread *parent = machine__findnew_thread(machine, 1342 1295 event->fork.ppid, 1343 1296 event->fork.ptid); 1297 + int err = 0; 1344 1298 1345 1299 /* if a thread currently exists for the thread id remove it */ 1346 - if (thread != NULL) 1300 + if (thread != NULL) { 1347 1301 machine__remove_thread(machine, thread); 1302 + thread__put(thread); 1303 + } 1348 1304 1349 1305 thread = machine__findnew_thread(machine, event->fork.pid, 1350 1306 event->fork.tid); ··· 1357 1307 if (thread == NULL || parent == NULL || 1358 1308 thread__fork(thread, parent, sample->time) < 0) { 1359 1309 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); 1360 - return -1; 1310 + err = -1; 1361 1311 } 1312 + thread__put(thread); 1313 + thread__put(parent); 1362 1314 1363 - return 0; 1315 + return err; 1364 1316 } 1365 1317 1366 1318 int machine__process_exit_event(struct machine *machine, union perf_event *event, ··· 1375 1323 if (dump_trace) 1376 1324 perf_event__fprintf_task(event, stdout); 1377 1325 1378 - if (thread != NULL) 1326 + if (thread != NULL) { 1379 1327 thread__exited(thread); 1328 + thread__put(thread); 1329 + } 1380 1330 1381 1331 return 0; 1382 1332 } ··· 1895 1841 return -ENOMEM; 1896 1842 1897 1843 thread->cpu = cpu; 1844 + thread__put(thread); 1898 1845 1899 1846 return 0; 1900 1847 }
+3 -2
tools/perf/util/machine.h
··· 30 30 bool comm_exec; 31 31 char *root_dir; 32 32 struct rb_root threads; 33 + pthread_rwlock_t threads_lock; 33 34 struct list_head dead_threads; 34 35 struct thread *last_match; 35 36 struct vdso_info *vdso_info; ··· 152 151 return machine ? machine->pid == HOST_KERNEL_ID : false; 153 152 } 154 153 155 - struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, 156 - pid_t tid); 154 + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); 155 + struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); 157 156 158 157 size_t machine__fprintf(struct machine *machine, FILE *fp); 159 158
+8 -2
tools/perf/util/thread.c
··· 18 18 if (pid == thread->tid || pid == -1) { 19 19 thread->mg = map_groups__new(machine); 20 20 } else { 21 - leader = machine__findnew_thread(machine, pid, pid); 21 + leader = __machine__findnew_thread(machine, pid, pid); 22 22 if (leader) 23 23 thread->mg = map_groups__get(leader->mg); 24 24 } ··· 54 54 55 55 list_add(&comm->list, &thread->comm_list); 56 56 atomic_set(&thread->refcnt, 0); 57 + INIT_LIST_HEAD(&thread->node); 58 + RB_CLEAR_NODE(&thread->rb_node); 57 59 } 58 60 59 61 return thread; ··· 68 66 void thread__delete(struct thread *thread) 69 67 { 70 68 struct comm *comm, *tmp; 69 + 70 + BUG_ON(!RB_EMPTY_NODE(&thread->rb_node)); 71 + BUG_ON(!list_empty(&thread->node)); 71 72 72 73 thread_stack__free(thread); 73 74 ··· 89 84 90 85 struct thread *thread__get(struct thread *thread) 91 86 { 92 - atomic_inc(&thread->refcnt); 87 + if (thread) 88 + atomic_inc(&thread->refcnt); 93 89 return thread; 94 90 } 95 91