// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "perf.h"

#include "util/dso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/config.h"
#include "util/map.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/callchain.h"
#include "util/time-utils.h"
#include <linux/err.h>

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"

#include "util/debug.h"
#include "util/string2.h"

#include <linux/kernel.h>
#include <linux/rbtree.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <regex.h>

#include <linux/ctype.h>
#include <traceevent/event-parse.h>

static int kmem_slab;
static int kmem_page;

static long kmem_page_size;
static enum {
	KMEM_SLAB,
	KMEM_PAGE,
} kmem_default = KMEM_SLAB; /* for backward compatibility */

struct alloc_stat;
typedef int (*sort_fn_t)(void *, void *);

static int alloc_flag;
static int caller_flag;

static int alloc_lines = -1;
static int caller_lines = -1;

static bool raw_ip;

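/*
 * Per-pointer/per-callsite accounting node. 'alloc_cpu' remembers the
 * CPU that did the allocation, and 'pingpong' counts frees observed on a
 * different CPU than the allocating one (see evsel__process_free_event()).
 */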
struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u64	last_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated, total_freed;
static unsigned long nr_allocs, nr_cross_allocs;

/* filters controlling the start and end time of the analysis */
static struct perf_time_interval ptime;
const char *time_str;

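/*
 * The slab path keeps two red-black trees updated from the same events:
 * root_alloc_stat is keyed by the returned pointer (so a later free can
 * find its allocation) and root_caller_stat is keyed by call site.
 */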
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	data->last_alloc = bytes_alloc;

	return 0;
}

static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
	unsigned long ptr = evsel__intval(evsel, sample, "ptr"),
		      call_site = evsel__intval(evsel, sample, "call_site");
	int bytes_req = evsel__intval(evsel, sample, "bytes_req"),
	    bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc");

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	nr_allocs++;
	return 0;
}

static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
{
	int ret = evsel__process_alloc_event(evsel, sample);

	if (!ret) {
		int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
		    node2 = evsel__intval(evsel, sample, "node");

		if (node1 != node2)
			nr_cross_allocs++;
	}

	return ret;
}

static int ptr_cmp(void *, void *);
static int slab_callsite_cmp(void *, void *);

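/*
 * Look up a node in one of the stat trees. The comparators used as sort
 * keys (ptr_cmp, slab_callsite_cmp) double as the lookup predicate, so
 * one helper serves both the ptr- and the callsite-keyed tree.
 */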
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample)
{
	unsigned long ptr = evsel__intval(evsel, sample, "ptr");
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	total_freed += s_alloc->last_alloc;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat,
					     slab_callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

static u64 total_page_alloc_bytes;
static u64 total_page_free_bytes;
static u64 total_page_nomatch_bytes;
static u64 total_page_fail_bytes;
static unsigned long nr_page_allocs;
static unsigned long nr_page_frees;
static unsigned long nr_page_fails;
static unsigned long nr_page_nomatch;

static bool use_pfn;
static bool live_page;
static struct perf_session *kmem_session;

#define MAX_MIGRATE_TYPES 6
#define MAX_PAGE_ORDER 11

static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];

struct page_stat {
	struct rb_node	node;
	u64		page;
	u64		callsite;
	int		order;
	unsigned	gfp_flags;
	unsigned	migrate_type;
	u64		alloc_bytes;
	u64		free_bytes;
	int		nr_alloc;
	int		nr_free;
};

static struct rb_root page_live_tree;
static struct rb_root page_alloc_tree;
static struct rb_root page_alloc_sorted;
static struct rb_root page_caller_tree;
static struct rb_root page_caller_sorted;

struct alloc_func {
	u64 start;
	u64 end;
	char *name;
};

static int nr_alloc_funcs;
static struct alloc_func *alloc_func_list;

static int funcmp(const void *a, const void *b)
{
	const struct alloc_func *fa = a;
	const struct alloc_func *fb = b;

	if (fa->start > fb->start)
		return 1;
	else
		return -1;
}

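/*
 * bsearch() comparator: the key passed in carries start == end == ip,
 * and an entry compares equal when that ip lies inside its [start, end)
 * range, i.e. when the address belongs to a known allocator function.
 */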
static int callcmp(const void *a, const void *b)
{
	const struct alloc_func *fa = a;
	const struct alloc_func *fb = b;

	if (fb->start <= fa->start && fa->end < fb->end)
		return 0;

	if (fa->start > fb->start)
		return 1;
	else
		return -1;
}

static int build_alloc_func_list(void)
{
	int ret;
	struct map *kernel_map;
	struct symbol *sym;
	struct rb_node *node;
	struct alloc_func *func;
	struct machine *machine = &kmem_session->machines.host;
	regex_t alloc_func_regex;
	static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";

	ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
	if (ret) {
		char err[BUFSIZ];

		regerror(ret, &alloc_func_regex, err, sizeof(err));
		pr_err("Invalid regex: %s\n%s", pattern, err);
		return -EINVAL;
	}

	kernel_map = machine__kernel_map(machine);
	if (map__load(kernel_map) < 0) {
		pr_err("cannot load kernel map\n");
		return -ENOENT;
	}

	map__for_each_symbol(kernel_map, sym, node) {
		if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
			continue;

		func = realloc(alloc_func_list,
			       (nr_alloc_funcs + 1) * sizeof(*func));
		if (func == NULL)
			return -ENOMEM;

		pr_debug("alloc func: %s\n", sym->name);
		func[nr_alloc_funcs].start = sym->start;
		func[nr_alloc_funcs].end = sym->end;
		func[nr_alloc_funcs].name = sym->name;

		alloc_func_list = func;
		nr_alloc_funcs++;
	}

	qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);

	regfree(&alloc_func_regex);
	return 0;
}

/*
 * Find the first function in the callchain that is not itself a memory
 * allocator; the known allocator entry points are kept in 'alloc_func_list'.
 */
static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
{
	struct addr_location al;
	struct machine *machine = &kmem_session->machines.host;
	struct callchain_cursor_node *node;

	if (alloc_func_list == NULL) {
		if (build_alloc_func_list() < 0)
			goto out;
	}

	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);

	callchain_cursor_commit(&callchain_cursor);
	while (true) {
		struct alloc_func key, *caller;
		u64 addr;

		node = callchain_cursor_current(&callchain_cursor);
		if (node == NULL)
			break;

		key.start = key.end = node->ip;
		caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
				 sizeof(key), callcmp);
		if (!caller) {
			/* found */
			if (node->ms.map)
				addr = map__unmap_ip(node->ms.map, node->ip);
			else
				addr = node->ip;

			return addr;
		} else
			pr_debug3("skipping alloc function: %s\n", caller->name);

		callchain_cursor_advance(&callchain_cursor);
	}

out:
	pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
	return sample->ip;
}
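/*
 * For example (hypothetical chain): a callchain entering the page
 * allocator as some_driver_fn() -> __get_free_pages() -> __alloc_pages()
 * gets attributed to some_driver_fn(), since both allocator frames match
 * the "^_?_?(alloc|get_free|get_zeroed)_pages?" pattern and are skipped.
 */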

struct sort_dimension {
	const char		name[20];
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(page_alloc_sort_input);
static LIST_HEAD(page_caller_sort_input);

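/*
 * Three trees track page events: page_live_tree holds currently
 * allocated pages keyed by pfn/page address, while page_alloc_tree and
 * page_caller_tree aggregate allocations ordered by the *_sort_input
 * dimensions (page/callsite, order, migrate type, gfp flags).
 */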
static struct page_stat *
__page_stat__findnew_page(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_live_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;

	while (*node) {
		s64 cmp;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		cmp = data->page - pstat->page;
		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->page = pstat->page;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_live_tree);
	}

	return data;
}

static struct page_stat *page_stat__find_page(struct page_stat *pstat)
{
	return __page_stat__findnew_page(pstat, false);
}

static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
{
	return __page_stat__findnew_page(pstat, true);
}

static struct page_stat *
__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_alloc_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;
	struct sort_dimension *sort;

	while (*node) {
		int cmp = 0;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		list_for_each_entry(sort, &page_alloc_sort_input, list) {
			cmp = sort->cmp(pstat, data);
			if (cmp)
				break;
		}

		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->page = pstat->page;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_alloc_tree);
	}

	return data;
}

static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
{
	return __page_stat__findnew_alloc(pstat, false);
}

static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
{
	return __page_stat__findnew_alloc(pstat, true);
}

static struct page_stat *
__page_stat__findnew_caller(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_caller_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;
	struct sort_dimension *sort;

	while (*node) {
		int cmp = 0;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		list_for_each_entry(sort, &page_caller_sort_input, list) {
			cmp = sort->cmp(pstat, data);
			if (cmp)
				break;
		}

		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->callsite = pstat->callsite;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_caller_tree);
	}

	return data;
}

static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
{
	return __page_stat__findnew_caller(pstat, false);
}

static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
{
	return __page_stat__findnew_caller(pstat, true);
}

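/*
 * Failed allocations still produce an mm_page_alloc event; they show up
 * as pfn == -1 (or a NULL struct page pointer on kernels without the pfn
 * field), which is what valid_page() screens out so that they are only
 * counted as failures.
 */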
static bool valid_page(u64 pfn_or_page)
{
	if (use_pfn && pfn_or_page == -1UL)
		return false;
	if (!use_pfn && pfn_or_page == 0)
		return false;
	return true;
}

struct gfp_flag {
	unsigned int flags;
	char *compact_str;
	char *human_readable;
};

static struct gfp_flag *gfps;
static int nr_gfps;

static int gfpcmp(const void *a, const void *b)
{
	const struct gfp_flag *fa = a;
	const struct gfp_flag *fb = b;

	return fa->flags - fb->flags;
}

/* see include/trace/events/mmflags.h */
static const struct {
	const char *original;
	const char *compact;
} gfp_compact_table[] = {
	{ "GFP_TRANSHUGE",		"THP" },
	{ "GFP_TRANSHUGE_LIGHT",	"THL" },
	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
	{ "GFP_HIGHUSER",		"HU" },
	{ "GFP_USER",			"U" },
	{ "GFP_KERNEL_ACCOUNT",		"KAC" },
	{ "GFP_KERNEL",			"K" },
	{ "GFP_NOFS",			"NF" },
	{ "GFP_ATOMIC",			"A" },
	{ "GFP_NOIO",			"NI" },
	{ "GFP_NOWAIT",			"NW" },
	{ "GFP_DMA",			"D" },
	{ "__GFP_HIGHMEM",		"HM" },
	{ "GFP_DMA32",			"D32" },
	{ "__GFP_HIGH",			"H" },
	{ "__GFP_ATOMIC",		"_A" },
	{ "__GFP_IO",			"I" },
	{ "__GFP_FS",			"F" },
	{ "__GFP_NOWARN",		"NWR" },
	{ "__GFP_RETRY_MAYFAIL",	"R" },
	{ "__GFP_NOFAIL",		"NF" },
	{ "__GFP_NORETRY",		"NR" },
	{ "__GFP_COMP",			"C" },
	{ "__GFP_ZERO",			"Z" },
	{ "__GFP_NOMEMALLOC",		"NMA" },
	{ "__GFP_MEMALLOC",		"MA" },
	{ "__GFP_HARDWALL",		"HW" },
	{ "__GFP_THISNODE",		"TN" },
	{ "__GFP_RECLAIMABLE",		"RC" },
	{ "__GFP_MOVABLE",		"M" },
	{ "__GFP_ACCOUNT",		"AC" },
	{ "__GFP_WRITE",		"WR" },
	{ "__GFP_RECLAIM",		"R" },
	{ "__GFP_DIRECT_RECLAIM",	"DR" },
	{ "__GFP_KSWAPD_RECLAIM",	"KR" },
};

static size_t max_gfp_len;

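/*
 * Turn a printed flag mask into its short form using the table above,
 * e.g. "GFP_KERNEL|__GFP_ZERO" becomes "K|Z".
 */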
static char *compact_gfp_flags(char *gfp_flags)
{
	char *orig_flags = strdup(gfp_flags);
	char *new_flags = NULL;
	char *str, *pos = NULL;
	size_t len = 0;

	if (orig_flags == NULL)
		return NULL;

	str = strtok_r(orig_flags, "|", &pos);
	while (str) {
		size_t i;
		char *new;
		const char *cpt;

		for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
			if (strcmp(gfp_compact_table[i].original, str))
				continue;

			cpt = gfp_compact_table[i].compact;
			new = realloc(new_flags, len + strlen(cpt) + 2);
			if (new == NULL) {
				free(new_flags);
				free(orig_flags);
				return NULL;
			}

			new_flags = new;

			if (!len) {
				strcpy(new_flags, cpt);
			} else {
				strcat(new_flags, "|");
				strcat(new_flags, cpt);
				len++;
			}

			len += strlen(cpt);
		}

		str = strtok_r(NULL, "|", &pos);
	}

	if (max_gfp_len < len)
		max_gfp_len = len;

	free(orig_flags);
	return new_flags;
}

static char *compact_gfp_string(unsigned long gfp_flags)
{
	struct gfp_flag key = {
		.flags = gfp_flags,
	};
	struct gfp_flag *gfp;

	gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
	if (gfp)
		return gfp->compact_str;

	return NULL;
}

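/*
 * Decode the raw gfp_flags value lazily: the first time a given mask is
 * seen, the event is pretty-printed once via libtraceevent and the
 * resulting "gfp_flags=..." string is cached (both the full and the
 * compacted form) in the sorted 'gfps' array for later lookups.
 */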
static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
			   unsigned int gfp_flags)
{
	struct tep_record record = {
		.cpu = sample->cpu,
		.data = sample->raw_data,
		.size = sample->raw_size,
	};
	struct trace_seq seq;
	char *str, *pos = NULL;

	if (nr_gfps) {
		struct gfp_flag key = {
			.flags = gfp_flags,
		};

		if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
			return 0;
	}

	trace_seq_init(&seq);
	tep_print_event(evsel->tp_format->tep,
			&seq, &record, "%s", TEP_PRINT_INFO);

	str = strtok_r(seq.buffer, " ", &pos);
	while (str) {
		if (!strncmp(str, "gfp_flags=", 10)) {
			struct gfp_flag *new;

			new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
			if (new == NULL)
				return -ENOMEM;

			gfps = new;
			new += nr_gfps++;

			new->flags = gfp_flags;
			new->human_readable = strdup(str + 10);
			new->compact_str = compact_gfp_flags(str + 10);
			if (!new->human_readable || !new->compact_str)
				return -ENOMEM;

			qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
		}

		str = strtok_r(NULL, " ", &pos);
	}

	trace_seq_destroy(&seq);
	return 0;
}

static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
	u64 page;
	unsigned int order = evsel__intval(evsel, sample, "order");
	unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags");
	unsigned int migrate_type = evsel__intval(evsel, sample,
						  "migratetype");
	u64 bytes = kmem_page_size << order;
	u64 callsite;
	struct page_stat *pstat;
	struct page_stat this = {
		.order = order,
		.gfp_flags = gfp_flags,
		.migrate_type = migrate_type,
	};

	if (use_pfn)
		page = evsel__intval(evsel, sample, "pfn");
	else
		page = evsel__intval(evsel, sample, "page");

	nr_page_allocs++;
	total_page_alloc_bytes += bytes;

	if (!valid_page(page)) {
		nr_page_fails++;
		total_page_fail_bytes += bytes;

		return 0;
	}

	if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
		return -1;

	callsite = find_callsite(evsel, sample);

	/*
	 * Track the page in the live tree so the matching free event can
	 * look it up with the correct gfp flags and migrate type.
	 */
	this.page = page;
	pstat = page_stat__findnew_page(&this);
	if (pstat == NULL)
		return -ENOMEM;

	pstat->nr_alloc++;
	pstat->alloc_bytes += bytes;
	pstat->callsite = callsite;

	if (!live_page) {
		pstat = page_stat__findnew_alloc(&this);
		if (pstat == NULL)
			return -ENOMEM;

		pstat->nr_alloc++;
		pstat->alloc_bytes += bytes;
		pstat->callsite = callsite;
	}

	this.callsite = callsite;
	pstat = page_stat__findnew_caller(&this);
	if (pstat == NULL)
		return -ENOMEM;

	pstat->nr_alloc++;
	pstat->alloc_bytes += bytes;

	order_stats[order][migrate_type]++;

	return 0;
}

static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample)
{
	u64 page;
	unsigned int order = evsel__intval(evsel, sample, "order");
	u64 bytes = kmem_page_size << order;
	struct page_stat *pstat;
	struct page_stat this = {
		.order = order,
	};

	if (use_pfn)
		page = evsel__intval(evsel, sample, "pfn");
	else
		page = evsel__intval(evsel, sample, "page");

	nr_page_frees++;
	total_page_free_bytes += bytes;

	this.page = page;
	pstat = page_stat__find_page(&this);
	if (pstat == NULL) {
		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
			  page, order);

		nr_page_nomatch++;
		total_page_nomatch_bytes += bytes;

		return 0;
	}

	this.gfp_flags = pstat->gfp_flags;
	this.migrate_type = pstat->migrate_type;
	this.callsite = pstat->callsite;

	rb_erase(&pstat->node, &page_live_tree);
	free(pstat);

	if (live_page) {
		order_stats[this.order][this.migrate_type]--;
	} else {
		pstat = page_stat__find_alloc(&this);
		if (pstat == NULL)
			return -ENOMEM;

		pstat->nr_free++;
		pstat->free_bytes += bytes;
	}

	pstat = page_stat__find_caller(&this);
	if (pstat == NULL)
		return -ENOENT;

	pstat->nr_free++;
	pstat->free_bytes += bytes;

	if (live_page) {
		pstat->nr_alloc--;
		pstat->alloc_bytes -= bytes;

		if (pstat->nr_alloc == 0) {
			rb_erase(&pstat->node, &page_caller_tree);
			free(pstat);
		}
	}

	return 0;
}

static bool perf_kmem__skip_sample(struct perf_sample *sample)
{
	/* skip sample based on time? */
	if (perf_time__skip_sample(&ptime, sample->time))
		return true;

	return false;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (perf_kmem__skip_sample(sample))
		return 0;

	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static struct perf_tool perf_kmem = {
	.sample		= process_sample_event,
	.comm		= perf_event__process_comm,
	.mmap		= perf_event__process_mmap,
	.mmap2		= perf_event__process_mmap2,
	.namespaces	= perf_event__process_namespaces,
	.ordered_events	= true,
};

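/*
 * Internal fragmentation as a percentage of the allocated size: e.g.
 * n_req = 24 bytes served by a 32-byte allocation gives
 * 100 - 100*24/32 = 25% waste.
 */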
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_slab_result(struct rb_root *root,
				struct perf_session *session,
				int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine = &session->machines.host;

	printf("%.105s\n", graph_dotted_line);
	printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.105s\n", graph_dotted_line);

	next = rb_first(root);

	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_symbol(machine, addr, &map);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map__unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");

	printf("%.105s\n", graph_dotted_line);
}

static const char * const migrate_type_str[] = {
	"UNMOVABL",
	"RECLAIM",
	"MOVABLE",
	"RESERVED",
	"CMA/ISLT",
	"UNKNOWN",
};

static void __print_page_alloc_result(struct perf_session *session, int n_lines)
{
	struct rb_node *next = rb_first(&page_alloc_sorted);
	struct machine *machine = &session->machines.host;
	const char *format;
	int gfp_len = max(strlen("GFP flags"), max_gfp_len);

	printf("\n%.105s\n", graph_dotted_line);
	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
	       gfp_len, "GFP flags");
	printf("%.105s\n", graph_dotted_line);

	if (use_pfn)
		format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
	else
		format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";

	while (next && n_lines--) {
		struct page_stat *data;
		struct symbol *sym;
		struct map *map;
		char buf[32];
		char *caller = buf;

		data = rb_entry(next, struct page_stat, node);
		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
		if (sym)
			caller = sym->name;
		else
			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

		printf(format, (unsigned long long)data->page,
		       (unsigned long long)data->alloc_bytes / 1024,
		       data->nr_alloc, data->order,
		       migrate_type_str[data->migrate_type],
		       gfp_len, compact_gfp_string(data->gfp_flags), caller);

		next = rb_next(next);
	}

	if (n_lines == -1) {
		printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
		       gfp_len, "...");
	}

	printf("%.105s\n", graph_dotted_line);
}

static void __print_page_caller_result(struct perf_session *session, int n_lines)
{
	struct rb_node *next = rb_first(&page_caller_sorted);
	struct machine *machine = &session->machines.host;
	int gfp_len = max(strlen("GFP flags"), max_gfp_len);

	printf("\n%.105s\n", graph_dotted_line);
	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
	       live_page ? "Live" : "Total", gfp_len, "GFP flags");
	printf("%.105s\n", graph_dotted_line);

	while (next && n_lines--) {
		struct page_stat *data;
		struct symbol *sym;
		struct map *map;
		char buf[32];
		char *caller = buf;

		data = rb_entry(next, struct page_stat, node);
		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
		if (sym)
			caller = sym->name;
		else
			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

		printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
		       (unsigned long long)data->alloc_bytes / 1024,
		       data->nr_alloc, data->order,
		       migrate_type_str[data->migrate_type],
		       gfp_len, compact_gfp_string(data->gfp_flags), caller);

		next = rb_next(next);
	}

	if (n_lines == -1) {
		printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
		       gfp_len, "...");
	}

	printf("%.105s\n", graph_dotted_line);
}

static void print_gfp_flags(void)
{
	int i;

	printf("#\n");
	printf("# GFP flags\n");
	printf("# ---------\n");
	for (i = 0; i < nr_gfps; i++) {
		printf("# %08x: %*s: %s\n", gfps[i].flags,
		       (int) max_gfp_len, gfps[i].compact_str,
		       gfps[i].human_readable);
	}
}

static void print_slab_summary(void)
{
	printf("\nSUMMARY (SLAB allocator)");
	printf("\n========================\n");
	printf("Total bytes requested: %'lu\n", total_requested);
	printf("Total bytes allocated: %'lu\n", total_allocated);
	printf("Total bytes freed: %'lu\n", total_freed);
	if (total_allocated > total_freed) {
		printf("Net total bytes allocated: %'lu\n",
		       total_allocated - total_freed);
	}
	printf("Total bytes wasted on internal fragmentation: %'lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
}

static void print_page_summary(void)
{
	int o, m;
	u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
	u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;

	printf("\nSUMMARY (page allocator)");
	printf("\n========================\n");
	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests",
	       nr_page_allocs, total_page_alloc_bytes / 1024);
	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests",
	       nr_page_frees, total_page_free_bytes / 1024);
	printf("\n");

	printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
	       nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
	printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
	       nr_page_allocs - nr_alloc_freed,
	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",
	       nr_page_nomatch, total_page_nomatch_bytes / 1024);
	printf("\n");

	printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures",
	       nr_page_fails, total_page_fail_bytes / 1024);
	printf("\n");

	printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable",
	       "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
	printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line,
	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
	       graph_dotted_line, graph_dotted_line);

	for (o = 0; o < MAX_PAGE_ORDER; o++) {
		printf("%5d", o);
		for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
			if (order_stats[o][m])
				printf(" %'12d", order_stats[o][m]);
			else
				printf(" %12c", '.');
		}
		printf("\n");
	}
}

static void print_slab_result(struct perf_session *session)
{
	if (caller_flag)
		__print_slab_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_slab_summary();
}

static void print_page_result(struct perf_session *session)
{
	if (caller_flag || alloc_flag)
		print_gfp_flags();
	if (caller_flag)
		__print_page_caller_result(session, caller_lines);
	if (alloc_flag)
		__print_page_alloc_result(session, alloc_lines);
	print_page_summary();
}

static void print_result(struct perf_session *session)
{
	if (kmem_slab)
		print_slab_result(session);
	if (kmem_page)
		print_page_result(session);
}

static LIST_HEAD(slab_caller_sort);
static LIST_HEAD(slab_alloc_sort);
static LIST_HEAD(page_caller_sort);
static LIST_HEAD(page_alloc_sort);

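/*
 * Rebuild a stat tree ordered by the user's sort keys. Note the inverted
 * link step below: a node that compares greater goes to the left, so an
 * in-order walk (rb_first/rb_next) visits entries in descending order of
 * the primary key.
 */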
static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
			     struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
			       struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_slab_insert(root_sorted, data, sort_list);
	}
}

static void sort_page_insert(struct rb_root *root, struct page_stat *data,
			     struct list_head *sort_list)
{
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct page_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct page_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &parent->rb_left;
		else
			new = &parent->rb_right;
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
			       struct list_head *sort_list)
{
	struct rb_node *node;
	struct page_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct page_stat, node);
		sort_page_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	if (kmem_slab) {
		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
				   &slab_alloc_sort);
		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
				   &slab_caller_sort);
	}
	if (kmem_page) {
		if (live_page)
			__sort_page_result(&page_live_tree, &page_alloc_sorted,
					   &page_alloc_sort);
		else
			__sort_page_result(&page_alloc_tree, &page_alloc_sorted,
					   &page_alloc_sort);

		__sort_page_result(&page_caller_tree, &page_caller_sorted,
				   &page_caller_sort);
	}
}

static int __cmd_kmem(struct perf_session *session)
{
	int err = -EINVAL;
	struct evsel *evsel;
	const struct evsel_str_handler kmem_tracepoints[] = {
		/* slab allocator */
		{ "kmem:kmalloc",		evsel__process_alloc_event, },
		{ "kmem:kmem_cache_alloc",	evsel__process_alloc_event, },
		{ "kmem:kmalloc_node",		evsel__process_alloc_node_event, },
		{ "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
		{ "kmem:kfree",			evsel__process_free_event, },
		{ "kmem:kmem_cache_free",	evsel__process_free_event, },
		/* page allocator */
		{ "kmem:mm_page_alloc",		evsel__process_page_alloc_event, },
		{ "kmem:mm_page_free",		evsel__process_page_free_event, },
	};

	if (!perf_session__has_traces(session, "kmem record"))
		goto out;

	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out;
	}

	evlist__for_each_entry(session->evlist, evsel) {
		if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
		    evsel__field(evsel, "pfn")) {
			use_pfn = true;
			break;
		}
	}

	setup_pager();
	err = perf_session__process_events(session);
	if (err != 0) {
		pr_err("error during process events: %d\n", err);
		goto out;
	}
	sort_result();
	print_result(session);
out:
	return err;
}

/* slab sort keys */
static int ptr_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int slab_callsite_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= slab_callsite_cmp,
};

static int hit_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(void *a, void *b)
{
	double x, y;
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(void *a, void *b)
{
	struct alloc_stat *l = a;
	struct alloc_stat *r = b;

	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

/* page sort keys */
static int page_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->page < r->page)
		return -1;
	else if (l->page > r->page)
		return 1;
	return 0;
}

static struct sort_dimension page_sort_dimension = {
	.name	= "page",
	.cmp	= page_cmp,
};

static int page_callsite_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->callsite < r->callsite)
		return -1;
	else if (l->callsite > r->callsite)
		return 1;
	return 0;
}

static struct sort_dimension page_callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= page_callsite_cmp,
};

static int page_hit_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->nr_alloc < r->nr_alloc)
		return -1;
	else if (l->nr_alloc > r->nr_alloc)
		return 1;
	return 0;
}

static struct sort_dimension page_hit_sort_dimension = {
	.name	= "hit",
	.cmp	= page_hit_cmp,
};

static int page_bytes_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->alloc_bytes < r->alloc_bytes)
		return -1;
	else if (l->alloc_bytes > r->alloc_bytes)
		return 1;
	return 0;
}

static struct sort_dimension page_bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= page_bytes_cmp,
};

static int page_order_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	if (l->order < r->order)
		return -1;
	else if (l->order > r->order)
		return 1;
	return 0;
}

static struct sort_dimension page_order_sort_dimension = {
	.name	= "order",
	.cmp	= page_order_cmp,
};

static int migrate_type_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	/* for internal use to find free'd page */
	if (l->migrate_type == -1U)
		return 0;

	if (l->migrate_type < r->migrate_type)
		return -1;
	else if (l->migrate_type > r->migrate_type)
		return 1;
	return 0;
}

static struct sort_dimension migrate_type_sort_dimension = {
	.name	= "migtype",
	.cmp	= migrate_type_cmp,
};

static int gfp_flags_cmp(void *a, void *b)
{
	struct page_stat *l = a;
	struct page_stat *r = b;

	/* for internal use to find free'd page */
	if (l->gfp_flags == -1U)
		return 0;

	if (l->gfp_flags < r->gfp_flags)
		return -1;
	else if (l->gfp_flags > r->gfp_flags)
		return 1;
	return 0;
}

static struct sort_dimension gfp_flags_sort_dimension = {
	.name	= "gfp",
	.cmp	= gfp_flags_cmp,
};

static struct sort_dimension *slab_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

static struct sort_dimension *page_sorts[] = {
	&page_sort_dimension,
	&page_callsite_sort_dimension,
	&page_hit_sort_dimension,
	&page_bytes_sort_dimension,
	&page_order_sort_dimension,
	&migrate_type_sort_dimension,
	&gfp_flags_sort_dimension,
};

static int slab_sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
		if (!strcmp(slab_sorts[i]->name, tok)) {
			sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int page_sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
		if (!strcmp(page_sorts[i]->name, tok)) {
			sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (slab_sort_dimension__add(tok, sort_list) < 0) {
			pr_err("Unknown slab --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int setup_page_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (page_sort_dimension__add(tok, sort_list) < 0) {
			pr_err("Unknown page --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (kmem_page > kmem_slab ||
	    (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
		if (caller_flag > alloc_flag)
			return setup_page_sorting(&page_caller_sort, arg);
		else
			return setup_page_sorting(&page_alloc_sort, arg);
	} else {
		if (caller_flag > alloc_flag)
			return setup_slab_sorting(&slab_caller_sort, arg);
		else
			return setup_slab_sorting(&slab_alloc_sort, arg);
	}

	return 0;
}

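/*
 * --caller/--alloc (and --slab/--page below) record precedence rather
 * than a plain boolean: each option sets its flag to the other's value
 * plus one, so whichever was given last compares greater and wins in
 * checks like "caller_flag > alloc_flag".
 */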
static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_slab_opt(const struct option *opt __maybe_unused,
			  const char *arg __maybe_unused,
			  int unset __maybe_unused)
{
	kmem_slab = (kmem_page + 1);
	return 0;
}

static int parse_page_opt(const struct option *opt __maybe_unused,
			  const char *arg __maybe_unused,
			  int unset __maybe_unused)
{
	kmem_page = (kmem_slab + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

static int __cmd_record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record", "-a", "-R", "-c", "1",
	};
	const char * const slab_events[] = {
		"-e", "kmem:kmalloc",
		"-e", "kmem:kmalloc_node",
		"-e", "kmem:kfree",
		"-e", "kmem:kmem_cache_alloc",
		"-e", "kmem:kmem_cache_alloc_node",
		"-e", "kmem:kmem_cache_free",
	};
	const char * const page_events[] = {
		"-e", "kmem:mm_page_alloc",
		"-e", "kmem:mm_page_free",
	};
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	if (kmem_slab)
		rec_argc += ARRAY_SIZE(slab_events);
	if (kmem_page)
		rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */

	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	if (kmem_slab) {
		for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
			rec_argv[i] = strdup(slab_events[j]);
	}
	if (kmem_page) {
		rec_argv[i++] = strdup("-g");

		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
			rec_argv[i] = strdup(page_events[j]);
	}

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv);
}
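/*
 * Typical usage (example command lines, workload arbitrary):
 *
 *	perf kmem record --slab -- sleep 10
 *	perf kmem stat --caller --sort hit,bytes
 *
 *	perf kmem record --page -- sleep 10
 *	perf kmem stat --page --caller
 */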

static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
{
	if (!strcmp(var, "kmem.default")) {
		if (!strcmp(value, "slab"))
			kmem_default = KMEM_SLAB;
		else if (!strcmp(value, "page"))
			kmem_default = KMEM_PAGE;
		else
			pr_err("invalid default value ('slab' or 'page' required): %s\n",
			       value);
		return 0;
	}

	return 0;
}
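/*
 * The default allocator can also be chosen via perf config, e.g. in
 * ~/.perfconfig:
 *
 *	[kmem]
 *		default = page
 */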

int cmd_kmem(int argc, const char **argv)
{
	const char * const default_slab_sort = "frag,hit,bytes";
	const char * const default_page_sort = "bytes,hit";
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
	};
	const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show symbol address, etc)"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics", parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics", parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
		     "page, order, migtype, gfp", parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
	OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
			   parse_slab_opt),
	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
			   parse_page_opt),
	OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
	OPT_STRING(0, "time", &time_str, "str",
		   "Time span of interest (start,stop)"),
	OPT_END()
	};
	const char *const kmem_subcommands[] = { "record", "stat", NULL };
	const char *kmem_usage[] = {
		NULL,
		NULL
	};
	struct perf_session *session;
	static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n";
	int ret = perf_config(kmem_config, NULL);

	if (ret)
		return ret;

	argc = parse_options_subcommand(argc, argv, kmem_options,
					kmem_subcommands, kmem_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	if (kmem_slab == 0 && kmem_page == 0) {
		if (kmem_default == KMEM_SLAB)
			kmem_slab = 1;
		else
			kmem_page = 1;
	}

	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		symbol__init(NULL);
		return __cmd_record(argc, argv);
	}

	data.path = input_name;

	kmem_session = session = perf_session__new(&data, &perf_kmem);
	if (IS_ERR(session))
		return PTR_ERR(session);

	ret = -1;

	if (kmem_slab) {
		if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) {
			pr_err(errmsg, "slab", "slab");
			goto out_delete;
		}
	}

	if (kmem_page) {
		struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");

		if (evsel == NULL) {
			pr_err(errmsg, "page", "page");
			goto out_delete;
		}

		kmem_page_size = tep_get_page_size(evsel->tp_format->tep);
		symbol_conf.use_callchain = true;
	}

	symbol__init(&session->header.env);

	if (perf_time__parse_str(&ptime, time_str) != 0) {
		pr_err("Invalid time string\n");
		ret = -EINVAL;
		goto out_delete;
	}

	if (!strcmp(argv[0], "stat")) {
		setlocale(LC_ALL, "");

		if (cpu__setup_cpunode_map())
			goto out_delete;

		if (list_empty(&slab_caller_sort))
			setup_slab_sorting(&slab_caller_sort, default_slab_sort);
		if (list_empty(&slab_alloc_sort))
			setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
		if (list_empty(&page_caller_sort))
			setup_page_sorting(&page_caller_sort, default_page_sort);
		if (list_empty(&page_alloc_sort))
			setup_page_sorting(&page_alloc_sort, default_page_sort);

		if (kmem_page) {
			setup_page_sorting(&page_alloc_sort_input,
					   "page,order,migtype,gfp");
			setup_page_sorting(&page_caller_sort_input,
					   "callsite,order,migtype,gfp");
		}
		ret = __cmd_kmem(session);
	} else
		usage_with_options(kmem_usage, kmem_options);

out_delete:
	perf_session__delete(session);

	return ret;
}