Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

percpu: improve percpu_alloc_percpu event trace

Add call_site, bytes_alloc and gfp_flags fields to the output of the
percpu_alloc_percpu ftrace event:

mkdir-4393 [001] 169.334788: percpu_alloc_percpu:
call_site=mem_cgroup_css_alloc+0xa6 reserved=0 is_atomic=0 size=2408 align=8
base_addr=0xffffc7117fc00000 off=402176 ptr=0x3dc867a62300 bytes_alloc=14448
gfp_flags=GFP_KERNEL_ACCOUNT

This is required to track memcg-accounted percpu allocations.

Link: https://lkml.kernel.org/r/a07be858-c8a3-7851-9086-e3262cbcf707@openvz.org
Signed-off-by: Vasily Averin <vvs@openvz.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Vasily Averin; committed by Andrew Morton.
Commit hashes (from the web UI): f67bed13 d1ed51fc

Total diffstat: +24 -12

include/trace/events/percpu.h (+17 -6)
--- a/include/trace/events/percpu.h
+++ b/include/trace/events/percpu.h
@@ (context begins at old line 6) @@
 #define _TRACE_PERCPU_H
 
 #include <linux/tracepoint.h>
+#include <trace/events/mmflags.h>
 
 TRACE_EVENT(percpu_alloc_percpu,
 
-	TP_PROTO(bool reserved, bool is_atomic, size_t size,
-		 size_t align, void *base_addr, int off, void __percpu *ptr),
+	TP_PROTO(unsigned long call_site,
+		 bool reserved, bool is_atomic, size_t size,
+		 size_t align, void *base_addr, int off,
+		 void __percpu *ptr, size_t bytes_alloc, gfp_t gfp_flags),
 
-	TP_ARGS(reserved, is_atomic, size, align, base_addr, off, ptr),
+	TP_ARGS(call_site, reserved, is_atomic, size, align, base_addr, off,
+		ptr, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
+		__field( unsigned long,		call_site	)
 		__field( bool,			reserved	)
 		__field( bool,			is_atomic	)
 		__field( size_t,		size		)
@@ (context resumes) @@
 		__field( void *,		base_addr	)
 		__field( int,			off		)
 		__field( void __percpu *,	ptr		)
+		__field( size_t,		bytes_alloc	)
+		__field( gfp_t,			gfp_flags	)
 	),
-
 	TP_fast_assign(
+		__entry->call_site	= call_site;
 		__entry->reserved	= reserved;
 		__entry->is_atomic	= is_atomic;
 		__entry->size		= size;
@@ (context resumes) @@
 		__entry->base_addr	= base_addr;
 		__entry->off		= off;
 		__entry->ptr		= ptr;
+		__entry->bytes_alloc	= bytes_alloc;
+		__entry->gfp_flags	= gfp_flags;
 	),
 
-	TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p",
+	TP_printk("call_site=%pS reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p bytes_alloc=%zu gfp_flags=%s",
+		  (void *)__entry->call_site,
 		  __entry->reserved, __entry->is_atomic,
 		  __entry->size, __entry->align,
-		  __entry->base_addr, __entry->off, __entry->ptr)
+		  __entry->base_addr, __entry->off, __entry->ptr,
+		  __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags))
 );
 
 TRACE_EVENT(percpu_free_percpu,
mm/percpu-internal.h (+4 -4)
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ (context begins at old line 113) @@
 	return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 /**
  * pcpu_obj_full_size - helper to calculate size of each accounted object
  * @size: size of area to allocate in bytes
@@ (context resumes) @@
  */
 static inline size_t pcpu_obj_full_size(size_t size)
 {
-	size_t extra_size;
+	size_t extra_size = 0;
 
-	extra_size = size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *);
+#ifdef CONFIG_MEMCG_KMEM
+	extra_size += size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *);
+#endif
 
 	return size * num_possible_cpus() + extra_size;
 }
-#endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_PERCPU_STATS
 
mm/percpu.c (+3 -2)
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ (context begins at old line 1884) @@
 	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
 	kmemleak_alloc_percpu(ptr, size, gfp);
 
-	trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
-			chunk->base_addr, off, ptr);
+	trace_percpu_alloc_percpu(_RET_IP_, reserved, is_atomic, size, align,
+				  chunk->base_addr, off, ptr,
+				  pcpu_obj_full_size(size), gfp);
 
 	pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);
 