// Serenity Operating System — kernel performance event buffer (profiling support).
1/*
2 * Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/JsonArraySerializer.h>
8#include <AK/JsonObjectSerializer.h>
9#include <AK/ScopeGuard.h>
10#include <Kernel/Arch/RegisterState.h>
11#include <Kernel/Arch/SafeMem.h>
12#include <Kernel/Arch/SmapDisabler.h>
13#include <Kernel/FileSystem/Custody.h>
14#include <Kernel/KBufferBuilder.h>
15#include <Kernel/PerformanceEventBuffer.h>
16#include <Kernel/Process.h>
17#include <Kernel/Time/TimeManagement.h>
18
19namespace Kernel {
20
// Takes ownership of the preallocated KBuffer that backs the event array.
PerformanceEventBuffer::PerformanceEventBuffer(NonnullOwnPtr<KBuffer> buffer)
    : m_buffer(move(buffer))
{
}
25
26NEVER_INLINE ErrorOr<void> PerformanceEventBuffer::append(int type, FlatPtr arg1, FlatPtr arg2, StringView arg3, Thread* current_thread, FlatPtr arg4, u64 arg5, ErrorOr<FlatPtr> const& arg6)
27{
28 FlatPtr base_pointer = (FlatPtr)__builtin_frame_address(0);
29 return append_with_ip_and_bp(current_thread->pid(), current_thread->tid(), 0, base_pointer, type, 0, arg1, arg2, arg3, arg4, arg5, arg6);
30}
31
// Walks the frame-pointer chain starting at `bp`, collecting return addresses
// into a fixed-capacity vector (at most PerformanceEvent::max_stack_frame_count
// entries). `ip` is recorded as the first frame when non-zero. Reads are done
// with safe_memcpy(), so an unmapped or torn frame simply terminates the walk.
static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr bp, FlatPtr ip)
{
    Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> backtrace;
    if (ip != 0)
        backtrace.unchecked_append(ip);
    FlatPtr stack_ptr_copy;
    FlatPtr stack_ptr = bp;
    // FIXME: Figure out how to remove this SmapDisabler without breaking profile stacks.
    SmapDisabler disabler;
    // NOTE: The stack should always have kernel frames first, followed by userspace frames.
    // If a userspace frame points back into kernel memory, something is afoot.
    bool is_walking_userspace_stack = false;
    while (stack_ptr) {
        void* fault_at;
        // Read the saved previous frame pointer; stop if the memory isn't readable.
        if (!safe_memcpy(&stack_ptr_copy, (void*)stack_ptr, sizeof(FlatPtr), fault_at))
            break;
        if (!Memory::is_user_address(VirtualAddress { stack_ptr })) {
            if (is_walking_userspace_stack) {
                dbgln("SHENANIGANS! Userspace stack points back into kernel memory");
                break;
            }
        } else {
            // Once we've crossed into userspace frames, we must never go back.
            is_walking_userspace_stack = true;
        }
        FlatPtr retaddr;
        // The return address is stored one word above the saved frame pointer.
        if (!safe_memcpy(&retaddr, (void*)(stack_ptr + sizeof(FlatPtr)), sizeof(FlatPtr), fault_at))
            break;
        if (retaddr == 0)
            break;
        backtrace.unchecked_append(retaddr);
        // Stop once the inline capacity is full; unchecked_append must not overflow it.
        if (backtrace.size() == PerformanceEvent::max_stack_frame_count)
            break;
        stack_ptr = stack_ptr_copy;
    }
    return backtrace;
}
68
// Convenience overload: extracts the instruction pointer and base pointer from
// a captured RegisterState and forwards to the main implementation below.
ErrorOr<void> PerformanceEventBuffer::append_with_ip_and_bp(ProcessID pid, ThreadID tid, RegisterState const& regs,
    int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, StringView arg3, FlatPtr arg4, u64 arg5, ErrorOr<FlatPtr> const& arg6)
{
    return append_with_ip_and_bp(pid, tid, regs.ip(), regs.bp(), type, lost_samples, arg1, arg2, arg3, arg4, arg5, arg6);
}
74
// Records a single performance event. The meaning of arg1..arg6 depends on
// `type` (decoded by the switch below). Returns ENOBUFS when the buffer is
// full, and EINVAL when the event type is masked out, the profiler is entered
// reentrantly, or `type` is unknown.
ErrorOr<void> PerformanceEventBuffer::append_with_ip_and_bp(ProcessID pid, ThreadID tid,
    FlatPtr ip, FlatPtr bp, int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, StringView arg3, FlatPtr arg4, u64 arg5, ErrorOr<FlatPtr> const& arg6)
{
    if (count() >= capacity())
        return ENOBUFS;

    // Only record event types that are enabled in the global profiling mask.
    if ((g_profiling_event_mask & type) == 0)
        return EINVAL;

    // Guard against reentrancy: if this thread is already inside the profiler
    // (enter count > 0), refuse to record rather than recurse.
    auto* current_thread = Thread::current();
    u32 enter_count = 0;
    if (current_thread)
        enter_count = current_thread->enter_profiler();
    ScopeGuard leave_profiler([&] {
        if (current_thread)
            current_thread->leave_profiler();
    });
    if (enter_count > 0)
        return EINVAL;

    PerformanceEvent event;
    event.type = type;
    event.lost_samples = lost_samples;

    // Decode the generic arg1..arg6 slots into the type-specific payload.
    switch (type) {
    case PERF_EVENT_SAMPLE:
        break;
    case PERF_EVENT_MALLOC:
        event.data.malloc.size = arg1;
        event.data.malloc.ptr = arg2;
        break;
    case PERF_EVENT_FREE:
        event.data.free.ptr = arg1;
        break;
    case PERF_EVENT_MMAP:
        event.data.mmap.ptr = arg1;
        event.data.mmap.size = arg2;
        // Zero-fill first so the copied name is always NUL-terminated.
        memset(event.data.mmap.name, 0, sizeof(event.data.mmap.name));
        if (!arg3.is_empty())
            memcpy(event.data.mmap.name, arg3.characters_without_null_termination(), min(arg3.length(), sizeof(event.data.mmap.name) - 1));
        break;
    case PERF_EVENT_MUNMAP:
        event.data.munmap.ptr = arg1;
        event.data.munmap.size = arg2;
        break;
    case PERF_EVENT_PROCESS_CREATE:
        event.data.process_create.parent_pid = arg1;
        // Zero-fill first so the copied path is always NUL-terminated.
        memset(event.data.process_create.executable, 0, sizeof(event.data.process_create.executable));
        if (!arg3.is_empty()) {
            memcpy(event.data.process_create.executable, arg3.characters_without_null_termination(),
                min(arg3.length(), sizeof(event.data.process_create.executable) - 1));
        }
        break;
    case PERF_EVENT_PROCESS_EXEC:
        memset(event.data.process_exec.executable, 0, sizeof(event.data.process_exec.executable));
        if (!arg3.is_empty()) {
            memcpy(event.data.process_exec.executable, arg3.characters_without_null_termination(),
                min(arg3.length(), sizeof(event.data.process_exec.executable) - 1));
        }
        break;
    case PERF_EVENT_PROCESS_EXIT:
        break;
    case PERF_EVENT_THREAD_CREATE:
        event.data.thread_create.parent_tid = arg1;
        break;
    case PERF_EVENT_THREAD_EXIT:
        break;
    case PERF_EVENT_CONTEXT_SWITCH:
        event.data.context_switch.next_pid = arg1;
        event.data.context_switch.next_tid = arg2;
        break;
    case PERF_EVENT_KMALLOC:
        event.data.kmalloc.size = arg1;
        event.data.kmalloc.ptr = arg2;
        break;
    case PERF_EVENT_KFREE:
        event.data.kfree.size = arg1;
        event.data.kfree.ptr = arg2;
        break;
    case PERF_EVENT_PAGE_FAULT:
        break;
    case PERF_EVENT_SYSCALL:
        break;
    case PERF_EVENT_SIGNPOST:
        event.data.signpost.arg1 = arg1;
        event.data.signpost.arg2 = arg2;
        break;
    case PERF_EVENT_READ:
        event.data.read.fd = arg1;
        event.data.read.size = arg2;
        // arg4 indexes into the string table built by register_string().
        event.data.read.filename_index = arg4;
        event.data.read.start_timestamp = arg5;
        event.data.read.success = !arg6.is_error();
        break;
    default:
        return EINVAL;
    }

    // Capture a (possibly truncated) stack trace for this event.
    auto backtrace = raw_backtrace(bp, ip);
    event.stack_size = min(sizeof(event.stack) / sizeof(FlatPtr), static_cast<size_t>(backtrace.size()));
    memcpy(event.stack, backtrace.data(), event.stack_size * sizeof(FlatPtr));

    event.pid = pid.value();
    event.tid = tid.value();
    event.timestamp = TimeManagement::the().uptime_ms();
    at(m_count++) = event;
    return {};
}
183
184PerformanceEvent& PerformanceEventBuffer::at(size_t index)
185{
186 VERIFY(index < capacity());
187 auto* events = reinterpret_cast<PerformanceEvent*>(m_buffer->data());
188 return events[index];
189}
190
// Serializes the whole buffer (string table + all recorded events) through
// the given JSON serializer. Kernel-only details (kmalloc/kfree events and
// kernel stack addresses) are hidden unless the calling process is superuser.
template<typename Serializer>
ErrorOr<void> PerformanceEventBuffer::to_json_impl(Serializer& object) const
{
    {
        // Emit the interned strings as an array ordered by their registration
        // index, so filename_index values in events can be resolved.
        auto strings = TRY(object.add_array("strings"sv));
        Vector<KString*> strings_sorted_by_index;
        TRY(strings_sorted_by_index.try_resize(m_strings.size()));

        for (auto& entry : m_strings) {
            strings_sorted_by_index[entry.value] = const_cast<Kernel::KString*>(entry.key.ptr());
        }

        for (size_t i = 0; i < m_strings.size(); i++) {
            TRY(strings.add(strings_sorted_by_index[i]->view()));
        }

        TRY(strings.finish());
    }

    // Only superusers get to see kernel allocation events and kernel addresses.
    auto current_process_credentials = Process::current().credentials();
    bool show_kernel_addresses = current_process_credentials->is_superuser();
    auto array = TRY(object.add_array("events"sv));
    bool seen_first_sample = false;
    for (size_t i = 0; i < m_count; ++i) {
        auto const& event = at(i);

        if (!show_kernel_addresses) {
            if (event.type == PERF_EVENT_KMALLOC || event.type == PERF_EVENT_KFREE)
                continue;
        }

        auto event_object = TRY(array.add_object());
        switch (event.type) {
        case PERF_EVENT_SAMPLE:
            TRY(event_object.add("type"sv, "sample"));
            break;
        case PERF_EVENT_MALLOC:
            TRY(event_object.add("type"sv, "malloc"));
            TRY(event_object.add("ptr"sv, static_cast<u64>(event.data.malloc.ptr)));
            TRY(event_object.add("size"sv, static_cast<u64>(event.data.malloc.size)));
            break;
        case PERF_EVENT_FREE:
            TRY(event_object.add("type"sv, "free"));
            TRY(event_object.add("ptr"sv, static_cast<u64>(event.data.free.ptr)));
            break;
        case PERF_EVENT_MMAP:
            TRY(event_object.add("type"sv, "mmap"));
            TRY(event_object.add("ptr"sv, static_cast<u64>(event.data.mmap.ptr)));
            TRY(event_object.add("size"sv, static_cast<u64>(event.data.mmap.size)));
            TRY(event_object.add("name"sv, event.data.mmap.name));
            break;
        case PERF_EVENT_MUNMAP:
            TRY(event_object.add("type"sv, "munmap"));
            TRY(event_object.add("ptr"sv, static_cast<u64>(event.data.munmap.ptr)));
            TRY(event_object.add("size"sv, static_cast<u64>(event.data.munmap.size)));
            break;
        case PERF_EVENT_PROCESS_CREATE:
            TRY(event_object.add("type"sv, "process_create"));
            TRY(event_object.add("parent_pid"sv, static_cast<u64>(event.data.process_create.parent_pid)));
            TRY(event_object.add("executable"sv, event.data.process_create.executable));
            break;
        case PERF_EVENT_PROCESS_EXEC:
            TRY(event_object.add("type"sv, "process_exec"));
            TRY(event_object.add("executable"sv, event.data.process_exec.executable));
            break;
        case PERF_EVENT_PROCESS_EXIT:
            TRY(event_object.add("type"sv, "process_exit"));
            break;
        case PERF_EVENT_THREAD_CREATE:
            TRY(event_object.add("type"sv, "thread_create"));
            TRY(event_object.add("parent_tid"sv, static_cast<u64>(event.data.thread_create.parent_tid)));
            break;
        case PERF_EVENT_THREAD_EXIT:
            TRY(event_object.add("type"sv, "thread_exit"));
            break;
        case PERF_EVENT_CONTEXT_SWITCH:
            TRY(event_object.add("type"sv, "context_switch"));
            TRY(event_object.add("next_pid"sv, static_cast<u64>(event.data.context_switch.next_pid)));
            TRY(event_object.add("next_tid"sv, static_cast<u64>(event.data.context_switch.next_tid)));
            break;
        case PERF_EVENT_KMALLOC:
            TRY(event_object.add("type"sv, "kmalloc"));
            TRY(event_object.add("ptr"sv, static_cast<u64>(event.data.kmalloc.ptr)));
            TRY(event_object.add("size"sv, static_cast<u64>(event.data.kmalloc.size)));
            break;
        case PERF_EVENT_KFREE:
            TRY(event_object.add("type"sv, "kfree"));
            TRY(event_object.add("ptr"sv, static_cast<u64>(event.data.kfree.ptr)));
            TRY(event_object.add("size"sv, static_cast<u64>(event.data.kfree.size)));
            break;
        case PERF_EVENT_PAGE_FAULT:
            TRY(event_object.add("type"sv, "page_fault"));
            break;
        case PERF_EVENT_SYSCALL:
            TRY(event_object.add("type"sv, "syscall"));
            break;
        case PERF_EVENT_SIGNPOST:
            TRY(event_object.add("type"sv, "signpost"sv));
            TRY(event_object.add("arg1"sv, event.data.signpost.arg1));
            TRY(event_object.add("arg2"sv, event.data.signpost.arg2));
            break;
        case PERF_EVENT_READ:
            TRY(event_object.add("type"sv, "read"));
            TRY(event_object.add("fd"sv, event.data.read.fd));
            TRY(event_object.add("size"sv, event.data.read.size));
            TRY(event_object.add("filename_index"sv, event.data.read.filename_index));
            TRY(event_object.add("start_timestamp"sv, event.data.read.start_timestamp));
            TRY(event_object.add("success"sv, event.data.read.success));
            break;
        }
        TRY(event_object.add("pid"sv, event.pid));
        TRY(event_object.add("tid"sv, event.tid));
        TRY(event_object.add("timestamp"sv, event.timestamp));
        // Suppress lost_samples until the first sample event has been seen.
        TRY(event_object.add("lost_samples"sv, seen_first_sample ? event.lost_samples : 0));
        if (event.type == PERF_EVENT_SAMPLE)
            seen_first_sample = true;
        auto stack_array = TRY(event_object.add_array("stack"sv));
        for (size_t j = 0; j < event.stack_size; ++j) {
            auto address = event.stack[j];
            // Scrub kernel addresses from stacks shown to non-superusers.
            if (!show_kernel_addresses && !Memory::is_user_address(VirtualAddress { address }))
                address = 0xdeadc0de;
            TRY(stack_array.add(address));
        }
        TRY(stack_array.finish());
        TRY(event_object.finish());
    }
    TRY(array.finish());
    TRY(object.finish());
    return {};
}
321
// Serializes the buffer contents as a JSON object into `builder`.
ErrorOr<void> PerformanceEventBuffer::to_json(KBufferBuilder& builder) const
{
    auto object = TRY(JsonObjectSerializer<>::try_create(builder));
    return to_json_impl(object);
}
327
328OwnPtr<PerformanceEventBuffer> PerformanceEventBuffer::try_create_with_size(size_t buffer_size)
329{
330 auto buffer_or_error = KBuffer::try_create_with_size("Performance events"sv, buffer_size, Memory::Region::Access::ReadWrite, AllocationStrategy::AllocateNow);
331 if (buffer_or_error.is_error())
332 return {};
333 return adopt_own_if_nonnull(new (nothrow) PerformanceEventBuffer(buffer_or_error.release_value()));
334}
335
// Emits the synthetic events needed to describe an already-running process in
// a profile: a PROCESS_CREATE or PROCESS_EXEC event, one THREAD_CREATE per
// live thread, and one MMAP event per memory region (for symbolication).
ErrorOr<void> PerformanceEventBuffer::add_process(Process const& process, ProcessEventType event_type)
{
    OwnPtr<KString> executable;
    if (process.executable()) {
        executable = TRY(process.executable()->try_serialize_absolute_path());
    } else {
        // No executable (e.g. kernel process): fall back to "<process name>".
        executable = TRY(process.name().with([&](auto& process_name) {
            return KString::formatted("<{}>", process_name->view());
        }));
    }

    TRY(append_with_ip_and_bp(process.pid(), 0, 0, 0,
        event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC,
        0, process.pid().value(), 0, executable->view()));

    // for_each_thread's callback can't use TRY, so capture the first error
    // and stop iterating; re-raise it afterwards.
    ErrorOr<void> result;
    process.for_each_thread([&](auto& thread) {
        result = append_with_ip_and_bp(process.pid(), thread.tid().value(),
            0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, 0, {});
        return result.is_error() ? IterationDecision::Break : IterationDecision::Continue;
    });
    TRY(result);

    return process.address_space().with([&](auto& space) -> ErrorOr<void> {
        for (auto const& region : space->region_tree().regions()) {
            TRY(append_with_ip_and_bp(process.pid(), 0,
                0, 0, PERF_EVENT_MMAP, 0, region.range().base().get(), region.range().size(), region.name()));
        }
        return {};
    });
}
367
368ErrorOr<FlatPtr> PerformanceEventBuffer::register_string(NonnullOwnPtr<KString> string)
369{
370 auto it = m_strings.find(string);
371 if (it != m_strings.end()) {
372 return it->value;
373 }
374
375 auto new_index = m_strings.size();
376 TRY(m_strings.try_set(move(string), move(new_index)));
377 return new_index;
378}
379
380}