Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3#include <vmlinux.h>
4#include <bpf/bpf_core_read.h>
5#include <bpf/bpf_helpers.h>
6#include <bpf/bpf_tracing.h>
7
8#include "profiler.h"
9
#ifndef NULL
#define NULL 0
#endif

/* Local copies of the open(2) flag bits tested by the probes in this file. */
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)

/* Largest errno value that can be encoded in an error pointer. */
#define MAX_ERRNO 4095

/* Inode mode bits and the file-type predicates built on top of them. */
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)

/*
 * True when @x lies in the top MAX_ERRNO values of the address space.
 * The whole expansion is parenthesized so the macro can be negated or
 * combined with other operators without changing its meaning.
 */
#define IS_ERR_VALUE(x) \
	((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
38
/* Number of kill records kept per var_kill_data_arr_t. */
#define KILL_DATA_ARRAY_SIZE 8

/*
 * Fixed-size array of kill records. It is the value type of the
 * var_tpid_to_data map; slots are matched by the sender pid stored in
 * meta.pid (see get_var_spid_index()).
 */
struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};
44
/*
 * Union of every event record type built by this file. It is the value type
 * of the data_heap map, so the single heap slot is large enough for whichever
 * record a probe needs to assemble.
 */
union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};
53
/*
 * Run-time configuration read by the probes below.
 * NOTE(review): presumably filled in by the loader before attach — confirm.
 */
volatile struct profiler_config_struct bpf_config = {};

/* Shorthand accessors for the individual bpf_config fields. */
#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
65
/*
 * Alternative layout of struct kernfs_iattrs in which the times live inside
 * an embedded struct iattr. populate_cgroup_info() falls back to it when
 * kernfs_iattrs has no direct ia_mtime field.
 * NOTE(review): the ___52 suffix presumably refers to the kernel version
 * that used this layout — confirm.
 */
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};
69
/*
 * Alternative layout of struct kernfs_node whose id is a union exposing a
 * 32-bit ino. get_inode_from_kernfs() uses it when the id.ino field exists
 * on the running kernel.
 */
struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};
79
/*
 * Single-slot per-CPU scratch buffer. Every probe looks up key 0 and builds
 * its event record in place before emitting it.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");
86
/* Perf event array that all bpf_perf_event_output() calls in this file write to. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");
92
/*
 * Pending kill records keyed by target pid. Entries are written by
 * trace_var_sys_kill() and consumed and deleted by the sched_process_exit
 * handler.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");
99
/*
 * Per-CPU execution statistics, one slot per enum bpf_function_id value;
 * updated by the bpf_stats_*() helpers.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");
106
/*
 * Set of device ids (d_sb->s_dev) whose files are reported by the file
 * modification probes. Only key presence is checked; the value is not read.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");
113
/*
 * Set of file inode numbers that is_dentry_allowed_for_filemod() accepts
 * directly. Only key presence is checked.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");
120
/*
 * Set of directory inode numbers; a file is accepted when any ancestor
 * visited by is_ancestor_in_allowed_inodes() is present here.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");
127
/*
 * Set of executable inode numbers for which the sched_process_exec handler
 * emits no event. Only key presence is checked.
 * NOTE(review): the key is u32 while i_ino values are read as u64 and the
 * allowed_*_inodes maps use u64 keys — confirm the narrower key is intended.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
134
#ifndef ARRAY_SIZE
/*
 * Element count of a true array (not a pointer). The argument is
 * parenthesized at each use so that any array-valued expression is accepted.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
138
139static INLINE bool IS_ERR(const void* ptr)
140{
141 return IS_ERR_VALUE((unsigned long)ptr);
142}
143
144static INLINE u32 get_userspace_pid()
145{
146 return bpf_get_current_pid_tgid() >> 32;
147}
148
149static INLINE bool is_init_process(u32 tgid)
150{
151 return tgid == 1 || tgid == 0;
152}
153
154static INLINE unsigned long
155probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
156{
157 len = len < max ? len : max;
158 if (len > 1) {
159 if (bpf_probe_read_kernel(dst, len, src))
160 return 0;
161 } else if (len == 1) {
162 if (bpf_probe_read_kernel(dst, 1, src))
163 return 0;
164 }
165 return len;
166}
167
168static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
169 int spid)
170{
171#ifdef UNROLL
172#pragma unroll
173#endif
174 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
175 if (arr_struct->array[i].meta.pid == spid)
176 return i;
177 return -1;
178}
179
180static INLINE void populate_ancestors(struct task_struct* task,
181 struct ancestors_data_t* ancestors_data)
182{
183 struct task_struct* parent = task;
184 u32 num_ancestors, ppid;
185
186 ancestors_data->num_ancestors = 0;
187#ifdef UNROLL
188#pragma unroll
189#endif
190 for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
191 parent = BPF_CORE_READ(parent, real_parent);
192 if (parent == NULL)
193 break;
194 ppid = BPF_CORE_READ(parent, tgid);
195 if (is_init_process(ppid))
196 break;
197 ancestors_data->ancestor_pids[num_ancestors] = ppid;
198 ancestors_data->ancestor_exec_ids[num_ancestors] =
199 BPF_CORE_READ(parent, self_exec_id);
200 ancestors_data->ancestor_start_times[num_ancestors] =
201 BPF_CORE_READ(parent, start_time);
202 ancestors_data->num_ancestors = num_ancestors;
203 }
204}
205
206static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
207 struct kernfs_node* cgroup_root_node,
208 void* payload,
209 int* root_pos)
210{
211 void* payload_start = payload;
212 size_t filepart_length;
213
214#ifdef UNROLL
215#pragma unroll
216#endif
217 for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
218 filepart_length =
219 bpf_probe_read_kernel_str(payload, MAX_PATH,
220 BPF_CORE_READ(cgroup_node, name));
221 if (!cgroup_node)
222 return payload;
223 if (cgroup_node == cgroup_root_node)
224 *root_pos = payload - payload_start;
225 if (filepart_length <= MAX_PATH) {
226 barrier_var(filepart_length);
227 payload += filepart_length;
228 }
229 cgroup_node = BPF_CORE_READ(cgroup_node, parent);
230 }
231 return payload;
232}
233
234static ino_t get_inode_from_kernfs(struct kernfs_node* node)
235{
236 struct kernfs_node___52* node52 = (void*)node;
237
238 if (bpf_core_field_exists(node52->id.ino)) {
239 barrier_var(node52);
240 return BPF_CORE_READ(node52, id.ino);
241 } else {
242 barrier_var(node);
243 return (u64)BPF_CORE_READ(node, id);
244 }
245}
246
/* Kconfig value for CONFIG_CGROUP_PIDS; weak, so it may be absent. */
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
/*
 * Local stand-in for enum cgroup_subsys_id, used with bpf_core_enum_value()
 * in populate_cgroup_info() to obtain the pids controller id.
 */
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};
251
252static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
253 struct task_struct* task,
254 void* payload)
255{
256 struct kernfs_node* root_kernfs =
257 BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
258 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
259
260#if __has_builtin(__builtin_preserve_enum_value)
261 if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
262 int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
263 pids_cgrp_id___local);
264#ifdef UNROLL
265#pragma unroll
266#endif
267 for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
268 struct cgroup_subsys_state* subsys =
269 BPF_CORE_READ(task, cgroups, subsys[i]);
270 if (subsys != NULL) {
271 int subsys_id = BPF_CORE_READ(subsys, ss, id);
272 if (subsys_id == cgrp_id) {
273 proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
274 root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
275 break;
276 }
277 }
278 }
279 }
280#endif
281
282 cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
283 cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
284
285 if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
286 cgroup_data->cgroup_root_mtime =
287 BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
288 cgroup_data->cgroup_proc_mtime =
289 BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
290 } else {
291 struct kernfs_iattrs___52* root_iattr =
292 (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
293 cgroup_data->cgroup_root_mtime =
294 BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
295
296 struct kernfs_iattrs___52* proc_iattr =
297 (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
298 cgroup_data->cgroup_proc_mtime =
299 BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
300 }
301
302 cgroup_data->cgroup_root_length = 0;
303 cgroup_data->cgroup_proc_length = 0;
304 cgroup_data->cgroup_full_length = 0;
305
306 size_t cgroup_root_length =
307 bpf_probe_read_kernel_str(payload, MAX_PATH,
308 BPF_CORE_READ(root_kernfs, name));
309 barrier_var(cgroup_root_length);
310 if (cgroup_root_length <= MAX_PATH) {
311 barrier_var(cgroup_root_length);
312 cgroup_data->cgroup_root_length = cgroup_root_length;
313 payload += cgroup_root_length;
314 }
315
316 size_t cgroup_proc_length =
317 bpf_probe_read_kernel_str(payload, MAX_PATH,
318 BPF_CORE_READ(proc_kernfs, name));
319 barrier_var(cgroup_proc_length);
320 if (cgroup_proc_length <= MAX_PATH) {
321 barrier_var(cgroup_proc_length);
322 cgroup_data->cgroup_proc_length = cgroup_proc_length;
323 payload += cgroup_proc_length;
324 }
325
326 if (FETCH_CGROUPS_FROM_BPF) {
327 cgroup_data->cgroup_full_path_root_pos = -1;
328 void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
329 &cgroup_data->cgroup_full_path_root_pos);
330 cgroup_data->cgroup_full_length = payload_end_pos - payload;
331 payload = payload_end_pos;
332 }
333
334 return (void*)payload;
335}
336
337static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
338 struct task_struct* task,
339 u32 pid, void* payload)
340{
341 u64 uid_gid = bpf_get_current_uid_gid();
342
343 metadata->uid = (u32)uid_gid;
344 metadata->gid = uid_gid >> 32;
345 metadata->pid = pid;
346 metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
347 metadata->start_time = BPF_CORE_READ(task, start_time);
348 metadata->comm_length = 0;
349
350 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
351 barrier_var(comm_length);
352 if (comm_length <= TASK_COMM_LEN) {
353 barrier_var(comm_length);
354 metadata->comm_length = comm_length;
355 payload += comm_length;
356 }
357
358 return (void*)payload;
359}
360
361static INLINE struct var_kill_data_t*
362get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
363{
364 int zero = 0;
365 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
366
367 if (kill_data == NULL)
368 return NULL;
369 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
370
371 void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
372 payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
373 size_t payload_length = payload - (void*)kill_data->payload;
374 kill_data->payload_length = payload_length;
375 populate_ancestors(task, &kill_data->ancestors_info);
376 kill_data->meta.type = KILL_EVENT;
377 kill_data->kill_target_pid = tpid;
378 kill_data->kill_sig = sig;
379 kill_data->kill_count = 1;
380 kill_data->last_kill_time = bpf_ktime_get_ns();
381 return kill_data;
382}
383
384static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
385{
386 if ((KILL_SIGNALS & (1ULL << sig)) == 0)
387 return 0;
388
389 u32 spid = get_userspace_pid();
390 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
391
392 if (arr_struct == NULL) {
393 struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
394 int zero = 0;
395
396 if (kill_data == NULL)
397 return 0;
398 arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
399 if (arr_struct == NULL)
400 return 0;
401 bpf_probe_read_kernel(&arr_struct->array[0],
402 sizeof(arr_struct->array[0]), kill_data);
403 } else {
404 int index = get_var_spid_index(arr_struct, spid);
405
406 if (index == -1) {
407 struct var_kill_data_t* kill_data =
408 get_var_kill_data(ctx, spid, tpid, sig);
409 if (kill_data == NULL)
410 return 0;
411#ifdef UNROLL
412#pragma unroll
413#endif
414 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
415 if (arr_struct->array[i].meta.pid == 0) {
416 bpf_probe_read_kernel(&arr_struct->array[i],
417 sizeof(arr_struct->array[i]),
418 kill_data);
419 bpf_map_update_elem(&var_tpid_to_data, &tpid,
420 arr_struct, 0);
421
422 return 0;
423 }
424 return 0;
425 }
426
427 struct var_kill_data_t* kill_data = &arr_struct->array[index];
428
429 u64 delta_sec =
430 (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
431
432 if (delta_sec < STALE_INFO) {
433 kill_data->kill_count++;
434 kill_data->last_kill_time = bpf_ktime_get_ns();
435 bpf_probe_read_kernel(&arr_struct->array[index],
436 sizeof(arr_struct->array[index]),
437 kill_data);
438 } else {
439 struct var_kill_data_t* kill_data =
440 get_var_kill_data(ctx, spid, tpid, sig);
441 if (kill_data == NULL)
442 return 0;
443 bpf_probe_read_kernel(&arr_struct->array[index],
444 sizeof(arr_struct->array[index]),
445 kill_data);
446 }
447 }
448 bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
449 return 0;
450}
451
452static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
453 enum bpf_function_id func_id)
454{
455 int func_id_key = func_id;
456
457 bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
458 bpf_stat_ctx->bpf_func_stats_data_val =
459 bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
460 if (bpf_stat_ctx->bpf_func_stats_data_val)
461 bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
462}
463
464static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
465{
466 if (bpf_stat_ctx->bpf_func_stats_data_val)
467 bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
468 bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
469}
470
471static INLINE void
472bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
473 struct var_metadata_t* meta)
474{
475 if (bpf_stat_ctx->bpf_func_stats_data_val) {
476 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
477 meta->bpf_stats_num_perf_events =
478 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
479 }
480 meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
481 meta->cpu_id = bpf_get_smp_processor_id();
482}
483
484static INLINE size_t
485read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
486{
487 size_t length = 0;
488 size_t filepart_length;
489 struct dentry* parent_dentry;
490
491#ifdef UNROLL
492#pragma unroll
493#endif
494 for (int i = 0; i < MAX_PATH_DEPTH; i++) {
495 filepart_length =
496 bpf_probe_read_kernel_str(payload, MAX_PATH,
497 BPF_CORE_READ(filp_dentry, d_name.name));
498 barrier_var(filepart_length);
499 if (filepart_length > MAX_PATH)
500 break;
501 barrier_var(filepart_length);
502 payload += filepart_length;
503 length += filepart_length;
504
505 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
506 if (filp_dentry == parent_dentry)
507 break;
508 filp_dentry = parent_dentry;
509 }
510
511 return length;
512}
513
514static INLINE bool
515is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
516{
517 struct dentry* parent_dentry;
518#ifdef UNROLL
519#pragma unroll
520#endif
521 for (int i = 0; i < MAX_PATH_DEPTH; i++) {
522 u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
523 bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
524
525 if (allowed_dir != NULL)
526 return true;
527 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
528 if (filp_dentry == parent_dentry)
529 break;
530 filp_dentry = parent_dentry;
531 }
532 return false;
533}
534
535static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
536 u32* device_id,
537 u64* file_ino)
538{
539 u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
540 *device_id = dev_id;
541 bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
542
543 if (allowed_device == NULL)
544 return false;
545
546 u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
547 *file_ino = ino;
548 bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
549
550 if (allowed_file == NULL)
551 if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
552 return false;
553 return true;
554}
555
556SEC("kprobe/proc_sys_write")
557ssize_t BPF_KPROBE(kprobe__proc_sys_write,
558 struct file* filp, const char* buf,
559 size_t count, loff_t* ppos)
560{
561 struct bpf_func_stats_ctx stats_ctx;
562 bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
563
564 u32 pid = get_userspace_pid();
565 int zero = 0;
566 struct var_sysctl_data_t* sysctl_data =
567 bpf_map_lookup_elem(&data_heap, &zero);
568 if (!sysctl_data)
569 goto out;
570
571 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
572 sysctl_data->meta.type = SYSCTL_EVENT;
573 void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
574 payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
575
576 populate_ancestors(task, &sysctl_data->ancestors_info);
577
578 sysctl_data->sysctl_val_length = 0;
579 sysctl_data->sysctl_path_length = 0;
580
581 size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
582 CTL_MAXNAME, buf);
583 barrier_var(sysctl_val_length);
584 if (sysctl_val_length <= CTL_MAXNAME) {
585 barrier_var(sysctl_val_length);
586 sysctl_data->sysctl_val_length = sysctl_val_length;
587 payload += sysctl_val_length;
588 }
589
590 size_t sysctl_path_length =
591 bpf_probe_read_kernel_str(payload, MAX_PATH,
592 BPF_CORE_READ(filp, f_path.dentry,
593 d_name.name));
594 barrier_var(sysctl_path_length);
595 if (sysctl_path_length <= MAX_PATH) {
596 barrier_var(sysctl_path_length);
597 sysctl_data->sysctl_path_length = sysctl_path_length;
598 payload += sysctl_path_length;
599 }
600
601 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
602 unsigned long data_len = payload - (void*)sysctl_data;
603 data_len = data_len > sizeof(struct var_sysctl_data_t)
604 ? sizeof(struct var_sysctl_data_t)
605 : data_len;
606 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
607out:
608 bpf_stats_exit(&stats_ctx);
609 return 0;
610}
611
612SEC("tracepoint/syscalls/sys_enter_kill")
613int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
614{
615 struct bpf_func_stats_ctx stats_ctx;
616
617 bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
618 int pid = ctx->args[0];
619 int sig = ctx->args[1];
620 int ret = trace_var_sys_kill(ctx, pid, sig);
621 bpf_stats_exit(&stats_ctx);
622 return ret;
623};
624
625SEC("raw_tracepoint/sched_process_exit")
626int raw_tracepoint__sched_process_exit(void* ctx)
627{
628 int zero = 0;
629 struct bpf_func_stats_ctx stats_ctx;
630 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
631
632 u32 tpid = get_userspace_pid();
633
634 struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
635 struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
636
637 if (arr_struct == NULL || kill_data == NULL)
638 goto out;
639
640 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
641 struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
642
643#ifdef UNROLL
644#pragma unroll
645#endif
646 for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
647 struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
648
649 if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
650 bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
651 past_kill_data);
652 void* payload = kill_data->payload;
653 size_t offset = kill_data->payload_length;
654 if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
655 return 0;
656 payload += offset;
657
658 kill_data->kill_target_name_length = 0;
659 kill_data->kill_target_cgroup_proc_length = 0;
660
661 size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
662 barrier_var(comm_length);
663 if (comm_length <= TASK_COMM_LEN) {
664 barrier_var(comm_length);
665 kill_data->kill_target_name_length = comm_length;
666 payload += comm_length;
667 }
668
669 size_t cgroup_proc_length =
670 bpf_probe_read_kernel_str(payload,
671 KILL_TARGET_LEN,
672 BPF_CORE_READ(proc_kernfs, name));
673 barrier_var(cgroup_proc_length);
674 if (cgroup_proc_length <= KILL_TARGET_LEN) {
675 barrier_var(cgroup_proc_length);
676 kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
677 payload += cgroup_proc_length;
678 }
679
680 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
681 unsigned long data_len = (void*)payload - (void*)kill_data;
682 data_len = data_len > sizeof(struct var_kill_data_t)
683 ? sizeof(struct var_kill_data_t)
684 : data_len;
685 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
686 }
687 }
688 bpf_map_delete_elem(&var_tpid_to_data, &tpid);
689out:
690 bpf_stats_exit(&stats_ctx);
691 return 0;
692}
693
694SEC("raw_tracepoint/sched_process_exec")
695int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
696{
697 struct bpf_func_stats_ctx stats_ctx;
698 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
699
700 struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
701 u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
702
703 bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
704 if (should_filter_binprm != NULL)
705 goto out;
706
707 int zero = 0;
708 struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
709 if (!proc_exec_data)
710 goto out;
711
712 if (INODE_FILTER && inode != INODE_FILTER)
713 return 0;
714
715 u32 pid = get_userspace_pid();
716 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
717
718 proc_exec_data->meta.type = EXEC_EVENT;
719 proc_exec_data->bin_path_length = 0;
720 proc_exec_data->cmdline_length = 0;
721 proc_exec_data->environment_length = 0;
722 void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
723 proc_exec_data->payload);
724 payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
725
726 struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
727 proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
728 proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
729 proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
730 proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
731
732 const char* filename = BPF_CORE_READ(bprm, filename);
733 size_t bin_path_length =
734 bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
735 barrier_var(bin_path_length);
736 if (bin_path_length <= MAX_FILENAME_LEN) {
737 barrier_var(bin_path_length);
738 proc_exec_data->bin_path_length = bin_path_length;
739 payload += bin_path_length;
740 }
741
742 void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
743 void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
744 unsigned int cmdline_length = probe_read_lim(payload, arg_start,
745 arg_end - arg_start, MAX_ARGS_LEN);
746
747 if (cmdline_length <= MAX_ARGS_LEN) {
748 barrier_var(cmdline_length);
749 proc_exec_data->cmdline_length = cmdline_length;
750 payload += cmdline_length;
751 }
752
753 if (READ_ENVIRON_FROM_EXEC) {
754 void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
755 void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
756 unsigned long env_len = probe_read_lim(payload, env_start,
757 env_end - env_start, MAX_ENVIRON_LEN);
758 if (cmdline_length <= MAX_ENVIRON_LEN) {
759 proc_exec_data->environment_length = env_len;
760 payload += env_len;
761 }
762 }
763
764 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
765 unsigned long data_len = payload - (void*)proc_exec_data;
766 data_len = data_len > sizeof(struct var_exec_data_t)
767 ? sizeof(struct var_exec_data_t)
768 : data_len;
769 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
770out:
771 bpf_stats_exit(&stats_ctx);
772 return 0;
773}
774
775SEC("kretprobe/do_filp_open")
776int kprobe_ret__do_filp_open(struct pt_regs* ctx)
777{
778 struct bpf_func_stats_ctx stats_ctx;
779 bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
780
781 struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
782
783 if (filp == NULL || IS_ERR(filp))
784 goto out;
785 unsigned int flags = BPF_CORE_READ(filp, f_flags);
786 if ((flags & (O_RDWR | O_WRONLY)) == 0)
787 goto out;
788 if ((flags & O_TMPFILE) > 0)
789 goto out;
790 struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
791 umode_t mode = BPF_CORE_READ(file_inode, i_mode);
792 if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
793 S_ISSOCK(mode))
794 goto out;
795
796 struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
797 u32 device_id = 0;
798 u64 file_ino = 0;
799 if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
800 goto out;
801
802 int zero = 0;
803 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
804 if (!filemod_data)
805 goto out;
806
807 u32 pid = get_userspace_pid();
808 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
809
810 filemod_data->meta.type = FILEMOD_EVENT;
811 filemod_data->fmod_type = FMOD_OPEN;
812 filemod_data->dst_flags = flags;
813 filemod_data->src_inode = 0;
814 filemod_data->dst_inode = file_ino;
815 filemod_data->src_device_id = 0;
816 filemod_data->dst_device_id = device_id;
817 filemod_data->src_filepath_length = 0;
818 filemod_data->dst_filepath_length = 0;
819
820 void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
821 filemod_data->payload);
822 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
823
824 size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
825 barrier_var(len);
826 if (len <= MAX_FILEPATH_LENGTH) {
827 barrier_var(len);
828 payload += len;
829 filemod_data->dst_filepath_length = len;
830 }
831 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
832 unsigned long data_len = payload - (void*)filemod_data;
833 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
834 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
835out:
836 bpf_stats_exit(&stats_ctx);
837 return 0;
838}
839
840SEC("kprobe/vfs_link")
841int BPF_KPROBE(kprobe__vfs_link,
842 struct dentry* old_dentry, struct mnt_idmap *idmap,
843 struct inode* dir, struct dentry* new_dentry,
844 struct inode** delegated_inode)
845{
846 struct bpf_func_stats_ctx stats_ctx;
847 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
848
849 u32 src_device_id = 0;
850 u64 src_file_ino = 0;
851 u32 dst_device_id = 0;
852 u64 dst_file_ino = 0;
853 if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
854 !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
855 goto out;
856
857 int zero = 0;
858 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
859 if (!filemod_data)
860 goto out;
861
862 u32 pid = get_userspace_pid();
863 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
864
865 filemod_data->meta.type = FILEMOD_EVENT;
866 filemod_data->fmod_type = FMOD_LINK;
867 filemod_data->dst_flags = 0;
868 filemod_data->src_inode = src_file_ino;
869 filemod_data->dst_inode = dst_file_ino;
870 filemod_data->src_device_id = src_device_id;
871 filemod_data->dst_device_id = dst_device_id;
872 filemod_data->src_filepath_length = 0;
873 filemod_data->dst_filepath_length = 0;
874
875 void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
876 filemod_data->payload);
877 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
878
879 size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
880 barrier_var(len);
881 if (len <= MAX_FILEPATH_LENGTH) {
882 barrier_var(len);
883 payload += len;
884 filemod_data->src_filepath_length = len;
885 }
886
887 len = read_absolute_file_path_from_dentry(new_dentry, payload);
888 barrier_var(len);
889 if (len <= MAX_FILEPATH_LENGTH) {
890 barrier_var(len);
891 payload += len;
892 filemod_data->dst_filepath_length = len;
893 }
894
895 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
896 unsigned long data_len = payload - (void*)filemod_data;
897 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
898 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
899out:
900 bpf_stats_exit(&stats_ctx);
901 return 0;
902}
903
904SEC("kprobe/vfs_symlink")
905int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
906 const char* oldname)
907{
908 struct bpf_func_stats_ctx stats_ctx;
909 bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
910
911 u32 dst_device_id = 0;
912 u64 dst_file_ino = 0;
913 if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
914 goto out;
915
916 int zero = 0;
917 struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
918 if (!filemod_data)
919 goto out;
920
921 u32 pid = get_userspace_pid();
922 struct task_struct* task = (struct task_struct*)bpf_get_current_task();
923
924 filemod_data->meta.type = FILEMOD_EVENT;
925 filemod_data->fmod_type = FMOD_SYMLINK;
926 filemod_data->dst_flags = 0;
927 filemod_data->src_inode = 0;
928 filemod_data->dst_inode = dst_file_ino;
929 filemod_data->src_device_id = 0;
930 filemod_data->dst_device_id = dst_device_id;
931 filemod_data->src_filepath_length = 0;
932 filemod_data->dst_filepath_length = 0;
933
934 void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
935 filemod_data->payload);
936 payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
937
938 size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
939 oldname);
940 barrier_var(len);
941 if (len <= MAX_FILEPATH_LENGTH) {
942 barrier_var(len);
943 payload += len;
944 filemod_data->src_filepath_length = len;
945 }
946 len = read_absolute_file_path_from_dentry(dentry, payload);
947 barrier_var(len);
948 if (len <= MAX_FILEPATH_LENGTH) {
949 barrier_var(len);
950 payload += len;
951 filemod_data->dst_filepath_length = len;
952 }
953 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
954 unsigned long data_len = payload - (void*)filemod_data;
955 data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
956 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
957out:
958 bpf_stats_exit(&stats_ctx);
959 return 0;
960}
961
962SEC("raw_tracepoint/sched_process_fork")
963int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
964{
965 struct bpf_func_stats_ctx stats_ctx;
966 bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
967
968 int zero = 0;
969 struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
970 if (!fork_data)
971 goto out;
972
973 struct task_struct* parent = (struct task_struct*)ctx->args[0];
974 struct task_struct* child = (struct task_struct*)ctx->args[1];
975 fork_data->meta.type = FORK_EVENT;
976
977 void* payload = populate_var_metadata(&fork_data->meta, child,
978 BPF_CORE_READ(child, pid), fork_data->payload);
979 fork_data->parent_pid = BPF_CORE_READ(parent, pid);
980 fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
981 fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
982 bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
983
984 unsigned long data_len = payload - (void*)fork_data;
985 data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
986 bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
987out:
988 bpf_stats_exit(&stats_ctx);
989 return 0;
990}
/* License string placed in the "license" ELF section of the BPF object. */
char _license[] SEC("license") = "GPL";