Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: per-cgroup lsm flavor

Allow attaching to lsm hooks in the cgroup context.

Attaching to per-cgroup LSM works exactly like attaching
to other per-cgroup hooks. New BPF_LSM_CGROUP is added
to trigger new mode; the actual lsm hook we attach to is
signaled via existing attach_btf_id.

For the hooks that have 'struct socket' or 'struct sock' as their first
argument, we use the cgroup associated with that socket. For the rest,
we use 'current' cgroup (this is all on default hierarchy == v2 only).
Note that for some hooks that work on 'struct sock' we still
take the cgroup from 'current' because some of them work on the socket
that hasn't been properly initialized yet.

Behind the scenes, we allocate a shim program that is attached
to the trampoline and runs cgroup effective BPF programs array.
This shim has some rudimentary ref counting and can be shared
between several programs attaching to the same lsm hook from
different cgroups.

Note that this patch bloats cgroup size because we add 211
cgroup_bpf_attach_type(s) for simplicity's sake. This will be
addressed in the subsequent patch.

Also note that we only add non-sleepable flavor for now. To enable
sleepable use-cases, bpf_prog_run_array_cg has to grab trace rcu,
shim programs have to be freed via trace rcu, cgroup_bpf.effective
should also be trace-rcu-managed + maybe some other changes that
I'm not aware of.

Reviewed-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20220628174314.1216643-4-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Stanislav Fomichev and committed by
Alexei Starovoitov
69fd337a 00442143

+498 -20
+16 -8
arch/x86/net/bpf_jit_comp.c
··· 1770 1770 struct bpf_tramp_link *l, int stack_size, 1771 1771 int run_ctx_off, bool save_ret) 1772 1772 { 1773 + void (*exit)(struct bpf_prog *prog, u64 start, 1774 + struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit; 1775 + u64 (*enter)(struct bpf_prog *prog, 1776 + struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter; 1773 1777 u8 *prog = *pprog; 1774 1778 u8 *jmp_insn; 1775 1779 int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); ··· 1792 1788 */ 1793 1789 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); 1794 1790 1791 + if (p->aux->sleepable) { 1792 + enter = __bpf_prog_enter_sleepable; 1793 + exit = __bpf_prog_exit_sleepable; 1794 + } else if (p->expected_attach_type == BPF_LSM_CGROUP) { 1795 + enter = __bpf_prog_enter_lsm_cgroup; 1796 + exit = __bpf_prog_exit_lsm_cgroup; 1797 + } 1798 + 1795 1799 /* arg1: mov rdi, progs[i] */ 1796 1800 emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); 1797 1801 /* arg2: lea rsi, [rbp - ctx_cookie_off] */ 1798 1802 EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); 1799 1803 1800 - if (emit_call(&prog, 1801 - p->aux->sleepable ? __bpf_prog_enter_sleepable : 1802 - __bpf_prog_enter, prog)) 1803 - return -EINVAL; 1804 + if (emit_call(&prog, enter, prog)) 1805 + return -EINVAL; 1804 1806 /* remember prog start time returned by __bpf_prog_enter */ 1805 1807 emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); 1806 1808 ··· 1850 1840 emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); 1851 1841 /* arg3: lea rdx, [rbp - run_ctx_off] */ 1852 1842 EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); 1853 - if (emit_call(&prog, 1854 - p->aux->sleepable ? __bpf_prog_exit_sleepable : 1855 - __bpf_prog_exit, prog)) 1856 - return -EINVAL; 1843 + if (emit_call(&prog, exit, prog)) 1844 + return -EINVAL; 1857 1845 1858 1846 *pprog = prog; 1859 1847 return 0;
+8
include/linux/bpf-cgroup-defs.h
··· 10 10 11 11 struct bpf_prog_array; 12 12 13 + #ifdef CONFIG_BPF_LSM 14 + #define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */ 15 + #else 16 + #define CGROUP_LSM_NUM 0 17 + #endif 18 + 13 19 enum cgroup_bpf_attach_type { 14 20 CGROUP_BPF_ATTACH_TYPE_INVALID = -1, 15 21 CGROUP_INET_INGRESS = 0, ··· 41 35 CGROUP_INET4_GETSOCKNAME, 42 36 CGROUP_INET6_GETSOCKNAME, 43 37 CGROUP_INET_SOCK_RELEASE, 38 + CGROUP_LSM_START, 39 + CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, 44 40 MAX_CGROUP_BPF_ATTACH_TYPE 45 41 }; 46 42
+7
include/linux/bpf-cgroup.h
··· 23 23 struct ctl_table_header; 24 24 struct task_struct; 25 25 26 + unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, 27 + const struct bpf_insn *insn); 28 + unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, 29 + const struct bpf_insn *insn); 30 + unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, 31 + const struct bpf_insn *insn); 32 + 26 33 #ifdef CONFIG_CGROUP_BPF 27 34 28 35 #define CGROUP_ATYPE(type) \
+24
include/linux/bpf.h
··· 794 794 u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); 795 795 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, 796 796 struct bpf_tramp_run_ctx *run_ctx); 797 + u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, 798 + struct bpf_tramp_run_ctx *run_ctx); 799 + void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, 800 + struct bpf_tramp_run_ctx *run_ctx); 797 801 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); 798 802 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); 799 803 ··· 1064 1060 struct user_struct *user; 1065 1061 u64 load_time; /* ns since boottime */ 1066 1062 u32 verified_insns; 1063 + int cgroup_atype; /* enum cgroup_bpf_attach_type */ 1067 1064 struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; 1068 1065 char name[BPF_OBJ_NAME_LEN]; 1069 1066 #ifdef CONFIG_SECURITY ··· 1172 1167 u64 cookie; 1173 1168 }; 1174 1169 1170 + struct bpf_shim_tramp_link { 1171 + struct bpf_tramp_link link; 1172 + struct bpf_trampoline *trampoline; 1173 + }; 1174 + 1175 1175 struct bpf_tracing_link { 1176 1176 struct bpf_tramp_link link; 1177 1177 enum bpf_attach_type attach_type; ··· 1255 1245 int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, 1256 1246 union bpf_attr __user *uattr); 1257 1247 #endif 1248 + int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, 1249 + int cgroup_atype); 1250 + void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); 1258 1251 #else 1259 1252 static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) 1260 1253 { ··· 1280 1267 void *value) 1281 1268 { 1282 1269 return -EINVAL; 1270 + } 1271 + static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, 1272 + int cgroup_atype) 1273 + { 1274 + return -EOPNOTSUPP; 1275 + } 1276 + static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) 1277 + { 1283 1278 } 1284 1279 
#endif 1285 1280 ··· 2389 2368 extern const struct bpf_func_proto bpf_find_vma_proto; 2390 2369 extern const struct bpf_func_proto bpf_loop_proto; 2391 2370 extern const struct bpf_func_proto bpf_copy_from_user_task_proto; 2371 + extern const struct bpf_func_proto bpf_set_retval_proto; 2372 + extern const struct bpf_func_proto bpf_get_retval_proto; 2392 2373 2393 2374 const struct bpf_func_proto *tracing_prog_func_proto( 2394 2375 enum bpf_func_id func_id, const struct bpf_prog *prog); ··· 2508 2485 2509 2486 struct btf_id_set; 2510 2487 bool btf_id_set_contains(const struct btf_id_set *set, u32 id); 2488 + int btf_id_set_index(const struct btf_id_set *set, u32 id); 2511 2489 2512 2490 #define MAX_BPRINTF_VARARGS 12 2513 2491
+13
include/linux/bpf_lsm.h
··· 42 42 extern const struct bpf_func_proto bpf_inode_storage_delete_proto; 43 43 void bpf_inode_storage_free(struct inode *inode); 44 44 45 + int bpf_lsm_hook_idx(u32 btf_id); 46 + void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); 47 + 45 48 #else /* !CONFIG_BPF_LSM */ 46 49 47 50 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) ··· 66 63 67 64 static inline void bpf_inode_storage_free(struct inode *inode) 68 65 { 66 + } 67 + 68 + static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, 69 + bpf_func_t *bpf_func) 70 + { 71 + } 72 + 73 + static inline int bpf_lsm_hook_idx(u32 btf_id) 74 + { 75 + return -EINVAL; 69 76 } 70 77 71 78 #endif /* CONFIG_BPF_LSM */
+2 -1
include/linux/btf_ids.h
··· 179 179 BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ 180 180 BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ 181 181 BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ 182 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) 182 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \ 183 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket) 183 184 184 185 enum { 185 186 #define BTF_SOCK_TYPE(name, str) name,
+1
include/uapi/linux/bpf.h
··· 998 998 BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, 999 999 BPF_PERF_EVENT, 1000 1000 BPF_TRACE_KPROBE_MULTI, 1001 + BPF_LSM_CGROUP, 1001 1002 __MAX_BPF_ATTACH_TYPE 1002 1003 }; 1003 1004
+48
kernel/bpf/bpf_lsm.c
··· 16 16 #include <linux/bpf_local_storage.h> 17 17 #include <linux/btf_ids.h> 18 18 #include <linux/ima.h> 19 + #include <linux/bpf-cgroup.h> 19 20 20 21 /* For every LSM hook that allows attachment of BPF programs, declare a nop 21 22 * function where a BPF program can be attached. ··· 35 34 #include <linux/lsm_hook_defs.h> 36 35 #undef LSM_HOOK 37 36 BTF_SET_END(bpf_lsm_hooks) 37 + 38 + /* List of LSM hooks that should operate on 'current' cgroup regardless 39 + * of function signature. 40 + */ 41 + BTF_SET_START(bpf_lsm_current_hooks) 42 + /* operate on freshly allocated sk without any cgroup association */ 43 + BTF_ID(func, bpf_lsm_sk_alloc_security) 44 + BTF_ID(func, bpf_lsm_sk_free_security) 45 + BTF_SET_END(bpf_lsm_current_hooks) 46 + 47 + void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, 48 + bpf_func_t *bpf_func) 49 + { 50 + const struct btf_param *args; 51 + 52 + if (btf_type_vlen(prog->aux->attach_func_proto) < 1 || 53 + btf_id_set_contains(&bpf_lsm_current_hooks, 54 + prog->aux->attach_btf_id)) { 55 + *bpf_func = __cgroup_bpf_run_lsm_current; 56 + return; 57 + } 58 + 59 + args = btf_params(prog->aux->attach_func_proto); 60 + 61 + #ifdef CONFIG_NET 62 + if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET]) 63 + *bpf_func = __cgroup_bpf_run_lsm_socket; 64 + else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK]) 65 + *bpf_func = __cgroup_bpf_run_lsm_sock; 66 + else 67 + #endif 68 + *bpf_func = __cgroup_bpf_run_lsm_current; 69 + } 70 + 71 + int bpf_lsm_hook_idx(u32 btf_id) 72 + { 73 + return btf_id_set_index(&bpf_lsm_hooks, btf_id); 74 + } 38 75 39 76 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, 40 77 const struct bpf_prog *prog) ··· 197 158 return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL; 198 159 case BPF_FUNC_get_attach_cookie: 199 160 return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL; 161 + case BPF_FUNC_get_local_storage: 162 + return prog->expected_attach_type == BPF_LSM_CGROUP ? 
163 + &bpf_get_local_storage_proto : NULL; 164 + case BPF_FUNC_set_retval: 165 + return prog->expected_attach_type == BPF_LSM_CGROUP ? 166 + &bpf_set_retval_proto : NULL; 167 + case BPF_FUNC_get_retval: 168 + return prog->expected_attach_type == BPF_LSM_CGROUP ? 169 + &bpf_get_retval_proto : NULL; 200 170 default: 201 171 return tracing_prog_func_proto(func_id, prog); 202 172 }
+11
kernel/bpf/btf.c
··· 5363 5363 5364 5364 if (arg == nr_args) { 5365 5365 switch (prog->expected_attach_type) { 5366 + case BPF_LSM_CGROUP: 5366 5367 case BPF_LSM_MAC: 5367 5368 case BPF_TRACE_FEXIT: 5368 5369 /* When LSM programs are attached to void LSM hooks ··· 6841 6840 const int *pa = a, *pb = b; 6842 6841 6843 6842 return *pa - *pb; 6843 + } 6844 + 6845 + int btf_id_set_index(const struct btf_id_set *set, u32 id) 6846 + { 6847 + const u32 *p; 6848 + 6849 + p = bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func); 6850 + if (!p) 6851 + return -1; 6852 + return p - set->ids; 6844 6853 } 6845 6854 6846 6855 bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
+125 -11
kernel/bpf/cgroup.c
··· 14 14 #include <linux/string.h> 15 15 #include <linux/bpf.h> 16 16 #include <linux/bpf-cgroup.h> 17 + #include <linux/bpf_lsm.h> 18 + #include <linux/bpf_verifier.h> 17 19 #include <net/sock.h> 18 20 #include <net/bpf_sk_storage.h> 19 21 ··· 62 60 migrate_enable(); 63 61 return run_ctx.retval; 64 62 } 63 + 64 + unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, 65 + const struct bpf_insn *insn) 66 + { 67 + const struct bpf_prog *shim_prog; 68 + struct sock *sk; 69 + struct cgroup *cgrp; 70 + int ret = 0; 71 + u64 *args; 72 + 73 + args = (u64 *)ctx; 74 + sk = (void *)(unsigned long)args[0]; 75 + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 76 + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 77 + 78 + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 79 + if (likely(cgrp)) 80 + ret = bpf_prog_run_array_cg(&cgrp->bpf, 81 + shim_prog->aux->cgroup_atype, 82 + ctx, bpf_prog_run, 0, NULL); 83 + return ret; 84 + } 85 + 86 + unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, 87 + const struct bpf_insn *insn) 88 + { 89 + const struct bpf_prog *shim_prog; 90 + struct socket *sock; 91 + struct cgroup *cgrp; 92 + int ret = 0; 93 + u64 *args; 94 + 95 + args = (u64 *)ctx; 96 + sock = (void *)(unsigned long)args[0]; 97 + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 98 + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 99 + 100 + cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data); 101 + if (likely(cgrp)) 102 + ret = bpf_prog_run_array_cg(&cgrp->bpf, 103 + shim_prog->aux->cgroup_atype, 104 + ctx, bpf_prog_run, 0, NULL); 105 + return ret; 106 + } 107 + 108 + unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, 109 + const struct bpf_insn *insn) 110 + { 111 + const struct bpf_prog *shim_prog; 112 + struct cgroup *cgrp; 113 + int ret = 0; 114 + 115 + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 116 + shim_prog = (const struct bpf_prog *)((void 
*)insn - offsetof(struct bpf_prog, insnsi)); 117 + 118 + /* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */ 119 + cgrp = task_dfl_cgroup(current); 120 + if (likely(cgrp)) 121 + ret = bpf_prog_run_array_cg(&cgrp->bpf, 122 + shim_prog->aux->cgroup_atype, 123 + ctx, bpf_prog_run, 0, NULL); 124 + return ret; 125 + } 126 + 127 + #ifdef CONFIG_BPF_LSM 128 + static enum cgroup_bpf_attach_type 129 + bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) 130 + { 131 + if (attach_type != BPF_LSM_CGROUP) 132 + return to_cgroup_bpf_attach_type(attach_type); 133 + return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id); 134 + } 135 + #else 136 + static enum cgroup_bpf_attach_type 137 + bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) 138 + { 139 + if (attach_type != BPF_LSM_CGROUP) 140 + return to_cgroup_bpf_attach_type(attach_type); 141 + return -EOPNOTSUPP; 142 + } 143 + #endif /* CONFIG_BPF_LSM */ 65 144 66 145 void cgroup_bpf_offline(struct cgroup *cgrp) 67 146 { ··· 246 163 247 164 hlist_for_each_entry_safe(pl, pltmp, progs, node) { 248 165 hlist_del(&pl->node); 249 - if (pl->prog) 166 + if (pl->prog) { 167 + if (pl->prog->expected_attach_type == BPF_LSM_CGROUP) 168 + bpf_trampoline_unlink_cgroup_shim(pl->prog); 250 169 bpf_prog_put(pl->prog); 251 - if (pl->link) 170 + } 171 + if (pl->link) { 172 + if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP) 173 + bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog); 252 174 bpf_cgroup_link_auto_detach(pl->link); 175 + } 253 176 kfree(pl); 254 177 static_branch_dec(&cgroup_bpf_enabled_key[atype]); 255 178 } ··· 568 479 struct bpf_prog *old_prog = NULL; 569 480 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; 570 481 struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; 482 + struct bpf_prog *new_prog = prog ? 
: link->link.prog; 571 483 enum cgroup_bpf_attach_type atype; 572 484 struct bpf_prog_list *pl; 573 485 struct hlist_head *progs; ··· 585 495 /* replace_prog implies BPF_F_REPLACE, and vice versa */ 586 496 return -EINVAL; 587 497 588 - atype = to_cgroup_bpf_attach_type(type); 498 + atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id); 589 499 if (atype < 0) 590 500 return -EINVAL; 591 501 ··· 639 549 bpf_cgroup_storages_assign(pl->storage, storage); 640 550 cgrp->bpf.flags[atype] = saved_flags; 641 551 552 + if (type == BPF_LSM_CGROUP) { 553 + err = bpf_trampoline_link_cgroup_shim(new_prog, atype); 554 + if (err) 555 + goto cleanup; 556 + } 557 + 642 558 err = update_effective_progs(cgrp, atype); 643 559 if (err) 644 - goto cleanup; 560 + goto cleanup_trampoline; 645 561 646 - if (old_prog) 562 + if (old_prog) { 563 + if (type == BPF_LSM_CGROUP) 564 + bpf_trampoline_unlink_cgroup_shim(old_prog); 647 565 bpf_prog_put(old_prog); 648 - else 566 + } else { 649 567 static_branch_inc(&cgroup_bpf_enabled_key[atype]); 568 + } 650 569 bpf_cgroup_storages_link(new_storage, cgrp, type); 651 570 return 0; 571 + 572 + cleanup_trampoline: 573 + if (type == BPF_LSM_CGROUP) 574 + bpf_trampoline_unlink_cgroup_shim(new_prog); 652 575 653 576 cleanup: 654 577 if (old_prog) { ··· 754 651 struct hlist_head *progs; 755 652 bool found = false; 756 653 757 - atype = to_cgroup_bpf_attach_type(link->type); 654 + atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id); 758 655 if (atype < 0) 759 656 return -EINVAL; 760 657 ··· 906 803 struct bpf_prog *old_prog; 907 804 struct bpf_prog_list *pl; 908 805 struct hlist_head *progs; 806 + u32 attach_btf_id = 0; 909 807 u32 flags; 910 808 911 - atype = to_cgroup_bpf_attach_type(type); 809 + if (prog) 810 + attach_btf_id = prog->aux->attach_btf_id; 811 + if (link) 812 + attach_btf_id = link->link.prog->aux->attach_btf_id; 813 + 814 + atype = bpf_cgroup_atype_find(type, attach_btf_id); 912 815 if (atype < 0) 913 816 
return -EINVAL; 914 817 ··· 948 839 if (hlist_empty(progs)) 949 840 /* last program was detached, reset flags to zero */ 950 841 cgrp->bpf.flags[atype] = 0; 951 - if (old_prog) 842 + if (old_prog) { 843 + if (type == BPF_LSM_CGROUP) 844 + bpf_trampoline_unlink_cgroup_shim(old_prog); 952 845 bpf_prog_put(old_prog); 846 + } 953 847 static_branch_dec(&cgroup_bpf_enabled_key[atype]); 954 848 return 0; 955 849 } ··· 1111 999 1112 1000 WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link, 1113 1001 cg_link->type)); 1002 + if (cg_link->type == BPF_LSM_CGROUP) 1003 + bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog); 1114 1004 1115 1005 cg = cg_link->cgroup; 1116 1006 cg_link->cgroup = NULL; ··· 1457 1343 return ctx->retval; 1458 1344 } 1459 1345 1460 - static const struct bpf_func_proto bpf_get_retval_proto = { 1346 + const struct bpf_func_proto bpf_get_retval_proto = { 1461 1347 .func = bpf_get_retval, 1462 1348 .gpl_only = false, 1463 1349 .ret_type = RET_INTEGER, ··· 1472 1358 return 0; 1473 1359 } 1474 1360 1475 - static const struct bpf_func_proto bpf_set_retval_proto = { 1361 + const struct bpf_func_proto bpf_set_retval_proto = { 1476 1362 .func = bpf_set_retval, 1477 1363 .gpl_only = false, 1478 1364 .ret_type = RET_INTEGER,
+2
kernel/bpf/core.c
··· 2666 2666 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; 2667 2667 const struct bpf_func_proto bpf_snprintf_btf_proto __weak; 2668 2668 const struct bpf_func_proto bpf_seq_printf_btf_proto __weak; 2669 + const struct bpf_func_proto bpf_set_retval_proto __weak; 2670 + const struct bpf_func_proto bpf_get_retval_proto __weak; 2669 2671 2670 2672 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) 2671 2673 {
+10
kernel/bpf/syscall.c
··· 3416 3416 return BPF_PROG_TYPE_SK_LOOKUP; 3417 3417 case BPF_XDP: 3418 3418 return BPF_PROG_TYPE_XDP; 3419 + case BPF_LSM_CGROUP: 3420 + return BPF_PROG_TYPE_LSM; 3419 3421 default: 3420 3422 return BPF_PROG_TYPE_UNSPEC; 3421 3423 } ··· 3471 3469 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3472 3470 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3473 3471 case BPF_PROG_TYPE_SOCK_OPS: 3472 + case BPF_PROG_TYPE_LSM: 3473 + if (ptype == BPF_PROG_TYPE_LSM && 3474 + prog->expected_attach_type != BPF_LSM_CGROUP) 3475 + return -EINVAL; 3476 + 3474 3477 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 3475 3478 break; 3476 3479 default: ··· 3513 3506 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3514 3507 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3515 3508 case BPF_PROG_TYPE_SOCK_OPS: 3509 + case BPF_PROG_TYPE_LSM: 3516 3510 return cgroup_bpf_prog_detach(attr, ptype); 3517 3511 default: 3518 3512 return -EINVAL; ··· 4548 4540 ret = bpf_raw_tp_link_attach(prog, NULL); 4549 4541 else if (prog->expected_attach_type == BPF_TRACE_ITER) 4550 4542 ret = bpf_iter_link_attach(attr, uattr, prog); 4543 + else if (prog->expected_attach_type == BPF_LSM_CGROUP) 4544 + ret = cgroup_bpf_link_attach(attr, prog); 4551 4545 else 4552 4546 ret = bpf_tracing_prog_attach(prog, 4553 4547 attr->link_create.target_fd,
+198
kernel/bpf/trampoline.c
··· 11 11 #include <linux/rcupdate_wait.h> 12 12 #include <linux/module.h> 13 13 #include <linux/static_call.h> 14 + #include <linux/bpf_verifier.h> 15 + #include <linux/bpf_lsm.h> 14 16 15 17 /* dummy _ops. The verifier will operate on target program's ops. */ 16 18 const struct bpf_verifier_ops bpf_extension_verifier_ops = { ··· 498 496 return err; 499 497 } 500 498 499 + #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) 500 + static void bpf_shim_tramp_link_release(struct bpf_link *link) 501 + { 502 + struct bpf_shim_tramp_link *shim_link = 503 + container_of(link, struct bpf_shim_tramp_link, link.link); 504 + 505 + /* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */ 506 + if (!shim_link->trampoline) 507 + return; 508 + 509 + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline)); 510 + bpf_trampoline_put(shim_link->trampoline); 511 + } 512 + 513 + static void bpf_shim_tramp_link_dealloc(struct bpf_link *link) 514 + { 515 + struct bpf_shim_tramp_link *shim_link = 516 + container_of(link, struct bpf_shim_tramp_link, link.link); 517 + 518 + kfree(shim_link); 519 + } 520 + 521 + static const struct bpf_link_ops bpf_shim_tramp_link_lops = { 522 + .release = bpf_shim_tramp_link_release, 523 + .dealloc = bpf_shim_tramp_link_dealloc, 524 + }; 525 + 526 + static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog, 527 + bpf_func_t bpf_func, 528 + int cgroup_atype) 529 + { 530 + struct bpf_shim_tramp_link *shim_link = NULL; 531 + struct bpf_prog *p; 532 + 533 + shim_link = kzalloc(sizeof(*shim_link), GFP_USER); 534 + if (!shim_link) 535 + return NULL; 536 + 537 + p = bpf_prog_alloc(1, 0); 538 + if (!p) { 539 + kfree(shim_link); 540 + return NULL; 541 + } 542 + 543 + p->jited = false; 544 + p->bpf_func = bpf_func; 545 + 546 + p->aux->cgroup_atype = cgroup_atype; 547 + p->aux->attach_func_proto = prog->aux->attach_func_proto; 548 + p->aux->attach_btf_id = prog->aux->attach_btf_id; 549 + 
p->aux->attach_btf = prog->aux->attach_btf; 550 + btf_get(p->aux->attach_btf); 551 + p->type = BPF_PROG_TYPE_LSM; 552 + p->expected_attach_type = BPF_LSM_MAC; 553 + bpf_prog_inc(p); 554 + bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC, 555 + &bpf_shim_tramp_link_lops, p); 556 + 557 + return shim_link; 558 + } 559 + 560 + static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr, 561 + bpf_func_t bpf_func) 562 + { 563 + struct bpf_tramp_link *link; 564 + int kind; 565 + 566 + for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { 567 + hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) { 568 + struct bpf_prog *p = link->link.prog; 569 + 570 + if (p->bpf_func == bpf_func) 571 + return container_of(link, struct bpf_shim_tramp_link, link); 572 + } 573 + } 574 + 575 + return NULL; 576 + } 577 + 578 + int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, 579 + int cgroup_atype) 580 + { 581 + struct bpf_shim_tramp_link *shim_link = NULL; 582 + struct bpf_attach_target_info tgt_info = {}; 583 + struct bpf_trampoline *tr; 584 + bpf_func_t bpf_func; 585 + u64 key; 586 + int err; 587 + 588 + err = bpf_check_attach_target(NULL, prog, NULL, 589 + prog->aux->attach_btf_id, 590 + &tgt_info); 591 + if (err) 592 + return err; 593 + 594 + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, 595 + prog->aux->attach_btf_id); 596 + 597 + bpf_lsm_find_cgroup_shim(prog, &bpf_func); 598 + tr = bpf_trampoline_get(key, &tgt_info); 599 + if (!tr) 600 + return -ENOMEM; 601 + 602 + mutex_lock(&tr->mutex); 603 + 604 + shim_link = cgroup_shim_find(tr, bpf_func); 605 + if (shim_link) { 606 + /* Reusing existing shim attached by the other program. */ 607 + bpf_link_inc(&shim_link->link.link); 608 + 609 + mutex_unlock(&tr->mutex); 610 + bpf_trampoline_put(tr); /* bpf_trampoline_get above */ 611 + return 0; 612 + } 613 + 614 + /* Allocate and install new shim. 
*/ 615 + 616 + shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype); 617 + if (!shim_link) { 618 + err = -ENOMEM; 619 + goto err; 620 + } 621 + 622 + err = __bpf_trampoline_link_prog(&shim_link->link, tr); 623 + if (err) 624 + goto err; 625 + 626 + shim_link->trampoline = tr; 627 + /* note, we're still holding tr refcnt from above */ 628 + 629 + mutex_unlock(&tr->mutex); 630 + 631 + return 0; 632 + err: 633 + mutex_unlock(&tr->mutex); 634 + 635 + if (shim_link) 636 + bpf_link_put(&shim_link->link.link); 637 + 638 + /* have to release tr while _not_ holding its mutex */ 639 + bpf_trampoline_put(tr); /* bpf_trampoline_get above */ 640 + 641 + return err; 642 + } 643 + 644 + void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) 645 + { 646 + struct bpf_shim_tramp_link *shim_link = NULL; 647 + struct bpf_trampoline *tr; 648 + bpf_func_t bpf_func; 649 + u64 key; 650 + 651 + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, 652 + prog->aux->attach_btf_id); 653 + 654 + bpf_lsm_find_cgroup_shim(prog, &bpf_func); 655 + tr = bpf_trampoline_lookup(key); 656 + if (WARN_ON_ONCE(!tr)) 657 + return; 658 + 659 + mutex_lock(&tr->mutex); 660 + shim_link = cgroup_shim_find(tr, bpf_func); 661 + mutex_unlock(&tr->mutex); 662 + 663 + if (shim_link) 664 + bpf_link_put(&shim_link->link.link); 665 + 666 + bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */ 667 + } 668 + #endif 669 + 501 670 struct bpf_trampoline *bpf_trampoline_get(u64 key, 502 671 struct bpf_attach_target_info *tgt_info) 503 672 { ··· 797 624 798 625 update_prog_stats(prog, start); 799 626 __this_cpu_dec(*(prog->active)); 627 + migrate_enable(); 628 + rcu_read_unlock(); 629 + } 630 + 631 + u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, 632 + struct bpf_tramp_run_ctx *run_ctx) 633 + __acquires(RCU) 634 + { 635 + /* Runtime stats are exported via actual BPF_LSM_CGROUP 636 + * programs, not the shims. 
637 + */ 638 + rcu_read_lock(); 639 + migrate_disable(); 640 + 641 + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); 642 + 643 + return NO_START_TIME; 644 + } 645 + 646 + void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, 647 + struct bpf_tramp_run_ctx *run_ctx) 648 + __releases(RCU) 649 + { 650 + bpf_reset_run_ctx(run_ctx->saved_run_ctx); 651 + 800 652 migrate_enable(); 801 653 rcu_read_unlock(); 802 654 }
+32
kernel/bpf/verifier.c
··· 7322 7322 reg_type_str(env, regs[BPF_REG_1].type)); 7323 7323 return -EACCES; 7324 7324 } 7325 + break; 7326 + case BPF_FUNC_set_retval: 7327 + if (env->prog->expected_attach_type == BPF_LSM_CGROUP) { 7328 + if (!env->prog->aux->attach_func_proto->type) { 7329 + /* Make sure programs that attach to void 7330 + * hooks don't try to modify return value. 7331 + */ 7332 + verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); 7333 + return -EINVAL; 7334 + } 7335 + } 7336 + break; 7325 7337 } 7326 7338 7327 7339 if (err) ··· 10539 10527 case BPF_PROG_TYPE_SK_LOOKUP: 10540 10528 range = tnum_range(SK_DROP, SK_PASS); 10541 10529 break; 10530 + 10531 + case BPF_PROG_TYPE_LSM: 10532 + if (env->prog->expected_attach_type != BPF_LSM_CGROUP) { 10533 + /* Regular BPF_PROG_TYPE_LSM programs can return 10534 + * any value. 10535 + */ 10536 + return 0; 10537 + } 10538 + if (!env->prog->aux->attach_func_proto->type) { 10539 + /* Make sure programs that attach to void 10540 + * hooks don't try to modify return value. 10541 + */ 10542 + range = tnum_range(1, 1); 10543 + } 10544 + break; 10545 + 10542 10546 case BPF_PROG_TYPE_EXT: 10543 10547 /* freplace program can return anything as its return value 10544 10548 * depends on the to-be-replaced kernel func or bpf program. ··· 10571 10543 10572 10544 if (!tnum_in(range, reg->var_off)) { 10573 10545 verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); 10546 + if (prog->expected_attach_type == BPF_LSM_CGROUP && 10547 + !prog->aux->attach_func_proto->type) 10548 + verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); 10574 10549 return -EINVAL; 10575 10550 } 10576 10551 ··· 14933 14902 fallthrough; 14934 14903 case BPF_MODIFY_RETURN: 14935 14904 case BPF_LSM_MAC: 14905 + case BPF_LSM_CGROUP: 14936 14906 case BPF_TRACE_FENTRY: 14937 14907 case BPF_TRACE_FEXIT: 14938 14908 if (!btf_type_is_func(t)) {
+1
tools/include/uapi/linux/bpf.h
··· 998 998 BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, 999 999 BPF_PERF_EVENT, 1000 1000 BPF_TRACE_KPROBE_MULTI, 1001 + BPF_LSM_CGROUP, 1001 1002 __MAX_BPF_ATTACH_TYPE 1002 1003 }; 1003 1004