Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _BPF_CGROUP_H
3#define _BPF_CGROUP_H
4
5#include <linux/bpf.h>
6#include <linux/errno.h>
7#include <linux/jump_label.h>
8#include <linux/percpu.h>
9#include <linux/percpu-refcount.h>
10#include <linux/rbtree.h>
11#include <uapi/linux/bpf.h>
12
13struct sock;
14struct sockaddr;
15struct cgroup;
16struct sk_buff;
17struct bpf_map;
18struct bpf_prog;
19struct bpf_sock_ops_kern;
20struct bpf_cgroup_storage;
21struct ctl_table;
22struct ctl_table_header;
23
24#ifdef CONFIG_CGROUP_BPF
25
26extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
27#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
28
29DECLARE_PER_CPU(struct bpf_cgroup_storage*,
30 bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
31
/* Loop header: walks @stype from 0 up to MAX_BPF_CGROUP_STORAGE_TYPE - 1. */
#define for_each_cgroup_storage_type(stype) \
	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
34
35struct bpf_cgroup_storage_map;
36
37struct bpf_storage_buffer {
38 struct rcu_head rcu;
39 char data[];
40};
41
42struct bpf_cgroup_storage {
43 union {
44 struct bpf_storage_buffer *buf;
45 void __percpu *percpu_buf;
46 };
47 struct bpf_cgroup_storage_map *map;
48 struct bpf_cgroup_storage_key key;
49 struct list_head list_map;
50 struct list_head list_cg;
51 struct rb_node node;
52 struct rcu_head rcu;
53};
54
55struct bpf_cgroup_link {
56 struct bpf_link link;
57 struct cgroup *cgroup;
58 enum bpf_attach_type type;
59};
60
61struct bpf_prog_list {
62 struct list_head node;
63 struct bpf_prog *prog;
64 struct bpf_cgroup_link *link;
65 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
66};
67
68struct bpf_prog_array;
69
70struct cgroup_bpf {
71 /* array of effective progs in this cgroup */
72 struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
73
74 /* attached progs to this cgroup and attach flags
75 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
76 * have either zero or one element
77 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
78 */
79 struct list_head progs[MAX_BPF_ATTACH_TYPE];
80 u32 flags[MAX_BPF_ATTACH_TYPE];
81
82 /* list of cgroup shared storages */
83 struct list_head storages;
84
85 /* temp storage for effective prog array used by prog_attach/detach */
86 struct bpf_prog_array *inactive;
87
88 /* reference counter used to detach bpf programs after cgroup removal */
89 struct percpu_ref refcnt;
90
91 /* cgroup_bpf is released using a work queue */
92 struct work_struct release_work;
93};
94
95int cgroup_bpf_inherit(struct cgroup *cgrp);
96void cgroup_bpf_offline(struct cgroup *cgrp);
97
98int __cgroup_bpf_attach(struct cgroup *cgrp,
99 struct bpf_prog *prog, struct bpf_prog *replace_prog,
100 struct bpf_cgroup_link *link,
101 enum bpf_attach_type type, u32 flags);
102int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
103 struct bpf_cgroup_link *link,
104 enum bpf_attach_type type);
105int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
106 union bpf_attr __user *uattr);
107
108/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
109int cgroup_bpf_attach(struct cgroup *cgrp,
110 struct bpf_prog *prog, struct bpf_prog *replace_prog,
111 struct bpf_cgroup_link *link, enum bpf_attach_type type,
112 u32 flags);
113int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
114 enum bpf_attach_type type);
115int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
116 union bpf_attr __user *uattr);
117
118int __cgroup_bpf_run_filter_skb(struct sock *sk,
119 struct sk_buff *skb,
120 enum bpf_attach_type type);
121
122int __cgroup_bpf_run_filter_sk(struct sock *sk,
123 enum bpf_attach_type type);
124
125int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
126 struct sockaddr *uaddr,
127 enum bpf_attach_type type,
128 void *t_ctx,
129 u32 *flags);
130
131int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
132 struct bpf_sock_ops_kern *sock_ops,
133 enum bpf_attach_type type);
134
135int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
136 short access, enum bpf_attach_type type);
137
138int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
139 struct ctl_table *table, int write,
140 char **buf, size_t *pcount, loff_t *ppos,
141 enum bpf_attach_type type);
142
143int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
144 int *optname, char __user *optval,
145 int *optlen, char **kernel_optval);
146int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
147 int optname, char __user *optval,
148 int __user *optlen, int max_optlen,
149 int retval);
150
151int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
152 int optname, void *optval,
153 int *optlen, int retval);
154
155static inline enum bpf_cgroup_storage_type cgroup_storage_type(
156 struct bpf_map *map)
157{
158 if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
159 return BPF_CGROUP_STORAGE_PERCPU;
160
161 return BPF_CGROUP_STORAGE_SHARED;
162}
163
164static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
165 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
166{
167 enum bpf_cgroup_storage_type stype;
168
169 for_each_cgroup_storage_type(stype)
170 this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
171}
172
173struct bpf_cgroup_storage *
174cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
175 void *key, bool locked);
176struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
177 enum bpf_cgroup_storage_type stype);
178void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
179void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
180 struct cgroup *cgroup,
181 enum bpf_attach_type type);
182void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
183int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
184
185int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
186int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
187 void *value, u64 flags);
188
/*
 * Ingress wrapper for __cgroup_bpf_run_filter_skb(), guarded by
 * cgroup_bpf_enabled. Yields 0 when the key is disabled.
 */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) { \
		__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
					BPF_CGROUP_INET_INGRESS); \
	} \
	__ret; \
})
199
200#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
201({ \
202 int __ret = 0; \
203 if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
204 typeof(sk) __sk = sk_to_full_sk(sk); \
205 if (sk_fullsock(__sk)) \
206 __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
207 BPF_CGROUP_INET_EGRESS); \
208 } \
209 __ret; \
210})
211
/*
 * Run __cgroup_bpf_run_filter_sk() for @type when its key is enabled;
 * yields 0 otherwise.
 */
#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(type)) \
		__ret = __cgroup_bpf_run_filter_sk(sk, type); \
	__ret; \
})
220
/* Fixed-type shorthands for BPF_CGROUP_RUN_SK_PROG(). */
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)

#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)

#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)

#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
232
/*
 * Run the sock_addr filter for @type without taking the socket lock.
 * No t_ctx is passed (NULL) and the returned flags are discarded.
 * Yields 0 when the key for @type is disabled.
 */
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \
	u32 __unused_flags; \
	int __ret = 0; \
	if (cgroup_bpf_enabled(type)) { \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
							  NULL, \
							  &__unused_flags); \
	} \
	__ret; \
})
243
/*
 * Like BPF_CGROUP_RUN_SA_PROG(), but the filter call is bracketed by
 * lock_sock()/release_sock() on @sk and @t_ctx is forwarded to it.
 * The returned flags are discarded. Yields 0 when the key is disabled.
 */
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
	int __ret = 0; \
	u32 __unused_flags; \
	if (cgroup_bpf_enabled(type)) { \
		lock_sock(sk); \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
							  t_ctx, \
							  &__unused_flags); \
		release_sock(sk); \
	} \
	__ret; \
})
257
/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
 * via upper bits of return code. The only flag that is supported
 * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
 * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
 *
 * The filter call is bracketed by lock_sock()/release_sock() on @sk. When
 * the filter reports BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE,
 * BIND_NO_CAP_NET_BIND_SERVICE is OR-ed into *@bind_flags; other bits of
 * *@bind_flags are left untouched. Yields the filter's return code, or 0
 * when the key for @type is disabled.
 *
 * @bind_flags is parenthesized before being dereferenced so that any
 * pointer expression (e.g. "flags + 1") can be passed.
 */
#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
({ \
	u32 __flags = 0; \
	int __ret = 0; \
	if (cgroup_bpf_enabled(type)) { \
		lock_sock(sk); \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
							  NULL, &__flags); \
		release_sock(sk); \
		if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
			*(bind_flags) |= BIND_NO_CAP_NET_BIND_SERVICE; \
	} \
	__ret; \
})
277
/*
 * True when either the INET4 or the INET6 connect key is enabled and the
 * socket's protocol provides a pre_connect callback.
 */
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
	  cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \
	 (sk)->sk_prot->pre_connect)
282
/* connect: unlocked variants */
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)

#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)

/* connect: variants that take the socket lock around the filter */
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)

#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)

/* UDP sendmsg: locked, forwards the caller's t_ctx */
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)

#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)

/* UDP recvmsg: locked, no t_ctx */
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)

#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
306
/* Use the SOCK_OPS"_SK" variant when sock_ops->sk is not a fullsock and
 * sk_to_full_sk() cannot find its parent fullsock.
 *
 * Example: sock_ops->sk is a request_sock in syncookie mode, so its
 * listener-sk is not attached to rsk_listener. The caller then holds the
 * listener-sk (unlocked), sets sock_ops->sk to req_sk and invokes this
 * SOCK_OPS"_SK" macro with the listener-sk, so that the cgroup-bpf-progs
 * of the listener-sk get run.
 *
 * Calling bpf_setsockopt on the listener-sk would not make sense with or
 * without syncookie mode, so handing 'sock_ops->sk == req_sk' to the bpf
 * prog is appropriate here.
 */
#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) { \
		__ret = __cgroup_bpf_run_filter_sock_ops(sk, sock_ops, \
						BPF_CGROUP_SOCK_OPS); \
	} \
	__ret; \
})
331
/*
 * Run the sock_ops filter on the full socket behind (sock_ops)->sk.
 *
 * The filter runs only when the BPF_CGROUP_SOCK_OPS key is enabled,
 * (sock_ops)->sk is non-NULL, and sk_to_full_sk() returns a non-NULL
 * socket that passes sk_fullsock(). Yields 0 whenever the filter is not
 * run.
 *
 * The temporary is declared as 'struct sock *' rather than 'typeof(sk)':
 * 'sk' is not a parameter of this macro, so the typeof form only compiled
 * where the caller happened to have a variable named 'sk' in scope.
 */
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
		struct sock *__sk = sk_to_full_sk((sock_ops)->sk); \
		if (__sk && sk_fullsock(__sk)) \
			__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
							sock_ops, \
							BPF_CGROUP_SOCK_OPS); \
	} \
	__ret; \
})
344
/*
 * Device permission check through __cgroup_bpf_check_dev_permission();
 * yields 0 when the BPF_CGROUP_DEVICE key is disabled.
 */
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) { \
		__ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
							  access, \
							  BPF_CGROUP_DEVICE); \
	} \
	__ret; \
})
355
356
/*
 * Run the sysctl filter via __cgroup_bpf_run_filter_sysctl(); yields 0
 * when the BPF_CGROUP_SYSCTL key is disabled.
 */
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) { \
		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
						       buf, count, pos, \
						       BPF_CGROUP_SYSCTL); \
	} \
	__ret; \
})
366
/*
 * Run the setsockopt filter via __cgroup_bpf_run_filter_setsockopt();
 * yields 0 when the BPF_CGROUP_SETSOCKOPT key is disabled.
 */
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
				       kernel_optval) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) { \
		__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
							   optname, optval, \
							   optlen, \
							   kernel_optval); \
	} \
	__ret; \
})
378
/*
 * Fetch the value at user pointer @optlen with get_user() when the
 * BPF_CGROUP_GETSOCKOPT key is enabled; yields 0 when it is disabled.
 * The return value of get_user() itself is not checked.
 */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) { \
		get_user(__ret, optlen); \
	} \
	__ret; \
})
386
/*
 * Run the getsockopt filter unless the socket's protocol asks to bypass it.
 *
 * Starts from @retval. When the BPF_CGROUP_GETSOCKOPT key is enabled the
 * filter is invoked if the protocol has no bpf_bypass_getsockopt callback,
 * or if that callback returns 0 for (@level, @optname). In every other
 * case @retval is yielded unchanged.
 */
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
				       max_optlen, retval) \
({ \
	int __ret = retval; \
	if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) { \
		if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
		    !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
					  tcp_bpf_bypass_getsockopt, \
					  level, optname)) { \
			__ret = __cgroup_bpf_run_filter_getsockopt( \
				sock, level, optname, optval, optlen, \
				max_optlen, retval); \
		} \
	} \
	__ret; \
})
401
/*
 * Variant of the getsockopt hook that calls
 * __cgroup_bpf_run_filter_getsockopt_kern(). Yields @retval unchanged
 * when the BPF_CGROUP_GETSOCKOPT key is disabled.
 */
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
					    optlen, retval) \
({ \
	int __ret = retval; \
	if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) { \
		__ret = __cgroup_bpf_run_filter_getsockopt_kern( \
			sock, level, optname, optval, optlen, retval); \
	} \
	__ret; \
})
411
412int cgroup_bpf_prog_attach(const union bpf_attr *attr,
413 enum bpf_prog_type ptype, struct bpf_prog *prog);
414int cgroup_bpf_prog_detach(const union bpf_attr *attr,
415 enum bpf_prog_type ptype);
416int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
417int cgroup_bpf_prog_query(const union bpf_attr *attr,
418 union bpf_attr __user *uattr);
419#else
420
421struct bpf_prog;
422struct cgroup_bpf {};
423static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
424static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
425
426static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
427 enum bpf_prog_type ptype,
428 struct bpf_prog *prog)
429{
430 return -EINVAL;
431}
432
433static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
434 enum bpf_prog_type ptype)
435{
436 return -EINVAL;
437}
438
439static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
440 struct bpf_prog *prog)
441{
442 return -EINVAL;
443}
444
445static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
446 union bpf_attr __user *uattr)
447{
448 return -EINVAL;
449}
450
451static inline void bpf_cgroup_storage_set(
452 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
453static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
454 struct bpf_map *map) { return 0; }
455static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
456 struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
457static inline void bpf_cgroup_storage_free(
458 struct bpf_cgroup_storage *storage) {}
459static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
460 void *value) {
461 return 0;
462}
463static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
464 void *key, void *value, u64 flags) {
465 return 0;
466}
467
/*
 * !CONFIG_CGROUP_BPF macro stubs. None of them evaluates its arguments,
 * except the getsockopt hooks, which yield @retval unchanged.
 */
#define cgroup_bpf_enabled(type) (0)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)

/* skb and socket hooks */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })

/* sock_addr hooks */
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })

/* sock_ops, device and sysctl hooks */
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })

/* sockopt hooks */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
				       optlen, max_optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
					    optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
				       kernel_optval) ({ 0; })

/* loop header whose body never executes */
#define for_each_cgroup_storage_type(stype) for (; false; )
498
499#endif /* CONFIG_CGROUP_BPF */
500
501#endif /* _BPF_CGROUP_H */