bpf: Add support for writing to nf_conn:mark

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Support direct writes to nf_conn:mark from TC and XDP prog types. This
is useful when applications want to store per-connection metadata. This
is also particularly useful for applications that run both bpf and
iptables/nftables because the latter can trivially access this metadata.

One example use case is a bpf prog that performs advanced packet
classification, with iptables/nftables later handling routing because of
pre-existing/legacy code.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/ebca06dea366e3e7e861c12f375a548cc4c61108.1662568410.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Authored by Daniel Xu and committed by Alexei Starovoitov 3 years ago (864b656f 84c6ac41)

+143 -1

4 changed files

expand all

include/net/netfilter/nf_conntrack_bpf.h

net/core/filter.c

net/netfilter/nf_conntrack_bpf.c

net/netfilter/nf_conntrack_core.c

+23

include/net/netfilter/nf_conntrack_bpf.h

··· 3 3 #ifndef _NF_CONNTRACK_BPF_H 4 4 #define _NF_CONNTRACK_BPF_H 5 5 6 + #include <linux/bpf.h> 6 7 #include <linux/btf.h> 7 8 #include <linux/kconfig.h> 9 + #include <linux/mutex.h> 8 10 9 11 #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ 10 12 (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) 11 13 12 14 extern int register_nf_conntrack_bpf(void); 15 + extern void cleanup_nf_conntrack_bpf(void); 16 + 17 + extern struct mutex nf_conn_btf_access_lock; 18 + extern int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf, 19 + const struct btf_type *t, int off, int size, 20 + enum bpf_access_type atype, u32 *next_btf_id, 21 + enum bpf_type_flag *flag); 13 22 14 23 #else 15 24 16 25 static inline int register_nf_conntrack_bpf(void) 17 26 { 18 27 return 0; 28 + } 29 + 30 + static inline void cleanup_nf_conntrack_bpf(void) 31 + { 32 + } 33 + 34 + static inline int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, 35 + const struct btf *btf, 36 + const struct btf_type *t, int off, 37 + int size, enum bpf_access_type atype, 38 + u32 *next_btf_id, 39 + enum bpf_type_flag *flag) 40 + { 41 + return -EACCES; 19 42 } 20 43 21 44 #endif

+54

net/core/filter.c

··· 18 18 */ 19 19 20 20 #include <linux/atomic.h> 21 + #include <linux/bpf_verifier.h> 21 22 #include <linux/module.h> 22 23 #include <linux/types.h> 23 24 #include <linux/mm.h> ··· 8605 8604 return bpf_skb_is_valid_access(off, size, type, prog, info); 8606 8605 } 8607 8606 8607 + DEFINE_MUTEX(nf_conn_btf_access_lock); 8608 + EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); 8609 + 8610 + int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf, 8611 + const struct btf_type *t, int off, int size, 8612 + enum bpf_access_type atype, u32 *next_btf_id, 8613 + enum bpf_type_flag *flag); 8614 + EXPORT_SYMBOL_GPL(nfct_bsa); 8615 + 8616 + static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, 8617 + const struct btf *btf, 8618 + const struct btf_type *t, int off, 8619 + int size, enum bpf_access_type atype, 8620 + u32 *next_btf_id, 8621 + enum bpf_type_flag *flag) 8622 + { 8623 + int ret = -EACCES; 8624 + 8625 + if (atype == BPF_READ) 8626 + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, 8627 + flag); 8628 + 8629 + mutex_lock(&nf_conn_btf_access_lock); 8630 + if (nfct_bsa) 8631 + ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag); 8632 + mutex_unlock(&nf_conn_btf_access_lock); 8633 + 8634 + return ret; 8635 + } 8636 + 8608 8637 static bool __is_valid_xdp_access(int off, int size) 8609 8638 { 8610 8639 if (off < 0 || off >= sizeof(struct xdp_md)) ··· 8693 8662 act, prog->aux->name, prog->aux->id, dev ? 
dev->name : "N/A"); 8694 8663 } 8695 8664 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 8665 + 8666 + static int xdp_btf_struct_access(struct bpf_verifier_log *log, 8667 + const struct btf *btf, 8668 + const struct btf_type *t, int off, 8669 + int size, enum bpf_access_type atype, 8670 + u32 *next_btf_id, 8671 + enum bpf_type_flag *flag) 8672 + { 8673 + int ret = -EACCES; 8674 + 8675 + if (atype == BPF_READ) 8676 + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, 8677 + flag); 8678 + 8679 + mutex_lock(&nf_conn_btf_access_lock); 8680 + if (nfct_bsa) 8681 + ret = nfct_bsa(log, btf, t, off, size, atype, next_btf_id, flag); 8682 + mutex_unlock(&nf_conn_btf_access_lock); 8683 + 8684 + return ret; 8685 + } 8696 8686 8697 8687 static bool sock_addr_is_valid_access(int off, int size, 8698 8688 enum bpf_access_type type, ··· 10609 10557 .convert_ctx_access = tc_cls_act_convert_ctx_access, 10610 10558 .gen_prologue = tc_cls_act_prologue, 10611 10559 .gen_ld_abs = bpf_gen_ld_abs, 10560 + .btf_struct_access = tc_cls_act_btf_struct_access, 10612 10561 }; 10613 10562 10614 10563 const struct bpf_prog_ops tc_cls_act_prog_ops = { ··· 10621 10568 .is_valid_access = xdp_is_valid_access, 10622 10569 .convert_ctx_access = xdp_convert_ctx_access, 10623 10570 .gen_prologue = bpf_noop_prologue, 10571 + .btf_struct_access = xdp_btf_struct_access, 10624 10572 }; 10625 10573 10626 10574 const struct bpf_prog_ops xdp_prog_ops = {

+65 -1

net/netfilter/nf_conntrack_bpf.c

··· 6 6 * are exposed through to BPF programs is explicitly unstable. 7 7 */ 8 8 9 + #include <linux/bpf_verifier.h> 9 10 #include <linux/bpf.h> 10 11 #include <linux/btf.h> 12 + #include <linux/mutex.h> 11 13 #include <linux/types.h> 12 14 #include <linux/btf_ids.h> 13 15 #include <linux/net_namespace.h> ··· 184 182 opts->dir = NF_CT_DIRECTION(hash); 185 183 186 184 return ct; 185 + } 186 + 187 + BTF_ID_LIST(btf_nf_conn_ids) 188 + BTF_ID(struct, nf_conn) 189 + BTF_ID(struct, nf_conn___init) 190 + 191 + /* Check writes into `struct nf_conn` */ 192 + static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, 193 + const struct btf *btf, 194 + const struct btf_type *t, int off, 195 + int size, enum bpf_access_type atype, 196 + u32 *next_btf_id, 197 + enum bpf_type_flag *flag) 198 + { 199 + const struct btf_type *ncit; 200 + const struct btf_type *nct; 201 + size_t end; 202 + 203 + ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]); 204 + nct = btf_type_by_id(btf, btf_nf_conn_ids[0]); 205 + 206 + if (t != nct && t != ncit) { 207 + bpf_log(log, "only read is supported\n"); 208 + return -EACCES; 209 + } 210 + 211 + /* `struct nf_conn` and `struct nf_conn___init` have the same layout 212 + * so we are safe to simply merge offset checks here 213 + */ 214 + switch (off) { 215 + #if defined(CONFIG_NF_CONNTRACK_MARK) 216 + case offsetof(struct nf_conn, mark): 217 + end = offsetofend(struct nf_conn, mark); 218 + break; 219 + #endif 220 + default: 221 + bpf_log(log, "no write support to nf_conn at off %d\n", off); 222 + return -EACCES; 223 + } 224 + 225 + if (off + size > end) { 226 + bpf_log(log, 227 + "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n", 228 + off, size, end); 229 + return -EACCES; 230 + } 231 + 232 + return 0; 187 233 } 188 234 189 235 __diag_push(); ··· 499 449 int ret; 500 450 501 451 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); 502 - return ret ?: 
register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); 452 + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); 453 + if (!ret) { 454 + mutex_lock(&nf_conn_btf_access_lock); 455 + nfct_bsa = _nf_conntrack_btf_struct_access; 456 + mutex_unlock(&nf_conn_btf_access_lock); 457 + } 458 + 459 + return ret; 460 + } 461 + 462 + void cleanup_nf_conntrack_bpf(void) 463 + { 464 + mutex_lock(&nf_conn_btf_access_lock); 465 + nfct_bsa = NULL; 466 + mutex_unlock(&nf_conn_btf_access_lock); 503 467 }

net/netfilter/nf_conntrack_core.c

··· 2512 2512 2513 2513 void nf_conntrack_cleanup_start(void) 2514 2514 { 2515 + cleanup_nf_conntrack_bpf(); 2515 2516 conntrack_gc_work.exiting = true; 2516 2517 } 2517 2518