Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: add bpf_link support for BPF_NETFILTER programs

Add bpf_link support skeleton. To keep this reviewable, no bpf program
can be invoked yet: if a program is attached, only a C stub is called,
not the actual bpf program.

Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig.

Uapi example usage:
union bpf_attr attr = { };

attr.link_create.prog_fd = progfd;
attr.link_create.attach_type = 0; /* unused */
attr.link_create.netfilter.pf = PF_INET;
attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN;
attr.link_create.netfilter.priority = -128;

err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr));

... this would attach progfd to ipv4:input hook.

Such a hook gets removed automatically if the calling program exits.

BPF_NETFILTER program invocation is added in a followup change.

The NF_HOOK_OP_BPF enum value will eventually be read from nfnetlink_hook.
It makes it possible to tell userspace which program is attached at the
given hook when the user runs the 'nft hook list' command, rather than just
the priority and a not-very-helpful 'this hook runs a bpf prog but I can't
tell which one'.

Will also be used to disallow registration of two bpf programs with
same priority in a followup patch.

v4: arm32 cmpxchg only supports 32bit operand
s/prio/priority/
v3: restrict prog attachment to ip/ip6 for now; let's lift restrictions if
more use cases pop up (arptables, ebtables, netdev ingress/egress etc).

Signed-off-by: Florian Westphal <fw@strlen.de>
Link: https://lore.kernel.org/r/20230421170300.24115-2-fw@strlen.de
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Florian Westphal and committed by
Alexei Starovoitov
84601d6e 45cea721

+194
+1
include/linux/netfilter.h
··· 80 80 enum nf_hook_ops_type { 81 81 NF_HOOK_OP_UNDEFINED, 82 82 NF_HOOK_OP_NF_TABLES, 83 + NF_HOOK_OP_BPF, 83 84 }; 84 85 85 86 struct nf_hook_ops {
+10
include/net/netfilter/nf_bpf_link.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK) 4 + int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); 5 + #else 6 + static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 7 + { 8 + return -EOPNOTSUPP; 9 + } 10 + #endif
+14
include/uapi/linux/bpf.h
··· 986 986 BPF_PROG_TYPE_LSM, 987 987 BPF_PROG_TYPE_SK_LOOKUP, 988 988 BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ 989 + BPF_PROG_TYPE_NETFILTER, 989 990 }; 990 991 991 992 enum bpf_attach_type { ··· 1051 1050 BPF_LINK_TYPE_PERF_EVENT = 7, 1052 1051 BPF_LINK_TYPE_KPROBE_MULTI = 8, 1053 1052 BPF_LINK_TYPE_STRUCT_OPS = 9, 1053 + BPF_LINK_TYPE_NETFILTER = 10, 1054 1054 1055 1055 MAX_BPF_LINK_TYPE, 1056 1056 }; ··· 1562 1560 */ 1563 1561 __u64 cookie; 1564 1562 } tracing; 1563 + struct { 1564 + __u32 pf; 1565 + __u32 hooknum; 1566 + __s32 priority; 1567 + __u32 flags; 1568 + } netfilter; 1565 1569 }; 1566 1570 } link_create; 1567 1571 ··· 6418 6410 struct { 6419 6411 __u32 map_id; 6420 6412 } struct_ops; 6413 + struct { 6414 + __u32 pf; 6415 + __u32 hooknum; 6416 + __s32 priority; 6417 + __u32 flags; 6418 + } netfilter; 6421 6419 }; 6422 6420 } __attribute__((aligned(8))); 6423 6421
+6
kernel/bpf/syscall.c
··· 35 35 #include <linux/rcupdate_trace.h> 36 36 #include <linux/memcontrol.h> 37 37 #include <linux/trace_events.h> 38 + #include <net/netfilter/nf_bpf_link.h> 38 39 39 40 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ 40 41 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ ··· 2463 2462 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2464 2463 case BPF_PROG_TYPE_SOCK_OPS: 2465 2464 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2465 + case BPF_PROG_TYPE_NETFILTER: 2466 2466 return true; 2467 2467 case BPF_PROG_TYPE_CGROUP_SKB: 2468 2468 /* always unpriv */ ··· 4590 4588 4591 4589 switch (prog->type) { 4592 4590 case BPF_PROG_TYPE_EXT: 4591 + case BPF_PROG_TYPE_NETFILTER: 4593 4592 break; 4594 4593 case BPF_PROG_TYPE_PERF_EVENT: 4595 4594 case BPF_PROG_TYPE_TRACEPOINT: ··· 4656 4653 #ifdef CONFIG_NET 4657 4654 case BPF_PROG_TYPE_XDP: 4658 4655 ret = bpf_xdp_link_attach(attr, prog); 4656 + break; 4657 + case BPF_PROG_TYPE_NETFILTER: 4658 + ret = bpf_nf_link_attach(attr, prog); 4659 4659 break; 4660 4660 #endif 4661 4661 case BPF_PROG_TYPE_PERF_EVENT:
+3
net/netfilter/Kconfig
··· 30 30 config NETFILTER_FAMILY_ARP 31 31 bool 32 32 33 + config NETFILTER_BPF_LINK 34 + def_bool BPF_SYSCALL 35 + 33 36 config NETFILTER_NETLINK_HOOK 34 37 tristate "Netfilter base hook dump support" 35 38 depends on NETFILTER_ADVANCED
+1
net/netfilter/Makefile
··· 22 22 endif 23 23 24 24 obj-$(CONFIG_NETFILTER) = netfilter.o 25 + obj-$(CONFIG_NETFILTER_BPF_LINK) += nf_bpf_link.o 25 26 26 27 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o 27 28 obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
+159
net/netfilter/nf_bpf_link.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <linux/netfilter.h> 4 + 5 + #include <net/netfilter/nf_bpf_link.h> 6 + #include <uapi/linux/netfilter_ipv4.h> 7 + 8 + static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, 9 + const struct nf_hook_state *s) 10 + { 11 + return NF_ACCEPT; 12 + } 13 + 14 + struct bpf_nf_link { 15 + struct bpf_link link; 16 + struct nf_hook_ops hook_ops; 17 + struct net *net; 18 + u32 dead; 19 + }; 20 + 21 + static void bpf_nf_link_release(struct bpf_link *link) 22 + { 23 + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); 24 + 25 + if (nf_link->dead) 26 + return; 27 + 28 + /* prevent hook-not-found warning splat from netfilter core when 29 + * .detach was already called 30 + */ 31 + if (!cmpxchg(&nf_link->dead, 0, 1)) 32 + nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops); 33 + } 34 + 35 + static void bpf_nf_link_dealloc(struct bpf_link *link) 36 + { 37 + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); 38 + 39 + kfree(nf_link); 40 + } 41 + 42 + static int bpf_nf_link_detach(struct bpf_link *link) 43 + { 44 + bpf_nf_link_release(link); 45 + return 0; 46 + } 47 + 48 + static void bpf_nf_link_show_info(const struct bpf_link *link, 49 + struct seq_file *seq) 50 + { 51 + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); 52 + 53 + seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n", 54 + nf_link->hook_ops.pf, nf_link->hook_ops.hooknum, 55 + nf_link->hook_ops.priority); 56 + } 57 + 58 + static int bpf_nf_link_fill_link_info(const struct bpf_link *link, 59 + struct bpf_link_info *info) 60 + { 61 + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); 62 + 63 + info->netfilter.pf = nf_link->hook_ops.pf; 64 + info->netfilter.hooknum = nf_link->hook_ops.hooknum; 65 + info->netfilter.priority = nf_link->hook_ops.priority; 66 + info->netfilter.flags = 0; 67 + 68 + return 0; 69 + } 70 + 71 
+ static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog, 72 + struct bpf_prog *old_prog) 73 + { 74 + return -EOPNOTSUPP; 75 + } 76 + 77 + static const struct bpf_link_ops bpf_nf_link_lops = { 78 + .release = bpf_nf_link_release, 79 + .dealloc = bpf_nf_link_dealloc, 80 + .detach = bpf_nf_link_detach, 81 + .show_fdinfo = bpf_nf_link_show_info, 82 + .fill_link_info = bpf_nf_link_fill_link_info, 83 + .update_prog = bpf_nf_link_update, 84 + }; 85 + 86 + static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) 87 + { 88 + switch (attr->link_create.netfilter.pf) { 89 + case NFPROTO_IPV4: 90 + case NFPROTO_IPV6: 91 + if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS) 92 + return -EPROTO; 93 + break; 94 + default: 95 + return -EAFNOSUPPORT; 96 + } 97 + 98 + if (attr->link_create.netfilter.flags) 99 + return -EOPNOTSUPP; 100 + 101 + /* make sure conntrack confirm is always last. 102 + * 103 + * In the future, if userspace can e.g. request defrag, then 104 + * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG" 105 + * should fail. 106 + */ 107 + switch (attr->link_create.netfilter.priority) { 108 + case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */ 109 + case NF_IP_PRI_LAST: return -ERANGE; /* e.g. 
conntrack confirm */ 110 + } 111 + 112 + return 0; 113 + } 114 + 115 + int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 116 + { 117 + struct net *net = current->nsproxy->net_ns; 118 + struct bpf_link_primer link_primer; 119 + struct bpf_nf_link *link; 120 + int err; 121 + 122 + if (attr->link_create.flags) 123 + return -EINVAL; 124 + 125 + err = bpf_nf_check_pf_and_hooks(attr); 126 + if (err) 127 + return err; 128 + 129 + link = kzalloc(sizeof(*link), GFP_USER); 130 + if (!link) 131 + return -ENOMEM; 132 + 133 + bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog); 134 + 135 + link->hook_ops.hook = nf_hook_run_bpf; 136 + link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF; 137 + link->hook_ops.priv = prog; 138 + 139 + link->hook_ops.pf = attr->link_create.netfilter.pf; 140 + link->hook_ops.priority = attr->link_create.netfilter.priority; 141 + link->hook_ops.hooknum = attr->link_create.netfilter.hooknum; 142 + 143 + link->net = net; 144 + link->dead = false; 145 + 146 + err = bpf_link_prime(&link->link, &link_primer); 147 + if (err) { 148 + kfree(link); 149 + return err; 150 + } 151 + 152 + err = nf_register_net_hook(net, &link->hook_ops); 153 + if (err) { 154 + bpf_link_cleanup(&link_primer); 155 + return err; 156 + } 157 + 158 + return bpf_link_settle(&link_primer); 159 + }