Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: add skeleton of bpfilter kernel module

bpfilter.ko consists of bpfilter_kern.c (normal kernel module code)
and user mode helper code that is embedded into bpfilter.ko

The steps to build bpfilter.ko are the following:
- main.c is compiled by HOSTCC into the bpfilter_umh elf executable file
- with quite a bit of objcopy and Makefile magic the bpfilter_umh elf file
is converted into bpfilter_umh.o object file
with _binary_net_bpfilter_bpfilter_umh_start and _end symbols
Example:
$ nm ./bld_x64/net/bpfilter/bpfilter_umh.o
0000000000004cf8 T _binary_net_bpfilter_bpfilter_umh_end
0000000000004cf8 A _binary_net_bpfilter_bpfilter_umh_size
0000000000000000 T _binary_net_bpfilter_bpfilter_umh_start
- bpfilter_umh.o and bpfilter_kern.o are linked together into bpfilter.ko

bpfilter_kern.c is normal kernel module code that calls
the fork_usermode_blob() helper to execute part of its own data
as a user mode process.

Notice that the blob between _binary_net_bpfilter_bpfilter_umh_start and _end
is placed into the .init.rodata section, so it's freed as soon as the __init
function of bpfilter.ko has finished.
As part of __init, bpfilter.ko performs a first request/reply exchange
over the two unix pipes provided by the fork_usermode_blob() helper to
make sure that the umh is healthy. If it is not, bpfilter.ko kills it via its pid.

Later, bpfilter_process_sockopt() will be called from the bpfilter hooks
in get/setsockopt() to pass iptables commands to the umh via bpfilter.ko.

If the admin does 'rmmod bpfilter', the __exit code of bpfilter.ko will
kill the umh as well.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Alexei Starovoitov and committed by
David S. Miller
d2ba09c1 449325b5

+339
+15
include/linux/bpfilter.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_BPFILTER_H 3 + #define _LINUX_BPFILTER_H 4 + 5 + #include <uapi/linux/bpfilter.h> 6 + 7 + struct sock; 8 + int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval, 9 + unsigned int optlen); 10 + int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval, 11 + int *optlen); 12 + extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, 13 + char __user *optval, 14 + unsigned int optlen, bool is_set); 15 + #endif
+21
include/uapi/linux/bpfilter.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UAPI_LINUX_BPFILTER_H
#define _UAPI_LINUX_BPFILTER_H

#include <linux/if.h>

/*
 * setsockopt() option numbers recognised by bpfilter.
 * BPFILTER_IPT_SET_MAX is one past the last valid "set" option.
 */
enum {
	BPFILTER_IPT_SO_SET_REPLACE		= 64,
	BPFILTER_IPT_SO_SET_ADD_COUNTERS	= 65,
	BPFILTER_IPT_SET_MAX,
};

/*
 * getsockopt() option numbers recognised by bpfilter.
 * BPFILTER_IPT_GET_MAX is one past the last valid "get" option.
 */
enum {
	BPFILTER_IPT_SO_GET_INFO		= 64,
	BPFILTER_IPT_SO_GET_ENTRIES		= 65,
	BPFILTER_IPT_SO_GET_REVISION_MATCH	= 66,
	BPFILTER_IPT_SO_GET_REVISION_TARGET	= 67,
	BPFILTER_IPT_GET_MAX,
};

#endif /* _UAPI_LINUX_BPFILTER_H */
+2
net/Kconfig
··· 202 202 203 203 endif 204 204 205 + source "net/bpfilter/Kconfig" 206 + 205 207 source "net/dccp/Kconfig" 206 208 source "net/sctp/Kconfig" 207 209 source "net/rds/Kconfig"
+1
net/Makefile
··· 20 20 obj-$(CONFIG_XFRM) += xfrm/ 21 21 obj-$(CONFIG_UNIX) += unix/ 22 22 obj-$(CONFIG_NET) += ipv6/ 23 + obj-$(CONFIG_BPFILTER) += bpfilter/ 23 24 obj-$(CONFIG_PACKET) += packet/ 24 25 obj-$(CONFIG_NET_KEY) += key/ 25 26 obj-$(CONFIG_BRIDGE) += bridge/
+16
net/bpfilter/Kconfig
# Top-level switch for the bpfilter framework; the entry below depends on it.
menuconfig BPFILTER
	bool "BPF based packet filtering framework (BPFILTER)"
	depends on NET && BPF
	help
	  This builds experimental bpfilter framework that is aiming to
	  provide netfilter compatible functionality via BPF

# Kernel module (or built-in) that embeds and runs the user mode helper.
config BPFILTER_UMH
	tristate "bpfilter kernel module with user mode helper"
	depends on BPFILTER
	default m
	help
	  This builds bpfilter kernel module with embedded user mode helper
+30
net/bpfilter/Makefile
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux BPFILTER layer.
#
# Builds the user mode helper (bpfilter_umh) as a host program from main.c,
# wraps the resulting executable into an object file and links it together
# with bpfilter_kern.o into bpfilter.o.
#

hostprogs-y := bpfilter_umh
bpfilter_umh-objs := main.o
HOSTCFLAGS += -I. -Itools/include/
ifeq ($(CONFIG_BPFILTER_UMH), y)
# builtin bpfilter_umh should be compiled with -static
# since rootfs isn't mounted at the time of __init
# function is called and do_execv won't find elf interpreter
HOSTLDFLAGS += -static
endif

# a bit of elf magic to convert bpfilter_umh binary into a binary blob
# inside bpfilter_umh.o elf file referenced by
# _binary_net_bpfilter_bpfilter_umh_start symbol
# which bpfilter_kern.c passes further into umh blob loader at run-time
#
# The recipe below:
#  - writes a file containing only ':' to .bpfilter_umh.o.cmd
#    (NOTE(review): presumably to satisfy kbuild's .cmd bookkeeping - confirm)
#  - takes the -B architecture argument from the "architecture" line of
#    `objdump -f` run on the helper binary
#  - renames the blob's .data section to .init.rodata
quiet_cmd_copy_umh = GEN $@
      cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
      $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
      -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
      --rename-section .data=.init.rodata $< $@

# bpfilter_umh.o is generated from the linked helper executable.
$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
	$(call cmd,copy_umh)

# bpfilter.o = kernel-side code + embedded helper blob.
obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
+111
net/bpfilter/bpfilter_kern.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 3 + #include <linux/init.h> 4 + #include <linux/module.h> 5 + #include <linux/umh.h> 6 + #include <linux/bpfilter.h> 7 + #include <linux/sched.h> 8 + #include <linux/sched/signal.h> 9 + #include <linux/fs.h> 10 + #include <linux/file.h> 11 + #include "msgfmt.h" 12 + 13 + #define UMH_start _binary_net_bpfilter_bpfilter_umh_start 14 + #define UMH_end _binary_net_bpfilter_bpfilter_umh_end 15 + 16 + extern char UMH_start; 17 + extern char UMH_end; 18 + 19 + static struct umh_info info; 20 + /* since ip_getsockopt() can run in parallel, serialize access to umh */ 21 + static DEFINE_MUTEX(bpfilter_lock); 22 + 23 + static void shutdown_umh(struct umh_info *info) 24 + { 25 + struct task_struct *tsk; 26 + 27 + tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); 28 + if (tsk) 29 + force_sig(SIGKILL, tsk); 30 + fput(info->pipe_to_umh); 31 + fput(info->pipe_from_umh); 32 + } 33 + 34 + static void __stop_umh(void) 35 + { 36 + if (bpfilter_process_sockopt) { 37 + bpfilter_process_sockopt = NULL; 38 + shutdown_umh(&info); 39 + } 40 + } 41 + 42 + static void stop_umh(void) 43 + { 44 + mutex_lock(&bpfilter_lock); 45 + __stop_umh(); 46 + mutex_unlock(&bpfilter_lock); 47 + } 48 + 49 + static int __bpfilter_process_sockopt(struct sock *sk, int optname, 50 + char __user *optval, 51 + unsigned int optlen, bool is_set) 52 + { 53 + struct mbox_request req; 54 + struct mbox_reply reply; 55 + loff_t pos; 56 + ssize_t n; 57 + int ret; 58 + 59 + req.is_set = is_set; 60 + req.pid = current->pid; 61 + req.cmd = optname; 62 + req.addr = (long)optval; 63 + req.len = optlen; 64 + mutex_lock(&bpfilter_lock); 65 + n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); 66 + if (n != sizeof(req)) { 67 + pr_err("write fail %zd\n", n); 68 + __stop_umh(); 69 + ret = -EFAULT; 70 + goto out; 71 + } 72 + pos = 0; 73 + n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); 74 + if (n != 
sizeof(reply)) { 75 + pr_err("read fail %zd\n", n); 76 + __stop_umh(); 77 + ret = -EFAULT; 78 + goto out; 79 + } 80 + ret = reply.status; 81 + out: 82 + mutex_unlock(&bpfilter_lock); 83 + return ret; 84 + } 85 + 86 + static int __init load_umh(void) 87 + { 88 + int err; 89 + 90 + /* fork usermode process */ 91 + err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); 92 + if (err) 93 + return err; 94 + pr_info("Loaded bpfilter_umh pid %d\n", info.pid); 95 + 96 + /* health check that usermode process started correctly */ 97 + if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { 98 + stop_umh(); 99 + return -EFAULT; 100 + } 101 + bpfilter_process_sockopt = &__bpfilter_process_sockopt; 102 + return 0; 103 + } 104 + 105 + static void __exit fini_umh(void) 106 + { 107 + stop_umh(); 108 + } 109 + module_init(load_umh); 110 + module_exit(fini_umh); 111 + MODULE_LICENSE("GPL");
+63
net/bpfilter/main.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #define _GNU_SOURCE 3 + #include <sys/uio.h> 4 + #include <errno.h> 5 + #include <stdio.h> 6 + #include <sys/socket.h> 7 + #include <fcntl.h> 8 + #include <unistd.h> 9 + #include "include/uapi/linux/bpf.h" 10 + #include <asm/unistd.h> 11 + #include "msgfmt.h" 12 + 13 + int debug_fd; 14 + 15 + static int handle_get_cmd(struct mbox_request *cmd) 16 + { 17 + switch (cmd->cmd) { 18 + case 0: 19 + return 0; 20 + default: 21 + break; 22 + } 23 + return -ENOPROTOOPT; 24 + } 25 + 26 + static int handle_set_cmd(struct mbox_request *cmd) 27 + { 28 + return -ENOPROTOOPT; 29 + } 30 + 31 + static void loop(void) 32 + { 33 + while (1) { 34 + struct mbox_request req; 35 + struct mbox_reply reply; 36 + int n; 37 + 38 + n = read(0, &req, sizeof(req)); 39 + if (n != sizeof(req)) { 40 + dprintf(debug_fd, "invalid request %d\n", n); 41 + return; 42 + } 43 + 44 + reply.status = req.is_set ? 45 + handle_set_cmd(&req) : 46 + handle_get_cmd(&req); 47 + 48 + n = write(1, &reply, sizeof(reply)); 49 + if (n != sizeof(reply)) { 50 + dprintf(debug_fd, "reply failed %d\n", n); 51 + return; 52 + } 53 + } 54 + } 55 + 56 + int main(void) 57 + { 58 + debug_fd = open("/dev/console", 00000002 | 00000100); 59 + dprintf(debug_fd, "Started bpfilter\n"); 60 + loop(); 61 + close(debug_fd); 62 + return 0; 63 + }
+17
net/bpfilter/msgfmt.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_BPFILTER_MSGFMT_H
#define _NET_BPFILTER_MSGFMT_H

/* __u32/__u64: include explicitly so this header is self-contained */
#include <linux/types.h>

/*
 * Request sent from bpfilter.ko to the user mode helper.
 * The layout is shared between kernel and helper; it has no padding.
 */
struct mbox_request {
	__u64 addr;	/* address of the caller's option buffer */
	__u32 len;	/* length of that buffer */
	__u32 is_set;	/* non-zero: setsockopt(), zero: getsockopt() */
	__u32 cmd;	/* socket option number */
	__u32 pid;	/* pid of the task that issued the sockopt call */
};

/*
 * Reply sent from the helper back to bpfilter.ko.
 * status carries the handler's return value: 0 or a negative errno
 * stored in an unsigned field.
 */
struct mbox_reply {
	__u32 status;
};

#endif
+2
net/ipv4/Makefile
··· 16 16 inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ 17 17 metrics.o 18 18 19 + obj-$(CONFIG_BPFILTER) += bpfilter/ 20 + 19 21 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o 20 22 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 21 23 obj-$(CONFIG_PROC_FS) += proc.o
+2
net/ipv4/bpfilter/Makefile
# Build the bpfilter socket-option glue (sockopt.c) when CONFIG_BPFILTER is set.
obj-$(CONFIG_BPFILTER) += sockopt.o

+42
net/ipv4/bpfilter/sockopt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/uaccess.h> 3 + #include <linux/bpfilter.h> 4 + #include <uapi/linux/bpf.h> 5 + #include <linux/wait.h> 6 + #include <linux/kmod.h> 7 + 8 + int (*bpfilter_process_sockopt)(struct sock *sk, int optname, 9 + char __user *optval, 10 + unsigned int optlen, bool is_set); 11 + EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); 12 + 13 + int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, 14 + unsigned int optlen, bool is_set) 15 + { 16 + if (!bpfilter_process_sockopt) { 17 + int err = request_module("bpfilter"); 18 + 19 + if (err) 20 + return err; 21 + if (!bpfilter_process_sockopt) 22 + return -ECHILD; 23 + } 24 + return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); 25 + } 26 + 27 + int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, 28 + unsigned int optlen) 29 + { 30 + return bpfilter_mbox_request(sk, optname, optval, optlen, true); 31 + } 32 + 33 + int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, 34 + int __user *optlen) 35 + { 36 + int len; 37 + 38 + if (get_user(len, optlen)) 39 + return -EFAULT; 40 + 41 + return bpfilter_mbox_request(sk, optname, optval, len, false); 42 + }
+17
net/ipv4/ip_sockglue.c
··· 47 47 #include <linux/errqueue.h> 48 48 #include <linux/uaccess.h> 49 49 50 + #include <linux/bpfilter.h> 51 + 50 52 /* 51 53 * SOL_IP control messages. 52 54 */ ··· 1246 1244 return -ENOPROTOOPT; 1247 1245 1248 1246 err = do_ip_setsockopt(sk, level, optname, optval, optlen); 1247 + #ifdef CONFIG_BPFILTER 1248 + if (optname >= BPFILTER_IPT_SO_SET_REPLACE && 1249 + optname < BPFILTER_IPT_SET_MAX) 1250 + err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); 1251 + #endif 1249 1252 #ifdef CONFIG_NETFILTER 1250 1253 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1251 1254 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && ··· 1559 1552 int err; 1560 1553 1561 1554 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); 1555 + #ifdef CONFIG_BPFILTER 1556 + if (optname >= BPFILTER_IPT_SO_GET_INFO && 1557 + optname < BPFILTER_IPT_GET_MAX) 1558 + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); 1559 + #endif 1562 1560 #ifdef CONFIG_NETFILTER 1563 1561 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1564 1562 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && ··· 1596 1584 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 1597 1585 MSG_CMSG_COMPAT); 1598 1586 1587 + #ifdef CONFIG_BPFILTER 1588 + if (optname >= BPFILTER_IPT_SO_GET_INFO && 1589 + optname < BPFILTER_IPT_GET_MAX) 1590 + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); 1591 + #endif 1599 1592 #ifdef CONFIG_NETFILTER 1600 1593 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1601 1594 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&