Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: sock: allow eBPF programs to be attached to sockets

Introduce a new setsockopt() command:

setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd))

where prog_fd was received from syscall bpf(BPF_PROG_LOAD, attr, ...)
and attr->prog_type == BPF_PROG_TYPE_SOCKET_FILTER

setsockopt() calls bpf_prog_get(), which increments the refcnt of the program
so that it doesn't get unloaded while the socket is using it.

The same eBPF program can be attached to multiple sockets.

When the user task exits, the socket is closed automatically; this calls
sk_filter_uncharge(), which decrements the refcnt of the eBPF program.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Alexei Starovoitov and committed by
David S. Miller
89aa0758 ddd872bc

+155 -2
+3
arch/alpha/include/uapi/asm/socket.h
··· 89 89 90 90 #define SO_INCOMING_CPU 49 91 91 92 + #define SO_ATTACH_BPF 50 93 + #define SO_DETACH_BPF SO_DETACH_FILTER 94 + 92 95 #endif /* _UAPI_ASM_SOCKET_H */
+3
arch/avr32/include/uapi/asm/socket.h
··· 82 82 83 83 #define SO_INCOMING_CPU 49 84 84 85 + #define SO_ATTACH_BPF 50 86 + #define SO_DETACH_BPF SO_DETACH_FILTER 87 + 85 88 #endif /* _UAPI__ASM_AVR32_SOCKET_H */
+3
arch/cris/include/uapi/asm/socket.h
··· 84 84 85 85 #define SO_INCOMING_CPU 49 86 86 87 + #define SO_ATTACH_BPF 50 88 + #define SO_DETACH_BPF SO_DETACH_FILTER 89 + 87 90 #endif /* _ASM_SOCKET_H */ 88 91 89 92
+3
arch/frv/include/uapi/asm/socket.h
··· 82 82 83 83 #define SO_INCOMING_CPU 49 84 84 85 + #define SO_ATTACH_BPF 50 86 + #define SO_DETACH_BPF SO_DETACH_FILTER 87 + 85 88 #endif /* _ASM_SOCKET_H */ 86 89
+3
arch/ia64/include/uapi/asm/socket.h
··· 91 91 92 92 #define SO_INCOMING_CPU 49 93 93 94 + #define SO_ATTACH_BPF 50 95 + #define SO_DETACH_BPF SO_DETACH_FILTER 96 + 94 97 #endif /* _ASM_IA64_SOCKET_H */
+3
arch/m32r/include/uapi/asm/socket.h
··· 82 82 83 83 #define SO_INCOMING_CPU 49 84 84 85 + #define SO_ATTACH_BPF 50 86 + #define SO_DETACH_BPF SO_DETACH_FILTER 87 + 85 88 #endif /* _ASM_M32R_SOCKET_H */
+3
arch/mips/include/uapi/asm/socket.h
··· 100 100 101 101 #define SO_INCOMING_CPU 49 102 102 103 + #define SO_ATTACH_BPF 50 104 + #define SO_DETACH_BPF SO_DETACH_FILTER 105 + 103 106 #endif /* _UAPI_ASM_SOCKET_H */
+3
arch/mn10300/include/uapi/asm/socket.h
··· 82 82 83 83 #define SO_INCOMING_CPU 49 84 84 85 + #define SO_ATTACH_BPF 50 86 + #define SO_DETACH_BPF SO_DETACH_FILTER 87 + 85 88 #endif /* _ASM_SOCKET_H */
+3
arch/parisc/include/uapi/asm/socket.h
··· 81 81 82 82 #define SO_INCOMING_CPU 0x402A 83 83 84 + #define SO_ATTACH_BPF 0x402B 85 + #define SO_DETACH_BPF SO_DETACH_FILTER 86 + 84 87 #endif /* _UAPI_ASM_SOCKET_H */
+3
arch/powerpc/include/uapi/asm/socket.h
··· 89 89 90 90 #define SO_INCOMING_CPU 49 91 91 92 + #define SO_ATTACH_BPF 50 93 + #define SO_DETACH_BPF SO_DETACH_FILTER 94 + 92 95 #endif /* _ASM_POWERPC_SOCKET_H */
+3
arch/s390/include/uapi/asm/socket.h
··· 88 88 89 89 #define SO_INCOMING_CPU 49 90 90 91 + #define SO_ATTACH_BPF 50 92 + #define SO_DETACH_BPF SO_DETACH_FILTER 93 + 91 94 #endif /* _ASM_SOCKET_H */
+3
arch/sparc/include/uapi/asm/socket.h
··· 78 78 79 79 #define SO_INCOMING_CPU 0x0033 80 80 81 + #define SO_ATTACH_BPF 0x0034 82 + #define SO_DETACH_BPF SO_DETACH_FILTER 83 + 81 84 /* Security levels - as per NRL IPv6 - don't actually do anything */ 82 85 #define SO_SECURITY_AUTHENTICATION 0x5001 83 86 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
+3
arch/xtensa/include/uapi/asm/socket.h
··· 93 93 94 94 #define SO_INCOMING_CPU 49 95 95 96 + #define SO_ATTACH_BPF 50 97 + #define SO_DETACH_BPF SO_DETACH_FILTER 98 + 96 99 #endif /* _XTENSA_SOCKET_H */
+4
include/linux/bpf.h
··· 128 128 struct work_struct work; 129 129 }; 130 130 131 + #ifdef CONFIG_BPF_SYSCALL 131 132 void bpf_prog_put(struct bpf_prog *prog); 133 + #else 134 + static inline void bpf_prog_put(struct bpf_prog *prog) {} 135 + #endif 132 136 struct bpf_prog *bpf_prog_get(u32 ufd); 133 137 /* verify correctness of eBPF program */ 134 138 int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
+1
include/linux/filter.h
··· 381 381 void bpf_prog_destroy(struct bpf_prog *fp); 382 382 383 383 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 384 + int sk_attach_bpf(u32 ufd, struct sock *sk); 384 385 int sk_detach_filter(struct sock *sk); 385 386 386 387 int bpf_check_classic(const struct sock_filter *filter, unsigned int flen);
+3
include/uapi/asm-generic/socket.h
··· 84 84 85 85 #define SO_INCOMING_CPU 49 86 86 87 + #define SO_ATTACH_BPF 50 88 + #define SO_DETACH_BPF SO_DETACH_FILTER 89 + 87 90 #endif /* __ASM_GENERIC_SOCKET_H */
+95 -2
net/core/filter.c
··· 44 44 #include <linux/ratelimit.h> 45 45 #include <linux/seccomp.h> 46 46 #include <linux/if_vlan.h> 47 + #include <linux/bpf.h> 47 48 48 49 /** 49 50 * sk_filter - run a packet through a socket filter ··· 814 813 815 814 static void __bpf_prog_release(struct bpf_prog *prog) 816 815 { 817 - bpf_release_orig_filter(prog); 818 - bpf_prog_free(prog); 816 + if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { 817 + bpf_prog_put(prog); 818 + } else { 819 + bpf_release_orig_filter(prog); 820 + bpf_prog_free(prog); 821 + } 819 822 } 820 823 821 824 static void __sk_filter_release(struct sk_filter *fp) ··· 1093 1088 } 1094 1089 EXPORT_SYMBOL_GPL(sk_attach_filter); 1095 1090 1091 + #ifdef CONFIG_BPF_SYSCALL 1092 + int sk_attach_bpf(u32 ufd, struct sock *sk) 1093 + { 1094 + struct sk_filter *fp, *old_fp; 1095 + struct bpf_prog *prog; 1096 + 1097 + if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1098 + return -EPERM; 1099 + 1100 + prog = bpf_prog_get(ufd); 1101 + if (!prog) 1102 + return -EINVAL; 1103 + 1104 + if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { 1105 + /* valid fd, but invalid program type */ 1106 + bpf_prog_put(prog); 1107 + return -EINVAL; 1108 + } 1109 + 1110 + fp = kmalloc(sizeof(*fp), GFP_KERNEL); 1111 + if (!fp) { 1112 + bpf_prog_put(prog); 1113 + return -ENOMEM; 1114 + } 1115 + fp->prog = prog; 1116 + 1117 + atomic_set(&fp->refcnt, 0); 1118 + 1119 + if (!sk_filter_charge(sk, fp)) { 1120 + __sk_filter_release(fp); 1121 + return -ENOMEM; 1122 + } 1123 + 1124 + old_fp = rcu_dereference_protected(sk->sk_filter, 1125 + sock_owned_by_user(sk)); 1126 + rcu_assign_pointer(sk->sk_filter, fp); 1127 + 1128 + if (old_fp) 1129 + sk_filter_uncharge(sk, old_fp); 1130 + 1131 + return 0; 1132 + } 1133 + 1134 + /* allow socket filters to call 1135 + * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() 1136 + */ 1137 + static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) 1138 + { 1139 + switch (func_id) { 1140 + 
case BPF_FUNC_map_lookup_elem: 1141 + return &bpf_map_lookup_elem_proto; 1142 + case BPF_FUNC_map_update_elem: 1143 + return &bpf_map_update_elem_proto; 1144 + case BPF_FUNC_map_delete_elem: 1145 + return &bpf_map_delete_elem_proto; 1146 + default: 1147 + return NULL; 1148 + } 1149 + } 1150 + 1151 + static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) 1152 + { 1153 + /* skb fields cannot be accessed yet */ 1154 + return false; 1155 + } 1156 + 1157 + static struct bpf_verifier_ops sock_filter_ops = { 1158 + .get_func_proto = sock_filter_func_proto, 1159 + .is_valid_access = sock_filter_is_valid_access, 1160 + }; 1161 + 1162 + static struct bpf_prog_type_list tl = { 1163 + .ops = &sock_filter_ops, 1164 + .type = BPF_PROG_TYPE_SOCKET_FILTER, 1165 + }; 1166 + 1167 + static int __init register_sock_filter_ops(void) 1168 + { 1169 + bpf_register_prog_type(&tl); 1170 + return 0; 1171 + } 1172 + late_initcall(register_sock_filter_ops); 1173 + #else 1174 + int sk_attach_bpf(u32 ufd, struct sock *sk) 1175 + { 1176 + return -EOPNOTSUPP; 1177 + } 1178 + #endif 1096 1179 int sk_detach_filter(struct sock *sk) 1097 1180 { 1098 1181 int ret = -ENOENT;
+13
net/core/sock.c
··· 888 888 } 889 889 break; 890 890 891 + case SO_ATTACH_BPF: 892 + ret = -EINVAL; 893 + if (optlen == sizeof(u32)) { 894 + u32 ufd; 895 + 896 + ret = -EFAULT; 897 + if (copy_from_user(&ufd, optval, sizeof(ufd))) 898 + break; 899 + 900 + ret = sk_attach_bpf(ufd, sk); 901 + } 902 + break; 903 + 891 904 case SO_DETACH_FILTER: 892 905 ret = sk_detach_filter(sk); 893 906 break;