Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: introduce BPF_PROG_TEST_RUN command

Development and testing of networking bpf programs is quite cumbersome.
Despite availability of user space bpf interpreters the kernel is
the ultimate authority and execution environment.
Current test frameworks for TC include creation of netns, veth,
qdiscs and use of various packet generators just to test functionality
of a bpf program. XDP testing is even more complicated, since
qemu needs to be started with gro/gso disabled and precise queue
configuration, transferring of xdp program from host into guest,
attaching to virtio/eth0 and generating traffic from the host
while capturing the results from the guest.

Moreover, analyzing performance bottlenecks in an XDP program is
impossible in virtio environment, since cost of running the program
is tiny compared to the overhead of virtio packet processing,
so performance testing can only be done on physical nic
with another server generating traffic.

Furthermore, ongoing changes to the user space control plane of production
applications cannot be run on the test servers leaving bpf programs
stubbed out for testing.

Last but not least, the upstream llvm changes are validated by the bpf
backend testsuite, which has no ability to test the generated code.

To improve this situation introduce BPF_PROG_TEST_RUN command
to test and performance benchmark bpf programs.

Joint work with Daniel Borkmann.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Alexei Starovoitov and committed by
David S. Miller
1cf1cae9 98cd1552

+223 -3
+7
include/linux/bpf.h
··· 169 169 const struct bpf_insn *src, 170 170 struct bpf_insn *dst, 171 171 struct bpf_prog *prog); 172 + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, 173 + union bpf_attr __user *uattr); 172 174 }; 173 175 174 176 struct bpf_prog_type_list { ··· 234 232 235 233 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 236 234 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); 235 + 236 + int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, 237 + union bpf_attr __user *uattr); 238 + int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, 239 + union bpf_attr __user *uattr); 237 240 238 241 #ifdef CONFIG_BPF_SYSCALL 239 242 DECLARE_PER_CPU(int, bpf_prog_active);
+12
include/uapi/linux/bpf.h
··· 81 81 BPF_OBJ_GET, 82 82 BPF_PROG_ATTACH, 83 83 BPF_PROG_DETACH, 84 + BPF_PROG_TEST_RUN, 84 85 }; 85 86 86 87 enum bpf_map_type { ··· 190 189 __u32 attach_type; 191 190 __u32 attach_flags; 192 191 }; 192 + 193 + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ 194 + __u32 prog_fd; 195 + __u32 retval; 196 + __u32 data_size_in; 197 + __u32 data_size_out; 198 + __aligned_u64 data_in; 199 + __aligned_u64 data_out; 200 + __u32 repeat; 201 + __u32 duration; 202 + } test; 193 203 } __attribute__((aligned(8))); 194 204 195 205 /* BPF helper function descriptions:
+25 -2
kernel/bpf/syscall.c
··· 973 973 } 974 974 #endif /* CONFIG_CGROUP_BPF */ 975 975 976 + #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 977 + 978 + static int bpf_prog_test_run(const union bpf_attr *attr, 979 + union bpf_attr __user *uattr) 980 + { 981 + struct bpf_prog *prog; 982 + int ret = -ENOTSUPP; 983 + 984 + if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 985 + return -EINVAL; 986 + 987 + prog = bpf_prog_get(attr->test.prog_fd); 988 + if (IS_ERR(prog)) 989 + return PTR_ERR(prog); 990 + 991 + if (prog->aux->ops->test_run) 992 + ret = prog->aux->ops->test_run(prog, attr, uattr); 993 + 994 + bpf_prog_put(prog); 995 + return ret; 996 + } 997 + 976 998 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 977 999 { 978 1000 union bpf_attr attr = {}; ··· 1061 1039 case BPF_OBJ_GET: 1062 1040 err = bpf_obj_get(&attr); 1063 1041 break; 1064 - 1065 1042 #ifdef CONFIG_CGROUP_BPF 1066 1043 case BPF_PROG_ATTACH: 1067 1044 err = bpf_prog_attach(&attr); ··· 1069 1048 err = bpf_prog_detach(&attr); 1070 1049 break; 1071 1050 #endif 1072 - 1051 + case BPF_PROG_TEST_RUN: 1052 + err = bpf_prog_test_run(&attr, uattr); 1053 + break; 1073 1054 default: 1074 1055 err = -EINVAL; 1075 1056 break;
+1 -1
net/Makefile
··· 12 12 13 13 # LLC has to be linked before the files in net/802/ 14 14 obj-$(CONFIG_LLC) += llc/ 15 - obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ 15 + obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ 16 16 obj-$(CONFIG_NETFILTER) += netfilter/ 17 17 obj-$(CONFIG_INET) += ipv4/ 18 18 obj-$(CONFIG_XFRM) += xfrm/
+1
net/bpf/Makefile
··· 1 + obj-y := test_run.o
+172
net/bpf/test_run.c
··· 1 + /* Copyright (c) 2017 Facebook 2 + * 3 + * This program is free software; you can redistribute it and/or 4 + * modify it under the terms of version 2 of the GNU General Public 5 + * License as published by the Free Software Foundation. 6 + */ 7 + #include <linux/bpf.h> 8 + #include <linux/slab.h> 9 + #include <linux/vmalloc.h> 10 + #include <linux/etherdevice.h> 11 + #include <linux/filter.h> 12 + #include <linux/sched/signal.h> 13 + 14 + static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) 15 + { 16 + u32 ret; 17 + 18 + preempt_disable(); 19 + rcu_read_lock(); 20 + ret = BPF_PROG_RUN(prog, ctx); 21 + rcu_read_unlock(); 22 + preempt_enable(); 23 + 24 + return ret; 25 + } 26 + 27 + static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) 28 + { 29 + u64 time_start, time_spent = 0; 30 + u32 ret = 0, i; 31 + 32 + if (!repeat) 33 + repeat = 1; 34 + time_start = ktime_get_ns(); 35 + for (i = 0; i < repeat; i++) { 36 + ret = bpf_test_run_one(prog, ctx); 37 + if (need_resched()) { 38 + if (signal_pending(current)) 39 + break; 40 + time_spent += ktime_get_ns() - time_start; 41 + cond_resched(); 42 + time_start = ktime_get_ns(); 43 + } 44 + } 45 + time_spent += ktime_get_ns() - time_start; 46 + do_div(time_spent, repeat); 47 + *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; 48 + 49 + return ret; 50 + } 51 + 52 + static int bpf_test_finish(union bpf_attr __user *uattr, const void *data, 53 + u32 size, u32 retval, u32 duration) 54 + { 55 + void __user *data_out = u64_to_user_ptr(uattr->test.data_out); 56 + int err = -EFAULT; 57 + 58 + if (data_out && copy_to_user(data_out, data, size)) 59 + goto out; 60 + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) 61 + goto out; 62 + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) 63 + goto out; 64 + if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) 65 + goto out; 66 + err = 0; 67 + out: 68 + return err; 69 + } 70 + 71 + static void *bpf_test_init(const union bpf_attr *kattr, u32 size, 72 + u32 headroom, u32 tailroom) 73 + { 74 + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); 75 + void *data; 76 + 77 + if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) 78 + return ERR_PTR(-EINVAL); 79 + 80 + data = kzalloc(size + headroom + tailroom, GFP_USER); 81 + if (!data) 82 + return ERR_PTR(-ENOMEM); 83 + 84 + if (copy_from_user(data + headroom, data_in, size)) { 85 + kfree(data); 86 + return ERR_PTR(-EFAULT); 87 + } 88 + return data; 89 + } 90 + 91 + int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, 92 + union bpf_attr __user *uattr) 93 + { 94 + bool is_l2 = false, is_direct_pkt_access = false; 95 + u32 size = kattr->test.data_size_in; 96 + u32 repeat = kattr->test.repeat; 97 + u32 retval, duration; 98 + struct sk_buff *skb; 99 + void *data; 100 + int ret; 101 + 102 + data = bpf_test_init(kattr, size, NET_SKB_PAD, 103 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 104 + if (IS_ERR(data)) 105 + return PTR_ERR(data); 106 + 107 + switch (prog->type) { 108 + case BPF_PROG_TYPE_SCHED_CLS: 109 + case BPF_PROG_TYPE_SCHED_ACT: 110 + is_l2 = true; 111 + /* fall through */ 112 + case BPF_PROG_TYPE_LWT_IN: 113 + case BPF_PROG_TYPE_LWT_OUT: 114 + case BPF_PROG_TYPE_LWT_XMIT: 115 + is_direct_pkt_access = true; 116 + break; 117 + default: 118 + break; 119 + } 120 + 121 + skb = build_skb(data, 0); 122 + if (!skb) { 123 + kfree(data); 124 + return -ENOMEM; 125 + } 126 + 127 + skb_reserve(skb, NET_SKB_PAD); 128 + __skb_put(skb, size); 129 + skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); 130 + skb_reset_network_header(skb); 131 + 132 + if (is_l2) 133 + __skb_push(skb, ETH_HLEN); 134 + if (is_direct_pkt_access) 135 + bpf_compute_data_end(skb); 136 + retval = bpf_test_run(prog, skb, repeat, &duration); 137 + if (!is_l2) 138 + __skb_push(skb, ETH_HLEN); 139 + size = skb->len; 140 + /* bpf program can never convert linear skb to non-linear */ 141 + if (WARN_ON_ONCE(skb_is_nonlinear(skb))) 142 + size = skb_headlen(skb); 143 + ret = bpf_test_finish(uattr, skb->data, size, retval, duration); 144 + kfree_skb(skb); 145 + return ret; 146 + } 147 + 148 + int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, 149 + union bpf_attr __user *uattr) 150 + { 151 + u32 size = kattr->test.data_size_in; 152 + u32 repeat = kattr->test.repeat; 153 + struct xdp_buff xdp = {}; 154 + u32 retval, duration; 155 + void *data; 156 + int ret; 157 + 158 + data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0); 159 + if (IS_ERR(data)) 160 + return PTR_ERR(data); 161 + 162 + xdp.data_hard_start = data; 163 + xdp.data = data + XDP_PACKET_HEADROOM; 164 + xdp.data_end = xdp.data + size; 165 + 166 + retval = bpf_test_run(prog, &xdp, repeat, &duration); 167 + if (xdp.data != data + XDP_PACKET_HEADROOM) 168 + size = xdp.data_end - xdp.data; 169 + ret = bpf_test_finish(uattr, xdp.data, size, retval, duration); 170 + kfree(data); 171 + return ret; 172 + }
+5
net/core/filter.c
··· 3309 3309 .is_valid_access = tc_cls_act_is_valid_access, 3310 3310 .convert_ctx_access = tc_cls_act_convert_ctx_access, 3311 3311 .gen_prologue = tc_cls_act_prologue, 3312 + .test_run = bpf_prog_test_run_skb, 3312 3313 }; 3313 3314 3314 3315 static const struct bpf_verifier_ops xdp_ops = { 3315 3316 .get_func_proto = xdp_func_proto, 3316 3317 .is_valid_access = xdp_is_valid_access, 3317 3318 .convert_ctx_access = xdp_convert_ctx_access, 3319 + .test_run = bpf_prog_test_run_xdp, 3318 3320 }; 3319 3321 3320 3322 static const struct bpf_verifier_ops cg_skb_ops = { 3321 3323 .get_func_proto = cg_skb_func_proto, 3322 3324 .is_valid_access = sk_filter_is_valid_access, 3323 3325 .convert_ctx_access = bpf_convert_ctx_access, 3326 + .test_run = bpf_prog_test_run_skb, 3324 3327 }; 3325 3328 3326 3329 static const struct bpf_verifier_ops lwt_inout_ops = { 3327 3330 .get_func_proto = lwt_inout_func_proto, 3328 3331 .is_valid_access = lwt_is_valid_access, 3329 3332 .convert_ctx_access = bpf_convert_ctx_access, 3333 + .test_run = bpf_prog_test_run_skb, 3330 3334 }; 3331 3335 3332 3336 static const struct bpf_verifier_ops lwt_xmit_ops = { ··· 3338 3334 .is_valid_access = lwt_is_valid_access, 3339 3335 .convert_ctx_access = bpf_convert_ctx_access, 3340 3336 .gen_prologue = tc_cls_act_prologue, 3337 + .test_run = bpf_prog_test_run_skb, 3341 3338 }; 3342 3339 3343 3340 static const struct bpf_verifier_ops cg_sock_ops = {