Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ebpf-next'

Daniel Borkmann says:

====================
This set also adds native eBPF support to act_bpf, and thus covers tc
with eBPF in both the classifier *and* the action part.

A link to iproute2 preview has been provided in patch 2 and the code
will be pushed out after Stephen has processed the classifier part
and helper bits for tc.

This set depends on ced585c83b27 ("act_bpf: allow non-default TC_ACT
opcodes as BPF exec outcome"), so a net into net-next merge would be
required first. Hope that's fine by you, Dave. ;)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+228 -83
+5 -1
include/net/tc_act/tc_bpf.h
··· 16 16 struct tcf_bpf { 17 17 struct tcf_common common; 18 18 struct bpf_prog *filter; 19 + union { 20 + u32 bpf_fd; 21 + u16 bpf_num_ops; 22 + }; 19 23 struct sock_filter *bpf_ops; 20 - u16 bpf_num_ops; 24 + const char *bpf_name; 21 25 }; 22 26 #define to_bpf(a) \ 23 27 container_of(a->priv, struct tcf_bpf, common)
+1
include/uapi/linux/bpf.h
··· 119 119 BPF_PROG_TYPE_UNSPEC, 120 120 BPF_PROG_TYPE_SOCKET_FILTER, 121 121 BPF_PROG_TYPE_SCHED_CLS, 122 + BPF_PROG_TYPE_SCHED_ACT, 122 123 }; 123 124 124 125 #define BPF_PSEUDO_MAP_FD 1
+2
include/uapi/linux/tc_act/tc_bpf.h
··· 24 24 TCA_ACT_BPF_PARMS, 25 25 TCA_ACT_BPF_OPS_LEN, 26 26 TCA_ACT_BPF_OPS, 27 + TCA_ACT_BPF_FD, 28 + TCA_ACT_BPF_NAME, 27 29 __TCA_ACT_BPF_MAX, 28 30 }; 29 31 #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
+1
kernel/bpf/verifier.c
··· 1180 1180 switch (type) { 1181 1181 case BPF_PROG_TYPE_SOCKET_FILTER: 1182 1182 case BPF_PROG_TYPE_SCHED_CLS: 1183 + case BPF_PROG_TYPE_SCHED_ACT: 1183 1184 return true; 1184 1185 default: 1185 1186 return false;
+6
net/core/filter.c
··· 1263 1263 .type = BPF_PROG_TYPE_SCHED_CLS, 1264 1264 }; 1265 1265 1266 + static struct bpf_prog_type_list sched_act_type __read_mostly = { 1267 + .ops = &sk_filter_ops, 1268 + .type = BPF_PROG_TYPE_SCHED_ACT, 1269 + }; 1270 + 1266 1271 static int __init register_sk_filter_ops(void) 1267 1272 { 1268 1273 bpf_register_prog_type(&sk_filter_type); 1269 1274 bpf_register_prog_type(&sched_cls_type); 1275 + bpf_register_prog_type(&sched_act_type); 1270 1276 1271 1277 return 0; 1272 1278 }
+213 -82
net/sched/act_bpf.c
··· 13 13 #include <linux/skbuff.h> 14 14 #include <linux/rtnetlink.h> 15 15 #include <linux/filter.h> 16 + #include <linux/bpf.h> 17 + 16 18 #include <net/netlink.h> 17 19 #include <net/pkt_sched.h> 18 20 19 21 #include <linux/tc_act/tc_bpf.h> 20 22 #include <net/tc_act/tc_bpf.h> 21 23 22 - #define BPF_TAB_MASK 15 24 + #define BPF_TAB_MASK 15 25 + #define ACT_BPF_NAME_LEN 256 23 26 24 - static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, 27 + struct tcf_bpf_cfg { 28 + struct bpf_prog *filter; 29 + struct sock_filter *bpf_ops; 30 + char *bpf_name; 31 + u32 bpf_fd; 32 + u16 bpf_num_ops; 33 + }; 34 + 35 + static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, 25 36 struct tcf_result *res) 26 37 { 27 - struct tcf_bpf *b = a->priv; 38 + struct tcf_bpf *prog = act->priv; 28 39 int action, filter_res; 29 40 30 - spin_lock(&b->tcf_lock); 41 + spin_lock(&prog->tcf_lock); 31 42 32 - b->tcf_tm.lastuse = jiffies; 33 - bstats_update(&b->tcf_bstats, skb); 43 + prog->tcf_tm.lastuse = jiffies; 44 + bstats_update(&prog->tcf_bstats, skb); 34 45 35 - filter_res = BPF_PROG_RUN(b->filter, skb); 46 + /* Needed here for accessing maps. */ 47 + rcu_read_lock(); 48 + filter_res = BPF_PROG_RUN(prog->filter, skb); 49 + rcu_read_unlock(); 36 50 37 51 /* A BPF program may overwrite the default action opcode. 
38 52 * Similarly as in cls_bpf, if filter_res == -1 we use the ··· 66 52 break; 67 53 case TC_ACT_SHOT: 68 54 action = filter_res; 69 - b->tcf_qstats.drops++; 55 + prog->tcf_qstats.drops++; 70 56 break; 71 57 case TC_ACT_UNSPEC: 72 - action = b->tcf_action; 58 + action = prog->tcf_action; 73 59 break; 74 60 default: 75 61 action = TC_ACT_UNSPEC; 76 62 break; 77 63 } 78 64 79 - spin_unlock(&b->tcf_lock); 65 + spin_unlock(&prog->tcf_lock); 80 66 return action; 81 67 } 82 68 83 - static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, 69 + static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) 70 + { 71 + return !prog->bpf_ops; 72 + } 73 + 74 + static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, 75 + struct sk_buff *skb) 76 + { 77 + struct nlattr *nla; 78 + 79 + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) 80 + return -EMSGSIZE; 81 + 82 + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * 83 + sizeof(struct sock_filter)); 84 + if (nla == NULL) 85 + return -EMSGSIZE; 86 + 87 + memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); 88 + 89 + return 0; 90 + } 91 + 92 + static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, 93 + struct sk_buff *skb) 94 + { 95 + if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) 96 + return -EMSGSIZE; 97 + 98 + if (prog->bpf_name && 99 + nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) 100 + return -EMSGSIZE; 101 + 102 + return 0; 103 + } 104 + 105 + static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, 84 106 int bind, int ref) 85 107 { 86 108 unsigned char *tp = skb_tail_pointer(skb); 87 - struct tcf_bpf *b = a->priv; 109 + struct tcf_bpf *prog = act->priv; 88 110 struct tc_act_bpf opt = { 89 - .index = b->tcf_index, 90 - .refcnt = b->tcf_refcnt - ref, 91 - .bindcnt = b->tcf_bindcnt - bind, 92 - .action = b->tcf_action, 111 + .index = prog->tcf_index, 112 + .refcnt = prog->tcf_refcnt - ref, 113 + .bindcnt = prog->tcf_bindcnt - bind, 114 + .action = prog->tcf_action, 
93 115 }; 94 - struct tcf_t t; 95 - struct nlattr *nla; 116 + struct tcf_t tm; 117 + int ret; 96 118 97 119 if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) 98 120 goto nla_put_failure; 99 121 100 - if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) 122 + if (tcf_bpf_is_ebpf(prog)) 123 + ret = tcf_bpf_dump_ebpf_info(prog, skb); 124 + else 125 + ret = tcf_bpf_dump_bpf_info(prog, skb); 126 + if (ret) 101 127 goto nla_put_failure; 102 128 103 - nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * 104 - sizeof(struct sock_filter)); 105 - if (!nla) 129 + tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); 130 + tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); 131 + tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); 132 + 133 + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) 106 134 goto nla_put_failure; 107 135 108 - memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); 109 - 110 - t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); 111 - t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); 112 - t.expires = jiffies_to_clock_t(b->tcf_tm.expires); 113 - if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) 114 - goto nla_put_failure; 115 136 return skb->len; 116 137 117 138 nla_put_failure: ··· 156 107 157 108 static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { 158 109 [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, 110 + [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, 111 + [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, 159 112 [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, 160 113 [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, 161 114 .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, 162 115 }; 163 116 164 - static int tcf_bpf_init(struct net *net, struct nlattr *nla, 165 - struct nlattr *est, struct tc_action *a, 166 - int ovr, int bind) 117 + static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) 167 118 { 168 - struct nlattr 
*tb[TCA_ACT_BPF_MAX + 1]; 169 - struct tc_act_bpf *parm; 170 - struct tcf_bpf *b; 171 - u16 bpf_size, bpf_num_ops; 172 119 struct sock_filter *bpf_ops; 173 - struct sock_fprog_kern tmp; 120 + struct sock_fprog_kern fprog_tmp; 174 121 struct bpf_prog *fp; 122 + u16 bpf_size, bpf_num_ops; 175 123 int ret; 176 - 177 - if (!nla) 178 - return -EINVAL; 179 - 180 - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); 181 - if (ret < 0) 182 - return ret; 183 - 184 - if (!tb[TCA_ACT_BPF_PARMS] || 185 - !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) 186 - return -EINVAL; 187 - parm = nla_data(tb[TCA_ACT_BPF_PARMS]); 188 124 189 125 bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); 190 126 if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) ··· 180 146 return -EINVAL; 181 147 182 148 bpf_ops = kzalloc(bpf_size, GFP_KERNEL); 183 - if (!bpf_ops) 149 + if (bpf_ops == NULL) 184 150 return -ENOMEM; 185 151 186 152 memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); 187 153 188 - tmp.len = bpf_num_ops; 189 - tmp.filter = bpf_ops; 154 + fprog_tmp.len = bpf_num_ops; 155 + fprog_tmp.filter = bpf_ops; 190 156 191 - ret = bpf_prog_create(&fp, &tmp); 192 - if (ret) 193 - goto free_bpf_ops; 157 + ret = bpf_prog_create(&fp, &fprog_tmp); 158 + if (ret < 0) { 159 + kfree(bpf_ops); 160 + return ret; 161 + } 194 162 195 - if (!tcf_hash_check(parm->index, a, bind)) { 196 - ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); 197 - if (ret) 163 + cfg->bpf_ops = bpf_ops; 164 + cfg->bpf_num_ops = bpf_num_ops; 165 + cfg->filter = fp; 166 + 167 + return 0; 168 + } 169 + 170 + static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) 171 + { 172 + struct bpf_prog *fp; 173 + char *name = NULL; 174 + u32 bpf_fd; 175 + 176 + bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); 177 + 178 + fp = bpf_prog_get(bpf_fd); 179 + if (IS_ERR(fp)) 180 + return PTR_ERR(fp); 181 + 182 + if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { 183 + bpf_prog_put(fp); 184 + return -EINVAL; 
185 + } 186 + 187 + if (tb[TCA_ACT_BPF_NAME]) { 188 + name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), 189 + nla_len(tb[TCA_ACT_BPF_NAME]), 190 + GFP_KERNEL); 191 + if (!name) { 192 + bpf_prog_put(fp); 193 + return -ENOMEM; 194 + } 195 + } 196 + 197 + cfg->bpf_fd = bpf_fd; 198 + cfg->bpf_name = name; 199 + cfg->filter = fp; 200 + 201 + return 0; 202 + } 203 + 204 + static int tcf_bpf_init(struct net *net, struct nlattr *nla, 205 + struct nlattr *est, struct tc_action *act, 206 + int replace, int bind) 207 + { 208 + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; 209 + struct tc_act_bpf *parm; 210 + struct tcf_bpf *prog; 211 + struct tcf_bpf_cfg cfg; 212 + bool is_bpf, is_ebpf; 213 + int ret; 214 + 215 + if (!nla) 216 + return -EINVAL; 217 + 218 + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); 219 + if (ret < 0) 220 + return ret; 221 + 222 + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; 223 + is_ebpf = tb[TCA_ACT_BPF_FD]; 224 + 225 + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || 226 + !tb[TCA_ACT_BPF_PARMS]) 227 + return -EINVAL; 228 + 229 + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); 230 + 231 + memset(&cfg, 0, sizeof(cfg)); 232 + 233 + ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : 234 + tcf_bpf_init_from_efd(tb, &cfg); 235 + if (ret < 0) 236 + return ret; 237 + 238 + if (!tcf_hash_check(parm->index, act, bind)) { 239 + ret = tcf_hash_create(parm->index, est, act, 240 + sizeof(*prog), bind); 241 + if (ret < 0) 198 242 goto destroy_fp; 199 243 200 244 ret = ACT_P_CREATED; 201 245 } else { 246 + /* Don't override defaults. 
*/ 202 247 if (bind) 203 248 goto destroy_fp; 204 - tcf_hash_release(a, bind); 205 - if (!ovr) { 249 + 250 + tcf_hash_release(act, bind); 251 + if (!replace) { 206 252 ret = -EEXIST; 207 253 goto destroy_fp; 208 254 } 209 255 } 210 256 211 - b = to_bpf(a); 212 - spin_lock_bh(&b->tcf_lock); 213 - b->tcf_action = parm->action; 214 - b->bpf_num_ops = bpf_num_ops; 215 - b->bpf_ops = bpf_ops; 216 - b->filter = fp; 217 - spin_unlock_bh(&b->tcf_lock); 257 + prog = to_bpf(act); 258 + spin_lock_bh(&prog->tcf_lock); 259 + 260 + prog->bpf_ops = cfg.bpf_ops; 261 + prog->bpf_name = cfg.bpf_name; 262 + 263 + if (cfg.bpf_num_ops) 264 + prog->bpf_num_ops = cfg.bpf_num_ops; 265 + if (cfg.bpf_fd) 266 + prog->bpf_fd = cfg.bpf_fd; 267 + 268 + prog->tcf_action = parm->action; 269 + prog->filter = cfg.filter; 270 + 271 + spin_unlock_bh(&prog->tcf_lock); 218 272 219 273 if (ret == ACT_P_CREATED) 220 - tcf_hash_insert(a); 274 + tcf_hash_insert(act); 275 + 221 276 return ret; 222 277 223 278 destroy_fp: 224 - bpf_prog_destroy(fp); 225 - free_bpf_ops: 226 - kfree(bpf_ops); 279 + if (is_ebpf) 280 + bpf_prog_put(cfg.filter); 281 + else 282 + bpf_prog_destroy(cfg.filter); 283 + 284 + kfree(cfg.bpf_ops); 285 + kfree(cfg.bpf_name); 286 + 227 287 return ret; 228 288 } 229 289 230 - static void tcf_bpf_cleanup(struct tc_action *a, int bind) 290 + static void tcf_bpf_cleanup(struct tc_action *act, int bind) 231 291 { 232 - struct tcf_bpf *b = a->priv; 292 + const struct tcf_bpf *prog = act->priv; 233 293 234 - bpf_prog_destroy(b->filter); 294 + if (tcf_bpf_is_ebpf(prog)) 295 + bpf_prog_put(prog->filter); 296 + else 297 + bpf_prog_destroy(prog->filter); 235 298 } 236 299 237 - static struct tc_action_ops act_bpf_ops = { 238 - .kind = "bpf", 239 - .type = TCA_ACT_BPF, 240 - .owner = THIS_MODULE, 241 - .act = tcf_bpf, 242 - .dump = tcf_bpf_dump, 243 - .cleanup = tcf_bpf_cleanup, 244 - .init = tcf_bpf_init, 300 + static struct tc_action_ops act_bpf_ops __read_mostly = { 301 + .kind = "bpf", 302 + .type 
= TCA_ACT_BPF, 303 + .owner = THIS_MODULE, 304 + .act = tcf_bpf, 305 + .dump = tcf_bpf_dump, 306 + .cleanup = tcf_bpf_cleanup, 307 + .init = tcf_bpf_init, 245 308 }; 246 309 247 310 static int __init bpf_init_module(void)