Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2020-06-30

The following pull-request contains BPF updates for your *net* tree.

We've added 28 non-merge commits during the last 9 day(s) which contain
a total of 35 files changed, 486 insertions(+), 232 deletions(-).

The main changes are:

1) Fix an incorrect verifier branch elimination for PTR_TO_BTF_ID pointer
types, from Yonghong Song.

2) Fix UAPI for sockmap and flow_dissector progs that were ignoring various
arguments passed to BPF_PROG_{ATTACH,DETACH}, from Lorenz Bauer & Jakub Sitnicki.

3) Fix broken AF_XDP DMA hacks that are poking into dma-direct and swiotlb
internals and integrate it properly into DMA core, from Christoph Hellwig.

4) Fix RCU splat from recent changes to avoid skipping ingress policy when
kTLS is enabled, from John Fastabend.

5) Fix BPF ringbuf map to enforce its size to be a power of 2 in order for its
position masking to work, from Andrii Nakryiko.

6) Fix regression from CAP_BPF work to re-allow CAP_SYS_ADMIN for loading
of network programs, from Maciej Żenczykowski.

7) Fix libbpf section name prefix for devmap progs, from Jesper Dangaard Brouer.

8) Fix formatting in UAPI documentation for BPF helpers, from Quentin Monnet.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+486 -232
+8
Documentation/core-api/dma-api.rst
··· 206 206 207 207 :: 208 208 209 + bool 210 + dma_need_sync(struct device *dev, dma_addr_t dma_addr); 211 + 212 + Returns %true if dma_sync_single_for_{device,cpu} calls are required to 213 + transfer memory ownership. Returns %false if those calls can be skipped. 214 + 215 + :: 216 + 209 217 unsigned long 210 218 dma_get_merge_boundary(struct device *dev); 211 219
+3 -2
include/linux/bpf-netns.h
··· 33 33 union bpf_attr __user *uattr); 34 34 int netns_bpf_prog_attach(const union bpf_attr *attr, 35 35 struct bpf_prog *prog); 36 - int netns_bpf_prog_detach(const union bpf_attr *attr); 36 + int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); 37 37 int netns_bpf_link_create(const union bpf_attr *attr, 38 38 struct bpf_prog *prog); 39 39 #else ··· 49 49 return -EOPNOTSUPP; 50 50 } 51 51 52 - static inline int netns_bpf_prog_detach(const union bpf_attr *attr) 52 + static inline int netns_bpf_prog_detach(const union bpf_attr *attr, 53 + enum bpf_prog_type ptype) 53 54 { 54 55 return -EOPNOTSUPP; 55 56 }
+11 -2
include/linux/bpf.h
··· 1543 1543 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ 1544 1544 1545 1545 #if defined(CONFIG_BPF_STREAM_PARSER) 1546 - int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); 1546 + int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 1547 + struct bpf_prog *old, u32 which); 1547 1548 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); 1549 + int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); 1548 1550 void sock_map_unhash(struct sock *sk); 1549 1551 void sock_map_close(struct sock *sk, long timeout); 1550 1552 #else 1551 1553 static inline int sock_map_prog_update(struct bpf_map *map, 1552 - struct bpf_prog *prog, u32 which) 1554 + struct bpf_prog *prog, 1555 + struct bpf_prog *old, u32 which) 1553 1556 { 1554 1557 return -EOPNOTSUPP; 1555 1558 } ··· 1561 1558 struct bpf_prog *prog) 1562 1559 { 1563 1560 return -EINVAL; 1561 + } 1562 + 1563 + static inline int sock_map_prog_detach(const union bpf_attr *attr, 1564 + enum bpf_prog_type ptype) 1565 + { 1566 + return -EOPNOTSUPP; 1564 1567 } 1565 1568 #endif /* CONFIG_BPF_STREAM_PARSER */ 1566 1569
+5
include/linux/btf.h
··· 82 82 return BTF_INFO_KIND(t->info) == BTF_KIND_INT; 83 83 } 84 84 85 + static inline bool btf_type_is_small_int(const struct btf_type *t) 86 + { 87 + return btf_type_is_int(t) && t->size <= sizeof(u64); 88 + } 89 + 85 90 static inline bool btf_type_is_enum(const struct btf_type *t) 86 91 { 87 92 return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
+1
include/linux/dma-direct.h
··· 87 87 void *cpu_addr, dma_addr_t dma_addr, size_t size, 88 88 unsigned long attrs); 89 89 int dma_direct_supported(struct device *dev, u64 mask); 90 + bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); 90 91 #endif /* _LINUX_DMA_DIRECT_H */
+5
include/linux/dma-mapping.h
··· 461 461 int dma_set_coherent_mask(struct device *dev, u64 mask); 462 462 u64 dma_get_required_mask(struct device *dev); 463 463 size_t dma_max_mapping_size(struct device *dev); 464 + bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); 464 465 unsigned long dma_get_merge_boundary(struct device *dev); 465 466 #else /* CONFIG_HAS_DMA */ 466 467 static inline dma_addr_t dma_map_page_attrs(struct device *dev, ··· 571 570 static inline size_t dma_max_mapping_size(struct device *dev) 572 571 { 573 572 return 0; 573 + } 574 + static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) 575 + { 576 + return false; 574 577 } 575 578 static inline unsigned long dma_get_merge_boundary(struct device *dev) 576 579 {
+13
include/linux/skmsg.h
··· 430 430 bpf_prog_put(prog); 431 431 } 432 432 433 + static inline int psock_replace_prog(struct bpf_prog **pprog, 434 + struct bpf_prog *prog, 435 + struct bpf_prog *old) 436 + { 437 + if (cmpxchg(pprog, old, prog) != old) 438 + return -ENOENT; 439 + 440 + if (old) 441 + bpf_prog_put(old); 442 + 443 + return 0; 444 + } 445 + 433 446 static inline void psock_progs_drop(struct sk_psock_progs *progs) 434 447 { 435 448 psock_set_prog(&progs->msg_parser, NULL);
+2 -1
include/net/flow_dissector.h
··· 372 372 } 373 373 374 374 #ifdef CONFIG_BPF_SYSCALL 375 - int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog); 375 + int flow_dissector_bpf_prog_attach_check(struct net *net, 376 + struct bpf_prog *prog); 376 377 #endif /* CONFIG_BPF_SYSCALL */ 377 378 378 379 #endif
+5 -2
include/net/netns/bpf.h
··· 9 9 #include <linux/bpf-netns.h> 10 10 11 11 struct bpf_prog; 12 + struct bpf_prog_array; 12 13 13 14 struct netns_bpf { 14 - struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE]; 15 - struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; 15 + /* Array of programs to run compiled from progs or links */ 16 + struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; 17 + struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; 18 + struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE]; 16 19 }; 17 20 18 21 #endif /* __NETNS_BPF_H__ */
+3 -3
include/net/xsk_buff_pool.h
··· 40 40 u32 headroom; 41 41 u32 chunk_size; 42 42 u32 frame_len; 43 - bool cheap_dma; 43 + bool dma_need_sync; 44 44 bool unaligned; 45 45 void *addrs; 46 46 struct device *dev; ··· 80 80 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); 81 81 static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) 82 82 { 83 - if (xskb->pool->cheap_dma) 83 + if (!xskb->pool->dma_need_sync) 84 84 return; 85 85 86 86 xp_dma_sync_for_cpu_slow(xskb); ··· 91 91 static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, 92 92 dma_addr_t dma, size_t size) 93 93 { 94 - if (pool->cheap_dma) 94 + if (!pool->dma_need_sync) 95 95 return; 96 96 97 97 xp_dma_sync_for_device_slow(pool, dma, size);
+21 -20
include/uapi/linux/bpf.h
··· 3171 3171 * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) 3172 3172 * Description 3173 3173 * Copy *size* bytes from *data* into a ring buffer *ringbuf*. 3174 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3175 - * new data availability is sent. 3176 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3177 - * new data availability is sent unconditionally. 3174 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3175 + * of new data availability is sent. 3176 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3177 + * of new data availability is sent unconditionally. 3178 3178 * Return 3179 - * 0, on success; 3180 - * < 0, on error. 3179 + * 0 on success, or a negative error in case of failure. 3181 3180 * 3182 3181 * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) 3183 3182 * Description ··· 3188 3189 * void bpf_ringbuf_submit(void *data, u64 flags) 3189 3190 * Description 3190 3191 * Submit reserved ring buffer sample, pointed to by *data*. 3191 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3192 - * new data availability is sent. 3193 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3194 - * new data availability is sent unconditionally. 3192 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3193 + * of new data availability is sent. 3194 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3195 + * of new data availability is sent unconditionally. 3195 3196 * Return 3196 3197 * Nothing. Always succeeds. 3197 3198 * 3198 3199 * void bpf_ringbuf_discard(void *data, u64 flags) 3199 3200 * Description 3200 3201 * Discard reserved ring buffer sample, pointed to by *data*. 3201 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3202 - * new data availability is sent. 
3203 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3204 - * new data availability is sent unconditionally. 3202 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3203 + * of new data availability is sent. 3204 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3205 + * of new data availability is sent unconditionally. 3205 3206 * Return 3206 3207 * Nothing. Always succeeds. 3207 3208 * ··· 3209 3210 * Description 3210 3211 * Query various characteristics of provided ring buffer. What 3211 3212 * exactly is queries is determined by *flags*: 3212 - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; 3213 - * - BPF_RB_RING_SIZE - the size of ring buffer; 3214 - * - BPF_RB_CONS_POS - consumer position (can wrap around); 3215 - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); 3216 - * Data returned is just a momentary snapshots of actual values 3213 + * 3214 + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. 3215 + * * **BPF_RB_RING_SIZE**: The size of ring buffer. 3216 + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). 3217 + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). 3218 + * 3219 + * Data returned is just a momentary snapshot of actual values 3217 3220 * and could be inaccurate, so this facility should be used to 3218 3221 * power heuristics and for reporting, not to make 100% correct 3219 3222 * calculation. 3220 3223 * Return 3221 - * Requested value, or 0, if flags are not recognized. 3224 + * Requested value, or 0, if *flags* are not recognized. 3222 3225 * 3223 3226 * int bpf_csum_level(struct sk_buff *skb, u64 level) 3224 3227 * Description
+2 -2
kernel/bpf/btf.c
··· 3746 3746 return false; 3747 3747 3748 3748 t = btf_type_skip_modifiers(btf, t->type, NULL); 3749 - if (!btf_type_is_int(t)) { 3749 + if (!btf_type_is_small_int(t)) { 3750 3750 bpf_log(log, 3751 3751 "ret type %s not allowed for fmod_ret\n", 3752 3752 btf_kind_str[BTF_INFO_KIND(t->info)]); ··· 3768 3768 /* skip modifiers */ 3769 3769 while (btf_type_is_modifier(t)) 3770 3770 t = btf_type_by_id(btf, t->type); 3771 - if (btf_type_is_int(t) || btf_type_is_enum(t)) 3771 + if (btf_type_is_small_int(t) || btf_type_is_enum(t)) 3772 3772 /* accessing a scalar */ 3773 3773 return true; 3774 3774 if (!btf_type_is_ptr(t)) {
+134 -60
kernel/bpf/net_namespace.c
··· 19 19 * with netns_bpf_mutex held. 20 20 */ 21 21 struct net *net; 22 + struct list_head node; /* node in list of links attached to net */ 22 23 }; 23 24 24 25 /* Protects updates to netns_bpf */ 25 26 DEFINE_MUTEX(netns_bpf_mutex); 26 27 27 28 /* Must be called with netns_bpf_mutex held. */ 28 - static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link) 29 + static void netns_bpf_run_array_detach(struct net *net, 30 + enum netns_bpf_attach_type type) 29 31 { 30 - struct bpf_netns_link *net_link = 31 - container_of(link, struct bpf_netns_link, link); 32 + struct bpf_prog_array *run_array; 32 33 33 - net_link->net = NULL; 34 + run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL, 35 + lockdep_is_held(&netns_bpf_mutex)); 36 + bpf_prog_array_free(run_array); 34 37 } 35 38 36 39 static void bpf_netns_link_release(struct bpf_link *link) ··· 43 40 enum netns_bpf_attach_type type = net_link->netns_type; 44 41 struct net *net; 45 42 46 - /* Link auto-detached by dying netns. */ 47 - if (!net_link->net) 48 - return; 49 - 50 43 mutex_lock(&netns_bpf_mutex); 51 44 52 - /* Recheck after potential sleep. We can race with cleanup_net 53 - * here, but if we see a non-NULL struct net pointer pre_exit 54 - * has not happened yet and will block on netns_bpf_mutex. 45 + /* We can race with cleanup_net, but if we see a non-NULL 46 + * struct net pointer, pre_exit has not run yet and wait for 47 + * netns_bpf_mutex. 
55 48 */ 56 49 net = net_link->net; 57 50 if (!net) 58 51 goto out_unlock; 59 52 60 - net->bpf.links[type] = NULL; 61 - RCU_INIT_POINTER(net->bpf.progs[type], NULL); 53 + netns_bpf_run_array_detach(net, type); 54 + list_del(&net_link->node); 62 55 63 56 out_unlock: 64 57 mutex_unlock(&netns_bpf_mutex); ··· 75 76 struct bpf_netns_link *net_link = 76 77 container_of(link, struct bpf_netns_link, link); 77 78 enum netns_bpf_attach_type type = net_link->netns_type; 79 + struct bpf_prog_array *run_array; 78 80 struct net *net; 79 81 int ret = 0; 80 82 ··· 93 93 goto out_unlock; 94 94 } 95 95 96 + run_array = rcu_dereference_protected(net->bpf.run_array[type], 97 + lockdep_is_held(&netns_bpf_mutex)); 98 + WRITE_ONCE(run_array->items[0].prog, new_prog); 99 + 96 100 old_prog = xchg(&link->prog, new_prog); 97 - rcu_assign_pointer(net->bpf.progs[type], new_prog); 98 101 bpf_prog_put(old_prog); 99 102 100 103 out_unlock: ··· 145 142 .show_fdinfo = bpf_netns_link_show_fdinfo, 146 143 }; 147 144 145 + /* Must be called with netns_bpf_mutex held. 
*/ 146 + static int __netns_bpf_prog_query(const union bpf_attr *attr, 147 + union bpf_attr __user *uattr, 148 + struct net *net, 149 + enum netns_bpf_attach_type type) 150 + { 151 + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 152 + struct bpf_prog_array *run_array; 153 + u32 prog_cnt = 0, flags = 0; 154 + 155 + run_array = rcu_dereference_protected(net->bpf.run_array[type], 156 + lockdep_is_held(&netns_bpf_mutex)); 157 + if (run_array) 158 + prog_cnt = bpf_prog_array_length(run_array); 159 + 160 + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 161 + return -EFAULT; 162 + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 163 + return -EFAULT; 164 + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 165 + return 0; 166 + 167 + return bpf_prog_array_copy_to_user(run_array, prog_ids, 168 + attr->query.prog_cnt); 169 + } 170 + 148 171 int netns_bpf_prog_query(const union bpf_attr *attr, 149 172 union bpf_attr __user *uattr) 150 173 { 151 - __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 152 - u32 prog_id, prog_cnt = 0, flags = 0; 153 174 enum netns_bpf_attach_type type; 154 - struct bpf_prog *attached; 155 175 struct net *net; 176 + int ret; 156 177 157 178 if (attr->query.query_flags) 158 179 return -EINVAL; ··· 189 162 if (IS_ERR(net)) 190 163 return PTR_ERR(net); 191 164 192 - rcu_read_lock(); 193 - attached = rcu_dereference(net->bpf.progs[type]); 194 - if (attached) { 195 - prog_cnt = 1; 196 - prog_id = attached->aux->id; 197 - } 198 - rcu_read_unlock(); 165 + mutex_lock(&netns_bpf_mutex); 166 + ret = __netns_bpf_prog_query(attr, uattr, net, type); 167 + mutex_unlock(&netns_bpf_mutex); 199 168 200 169 put_net(net); 201 - 202 - if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 203 - return -EFAULT; 204 - if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 205 - return -EFAULT; 206 - 207 - if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 208 - 
return 0; 209 - 210 - if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) 211 - return -EFAULT; 212 - 213 - return 0; 170 + return ret; 214 171 } 215 172 216 173 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) 217 174 { 175 + struct bpf_prog_array *run_array; 218 176 enum netns_bpf_attach_type type; 177 + struct bpf_prog *attached; 219 178 struct net *net; 220 179 int ret; 180 + 181 + if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd) 182 + return -EINVAL; 221 183 222 184 type = to_netns_bpf_attach_type(attr->attach_type); 223 185 if (type < 0) ··· 216 200 mutex_lock(&netns_bpf_mutex); 217 201 218 202 /* Attaching prog directly is not compatible with links */ 219 - if (net->bpf.links[type]) { 203 + if (!list_empty(&net->bpf.links[type])) { 220 204 ret = -EEXIST; 221 205 goto out_unlock; 222 206 } 223 207 224 208 switch (type) { 225 209 case NETNS_BPF_FLOW_DISSECTOR: 226 - ret = flow_dissector_bpf_prog_attach(net, prog); 210 + ret = flow_dissector_bpf_prog_attach_check(net, prog); 227 211 break; 228 212 default: 229 213 ret = -EINVAL; 230 214 break; 231 215 } 216 + if (ret) 217 + goto out_unlock; 218 + 219 + attached = net->bpf.progs[type]; 220 + if (attached == prog) { 221 + /* The same program cannot be attached twice */ 222 + ret = -EINVAL; 223 + goto out_unlock; 224 + } 225 + 226 + run_array = rcu_dereference_protected(net->bpf.run_array[type], 227 + lockdep_is_held(&netns_bpf_mutex)); 228 + if (run_array) { 229 + WRITE_ONCE(run_array->items[0].prog, prog); 230 + } else { 231 + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 232 + if (!run_array) { 233 + ret = -ENOMEM; 234 + goto out_unlock; 235 + } 236 + run_array->items[0].prog = prog; 237 + rcu_assign_pointer(net->bpf.run_array[type], run_array); 238 + } 239 + 240 + net->bpf.progs[type] = prog; 241 + if (attached) 242 + bpf_prog_put(attached); 243 + 232 244 out_unlock: 233 245 mutex_unlock(&netns_bpf_mutex); 234 246 ··· 265 221 266 222 /* Must be called with 
netns_bpf_mutex held. */ 267 223 static int __netns_bpf_prog_detach(struct net *net, 268 - enum netns_bpf_attach_type type) 224 + enum netns_bpf_attach_type type, 225 + struct bpf_prog *old) 269 226 { 270 227 struct bpf_prog *attached; 271 228 272 229 /* Progs attached via links cannot be detached */ 273 - if (net->bpf.links[type]) 230 + if (!list_empty(&net->bpf.links[type])) 274 231 return -EINVAL; 275 232 276 - attached = rcu_dereference_protected(net->bpf.progs[type], 277 - lockdep_is_held(&netns_bpf_mutex)); 278 - if (!attached) 233 + attached = net->bpf.progs[type]; 234 + if (!attached || attached != old) 279 235 return -ENOENT; 280 - RCU_INIT_POINTER(net->bpf.progs[type], NULL); 236 + netns_bpf_run_array_detach(net, type); 237 + net->bpf.progs[type] = NULL; 281 238 bpf_prog_put(attached); 282 239 return 0; 283 240 } 284 241 285 - int netns_bpf_prog_detach(const union bpf_attr *attr) 242 + int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 286 243 { 287 244 enum netns_bpf_attach_type type; 245 + struct bpf_prog *prog; 288 246 int ret; 247 + 248 + if (attr->target_fd) 249 + return -EINVAL; 289 250 290 251 type = to_netns_bpf_attach_type(attr->attach_type); 291 252 if (type < 0) 292 253 return -EINVAL; 293 254 255 + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 256 + if (IS_ERR(prog)) 257 + return PTR_ERR(prog); 258 + 294 259 mutex_lock(&netns_bpf_mutex); 295 - ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type); 260 + ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog); 296 261 mutex_unlock(&netns_bpf_mutex); 262 + 263 + bpf_prog_put(prog); 297 264 298 265 return ret; 299 266 } ··· 312 257 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, 313 258 enum netns_bpf_attach_type type) 314 259 { 315 - struct bpf_prog *prog; 260 + struct bpf_netns_link *net_link = 261 + container_of(link, struct bpf_netns_link, link); 262 + struct bpf_prog_array *run_array; 316 263 int err; 317 264 
318 265 mutex_lock(&netns_bpf_mutex); 319 266 320 267 /* Allow attaching only one prog or link for now */ 321 - if (net->bpf.links[type]) { 268 + if (!list_empty(&net->bpf.links[type])) { 322 269 err = -E2BIG; 323 270 goto out_unlock; 324 271 } 325 272 /* Links are not compatible with attaching prog directly */ 326 - prog = rcu_dereference_protected(net->bpf.progs[type], 327 - lockdep_is_held(&netns_bpf_mutex)); 328 - if (prog) { 273 + if (net->bpf.progs[type]) { 329 274 err = -EEXIST; 330 275 goto out_unlock; 331 276 } 332 277 333 278 switch (type) { 334 279 case NETNS_BPF_FLOW_DISSECTOR: 335 - err = flow_dissector_bpf_prog_attach(net, link->prog); 280 + err = flow_dissector_bpf_prog_attach_check(net, link->prog); 336 281 break; 337 282 default: 338 283 err = -EINVAL; ··· 341 286 if (err) 342 287 goto out_unlock; 343 288 344 - net->bpf.links[type] = link; 289 + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 290 + if (!run_array) { 291 + err = -ENOMEM; 292 + goto out_unlock; 293 + } 294 + run_array->items[0].prog = link->prog; 295 + rcu_assign_pointer(net->bpf.run_array[type], run_array); 296 + 297 + list_add_tail(&net_link->node, &net->bpf.links[type]); 345 298 346 299 out_unlock: 347 300 mutex_unlock(&netns_bpf_mutex); ··· 408 345 return err; 409 346 } 410 347 348 + static int __net_init netns_bpf_pernet_init(struct net *net) 349 + { 350 + int type; 351 + 352 + for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) 353 + INIT_LIST_HEAD(&net->bpf.links[type]); 354 + 355 + return 0; 356 + } 357 + 411 358 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) 412 359 { 413 360 enum netns_bpf_attach_type type; 414 - struct bpf_link *link; 361 + struct bpf_netns_link *net_link; 415 362 416 363 mutex_lock(&netns_bpf_mutex); 417 364 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { 418 - link = net->bpf.links[type]; 419 - if (link) 420 - bpf_netns_link_auto_detach(link); 421 - else 422 - __netns_bpf_prog_detach(net, type); 365 + 
netns_bpf_run_array_detach(net, type); 366 + list_for_each_entry(net_link, &net->bpf.links[type], node) 367 + net_link->net = NULL; /* auto-detach link */ 368 + if (net->bpf.progs[type]) 369 + bpf_prog_put(net->bpf.progs[type]); 423 370 } 424 371 mutex_unlock(&netns_bpf_mutex); 425 372 } 426 373 427 374 static struct pernet_operations netns_bpf_pernet_ops __net_initdata = { 375 + .init = netns_bpf_pernet_init, 428 376 .pre_exit = netns_bpf_pernet_pre_exit, 429 377 }; 430 378
+8 -10
kernel/bpf/ringbuf.c
··· 132 132 { 133 133 struct bpf_ringbuf *rb; 134 134 135 - if (!data_sz || !PAGE_ALIGNED(data_sz)) 136 - return ERR_PTR(-EINVAL); 137 - 138 - #ifdef CONFIG_64BIT 139 - /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */ 140 - if (data_sz > RINGBUF_MAX_DATA_SZ) 141 - return ERR_PTR(-E2BIG); 142 - #endif 143 - 144 135 rb = bpf_ringbuf_area_alloc(data_sz, numa_node); 145 136 if (!rb) 146 137 return ERR_PTR(-ENOMEM); ··· 157 166 return ERR_PTR(-EINVAL); 158 167 159 168 if (attr->key_size || attr->value_size || 160 - attr->max_entries == 0 || !PAGE_ALIGNED(attr->max_entries)) 169 + !is_power_of_2(attr->max_entries) || 170 + !PAGE_ALIGNED(attr->max_entries)) 161 171 return ERR_PTR(-EINVAL); 172 + 173 + #ifdef CONFIG_64BIT 174 + /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */ 175 + if (attr->max_entries > RINGBUF_MAX_DATA_SZ) 176 + return ERR_PTR(-E2BIG); 177 + #endif 162 178 163 179 rb_map = kzalloc(sizeof(*rb_map), GFP_USER); 164 180 if (!rb_map)
+3 -5
kernel/bpf/syscall.c
··· 2121 2121 !bpf_capable()) 2122 2122 return -EPERM; 2123 2123 2124 - if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN)) 2124 + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) 2125 2125 return -EPERM; 2126 2126 if (is_perfmon_prog_type(type) && !perfmon_capable()) 2127 2127 return -EPERM; ··· 2893 2893 switch (ptype) { 2894 2894 case BPF_PROG_TYPE_SK_MSG: 2895 2895 case BPF_PROG_TYPE_SK_SKB: 2896 - return sock_map_get_from_fd(attr, NULL); 2896 + return sock_map_prog_detach(attr, ptype); 2897 2897 case BPF_PROG_TYPE_LIRC_MODE2: 2898 2898 return lirc_prog_detach(attr); 2899 2899 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2900 - if (!capable(CAP_NET_ADMIN)) 2901 - return -EPERM; 2902 - return netns_bpf_prog_detach(attr); 2900 + return netns_bpf_prog_detach(attr, ptype); 2903 2901 case BPF_PROG_TYPE_CGROUP_DEVICE: 2904 2902 case BPF_PROG_TYPE_CGROUP_SKB: 2905 2903 case BPF_PROG_TYPE_CGROUP_SOCK:
+10 -3
kernel/bpf/verifier.c
··· 399 399 return type == PTR_TO_SOCKET || 400 400 type == PTR_TO_TCP_SOCK || 401 401 type == PTR_TO_MAP_VALUE || 402 - type == PTR_TO_SOCK_COMMON || 403 - type == PTR_TO_BTF_ID; 402 + type == PTR_TO_SOCK_COMMON; 404 403 } 405 404 406 405 static bool reg_type_may_be_null(enum bpf_reg_type type) ··· 9800 9801 int i, j, subprog_start, subprog_end = 0, len, subprog; 9801 9802 struct bpf_insn *insn; 9802 9803 void *old_bpf_func; 9803 - int err; 9804 + int err, num_exentries; 9804 9805 9805 9806 if (env->subprog_cnt <= 1) 9806 9807 return 0; ··· 9875 9876 func[i]->aux->nr_linfo = prog->aux->nr_linfo; 9876 9877 func[i]->aux->jited_linfo = prog->aux->jited_linfo; 9877 9878 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; 9879 + num_exentries = 0; 9880 + insn = func[i]->insnsi; 9881 + for (j = 0; j < func[i]->len; j++, insn++) { 9882 + if (BPF_CLASS(insn->code) == BPF_LDX && 9883 + BPF_MODE(insn->code) == BPF_PROBE_MEM) 9884 + num_exentries++; 9885 + } 9886 + func[i]->aux->num_exentries = num_exentries; 9878 9887 func[i] = bpf_int_jit_compile(func[i]); 9879 9888 if (!func[i]->jited) { 9880 9889 err = -ENOTSUPP;
+6
kernel/dma/direct.c
··· 530 530 return swiotlb_max_mapping_size(dev); 531 531 return SIZE_MAX; 532 532 } 533 + 534 + bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) 535 + { 536 + return !dev_is_dma_coherent(dev) || 537 + is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); 538 + }
+10
kernel/dma/mapping.c
··· 397 397 } 398 398 EXPORT_SYMBOL_GPL(dma_max_mapping_size); 399 399 400 + bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) 401 + { 402 + const struct dma_map_ops *ops = get_dma_ops(dev); 403 + 404 + if (dma_is_direct(ops)) 405 + return dma_direct_need_sync(dev, dma_addr); 406 + return ops->sync_single_for_cpu || ops->sync_single_for_device; 407 + } 408 + EXPORT_SYMBOL_GPL(dma_need_sync); 409 + 400 410 unsigned long dma_get_merge_boundary(struct device *dev) 401 411 { 402 412 const struct dma_map_ops *ops = get_dma_ops(dev);
+18 -1
net/bpf/test_run.c
··· 147 147 return a + (long)b + c + d + (long)e + f; 148 148 } 149 149 150 + struct bpf_fentry_test_t { 151 + struct bpf_fentry_test_t *a; 152 + }; 153 + 154 + int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) 155 + { 156 + return (long)arg; 157 + } 158 + 159 + int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) 160 + { 161 + return (long)arg->a; 162 + } 163 + 150 164 int noinline bpf_modify_return_test(int a, int *b) 151 165 { 152 166 *b += 1; ··· 199 185 const union bpf_attr *kattr, 200 186 union bpf_attr __user *uattr) 201 187 { 188 + struct bpf_fentry_test_t arg = {}; 202 189 u16 side_effect = 0, ret = 0; 203 190 int b = 2, err = -EFAULT; 204 191 u32 retval = 0; ··· 212 197 bpf_fentry_test3(4, 5, 6) != 15 || 213 198 bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || 214 199 bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || 215 - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) 200 + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || 201 + bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || 202 + bpf_fentry_test8(&arg) != 0) 216 203 goto out; 217 204 break; 218 205 case BPF_MODIFY_RETURN:
+12 -20
net/core/flow_dissector.c
··· 70 70 EXPORT_SYMBOL(skb_flow_dissector_init); 71 71 72 72 #ifdef CONFIG_BPF_SYSCALL 73 - int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) 73 + int flow_dissector_bpf_prog_attach_check(struct net *net, 74 + struct bpf_prog *prog) 74 75 { 75 76 enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; 76 - struct bpf_prog *attached; 77 77 78 78 if (net == &init_net) { 79 79 /* BPF flow dissector in the root namespace overrides ··· 86 86 for_each_net(ns) { 87 87 if (ns == &init_net) 88 88 continue; 89 - if (rcu_access_pointer(ns->bpf.progs[type])) 89 + if (rcu_access_pointer(ns->bpf.run_array[type])) 90 90 return -EEXIST; 91 91 } 92 92 } else { 93 93 /* Make sure root flow dissector is not attached 94 94 * when attaching to the non-root namespace. 95 95 */ 96 - if (rcu_access_pointer(init_net.bpf.progs[type])) 96 + if (rcu_access_pointer(init_net.bpf.run_array[type])) 97 97 return -EEXIST; 98 98 } 99 99 100 - attached = rcu_dereference_protected(net->bpf.progs[type], 101 - lockdep_is_held(&netns_bpf_mutex)); 102 - if (attached == prog) 103 - /* The same program cannot be attached twice */ 104 - return -EINVAL; 105 - 106 - rcu_assign_pointer(net->bpf.progs[type], prog); 107 - if (attached) 108 - bpf_prog_put(attached); 109 100 return 0; 110 101 } 111 102 #endif /* CONFIG_BPF_SYSCALL */ ··· 894 903 struct flow_dissector_key_addrs *key_addrs; 895 904 struct flow_dissector_key_tags *key_tags; 896 905 struct flow_dissector_key_vlan *key_vlan; 897 - struct bpf_prog *attached = NULL; 898 906 enum flow_dissect_ret fdret; 899 907 enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; 900 908 bool mpls_el = false; ··· 950 960 WARN_ON_ONCE(!net); 951 961 if (net) { 952 962 enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; 963 + struct bpf_prog_array *run_array; 953 964 954 965 rcu_read_lock(); 955 - attached = rcu_dereference(init_net.bpf.progs[type]); 966 + run_array = rcu_dereference(init_net.bpf.run_array[type]); 967 + if 
(!run_array) 968 + run_array = rcu_dereference(net->bpf.run_array[type]); 956 969 957 - if (!attached) 958 - attached = rcu_dereference(net->bpf.progs[type]); 959 - 960 - if (attached) { 970 + if (run_array) { 961 971 struct bpf_flow_keys flow_keys; 962 972 struct bpf_flow_dissector ctx = { 963 973 .flow_keys = &flow_keys, ··· 965 975 .data_end = data + hlen, 966 976 }; 967 977 __be16 n_proto = proto; 978 + struct bpf_prog *prog; 968 979 969 980 if (skb) { 970 981 ctx.skb = skb; ··· 976 985 n_proto = skb->protocol; 977 986 } 978 987 979 - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, 988 + prog = READ_ONCE(run_array->items[0].prog); 989 + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, 980 990 hlen, flags); 981 991 __skb_flow_bpf_to_target(&flow_keys, flow_dissector, 982 992 target_container);
+15 -8
net/core/skmsg.c
··· 683 683 return container_of(parser, struct sk_psock, parser); 684 684 } 685 685 686 - static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) 686 + static void sk_psock_skb_redirect(struct sk_buff *skb) 687 687 { 688 688 struct sk_psock *psock_other; 689 689 struct sock *sk_other; ··· 715 715 } 716 716 } 717 717 718 - static void sk_psock_tls_verdict_apply(struct sk_psock *psock, 719 - struct sk_buff *skb, int verdict) 718 + static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) 720 719 { 721 720 switch (verdict) { 722 721 case __SK_REDIRECT: 723 - sk_psock_skb_redirect(psock, skb); 722 + sk_psock_skb_redirect(skb); 724 723 break; 725 724 case __SK_PASS: 726 725 case __SK_DROP: ··· 740 741 ret = sk_psock_bpf_run(psock, prog, skb); 741 742 ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); 742 743 } 744 + sk_psock_tls_verdict_apply(skb, ret); 743 745 rcu_read_unlock(); 744 - sk_psock_tls_verdict_apply(psock, skb, ret); 745 746 return ret; 746 747 } 747 748 EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); ··· 769 770 } 770 771 goto out_free; 771 772 case __SK_REDIRECT: 772 - sk_psock_skb_redirect(psock, skb); 773 + sk_psock_skb_redirect(skb); 773 774 break; 774 775 case __SK_DROP: 775 776 /* fall-through */ ··· 781 782 782 783 static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) 783 784 { 784 - struct sk_psock *psock = sk_psock_from_strp(strp); 785 + struct sk_psock *psock; 785 786 struct bpf_prog *prog; 786 787 int ret = __SK_DROP; 788 + struct sock *sk; 787 789 788 790 rcu_read_lock(); 791 + sk = strp->sk; 792 + psock = sk_psock(sk); 793 + if (unlikely(!psock)) { 794 + kfree_skb(skb); 795 + goto out; 796 + } 789 797 prog = READ_ONCE(psock->progs.skb_verdict); 790 798 if (likely(prog)) { 791 799 skb_orphan(skb); ··· 800 794 ret = sk_psock_bpf_run(psock, prog, skb); 801 795 ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); 802 796 } 803 - rcu_read_unlock(); 804 797 
sk_psock_verdict_apply(psock, skb, ret); 798 + out: 799 + rcu_read_unlock(); 805 800 } 806 801 807 802 static int sk_psock_strp_read_done(struct strparser *strp, int err)
+48 -5
net/core/sock_map.c
··· 70 70 struct fd f; 71 71 int ret; 72 72 73 + if (attr->attach_flags || attr->replace_bpf_fd) 74 + return -EINVAL; 75 + 73 76 f = fdget(ufd); 74 77 map = __bpf_map_get(f); 75 78 if (IS_ERR(map)) 76 79 return PTR_ERR(map); 77 - ret = sock_map_prog_update(map, prog, attr->attach_type); 80 + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); 81 + fdput(f); 82 + return ret; 83 + } 84 + 85 + int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 86 + { 87 + u32 ufd = attr->target_fd; 88 + struct bpf_prog *prog; 89 + struct bpf_map *map; 90 + struct fd f; 91 + int ret; 92 + 93 + if (attr->attach_flags || attr->replace_bpf_fd) 94 + return -EINVAL; 95 + 96 + f = fdget(ufd); 97 + map = __bpf_map_get(f); 98 + if (IS_ERR(map)) 99 + return PTR_ERR(map); 100 + 101 + prog = bpf_prog_get(attr->attach_bpf_fd); 102 + if (IS_ERR(prog)) { 103 + ret = PTR_ERR(prog); 104 + goto put_map; 105 + } 106 + 107 + if (prog->type != ptype) { 108 + ret = -EINVAL; 109 + goto put_prog; 110 + } 111 + 112 + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); 113 + put_prog: 114 + bpf_prog_put(prog); 115 + put_map: 78 116 fdput(f); 79 117 return ret; 80 118 } ··· 1241 1203 } 1242 1204 1243 1205 int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 1244 - u32 which) 1206 + struct bpf_prog *old, u32 which) 1245 1207 { 1246 1208 struct sk_psock_progs *progs = sock_map_progs(map); 1209 + struct bpf_prog **pprog; 1247 1210 1248 1211 if (!progs) 1249 1212 return -EOPNOTSUPP; 1250 1213 1251 1214 switch (which) { 1252 1215 case BPF_SK_MSG_VERDICT: 1253 - psock_set_prog(&progs->msg_parser, prog); 1216 + pprog = &progs->msg_parser; 1254 1217 break; 1255 1218 case BPF_SK_SKB_STREAM_PARSER: 1256 - psock_set_prog(&progs->skb_parser, prog); 1219 + pprog = &progs->skb_parser; 1257 1220 break; 1258 1221 case BPF_SK_SKB_STREAM_VERDICT: 1259 - psock_set_prog(&progs->skb_verdict, prog); 1222 + pprog = &progs->skb_verdict; 1260 1223 break; 1261 1224 
default: 1262 1225 return -EOPNOTSUPP; 1263 1226 } 1264 1227 1228 + if (old) 1229 + return psock_replace_prog(pprog, prog, old); 1230 + 1231 + psock_set_prog(pprog, prog); 1265 1232 return 0; 1266 1233 } 1267 1234
+4 -50
net/xdp/xsk_buff_pool.c
··· 2 2 3 3 #include <net/xsk_buff_pool.h> 4 4 #include <net/xdp_sock.h> 5 - #include <linux/dma-direct.h> 6 - #include <linux/dma-noncoherent.h> 7 - #include <linux/swiotlb.h> 8 5 9 6 #include "xsk_queue.h" 10 7 ··· 52 55 pool->free_heads_cnt = chunks; 53 56 pool->headroom = headroom; 54 57 pool->chunk_size = chunk_size; 55 - pool->cheap_dma = true; 56 58 pool->unaligned = unaligned; 57 59 pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; 58 60 INIT_LIST_HEAD(&pool->free_list); ··· 121 125 } 122 126 } 123 127 124 - static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) 125 - { 126 - #if defined(CONFIG_SWIOTLB) 127 - phys_addr_t paddr; 128 - u32 i; 129 - 130 - for (i = 0; i < pool->dma_pages_cnt; i++) { 131 - paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); 132 - if (is_swiotlb_buffer(paddr)) 133 - return false; 134 - } 135 - #endif 136 - return true; 137 - } 138 - 139 - static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) 140 - { 141 - #if defined(CONFIG_HAS_DMA) 142 - const struct dma_map_ops *ops = get_dma_ops(pool->dev); 143 - 144 - if (ops) { 145 - return !ops->sync_single_for_cpu && 146 - !ops->sync_single_for_device; 147 - } 148 - 149 - if (!dma_is_direct(ops)) 150 - return false; 151 - 152 - if (!xp_check_swiotlb_dma(pool)) 153 - return false; 154 - 155 - if (!dev_is_dma_coherent(pool->dev)) { 156 - #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ 157 - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ 158 - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) 159 - return false; 160 - #endif 161 - } 162 - #endif 163 - return true; 164 - } 165 - 166 128 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, 167 129 unsigned long attrs, struct page **pages, u32 nr_pages) 168 130 { ··· 134 180 135 181 pool->dev = dev; 136 182 pool->dma_pages_cnt = nr_pages; 183 + pool->dma_need_sync = false; 137 184 138 185 for (i = 0; i < pool->dma_pages_cnt; i++) { 139 186 dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, 
··· 143 188 xp_dma_unmap(pool, attrs); 144 189 return -ENOMEM; 145 190 } 191 + if (dma_need_sync(dev, dma)) 192 + pool->dma_need_sync = true; 146 193 pool->dma_pages[i] = dma; 147 194 } 148 195 149 196 if (pool->unaligned) 150 197 xp_check_dma_contiguity(pool); 151 - 152 - pool->dev = dev; 153 - pool->cheap_dma = xp_check_cheap_dma(pool); 154 198 return 0; 155 199 } 156 200 EXPORT_SYMBOL(xp_dma_map); ··· 234 280 xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; 235 281 xskb->xdp.data_meta = xskb->xdp.data; 236 282 237 - if (!pool->cheap_dma) { 283 + if (pool->dma_need_sync) { 238 284 dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, 239 285 pool->frame_len, 240 286 DMA_BIDIRECTIONAL);
+21 -20
tools/include/uapi/linux/bpf.h
··· 3171 3171 * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) 3172 3172 * Description 3173 3173 * Copy *size* bytes from *data* into a ring buffer *ringbuf*. 3174 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3175 - * new data availability is sent. 3176 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3177 - * new data availability is sent unconditionally. 3174 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3175 + * of new data availability is sent. 3176 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3177 + * of new data availability is sent unconditionally. 3178 3178 * Return 3179 - * 0, on success; 3180 - * < 0, on error. 3179 + * 0 on success, or a negative error in case of failure. 3181 3180 * 3182 3181 * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) 3183 3182 * Description ··· 3188 3189 * void bpf_ringbuf_submit(void *data, u64 flags) 3189 3190 * Description 3190 3191 * Submit reserved ring buffer sample, pointed to by *data*. 3191 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3192 - * new data availability is sent. 3193 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3194 - * new data availability is sent unconditionally. 3192 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3193 + * of new data availability is sent. 3194 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3195 + * of new data availability is sent unconditionally. 3195 3196 * Return 3196 3197 * Nothing. Always succeeds. 3197 3198 * 3198 3199 * void bpf_ringbuf_discard(void *data, u64 flags) 3199 3200 * Description 3200 3201 * Discard reserved ring buffer sample, pointed to by *data*. 3201 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3202 - * new data availability is sent. 
3203 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3204 - * new data availability is sent unconditionally. 3202 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3203 + * of new data availability is sent. 3204 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3205 + * of new data availability is sent unconditionally. 3205 3206 * Return 3206 3207 * Nothing. Always succeeds. 3207 3208 * ··· 3209 3210 * Description 3210 3211 * Query various characteristics of provided ring buffer. What 3211 3212 * exactly is queries is determined by *flags*: 3212 - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; 3213 - * - BPF_RB_RING_SIZE - the size of ring buffer; 3214 - * - BPF_RB_CONS_POS - consumer position (can wrap around); 3215 - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); 3216 - * Data returned is just a momentary snapshots of actual values 3213 + * 3214 + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. 3215 + * * **BPF_RB_RING_SIZE**: The size of ring buffer. 3216 + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). 3217 + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). 3218 + * 3219 + * Data returned is just a momentary snapshot of actual values 3217 3220 * and could be inaccurate, so this facility should be used to 3218 3221 * power heuristics and for reporting, not to make 100% correct 3219 3222 * calculation. 3220 3223 * Return 3221 - * Requested value, or 0, if flags are not recognized. 3224 + * Requested value, or 0, if *flags* are not recognized. 3222 3225 * 3223 3226 * int bpf_csum_level(struct sk_buff *skb, u64 level) 3224 3227 * Description
+2
tools/lib/bpf/bpf.h
··· 233 233 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, 234 234 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, 235 235 __u64 *probe_offset, __u64 *probe_addr); 236 + 237 + enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ 236 238 LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); 237 239 238 240 #ifdef __cplusplus
+8 -2
tools/lib/bpf/libbpf.c
··· 4818 4818 err = -EINVAL; 4819 4819 goto out; 4820 4820 } 4821 - prog = bpf_object__find_program_by_title(obj, sec_name); 4821 + prog = NULL; 4822 + for (i = 0; i < obj->nr_programs; i++) { 4823 + if (!strcmp(obj->programs[i].section_name, sec_name)) { 4824 + prog = &obj->programs[i]; 4825 + break; 4826 + } 4827 + } 4822 4828 if (!prog) { 4823 4829 pr_warn("failed to find program '%s' for CO-RE offset relocation\n", 4824 4830 sec_name); ··· 6659 6653 .expected_attach_type = BPF_TRACE_ITER, 6660 6654 .is_attach_btf = true, 6661 6655 .attach_fn = attach_iter), 6662 - BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP, 6656 + BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, 6663 6657 BPF_XDP_DEVMAP), 6664 6658 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 6665 6659 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
+1 -1
tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
··· 36 36 fentry_res = (__u64 *)fentry_skel->bss; 37 37 fexit_res = (__u64 *)fexit_skel->bss; 38 38 printf("%lld\n", fentry_skel->bss->test1_result); 39 - for (i = 0; i < 6; i++) { 39 + for (i = 0; i < 8; i++) { 40 40 CHECK(fentry_res[i] != 1, "result", 41 41 "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); 42 42 CHECK(fexit_res[i] != 1, "result",
+2 -2
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
··· 527 527 528 528 run_tests_skb_less(tap_fd, skel->maps.last_dissection); 529 529 530 - err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); 531 - CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno); 530 + err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); 531 + CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno); 532 532 } 533 533 534 534 static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+34 -10
tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 - * Test that the flow_dissector program can be updated with a single 4 - * syscall by attaching a new program that replaces the existing one. 5 - * 6 - * Corner case - the same program cannot be attached twice. 3 + * Tests for attaching, detaching, and replacing flow_dissector BPF program. 7 4 */ 8 5 9 6 #define _GNU_SOURCE ··· 113 116 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); 114 117 115 118 out_detach: 116 - err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); 119 + err = bpf_prog_detach2(prog2, 0, BPF_FLOW_DISSECTOR); 117 120 if (CHECK_FAIL(err)) 118 121 perror("bpf_prog_detach"); 119 122 CHECK_FAIL(prog_is_attached(netns)); ··· 149 152 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); 150 153 int err, link; 151 154 152 - err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0); 155 + err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0); 153 156 if (CHECK_FAIL(err)) { 154 157 perror("bpf_prog_attach(prog1)"); 155 158 return; ··· 165 168 close(link); 166 169 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 167 170 168 - err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); 171 + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); 169 172 if (CHECK_FAIL(err)) 170 173 perror("bpf_prog_detach"); 171 174 CHECK_FAIL(prog_is_attached(netns)); ··· 185 188 186 189 /* Expect failure attaching prog when link exists */ 187 190 errno = 0; 188 - err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0); 191 + err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0); 189 192 if (CHECK_FAIL(!err || errno != EEXIST)) 190 193 perror("bpf_prog_attach(prog2) expected EEXIST"); 191 194 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); ··· 208 211 209 212 /* Expect failure detaching prog when link exists */ 210 213 errno = 0; 211 - err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); 214 + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); 212 215 if (CHECK_FAIL(!err || errno != EINVAL)) 
213 216 perror("bpf_prog_detach expected EINVAL"); 214 217 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); ··· 228 231 } 229 232 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 230 233 231 - err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); 234 + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); 232 235 if (CHECK_FAIL(err)) { 233 236 perror("bpf_prog_detach"); 234 237 return; ··· 300 303 if (CHECK_FAIL(err)) 301 304 perror("bpf_link_update"); 302 305 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); 306 + 307 + close(link); 308 + CHECK_FAIL(prog_is_attached(netns)); 309 + } 310 + 311 + static void test_link_update_same_prog(int netns, int prog1, int prog2) 312 + { 313 + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); 314 + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); 315 + int err, link; 316 + 317 + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); 318 + if (CHECK_FAIL(link < 0)) { 319 + perror("bpf_link_create(prog1)"); 320 + return; 321 + } 322 + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 323 + 324 + /* Expect success updating the prog with the same one */ 325 + update_opts.flags = 0; 326 + update_opts.old_prog_fd = 0; 327 + err = bpf_link_update(link, prog1, &update_opts); 328 + if (CHECK_FAIL(err)) 329 + perror("bpf_link_update"); 330 + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 303 331 304 332 close(link); 305 333 CHECK_FAIL(prog_is_attached(netns)); ··· 593 571 test_link_update_no_old_prog }, 594 572 { "link update with replace old prog", 595 573 test_link_update_replace_old_prog }, 574 + { "link update with same prog", 575 + test_link_update_same_prog }, 596 576 { "link update invalid opts", 597 577 test_link_update_invalid_opts }, 598 578 { "link update invalid prog",
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
··· 25 25 struct netlink_sock *sk; 26 26 } __attribute__((preserve_access_index)); 27 27 28 - static inline struct inode *SOCK_INODE(struct socket *socket) 28 + static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket) 29 29 { 30 30 return &container_of(socket, struct socket_alloc, socket)->vfs_inode; 31 31 }
+22
tools/testing/selftests/bpf/progs/fentry_test.c
··· 55 55 e == (void *)20 && f == 21; 56 56 return 0; 57 57 } 58 + 59 + struct bpf_fentry_test_t { 60 + struct bpf_fentry_test_t *a; 61 + }; 62 + 63 + __u64 test7_result = 0; 64 + SEC("fentry/bpf_fentry_test7") 65 + int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 66 + { 67 + if (arg == 0) 68 + test7_result = 1; 69 + return 0; 70 + } 71 + 72 + __u64 test8_result = 0; 73 + SEC("fentry/bpf_fentry_test8") 74 + int BPF_PROG(test8, struct bpf_fentry_test_t *arg) 75 + { 76 + if (arg->a == 0) 77 + test8_result = 1; 78 + return 0; 79 + }
+22
tools/testing/selftests/bpf/progs/fexit_test.c
··· 56 56 e == (void *)20 && f == 21 && ret == 111; 57 57 return 0; 58 58 } 59 + 60 + struct bpf_fentry_test_t { 61 + struct bpf_fentry_test *a; 62 + }; 63 + 64 + __u64 test7_result = 0; 65 + SEC("fexit/bpf_fentry_test7") 66 + int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 67 + { 68 + if (arg == 0) 69 + test7_result = 1; 70 + return 0; 71 + } 72 + 73 + __u64 test8_result = 0; 74 + SEC("fexit/bpf_fentry_test8") 75 + int BPF_PROG(test8, struct bpf_fentry_test_t *arg) 76 + { 77 + if (arg->a == 0) 78 + test8_result = 1; 79 + return 0; 80 + }
+7 -1
tools/testing/selftests/bpf/progs/test_sockmap_kern.h
··· 79 79 80 80 struct { 81 81 __uint(type, BPF_MAP_TYPE_ARRAY); 82 - __uint(max_entries, 2); 82 + __uint(max_entries, 3); 83 83 __type(key, int); 84 84 __type(value, int); 85 85 } sock_skb_opts SEC(".maps"); ··· 94 94 SEC("sk_skb1") 95 95 int bpf_prog1(struct __sk_buff *skb) 96 96 { 97 + int *f, two = 2; 98 + 99 + f = bpf_map_lookup_elem(&sock_skb_opts, &two); 100 + if (f && *f) { 101 + return *f; 102 + } 97 103 return skb->len; 98 104 } 99 105
+1 -1
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
··· 27 27 /* valid program on DEVMAP entry via SEC name; 28 28 * has access to egress and ingress ifindex 29 29 */ 30 - SEC("xdp_devmap") 30 + SEC("xdp_devmap/map_prog") 31 31 int xdp_dummy_dm(struct xdp_md *ctx) 32 32 { 33 33 char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+18
tools/testing/selftests/bpf/test_sockmap.c
··· 85 85 int txmsg_ktls_skb_redir; 86 86 int ktls; 87 87 int peek_flag; 88 + int skb_use_parser; 88 89 89 90 static const struct option long_options[] = { 90 91 {"help", no_argument, NULL, 'h' }, ··· 175 174 txmsg_apply = txmsg_cork = 0; 176 175 txmsg_ingress = txmsg_redir_skb = 0; 177 176 txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; 177 + skb_use_parser = 0; 178 178 } 179 179 180 180 static int test_start_subtest(const struct _test *t, struct sockmap_options *o) ··· 1213 1211 } 1214 1212 } 1215 1213 1214 + if (skb_use_parser) { 1215 + i = 2; 1216 + err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY); 1217 + } 1218 + 1216 1219 if (txmsg_drop) 1217 1220 options->drop_expected = true; 1218 1221 ··· 1657 1650 test_send(opt, cgrp); 1658 1651 } 1659 1652 1653 + static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) 1654 + { 1655 + txmsg_pass = 1; 1656 + skb_use_parser = 512; 1657 + opt->iov_length = 256; 1658 + opt->iov_count = 1; 1659 + opt->rate = 2; 1660 + test_exec(cgrp, opt); 1661 + } 1662 + 1660 1663 char *map_names[] = { 1661 1664 "sock_map", 1662 1665 "sock_map_txmsg", ··· 1765 1748 {"txmsg test pull-data", test_txmsg_pull}, 1766 1749 {"txmsg test pop-data", test_txmsg_pop}, 1767 1750 {"txmsg test push/pop data", test_txmsg_push_pop}, 1751 + {"txmsg text ingress parser", test_txmsg_ingress_parser}, 1768 1752 }; 1769 1753 1770 1754 static int check_whitelist(struct _test *t, struct sockmap_options *opt)