Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2020-06-30

The following pull-request contains BPF updates for your *net* tree.

We've added 28 non-merge commits during the last 9 day(s) which contain
a total of 35 files changed, 486 insertions(+), 232 deletions(-).

The main changes are:

1) Fix an incorrect verifier branch elimination for PTR_TO_BTF_ID pointer
types, from Yonghong Song.

2) Fix UAPI for sockmap and flow_dissector progs that were ignoring various
arguments passed to BPF_PROG_{ATTACH,DETACH}, from Lorenz Bauer & Jakub Sitnicki.

3) Fix broken AF_XDP DMA hacks that are poking into dma-direct and swiotlb
internals and integrate it properly into DMA core, from Christoph Hellwig.

4) Fix RCU splat from recent changes to avoid skipping ingress policy when
kTLS is enabled, from John Fastabend.

5) Fix BPF ringbuf map to enforce its size to be a power of 2 in order for its
position masking to work, from Andrii Nakryiko.

6) Fix regression from CAP_BPF work to re-allow CAP_SYS_ADMIN for loading
of network programs, from Maciej Żenczykowski.

7) Fix libbpf section name prefix for devmap progs, from Jesper Dangaard Brouer.

8) Fix formatting in UAPI documentation for BPF helpers, from Quentin Monnet.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+486 -232
+8
Documentation/core-api/dma-api.rst
··· 206 206 207 207 :: 208 208 209 + bool 210 + dma_need_sync(struct device *dev, dma_addr_t dma_addr); 211 + 212 + Returns %true if dma_sync_single_for_{device,cpu} calls are required to 213 + transfer memory ownership. Returns %false if those calls can be skipped. 214 + 215 + :: 216 + 209 217 unsigned long 210 218 dma_get_merge_boundary(struct device *dev); 211 219
+3 -2
include/linux/bpf-netns.h
··· 33 33 union bpf_attr __user *uattr); 34 34 int netns_bpf_prog_attach(const union bpf_attr *attr, 35 35 struct bpf_prog *prog); 36 - int netns_bpf_prog_detach(const union bpf_attr *attr); 36 + int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); 37 37 int netns_bpf_link_create(const union bpf_attr *attr, 38 38 struct bpf_prog *prog); 39 39 #else ··· 49 49 return -EOPNOTSUPP; 50 50 } 51 51 52 - static inline int netns_bpf_prog_detach(const union bpf_attr *attr) 52 + static inline int netns_bpf_prog_detach(const union bpf_attr *attr, 53 + enum bpf_prog_type ptype) 53 54 { 54 55 return -EOPNOTSUPP; 55 56 }
+11 -2
include/linux/bpf.h
··· 1543 1543 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ 1544 1544 1545 1545 #if defined(CONFIG_BPF_STREAM_PARSER) 1546 - int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); 1546 + int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 1547 + struct bpf_prog *old, u32 which); 1547 1548 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); 1549 + int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); 1548 1550 void sock_map_unhash(struct sock *sk); 1549 1551 void sock_map_close(struct sock *sk, long timeout); 1550 1552 #else 1551 1553 static inline int sock_map_prog_update(struct bpf_map *map, 1552 - struct bpf_prog *prog, u32 which) 1554 + struct bpf_prog *prog, 1555 + struct bpf_prog *old, u32 which) 1553 1556 { 1554 1557 return -EOPNOTSUPP; 1555 1558 } ··· 1561 1558 struct bpf_prog *prog) 1562 1559 { 1563 1560 return -EINVAL; 1561 + } 1562 + 1563 + static inline int sock_map_prog_detach(const union bpf_attr *attr, 1564 + enum bpf_prog_type ptype) 1565 + { 1566 + return -EOPNOTSUPP; 1564 1567 } 1565 1568 #endif /* CONFIG_BPF_STREAM_PARSER */ 1566 1569
+5
include/linux/btf.h
··· 82 82 return BTF_INFO_KIND(t->info) == BTF_KIND_INT; 83 83 } 84 84 85 + static inline bool btf_type_is_small_int(const struct btf_type *t) 86 + { 87 + return btf_type_is_int(t) && t->size <= sizeof(u64); 88 + } 89 + 85 90 static inline bool btf_type_is_enum(const struct btf_type *t) 86 91 { 87 92 return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
+1
include/linux/dma-direct.h
··· 87 87 void *cpu_addr, dma_addr_t dma_addr, size_t size, 88 88 unsigned long attrs); 89 89 int dma_direct_supported(struct device *dev, u64 mask); 90 + bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); 90 91 #endif /* _LINUX_DMA_DIRECT_H */
+5
include/linux/dma-mapping.h
··· 461 461 int dma_set_coherent_mask(struct device *dev, u64 mask); 462 462 u64 dma_get_required_mask(struct device *dev); 463 463 size_t dma_max_mapping_size(struct device *dev); 464 + bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); 464 465 unsigned long dma_get_merge_boundary(struct device *dev); 465 466 #else /* CONFIG_HAS_DMA */ 466 467 static inline dma_addr_t dma_map_page_attrs(struct device *dev, ··· 571 570 static inline size_t dma_max_mapping_size(struct device *dev) 572 571 { 573 572 return 0; 573 + } 574 + static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) 575 + { 576 + return false; 574 577 } 575 578 static inline unsigned long dma_get_merge_boundary(struct device *dev) 576 579 {
+13
include/linux/skmsg.h
··· 430 430 bpf_prog_put(prog); 431 431 } 432 432 433 + static inline int psock_replace_prog(struct bpf_prog **pprog, 434 + struct bpf_prog *prog, 435 + struct bpf_prog *old) 436 + { 437 + if (cmpxchg(pprog, old, prog) != old) 438 + return -ENOENT; 439 + 440 + if (old) 441 + bpf_prog_put(old); 442 + 443 + return 0; 444 + } 445 + 433 446 static inline void psock_progs_drop(struct sk_psock_progs *progs) 434 447 { 435 448 psock_set_prog(&progs->msg_parser, NULL);
+2 -1
include/net/flow_dissector.h
··· 372 372 } 373 373 374 374 #ifdef CONFIG_BPF_SYSCALL 375 - int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog); 375 + int flow_dissector_bpf_prog_attach_check(struct net *net, 376 + struct bpf_prog *prog); 376 377 #endif /* CONFIG_BPF_SYSCALL */ 377 378 378 379 #endif
+5 -2
include/net/netns/bpf.h
··· 9 9 #include <linux/bpf-netns.h> 10 10 11 11 struct bpf_prog; 12 + struct bpf_prog_array; 12 13 13 14 struct netns_bpf { 14 - struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE]; 15 - struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE]; 15 + /* Array of programs to run compiled from progs or links */ 16 + struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; 17 + struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE]; 18 + struct list_head links[MAX_NETNS_BPF_ATTACH_TYPE]; 16 19 }; 17 20 18 21 #endif /* __NETNS_BPF_H__ */
+3 -3
include/net/xsk_buff_pool.h
··· 40 40 u32 headroom; 41 41 u32 chunk_size; 42 42 u32 frame_len; 43 - bool cheap_dma; 43 + bool dma_need_sync; 44 44 bool unaligned; 45 45 void *addrs; 46 46 struct device *dev; ··· 80 80 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb); 81 81 static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb) 82 82 { 83 - if (xskb->pool->cheap_dma) 83 + if (!xskb->pool->dma_need_sync) 84 84 return; 85 85 86 86 xp_dma_sync_for_cpu_slow(xskb); ··· 91 91 static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool, 92 92 dma_addr_t dma, size_t size) 93 93 { 94 - if (pool->cheap_dma) 94 + if (!pool->dma_need_sync) 95 95 return; 96 96 97 97 xp_dma_sync_for_device_slow(pool, dma, size);
+21 -20
include/uapi/linux/bpf.h
··· 3171 3171 * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) 3172 3172 * Description 3173 3173 * Copy *size* bytes from *data* into a ring buffer *ringbuf*. 3174 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3175 - * new data availability is sent. 3176 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3177 - * new data availability is sent unconditionally. 3174 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3175 + * of new data availability is sent. 3176 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3177 + * of new data availability is sent unconditionally. 3178 3178 * Return 3179 - * 0, on success; 3180 - * < 0, on error. 3179 + * 0 on success, or a negative error in case of failure. 3181 3180 * 3182 3181 * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) 3183 3182 * Description ··· 3188 3189 * void bpf_ringbuf_submit(void *data, u64 flags) 3189 3190 * Description 3190 3191 * Submit reserved ring buffer sample, pointed to by *data*. 3191 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3192 - * new data availability is sent. 3193 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3194 - * new data availability is sent unconditionally. 3192 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3193 + * of new data availability is sent. 3194 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3195 + * of new data availability is sent unconditionally. 3195 3196 * Return 3196 3197 * Nothing. Always succeeds. 3197 3198 * 3198 3199 * void bpf_ringbuf_discard(void *data, u64 flags) 3199 3200 * Description 3200 3201 * Discard reserved ring buffer sample, pointed to by *data*. 3201 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3202 - * new data availability is sent. 
3203 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3204 - * new data availability is sent unconditionally. 3202 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3203 + * of new data availability is sent. 3204 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3205 + * of new data availability is sent unconditionally. 3205 3206 * Return 3206 3207 * Nothing. Always succeeds. 3207 3208 * ··· 3209 3210 * Description 3210 3211 * Query various characteristics of provided ring buffer. What 3211 3212 * exactly is queries is determined by *flags*: 3212 - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; 3213 - * - BPF_RB_RING_SIZE - the size of ring buffer; 3214 - * - BPF_RB_CONS_POS - consumer position (can wrap around); 3215 - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); 3216 - * Data returned is just a momentary snapshots of actual values 3213 + * 3214 + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. 3215 + * * **BPF_RB_RING_SIZE**: The size of ring buffer. 3216 + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). 3217 + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). 3218 + * 3219 + * Data returned is just a momentary snapshot of actual values 3217 3220 * and could be inaccurate, so this facility should be used to 3218 3221 * power heuristics and for reporting, not to make 100% correct 3219 3222 * calculation. 3220 3223 * Return 3221 - * Requested value, or 0, if flags are not recognized. 3224 + * Requested value, or 0, if *flags* are not recognized. 3222 3225 * 3223 3226 * int bpf_csum_level(struct sk_buff *skb, u64 level) 3224 3227 * Description
+2 -2
kernel/bpf/btf.c
··· 3746 3746 return false; 3747 3747 3748 3748 t = btf_type_skip_modifiers(btf, t->type, NULL); 3749 - if (!btf_type_is_int(t)) { 3749 + if (!btf_type_is_small_int(t)) { 3750 3750 bpf_log(log, 3751 3751 "ret type %s not allowed for fmod_ret\n", 3752 3752 btf_kind_str[BTF_INFO_KIND(t->info)]); ··· 3768 3768 /* skip modifiers */ 3769 3769 while (btf_type_is_modifier(t)) 3770 3770 t = btf_type_by_id(btf, t->type); 3771 - if (btf_type_is_int(t) || btf_type_is_enum(t)) 3771 + if (btf_type_is_small_int(t) || btf_type_is_enum(t)) 3772 3772 /* accessing a scalar */ 3773 3773 return true; 3774 3774 if (!btf_type_is_ptr(t)) {
+134 -60
kernel/bpf/net_namespace.c
··· 19 19 * with netns_bpf_mutex held. 20 20 */ 21 21 struct net *net; 22 + struct list_head node; /* node in list of links attached to net */ 22 23 }; 23 24 24 25 /* Protects updates to netns_bpf */ 25 26 DEFINE_MUTEX(netns_bpf_mutex); 26 27 27 28 /* Must be called with netns_bpf_mutex held. */ 28 - static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link) 29 + static void netns_bpf_run_array_detach(struct net *net, 30 + enum netns_bpf_attach_type type) 29 31 { 30 - struct bpf_netns_link *net_link = 31 - container_of(link, struct bpf_netns_link, link); 32 + struct bpf_prog_array *run_array; 32 33 33 - net_link->net = NULL; 34 + run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL, 35 + lockdep_is_held(&netns_bpf_mutex)); 36 + bpf_prog_array_free(run_array); 34 37 } 35 38 36 39 static void bpf_netns_link_release(struct bpf_link *link) ··· 43 40 enum netns_bpf_attach_type type = net_link->netns_type; 44 41 struct net *net; 45 42 46 - /* Link auto-detached by dying netns. */ 47 - if (!net_link->net) 48 - return; 49 - 50 43 mutex_lock(&netns_bpf_mutex); 51 44 52 - /* Recheck after potential sleep. We can race with cleanup_net 53 - * here, but if we see a non-NULL struct net pointer pre_exit 54 - * has not happened yet and will block on netns_bpf_mutex. 45 + /* We can race with cleanup_net, but if we see a non-NULL 46 + * struct net pointer, pre_exit has not run yet and wait for 47 + * netns_bpf_mutex. 
55 48 */ 56 49 net = net_link->net; 57 50 if (!net) 58 51 goto out_unlock; 59 52 60 - net->bpf.links[type] = NULL; 61 - RCU_INIT_POINTER(net->bpf.progs[type], NULL); 53 + netns_bpf_run_array_detach(net, type); 54 + list_del(&net_link->node); 62 55 63 56 out_unlock: 64 57 mutex_unlock(&netns_bpf_mutex); ··· 75 76 struct bpf_netns_link *net_link = 76 77 container_of(link, struct bpf_netns_link, link); 77 78 enum netns_bpf_attach_type type = net_link->netns_type; 79 + struct bpf_prog_array *run_array; 78 80 struct net *net; 79 81 int ret = 0; 80 82 ··· 93 93 goto out_unlock; 94 94 } 95 95 96 + run_array = rcu_dereference_protected(net->bpf.run_array[type], 97 + lockdep_is_held(&netns_bpf_mutex)); 98 + WRITE_ONCE(run_array->items[0].prog, new_prog); 99 + 96 100 old_prog = xchg(&link->prog, new_prog); 97 - rcu_assign_pointer(net->bpf.progs[type], new_prog); 98 101 bpf_prog_put(old_prog); 99 102 100 103 out_unlock: ··· 145 142 .show_fdinfo = bpf_netns_link_show_fdinfo, 146 143 }; 147 144 145 + /* Must be called with netns_bpf_mutex held. 
*/ 146 + static int __netns_bpf_prog_query(const union bpf_attr *attr, 147 + union bpf_attr __user *uattr, 148 + struct net *net, 149 + enum netns_bpf_attach_type type) 150 + { 151 + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 152 + struct bpf_prog_array *run_array; 153 + u32 prog_cnt = 0, flags = 0; 154 + 155 + run_array = rcu_dereference_protected(net->bpf.run_array[type], 156 + lockdep_is_held(&netns_bpf_mutex)); 157 + if (run_array) 158 + prog_cnt = bpf_prog_array_length(run_array); 159 + 160 + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 161 + return -EFAULT; 162 + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 163 + return -EFAULT; 164 + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 165 + return 0; 166 + 167 + return bpf_prog_array_copy_to_user(run_array, prog_ids, 168 + attr->query.prog_cnt); 169 + } 170 + 148 171 int netns_bpf_prog_query(const union bpf_attr *attr, 149 172 union bpf_attr __user *uattr) 150 173 { 151 - __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 152 - u32 prog_id, prog_cnt = 0, flags = 0; 153 174 enum netns_bpf_attach_type type; 154 - struct bpf_prog *attached; 155 175 struct net *net; 176 + int ret; 156 177 157 178 if (attr->query.query_flags) 158 179 return -EINVAL; ··· 189 162 if (IS_ERR(net)) 190 163 return PTR_ERR(net); 191 164 192 - rcu_read_lock(); 193 - attached = rcu_dereference(net->bpf.progs[type]); 194 - if (attached) { 195 - prog_cnt = 1; 196 - prog_id = attached->aux->id; 197 - } 198 - rcu_read_unlock(); 165 + mutex_lock(&netns_bpf_mutex); 166 + ret = __netns_bpf_prog_query(attr, uattr, net, type); 167 + mutex_unlock(&netns_bpf_mutex); 199 168 200 169 put_net(net); 201 - 202 - if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 203 - return -EFAULT; 204 - if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 205 - return -EFAULT; 206 - 207 - if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 208 - 
return 0; 209 - 210 - if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) 211 - return -EFAULT; 212 - 213 - return 0; 170 + return ret; 214 171 } 215 172 216 173 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) 217 174 { 175 + struct bpf_prog_array *run_array; 218 176 enum netns_bpf_attach_type type; 177 + struct bpf_prog *attached; 219 178 struct net *net; 220 179 int ret; 180 + 181 + if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd) 182 + return -EINVAL; 221 183 222 184 type = to_netns_bpf_attach_type(attr->attach_type); 223 185 if (type < 0) ··· 216 200 mutex_lock(&netns_bpf_mutex); 217 201 218 202 /* Attaching prog directly is not compatible with links */ 219 - if (net->bpf.links[type]) { 203 + if (!list_empty(&net->bpf.links[type])) { 220 204 ret = -EEXIST; 221 205 goto out_unlock; 222 206 } 223 207 224 208 switch (type) { 225 209 case NETNS_BPF_FLOW_DISSECTOR: 226 - ret = flow_dissector_bpf_prog_attach(net, prog); 210 + ret = flow_dissector_bpf_prog_attach_check(net, prog); 227 211 break; 228 212 default: 229 213 ret = -EINVAL; 230 214 break; 231 215 } 216 + if (ret) 217 + goto out_unlock; 218 + 219 + attached = net->bpf.progs[type]; 220 + if (attached == prog) { 221 + /* The same program cannot be attached twice */ 222 + ret = -EINVAL; 223 + goto out_unlock; 224 + } 225 + 226 + run_array = rcu_dereference_protected(net->bpf.run_array[type], 227 + lockdep_is_held(&netns_bpf_mutex)); 228 + if (run_array) { 229 + WRITE_ONCE(run_array->items[0].prog, prog); 230 + } else { 231 + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 232 + if (!run_array) { 233 + ret = -ENOMEM; 234 + goto out_unlock; 235 + } 236 + run_array->items[0].prog = prog; 237 + rcu_assign_pointer(net->bpf.run_array[type], run_array); 238 + } 239 + 240 + net->bpf.progs[type] = prog; 241 + if (attached) 242 + bpf_prog_put(attached); 243 + 232 244 out_unlock: 233 245 mutex_unlock(&netns_bpf_mutex); 234 246 ··· 265 221 266 222 /* Must be called with 
netns_bpf_mutex held. */ 267 223 static int __netns_bpf_prog_detach(struct net *net, 268 - enum netns_bpf_attach_type type) 224 + enum netns_bpf_attach_type type, 225 + struct bpf_prog *old) 269 226 { 270 227 struct bpf_prog *attached; 271 228 272 229 /* Progs attached via links cannot be detached */ 273 - if (net->bpf.links[type]) 230 + if (!list_empty(&net->bpf.links[type])) 274 231 return -EINVAL; 275 232 276 - attached = rcu_dereference_protected(net->bpf.progs[type], 277 - lockdep_is_held(&netns_bpf_mutex)); 278 - if (!attached) 233 + attached = net->bpf.progs[type]; 234 + if (!attached || attached != old) 279 235 return -ENOENT; 280 - RCU_INIT_POINTER(net->bpf.progs[type], NULL); 236 + netns_bpf_run_array_detach(net, type); 237 + net->bpf.progs[type] = NULL; 281 238 bpf_prog_put(attached); 282 239 return 0; 283 240 } 284 241 285 - int netns_bpf_prog_detach(const union bpf_attr *attr) 242 + int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 286 243 { 287 244 enum netns_bpf_attach_type type; 245 + struct bpf_prog *prog; 288 246 int ret; 247 + 248 + if (attr->target_fd) 249 + return -EINVAL; 289 250 290 251 type = to_netns_bpf_attach_type(attr->attach_type); 291 252 if (type < 0) 292 253 return -EINVAL; 293 254 255 + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 256 + if (IS_ERR(prog)) 257 + return PTR_ERR(prog); 258 + 294 259 mutex_lock(&netns_bpf_mutex); 295 - ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type); 260 + ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog); 296 261 mutex_unlock(&netns_bpf_mutex); 262 + 263 + bpf_prog_put(prog); 297 264 298 265 return ret; 299 266 } ··· 312 257 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, 313 258 enum netns_bpf_attach_type type) 314 259 { 315 - struct bpf_prog *prog; 260 + struct bpf_netns_link *net_link = 261 + container_of(link, struct bpf_netns_link, link); 262 + struct bpf_prog_array *run_array; 316 263 int err; 317 264 
318 265 mutex_lock(&netns_bpf_mutex); 319 266 320 267 /* Allow attaching only one prog or link for now */ 321 - if (net->bpf.links[type]) { 268 + if (!list_empty(&net->bpf.links[type])) { 322 269 err = -E2BIG; 323 270 goto out_unlock; 324 271 } 325 272 /* Links are not compatible with attaching prog directly */ 326 - prog = rcu_dereference_protected(net->bpf.progs[type], 327 - lockdep_is_held(&netns_bpf_mutex)); 328 - if (prog) { 273 + if (net->bpf.progs[type]) { 329 274 err = -EEXIST; 330 275 goto out_unlock; 331 276 } 332 277 333 278 switch (type) { 334 279 case NETNS_BPF_FLOW_DISSECTOR: 335 - err = flow_dissector_bpf_prog_attach(net, link->prog); 280 + err = flow_dissector_bpf_prog_attach_check(net, link->prog); 336 281 break; 337 282 default: 338 283 err = -EINVAL; ··· 341 286 if (err) 342 287 goto out_unlock; 343 288 344 - net->bpf.links[type] = link; 289 + run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 290 + if (!run_array) { 291 + err = -ENOMEM; 292 + goto out_unlock; 293 + } 294 + run_array->items[0].prog = link->prog; 295 + rcu_assign_pointer(net->bpf.run_array[type], run_array); 296 + 297 + list_add_tail(&net_link->node, &net->bpf.links[type]); 345 298 346 299 out_unlock: 347 300 mutex_unlock(&netns_bpf_mutex); ··· 408 345 return err; 409 346 } 410 347 348 + static int __net_init netns_bpf_pernet_init(struct net *net) 349 + { 350 + int type; 351 + 352 + for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) 353 + INIT_LIST_HEAD(&net->bpf.links[type]); 354 + 355 + return 0; 356 + } 357 + 411 358 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net) 412 359 { 413 360 enum netns_bpf_attach_type type; 414 - struct bpf_link *link; 361 + struct bpf_netns_link *net_link; 415 362 416 363 mutex_lock(&netns_bpf_mutex); 417 364 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { 418 - link = net->bpf.links[type]; 419 - if (link) 420 - bpf_netns_link_auto_detach(link); 421 - else 422 - __netns_bpf_prog_detach(net, type); 365 + 
netns_bpf_run_array_detach(net, type); 366 + list_for_each_entry(net_link, &net->bpf.links[type], node) 367 + net_link->net = NULL; /* auto-detach link */ 368 + if (net->bpf.progs[type]) 369 + bpf_prog_put(net->bpf.progs[type]); 423 370 } 424 371 mutex_unlock(&netns_bpf_mutex); 425 372 } 426 373 427 374 static struct pernet_operations netns_bpf_pernet_ops __net_initdata = { 375 + .init = netns_bpf_pernet_init, 428 376 .pre_exit = netns_bpf_pernet_pre_exit, 429 377 }; 430 378
+8 -10
kernel/bpf/ringbuf.c
··· 132 132 { 133 133 struct bpf_ringbuf *rb; 134 134 135 - if (!data_sz || !PAGE_ALIGNED(data_sz)) 136 - return ERR_PTR(-EINVAL); 137 - 138 - #ifdef CONFIG_64BIT 139 - /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */ 140 - if (data_sz > RINGBUF_MAX_DATA_SZ) 141 - return ERR_PTR(-E2BIG); 142 - #endif 143 - 144 135 rb = bpf_ringbuf_area_alloc(data_sz, numa_node); 145 136 if (!rb) 146 137 return ERR_PTR(-ENOMEM); ··· 157 166 return ERR_PTR(-EINVAL); 158 167 159 168 if (attr->key_size || attr->value_size || 160 - attr->max_entries == 0 || !PAGE_ALIGNED(attr->max_entries)) 169 + !is_power_of_2(attr->max_entries) || 170 + !PAGE_ALIGNED(attr->max_entries)) 161 171 return ERR_PTR(-EINVAL); 172 + 173 + #ifdef CONFIG_64BIT 174 + /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */ 175 + if (attr->max_entries > RINGBUF_MAX_DATA_SZ) 176 + return ERR_PTR(-E2BIG); 177 + #endif 162 178 163 179 rb_map = kzalloc(sizeof(*rb_map), GFP_USER); 164 180 if (!rb_map)
+3 -5
kernel/bpf/syscall.c
··· 2121 2121 !bpf_capable()) 2122 2122 return -EPERM; 2123 2123 2124 - if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN)) 2124 + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) 2125 2125 return -EPERM; 2126 2126 if (is_perfmon_prog_type(type) && !perfmon_capable()) 2127 2127 return -EPERM; ··· 2893 2893 switch (ptype) { 2894 2894 case BPF_PROG_TYPE_SK_MSG: 2895 2895 case BPF_PROG_TYPE_SK_SKB: 2896 - return sock_map_get_from_fd(attr, NULL); 2896 + return sock_map_prog_detach(attr, ptype); 2897 2897 case BPF_PROG_TYPE_LIRC_MODE2: 2898 2898 return lirc_prog_detach(attr); 2899 2899 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2900 - if (!capable(CAP_NET_ADMIN)) 2901 - return -EPERM; 2902 - return netns_bpf_prog_detach(attr); 2900 + return netns_bpf_prog_detach(attr, ptype); 2903 2901 case BPF_PROG_TYPE_CGROUP_DEVICE: 2904 2902 case BPF_PROG_TYPE_CGROUP_SKB: 2905 2903 case BPF_PROG_TYPE_CGROUP_SOCK:
+10 -3
kernel/bpf/verifier.c
··· 399 399 return type == PTR_TO_SOCKET || 400 400 type == PTR_TO_TCP_SOCK || 401 401 type == PTR_TO_MAP_VALUE || 402 - type == PTR_TO_SOCK_COMMON || 403 - type == PTR_TO_BTF_ID; 402 + type == PTR_TO_SOCK_COMMON; 404 403 } 405 404 406 405 static bool reg_type_may_be_null(enum bpf_reg_type type) ··· 9800 9801 int i, j, subprog_start, subprog_end = 0, len, subprog; 9801 9802 struct bpf_insn *insn; 9802 9803 void *old_bpf_func; 9803 - int err; 9804 + int err, num_exentries; 9804 9805 9805 9806 if (env->subprog_cnt <= 1) 9806 9807 return 0; ··· 9875 9876 func[i]->aux->nr_linfo = prog->aux->nr_linfo; 9876 9877 func[i]->aux->jited_linfo = prog->aux->jited_linfo; 9877 9878 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; 9879 + num_exentries = 0; 9880 + insn = func[i]->insnsi; 9881 + for (j = 0; j < func[i]->len; j++, insn++) { 9882 + if (BPF_CLASS(insn->code) == BPF_LDX && 9883 + BPF_MODE(insn->code) == BPF_PROBE_MEM) 9884 + num_exentries++; 9885 + } 9886 + func[i]->aux->num_exentries = num_exentries; 9878 9887 func[i] = bpf_int_jit_compile(func[i]); 9879 9888 if (!func[i]->jited) { 9880 9889 err = -ENOTSUPP;
+6
kernel/dma/direct.c
··· 530 530 return swiotlb_max_mapping_size(dev); 531 531 return SIZE_MAX; 532 532 } 533 + 534 + bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) 535 + { 536 + return !dev_is_dma_coherent(dev) || 537 + is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); 538 + }
+10
kernel/dma/mapping.c
··· 397 397 } 398 398 EXPORT_SYMBOL_GPL(dma_max_mapping_size); 399 399 400 + bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) 401 + { 402 + const struct dma_map_ops *ops = get_dma_ops(dev); 403 + 404 + if (dma_is_direct(ops)) 405 + return dma_direct_need_sync(dev, dma_addr); 406 + return ops->sync_single_for_cpu || ops->sync_single_for_device; 407 + } 408 + EXPORT_SYMBOL_GPL(dma_need_sync); 409 + 400 410 unsigned long dma_get_merge_boundary(struct device *dev) 401 411 { 402 412 const struct dma_map_ops *ops = get_dma_ops(dev);
+18 -1
net/bpf/test_run.c
··· 147 147 return a + (long)b + c + d + (long)e + f; 148 148 } 149 149 150 + struct bpf_fentry_test_t { 151 + struct bpf_fentry_test_t *a; 152 + }; 153 + 154 + int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) 155 + { 156 + return (long)arg; 157 + } 158 + 159 + int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) 160 + { 161 + return (long)arg->a; 162 + } 163 + 150 164 int noinline bpf_modify_return_test(int a, int *b) 151 165 { 152 166 *b += 1; ··· 199 185 const union bpf_attr *kattr, 200 186 union bpf_attr __user *uattr) 201 187 { 188 + struct bpf_fentry_test_t arg = {}; 202 189 u16 side_effect = 0, ret = 0; 203 190 int b = 2, err = -EFAULT; 204 191 u32 retval = 0; ··· 212 197 bpf_fentry_test3(4, 5, 6) != 15 || 213 198 bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || 214 199 bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || 215 - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) 200 + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || 201 + bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || 202 + bpf_fentry_test8(&arg) != 0) 216 203 goto out; 217 204 break; 218 205 case BPF_MODIFY_RETURN:
+12 -20
net/core/flow_dissector.c
··· 70 70 EXPORT_SYMBOL(skb_flow_dissector_init); 71 71 72 72 #ifdef CONFIG_BPF_SYSCALL 73 - int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) 73 + int flow_dissector_bpf_prog_attach_check(struct net *net, 74 + struct bpf_prog *prog) 74 75 { 75 76 enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; 76 - struct bpf_prog *attached; 77 77 78 78 if (net == &init_net) { 79 79 /* BPF flow dissector in the root namespace overrides ··· 86 86 for_each_net(ns) { 87 87 if (ns == &init_net) 88 88 continue; 89 - if (rcu_access_pointer(ns->bpf.progs[type])) 89 + if (rcu_access_pointer(ns->bpf.run_array[type])) 90 90 return -EEXIST; 91 91 } 92 92 } else { 93 93 /* Make sure root flow dissector is not attached 94 94 * when attaching to the non-root namespace. 95 95 */ 96 - if (rcu_access_pointer(init_net.bpf.progs[type])) 96 + if (rcu_access_pointer(init_net.bpf.run_array[type])) 97 97 return -EEXIST; 98 98 } 99 99 100 - attached = rcu_dereference_protected(net->bpf.progs[type], 101 - lockdep_is_held(&netns_bpf_mutex)); 102 - if (attached == prog) 103 - /* The same program cannot be attached twice */ 104 - return -EINVAL; 105 - 106 - rcu_assign_pointer(net->bpf.progs[type], prog); 107 - if (attached) 108 - bpf_prog_put(attached); 109 100 return 0; 110 101 } 111 102 #endif /* CONFIG_BPF_SYSCALL */ ··· 894 903 struct flow_dissector_key_addrs *key_addrs; 895 904 struct flow_dissector_key_tags *key_tags; 896 905 struct flow_dissector_key_vlan *key_vlan; 897 - struct bpf_prog *attached = NULL; 898 906 enum flow_dissect_ret fdret; 899 907 enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; 900 908 bool mpls_el = false; ··· 950 960 WARN_ON_ONCE(!net); 951 961 if (net) { 952 962 enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; 963 + struct bpf_prog_array *run_array; 953 964 954 965 rcu_read_lock(); 955 - attached = rcu_dereference(init_net.bpf.progs[type]); 966 + run_array = rcu_dereference(init_net.bpf.run_array[type]); 967 + if 
(!run_array) 968 + run_array = rcu_dereference(net->bpf.run_array[type]); 956 969 957 - if (!attached) 958 - attached = rcu_dereference(net->bpf.progs[type]); 959 - 960 - if (attached) { 970 + if (run_array) { 961 971 struct bpf_flow_keys flow_keys; 962 972 struct bpf_flow_dissector ctx = { 963 973 .flow_keys = &flow_keys, ··· 965 975 .data_end = data + hlen, 966 976 }; 967 977 __be16 n_proto = proto; 978 + struct bpf_prog *prog; 968 979 969 980 if (skb) { 970 981 ctx.skb = skb; ··· 976 985 n_proto = skb->protocol; 977 986 } 978 987 979 - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, 988 + prog = READ_ONCE(run_array->items[0].prog); 989 + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, 980 990 hlen, flags); 981 991 __skb_flow_bpf_to_target(&flow_keys, flow_dissector, 982 992 target_container);
+15 -8
net/core/skmsg.c
··· 683 683 return container_of(parser, struct sk_psock, parser); 684 684 } 685 685 686 - static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) 686 + static void sk_psock_skb_redirect(struct sk_buff *skb) 687 687 { 688 688 struct sk_psock *psock_other; 689 689 struct sock *sk_other; ··· 715 715 } 716 716 } 717 717 718 - static void sk_psock_tls_verdict_apply(struct sk_psock *psock, 719 - struct sk_buff *skb, int verdict) 718 + static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) 720 719 { 721 720 switch (verdict) { 722 721 case __SK_REDIRECT: 723 - sk_psock_skb_redirect(psock, skb); 722 + sk_psock_skb_redirect(skb); 724 723 break; 725 724 case __SK_PASS: 726 725 case __SK_DROP: ··· 740 741 ret = sk_psock_bpf_run(psock, prog, skb); 741 742 ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); 742 743 } 744 + sk_psock_tls_verdict_apply(skb, ret); 743 745 rcu_read_unlock(); 744 - sk_psock_tls_verdict_apply(psock, skb, ret); 745 746 return ret; 746 747 } 747 748 EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); ··· 769 770 } 770 771 goto out_free; 771 772 case __SK_REDIRECT: 772 - sk_psock_skb_redirect(psock, skb); 773 + sk_psock_skb_redirect(skb); 773 774 break; 774 775 case __SK_DROP: 775 776 /* fall-through */ ··· 781 782 782 783 static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) 783 784 { 784 - struct sk_psock *psock = sk_psock_from_strp(strp); 785 + struct sk_psock *psock; 785 786 struct bpf_prog *prog; 786 787 int ret = __SK_DROP; 788 + struct sock *sk; 787 789 788 790 rcu_read_lock(); 791 + sk = strp->sk; 792 + psock = sk_psock(sk); 793 + if (unlikely(!psock)) { 794 + kfree_skb(skb); 795 + goto out; 796 + } 789 797 prog = READ_ONCE(psock->progs.skb_verdict); 790 798 if (likely(prog)) { 791 799 skb_orphan(skb); ··· 800 794 ret = sk_psock_bpf_run(psock, prog, skb); 801 795 ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); 802 796 } 803 - rcu_read_unlock(); 804 797 
sk_psock_verdict_apply(psock, skb, ret); 798 + out: 799 + rcu_read_unlock(); 805 800 } 806 801 807 802 static int sk_psock_strp_read_done(struct strparser *strp, int err)
+48 -5
net/core/sock_map.c
··· 70 70 struct fd f; 71 71 int ret; 72 72 73 + if (attr->attach_flags || attr->replace_bpf_fd) 74 + return -EINVAL; 75 + 73 76 f = fdget(ufd); 74 77 map = __bpf_map_get(f); 75 78 if (IS_ERR(map)) 76 79 return PTR_ERR(map); 77 - ret = sock_map_prog_update(map, prog, attr->attach_type); 80 + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); 81 + fdput(f); 82 + return ret; 83 + } 84 + 85 + int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 86 + { 87 + u32 ufd = attr->target_fd; 88 + struct bpf_prog *prog; 89 + struct bpf_map *map; 90 + struct fd f; 91 + int ret; 92 + 93 + if (attr->attach_flags || attr->replace_bpf_fd) 94 + return -EINVAL; 95 + 96 + f = fdget(ufd); 97 + map = __bpf_map_get(f); 98 + if (IS_ERR(map)) 99 + return PTR_ERR(map); 100 + 101 + prog = bpf_prog_get(attr->attach_bpf_fd); 102 + if (IS_ERR(prog)) { 103 + ret = PTR_ERR(prog); 104 + goto put_map; 105 + } 106 + 107 + if (prog->type != ptype) { 108 + ret = -EINVAL; 109 + goto put_prog; 110 + } 111 + 112 + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); 113 + put_prog: 114 + bpf_prog_put(prog); 115 + put_map: 78 116 fdput(f); 79 117 return ret; 80 118 } ··· 1241 1203 } 1242 1204 1243 1205 int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 1244 - u32 which) 1206 + struct bpf_prog *old, u32 which) 1245 1207 { 1246 1208 struct sk_psock_progs *progs = sock_map_progs(map); 1209 + struct bpf_prog **pprog; 1247 1210 1248 1211 if (!progs) 1249 1212 return -EOPNOTSUPP; 1250 1213 1251 1214 switch (which) { 1252 1215 case BPF_SK_MSG_VERDICT: 1253 - psock_set_prog(&progs->msg_parser, prog); 1216 + pprog = &progs->msg_parser; 1254 1217 break; 1255 1218 case BPF_SK_SKB_STREAM_PARSER: 1256 - psock_set_prog(&progs->skb_parser, prog); 1219 + pprog = &progs->skb_parser; 1257 1220 break; 1258 1221 case BPF_SK_SKB_STREAM_VERDICT: 1259 - psock_set_prog(&progs->skb_verdict, prog); 1222 + pprog = &progs->skb_verdict; 1260 1223 break; 1261 1224 
default: 1262 1225 return -EOPNOTSUPP; 1263 1226 } 1264 1227 1228 + if (old) 1229 + return psock_replace_prog(pprog, prog, old); 1230 + 1231 + psock_set_prog(pprog, prog); 1265 1232 return 0; 1266 1233 } 1267 1234
+4 -50
net/xdp/xsk_buff_pool.c
··· 2 2 3 3 #include <net/xsk_buff_pool.h> 4 4 #include <net/xdp_sock.h> 5 - #include <linux/dma-direct.h> 6 - #include <linux/dma-noncoherent.h> 7 - #include <linux/swiotlb.h> 8 5 9 6 #include "xsk_queue.h" 10 7 ··· 52 55 pool->free_heads_cnt = chunks; 53 56 pool->headroom = headroom; 54 57 pool->chunk_size = chunk_size; 55 - pool->cheap_dma = true; 56 58 pool->unaligned = unaligned; 57 59 pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; 58 60 INIT_LIST_HEAD(&pool->free_list); ··· 121 125 } 122 126 } 123 127 124 - static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) 125 - { 126 - #if defined(CONFIG_SWIOTLB) 127 - phys_addr_t paddr; 128 - u32 i; 129 - 130 - for (i = 0; i < pool->dma_pages_cnt; i++) { 131 - paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); 132 - if (is_swiotlb_buffer(paddr)) 133 - return false; 134 - } 135 - #endif 136 - return true; 137 - } 138 - 139 - static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) 140 - { 141 - #if defined(CONFIG_HAS_DMA) 142 - const struct dma_map_ops *ops = get_dma_ops(pool->dev); 143 - 144 - if (ops) { 145 - return !ops->sync_single_for_cpu && 146 - !ops->sync_single_for_device; 147 - } 148 - 149 - if (!dma_is_direct(ops)) 150 - return false; 151 - 152 - if (!xp_check_swiotlb_dma(pool)) 153 - return false; 154 - 155 - if (!dev_is_dma_coherent(pool->dev)) { 156 - #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ 157 - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ 158 - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) 159 - return false; 160 - #endif 161 - } 162 - #endif 163 - return true; 164 - } 165 - 166 128 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, 167 129 unsigned long attrs, struct page **pages, u32 nr_pages) 168 130 { ··· 134 180 135 181 pool->dev = dev; 136 182 pool->dma_pages_cnt = nr_pages; 183 + pool->dma_need_sync = false; 137 184 138 185 for (i = 0; i < pool->dma_pages_cnt; i++) { 139 186 dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, 
··· 143 188 xp_dma_unmap(pool, attrs); 144 189 return -ENOMEM; 145 190 } 191 + if (dma_need_sync(dev, dma)) 192 + pool->dma_need_sync = true; 146 193 pool->dma_pages[i] = dma; 147 194 } 148 195 149 196 if (pool->unaligned) 150 197 xp_check_dma_contiguity(pool); 151 - 152 - pool->dev = dev; 153 - pool->cheap_dma = xp_check_cheap_dma(pool); 154 198 return 0; 155 199 } 156 200 EXPORT_SYMBOL(xp_dma_map); ··· 234 280 xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; 235 281 xskb->xdp.data_meta = xskb->xdp.data; 236 282 237 - if (!pool->cheap_dma) { 283 + if (pool->dma_need_sync) { 238 284 dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, 239 285 pool->frame_len, 240 286 DMA_BIDIRECTIONAL);
+21 -20
tools/include/uapi/linux/bpf.h
··· 3171 3171 * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) 3172 3172 * Description 3173 3173 * Copy *size* bytes from *data* into a ring buffer *ringbuf*. 3174 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3175 - * new data availability is sent. 3176 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3177 - * new data availability is sent unconditionally. 3174 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3175 + * of new data availability is sent. 3176 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3177 + * of new data availability is sent unconditionally. 3178 3178 * Return 3179 - * 0, on success; 3180 - * < 0, on error. 3179 + * 0 on success, or a negative error in case of failure. 3181 3180 * 3182 3181 * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) 3183 3182 * Description ··· 3188 3189 * void bpf_ringbuf_submit(void *data, u64 flags) 3189 3190 * Description 3190 3191 * Submit reserved ring buffer sample, pointed to by *data*. 3191 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3192 - * new data availability is sent. 3193 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3194 - * new data availability is sent unconditionally. 3192 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3193 + * of new data availability is sent. 3194 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3195 + * of new data availability is sent unconditionally. 3195 3196 * Return 3196 3197 * Nothing. Always succeeds. 3197 3198 * 3198 3199 * void bpf_ringbuf_discard(void *data, u64 flags) 3199 3200 * Description 3200 3201 * Discard reserved ring buffer sample, pointed to by *data*. 3201 - * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of 3202 - * new data availability is sent. 
3203 - * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of 3204 - * new data availability is sent unconditionally. 3202 + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification 3203 + * of new data availability is sent. 3204 + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification 3205 + * of new data availability is sent unconditionally. 3205 3206 * Return 3206 3207 * Nothing. Always succeeds. 3207 3208 * ··· 3209 3210 * Description 3210 3211 * Query various characteristics of provided ring buffer. What 3211 3212 * exactly is queries is determined by *flags*: 3212 - * - BPF_RB_AVAIL_DATA - amount of data not yet consumed; 3213 - * - BPF_RB_RING_SIZE - the size of ring buffer; 3214 - * - BPF_RB_CONS_POS - consumer position (can wrap around); 3215 - * - BPF_RB_PROD_POS - producer(s) position (can wrap around); 3216 - * Data returned is just a momentary snapshots of actual values 3213 + * 3214 + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. 3215 + * * **BPF_RB_RING_SIZE**: The size of ring buffer. 3216 + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). 3217 + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). 3218 + * 3219 + * Data returned is just a momentary snapshot of actual values 3217 3220 * and could be inaccurate, so this facility should be used to 3218 3221 * power heuristics and for reporting, not to make 100% correct 3219 3222 * calculation. 3220 3223 * Return 3221 - * Requested value, or 0, if flags are not recognized. 3224 + * Requested value, or 0, if *flags* are not recognized. 3222 3225 * 3223 3226 * int bpf_csum_level(struct sk_buff *skb, u64 level) 3224 3227 * Description
+2
tools/lib/bpf/bpf.h
··· 233 233 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, 234 234 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, 235 235 __u64 *probe_offset, __u64 *probe_addr); 236 + 237 + enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ 236 238 LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); 237 239 238 240 #ifdef __cplusplus
+8 -2
tools/lib/bpf/libbpf.c
··· 4818 4818 err = -EINVAL; 4819 4819 goto out; 4820 4820 } 4821 - prog = bpf_object__find_program_by_title(obj, sec_name); 4821 + prog = NULL; 4822 + for (i = 0; i < obj->nr_programs; i++) { 4823 + if (!strcmp(obj->programs[i].section_name, sec_name)) { 4824 + prog = &obj->programs[i]; 4825 + break; 4826 + } 4827 + } 4822 4828 if (!prog) { 4823 4829 pr_warn("failed to find program '%s' for CO-RE offset relocation\n", 4824 4830 sec_name); ··· 6659 6653 .expected_attach_type = BPF_TRACE_ITER, 6660 6654 .is_attach_btf = true, 6661 6655 .attach_fn = attach_iter), 6662 - BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP, 6656 + BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, 6663 6657 BPF_XDP_DEVMAP), 6664 6658 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 6665 6659 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
+1 -1
tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
··· 36 36 fentry_res = (__u64 *)fentry_skel->bss; 37 37 fexit_res = (__u64 *)fexit_skel->bss; 38 38 printf("%lld\n", fentry_skel->bss->test1_result); 39 - for (i = 0; i < 6; i++) { 39 + for (i = 0; i < 8; i++) { 40 40 CHECK(fentry_res[i] != 1, "result", 41 41 "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); 42 42 CHECK(fexit_res[i] != 1, "result",
+2 -2
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
··· 527 527 528 528 run_tests_skb_less(tap_fd, skel->maps.last_dissection); 529 529 530 - err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); 531 - CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno); 530 + err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR); 531 + CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno); 532 532 } 533 533 534 534 static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+34 -10
tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 - * Test that the flow_dissector program can be updated with a single 4 - * syscall by attaching a new program that replaces the existing one. 5 - * 6 - * Corner case - the same program cannot be attached twice. 3 + * Tests for attaching, detaching, and replacing flow_dissector BPF program. 7 4 */ 8 5 9 6 #define _GNU_SOURCE ··· 113 116 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); 114 117 115 118 out_detach: 116 - err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); 119 + err = bpf_prog_detach2(prog2, 0, BPF_FLOW_DISSECTOR); 117 120 if (CHECK_FAIL(err)) 118 121 perror("bpf_prog_detach"); 119 122 CHECK_FAIL(prog_is_attached(netns)); ··· 149 152 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); 150 153 int err, link; 151 154 152 - err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0); 155 + err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0); 153 156 if (CHECK_FAIL(err)) { 154 157 perror("bpf_prog_attach(prog1)"); 155 158 return; ··· 165 168 close(link); 166 169 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 167 170 168 - err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); 171 + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); 169 172 if (CHECK_FAIL(err)) 170 173 perror("bpf_prog_detach"); 171 174 CHECK_FAIL(prog_is_attached(netns)); ··· 185 188 186 189 /* Expect failure attaching prog when link exists */ 187 190 errno = 0; 188 - err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0); 191 + err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0); 189 192 if (CHECK_FAIL(!err || errno != EEXIST)) 190 193 perror("bpf_prog_attach(prog2) expected EEXIST"); 191 194 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); ··· 208 211 209 212 /* Expect failure detaching prog when link exists */ 210 213 errno = 0; 211 - err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR); 214 + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); 212 215 if (CHECK_FAIL(!err || errno != EINVAL)) 
213 216 perror("bpf_prog_detach expected EINVAL"); 214 217 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); ··· 228 231 } 229 232 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 230 233 231 - err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); 234 + err = bpf_prog_detach2(prog1, 0, BPF_FLOW_DISSECTOR); 232 235 if (CHECK_FAIL(err)) { 233 236 perror("bpf_prog_detach"); 234 237 return; ··· 300 303 if (CHECK_FAIL(err)) 301 304 perror("bpf_link_update"); 302 305 CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2)); 306 + 307 + close(link); 308 + CHECK_FAIL(prog_is_attached(netns)); 309 + } 310 + 311 + static void test_link_update_same_prog(int netns, int prog1, int prog2) 312 + { 313 + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts); 314 + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); 315 + int err, link; 316 + 317 + link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts); 318 + if (CHECK_FAIL(link < 0)) { 319 + perror("bpf_link_create(prog1)"); 320 + return; 321 + } 322 + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 323 + 324 + /* Expect success updating the prog with the same one */ 325 + update_opts.flags = 0; 326 + update_opts.old_prog_fd = 0; 327 + err = bpf_link_update(link, prog1, &update_opts); 328 + if (CHECK_FAIL(err)) 329 + perror("bpf_link_update"); 330 + CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); 303 331 304 332 close(link); 305 333 CHECK_FAIL(prog_is_attached(netns)); ··· 593 571 test_link_update_no_old_prog }, 594 572 { "link update with replace old prog", 595 573 test_link_update_replace_old_prog }, 574 + { "link update with same prog", 575 + test_link_update_same_prog }, 596 576 { "link update invalid opts", 597 577 test_link_update_invalid_opts }, 598 578 { "link update invalid prog",
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
··· 25 25 struct netlink_sock *sk; 26 26 } __attribute__((preserve_access_index)); 27 27 28 - static inline struct inode *SOCK_INODE(struct socket *socket) 28 + static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket) 29 29 { 30 30 return &container_of(socket, struct socket_alloc, socket)->vfs_inode; 31 31 }
+22
tools/testing/selftests/bpf/progs/fentry_test.c
··· 55 55 e == (void *)20 && f == 21; 56 56 return 0; 57 57 } 58 + 59 + struct bpf_fentry_test_t { 60 + struct bpf_fentry_test_t *a; 61 + }; 62 + 63 + __u64 test7_result = 0; 64 + SEC("fentry/bpf_fentry_test7") 65 + int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 66 + { 67 + if (arg == 0) 68 + test7_result = 1; 69 + return 0; 70 + } 71 + 72 + __u64 test8_result = 0; 73 + SEC("fentry/bpf_fentry_test8") 74 + int BPF_PROG(test8, struct bpf_fentry_test_t *arg) 75 + { 76 + if (arg->a == 0) 77 + test8_result = 1; 78 + return 0; 79 + }
+22
tools/testing/selftests/bpf/progs/fexit_test.c
··· 56 56 e == (void *)20 && f == 21 && ret == 111; 57 57 return 0; 58 58 } 59 + 60 + struct bpf_fentry_test_t { 61 + struct bpf_fentry_test *a; 62 + }; 63 + 64 + __u64 test7_result = 0; 65 + SEC("fexit/bpf_fentry_test7") 66 + int BPF_PROG(test7, struct bpf_fentry_test_t *arg) 67 + { 68 + if (arg == 0) 69 + test7_result = 1; 70 + return 0; 71 + } 72 + 73 + __u64 test8_result = 0; 74 + SEC("fexit/bpf_fentry_test8") 75 + int BPF_PROG(test8, struct bpf_fentry_test_t *arg) 76 + { 77 + if (arg->a == 0) 78 + test8_result = 1; 79 + return 0; 80 + }
+7 -1
tools/testing/selftests/bpf/progs/test_sockmap_kern.h
··· 79 79 80 80 struct { 81 81 __uint(type, BPF_MAP_TYPE_ARRAY); 82 - __uint(max_entries, 2); 82 + __uint(max_entries, 3); 83 83 __type(key, int); 84 84 __type(value, int); 85 85 } sock_skb_opts SEC(".maps"); ··· 94 94 SEC("sk_skb1") 95 95 int bpf_prog1(struct __sk_buff *skb) 96 96 { 97 + int *f, two = 2; 98 + 99 + f = bpf_map_lookup_elem(&sock_skb_opts, &two); 100 + if (f && *f) { 101 + return *f; 102 + } 97 103 return skb->len; 98 104 } 99 105
+1 -1
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
··· 27 27 /* valid program on DEVMAP entry via SEC name; 28 28 * has access to egress and ingress ifindex 29 29 */ 30 - SEC("xdp_devmap") 30 + SEC("xdp_devmap/map_prog") 31 31 int xdp_dummy_dm(struct xdp_md *ctx) 32 32 { 33 33 char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+18
tools/testing/selftests/bpf/test_sockmap.c
··· 85 85 int txmsg_ktls_skb_redir; 86 86 int ktls; 87 87 int peek_flag; 88 + int skb_use_parser; 88 89 89 90 static const struct option long_options[] = { 90 91 {"help", no_argument, NULL, 'h' }, ··· 175 174 txmsg_apply = txmsg_cork = 0; 176 175 txmsg_ingress = txmsg_redir_skb = 0; 177 176 txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; 177 + skb_use_parser = 0; 178 178 } 179 179 180 180 static int test_start_subtest(const struct _test *t, struct sockmap_options *o) ··· 1213 1211 } 1214 1212 } 1215 1213 1214 + if (skb_use_parser) { 1215 + i = 2; 1216 + err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY); 1217 + } 1218 + 1216 1219 if (txmsg_drop) 1217 1220 options->drop_expected = true; 1218 1221 ··· 1657 1650 test_send(opt, cgrp); 1658 1651 } 1659 1652 1653 + static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) 1654 + { 1655 + txmsg_pass = 1; 1656 + skb_use_parser = 512; 1657 + opt->iov_length = 256; 1658 + opt->iov_count = 1; 1659 + opt->rate = 2; 1660 + test_exec(cgrp, opt); 1661 + } 1662 + 1660 1663 char *map_names[] = { 1661 1664 "sock_map", 1662 1665 "sock_map_txmsg", ··· 1765 1748 {"txmsg test pull-data", test_txmsg_pull}, 1766 1749 {"txmsg test pop-data", test_txmsg_pop}, 1767 1750 {"txmsg test push/pop data", test_txmsg_push_pop}, 1751 + {"txmsg text ingress parser", test_txmsg_ingress_parser}, 1768 1752 }; 1769 1753 1770 1754 static int check_whitelist(struct _test *t, struct sockmap_options *opt)