
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-11-14

1) Add BTF generation for kernel modules and extend the BTF infra in the kernel,
e.g. with support for split BTF loading and validation, from Andrii Nakryiko.

2) Support for pointers beyond pkt_end to recognize LLVM generated patterns
on inlined branch conditions, from Alexei Starovoitov.

3) Implement bpf_local_storage for task_struct for BPF LSM, from KP Singh.

4) Enable FENTRY/FEXIT/RAW_TP tracing programs to use the bpf_sk_storage
infra, from Martin KaFai Lau.

5) Add XDP bulk APIs that introduce a defer/flush mechanism to optimize the
XDP_REDIRECT path, from Lorenzo Bianconi.

6) Fix a potential (although rather theoretical) deadlock of hashtab in NMI
context, from Song Liu.

7) Fixes for cross and out-of-tree builds of bpftool and runqslower, allowing
builds for different target archs from the same source tree, from Jean-Philippe Brucker.

8) Fix error path in htab_map_alloc() triggered from syzbot, from Eric Dumazet.

9) Move functionality from test_tcpbpf_user into the test_progs framework so it
can run in BPF CI, from Alexander Duyck.

10) Lift hashtab key_size limit to be larger than MAX_BPF_STACK, from Florian Lehner.

Note that for the fix from Song we have seen a sparse report on context
imbalance, which requires changes in sparse itself for proper annotation
detection; this is currently being discussed among developers on
linux-sparse [0]. Once we have more clarification/guidance after their fix,
Song will follow up.

[0] https://lore.kernel.org/linux-sparse/CAHk-=wh4bx8A8dHnX612MsDO13st6uzAz1mJ1PaHHVevJx_ZCw@mail.gmail.com/T/
https://lore.kernel.org/linux-sparse/20201109221345.uklbp3lzgq6g42zb@ltop.local/T/

* git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (66 commits)
net: mlx5: Add xdp tx return bulking support
net: mvpp2: Add xdp tx return bulking support
net: mvneta: Add xdp tx return bulking support
net: page_pool: Add bulk support for ptr_ring
net: xdp: Introduce bulking for xdp tx return path
bpf: Expose bpf_d_path helper to sleepable LSM hooks
bpf: Augment the set of sleepable LSM hooks
bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP
bpf: Allow using bpf_sk_storage in FENTRY/FEXIT/RAW_TP
bpf: Rename some functions in bpf_sk_storage
bpf: Folding omem_charge() into sk_storage_charge()
selftests/bpf: Add asm tests for pkt vs pkt_end comparison.
selftests/bpf: Add skb_pkt_end test
bpf: Support for pointers beyond pkt_end.
tools/bpf: Always run the *-clean recipes
tools/bpf: Add bootstrap/ to .gitignore
bpf: Fix NULL dereference in bpf_task_storage
tools/bpftool: Fix build slowdown
tools/runqslower: Build bpftool using HOSTCC
tools/runqslower: Enable out-of-tree build
...
====================

Link: https://lore.kernel.org/r/20201114020819.29584-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+3909 -1273
+8
Documentation/ABI/testing/sysfs-kernel-btf
···
 		information with description of all internal kernel types. See
 		Documentation/bpf/btf.rst for detailed description of format
 		itself.
+
+What:		/sys/kernel/btf/<module-name>
+Date:		Nov 2020
+KernelVersion:	5.11
+Contact:	bpf@vger.kernel.org
+Description:
+		Read-only binary attribute exposing kernel module's BTF type
+		information as an add-on to the kernel's BTF (/sys/kernel/btf/vmlinux).
+9 -1
drivers/net/ethernet/marvell/mvneta.c
···
 				 struct netdev_queue *nq, bool napi)
 {
 	unsigned int bytes_compl = 0, pkts_compl = 0;
+	struct xdp_frame_bulk bq;
 	int i;
+
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
 
 	for (i = 0; i < num; i++) {
 		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
···
 		if (napi && buf->type == MVNETA_TYPE_XDP_TX)
 			xdp_return_frame_rx_napi(buf->xdpf);
 		else
-			xdp_return_frame(buf->xdpf);
+			xdp_return_frame_bulk(buf->xdpf, &bq);
 		}
 	}
+	xdp_flush_frame_bulk(&bq);
+
+	rcu_read_unlock();
 
 	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
 }
+9 -1
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
···
 				struct mvpp2_tx_queue *txq,
 				struct mvpp2_txq_pcpu *txq_pcpu, int num)
 {
+	struct xdp_frame_bulk bq;
 	int i;
+
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
 
 	for (i = 0; i < num; i++) {
 		struct mvpp2_txq_pcpu_buf *tx_buf =
···
 			dev_kfree_skb_any(tx_buf->skb);
 		else if (tx_buf->type == MVPP2_TYPE_XDP_TX ||
 			 tx_buf->type == MVPP2_TYPE_XDP_NDO)
-			xdp_return_frame(tx_buf->xdpf);
+			xdp_return_frame_bulk(tx_buf->xdpf, &bq);
 
 		mvpp2_txq_inc_get(txq_pcpu);
 	}
+	xdp_flush_frame_bulk(&bq);
+
+	rcu_read_unlock();
 }
 
 static inline struct mvpp2_rx_queue *mvpp2_get_rx_queue(struct mvpp2_port *port,
+18 -4
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
···
 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 				  struct mlx5e_xdp_wqe_info *wi,
 				  u32 *xsk_frames,
-				  bool recycle)
+				  bool recycle,
+				  struct xdp_frame_bulk *bq)
 {
 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
 	u16 i;
···
 			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
 			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
 					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
-			xdp_return_frame(xdpi.frame.xdpf);
+			xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
 			break;
 		case MLX5E_XDP_XMIT_MODE_PAGE:
 			/* XDP_TX from the regular RQ */
···
 
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 {
+	struct xdp_frame_bulk bq;
 	struct mlx5e_xdpsq *sq;
 	struct mlx5_cqe64 *cqe;
 	u32 xsk_frames = 0;
 	u16 sqcc;
 	int i;
+
+	xdp_frame_bulk_init(&bq);
 
 	sq = container_of(cq, struct mlx5e_xdpsq, cq);
···
 
 			sqcc += wi->num_wqebbs;
 
-			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
+			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
 		} while (!last_wqe);
 
 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
···
 			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
 		}
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+	xdp_flush_frame_bulk(&bq);
 
 	if (xsk_frames)
 		xsk_tx_completed(sq->xsk_pool, xsk_frames);
···
 
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 {
+	struct xdp_frame_bulk bq;
 	u32 xsk_frames = 0;
+
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
 
 	while (sq->cc != sq->pc) {
 		struct mlx5e_xdp_wqe_info *wi;
···
 
 		sq->cc += wi->num_wqebbs;
 
-		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
 	}
+
+	xdp_flush_frame_bulk(&bq);
+	rcu_read_unlock();
 
 	if (xsk_frames)
 		xsk_tx_completed(sq->xsk_pool, xsk_frames);
+8
include/linux/bpf.h
···
 struct bpf_iter_aux_info;
 struct bpf_local_storage;
 struct bpf_local_storage_map;
+struct kobject;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
+extern struct kobject *btf_kobj;
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
 					struct bpf_iter_aux_info *aux);
···
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
+	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
···
 typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
 					 struct bpf_link_info *info);
 
+enum bpf_iter_feature {
+	BPF_ITER_RESCHED	= BIT(0),
+};
+
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
···
 	bpf_iter_show_fdinfo_t show_fdinfo;
 	bpf_iter_fill_link_info_t fill_link_info;
 	u32 ctx_arg_info_size;
+	u32 feature;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
 };
+30
include/linux/bpf_lsm.h
···
 #ifndef _LINUX_BPF_LSM_H
 #define _LINUX_BPF_LSM_H
 
+#include <linux/sched.h>
 #include <linux/bpf.h>
 #include <linux/lsm_hooks.h>
 
···
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog);
 
+bool bpf_lsm_is_sleepable_hook(u32 btf_id);
+
 static inline struct bpf_storage_blob *bpf_inode(
 	const struct inode *inode)
 {
···
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	if (unlikely(!task->security))
+		return NULL;
+
+	return task->security + bpf_lsm_blob_sizes.lbs_task;
+}
+
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
+void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
+static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
+{
+	return false;
+}
+
 static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 				      const struct bpf_prog *prog)
···
 	return NULL;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	return NULL;
+}
+
 static inline void bpf_inode_storage_free(struct inode *inode)
+{
+}
+
+static inline void bpf_task_storage_free(struct task_struct *task)
 {
 }
 
+1
include/linux/bpf_types.h
···
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
+1 -1
include/linux/bpf_verifier.h
···
 	enum bpf_reg_type type;
 	union {
 		/* valid when type == PTR_TO_PACKET */
-		u16 range;
+		int range;
 
 		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 		 *   PTR_TO_MAP_VALUE_OR_NULL
+4
include/linux/module.h
···
 	unsigned int num_bpf_raw_events;
 	struct bpf_raw_event_map *bpf_raw_events;
 #endif
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	unsigned int btf_data_size;
+	void *btf_data;
+#endif
 #ifdef CONFIG_JUMP_LABEL
 	struct jump_entry *jump_entries;
 	unsigned int num_jump_entries;
+2
include/net/bpf_sk_storage.h
···
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto bpf_sk_storage_get_tracing_proto;
+extern const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
+26
include/net/page_pool.h
···
 void page_pool_destroy(struct page_pool *pool);
 void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
 void page_pool_release_page(struct page_pool *pool, struct page *page);
+void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+			     int count);
 #else
 static inline void page_pool_destroy(struct page_pool *pool)
 {
···
 }
 static inline void page_pool_release_page(struct page_pool *pool,
 					  struct page *page)
+{
+}
+
+static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+					   int count)
 {
 }
 #endif
···
 	if (unlikely(pool->p.nid != new_nid))
 		page_pool_update_nid(pool, new_nid);
 }
+
+static inline void page_pool_ring_lock(struct page_pool *pool)
+	__acquires(&pool->ring.producer_lock)
+{
+	if (in_serving_softirq())
+		spin_lock(&pool->ring.producer_lock);
+	else
+		spin_lock_bh(&pool->ring.producer_lock);
+}
+
+static inline void page_pool_ring_unlock(struct page_pool *pool)
+	__releases(&pool->ring.producer_lock)
+{
+	if (in_serving_softirq())
+		spin_unlock(&pool->ring.producer_lock);
+	else
+		spin_unlock_bh(&pool->ring.producer_lock);
+}
+
 #endif /* _NET_PAGE_POOL_H */
+16 -1
include/net/xdp.h
···
 	struct net_device *dev_rx; /* used by cpumap */
 };
 
+#define XDP_BULK_QUEUE_SIZE	16
+struct xdp_frame_bulk {
+	int count;
+	void *xa;
+	void *q[XDP_BULK_QUEUE_SIZE];
+};
+
+static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
+{
+	/* bq->count will be zero'ed when bq->xa gets updated */
+	bq->xa = NULL;
+}
 
 static inline struct skb_shared_info *
 xdp_get_shared_info_from_frame(struct xdp_frame *frame)
···
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+			   struct xdp_frame_bulk *bq);
 
 /* When sending xdp_frame into the network stack, then there is no
  * return point callback, which is needed to release e.g. DMA-mapping
···
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf);
 
-#define DEV_MAP_BULK_SIZE 16
+#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
 #endif /* __LINUX_NET_XDP_H__ */
+51
include/uapi/linux/bpf.h
···
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
···
 *	Return
 *		The helper returns **TC_ACT_REDIRECT** on success or
 *		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**. From this
+ *		perspective, the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*. The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist. *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage. If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *	Description
+ *		Return a BTF pointer to the "current" task.
+ *		This pointer can also be used in helpers that accept an
+ *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *	Return
+ *		Pointer to the current task.
 */
 #define __BPF_FUNC_MAPPER(FN)	\
 	FN(unspec),		\
···
 	FN(bpf_per_cpu_ptr),		\
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
···
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
+1
kernel/bpf/Makefile
···
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
+14
kernel/bpf/bpf_iter.c
···
 	iter_priv->done_stop = true;
 }
 
+static bool bpf_iter_support_resched(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
+}
+
 /* maximum visited objects before bailing out */
 #define MAX_ITER_OBJECTS	1000000
 
···
 	struct seq_file *seq = file->private_data;
 	size_t n, offs, copied = 0;
 	int err = 0, num_objs = 0;
+	bool can_resched;
 	void *p;
 
 	mutex_lock(&seq->lock);
···
 		goto done;
 	}
 
+	can_resched = bpf_iter_support_resched(seq);
 	while (1) {
 		loff_t pos = seq->index;
 
···
 			}
 			break;
 		}
+
+		if (can_resched)
+			cond_resched();
 	}
 stop:
 	offs = seq->count;
+88
kernel/bpf/bpf_lsm.c
···
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
+	case BPF_FUNC_spin_lock:
+		return &bpf_spin_lock_proto;
+	case BPF_FUNC_spin_unlock:
+		return &bpf_spin_unlock_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
+}
+
+/* The set of hooks which are called without pagefaults disabled and are allowed
+ * to "sleep" and thus can be used for sleeable BPF programs.
+ */
+BTF_SET_START(sleepable_lsm_hooks)
+BTF_ID(func, bpf_lsm_bpf)
+BTF_ID(func, bpf_lsm_bpf_map)
+BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
+BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_prog)
+BTF_ID(func, bpf_lsm_bprm_check_security)
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+BTF_ID(func, bpf_lsm_bprm_committing_creds)
+BTF_ID(func, bpf_lsm_bprm_creds_for_exec)
+BTF_ID(func, bpf_lsm_bprm_creds_from_file)
+BTF_ID(func, bpf_lsm_capget)
+BTF_ID(func, bpf_lsm_capset)
+BTF_ID(func, bpf_lsm_cred_prepare)
+BTF_ID(func, bpf_lsm_file_ioctl)
+BTF_ID(func, bpf_lsm_file_lock)
+BTF_ID(func, bpf_lsm_file_open)
+BTF_ID(func, bpf_lsm_file_receive)
+BTF_ID(func, bpf_lsm_inet_conn_established)
+BTF_ID(func, bpf_lsm_inode_create)
+BTF_ID(func, bpf_lsm_inode_free_security)
+BTF_ID(func, bpf_lsm_inode_getattr)
+BTF_ID(func, bpf_lsm_inode_getxattr)
+BTF_ID(func, bpf_lsm_inode_mknod)
+BTF_ID(func, bpf_lsm_inode_need_killpriv)
+BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_readlink)
+BTF_ID(func, bpf_lsm_inode_rename)
+BTF_ID(func, bpf_lsm_inode_rmdir)
+BTF_ID(func, bpf_lsm_inode_setattr)
+BTF_ID(func, bpf_lsm_inode_setxattr)
+BTF_ID(func, bpf_lsm_inode_symlink)
+BTF_ID(func, bpf_lsm_inode_unlink)
+BTF_ID(func, bpf_lsm_kernel_module_request)
+BTF_ID(func, bpf_lsm_kernfs_init_security)
+BTF_ID(func, bpf_lsm_key_free)
+BTF_ID(func, bpf_lsm_mmap_file)
+BTF_ID(func, bpf_lsm_netlink_send)
+BTF_ID(func, bpf_lsm_path_notify)
+BTF_ID(func, bpf_lsm_release_secctx)
+BTF_ID(func, bpf_lsm_sb_alloc_security)
+BTF_ID(func, bpf_lsm_sb_eat_lsm_opts)
+BTF_ID(func, bpf_lsm_sb_kern_mount)
+BTF_ID(func, bpf_lsm_sb_mount)
+BTF_ID(func, bpf_lsm_sb_remount)
+BTF_ID(func, bpf_lsm_sb_set_mnt_opts)
+BTF_ID(func, bpf_lsm_sb_show_options)
+BTF_ID(func, bpf_lsm_sb_statfs)
+BTF_ID(func, bpf_lsm_sb_umount)
+BTF_ID(func, bpf_lsm_settime)
+BTF_ID(func, bpf_lsm_socket_accept)
+BTF_ID(func, bpf_lsm_socket_bind)
+BTF_ID(func, bpf_lsm_socket_connect)
+BTF_ID(func, bpf_lsm_socket_create)
+BTF_ID(func, bpf_lsm_socket_getpeername)
+BTF_ID(func, bpf_lsm_socket_getpeersec_dgram)
+BTF_ID(func, bpf_lsm_socket_getsockname)
+BTF_ID(func, bpf_lsm_socket_getsockopt)
+BTF_ID(func, bpf_lsm_socket_listen)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_recvmsg)
+BTF_ID(func, bpf_lsm_socket_sendmsg)
+BTF_ID(func, bpf_lsm_socket_shutdown)
+BTF_ID(func, bpf_lsm_socket_socketpair)
+BTF_ID(func, bpf_lsm_syslog)
+BTF_ID(func, bpf_lsm_task_alloc)
+BTF_ID(func, bpf_lsm_task_getsecid)
+BTF_ID(func, bpf_lsm_task_prctl)
+BTF_ID(func, bpf_lsm_task_setscheduler)
+BTF_ID(func, bpf_lsm_task_to_inode)
+BTF_SET_END(sleepable_lsm_hooks)
+
+bool bpf_lsm_is_sleepable_hook(u32 btf_id)
+{
+	return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
 }
 
 const struct bpf_prog_ops lsm_prog_ops = {
+315
kernel/bpf/bpf_task_storage.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2020 Facebook 4 + * Copyright 2020 Google LLC. 5 + */ 6 + 7 + #include <linux/pid.h> 8 + #include <linux/sched.h> 9 + #include <linux/rculist.h> 10 + #include <linux/list.h> 11 + #include <linux/hash.h> 12 + #include <linux/types.h> 13 + #include <linux/spinlock.h> 14 + #include <linux/bpf.h> 15 + #include <linux/bpf_local_storage.h> 16 + #include <linux/filter.h> 17 + #include <uapi/linux/btf.h> 18 + #include <linux/bpf_lsm.h> 19 + #include <linux/btf_ids.h> 20 + #include <linux/fdtable.h> 21 + 22 + DEFINE_BPF_STORAGE_CACHE(task_cache); 23 + 24 + static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) 25 + { 26 + struct task_struct *task = owner; 27 + struct bpf_storage_blob *bsb; 28 + 29 + bsb = bpf_task(task); 30 + if (!bsb) 31 + return NULL; 32 + return &bsb->storage; 33 + } 34 + 35 + static struct bpf_local_storage_data * 36 + task_storage_lookup(struct task_struct *task, struct bpf_map *map, 37 + bool cacheit_lockit) 38 + { 39 + struct bpf_local_storage *task_storage; 40 + struct bpf_local_storage_map *smap; 41 + struct bpf_storage_blob *bsb; 42 + 43 + bsb = bpf_task(task); 44 + if (!bsb) 45 + return NULL; 46 + 47 + task_storage = rcu_dereference(bsb->storage); 48 + if (!task_storage) 49 + return NULL; 50 + 51 + smap = (struct bpf_local_storage_map *)map; 52 + return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit); 53 + } 54 + 55 + void bpf_task_storage_free(struct task_struct *task) 56 + { 57 + struct bpf_local_storage_elem *selem; 58 + struct bpf_local_storage *local_storage; 59 + bool free_task_storage = false; 60 + struct bpf_storage_blob *bsb; 61 + struct hlist_node *n; 62 + 63 + bsb = bpf_task(task); 64 + if (!bsb) 65 + return; 66 + 67 + rcu_read_lock(); 68 + 69 + local_storage = rcu_dereference(bsb->storage); 70 + if (!local_storage) { 71 + rcu_read_unlock(); 72 + return; 73 + } 74 + 75 + /* Neither the bpf_prog nor the bpf-map's syscall 76 + * could be 
modifying the local_storage->list now. 77 + * Thus, no elem can be added-to or deleted-from the 78 + * local_storage->list by the bpf_prog or by the bpf-map's syscall. 79 + * 80 + * It is racing with bpf_local_storage_map_free() alone 81 + * when unlinking elem from the local_storage->list and 82 + * the map's bucket->list. 83 + */ 84 + raw_spin_lock_bh(&local_storage->lock); 85 + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { 86 + /* Always unlink from map before unlinking from 87 + * local_storage. 88 + */ 89 + bpf_selem_unlink_map(selem); 90 + free_task_storage = bpf_selem_unlink_storage_nolock( 91 + local_storage, selem, false); 92 + } 93 + raw_spin_unlock_bh(&local_storage->lock); 94 + rcu_read_unlock(); 95 + 96 + /* free_task_storage should always be true as long as 97 + * local_storage->list was non-empty. 98 + */ 99 + if (free_task_storage) 100 + kfree_rcu(local_storage, rcu); 101 + } 102 + 103 + static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) 104 + { 105 + struct bpf_local_storage_data *sdata; 106 + struct task_struct *task; 107 + unsigned int f_flags; 108 + struct pid *pid; 109 + int fd, err; 110 + 111 + fd = *(int *)key; 112 + pid = pidfd_get_pid(fd, &f_flags); 113 + if (IS_ERR(pid)) 114 + return ERR_CAST(pid); 115 + 116 + /* We should be in an RCU read side critical section, it should be safe 117 + * to call pid_task. 118 + */ 119 + WARN_ON_ONCE(!rcu_read_lock_held()); 120 + task = pid_task(pid, PIDTYPE_PID); 121 + if (!task) { 122 + err = -ENOENT; 123 + goto out; 124 + } 125 + 126 + sdata = task_storage_lookup(task, map, true); 127 + put_pid(pid); 128 + return sdata ? 
sdata->data : NULL; 129 + out: 130 + put_pid(pid); 131 + return ERR_PTR(err); 132 + } 133 + 134 + static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, 135 + void *value, u64 map_flags) 136 + { 137 + struct bpf_local_storage_data *sdata; 138 + struct task_struct *task; 139 + unsigned int f_flags; 140 + struct pid *pid; 141 + int fd, err; 142 + 143 + fd = *(int *)key; 144 + pid = pidfd_get_pid(fd, &f_flags); 145 + if (IS_ERR(pid)) 146 + return PTR_ERR(pid); 147 + 148 + /* We should be in an RCU read side critical section, it should be safe 149 + * to call pid_task. 150 + */ 151 + WARN_ON_ONCE(!rcu_read_lock_held()); 152 + task = pid_task(pid, PIDTYPE_PID); 153 + if (!task || !task_storage_ptr(task)) { 154 + err = -ENOENT; 155 + goto out; 156 + } 157 + 158 + sdata = bpf_local_storage_update( 159 + task, (struct bpf_local_storage_map *)map, value, map_flags); 160 + 161 + err = PTR_ERR_OR_ZERO(sdata); 162 + out: 163 + put_pid(pid); 164 + return err; 165 + } 166 + 167 + static int task_storage_delete(struct task_struct *task, struct bpf_map *map) 168 + { 169 + struct bpf_local_storage_data *sdata; 170 + 171 + sdata = task_storage_lookup(task, map, false); 172 + if (!sdata) 173 + return -ENOENT; 174 + 175 + bpf_selem_unlink(SELEM(sdata)); 176 + 177 + return 0; 178 + } 179 + 180 + static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) 181 + { 182 + struct task_struct *task; 183 + unsigned int f_flags; 184 + struct pid *pid; 185 + int fd, err; 186 + 187 + fd = *(int *)key; 188 + pid = pidfd_get_pid(fd, &f_flags); 189 + if (IS_ERR(pid)) 190 + return PTR_ERR(pid); 191 + 192 + /* We should be in an RCU read side critical section, it should be safe 193 + * to call pid_task. 
+ */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	err = task_storage_delete(task, map);
+out:
+	put_pid(pid);
+	return err;
+}
+
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	/* explicitly check that the task_storage_ptr is not
+	 * NULL as task_storage_lookup returns NULL in this case and
+	 * bpf_local_storage_update expects the owner to have a
+	 * valid storage pointer.
+	 */
+	if (!task_storage_ptr(task))
+		return (unsigned long)NULL;
+
+	sdata = task_storage_lookup(task, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	/* This helper must only be called from places where the lifetime of the task
+	 * is guaranteed. Either by being refcounted or by being protected
+	 * by an RCU read-side critical section.
+	 */
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			task, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+			(unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
+	   task)
+{
+	/* This helper must only be called from places where the lifetime of the task
+	 * is guaranteed. Either by being refcounted or by being protected
+	 * by an RCU read-side critical section.
+	 */
+	return task_storage_delete(task, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	return &smap->map;
+}
+
+static void task_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
+}
+
+static int task_storage_map_btf_id;
+const struct bpf_map_ops task_storage_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc_check = bpf_local_storage_map_alloc_check,
+	.map_alloc = task_storage_map_alloc,
+	.map_free = task_storage_map_free,
+	.map_get_next_key = notsupp_get_next_key,
+	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
+	.map_update_elem = bpf_pid_task_storage_update_elem,
+	.map_delete_elem = bpf_pid_task_storage_delete_elem,
+	.map_check_btf = bpf_local_storage_map_check_btf,
+	.map_btf_name = "bpf_local_storage_map",
+	.map_btf_id = &task_storage_map_btf_id,
+	.map_owner_storage_ptr = task_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
+
+const struct bpf_func_proto bpf_task_storage_get_proto = {
+	.func = bpf_task_storage_get,
+	.gpl_only = false,
+	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_task_storage_delete_proto = {
+	.func = bpf_task_storage_delete,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+};
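The get-or-create flow of bpf_task_storage_get() above is simple: reject unknown flags, look up the owner's existing storage, and only allocate new storage when BPF_LOCAL_STORAGE_GET_F_CREATE was passed. A standalone user-space sketch of that contract follows; the struct and function names are illustrative, not kernel API:

```c
/* Hypothetical sketch of the get-or-create semantics of
 * bpf_task_storage_get(): per-owner storage is looked up first and
 * only created when the F_CREATE flag is set. Not kernel code.
 */
#include <stdlib.h>

#define GET_F_CREATE 1u

struct storage {
	void *owner;
	int value;
	struct storage *next;
};

static struct storage *head;

/* Look up storage for @owner; create it with @init_value if requested. */
static struct storage *storage_get(void *owner, int init_value, unsigned int flags)
{
	struct storage *s;

	if (flags & ~GET_F_CREATE)	/* unknown flags are rejected, as above */
		return NULL;
	for (s = head; s; s = s->next)
		if (s->owner == owner)
			return s;	/* existing storage wins over init_value */
	if (!(flags & GET_F_CREATE))
		return NULL;		/* lookup-only mode: no implicit create */
	s = calloc(1, sizeof(*s));
	if (!s)
		return NULL;
	s->owner = owner;
	s->value = init_value;
	s->next = head;
	head = s;
	return s;
}
```

As in the helper, a second call with F_CREATE for the same owner returns the existing storage unchanged rather than overwriting it (the kernel passes BPF_NOEXIST to bpf_local_storage_update for the same reason).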
+357 -56
kernel/bpf/btf.c
···
 #include <linux/skmsg.h>
 #include <linux/perf_event.h>
 #include <linux/bsearch.h>
-#include <linux/btf_ids.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 #include <net/sock.h>

 /* BTF (BPF Type Format) is the meta data format which describes
···
 	const char *strings;
 	void *nohdr_data;
 	struct btf_header hdr;
-	u32 nr_types;
+	u32 nr_types; /* includes VOID for base BTF */
 	u32 types_size;
 	u32 data_size;
 	refcount_t refcnt;
 	u32 id;
 	struct rcu_head rcu;
+
+	/* split BTF support */
+	struct btf *base_btf;
+	u32 start_id; /* first type ID in this BTF (0 for base BTF) */
+	u32 start_str_off; /* first string offset (0 for base BTF) */
+	char name[MODULE_NAME_LEN];
+	bool kernel_btf;
 };

 enum verifier_phase {
···
 	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
 }

+static u32 btf_nr_types_total(const struct btf *btf)
+{
+	u32 total = 0;
+
+	while (btf) {
+		total += btf->nr_types;
+		btf = btf->base_btf;
+	}
+
+	return total;
+}
+
 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
 {
 	const struct btf_type *t;
 	const char *tname;
-	u32 i;
+	u32 i, total;

-	for (i = 1; i <= btf->nr_types; i++) {
-		t = btf->types[i];
+	total = btf_nr_types_total(btf);
+	for (i = 1; i < total; i++) {
+		t = btf_type_by_id(btf, i);
 		if (BTF_INFO_KIND(t->info) != kind)
 			continue;

···
 static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
-	return BTF_STR_OFFSET_VALID(offset) &&
-		offset < btf->hdr.str_len;
+	if (!BTF_STR_OFFSET_VALID(offset))
+		return false;
+
+	while (offset < btf->start_str_off)
+		btf = btf->base_btf;
+
+	offset -= btf->start_str_off;
+	return offset < btf->hdr.str_len;
 }

 static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
···
 	return true;
 }

+static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
+{
+	while (offset < btf->start_str_off)
+		btf = btf->base_btf;
+
+	offset -= btf->start_str_off;
+	if (offset < btf->hdr.str_len)
+		return &btf->strings[offset];
+
+	return NULL;
+}
+
 static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
 {
 	/* offset must be valid */
-	const char *src = &btf->strings[offset];
+	const char *src = btf_str_by_offset(btf, offset);
 	const char *src_limit;

 	if (!__btf_name_char_ok(*src, true, dot_ok))
···
 static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
 {
+	const char *name;
+
 	if (!offset)
 		return "(anon)";
-	else if (offset < btf->hdr.str_len)
-		return &btf->strings[offset];
-	else
-		return "(invalid-name-offset)";
+
+	name = btf_str_by_offset(btf, offset);
+	return name ?: "(invalid-name-offset)";
 }

 const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
-	if (offset < btf->hdr.str_len)
-		return &btf->strings[offset];
-
-	return NULL;
+	return btf_str_by_offset(btf, offset);
 }

 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
 {
-	if (type_id > btf->nr_types)
-		return NULL;
+	while (type_id < btf->start_id)
+		btf = btf->base_btf;

+	type_id -= btf->start_id;
+	if (type_id >= btf->nr_types)
+		return NULL;
 	return btf->types[type_id];
 }

···
 {
 	struct btf *btf = env->btf;

-	/* < 2 because +1 for btf_void which is always in btf->types[0].
-	 * btf_void is not accounted in btf->nr_types because btf_void
-	 * does not come from the BTF file.
-	 */
-	if (btf->types_size - btf->nr_types < 2) {
+	if (btf->types_size == btf->nr_types) {
 		/* Expand 'types' array */

 		struct btf_type **new_types;
 		u32 expand_by, new_size;

-		if (btf->types_size == BTF_MAX_TYPE) {
+		if (btf->start_id + btf->types_size == BTF_MAX_TYPE) {
 			btf_verifier_log(env, "Exceeded max num of types");
 			return -E2BIG;
 		}
···
 		if (!new_types)
 			return -ENOMEM;

-		if (btf->nr_types == 0)
-			new_types[0] = &btf_void;
-		else
+		if (btf->nr_types == 0) {
+			if (!btf->base_btf) {
+				/* lazily init VOID type */
+				new_types[0] = &btf_void;
+				btf->nr_types++;
+			}
+		} else {
 			memcpy(new_types, btf->types,
-			       sizeof(*btf->types) * (btf->nr_types + 1));
+			       sizeof(*btf->types) * btf->nr_types);
+		}

 		kvfree(btf->types);
 		btf->types = new_types;
 		btf->types_size = new_size;
 	}

-	btf->types[++(btf->nr_types)] = t;
+	btf->types[btf->nr_types++] = t;

 	return 0;
 }
···
 	u32 *resolved_ids = NULL;
 	u8 *visit_states = NULL;

-	/* +1 for btf_void */
-	resolved_sizes = kvcalloc(nr_types + 1, sizeof(*resolved_sizes),
+	resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes),
 				  GFP_KERNEL | __GFP_NOWARN);
 	if (!resolved_sizes)
 		goto nomem;

-	resolved_ids = kvcalloc(nr_types + 1, sizeof(*resolved_ids),
+	resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids),
 				GFP_KERNEL | __GFP_NOWARN);
 	if (!resolved_ids)
 		goto nomem;

-	visit_states = kvcalloc(nr_types + 1, sizeof(*visit_states),
+	visit_states = kvcalloc(nr_types, sizeof(*visit_states),
 				GFP_KERNEL | __GFP_NOWARN);
 	if (!visit_states)
 		goto nomem;
···
 static bool env_type_is_resolved(const struct btf_verifier_env *env,
 				 u32 type_id)
 {
-	return env->visit_states[type_id] == RESOLVED;
+	/* base BTF types should be resolved by now */
+	if (type_id < env->btf->start_id)
+		return true;
+
+	return env->visit_states[type_id - env->btf->start_id] == RESOLVED;
 }

 static int env_stack_push(struct btf_verifier_env *env,
 			  const struct btf_type *t, u32 type_id)
 {
+	const struct btf *btf = env->btf;
 	struct resolve_vertex *v;

 	if (env->top_stack == MAX_RESOLVE_DEPTH)
 		return -E2BIG;

-	if (env->visit_states[type_id] != NOT_VISITED)
+	if (type_id < btf->start_id
+	    || env->visit_states[type_id - btf->start_id] != NOT_VISITED)
 		return -EEXIST;

-	env->visit_states[type_id] = VISITED;
+	env->visit_states[type_id - btf->start_id] = VISITED;

 	v = &env->stack[env->top_stack++];
 	v->t = t;
···
 	u32 type_id = env->stack[--(env->top_stack)].type_id;
 	struct btf *btf = env->btf;

+	type_id -= btf->start_id; /* adjust to local type id */
 	btf->resolved_sizes[type_id] = resolved_size;
 	btf->resolved_ids[type_id] = resolved_type_id;
 	env->visit_states[type_id] = RESOLVED;
···
 	return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL);
 }

+static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id)
+{
+	while (type_id < btf->start_id)
+		btf = btf->base_btf;
+
+	return btf->resolved_ids[type_id - btf->start_id];
+}
+
 /* The input param "type_id" must point to a needs_resolve type */
 static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
						  u32 *type_id)
 {
-	*type_id = btf->resolved_ids[*type_id];
+	*type_id = btf_resolved_type_id(btf, *type_id);
 	return btf_type_by_id(btf, *type_id);
+}
+
+static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id)
+{
+	while (type_id < btf->start_id)
+		btf = btf->base_btf;
+
+	return btf->resolved_sizes[type_id - btf->start_id];
 }

 const struct btf_type *btf_type_id_size(const struct btf *btf,
···
 	if (btf_type_has_size(size_type)) {
 		size = size_type->size;
 	} else if (btf_type_is_array(size_type)) {
-		size = btf->resolved_sizes[size_type_id];
+		size = btf_resolved_type_size(btf, size_type_id);
 	} else if (btf_type_is_ptr(size_type)) {
 		size = sizeof(void *);
 	} else {
···
 		     !btf_type_is_var(size_type)))
 			return NULL;

-		size_type_id = btf->resolved_ids[size_type_id];
+		size_type_id = btf_resolved_type_id(btf, size_type_id);
 		size_type = btf_type_by_id(btf, size_type_id);
 		if (btf_type_nosize_or_null(size_type))
 			return NULL;
 		else if (btf_type_has_size(size_type))
 			size = size_type->size;
 		else if (btf_type_is_array(size_type))
-			size = btf->resolved_sizes[size_type_id];
+			size = btf_resolved_type_size(btf, size_type_id);
 		else if (btf_type_is_ptr(size_type))
 			size = sizeof(void *);
 		else
···
 	cur = btf->nohdr_data + hdr->type_off;
 	end = cur + hdr->type_len;

-	env->log_type_id = 1;
+	env->log_type_id = btf->base_btf ? btf->start_id : 1;
 	while (cur < end) {
 		struct btf_type *t = cur;
 		s32 meta_size;
···
 		return false;

 	if (btf_type_is_struct(t) || btf_type_is_datasec(t))
-		return !btf->resolved_ids[type_id] &&
-		       !btf->resolved_sizes[type_id];
+		return !btf_resolved_type_id(btf, type_id) &&
+		       !btf_resolved_type_size(btf, type_id);

 	if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
 	    btf_type_is_var(t)) {
···
 		elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
 		return elem_type && !btf_type_is_modifier(elem_type) &&
 			(array->nelems * elem_size ==
-			 btf->resolved_sizes[type_id]);
+			 btf_resolved_type_size(btf, type_id));
 	}

 	return false;
···
 static int btf_check_all_types(struct btf_verifier_env *env)
 {
 	struct btf *btf = env->btf;
-	u32 type_id;
+	const struct btf_type *t;
+	u32 type_id, i;
 	int err;

 	err = env_resolve_init(env);
···
 		return err;

 	env->phase++;
-	for (type_id = 1; type_id <= btf->nr_types; type_id++) {
-		const struct btf_type *t = btf_type_by_id(btf, type_id);
+	for (i = btf->base_btf ? 0 : 1; i < btf->nr_types; i++) {
+		type_id = btf->start_id + i;
+		t = btf_type_by_id(btf, type_id);

 		env->log_type_id = type_id;
 		if (btf_type_needs_resolve(t) &&
···
 		return -EINVAL;
 	}

-	if (!hdr->type_len) {
+	if (!env->btf->base_btf && !hdr->type_len) {
 		btf_verifier_log(env, "No type found");
 		return -EINVAL;
 	}
···
 		return -EINVAL;
 	}

-	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
-	    start[0] || end[-1]) {
+	btf->strings = start;
+
+	if (btf->base_btf && !hdr->str_len)
+		return 0;
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) {
 		btf_verifier_log(env, "Invalid string section");
 		return -EINVAL;
 	}
-
-	btf->strings = start;
+	if (!btf->base_btf && start[0]) {
+		btf_verifier_log(env, "Invalid string section");
+		return -EINVAL;
+	}

 	return 0;
 }
···
 	btf->data = __start_BTF;
 	btf->data_size = __stop_BTF - __start_BTF;
+	btf->kernel_btf = true;
+	snprintf(btf->name, sizeof(btf->name), "vmlinux");

 	err = btf_parse_hdr(env);
 	if (err)
···
 	bpf_struct_ops_init(btf, log);

-	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);
+
+	err = btf_alloc_id(btf);
+	if (err)
+		goto errout;
+
+	btf_verifier_env_free(env);
 	return btf;

 errout:
···
 	}
 	return ERR_PTR(err);
 }
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+
+static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size)
+{
+	struct btf_verifier_env *env = NULL;
+	struct bpf_verifier_log *log;
+	struct btf *btf = NULL, *base_btf;
+	int err;
+
+	base_btf = bpf_get_btf_vmlinux();
+	if (IS_ERR(base_btf))
+		return base_btf;
+	if (!base_btf)
+		return ERR_PTR(-EINVAL);
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+	if (!env)
+		return ERR_PTR(-ENOMEM);
+
+	log = &env->log;
+	log->level = BPF_LOG_KERNEL;
+
+	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+	if (!btf) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	env->btf = btf;
+
+	btf->base_btf = base_btf;
+	btf->start_id = base_btf->nr_types;
+	btf->start_str_off = base_btf->hdr.str_len;
+	btf->kernel_btf = true;
+	snprintf(btf->name, sizeof(btf->name), "%s", module_name);
+
+	btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
+	if (!btf->data) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	memcpy(btf->data, data, data_size);
+	btf->data_size = data_size;
+
+	err = btf_parse_hdr(env);
+	if (err)
+		goto errout;
+
+	btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
+	err = btf_parse_str_sec(env);
+	if (err)
+		goto errout;
+
+	err = btf_check_all_metas(env);
+	if (err)
+		goto errout;
+
+	btf_verifier_env_free(env);
+	refcount_set(&btf->refcnt, 1);
+	return btf;
+
+errout:
+	btf_verifier_env_free(env);
+	if (btf) {
+		kvfree(btf->data);
+		kvfree(btf->types);
+		kfree(btf);
+	}
+	return ERR_PTR(err);
+}
+
+#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */

 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
 {
···
 	while (t && btf_type_is_modifier(t))
 		t = btf_type_by_id(btf, t->type);
 	if (!t) {
-		*bad_type = btf->types[0];
+		*bad_type = btf_type_by_id(btf, 0);
 		return -EINVAL;
 	}
 	if (btf_type_is_ptr(t))
···
 	struct bpf_btf_info info;
 	u32 info_copy, btf_copy;
 	void __user *ubtf;
-	u32 uinfo_len;
+	char __user *uname;
+	u32 uinfo_len, uname_len, name_len;
+	int ret = 0;

 	uinfo = u64_to_user_ptr(attr->info.info);
 	uinfo_len = attr->info.info_len;
···
 		return -EFAULT;
 	info.btf_size = btf->data_size;

+	info.kernel_btf = btf->kernel_btf;
+
+	uname = u64_to_user_ptr(info.name);
+	uname_len = info.name_len;
+	if (!uname ^ !uname_len)
+		return -EINVAL;
+
+	name_len = strlen(btf->name);
+	info.name_len = name_len;
+
+	if (uname) {
+		if (uname_len >= name_len + 1) {
+			if (copy_to_user(uname, btf->name, name_len + 1))
+				return -EFAULT;
+		} else {
+			char zero = '\0';
+
+			if (copy_to_user(uname, btf->name, uname_len - 1))
+				return -EFAULT;
+			if (put_user(zero, uname + uname_len - 1))
+				return -EFAULT;
+			/* let user-space know about too short buffer */
+			ret = -ENOSPC;
+		}
+	}
+
 	if (copy_to_user(uinfo, &info, info_copy) ||
 	    put_user(info_copy, &uattr->info.info_len))
 		return -EFAULT;

-	return 0;
+	return ret;
 }

 int btf_get_fd_by_id(u32 id)
···
 {
 	return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
 }
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+struct btf_module {
+	struct list_head list;
+	struct module *module;
+	struct btf *btf;
+	struct bin_attribute *sysfs_attr;
+};
+
+static LIST_HEAD(btf_modules);
+static DEFINE_MUTEX(btf_module_mutex);
+
+static ssize_t
+btf_module_read(struct file *file, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
+		char *buf, loff_t off, size_t len)
+{
+	const struct btf *btf = bin_attr->private;
+
+	memcpy(buf, btf->data + off, len);
+	return len;
+}
+
+static int btf_module_notify(struct notifier_block *nb, unsigned long op,
+			     void *module)
+{
+	struct btf_module *btf_mod, *tmp;
+	struct module *mod = module;
+	struct btf *btf;
+	int err = 0;
+
+	if (mod->btf_data_size == 0 ||
+	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+		goto out;
+
+	switch (op) {
+	case MODULE_STATE_COMING:
+		btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL);
+		if (!btf_mod) {
+			err = -ENOMEM;
+			goto out;
+		}
+		btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size);
+		if (IS_ERR(btf)) {
+			pr_warn("failed to validate module [%s] BTF: %ld\n",
+				mod->name, PTR_ERR(btf));
+			kfree(btf_mod);
+			err = PTR_ERR(btf);
+			goto out;
+		}
+		err = btf_alloc_id(btf);
+		if (err) {
+			btf_free(btf);
+			kfree(btf_mod);
+			goto out;
+		}
+
+		mutex_lock(&btf_module_mutex);
+		btf_mod->module = module;
+		btf_mod->btf = btf;
+		list_add(&btf_mod->list, &btf_modules);
+		mutex_unlock(&btf_module_mutex);
+
+		if (IS_ENABLED(CONFIG_SYSFS)) {
+			struct bin_attribute *attr;
+
+			attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+			if (!attr)
+				goto out;
+
+			sysfs_bin_attr_init(attr);
+			attr->attr.name = btf->name;
+			attr->attr.mode = 0444;
+			attr->size = btf->data_size;
+			attr->private = btf;
+			attr->read = btf_module_read;
+
+			err = sysfs_create_bin_file(btf_kobj, attr);
+			if (err) {
+				pr_warn("failed to register module [%s] BTF in sysfs: %d\n",
+					mod->name, err);
+				kfree(attr);
+				err = 0;
+				goto out;
+			}
+
+			btf_mod->sysfs_attr = attr;
+		}
+
+		break;
+	case MODULE_STATE_GOING:
+		mutex_lock(&btf_module_mutex);
+		list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+			if (btf_mod->module != module)
+				continue;
+
+			list_del(&btf_mod->list);
+			if (btf_mod->sysfs_attr)
+				sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
+			btf_put(btf_mod->btf);
+			kfree(btf_mod->sysfs_attr);
+			kfree(btf_mod);
+			break;
+		}
+		mutex_unlock(&btf_module_mutex);
+		break;
+	}
+out:
+	return notifier_from_errno(err);
+}
+
+static struct notifier_block btf_module_nb = {
+	.notifier_call = btf_module_notify,
+};
+
+static int __init btf_module_init(void)
+{
+	register_module_notifier(&btf_module_nb);
+	return 0;
+}
+
+fs_initcall(btf_module_init);
+#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
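The split-BTF lookups in the btf.c changes above all share one pattern: a module BTF continues the vmlinux type-ID space, owning the half-open window [start_id, start_id + nr_types), and a lookup walks down the base_btf chain until the ID falls inside the current object's window. A minimal standalone sketch of that ID-space chaining (illustrative struct, not the kernel's):

```c
/* Sketch of the split-BTF ID resolution pattern: an overlay object
 * continues its base's type-ID space, and lookups descend to the base
 * when the ID falls below start_id. Names here are illustrative.
 */
#include <stddef.h>

struct btf_sketch {
	struct btf_sketch *base;	/* NULL for the base (vmlinux) BTF */
	unsigned int start_id;		/* first type ID owned by this object */
	unsigned int nr_types;		/* number of types it owns */
	const char **types;		/* nr_types entries, indexed locally */
};

static const char *type_by_id(const struct btf_sketch *btf, unsigned int id)
{
	while (id < btf->start_id)
		btf = btf->base;	/* ID belongs to an underlying BTF */
	id -= btf->start_id;		/* convert to this object's local index */
	if (id >= btf->nr_types)
		return NULL;
	return btf->types[id];
}
```

This mirrors btf_type_by_id() and btf_str_by_offset() above: the module never copies vmlinux types, it only appends, so vmlinux IDs remain valid when referenced from module BTF.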
+99 -45
kernel/bpf/hashtab.c
···
 	};
 };

+#define HASHTAB_MAP_LOCK_COUNT 8
+#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
+
 struct bpf_htab {
 	struct bpf_map map;
 	struct bucket *buckets;
···
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 	u32 hashrnd;
+	struct lock_class_key lockdep_key;
+	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
 };

 /* each htab element is struct htab_elem + key + value */
···
 	for (i = 0; i < htab->n_buckets; i++) {
 		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-		if (htab_use_raw_lock(htab))
+		if (htab_use_raw_lock(htab)) {
 			raw_spin_lock_init(&htab->buckets[i].raw_lock);
-		else
+			lockdep_set_class(&htab->buckets[i].raw_lock,
+					  &htab->lockdep_key);
+		} else {
 			spin_lock_init(&htab->buckets[i].lock);
+			lockdep_set_class(&htab->buckets[i].lock,
+					  &htab->lockdep_key);
+		}
 	}
 }

-static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
-					     struct bucket *b)
+static inline int htab_lock_bucket(const struct bpf_htab *htab,
+				   struct bucket *b, u32 hash,
+				   unsigned long *pflags)
 {
 	unsigned long flags;
+
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
+
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+		__this_cpu_dec(*(htab->map_locked[hash]));
+		migrate_enable();
+		return -EBUSY;
+	}

 	if (htab_use_raw_lock(htab))
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
 	else
 		spin_lock_irqsave(&b->lock, flags);
-	return flags;
+	*pflags = flags;
+
+	return 0;
 }

 static inline void htab_unlock_bucket(const struct bpf_htab *htab,
-				      struct bucket *b,
+				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
 	if (htab_use_raw_lock(htab))
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	else
 		spin_unlock_irqrestore(&b->lock, flags);
+	__this_cpu_dec(*(htab->map_locked[hash]));
+	migrate_enable();
 }

 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
···
 	    attr->value_size == 0)
 		return -EINVAL;

-	if (attr->key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		return -E2BIG;
-
-	if (attr->value_size >= KMALLOC_MAX_SIZE -
-	    MAX_BPF_STACK - sizeof(struct htab_elem))
-		/* if value_size is bigger, the user space won't be able to
-		 * access the elements via bpf syscall. This check also makes
-		 * sure that the elem_size doesn't overflow and it's
+	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
+	    sizeof(struct htab_elem))
+		/* if key_size + value_size is bigger, the user space won't be
+		 * able to access the elements via bpf syscall. This check
+		 * also makes sure that the elem_size doesn't overflow and it's
 		 * kmalloc-able later in htab_map_update_elem()
 		 */
 		return -E2BIG;
···
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	struct bpf_htab *htab;
+	int err, i;
 	u64 cost;
-	int err;

 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
 		return ERR_PTR(-ENOMEM);
+
+	lockdep_register_key(&htab->lockdep_key);

 	bpf_map_init_from_attr(&htab->map, attr);

···
 	if (!htab->buckets)
 		goto free_charge;

+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
+		htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
+							 sizeof(int), GFP_USER);
+		if (!htab->map_locked[i])
+			goto free_map_locked;
+	}
+
 	if (htab->map.map_flags & BPF_F_ZERO_SEED)
 		htab->hashrnd = 0;
 	else
···
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_buckets;
+			goto free_map_locked;

 		if (!percpu && !lru) {
 			/* lru itself can remove the least used element, so
···

 free_prealloc:
 	prealloc_destroy(htab);
-free_buckets:
+free_map_locked:
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
 free_charge:
 	bpf_map_charge_finish(&htab->map.memory);
 free_htab:
+	lockdep_unregister_key(&htab->lockdep_key);
 	kfree(htab);
 	return ERR_PTR(err);
 }
···
 	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
+	int ret;

 	tgt_l = container_of(node, struct htab_elem, lru_node);
 	b = __select_bucket(htab, tgt_l->hash);
 	head = &b->head;

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+	if (ret)
+		return false;

 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
···
 			break;
 		}

-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, tgt_l->hash, flags);

 	return l == tgt_l;
 }
···
 	 */
 	}

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;

 	l_old = lookup_elem_raw(head, hash, key, key_size);

···
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
···
 		return -ENOMEM;
 	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;

 	l_old = lookup_elem_raw(head, hash, key, key_size);

···
 	ret = 0;

err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);

 	if (ret)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
···
 	b = __select_bucket(htab, hash);
 	head = &b->head;

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;

 	l_old = lookup_elem_raw(head, hash, key, key_size);

···
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
···
 		return -ENOMEM;
 	}

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;

 	l_old = lookup_elem_raw(head, hash, key, key_size);

···
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
···
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;

 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

···
 	b = __select_bucket(htab, hash);
 	head = &b->head;

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;

 	l = lookup_elem_raw(head, hash, key, key_size);

 	if (l) {
 		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
-		ret = 0;
+	} else {
+		ret = -ENOENT;
 	}

-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
···
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;

 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

···
 	b = __select_bucket(htab, hash);
 	head = &b->head;

-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;

 	l = lookup_elem_raw(head, hash, key, key_size);

-	if (l) {
+	if (l)
 		hlist_nulls_del_rcu(&l->hash_node);
-		ret = 0;
-	}
+	else
+		ret = -ENOENT;

-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l)
 		bpf_lru_push_free(&htab->lru, &l->lru_node);
 	return ret;
···
 static void htab_map_free(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	int i;

 	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
 	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
···

 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
+	lockdep_unregister_key(&htab->lockdep_key);
 	kfree(htab);
 }
···
 		b = &htab->buckets[batch];
 		head = &b->head;
 		/* do not grab the lock unless need it (bucket_cnt > 0). */
-		if (locked)
-			flags = htab_lock_bucket(htab, b);
+		if (locked) {
+			ret = htab_lock_bucket(htab, b, batch, &flags);
+			if (ret)
+				goto next_batch;
+		}

 		bucket_cnt = 0;
 		hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
···
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		goto after_loop;
···
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		kvfree(keys);
···
 		dst_val += value_size;
 	}

-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, batch, flags);
 	locked = false;

 	while (node_to_free) {
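The hashtab deadlock fix above guards each of the eight bucket-lock classes with a per-CPU counter: htab_lock_bucket() bumps the counter before taking the lock, and if it was already non-zero on this CPU (e.g. a BPF program running in NMI context interrupted a program that holds the same lock class), it backs off with -EBUSY instead of spinning forever. A single-CPU sketch of that guard, with the actual spinlock, per-CPU storage, and migrate_disable() details omitted and names chosen for illustration:

```c
/* Single-CPU sketch of the hashtab lock-nesting guard: a nested lock
 * attempt on the same class fails with -EBUSY rather than deadlocking.
 * Not kernel code; the real counter is per-CPU, per lock class.
 */
#define EBUSY_ERR (-16)	/* stands in for -EBUSY */

static int map_locked;	/* stands in for the per-CPU nesting counter */

static int lock_bucket(void)
{
	if (++map_locked != 1) {	/* already held on this CPU: bail out */
		--map_locked;
		return EBUSY_ERR;
	}
	/* the real code takes the bucket (raw) spinlock here */
	return 0;
}

static void unlock_bucket(void)
{
	/* the real code releases the bucket (raw) spinlock here */
	--map_locked;
}
```

Because map update/delete paths now propagate this error, a helper called in NMI context can fail with -EBUSY, which is the "rather theoretical" deadlock fix item 6 in the pull request refers to.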
+2 -1
kernel/bpf/syscall.c
···
 	    map->map_type != BPF_MAP_TYPE_ARRAY &&
 	    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 	    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
-	    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+	    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+	    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 		return -ENOTSUPP;
 	if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 	    map->value_size) {
+1 -1
kernel/bpf/sysfs_btf.c
···
 	.read = btf_vmlinux_read,
 };

-static struct kobject *btf_kobj;
+struct kobject *btf_kobj;

 static int __init btf_vmlinux_init(void)
 {
+2
kernel/bpf/task_iter.c
···

 static struct bpf_iter_reg task_reg_info = {
 	.target			= "task",
+	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 1,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__task, task),
···

 static struct bpf_iter_reg task_file_reg_info = {
 	.target			= "task_file",
+	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__task_file, task),
kernel/bpf/verifier.c (+138 -44)
···
 			regno);
 		return -EACCES;
 	}
-	err = __check_mem_access(env, regno, off, size, reg->range,
+
+	err = reg->range < 0 ? -EINVAL :
+	      __check_mem_access(env, regno, off, size, reg->range,
 				 zero_size_allowed);
 	if (err) {
 		verbose(env, "R%d offset is outside of the packet\n", regno);
···
 		    func_id != BPF_FUNC_inode_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_TASK_STORAGE:
+		if (func_id != BPF_FUNC_task_storage_get &&
+		    func_id != BPF_FUNC_task_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
···
 	case BPF_FUNC_inode_storage_get:
 	case BPF_FUNC_inode_storage_delete:
 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+			goto error;
+		break;
+	case BPF_FUNC_task_storage_get:
+	case BPF_FUNC_task_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			goto error;
 		break;
 	default:
···
 
 	for (i = 0; i <= vstate->curframe; i++)
 		__clear_all_pkt_pointers(env, vstate->frame[i]);
+}
+
+enum {
+	AT_PKT_END = -1,
+	BEYOND_PKT_END = -2,
+};
+
+static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
+{
+	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+	struct bpf_reg_state *reg = &state->regs[regn];
+
+	if (reg->type != PTR_TO_PACKET)
+		/* PTR_TO_PACKET_META is not supported yet */
+		return;
+
+	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
+	 * How far beyond pkt_end it goes is unknown.
+	 * if (!range_open) it's the case of pkt >= pkt_end
+	 * if (range_open) it's the case of pkt > pkt_end
+	 * hence this pointer is at least 1 byte bigger than pkt_end
+	 */
+	if (range_open)
+		reg->range = BEYOND_PKT_END;
+	else
+		reg->range = AT_PKT_END;
 }
 
 static void release_reg_references(struct bpf_verifier_env *env,
···
 				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 		}
-	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
+	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_BTF_ID) {
 		int ret_btf_id;
 
 		mark_reg_known_zero(env, regs, BPF_REG_0);
-		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+		regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
+						     PTR_TO_BTF_ID :
+						     PTR_TO_BTF_ID_OR_NULL;
 		ret_btf_id = *fn->ret_btf_id;
 		if (ret_btf_id == 0) {
 			verbose(env, "invalid return type %d of func %s#%d\n",
···
 
 static void __find_good_pkt_pointers(struct bpf_func_state *state,
 				     struct bpf_reg_state *dst_reg,
-				     enum bpf_reg_type type, u16 new_range)
+				     enum bpf_reg_type type, int new_range)
 {
 	struct bpf_reg_state *reg;
 	int i;
···
 				   enum bpf_reg_type type,
 				   bool range_right_open)
 {
-	u16 new_range;
-	int i;
+	int new_range, i;
 
 	if (dst_reg->off < 0 ||
 	    (dst_reg->off == 0 && range_right_open))
···
 	return is_branch64_taken(reg, val, opcode);
 }
 
+static int flip_opcode(u32 opcode)
+{
+	/* How can we transform "a <op> b" into "b <op> a"? */
+	static const u8 opcode_flip[16] = {
+		/* these stay the same */
+		[BPF_JEQ  >> 4] = BPF_JEQ,
+		[BPF_JNE  >> 4] = BPF_JNE,
+		[BPF_JSET >> 4] = BPF_JSET,
+		/* these swap "lesser" and "greater" (L and G in the opcodes) */
+		[BPF_JGE  >> 4] = BPF_JLE,
+		[BPF_JGT  >> 4] = BPF_JLT,
+		[BPF_JLE  >> 4] = BPF_JGE,
+		[BPF_JLT  >> 4] = BPF_JGT,
+		[BPF_JSGE >> 4] = BPF_JSLE,
+		[BPF_JSGT >> 4] = BPF_JSLT,
+		[BPF_JSLE >> 4] = BPF_JSGE,
+		[BPF_JSLT >> 4] = BPF_JSGT
+	};
+	return opcode_flip[opcode >> 4];
+}
+
+static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
+				   struct bpf_reg_state *src_reg,
+				   u8 opcode)
+{
+	struct bpf_reg_state *pkt;
+
+	if (src_reg->type == PTR_TO_PACKET_END) {
+		pkt = dst_reg;
+	} else if (dst_reg->type == PTR_TO_PACKET_END) {
+		pkt = src_reg;
+		opcode = flip_opcode(opcode);
+	} else {
+		return -1;
+	}
+
+	if (pkt->range >= 0)
+		return -1;
+
+	switch (opcode) {
+	case BPF_JLE:
+		/* pkt <= pkt_end */
+		fallthrough;
+	case BPF_JGT:
+		/* pkt > pkt_end */
+		if (pkt->range == BEYOND_PKT_END)
+			/* pkt has at last one extra byte beyond pkt_end */
+			return opcode == BPF_JGT;
+		break;
+	case BPF_JLT:
+		/* pkt < pkt_end */
+		fallthrough;
+	case BPF_JGE:
+		/* pkt >= pkt_end */
+		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
+			return opcode == BPF_JGE;
+		break;
+	}
+	return -1;
+}
+
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
···
 				u64 val, u32 val32,
 				u8 opcode, bool is_jmp32)
 {
-	/* How can we transform "a <op> b" into "b <op> a"? */
-	static const u8 opcode_flip[16] = {
-		/* these stay the same */
-		[BPF_JEQ  >> 4] = BPF_JEQ,
-		[BPF_JNE  >> 4] = BPF_JNE,
-		[BPF_JSET >> 4] = BPF_JSET,
-		/* these swap "lesser" and "greater" (L and G in the opcodes) */
-		[BPF_JGE  >> 4] = BPF_JLE,
-		[BPF_JGT  >> 4] = BPF_JLT,
-		[BPF_JLE  >> 4] = BPF_JGE,
-		[BPF_JLT  >> 4] = BPF_JGT,
-		[BPF_JSGE >> 4] = BPF_JSLE,
-		[BPF_JSGT >> 4] = BPF_JSLT,
-		[BPF_JSLE >> 4] = BPF_JSGE,
-		[BPF_JSLT >> 4] = BPF_JSGT
-	};
-	opcode = opcode_flip[opcode >> 4];
+	opcode = flip_opcode(opcode);
 	/* This uses zero as "not present in table"; luckily the zero opcode,
 	 * BPF_JA, can't get here.
 	 */
···
 		/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
 		find_good_pkt_pointers(this_branch, dst_reg,
 				       dst_reg->type, false);
+		mark_pkt_end(other_branch, insn->dst_reg, true);
 	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 		    src_reg->type == PTR_TO_PACKET) ||
 		   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
···
 		/* pkt_end > pkt_data', pkt_data > pkt_meta' */
 		find_good_pkt_pointers(other_branch, src_reg,
 				       src_reg->type, true);
+		mark_pkt_end(this_branch, insn->src_reg, false);
 	} else {
 		return false;
 	}
···
 		/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
 		find_good_pkt_pointers(other_branch, dst_reg,
 				       dst_reg->type, true);
+		mark_pkt_end(this_branch, insn->dst_reg, false);
 	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 		    src_reg->type == PTR_TO_PACKET) ||
 		   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
···
 		/* pkt_end < pkt_data', pkt_data > pkt_meta' */
 		find_good_pkt_pointers(this_branch, src_reg,
 				       src_reg->type, false);
+		mark_pkt_end(other_branch, insn->src_reg, true);
 	} else {
 		return false;
 	}
···
 		/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
 		find_good_pkt_pointers(this_branch, dst_reg,
 				       dst_reg->type, true);
+		mark_pkt_end(other_branch, insn->dst_reg, false);
 	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 		    src_reg->type == PTR_TO_PACKET) ||
 		   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
···
 		/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
 		find_good_pkt_pointers(other_branch, src_reg,
 				       src_reg->type, false);
+		mark_pkt_end(this_branch, insn->src_reg, true);
 	} else {
 		return false;
 	}
···
 		/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
 		find_good_pkt_pointers(other_branch, dst_reg,
 				       dst_reg->type, false);
+		mark_pkt_end(this_branch, insn->dst_reg, true);
 	} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 		    src_reg->type == PTR_TO_PACKET) ||
 		   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
···
 		/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
 		find_good_pkt_pointers(this_branch, src_reg,
 				       src_reg->type, true);
+		mark_pkt_end(other_branch, insn->src_reg, false);
 	} else {
 		return false;
 	}
···
 					       src_reg->var_off.value,
 					       opcode,
 					       is_jmp32);
+	} else if (reg_is_pkt_pointer_any(dst_reg) &&
+		   reg_is_pkt_pointer_any(src_reg) &&
+		   !is_jmp32) {
+		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
 	}
 
 	if (pred >= 0) {
···
 		 */
 		if (!__is_pointer_value(false, dst_reg))
 			err = mark_chain_precision(env, insn->dst_reg);
-		if (BPF_SRC(insn->code) == BPF_X && !err)
+		if (BPF_SRC(insn->code) == BPF_X && !err &&
+		    !__is_pointer_value(false, src_reg))
 			err = mark_chain_precision(env, insn->src_reg);
 		if (err)
 			return err;
···
 		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
 	}
 
-	if ((is_tracing_prog_type(prog_type) ||
-	     prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
-	    map_value_has_spin_lock(map)) {
-		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
-		return -EINVAL;
+	if (map_value_has_spin_lock(map)) {
+		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
+			return -EINVAL;
+		}
+
+		if (is_tracing_prog_type(prog_type)) {
+			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
+			return -EINVAL;
+		}
+
+		if (prog->aux->sleepable) {
+			verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
+			return -EINVAL;
+		}
 	}
 
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
···
 		return -EINVAL;
 	}
 
-/* non exhaustive list of sleepable bpf_lsm_*() functions */
-BTF_SET_START(btf_sleepable_lsm_hooks)
-#ifdef CONFIG_BPF_LSM
-BTF_ID(func, bpf_lsm_bprm_committed_creds)
-#else
-BTF_ID_UNUSED
-#endif
-BTF_SET_END(btf_sleepable_lsm_hooks)
-
-static int check_sleepable_lsm_hook(u32 btf_id)
-{
-	return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
-}
-
 /* list of non-sleepable functions that are otherwise on
  * ALLOW_ERROR_INJECTION list
  */
···
 		/* LSM progs check that they are attached to bpf_lsm_*() funcs.
 		 * Only some of them are sleepable.
 		 */
-		if (check_sleepable_lsm_hook(btf_id))
+		if (bpf_lsm_is_sleepable_hook(btf_id))
 			ret = 0;
 		break;
 	default:
kernel/module.c (+32)
···
 	return (void *)info->sechdrs[sec].sh_addr;
 }
 
+/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
+static unsigned int find_any_sec(const struct load_info *info, const char *name)
+{
+	unsigned int i;
+
+	for (i = 1; i < info->hdr->e_shnum; i++) {
+		Elf_Shdr *shdr = &info->sechdrs[i];
+		if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
+			return i;
+	}
+	return 0;
+}
+
+/*
+ * Find a module section, or NULL. Fill in number of "objects" in section.
+ * Ignores SHF_ALLOC flag.
+ */
+static __maybe_unused void *any_section_objs(const struct load_info *info,
+					     const char *name,
+					     size_t object_size,
+					     unsigned int *num)
+{
+	unsigned int sec = find_any_sec(info, name);
+
+	/* Section 0 has sh_addr 0 and sh_size 0. */
+	*num = info->sechdrs[sec].sh_size / object_size;
+	return (void *)info->sechdrs[sec].sh_addr;
+}
+
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
···
 	mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
 					   sizeof(*mod->bpf_raw_events),
 					   &mod->num_bpf_raw_events);
+#endif
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size);
 #endif
 #ifdef CONFIG_JUMP_LABEL
 	mod->jump_entries = section_objs(info, "__jump_table",
kernel/trace/bpf_trace.c (+28 -1)
···
 #include <linux/syscalls.h>
 #include <linux/error-injection.h>
 #include <linux/btf_ids.h>
+#include <linux/bpf_lsm.h>
+
+#include <net/bpf_sk_storage.h>
 
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/btf.h>
···
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_get_current_task_btf)
+{
+	return (unsigned long) current;
+}
+
+BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
+
+static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+	.func		= bpf_get_current_task_btf,
+	.gpl_only	= true,
+	.ret_type	= RET_PTR_TO_BTF_ID,
+	.ret_btf_id	= &bpf_get_current_btf_ids[0],
+};
+
 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
···
 
 static bool bpf_d_path_allowed(const struct bpf_prog *prog)
 {
-	return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
+	if (prog->type == BPF_PROG_TYPE_LSM)
+		return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
+
+	return btf_id_set_contains(&btf_allowlist_d_path,
+				   prog->aux->attach_btf_id);
 }
 
 BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
···
 		return &bpf_get_current_pid_tgid_proto;
 	case BPF_FUNC_get_current_task:
 		return &bpf_get_current_task_proto;
+	case BPF_FUNC_get_current_task_btf:
+		return &bpf_get_current_task_btf_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_current_comm:
···
 	case BPF_FUNC_skc_to_tcp_request_sock:
 		return &bpf_skc_to_tcp_request_sock_proto;
 	case BPF_FUNC_skc_to_udp6_sock:
 		return &bpf_skc_to_udp6_sock_proto;
+	case BPF_FUNC_sk_storage_get:
+		return &bpf_sk_storage_get_tracing_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &bpf_sk_storage_delete_tracing_proto;
 #endif
 	case BPF_FUNC_seq_printf:
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
lib/Kconfig.debug (+9)
···
 	  Turning this on expects presence of pahole tool, which will convert
 	  DWARF type info into equivalent deduplicated BTF type info.
 
+config PAHOLE_HAS_SPLIT_BTF
+	def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
+
+config DEBUG_INFO_BTF_MODULES
+	def_bool y
+	depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
+	help
+	  Generate compact split BTF type information for kernel modules.
+
 config GDB_SCRIPTS
 	bool "Provide GDB scripts for kernel debugging"
 	help
net/core/bpf_sk_storage.c (+103 -32)
···
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/bpf_local_storage.h>
 #include <net/bpf_sk_storage.h>
···
 
 DEFINE_BPF_STORAGE_CACHE(sk_cache);
 
-static int omem_charge(struct sock *sk, unsigned int size)
-{
-	/* same check as in sock_kmalloc() */
-	if (size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
-		atomic_add(size, &sk->sk_omem_alloc);
-		return 0;
-	}
-
-	return -ENOMEM;
-}
-
 static struct bpf_local_storage_data *
-sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
+bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
 {
 	struct bpf_local_storage *sk_storage;
 	struct bpf_local_storage_map *smap;
···
 	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
 }
 
-static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
+static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
 {
 	struct bpf_local_storage_data *sdata;
 
-	sdata = sk_storage_lookup(sk, map, false);
+	sdata = bpf_sk_storage_lookup(sk, map, false);
 	if (!sdata)
 		return -ENOENT;
 
···
 	kfree_rcu(sk_storage, rcu);
 }
 
-static void sk_storage_map_free(struct bpf_map *map)
+static void bpf_sk_storage_map_free(struct bpf_map *map)
 {
 	struct bpf_local_storage_map *smap;
 
···
 	bpf_local_storage_map_free(smap);
 }
 
-static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
+static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
 
···
 	fd = *(int *)key;
 	sock = sockfd_lookup(fd, &err);
 	if (sock) {
-		sdata = sk_storage_lookup(sock->sk, map, true);
+		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
 		sockfd_put(sock);
 		return sdata ? sdata->data : NULL;
 	}
···
 	fd = *(int *)key;
 	sock = sockfd_lookup(fd, &err);
 	if (sock) {
-		err = sk_storage_delete(sock->sk, map);
+		err = bpf_sk_storage_del(sock->sk, map);
 		sockfd_put(sock);
 		return err;
 	}
···
 	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
 		return (unsigned long)NULL;
 
-	sdata = sk_storage_lookup(sk, map, true);
+	sdata = bpf_sk_storage_lookup(sk, map, true);
 	if (sdata)
 		return (unsigned long)sdata->data;
 
···
 	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
 		int err;
 
-		err = sk_storage_delete(sk, map);
+		err = bpf_sk_storage_del(sk, map);
 		sock_put(sk);
 		return err;
 	}
···
 	return -ENOENT;
 }
 
-static int sk_storage_charge(struct bpf_local_storage_map *smap,
-			     void *owner, u32 size)
+static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
+				 void *owner, u32 size)
 {
-	return omem_charge(owner, size);
+	struct sock *sk = (struct sock *)owner;
+
+	/* same check as in sock_kmalloc() */
+	if (size <= sysctl_optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+		atomic_add(size, &sk->sk_omem_alloc);
+		return 0;
+	}
+
+	return -ENOMEM;
 }
 
-static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
-				void *owner, u32 size)
+static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
+				    void *owner, u32 size)
 {
 	struct sock *sk = owner;
 
···
 }
 
 static struct bpf_local_storage __rcu **
-sk_storage_ptr(void *owner)
+bpf_sk_storage_ptr(void *owner)
 {
 	struct sock *sk = owner;
 
···
 const struct bpf_map_ops sk_storage_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = bpf_local_storage_map_alloc_check,
-	.map_alloc = sk_storage_map_alloc,
-	.map_free = sk_storage_map_free,
+	.map_alloc = bpf_sk_storage_map_alloc,
+	.map_free = bpf_sk_storage_map_free,
 	.map_get_next_key = notsupp_get_next_key,
 	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
 	.map_update_elem = bpf_fd_sk_storage_update_elem,
···
 	.map_check_btf = bpf_local_storage_map_check_btf,
 	.map_btf_name = "bpf_local_storage_map",
 	.map_btf_id = &sk_storage_map_btf_id,
-	.map_local_storage_charge = sk_storage_charge,
-	.map_local_storage_uncharge = sk_storage_uncharge,
-	.map_owner_storage_ptr = sk_storage_ptr,
+	.map_local_storage_charge = bpf_sk_storage_charge,
+	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
+	.map_owner_storage_ptr = bpf_sk_storage_ptr,
 };
 
 const struct bpf_func_proto bpf_sk_storage_get_proto = {
···
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+};
+
+static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
+{
+	const struct btf *btf_vmlinux;
+	const struct btf_type *t;
+	const char *tname;
+	u32 btf_id;
+
+	if (prog->aux->dst_prog)
+		return false;
+
+	/* Ensure the tracing program is not tracing
+	 * any bpf_sk_storage*() function and also
+	 * use the bpf_sk_storage_(get|delete) helper.
+	 */
+	switch (prog->expected_attach_type) {
+	case BPF_TRACE_RAW_TP:
+		/* bpf_sk_storage has no trace point */
+		return true;
+	case BPF_TRACE_FENTRY:
+	case BPF_TRACE_FEXIT:
+		btf_vmlinux = bpf_get_btf_vmlinux();
+		btf_id = prog->aux->attach_btf_id;
+		t = btf_type_by_id(btf_vmlinux, btf_id);
+		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+		return !!strncmp(tname, "bpf_sk_storage",
+				 strlen("bpf_sk_storage"));
+	default:
+		return false;
+	}
+
+	return false;
+}
+
+BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags)
+{
+	if (!in_serving_softirq() && !in_task())
+		return (unsigned long)NULL;
+
+	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+}
+
+BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
+	   struct sock *, sk)
+{
+	if (!in_serving_softirq() && !in_task())
+		return -EPERM;
+
+	return ____bpf_sk_storage_delete(map, sk);
+}
+
+const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
+	.func		= bpf_sk_storage_get_tracing,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+	.arg3_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+	.allowed	= bpf_sk_storage_tracing_allowed,
+};
+
+const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
+	.func		= bpf_sk_storage_delete_tracing,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+	.allowed	= bpf_sk_storage_tracing_allowed,
 };
 
 struct bpf_sk_storage_diag {
net/core/page_pool.c (+60 -10)
···
 #include <linux/device.h>
 
 #include <net/page_pool.h>
+#include <net/xdp.h>
+
 #include <linux/dma-direction.h>
 #include <linux/dma-mapping.h>
 #include <linux/page-flags.h>
···
  * If the page refcnt != 1, then the page will be returned to memory
  * subsystem.
  */
-void page_pool_put_page(struct page_pool *pool, struct page *page,
-			unsigned int dma_sync_size, bool allow_direct)
+static __always_inline struct page *
+__page_pool_put_page(struct page_pool *pool, struct page *page,
+		     unsigned int dma_sync_size, bool allow_direct)
 {
 	/* This allocator is optimized for the XDP mode that uses
 	 * one-frame-per-page, but have fallbacks that act like the
···
 			page_pool_dma_sync_for_device(pool, page,
 						      dma_sync_size);
 
-		if (allow_direct && in_serving_softirq())
-			if (page_pool_recycle_in_cache(page, pool))
-				return;
+		if (allow_direct && in_serving_softirq() &&
+		    page_pool_recycle_in_cache(page, pool))
+			return NULL;
 
-		if (!page_pool_recycle_in_ring(pool, page)) {
-			/* Cache full, fallback to free pages */
-			page_pool_return_page(pool, page);
-		}
-		return;
+		/* Page found as candidate for recycling */
+		return page;
 	}
 	/* Fallback/non-XDP mode: API user have elevated refcnt.
 	 *
···
 	/* Do not replace this with page_pool_return_page() */
 	page_pool_release_page(pool, page);
 	put_page(page);
+
+	return NULL;
+}
+
+void page_pool_put_page(struct page_pool *pool, struct page *page,
+			unsigned int dma_sync_size, bool allow_direct)
+{
+	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
+	if (page && !page_pool_recycle_in_ring(pool, page)) {
+		/* Cache full, fallback to free pages */
+		page_pool_return_page(pool, page);
+	}
 }
 EXPORT_SYMBOL(page_pool_put_page);
+
+/* Caller must not use data area after call, as this function overwrites it */
+void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+			     int count)
+{
+	int i, bulk_len = 0;
+
+	for (i = 0; i < count; i++) {
+		struct page *page = virt_to_head_page(data[i]);
+
+		page = __page_pool_put_page(pool, page, -1, false);
+		/* Approved for bulk recycling in ptr_ring cache */
+		if (page)
+			data[bulk_len++] = page;
+	}
+
+	if (unlikely(!bulk_len))
+		return;
+
+	/* Bulk producer into ptr_ring page_pool cache */
+	page_pool_ring_lock(pool);
+	for (i = 0; i < bulk_len; i++) {
+		if (__ptr_ring_produce(&pool->ring, data[i]))
+			break; /* ring full */
+	}
+	page_pool_ring_unlock(pool);
+
+	/* Hopefully all pages was return into ptr_ring */
+	if (likely(i == bulk_len))
+		return;
+
+	/* ptr_ring cache full, free remaining pages outside producer lock
+	 * since put_page() with refcnt == 1 can be an expensive operation
+	 */
+	for (; i < bulk_len; i++)
+		page_pool_return_page(pool, data[i]);
+}
+EXPORT_SYMBOL(page_pool_put_page_bulk);
 
 static void page_pool_empty_ring(struct page_pool *pool)
 {
net/core/xdp.c (+54)
···
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
+/* XDP bulk APIs introduce a defer/flush mechanism to return
+ * pages belonging to the same xdp_mem_allocator object
+ * (identified via the mem.id field) in bulk to optimize
+ * I-cache and D-cache.
+ * The bulk queue size is set to 16 to be aligned to how
+ * XDP_REDIRECT bulking works. The bulk is flushed when
+ * it is full or when mem.id changes.
+ * xdp_frame_bulk is usually stored/allocated on the function
+ * call-stack to avoid locking penalties.
+ */
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
+{
+	struct xdp_mem_allocator *xa = bq->xa;
+
+	if (unlikely(!xa || !bq->count))
+		return;
+
+	page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count);
+	/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
+	bq->count = 0;
+}
+EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
+
+/* Must be called with rcu_read_lock held */
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+			   struct xdp_frame_bulk *bq)
+{
+	struct xdp_mem_info *mem = &xdpf->mem;
+	struct xdp_mem_allocator *xa;
+
+	if (mem->type != MEM_TYPE_PAGE_POOL) {
+		__xdp_return(xdpf->data, &xdpf->mem, false);
+		return;
+	}
+
+	xa = bq->xa;
+	if (unlikely(!xa)) {
+		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+		bq->count = 0;
+		bq->xa = xa;
+	}
+
+	if (bq->count == XDP_BULK_QUEUE_SIZE)
+		xdp_flush_frame_bulk(bq);
+
+	if (unlikely(mem->id != xa->mem.id)) {
+		xdp_flush_frame_bulk(bq);
+		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+	}
+
+	bq->q[bq->count++] = xdpf->data;
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
+
 void xdp_return_buff(struct xdp_buff *xdp)
 {
 	__xdp_return(xdp->data, &xdp->rxq->mem, true);
samples/bpf/hbm.c (-1)
···
 #include "cgroup_helpers.h"
 #include "hbm.h"
 #include "bpf_util.h"
-#include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
 bool outFlag = true;
samples/bpf/test_ipip.sh (deleted, -179)
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-function config_device {
-	ip netns add at_ns0
-	ip netns add at_ns1
-	ip netns add at_ns2
-	ip link add veth0 type veth peer name veth0b
-	ip link add veth1 type veth peer name veth1b
-	ip link add veth2 type veth peer name veth2b
-	ip link set veth0b up
-	ip link set veth1b up
-	ip link set veth2b up
-	ip link set dev veth0b mtu 1500
-	ip link set dev veth1b mtu 1500
-	ip link set dev veth2b mtu 1500
-	ip link set veth0 netns at_ns0
-	ip link set veth1 netns at_ns1
-	ip link set veth2 netns at_ns2
-	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
-	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
-	ip netns exec at_ns0 ip link set dev veth0 up
-	ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1
-	ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad
-	ip netns exec at_ns1 ip link set dev veth1 up
-	ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2
-	ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad
-	ip netns exec at_ns2 ip link set dev veth2 up
-	ip link add br0 type bridge
-	ip link set br0 up
-	ip link set dev br0 mtu 1500
-	ip link set veth0b master br0
-	ip link set veth1b master br0
-	ip link set veth2b master br0
-}
-
-function add_ipip_tunnel {
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200
-	ip netns exec at_ns0 ip link set dev $DEV_NS up
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns1 \
-		ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200
-	ip netns exec at_ns1 ip link set dev $DEV_NS up
-	# same inner IP address in at_ns0 and at_ns1
-	ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
-
-	ip netns exec at_ns2 ip link add dev $DEV type ipip external
-	ip netns exec at_ns2 ip link set dev $DEV up
-	ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ipip6_tunnel {
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64
-	ip netns exec at_ns0 ip link set dev $DEV_NS up
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns1 \
-		ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64
-	ip netns exec at_ns1 ip link set dev $DEV_NS up
-	# same inner IP address in at_ns0 and at_ns1
-	ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
-
-	ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external
-	ip netns exec at_ns2 ip link set dev $DEV up
-	ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
-}
-
-function add_ip6ip6_tunnel {
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64
-	ip netns exec at_ns0 ip link set dev $DEV_NS up
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64
-	ip netns exec at_ns1 \
-		ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64
-	ip netns exec at_ns1 ip link set dev $DEV_NS up
-	# same inner IP address in at_ns0 and at_ns1
-	ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64
-
-	ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external
-	ip netns exec at_ns2 ip link set dev $DEV up
-	ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64
-}
-
-function attach_bpf {
-	DEV=$1
-	SET_TUNNEL=$2
-	GET_TUNNEL=$3
-	ip netns exec at_ns2 tc qdisc add dev $DEV clsact
-	ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
-	ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
-}
-
-function test_ipip {
-	DEV_NS=ipip_std
-	DEV=ipip_bpf
-	config_device
-	# tcpdump -nei br0 &
-	cat /sys/kernel/debug/tracing/trace_pipe &
-
-	add_ipip_tunnel
-	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
-
-	ip netns exec at_ns0 ping -c 1 10.1.1.200
-	ip netns exec at_ns2 ping -c 1 10.1.1.100
-	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
-	ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
-	sleep 0.2
-	# tcp check _same_ IP over different tunnels
-	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
-	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
-	cleanup
-}
-
-# IPv4 over IPv6 tunnel
-function test_ipip6 {
-	DEV_NS=ipip_std
-	DEV=ipip_bpf
-	config_device
-	# tcpdump -nei br0 &
-	cat /sys/kernel/debug/tracing/trace_pipe &
-
-	add_ipip6_tunnel
-	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
-
-	ip netns exec at_ns0 ping -c 1 10.1.1.200
-	ip netns exec at_ns2 ping -c 1 10.1.1.100
-	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
-	ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
-	sleep 0.2
-	# tcp check _same_ IP over different tunnels
-	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
-	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
-	cleanup
-}
-
-# IPv6 over IPv6 tunnel
-function test_ip6ip6 {
-	DEV_NS=ipip_std
-	DEV=ipip_bpf
-	config_device
-	# tcpdump -nei br0 &
-	cat /sys/kernel/debug/tracing/trace_pipe &
-
-	add_ip6ip6_tunnel
-	attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
-
-	ip netns exec at_ns0 ping -6 -c 1 2601:646::2
-	ip netns exec at_ns2 ping -6 -c 1 2601:646::1
-	ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null
-	ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null
-	sleep 0.2
-	# tcp check _same_ IP over different tunnels
-	ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200
-	ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201
-	cleanup
-}
-
-function cleanup {
-	set +ex
-	pkill iperf
-	ip netns delete at_ns0
-	ip netns delete at_ns1
-	ip netns delete at_ns2
-	ip link del veth0
-	ip link del veth1
-	ip link del veth2
-	ip link del br0
-	pkill tcpdump
-	pkill cat
-	set -ex
-}
-
-cleanup
-echo "Testing IP tunnels..."
-test_ipip
-test_ipip6
-test_ip6ip6
-echo "*** PASS ***"
+18 -2
scripts/Makefile.modfinal
···
6 6 PHONY := __modfinal
7 7 __modfinal:
8 8
9 + include include/config/auto.conf
9 10 include $(srctree)/scripts/Kbuild.include
10 11
11 12 # for c_flags
···
37 36 -T scripts/module.lds -o $@ $(filter %.o, $^); \
38 37 $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
39 38
40 - $(modules): %.ko: %.o %.mod.o scripts/module.lds FORCE
41 - +$(call if_changed,ld_ko_o)
39 + quiet_cmd_btf_ko = BTF [M] $@
40 + cmd_btf_ko = LLVM_OBJCOPY=$(OBJCOPY) $(PAHOLE) -J --btf_base vmlinux $@
41 +
42 + # Same as newer-prereqs, but allows to exclude specified extra dependencies
43 + newer_prereqs_except = $(filter-out $(PHONY) $(1),$?)
44 +
45 + # Same as if_changed, but allows to exclude specified extra dependencies
46 + if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \
47 + $(cmd); \
48 + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
49 +
50 + # Re-generate module BTFs if either module's .ko or vmlinux changed
51 + $(modules): %.ko: %.o %.mod.o scripts/module.lds vmlinux FORCE
52 + +$(call if_changed_except,ld_ko_o,vmlinux)
53 + ifdef CONFIG_DEBUG_INFO_BTF_MODULES
54 + +$(if $(newer-prereqs),$(call cmd,btf_ko))
55 + endif
42 56
43 57 targets += $(modules) $(modules:.ko=.mod.o)
44 58
+2
security/bpf/hooks.c
···
12 12 #include <linux/lsm_hook_defs.h>
13 13 #undef LSM_HOOK
14 14 LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
15 + LSM_HOOK_INIT(task_free, bpf_task_storage_free),
15 16 };
16 17
17 18 static int __init bpf_lsm_init(void)
···
24 23
25 24 struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
26 25 .lbs_inode = sizeof(struct bpf_storage_blob),
26 + .lbs_task = sizeof(struct bpf_storage_blob),
27 27 };
28 28
29 29 DEFINE_LSM(bpf) = {
+1 -1
tools/bpf/bpftool/.gitignore
···
1 1 # SPDX-License-Identifier: GPL-2.0-only
2 2 *.d
3 - /bpftool-bootstrap
3 + /bootstrap/
4 4 /bpftool
5 5 bpftool*.8
6 6 bpf-helpers.*
+2 -1
tools/bpf/bpftool/Documentation/bpftool-map.rst
···
50 50 | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
51 51 | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
52 52 | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
53 - | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
53 + | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
54 + | **task_storage** }
54 55
55 56 DESCRIPTION
56 57 ===========
+33 -11
tools/bpf/bpftool/Makefile
···
19 19 ifneq ($(OUTPUT),)
20 20 LIBBPF_OUTPUT = $(OUTPUT)/libbpf/
21 21 LIBBPF_PATH = $(LIBBPF_OUTPUT)
22 + BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/
22 23 else
24 + LIBBPF_OUTPUT =
23 25 LIBBPF_PATH = $(BPF_DIR)
26 + BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/
24 27 endif
25 28
26 29 LIBBPF = $(LIBBPF_PATH)libbpf.a
30 + LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/
31 + LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
27 32
28 - BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
33 + ifeq ($(BPFTOOL_VERSION),)
34 + BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
35 + endif
29 36
30 - $(LIBBPF): FORCE
31 - $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
37 + $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT):
38 + $(QUIET_MKDIR)mkdir -p $@
39 +
40 + $(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
32 41 $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
33 42
34 - $(LIBBPF)-clean:
43 + $(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
44 + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
45 + ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@
46 +
47 + $(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT)
35 48 $(call QUIET_CLEAN, libbpf)
36 49 $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
50 +
51 + $(LIBBPF_BOOTSTRAP)-clean: FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
52 + $(call QUIET_CLEAN, libbpf-bootstrap)
53 + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null
37 54
38 55 prefix ?= /usr/local
39 56 bash_compdir ?= /usr/share/bash-completion/completions
···
109 92 endif
110 93
111 94 LIBS = $(LIBBPF) -lelf -lz
95 + LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
112 96 ifeq ($(feature-libcap), 1)
113 97 CFLAGS += -DUSE_LIBCAP
114 98 LIBS += -lcap
···
136 118 SRCS += $(BFD_SRCS)
137 119 endif
138 120
139 - BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
121 + BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
140 122
141 - BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
123 + BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
142 124 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
143 125
144 126 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
···
185 167
186 168 $(OUTPUT)feature.o: | zdep
187 169
188 - $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
189 - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
170 + $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
171 + $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \
172 + $(LIBS_BOOTSTRAP)
190 173
191 174 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
192 175 $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
176 +
177 + $(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT)
178 + $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
193 179
194 180 $(OUTPUT)%.o: %.c
195 181 $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
···
202 180 $(call QUIET_CLEAN, feature-detect)
203 181 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
204 182
205 - clean: $(LIBBPF)-clean feature-detect-clean
183 + clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean
206 184 $(call QUIET_CLEAN, bpftool)
207 185 $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
208 - $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
209 - $(Q)$(RM) -r -- $(OUTPUT)libbpf/
186 + $(Q)$(RM) -- $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
187 + $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT)
210 188 $(call QUIET_CLEAN, core-gen)
211 189 $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
212 190 $(Q)$(RM) -r -- $(OUTPUT)feature/
+1 -1
tools/bpf/bpftool/bash-completion/bpftool
···
705 705 hash_of_maps devmap devmap_hash sockmap cpumap \
706 706 xskmap sockhash cgroup_storage reuseport_sockarray \
707 707 percpu_cgroup_storage queue stack sk_storage \
708 - struct_ops inode_storage' -- \
708 + struct_ops inode_storage task_storage' -- \
709 709 "$cur" ) )
710 710 return 0
711 711 ;;
+33 -4
tools/bpf/bpftool/btf.c
···
358 358 }
359 359 } else {
360 360 int cnt = btf__get_nr_types(btf);
361 + int start_id = 1;
361 362
362 - for (i = 1; i <= cnt; i++) {
363 + if (base_btf)
364 + start_id = btf__get_nr_types(base_btf) + 1;
365 +
366 + for (i = start_id; i <= cnt; i++) {
363 367 t = btf__type_by_id(btf, i);
364 368 dump_btf_type(btf, i, t);
365 369 }
···
442 438 return -1;
443 439 }
444 440 src = GET_ARG();
445 -
446 441 if (is_prefix(src, "map")) {
447 442 struct bpf_map_info info = {};
448 443 __u32 len = sizeof(info);
···
502 499 }
503 500 NEXT_ARG();
504 501 } else if (is_prefix(src, "file")) {
505 - btf = btf__parse(*argv, NULL);
502 + btf = btf__parse_split(*argv, base_btf);
506 503 if (IS_ERR(btf)) {
507 504 err = -PTR_ERR(btf);
508 505 btf = NULL;
···
742 739 struct btf_attach_table *btf_map_table)
743 740 {
744 741 struct btf_attach_point *obj;
742 + const char *name = u64_to_ptr(info->name);
745 743 int n;
746 744
747 745 printf("%u: ", info->id);
746 + if (info->kernel_btf)
747 + printf("name [%s] ", name);
748 + else if (name && name[0])
749 + printf("name %s ", name);
748 750 printf("size %uB", info->btf_size);
749 751
750 752 n = 0;
···
776 768 struct btf_attach_table *btf_map_table)
777 769 {
778 770 struct btf_attach_point *obj;
771 + const char *name = u64_to_ptr(info->name);
779 772
780 773 jsonw_start_object(json_wtr); /* btf object */
781 774 jsonw_uint_field(json_wtr, "id", info->id);
···
802 793
803 794 emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
804 795
796 + jsonw_bool_field(json_wtr, "kernel", info->kernel_btf);
797 +
798 + if (name && name[0])
799 + jsonw_string_field(json_wtr, "name", name);
800 +
805 801 jsonw_end_object(json_wtr); /* btf object */
806 802 }
807 803
···
814 800 show_btf(int fd, struct btf_attach_table *btf_prog_table,
815 801 struct btf_attach_table *btf_map_table)
816 802 {
817 - struct bpf_btf_info info = {};
803 + struct bpf_btf_info info;
818 804 __u32 len = sizeof(info);
805 + char name[64];
819 806 int err;
820 807
808 + memset(&info, 0, sizeof(info));
821 809 err = bpf_obj_get_info_by_fd(fd, &info, &len);
822 810 if (err) {
823 811 p_err("can't get BTF object info: %s", strerror(errno));
824 812 return -1;
813 + }
814 + /* if kernel support emitting BTF object name, pass name pointer */
815 + if (info.name_len) {
816 + memset(&info, 0, sizeof(info));
817 + info.name_len = sizeof(name);
818 + info.name = ptr_to_u64(name);
819 + len = sizeof(info);
820 +
821 + err = bpf_obj_get_info_by_fd(fd, &info, &len);
822 + if (err) {
823 + p_err("can't get BTF object info: %s", strerror(errno));
824 + return -1;
825 + }
825 826 }
826 827
827 828 if (json_output)
+14 -1
tools/bpf/bpftool/main.c
···
11 11
12 12 #include <bpf/bpf.h>
13 13 #include <bpf/libbpf.h>
14 + #include <bpf/btf.h>
14 15
15 16 #include "main.h"
16 17
···
29 28 bool block_mount;
30 29 bool verifier_logs;
31 30 bool relaxed_maps;
31 + struct btf *base_btf;
32 32 struct pinned_obj_table prog_table;
33 33 struct pinned_obj_table map_table;
34 34 struct pinned_obj_table link_table;
···
393 391 { "mapcompat", no_argument, NULL, 'm' },
394 392 { "nomount", no_argument, NULL, 'n' },
395 393 { "debug", no_argument, NULL, 'd' },
394 + { "base-btf", required_argument, NULL, 'B' },
396 395 { 0 }
397 396 };
398 397 int opt, ret;
···
410 407 hash_init(link_table.table);
411 408
412 409 opterr = 0;
413 - while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
410 + while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
414 411 options, NULL)) >= 0) {
415 412 switch (opt) {
416 413 case 'V':
···
444 441 libbpf_set_print(print_all_levels);
445 442 verifier_logs = true;
446 443 break;
444 + case 'B':
445 + base_btf = btf__parse(optarg, NULL);
446 + if (libbpf_get_error(base_btf)) {
447 + p_err("failed to parse base BTF at '%s': %ld\n",
448 + optarg, libbpf_get_error(base_btf));
449 + base_btf = NULL;
450 + return -1;
451 + }
452 + break;
447 453 default:
448 454 p_err("unrecognized option '%s'", argv[optind - 1]);
449 455 if (json_output)
···
477 465 delete_pinned_obj_table(&map_table);
478 466 delete_pinned_obj_table(&link_table);
479 467 }
468 + btf__free(base_btf);
480 469
481 470 return ret;
482 471 }
+1
tools/bpf/bpftool/main.h
···
90 90 extern bool block_mount;
91 91 extern bool verifier_logs;
92 92 extern bool relaxed_maps;
93 + extern struct btf *base_btf;
93 94 extern struct pinned_obj_table prog_table;
94 95 extern struct pinned_obj_table map_table;
95 96 extern struct pinned_obj_table link_table;
+3 -1
tools/bpf/bpftool/map.c
···
51 51 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
52 52 [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
53 53 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
54 + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
54 55 };
55 56
56 57 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
···
1465 1464 " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
1466 1465 " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
1467 1466 " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
1468 - " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
1467 + " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
1468 + " task_storage }\n"
1469 1469 " " HELP_SPEC_OPTIONS "\n"
1470 1470 "",
1471 1471 bin_name, argv[-2]);
-9
tools/bpf/resolve_btfids/Makefile
···
18 18 endif
19 19
20 20 # always use the host compiler
21 - ifneq ($(LLVM),)
22 - HOSTAR ?= llvm-ar
23 - HOSTCC ?= clang
24 - HOSTLD ?= ld.lld
25 - else
26 - HOSTAR ?= ar
27 - HOSTCC ?= gcc
28 - HOSTLD ?= ld
29 - endif
30 21 AR = $(HOSTAR)
31 22 CC = $(HOSTCC)
32 23 LD = $(HOSTLD)
+27 -28
tools/bpf/runqslower/Makefile
···
1 1 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 - OUTPUT := .output
2 + include ../../scripts/Makefile.include
3 +
4 + OUTPUT ?= $(abspath .output)/
5 +
3 6 CLANG ?= clang
4 7 LLC ?= llc
5 8 LLVM_STRIP ?= llvm-strip
6 - DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
9 + BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
10 + DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool
7 11 BPFTOOL ?= $(DEFAULT_BPFTOOL)
8 12 LIBBPF_SRC := $(abspath ../../lib/bpf)
9 - BPFOBJ := $(OUTPUT)/libbpf.a
10 - BPF_INCLUDE := $(OUTPUT)
13 + BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
14 + BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
15 + BPF_INCLUDE := $(BPFOBJ_OUTPUT)
11 16 INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \
12 17 -I$(abspath ../../include/uapi)
13 18 CFLAGS := -g -Wall
···
23 18 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
24 19 $(wildcard $(VMLINUX_BTF_PATHS))))
25 20
26 - abs_out := $(abspath $(OUTPUT))
27 21 ifeq ($(V),1)
28 22 Q =
29 - msg =
30 23 else
31 24 Q = @
32 - msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
33 25 MAKEFLAGS += --no-print-directory
34 26 submake_extras := feature_display=0
35 27 endif
···
39 37 runqslower: $(OUTPUT)/runqslower
40 38
41 39 clean:
42 - $(call msg,CLEAN)
43 - $(Q)rm -rf $(OUTPUT) runqslower
40 + $(call QUIET_CLEAN, runqslower)
41 + $(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
42 + $(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
43 + $(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
44 + $(Q)$(RM) $(OUTPUT)runqslower
45 + $(Q)$(RM) -r .output
44 46
45 47 $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
46 - $(call msg,BINARY,$@)
47 - $(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
48 + $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
48 49
49 50 $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \
50 51 $(OUTPUT)/runqslower.bpf.o
···
55 50 $(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h
56 51
57 52 $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
58 - $(call msg,GEN-SKEL,$@)
59 - $(Q)$(BPFTOOL) gen skeleton $< > $@
53 + $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
60 54
61 55 $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
62 - $(call msg,BPF,$@)
63 - $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \
56 + $(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES) \
64 57 -c $(filter %.c,$^) -o $@ && \
65 58 $(LLVM_STRIP) -g $@
66 59
67 60 $(OUTPUT)/%.o: %.c | $(OUTPUT)
68 - $(call msg,CC,$@)
69 - $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
61 + $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
70 62
71 - $(OUTPUT):
72 - $(call msg,MKDIR,$@)
73 - $(Q)mkdir -p $(OUTPUT)
63 + $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
64 + $(QUIET_MKDIR)mkdir -p $@
74 65
75 66 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
76 - $(call msg,GEN,$@)
77 67 $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
78 68 echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
79 69 "specify its location." >&2; \
80 70 exit 1;\
81 71 fi
82 - $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
72 + $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
83 73
84 - $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
85 - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \
86 - OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
74 + $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
75 + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
87 76
88 - $(DEFAULT_BPFTOOL):
89 - $(Q)$(MAKE) $(submake_extras) -C ../bpftool \
90 - prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
77 + $(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
78 + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \
79 + CC=$(HOSTCC) LD=$(HOSTLD)
-4
tools/build/Makefile
···
15 15 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
16 16 $(call allow-override,LD,$(CROSS_COMPILE)ld)
17 17
18 - HOSTCC ?= gcc
19 - HOSTLD ?= ld
20 - HOSTAR ?= ar
21 -
22 18 export HOSTCC HOSTLD HOSTAR
23 19
24 20 ifeq ($(V),1)
+51
tools/include/uapi/linux/bpf.h
···
157 157 BPF_MAP_TYPE_STRUCT_OPS,
158 158 BPF_MAP_TYPE_RINGBUF,
159 159 BPF_MAP_TYPE_INODE_STORAGE,
160 + BPF_MAP_TYPE_TASK_STORAGE,
160 161 };
161 162
162 163 /* Note that tracing related programs such as
···
3743 3742 * Return
3744 3743 * The helper returns **TC_ACT_REDIRECT** on success or
3745 3744 * **TC_ACT_SHOT** on error.
3745 + *
3746 + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
3747 + * Description
3748 + * Get a bpf_local_storage from the *task*.
3749 + *
3750 + * Logically, it could be thought of as getting the value from
3751 + * a *map* with *task* as the **key**. From this
3752 + * perspective, the usage is not much different from
3753 + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
3754 + * helper enforces the key must be an task_struct and the map must also
3755 + * be a **BPF_MAP_TYPE_TASK_STORAGE**.
3756 + *
3757 + * Underneath, the value is stored locally at *task* instead of
3758 + * the *map*. The *map* is used as the bpf-local-storage
3759 + * "type". The bpf-local-storage "type" (i.e. the *map*) is
3760 + * searched against all bpf_local_storage residing at *task*.
3761 + *
3762 + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
3763 + * used such that a new bpf_local_storage will be
3764 + * created if one does not exist. *value* can be used
3765 + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
3766 + * the initial value of a bpf_local_storage. If *value* is
3767 + * **NULL**, the new bpf_local_storage will be zero initialized.
3768 + * Return
3769 + * A bpf_local_storage pointer is returned on success.
3770 + *
3771 + * **NULL** if not found or there was an error in adding
3772 + * a new bpf_local_storage.
3773 + *
3774 + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
3775 + * Description
3776 + * Delete a bpf_local_storage from a *task*.
3777 + * Return
3778 + * 0 on success.
3779 + *
3780 + * **-ENOENT** if the bpf_local_storage cannot be found.
3781 + *
3782 + * struct task_struct *bpf_get_current_task_btf(void)
3783 + * Description
3784 + * Return a BTF pointer to the "current" task.
3785 + * This pointer can also be used in helpers that accept an
3786 + * *ARG_PTR_TO_BTF_ID* of type *task_struct*.
3787 + * Return
3788 + * Pointer to the current task.
3746 3789 */
3747 3790 #define __BPF_FUNC_MAPPER(FN) \
3748 3791 FN(unspec), \
···
3945 3900 FN(bpf_per_cpu_ptr), \
3946 3901 FN(bpf_this_cpu_ptr), \
3947 3902 FN(redirect_peer), \
3903 + FN(task_storage_get), \
3904 + FN(task_storage_delete), \
3905 + FN(get_current_task_btf), \
3948 3906 /* */
3949 3907
3950 3908 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
···
4466 4418 __aligned_u64 btf;
4467 4419 __u32 btf_size;
4468 4420 __u32 id;
4421 + __aligned_u64 name;
4422 + __u32 name_len;
4423 + __u32 kernel_btf;
4469 4424 } __attribute__((aligned(8)));
4470 4425
4471 4426 struct bpf_link_info {
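The new task-storage helpers above follow the same pattern as the existing sk/inode local storage. A minimal BPF-side sketch of how they combine with **bpf_get_current_task_btf**() — map name, section, and the `struct task_ctx` layout are illustrative, not taken from this series; this is the general shape, not the series' selftest:

```c
/* Sketch: counting execs per task with BPF_MAP_TYPE_TASK_STORAGE.
 * Assumes a BPF LSM build (CONFIG_BPF_LSM) and a generated vmlinux.h;
 * compiled with clang -target bpf, not as a host program.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct task_ctx {
	__u64 exec_count;          /* per-task counter, zero-initialized */
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct task_ctx);
} task_map SEC(".maps");

SEC("lsm/bprm_committed_creds")
int BPF_PROG(count_execs, struct linux_binprm *bprm)
{
	struct task_struct *task = bpf_get_current_task_btf();
	struct task_ctx *ctx;

	/* create-on-miss; NULL value arg means zero-initialize */
	ctx = bpf_task_storage_get(&task_map, task, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (ctx)
		ctx->exec_count++;
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```

As with sk_storage, the value lives on the `task_struct` itself rather than in a central hash table, so lookup cost does not grow with the number of tasks.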
+473 -336
tools/lib/bpf/btf.c
··· 78 78 void *types_data; 79 79 size_t types_data_cap; /* used size stored in hdr->type_len */ 80 80 81 - /* type ID to `struct btf_type *` lookup index */ 81 + /* type ID to `struct btf_type *` lookup index 82 + * type_offs[0] corresponds to the first non-VOID type: 83 + * - for base BTF it's type [1]; 84 + * - for split BTF it's the first non-base BTF type. 85 + */ 82 86 __u32 *type_offs; 83 87 size_t type_offs_cap; 88 + /* number of types in this BTF instance: 89 + * - doesn't include special [0] void type; 90 + * - for split BTF counts number of types added on top of base BTF. 91 + */ 84 92 __u32 nr_types; 93 + /* if not NULL, points to the base BTF on top of which the current 94 + * split BTF is based 95 + */ 96 + struct btf *base_btf; 97 + /* BTF type ID of the first type in this BTF instance: 98 + * - for base BTF it's equal to 1; 99 + * - for split BTF it's equal to biggest type ID of base BTF plus 1. 100 + */ 101 + int start_id; 102 + /* logical string offset of this BTF instance: 103 + * - for base BTF it's equal to 0; 104 + * - for split BTF it's equal to total size of base BTF's string section size. 105 + */ 106 + int start_str_off; 85 107 86 108 void *strs_data; 87 109 size_t strs_data_cap; /* used size stored in hdr->str_len */ ··· 112 90 struct hashmap *strs_hash; 113 91 /* whether strings are already deduplicated */ 114 92 bool strs_deduped; 93 + /* extra indirection layer to make strings hashmap work with stable 94 + * string offsets and ability to transparently choose between 95 + * btf->strs_data or btf_dedup->strs_data as a source of strings. 96 + * This is used for BTF strings dedup to transfer deduplicated strings 97 + * data back to struct btf without re-building strings index. 
98 + */ 99 + void **strs_data_ptr; 100 + 115 101 /* BTF object FD, if loaded into kernel */ 116 102 int fd; 117 103 ··· 198 168 __u32 *p; 199 169 200 170 p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), 201 - btf->nr_types + 1, BTF_MAX_NR_TYPES, 1); 171 + btf->nr_types, BTF_MAX_NR_TYPES, 1); 202 172 if (!p) 203 173 return -ENOMEM; 204 174 ··· 245 215 return -EINVAL; 246 216 } 247 217 248 - if (meta_left < hdr->type_off) { 249 - pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); 218 + if (meta_left < hdr->str_off + hdr->str_len) { 219 + pr_debug("Invalid BTF total size:%u\n", btf->raw_size); 250 220 return -EINVAL; 251 221 } 252 222 253 - if (meta_left < hdr->str_off) { 254 - pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); 223 + if (hdr->type_off + hdr->type_len > hdr->str_off) { 224 + pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", 225 + hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); 255 226 return -EINVAL; 256 227 } 257 228 258 - if (hdr->type_off >= hdr->str_off) { 259 - pr_debug("BTF type section offset >= string section offset. 
No type?\n"); 260 - return -EINVAL; 261 - } 262 - 263 - if (hdr->type_off & 0x02) { 229 + if (hdr->type_off % 4) { 264 230 pr_debug("BTF type section is not aligned to 4 bytes\n"); 265 231 return -EINVAL; 266 232 } ··· 270 244 const char *start = btf->strs_data; 271 245 const char *end = start + btf->hdr->str_len; 272 246 273 - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || 274 - start[0] || end[-1]) { 247 + if (btf->base_btf && hdr->str_len == 0) 248 + return 0; 249 + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { 275 250 pr_debug("Invalid BTF string section\n"); 276 251 return -EINVAL; 277 252 } 278 - 253 + if (!btf->base_btf && start[0]) { 254 + pr_debug("Invalid BTF string section\n"); 255 + return -EINVAL; 256 + } 279 257 return 0; 280 258 } 281 259 ··· 394 364 struct btf_header *hdr = btf->hdr; 395 365 void *next_type = btf->types_data; 396 366 void *end_type = next_type + hdr->type_len; 397 - int err, i = 0, type_size; 398 - 399 - /* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), 400 - * so ensure we can never properly use its offset from index by 401 - * setting it to a large value 402 - */ 403 - err = btf_add_type_idx_entry(btf, UINT_MAX); 404 - if (err) 405 - return err; 367 + int err, type_size; 406 368 407 369 while (next_type + sizeof(struct btf_type) <= end_type) { 408 - i++; 409 - 410 370 if (btf->swapped_endian) 411 371 btf_bswap_type_base(next_type); 412 372 ··· 404 384 if (type_size < 0) 405 385 return type_size; 406 386 if (next_type + type_size > end_type) { 407 - pr_warn("BTF type [%d] is malformed\n", i); 387 + pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types); 408 388 return -EINVAL; 409 389 } 410 390 ··· 429 409 430 410 __u32 btf__get_nr_types(const struct btf *btf) 431 411 { 432 - return btf->nr_types; 412 + return btf->start_id + btf->nr_types - 1; 433 413 } 434 414 435 415 /* internal helper returning non-const pointer to a type */ ··· 437 417 { 438 418 if 
(type_id == 0) 439 419 return &btf_void; 440 - 441 - return btf->types_data + btf->type_offs[type_id]; 420 + if (type_id < btf->start_id) 421 + return btf_type_by_id(btf->base_btf, type_id); 422 + return btf->types_data + btf->type_offs[type_id - btf->start_id]; 442 423 } 443 424 444 425 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) 445 426 { 446 - if (type_id > btf->nr_types) 427 + if (type_id >= btf->start_id + btf->nr_types) 447 428 return NULL; 448 429 return btf_type_by_id((struct btf *)btf, type_id); 449 430 } ··· 453 432 { 454 433 const struct btf_type *t; 455 434 const char *name; 456 - int i; 435 + int i, n; 457 436 458 - for (i = 1; i <= btf->nr_types; i++) { 437 + if (btf->base_btf && btf->base_btf->ptr_sz > 0) 438 + return btf->base_btf->ptr_sz; 439 + 440 + n = btf__get_nr_types(btf); 441 + for (i = 1; i <= n; i++) { 459 442 t = btf__type_by_id(btf, i); 460 443 if (!btf_is_int(t)) 461 444 continue; ··· 742 717 free(btf); 743 718 } 744 719 745 - struct btf *btf__new_empty(void) 720 + static struct btf *btf_new_empty(struct btf *base_btf) 746 721 { 747 722 struct btf *btf; 748 723 ··· 750 725 if (!btf) 751 726 return ERR_PTR(-ENOMEM); 752 727 728 + btf->nr_types = 0; 729 + btf->start_id = 1; 730 + btf->start_str_off = 0; 753 731 btf->fd = -1; 754 732 btf->ptr_sz = sizeof(void *); 755 733 btf->swapped_endian = false; 756 734 735 + if (base_btf) { 736 + btf->base_btf = base_btf; 737 + btf->start_id = btf__get_nr_types(base_btf) + 1; 738 + btf->start_str_off = base_btf->hdr->str_len; 739 + } 740 + 757 741 /* +1 for empty string at offset 0 */ 758 - btf->raw_size = sizeof(struct btf_header) + 1; 742 + btf->raw_size = sizeof(struct btf_header) + (base_btf ? 
0 : 1); 759 743 btf->raw_data = calloc(1, btf->raw_size); 760 744 if (!btf->raw_data) { 761 745 free(btf); ··· 778 744 779 745 btf->types_data = btf->raw_data + btf->hdr->hdr_len; 780 746 btf->strs_data = btf->raw_data + btf->hdr->hdr_len; 781 - btf->hdr->str_len = 1; /* empty string at offset 0 */ 747 + btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ 782 748 783 749 return btf; 784 750 } 785 751 786 - struct btf *btf__new(const void *data, __u32 size) 752 + struct btf *btf__new_empty(void) 753 + { 754 + return btf_new_empty(NULL); 755 + } 756 + 757 + struct btf *btf__new_empty_split(struct btf *base_btf) 758 + { 759 + return btf_new_empty(base_btf); 760 + } 761 + 762 + static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) 787 763 { 788 764 struct btf *btf; 789 765 int err; ··· 801 757 btf = calloc(1, sizeof(struct btf)); 802 758 if (!btf) 803 759 return ERR_PTR(-ENOMEM); 760 + 761 + btf->nr_types = 0; 762 + btf->start_id = 1; 763 + btf->start_str_off = 0; 764 + 765 + if (base_btf) { 766 + btf->base_btf = base_btf; 767 + btf->start_id = btf__get_nr_types(base_btf) + 1; 768 + btf->start_str_off = base_btf->hdr->str_len; 769 + } 804 770 805 771 btf->raw_data = malloc(size); 806 772 if (!btf->raw_data) { ··· 844 790 return btf; 845 791 } 846 792 847 - struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) 793 + struct btf *btf__new(const void *data, __u32 size) 794 + { 795 + return btf_new(data, size, NULL); 796 + } 797 + 798 + static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, 799 + struct btf_ext **btf_ext) 848 800 { 849 801 Elf_Data *btf_data = NULL, *btf_ext_data = NULL; 850 802 int err = 0, fd = -1, idx = 0; ··· 928 868 err = -ENOENT; 929 869 goto done; 930 870 } 931 - btf = btf__new(btf_data->d_buf, btf_data->d_size); 871 + btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); 932 872 if (IS_ERR(btf)) 933 873 goto done; 934 874 ··· 973 913 return btf; 974 914 } 975 915 976 
- struct btf *btf__parse_raw(const char *path) 916 + struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) 917 + { 918 + return btf_parse_elf(path, NULL, btf_ext); 919 + } 920 + 921 + struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf) 922 + { 923 + return btf_parse_elf(path, base_btf, NULL); 924 + } 925 + 926 + static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) 977 927 { 978 928 struct btf *btf = NULL; 979 929 void *data = NULL; ··· 1037 967 } 1038 968 1039 969 /* finally parse BTF data */ 1040 - btf = btf__new(data, sz); 970 + btf = btf_new(data, sz, base_btf); 1041 971 1042 972 err_out: 1043 973 free(data); ··· 1046 976 return err ? ERR_PTR(err) : btf; 1047 977 } 1048 978 1049 - struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) 979 + struct btf *btf__parse_raw(const char *path) 980 + { 981 + return btf_parse_raw(path, NULL); 982 + } 983 + 984 + struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) 985 + { 986 + return btf_parse_raw(path, base_btf); 987 + } 988 + 989 + static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) 1050 990 { 1051 991 struct btf *btf; 1052 992 1053 993 if (btf_ext) 1054 994 *btf_ext = NULL; 1055 995 1056 - btf = btf__parse_raw(path); 996 + btf = btf_parse_raw(path, base_btf); 1057 997 if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO) 1058 998 return btf; 1059 999 1060 - return btf__parse_elf(path, btf_ext); 1000 + return btf_parse_elf(path, base_btf, btf_ext); 1001 + } 1002 + 1003 + struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) 1004 + { 1005 + return btf_parse(path, NULL, btf_ext); 1006 + } 1007 + 1008 + struct btf *btf__parse_split(const char *path, struct btf *base_btf) 1009 + { 1010 + return btf_parse(path, base_btf, NULL); 1061 1011 } 1062 1012 1063 1013 static int compare_vsi_off(const void *_a, const void *_b) ··· 1261 1171 1262 1172 memcpy(p, btf->types_data, hdr->type_len); 1263 
 	if (swap_endian) {
-		for (i = 1; i <= btf->nr_types; i++) {
-			t = p + btf->type_offs[i];
+		for (i = 0; i < btf->nr_types; i++) {
+			t = p + btf->type_offs[i];
 			/* btf_bswap_type_rest() relies on native t->info, so
 			 * we swap base type info after we swapped all the
 			 * additional information
···
 const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 {
-	if (offset < btf->hdr->str_len)
-		return btf->strs_data + offset;
+	if (offset < btf->start_str_off)
+		return btf__str_by_offset(btf->base_btf, offset);
+	else if (offset - btf->start_str_off < btf->hdr->str_len)
+		return btf->strs_data + (offset - btf->start_str_off);
 	else
 		return NULL;
 }
···
 static size_t strs_hash_fn(const void *key, void *ctx)
 {
-	struct btf *btf = ctx;
-	const char *str = btf->strs_data + (long)key;
+	const struct btf *btf = ctx;
+	const char *strs = *btf->strs_data_ptr;
+	const char *str = strs + (long)key;

 	return str_hash(str);
 }

 static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
 {
-	struct btf *btf = ctx;
-	const char *str1 = btf->strs_data + (long)key1;
-	const char *str2 = btf->strs_data + (long)key2;
+	const struct btf *btf = ctx;
+	const char *strs = *btf->strs_data_ptr;
+	const char *str1 = strs + (long)key1;
+	const char *str2 = strs + (long)key2;

 	return strcmp(str1, str2) == 0;
 }
···
 	memcpy(types, btf->types_data, btf->hdr->type_len);
 	memcpy(strs, btf->strs_data, btf->hdr->str_len);

+	/* make hashmap below use btf->strs_data as a source of strings */
+	btf->strs_data_ptr = &btf->strs_data;
+
 	/* build lookup index for all strings */
 	hash = hashmap__new(strs_hash_fn,
			    strs_hash_equal_fn, btf);
 	if (IS_ERR(hash)) {
···
 	/* if BTF was created from scratch, all strings are guaranteed to be
 	 * unique and deduplicated
 	 */
-	btf->strs_deduped = btf->hdr->str_len <= 1;
+	if (btf->hdr->str_len == 0)
+		btf->strs_deduped = true;
+	if (!btf->base_btf && btf->hdr->str_len == 1)
+		btf->strs_deduped = true;

 	/* invalidate raw_data representation */
 	btf_invalidate_raw_data(btf);
···
 	long old_off, new_off, len;
 	void *p;

+	if (btf->base_btf) {
+		int ret;
+
+		ret = btf__find_str(btf->base_btf, s);
+		if (ret != -ENOENT)
+			return ret;
+	}
+
 	/* BTF needs to be in a modifiable state to build string lookup index */
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
···
 	memcpy(p, s, len);

 	if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
-		return old_off;
+		return btf->start_str_off + old_off;

 	return -ENOENT;
 }
···
 	long old_off, new_off, len;
 	void *p;
 	int err;
+
+	if (btf->base_btf) {
+		int ret;
+
+		ret = btf__find_str(btf->base_btf, s);
+		if (ret != -ENOENT)
+			return ret;
+	}

 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
···
 	err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
 			      HASHMAP_ADD, (const void **)&old_off, NULL);
 	if (err == -EEXIST)
		return btf->start_str_off + old_off; /* duplicated string, return existing offset */
 	if (err)
 		return err;

 	btf->hdr->str_len += len; /* new unique string, adjust data length */
-	return new_off;
+	return btf->start_str_off + new_off;
 }

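The string-offset scheme that `btf__str_by_offset()` and `btf__add_str()` implement above can be sketched standalone: an offset below `start_str_off` refers into the base string section, anything else is local at `offset - start_str_off`. A minimal model under assumed names (the `mini_strs` struct and helper are hypothetical, not libbpf API):

```c
#include <assert.h>
#include <stddef.h>
#include <string.h>

/* Hypothetical sketch of split-BTF string sections: each level stores only
 * its own '\0'-separated strings; global offsets are partitioned at
 * start_str_off (which equals the base section's total length). */
struct mini_strs {
	const char *data;        /* local strings, '\0'-separated */
	unsigned int len;        /* local section length in bytes */
	unsigned int start_str_off; /* 0 for a base (non-split) section */
	const struct mini_strs *base;
};

static const char *mini_str_by_offset(const struct mini_strs *s, unsigned int off)
{
	if (off < s->start_str_off)
		return mini_str_by_offset(s->base, off); /* delegate to base */
	if (off - s->start_str_off < s->len)
		return s->data + (off - s->start_str_off); /* local string */
	return NULL; /* out of range */
}
```

This is also why `btf__add_str()` first probes `btf__find_str(btf->base_btf, s)`: a string already interned in the base keeps its base offset, and only genuinely new strings land in the split section at `start_str_off + new_off`.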
 static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
···
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
 }

+static int btf_commit_type(struct btf *btf, int data_sz)
+{
+	int err;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += data_sz;
+	btf->hdr->str_off += data_sz;
+	btf->nr_types++;
+	return btf->start_id + btf->nr_types - 1;
+}
+
 /*
  * Append new BTF_KIND_INT type with:
  *   - *name* - non-empty, non-NULL type name;
···
 int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
 {
 	struct btf_type *t;
-	int sz, err, name_off;
+	int sz, name_off;

 	/* non-empty name */
 	if (!name || !name[0])
···
 	/* set INT info, we don't allow setting legacy bit offset/size */
 	*(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /* it's completely legal to append BTF types with type IDs pointing forward to
···
 static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
 {
 	struct btf_type *t;
-	int sz, name_off = 0, err;
+	int sz, name_off = 0;

 	if (validate_type_id(ref_type_id))
 		return -EINVAL;
···
 	t->info = btf_type_info(kind, 0, 0);
 	t->type = ref_type_id;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /*
···
 {
 	struct btf_type *t;
 	struct btf_array *a;
-	int sz, err;
+	int sz;

 	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
 		return -EINVAL;
···
 	a->index_type = index_type_id;
 	a->nelems = nr_elems;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /* generic STRUCT/UNION append function */
 static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off = 0;
+	int sz, name_off = 0;

 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
···
 	t->info = btf_type_info(kind, 0, 0);
 	t->size = bytes_sz;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /*
···
 	return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
 }

+static struct btf_type *btf_last_type(struct btf *btf)
+{
+	return btf_type_by_id(btf, btf__get_nr_types(btf));
+}
+
 /*
  * Append new field for the current STRUCT/UNION type with:
  *   - *name* - name of the field, can be NULL or empty for anonymous field;
···
 	/* last type should be union/struct */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf,
			   btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_composite(t))
 		return -EINVAL;
···
 	m->offset = bit_offset | (bit_size << 24);

 	/* btf_add_type_mem can invalidate t pointer */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	/* update parent type's vlen and kflag */
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
···
 int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off = 0;
+	int sz, name_off = 0;

 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
···
 	t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
 	t->size = byte_sz;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /*
···
 	/* last type should be BTF_KIND_ENUM */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_enum(t))
 		return -EINVAL;
···
 	v->val = value;

 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);

 	btf->hdr->type_len += sz;
···
 int btf__add_func_proto(struct btf *btf, int ret_type_id)
 {
 	struct btf_type *t;
-	int sz, err;
+	int sz;

 	if (validate_type_id(ret_type_id))
 		return -EINVAL;
···
 	t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
 	t->type =
		  ret_type_id;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /*
···
 	/* last type should be BTF_KIND_FUNC_PROTO */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_func_proto(t))
 		return -EINVAL;
···
 	p->type = type_id;

 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);

 	btf->hdr->type_len += sz;
···
 {
 	struct btf_type *t;
 	struct btf_var *v;
-	int sz, err, name_off;
+	int sz, name_off;

 	/* non-empty name */
 	if (!name || !name[0])
···
 	v = btf_var(t);
 	v->linkage = linkage;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }

 /*
···
 int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off;
+	int sz, name_off;

 	/* non-empty name */
 	if (!name || !name[0])
···
 	t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
 	t->size = byte_sz;

-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return
		btf_commit_type(btf, sz);
 }

 /*
···
 	/* last type should be BTF_KIND_DATASEC */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_datasec(t))
 		return -EINVAL;
···
 	v->size = byte_sz;

 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);

 	btf->hdr->type_len += sz;
···
 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 				       const struct btf_dedup_opts *opts);
 static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_prep(struct btf_dedup *d);
 static int btf_dedup_strings(struct btf_dedup *d);
 static int btf_dedup_prim_types(struct btf_dedup *d);
 static int btf_dedup_struct_types(struct btf_dedup *d);
···
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;

+	err = btf_dedup_prep(d);
+	if (err) {
+		pr_debug("btf_dedup_prep failed:%d\n", err);
+		goto done;
+	}
 	err = btf_dedup_strings(d);
 	if (err < 0) {
 		pr_debug("btf_dedup_strings failed:%d\n", err);
···
 	__u32 *hypot_list;
 	size_t hypot_cnt;
 	size_t hypot_cap;
+	/* Whether hypothetical mapping, if successful, would need to adjust
+	 * already canonicalized types (due to a new forward declaration to
+	 * concrete type resolution). In such case, during split BTF dedup
+	 * candidate type would still be considered as different, because base
+	 * BTF is considered to be immutable.
+	 */
+	bool hypot_adjust_canon;
 	/* Various option modifying behavior of algorithm */
 	struct btf_dedup_opts opts;
-};
-
-struct btf_str_ptr {
-	const char *str;
-	__u32 new_off;
-	bool used;
-};
-
-struct btf_str_ptrs {
-	struct btf_str_ptr *ptrs;
-	const char *data;
-	__u32 cnt;
-	__u32 cap;
+	/* temporary strings deduplication state */
+	void *strs_data;
+	size_t strs_cap;
+	size_t strs_len;
+	struct hashmap *strs_hash;
 };

 static long hash_combine(long h, long value)
···
 	for (i = 0; i < d->hypot_cnt; i++)
 		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
 	d->hypot_cnt = 0;
+	d->hypot_adjust_canon = false;
 }

 static void btf_dedup_free(struct btf_dedup *d)
···
 {
 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
 	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
-	int i, err = 0;
+	int i, err = 0, type_cnt;

 	if (!d)
 		return ERR_PTR(-ENOMEM);
···
 		goto done;
 	}

-	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	type_cnt = btf__get_nr_types(btf) + 1;
+	d->map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->map) {
 		err = -ENOMEM;
 		goto done;
 	}
 	/* special BTF "void" type is made canonical immediately */
 	d->map[0] = 0;
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i < type_cnt; i++) {
 		struct btf_type *t = btf_type_by_id(d->btf, i);

 		/* VAR and DATASEC are never deduped and are self-canonical */
···
 		d->map[i] = BTF_UNPROCESSED_ID;
 	}

-	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	d->hypot_map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->hypot_map) {
 		err =
		      -ENOMEM;
 		goto done;
 	}
-	for (i = 0; i <= btf->nr_types; i++)
+	for (i = 0; i < type_cnt; i++)
 		d->hypot_map[i] = BTF_UNPROCESSED_ID;

 done:
···
 	int i, j, r, rec_size;
 	struct btf_type *t;

-	for (i = 1; i <= d->btf->nr_types; i++) {
-		t = btf_type_by_id(d->btf, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		t = btf_type_by_id(d->btf, d->btf->start_id + i);
 		r = fn(&t->name_off, ctx);
 		if (r)
 			return r;
···
 	return 0;
 }

-static int str_sort_by_content(const void *a1, const void *a2)
+static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 {
-	const struct btf_str_ptr *p1 = a1;
-	const struct btf_str_ptr *p2 = a2;
+	struct btf_dedup *d = ctx;
+	__u32 str_off = *str_off_ptr;
+	long old_off, new_off, len;
+	const char *s;
+	void *p;
+	int err;

-	return strcmp(p1->str, p2->str);
-}
-
-static int str_sort_by_offset(const void *a1, const void *a2)
-{
-	const struct btf_str_ptr *p1 = a1;
-	const struct btf_str_ptr *p2 = a2;
-
-	if (p1->str != p2->str)
-		return p1->str < p2->str ? -1 : 1;
-	return 0;
-}
-
-static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
-{
-	const struct btf_str_ptr *p = pelem;
-
-	if (str_ptr != p->str)
-		return (const char *)str_ptr < p->str ?
		       -1 : 1;
-	return 0;
-}
-
-static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
-{
-	struct btf_str_ptrs *strs;
-	struct btf_str_ptr *s;
-
-	if (*str_off_ptr == 0)
+	/* don't touch empty string or string in main BTF */
+	if (str_off == 0 || str_off < d->btf->start_str_off)
 		return 0;

-	strs = ctx;
-	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
-		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
-	if (!s)
-		return -EINVAL;
-	s->used = true;
-	return 0;
-}
+	s = btf__str_by_offset(d->btf, str_off);
+	if (d->btf->base_btf) {
+		err = btf__find_str(d->btf->base_btf, s);
+		if (err >= 0) {
+			*str_off_ptr = err;
+			return 0;
+		}
+		if (err != -ENOENT)
+			return err;
+	}

-static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
-{
-	struct btf_str_ptrs *strs;
-	struct btf_str_ptr *s;
+	len = strlen(s) + 1;

-	if (*str_off_ptr == 0)
-		return 0;
+	new_off = d->strs_len;
+	p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
+	if (!p)
+		return -ENOMEM;

-	strs = ctx;
-	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
-		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
-	if (!s)
-		return -EINVAL;
-	*str_off_ptr = s->new_off;
+	memcpy(p, s, len);
+
+	/* Now attempt to add the string, but only if the string with the same
+	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
+	 * string exists, we'll get its offset in old_off (that's old_key).
+	 */
+	err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
+			      HASHMAP_ADD, (const void **)&old_off, NULL);
+	if (err == -EEXIST) {
+		*str_off_ptr = d->btf->start_str_off + old_off;
+	} else if (err) {
+		return err;
+	} else {
+		*str_off_ptr = d->btf->start_str_off + new_off;
+		d->strs_len += len;
+	}
 	return 0;
 }
···
 */
 static int btf_dedup_strings(struct btf_dedup *d)
 {
-	char *start = d->btf->strs_data;
-	char *end = start + d->btf->hdr->str_len;
-	char *p = start, *tmp_strs = NULL;
-	struct btf_str_ptrs strs = {
-		.cnt = 0,
-		.cap = 0,
-		.ptrs = NULL,
-		.data = start,
-	};
-	int i, j, err = 0, grp_idx;
-	bool grp_used;
+	char *s;
+	int err;

 	if (d->btf->strs_deduped)
 		return 0;

-	/* build index of all strings */
-	while (p < end) {
-		if (strs.cnt + 1 > strs.cap) {
-			struct btf_str_ptr *new_ptrs;
-
-			strs.cap += max(strs.cnt / 2, 16U);
-			new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
-			if (!new_ptrs) {
-				err = -ENOMEM;
-				goto done;
-			}
-			strs.ptrs = new_ptrs;
-		}
-
-		strs.ptrs[strs.cnt].str = p;
-		strs.ptrs[strs.cnt].used = false;
-
-		p += strlen(p) + 1;
-		strs.cnt++;
-	}
-
-	/* temporary storage for deduplicated strings */
-	tmp_strs = malloc(d->btf->hdr->str_len);
-	if (!tmp_strs) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	/* mark all used strings */
-	strs.ptrs[0].used = true;
-	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
-	if (err)
-		goto done;
-
-	/* sort strings by content, so that we can identify duplicates */
-	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
-
-	/*
-	 * iterate groups of equal strings and if any instance in a group was
-	 * referenced, emit single instance and remember new offset
+	/* temporarily switch to use btf_dedup's strs_data for strings for hash
+	 * functions; later we'll just transfer hashmap to struct btf as is,
+	 * along the strs_data
 	 */
-	p = tmp_strs;
-	grp_idx = 0;
-	grp_used = strs.ptrs[0].used;
-	/* iterate past end to avoid code duplication after loop */
-	for (i = 1; i <= strs.cnt; i++) {
-		/*
-		 * when i == strs.cnt, we want to skip string comparison and go
-		 * straight to handling last group of strings (otherwise we'd
-		 * need to handle last group after the loop w/ duplicated code)
-		 */
-		if (i < strs.cnt &&
-		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
-			grp_used = grp_used || strs.ptrs[i].used;
-			continue;
-		}
+	d->btf->strs_data_ptr = &d->strs_data;

-		/*
-		 * this check would have been required after the loop to handle
-		 * last group of strings, but due to <= condition in a loop
-		 * we avoid that duplication
-		 */
-		if (grp_used) {
-			int new_off = p - tmp_strs;
-			__u32 len = strlen(strs.ptrs[grp_idx].str);
-
-			memmove(p, strs.ptrs[grp_idx].str, len + 1);
-			for (j = grp_idx; j < i; j++)
-				strs.ptrs[j].new_off = new_off;
-			p += len + 1;
-		}
-
-		if (i < strs.cnt) {
-			grp_idx = i;
-			grp_used = strs.ptrs[i].used;
-		}
+	d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
+	if (IS_ERR(d->strs_hash)) {
+		err = PTR_ERR(d->strs_hash);
+		d->strs_hash = NULL;
+		goto err_out;
 	}

-	/* replace original strings with deduped ones */
-	d->btf->hdr->str_len = p - tmp_strs;
-	memmove(start, tmp_strs, d->btf->hdr->str_len);
-	end = start + d->btf->hdr->str_len;
+	if (!d->btf->base_btf) {
+		s =
		    btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
+		if (!s)
+			return -ENOMEM;
+		/* initial empty string */
+		s[0] = 0;
+		d->strs_len = 1;

-	/* restore original order for further binary search lookups */
-	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+		/* insert empty string; we won't be looking it up during strings
+		 * dedup, but it's good to have it for generic BTF string lookups
+		 */
+		err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
+				      HASHMAP_ADD, NULL, NULL);
+		if (err)
+			goto err_out;
+	}

 	/* remap string offsets */
-	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+	err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
 	if (err)
-		goto done;
+		goto err_out;

-	d->btf->hdr->str_len = end - start;
+	/* replace BTF string data and hash with deduped ones */
+	free(d->btf->strs_data);
+	hashmap__free(d->btf->strs_hash);
+	d->btf->strs_data = d->strs_data;
+	d->btf->strs_data_cap = d->strs_cap;
+	d->btf->hdr->str_len = d->strs_len;
+	d->btf->strs_hash = d->strs_hash;
+	/* now point strs_data_ptr back to btf->strs_data */
+	d->btf->strs_data_ptr = &d->btf->strs_data;
+
+	d->strs_data = d->strs_hash = NULL;
+	d->strs_len = d->strs_cap = 0;
 	d->btf->strs_deduped = true;
+	return 0;

-done:
-	free(tmp_strs);
-	free(strs.ptrs);
+err_out:
+	free(d->strs_data);
+	hashmap__free(d->strs_hash);
+	d->strs_data = d->strs_hash = NULL;
+	d->strs_len = d->strs_cap = 0;
+
+	/* restore strings pointer for existing d->btf->strs_hash back */
+	d->btf->strs_data_ptr = &d->btf->strs_data;
+
 	return err;
 }
···
 	return true;
 }

+/* Prepare split BTF for
+ * deduplication by calculating hashes of base BTF's
+ * types and initializing the rest of the state (canonical type mapping) for
+ * the fixed base BTF part.
+ */
+static int btf_dedup_prep(struct btf_dedup *d)
+{
+	struct btf_type *t;
+	int type_id;
+	long h;
+
+	if (!d->btf->base_btf)
+		return 0;
+
+	for (type_id = 1; type_id < d->btf->start_id; type_id++) {
+		t = btf_type_by_id(d->btf, type_id);
+
+		/* all base BTF types are self-canonical by definition */
+		d->map[type_id] = type_id;
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_VAR:
+		case BTF_KIND_DATASEC:
+			/* VAR and DATASEC are never hash/deduplicated */
+			continue;
+		case BTF_KIND_CONST:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FUNC:
+			h = btf_hash_common(t);
+			break;
+		case BTF_KIND_INT:
+			h = btf_hash_int(t);
+			break;
+		case BTF_KIND_ENUM:
+			h = btf_hash_enum(t);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			h = btf_hash_struct(t);
+			break;
+		case BTF_KIND_ARRAY:
+			h = btf_hash_array(t);
+			break;
+		case BTF_KIND_FUNC_PROTO:
+			h = btf_hash_fnproto(t);
+			break;
+		default:
+			pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id);
+			return -EINVAL;
+		}
+		if (btf_dedup_table_add(d, h, type_id))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Deduplicate primitive types, that can't reference other types, by calculating
  * their type signature hash and comparing them with any possible canonical
···
 {
 	int i, err;

-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_prim_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_prim_type(d, d->btf->start_id + i);
 		if (err)
 			return err;
 	}
···
 static inline __u16 btf_fwd_kind(struct btf_type *t)
 {
 	return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
+}
+
+/* Check if given two types are identical ARRAY definitions */
+static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+	struct btf_type *t1, *t2;
+
+	t1 = btf_type_by_id(d->btf, id1);
+	t2 = btf_type_by_id(d->btf, id2);
+	if (!btf_is_array(t1) || !btf_is_array(t2))
+		return 0;
+
+	return btf_equal_array(t1, t2);
 }

 /*
···
 	canon_id = resolve_fwd_id(d, canon_id);

 	hypot_type_id = d->hypot_map[canon_id];
-	if (hypot_type_id <= BTF_MAX_NR_TYPES)
-		return hypot_type_id == cand_id;
+	if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+		/* In some cases compiler will generate different DWARF types
+		 * for *identical* array type definitions and use them for
+		 * different fields within the *same* struct. This breaks type
+		 * equivalence check, which makes an assumption that candidate
+		 * types sub-graph has a consistent and deduped-by-compiler
+		 * types within a single CU. So work around that by explicitly
+		 * allowing identical array types here.
+		 */
+		return hypot_type_id == cand_id ||
+		       btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
+	}

 	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
 		return -ENOMEM;
···
 	} else {
 		real_kind = cand_kind;
 		fwd_kind = btf_fwd_kind(canon_type);
+		/* we'd need to resolve base FWD to STRUCT/UNION */
+		if (fwd_kind == real_kind && canon_id < d->btf->start_id)
+			d->hypot_adjust_canon = true;
 	}
 	return fwd_kind == real_kind;
 }
···
 		return 0;
 	cand_arr = btf_array(cand_type);
 	canon_arr = btf_array(canon_type);
-	eq = btf_dedup_is_equiv(d,
-		cand_arr->index_type, canon_arr->index_type);
+	eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type);
 	if (eq <= 0)
 		return eq;
 	return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
···
 */
 static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 {
-	__u32 cand_type_id, targ_type_id;
+	__u32 canon_type_id, targ_type_id;
 	__u16 t_kind, c_kind;
 	__u32 t_id, c_id;
 	int i;

 	for (i = 0; i < d->hypot_cnt; i++) {
-		cand_type_id = d->hypot_list[i];
-		targ_type_id = d->hypot_map[cand_type_id];
+		canon_type_id = d->hypot_list[i];
+		targ_type_id = d->hypot_map[canon_type_id];
 		t_id = resolve_type_id(d, targ_type_id);
-		c_id = resolve_type_id(d, cand_type_id);
+		c_id = resolve_type_id(d, canon_type_id);
 		t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
 		c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
 		/*
···
 		 * stability is not a requirement for STRUCT/UNION equivalence
 		 * checks, though.
		 */
+
+		/* if it's the split BTF case, we still need to point base FWD
+		 * to STRUCT/UNION in a split BTF, because FWDs from split BTF
+		 * will be resolved against base FWD. If we don't point base
+		 * canonical FWD to the resolved STRUCT/UNION, then all the
+		 * FWDs in split BTF won't be correctly resolved to a proper
+		 * STRUCT/UNION.
+		 */
 		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
 			d->map[c_id] = t_id;
-		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+
+		/* if graph equivalence determined that we'd need to adjust
+		 * base canonical types, then we need to only point base FWDs
+		 * to STRUCTs/UNIONs and do no more modifications. For all
+		 * other purposes the type graphs were not equivalent.
+		 */
+		if (d->hypot_adjust_canon)
+			continue;
+
+		if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
 			d->map[t_id] = c_id;

 		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
···
 			return eq;
 		if (!eq)
 			continue;
-		new_id = cand_id;
 		btf_dedup_merge_hypot_map(d);
+		if (d->hypot_adjust_canon) /* not really equivalent */
+			continue;
+		new_id = cand_id;
 		break;
 	}
···
 {
 	int i, err;

-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_struct_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_struct_type(d, d->btf->start_id + i);
 		if (err)
 			return err;
 	}
···
 {
 	int i, err;

-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_ref_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_ref_type(d, d->btf->start_id + i);
 		if (err < 0)
 			return err;
 	}
···
 static int btf_dedup_compact_types(struct
btf_dedup *d) 4383 4251 { 4384 4252 __u32 *new_offs; 4385 - __u32 next_type_id = 1; 4253 + __u32 next_type_id = d->btf->start_id; 4254 + const struct btf_type *t; 4386 4255 void *p; 4387 - int i, len; 4256 + int i, id, len; 4388 4257 4389 4258 /* we are going to reuse hypot_map to store compaction remapping */ 4390 4259 d->hypot_map[0] = 0; 4391 - for (i = 1; i <= d->btf->nr_types; i++) 4392 - d->hypot_map[i] = BTF_UNPROCESSED_ID; 4260 + /* base BTF types are not renumbered */ 4261 + for (id = 1; id < d->btf->start_id; id++) 4262 + d->hypot_map[id] = id; 4263 + for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) 4264 + d->hypot_map[id] = BTF_UNPROCESSED_ID; 4393 4265 4394 4266 p = d->btf->types_data; 4395 4267 4396 - for (i = 1; i <= d->btf->nr_types; i++) { 4397 - if (d->map[i] != i) 4268 + for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) { 4269 + if (d->map[id] != id) 4398 4270 continue; 4399 4271 4400 - len = btf_type_size(btf__type_by_id(d->btf, i)); 4272 + t = btf__type_by_id(d->btf, id); 4273 + len = btf_type_size(t); 4401 4274 if (len < 0) 4402 4275 return len; 4403 4276 4404 - memmove(p, btf__type_by_id(d->btf, i), len); 4405 - d->hypot_map[i] = next_type_id; 4406 - d->btf->type_offs[next_type_id] = p - d->btf->types_data; 4277 + memmove(p, t, len); 4278 + d->hypot_map[id] = next_type_id; 4279 + d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data; 4407 4280 p += len; 4408 4281 next_type_id++; 4409 4282 } 4410 4283 4411 4284 /* shrink struct btf's internal types index and update btf_header */ 4412 - d->btf->nr_types = next_type_id - 1; 4413 - d->btf->type_offs_cap = d->btf->nr_types + 1; 4285 + d->btf->nr_types = next_type_id - d->btf->start_id; 4286 + d->btf->type_offs_cap = d->btf->nr_types; 4414 4287 d->btf->hdr->type_len = p - d->btf->types_data; 4415 4288 new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, 4416 4289 sizeof(*new_offs)); 4417 - if (!new_offs) 4290 + if 
(d->btf->type_offs_cap && !new_offs) 4418 4291 return -ENOMEM; 4419 4292 d->btf->type_offs = new_offs; 4420 4293 d->btf->hdr->str_off = d->btf->hdr->type_len; ··· 4551 4414 { 4552 4415 int i, r; 4553 4416 4554 - for (i = 1; i <= d->btf->nr_types; i++) { 4555 - r = btf_dedup_remap_type(d, i); 4417 + for (i = 0; i < d->btf->nr_types; i++) { 4418 + r = btf_dedup_remap_type(d, d->btf->start_id + i); 4556 4419 if (r < 0) 4557 4420 return r; 4558 4421 }
tools/lib/bpf/btf.h | +8
···
 };
 
 LIBBPF_API void btf__free(struct btf *btf);
+
 LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf);
 LIBBPF_API struct btf *btf__new_empty(void);
+LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
+
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
+LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
tools/lib/bpf/libbpf.map | +9
···
 		perf_buffer__consume_buffer;
 		xsk_socket__create_shared;
 } LIBBPF_0.1.0;
+
+LIBBPF_0.3.0 {
+	global:
+		btf__parse_elf_split;
+		btf__parse_raw_split;
+		btf__parse_split;
+		btf__new_empty_split;
+		btf__new_split;
+} LIBBPF_0.2.0;
tools/lib/bpf/libbpf_probes.c | +1
···
 		break;
 	case BPF_MAP_TYPE_SK_STORAGE:
 	case BPF_MAP_TYPE_INODE_STORAGE:
+	case BPF_MAP_TYPE_TASK_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;
tools/objtool/Makefile | -9
···
 include ../scripts/Makefile.arch
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR	?= llvm-ar
-HOSTCC	?= clang
-HOSTLD	?= ld.lld
-else
-HOSTAR	?= ar
-HOSTCC	?= gcc
-HOSTLD	?= ld
-endif
 AR	 = $(HOSTAR)
 CC	 = $(HOSTCC)
 LD	 = $(HOSTLD)
tools/perf/Makefile.perf | -4
···
 
 LD += $(EXTRA_LDFLAGS)
 
-HOSTCC ?= gcc
-HOSTLD ?= ld
-HOSTAR ?= ar
-
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
tools/power/acpi/Makefile.config | -1
···
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
 LD = $(CC)
-HOSTCC = gcc
 
 # check if compiler option is supported
 cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
tools/scripts/Makefile.include | +10
···
 $(call allow-override,CXX,$(CROSS_COMPILE)g++)
 $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
 
+ifneq ($(LLVM),)
+HOSTAR  ?= llvm-ar
+HOSTCC  ?= clang
+HOSTLD  ?= ld.lld
+else
+HOSTAR  ?= ar
+HOSTCC  ?= gcc
+HOSTLD  ?= ld
+endif
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif
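The hunks above hoist the `HOSTCC`/`HOSTLD`/`HOSTAR` defaults out of the per-tool Makefiles into the shared `tools/scripts/Makefile.include`, so `make LLVM=1` selects the LLVM host toolchain consistently. A tiny stand-alone repro of the `?=` behavior (the makefile path and contents below are illustrative, not part of the tree):

```shell
# Reproduce the conditional host-toolchain defaults with a throwaway makefile.
# .RECIPEPREFIX (GNU make >= 3.82) avoids literal-tab pitfalls in this demo.
cat > /tmp/host_tc.mk <<'EOF'
.RECIPEPREFIX = >
ifneq ($(LLVM),)
HOSTCC ?= clang
else
HOSTCC ?= gcc
endif
all:
> @echo HOSTCC=$(HOSTCC)
EOF

make -s -f /tmp/host_tc.mk            # default host compiler
make -s -f /tmp/host_tc.mk LLVM=1     # LLVM=1 flips the default to clang
make -s -f /tmp/host_tc.mk HOSTCC=cc  # '?=' never overrides a caller-set value
```

The key property is that `?=` only assigns when the variable is still undefined, so a cross/out-of-tree build can pre-set `HOSTCC` and still include this file safely.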
tools/testing/selftests/bpf/.gitignore | -1
···
 fixdep
 test_dev_cgroup
 /test_progs*
-test_tcpbpf_user
 test_verifier_log
 feature
 test_sock
tools/testing/selftests/bpf/Makefile | +2 -3
···
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_verifier_log test_dev_cgroup \
 	test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
 	test_netcnt test_tcpnotify_user test_sysctl \
···
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_socket_cookie: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
-$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
···
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
 			 network_helpers.c testing_helpers.c \
-			 flow_dissector_load.h
+			 btf_helpers.c flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
tools/testing/selftests/bpf/btf_helpers.c | +259 (new file)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <stdio.h>
#include <errno.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
#include "test_progs.h"

static const char * const btf_kind_str_mapping[] = {
	[BTF_KIND_UNKN]		= "UNKNOWN",
	[BTF_KIND_INT]		= "INT",
	[BTF_KIND_PTR]		= "PTR",
	[BTF_KIND_ARRAY]	= "ARRAY",
	[BTF_KIND_STRUCT]	= "STRUCT",
	[BTF_KIND_UNION]	= "UNION",
	[BTF_KIND_ENUM]		= "ENUM",
	[BTF_KIND_FWD]		= "FWD",
	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
	[BTF_KIND_VOLATILE]	= "VOLATILE",
	[BTF_KIND_CONST]	= "CONST",
	[BTF_KIND_RESTRICT]	= "RESTRICT",
	[BTF_KIND_FUNC]		= "FUNC",
	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
	[BTF_KIND_VAR]		= "VAR",
	[BTF_KIND_DATASEC]	= "DATASEC",
};

static const char *btf_kind_str(__u16 kind)
{
	if (kind > BTF_KIND_DATASEC)
		return "UNKNOWN";
	return btf_kind_str_mapping[kind];
}

static const char *btf_int_enc_str(__u8 encoding)
{
	switch (encoding) {
	case 0:
		return "(none)";
	case BTF_INT_SIGNED:
		return "SIGNED";
	case BTF_INT_CHAR:
		return "CHAR";
	case BTF_INT_BOOL:
		return "BOOL";
	default:
		return "UNKN";
	}
}

static const char *btf_var_linkage_str(__u32 linkage)
{
	switch (linkage) {
	case BTF_VAR_STATIC:
		return "static";
	case BTF_VAR_GLOBAL_ALLOCATED:
		return "global-alloc";
	default:
		return "(unknown)";
	}
}

static const char *btf_func_linkage_str(const struct btf_type *t)
{
	switch (btf_vlen(t)) {
	case BTF_FUNC_STATIC:
		return "static";
	case BTF_FUNC_GLOBAL:
		return "global";
	case BTF_FUNC_EXTERN:
		return "extern";
	default:
		return "(unknown)";
	}
}

static const char *btf_str(const struct btf *btf, __u32 off)
{
	if (!off)
		return "(anon)";
	return btf__str_by_offset(btf, off) ?: "(invalid)";
}

int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
{
	const struct btf_type *t;
	int kind, i;
	__u32 vlen;

	t = btf__type_by_id(btf, id);
	if (!t)
		return -EINVAL;

	vlen = btf_vlen(t);
	kind = btf_kind(t);

	fprintf(out, "[%u] %s '%s'", id, btf_kind_str(kind), btf_str(btf, t->name_off));

	switch (kind) {
	case BTF_KIND_INT:
		fprintf(out, " size=%u bits_offset=%u nr_bits=%u encoding=%s",
			t->size, btf_int_offset(t), btf_int_bits(t),
			btf_int_enc_str(btf_int_encoding(t)));
		break;
	case BTF_KIND_PTR:
	case BTF_KIND_CONST:
	case BTF_KIND_VOLATILE:
	case BTF_KIND_RESTRICT:
	case BTF_KIND_TYPEDEF:
		fprintf(out, " type_id=%u", t->type);
		break;
	case BTF_KIND_ARRAY: {
		const struct btf_array *arr = btf_array(t);

		fprintf(out, " type_id=%u index_type_id=%u nr_elems=%u",
			arr->type, arr->index_type, arr->nelems);
		break;
	}
	case BTF_KIND_STRUCT:
	case BTF_KIND_UNION: {
		const struct btf_member *m = btf_members(t);

		fprintf(out, " size=%u vlen=%u", t->size, vlen);
		for (i = 0; i < vlen; i++, m++) {
			__u32 bit_off, bit_sz;

			bit_off = btf_member_bit_offset(t, i);
			bit_sz = btf_member_bitfield_size(t, i);
			fprintf(out, "\n\t'%s' type_id=%u bits_offset=%u",
				btf_str(btf, m->name_off), m->type, bit_off);
			if (bit_sz)
				fprintf(out, " bitfield_size=%u", bit_sz);
		}
		break;
	}
	case BTF_KIND_ENUM: {
		const struct btf_enum *v = btf_enum(t);

		fprintf(out, " size=%u vlen=%u", t->size, vlen);
		for (i = 0; i < vlen; i++, v++) {
			fprintf(out, "\n\t'%s' val=%u",
				btf_str(btf, v->name_off), v->val);
		}
		break;
	}
	case BTF_KIND_FWD:
		fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
		break;
	case BTF_KIND_FUNC:
		fprintf(out, " type_id=%u linkage=%s", t->type, btf_func_linkage_str(t));
		break;
	case BTF_KIND_FUNC_PROTO: {
		const struct btf_param *p = btf_params(t);

		fprintf(out, " ret_type_id=%u vlen=%u", t->type, vlen);
		for (i = 0; i < vlen; i++, p++) {
			fprintf(out, "\n\t'%s' type_id=%u",
				btf_str(btf, p->name_off), p->type);
		}
		break;
	}
	case BTF_KIND_VAR:
		fprintf(out, " type_id=%u, linkage=%s",
			t->type, btf_var_linkage_str(btf_var(t)->linkage));
		break;
	case BTF_KIND_DATASEC: {
		const struct btf_var_secinfo *v = btf_var_secinfos(t);

		fprintf(out, " size=%u vlen=%u", t->size, vlen);
		for (i = 0; i < vlen; i++, v++) {
			fprintf(out, "\n\ttype_id=%u offset=%u size=%u",
				v->type, v->offset, v->size);
		}
		break;
	}
	default:
		break;
	}

	return 0;
}

/* Print raw BTF type dump into a local buffer and return string pointer back.
 * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
 */
const char *btf_type_raw_dump(const struct btf *btf, int type_id)
{
	static char buf[16 * 1024];
	FILE *buf_file;

	buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
	if (!buf_file) {
		fprintf(stderr, "Failed to open memstream: %d\n", errno);
		return NULL;
	}

	fprintf_btf_type_raw(buf_file, btf, type_id);
	fflush(buf_file);
	fclose(buf_file);

	return buf;
}

int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[])
{
	int i;
	bool ok = true;

	ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types");

	for (i = 1; i <= nr_types; i++) {
		if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump"))
			ok = false;
	}

	return ok;
}

static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
{
	vfprintf(ctx, fmt, args);
}

/* Print BTF-to-C dump into a local buffer and return string pointer back.
 * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
 */
const char *btf_type_c_dump(const struct btf *btf)
{
	static char buf[16 * 1024];
	FILE *buf_file;
	struct btf_dump *d = NULL;
	struct btf_dump_opts opts = {};
	int err, i;

	buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
	if (!buf_file) {
		fprintf(stderr, "Failed to open memstream: %d\n", errno);
		return NULL;
	}

	opts.ctx = buf_file;
	d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
	if (libbpf_get_error(d)) {
		fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
		return NULL;
	}

	for (i = 1; i <= btf__get_nr_types(btf); i++) {
		err = btf_dump__dump_type(d, i);
		if (err) {
			fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
			return NULL;
		}
	}

	fflush(buf_file);
	fclose(buf_file);
	return buf;
}
tools/testing/selftests/bpf/btf_helpers.h | +19 (new file)
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */
#ifndef __BTF_HELPERS_H
#define __BTF_HELPERS_H

#include <stdio.h>
#include <bpf/btf.h>

int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id);
const char *btf_type_raw_dump(const struct btf *btf, int type_id);
int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]);

#define VALIDATE_RAW_BTF(btf, raw_types...)				\
	btf_validate_raw(btf,						\
			 sizeof((const char *[]){raw_types})/sizeof(void *),\
			 (const char *[]){raw_types})

const char *btf_type_c_dump(const struct btf *btf);
#endif
tools/testing/selftests/bpf/prog_tests/btf.c | +25 -15
···
 	const void *test_btf_data, *expect_btf_data;
 	const char *ret_test_next_str, *ret_expect_next_str;
 	const char *test_strs, *expect_strs;
-	const char *test_str_cur, *test_str_end;
+	const char *test_str_cur;
 	const char *expect_str_cur, *expect_str_end;
 	unsigned int raw_btf_size;
 	void *raw_btf;
···
 		goto done;
 	}
 
-	test_str_cur = test_strs;
-	test_str_end = test_strs + test_hdr->str_len;
 	expect_str_cur = expect_strs;
 	expect_str_end = expect_strs + expect_hdr->str_len;
-	while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
+	while (expect_str_cur < expect_str_end) {
 		size_t test_len, expect_len;
+		int off;
+
+		off = btf__find_str(test_btf, expect_str_cur);
+		if (CHECK(off < 0, "exp str '%s' not found: %d\n", expect_str_cur, off)) {
+			err = -1;
+			goto done;
+		}
+		test_str_cur = btf__str_by_offset(test_btf, off);
 
 		test_len = strlen(test_str_cur);
 		expect_len = strlen(expect_str_cur);
···
 			err = -1;
 			goto done;
 		}
-		test_str_cur += test_len + 1;
 		expect_str_cur += expect_len + 1;
-	}
-	if (CHECK(test_str_cur != test_str_end,
-		  "test_str_cur:%p != test_str_end:%p",
-		  test_str_cur, test_str_end)) {
-		err = -1;
-		goto done;
 	}
 
 	test_nr_types = btf__get_nr_types(test_btf);
···
 		err = -1;
 		goto done;
 	}
-	if (CHECK(memcmp((void *)test_type,
-			 (void *)expect_type,
-			 test_size),
-		  "type #%d: contents differ", i)) {
+	if (CHECK(btf_kind(test_type) != btf_kind(expect_type),
+		  "type %d kind: exp %d != got %u\n",
+		  i, btf_kind(expect_type), btf_kind(test_type))) {
+		err = -1;
+		goto done;
+	}
+	if (CHECK(test_type->info != expect_type->info,
+		  "type %d info: exp %d != got %u\n",
+		  i, expect_type->info, test_type->info)) {
+		err = -1;
+		goto done;
+	}
+	if (CHECK(test_type->size != expect_type->size,
+		  "type %d size/type: exp %d != got %u\n",
+		  i, expect_type->size, test_type->size)) {
 		err = -1;
 		goto done;
 	}
tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | +325 (new file)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
#include <bpf/btf.h>
#include "btf_helpers.h"

static void test_split_simple() {
	const struct btf_type *t;
	struct btf *btf1, *btf2;
	int str_off, err;

	btf1 = btf__new_empty();
	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
		return;

	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */

	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
	btf__add_ptr(btf1, 1);				/* [2] ptr to int */
	btf__add_struct(btf1, "s1", 4);			/* [3] struct s1 { */
	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
							/* } */

	VALIDATE_RAW_BTF(
		btf1,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=1",
		"[3] STRUCT 's1' size=4 vlen=1\n"
		"\t'f1' type_id=1 bits_offset=0");

	ASSERT_STREQ(btf_type_c_dump(btf1), "\
struct s1 {\n\
	int f1;\n\
};\n\n", "c_dump");

	btf2 = btf__new_empty_split(btf1);
	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
		goto cleanup;

	/* pointer size should be "inherited" from main BTF */
	ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");

	str_off = btf__find_str(btf2, "int");
	ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");

	t = btf__type_by_id(btf2, 1);
	if (!ASSERT_OK_PTR(t, "int_type"))
		goto cleanup;
	ASSERT_EQ(btf_is_int(t), true, "int_kind");
	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");

	btf__add_struct(btf2, "s2", 16);		/* [4] struct s2 { */
	btf__add_field(btf2, "f1", 6, 0, 0);		/*      struct s1 f1; */
	btf__add_field(btf2, "f2", 5, 32, 0);		/*      int f2; */
	btf__add_field(btf2, "f3", 2, 64, 0);		/*      int *f3; */
							/* } */

	/* duplicated int */
	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [5] int */

	/* duplicated struct s1 */
	btf__add_struct(btf2, "s1", 4);			/* [6] struct s1 { */
	btf__add_field(btf2, "f1", 5, 0, 0);		/*      int f1; */
							/* } */

	VALIDATE_RAW_BTF(
		btf2,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=1",
		"[3] STRUCT 's1' size=4 vlen=1\n"
		"\t'f1' type_id=1 bits_offset=0",
		"[4] STRUCT 's2' size=16 vlen=3\n"
		"\t'f1' type_id=6 bits_offset=0\n"
		"\t'f2' type_id=5 bits_offset=32\n"
		"\t'f3' type_id=2 bits_offset=64",
		"[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[6] STRUCT 's1' size=4 vlen=1\n"
		"\t'f1' type_id=5 bits_offset=0");

	ASSERT_STREQ(btf_type_c_dump(btf2), "\
struct s1 {\n\
	int f1;\n\
};\n\
\n\
struct s1___2 {\n\
	int f1;\n\
};\n\
\n\
struct s2 {\n\
	struct s1___2 f1;\n\
	int f2;\n\
	int *f3;\n\
};\n\n", "c_dump");

	err = btf__dedup(btf2, NULL, NULL);
	if (!ASSERT_OK(err, "btf_dedup"))
		goto cleanup;

	VALIDATE_RAW_BTF(
		btf2,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=1",
		"[3] STRUCT 's1' size=4 vlen=1\n"
		"\t'f1' type_id=1 bits_offset=0",
		"[4] STRUCT 's2' size=16 vlen=3\n"
		"\t'f1' type_id=3 bits_offset=0\n"
		"\t'f2' type_id=1 bits_offset=32\n"
		"\t'f3' type_id=2 bits_offset=64");

	ASSERT_STREQ(btf_type_c_dump(btf2), "\
struct s1 {\n\
	int f1;\n\
};\n\
\n\
struct s2 {\n\
	struct s1 f1;\n\
	int f2;\n\
	int *f3;\n\
};\n\n", "c_dump");

cleanup:
	btf__free(btf2);
	btf__free(btf1);
}

static void test_split_fwd_resolve() {
	struct btf *btf1, *btf2;
	int err;

	btf1 = btf__new_empty();
	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
		return;

	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */

	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
	btf__add_ptr(btf1, 4);				/* [2] ptr to struct s1 */
	btf__add_ptr(btf1, 5);				/* [3] ptr to struct s2 */
	btf__add_struct(btf1, "s1", 16);		/* [4] struct s1 { */
	btf__add_field(btf1, "f1", 2, 0, 0);		/*      struct s1 *f1; */
	btf__add_field(btf1, "f2", 3, 64, 0);		/*      struct s2 *f2; */
							/* } */
	btf__add_struct(btf1, "s2", 4);			/* [5] struct s2 { */
	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
							/* } */

	VALIDATE_RAW_BTF(
		btf1,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=4",
		"[3] PTR '(anon)' type_id=5",
		"[4] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=2 bits_offset=0\n"
		"\t'f2' type_id=3 bits_offset=64",
		"[5] STRUCT 's2' size=4 vlen=1\n"
		"\t'f1' type_id=1 bits_offset=0");

	btf2 = btf__new_empty_split(btf1);
	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
		goto cleanup;

	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [6] int */
	btf__add_ptr(btf2, 10);				/* [7] ptr to struct s1 */
	btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT);	/* [8] fwd for struct s2 */
	btf__add_ptr(btf2, 8);				/* [9] ptr to fwd struct s2 */
	btf__add_struct(btf2, "s1", 16);		/* [10] struct s1 { */
	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
							/* } */

	VALIDATE_RAW_BTF(
		btf2,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=4",
		"[3] PTR '(anon)' type_id=5",
		"[4] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=2 bits_offset=0\n"
		"\t'f2' type_id=3 bits_offset=64",
		"[5] STRUCT 's2' size=4 vlen=1\n"
		"\t'f1' type_id=1 bits_offset=0",
		"[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[7] PTR '(anon)' type_id=10",
		"[8] FWD 's2' fwd_kind=struct",
		"[9] PTR '(anon)' type_id=8",
		"[10] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=7 bits_offset=0\n"
		"\t'f2' type_id=9 bits_offset=64");

	err = btf__dedup(btf2, NULL, NULL);
	if (!ASSERT_OK(err, "btf_dedup"))
		goto cleanup;

	VALIDATE_RAW_BTF(
		btf2,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=4",
		"[3] PTR '(anon)' type_id=5",
		"[4] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=2 bits_offset=0\n"
		"\t'f2' type_id=3 bits_offset=64",
		"[5] STRUCT 's2' size=4 vlen=1\n"
		"\t'f1' type_id=1 bits_offset=0");

cleanup:
	btf__free(btf2);
	btf__free(btf1);
}

static void test_split_struct_duped() {
	struct btf *btf1, *btf2;
	int err;

	btf1 = btf__new_empty();
	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
		return;

	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */

	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
	btf__add_ptr(btf1, 5);				/* [2] ptr to struct s1 */
	btf__add_fwd(btf1, "s2", BTF_FWD_STRUCT);	/* [3] fwd for struct s2 */
	btf__add_ptr(btf1, 3);				/* [4] ptr to fwd struct s2 */
	btf__add_struct(btf1, "s1", 16);		/* [5] struct s1 { */
	btf__add_field(btf1, "f1", 2, 0, 0);		/*      struct s1 *f1; */
	btf__add_field(btf1, "f2", 4, 64, 0);		/*      struct s2 *f2; */
							/* } */

	VALIDATE_RAW_BTF(
		btf1,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=5",
		"[3] FWD 's2' fwd_kind=struct",
		"[4] PTR '(anon)' type_id=3",
		"[5] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=2 bits_offset=0\n"
		"\t'f2' type_id=4 bits_offset=64");

	btf2 = btf__new_empty_split(btf1);
	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
		goto cleanup;

	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [6] int */
	btf__add_ptr(btf2, 10);				/* [7] ptr to struct s1 */
	btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT);	/* [8] fwd for struct s2 */
	btf__add_ptr(btf2, 11);				/* [9] ptr to struct s2 */
	btf__add_struct(btf2, "s1", 16);		/* [10] struct s1 { */
	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
							/* } */
	btf__add_struct(btf2, "s2", 40);		/* [11] struct s2 { */
	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
	btf__add_field(btf2, "f3", 6, 128, 0);		/*      int f3; */
	btf__add_field(btf2, "f4", 10, 192, 0);		/*      struct s1 f4; */
							/* } */
	btf__add_ptr(btf2, 8);				/* [12] ptr to fwd struct s2 */
	btf__add_struct(btf2, "s3", 8);			/* [13] struct s3 { */
	btf__add_field(btf2, "f1", 12, 0, 0);		/*      struct s2 *f1; (fwd) */
							/* } */

	VALIDATE_RAW_BTF(
		btf2,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=5",
		"[3] FWD 's2' fwd_kind=struct",
		"[4] PTR '(anon)' type_id=3",
		"[5] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=2 bits_offset=0\n"
		"\t'f2' type_id=4 bits_offset=64",
		"[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[7] PTR '(anon)' type_id=10",
		"[8] FWD 's2' fwd_kind=struct",
		"[9] PTR '(anon)' type_id=11",
		"[10] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=7 bits_offset=0\n"
		"\t'f2' type_id=9 bits_offset=64",
		"[11] STRUCT 's2' size=40 vlen=4\n"
		"\t'f1' type_id=7 bits_offset=0\n"
		"\t'f2' type_id=9 bits_offset=64\n"
		"\t'f3' type_id=6 bits_offset=128\n"
		"\t'f4' type_id=10 bits_offset=192",
		"[12] PTR '(anon)' type_id=8",
		"[13] STRUCT 's3' size=8 vlen=1\n"
		"\t'f1' type_id=12 bits_offset=0");

	err = btf__dedup(btf2, NULL, NULL);
	if (!ASSERT_OK(err, "btf_dedup"))
		goto cleanup;

	VALIDATE_RAW_BTF(
		btf2,
		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
		"[2] PTR '(anon)' type_id=5",
		"[3] FWD 's2' fwd_kind=struct",
		"[4] PTR '(anon)' type_id=3",
		"[5] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=2 bits_offset=0\n"
		"\t'f2' type_id=4 bits_offset=64",
		"[6] PTR '(anon)' type_id=8",
		"[7] PTR '(anon)' type_id=9",
		"[8] STRUCT 's1' size=16 vlen=2\n"
		"\t'f1' type_id=6 bits_offset=0\n"
		"\t'f2' type_id=7 bits_offset=64",
		"[9] STRUCT 's2' size=40 vlen=4\n"
		"\t'f1' type_id=6 bits_offset=0\n"
		"\t'f2' type_id=7 bits_offset=64\n"
		"\t'f3' type_id=1 bits_offset=128\n"
		"\t'f4' type_id=8 bits_offset=192",
		"[10] STRUCT 's3' size=8 vlen=1\n"
		"\t'f1' type_id=7 bits_offset=0");

cleanup:
	btf__free(btf2);
	btf__free(btf1);
}

void test_btf_dedup_split()
{
	if (test__start_subtest("split_simple"))
		test_split_simple();
	if (test__start_subtest("split_struct_duped"))
		test_split_struct_duped();
	if (test__start_subtest("split_fwd_resolve"))
		test_split_fwd_resolve();
}
tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c | +1 -1
···
 #include "test_btf_skc_cls_ingress.skel.h"
 
 static struct test_btf_skc_cls_ingress *skel;
-struct sockaddr_in6 srv_sa6;
+static struct sockaddr_in6 srv_sa6;
 static __u32 duration;
 
 #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
tools/testing/selftests/bpf/prog_tests/btf_split.c | +99 (new file)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
#include <bpf/btf.h>

static char *dump_buf;
static size_t dump_buf_sz;
static FILE *dump_buf_file;

static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
{
	vfprintf(ctx, fmt, args);
}

void test_btf_split() {
	struct btf_dump_opts opts;
	struct btf_dump *d = NULL;
	const struct btf_type *t;
	struct btf *btf1, *btf2;
	int str_off, i, err;

	btf1 = btf__new_empty();
	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
		return;

	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */

	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
	btf__add_ptr(btf1, 1);				/* [2] ptr to int */

	btf__add_struct(btf1, "s1", 4);			/* [3] struct s1 { */
	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
							/* } */

	btf2 = btf__new_empty_split(btf1);
	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
		goto cleanup;

	/* pointer size should be "inherited" from main BTF */
	ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");

	str_off = btf__find_str(btf2, "int");
	ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");

	t = btf__type_by_id(btf2, 1);
	if (!ASSERT_OK_PTR(t, "int_type"))
		goto cleanup;
	ASSERT_EQ(btf_is_int(t), true, "int_kind");
	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");

	btf__add_struct(btf2, "s2", 16);		/* [4] struct s2 { */
	btf__add_field(btf2, "f1", 3, 0, 0);		/*      struct s1 f1; */
	btf__add_field(btf2, "f2", 1, 32, 0);		/*      int f2; */
	btf__add_field(btf2, "f3", 2, 64, 0);		/*      int *f3; */
							/* } */

	t = btf__type_by_id(btf1, 4);
	ASSERT_NULL(t, "split_type_in_main");

	t = btf__type_by_id(btf2, 4);
	if (!ASSERT_OK_PTR(t, "split_struct_type"))
		goto cleanup;
	ASSERT_EQ(btf_is_struct(t), true, "split_struct_kind");
	ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");

	/* BTF-to-C dump of split BTF */
	dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
	if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
		return;
	opts.ctx = dump_buf_file;
	d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf);
	if (!ASSERT_OK_PTR(d, "btf_dump__new"))
		goto cleanup;
	for (i = 1; i <= btf__get_nr_types(btf2); i++) {
		err = btf_dump__dump_type(d, i);
		ASSERT_OK(err, "dump_type_ok");
	}
	fflush(dump_buf_file);
	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
	ASSERT_STREQ(dump_buf,
		     "struct s1 {\n"
		     "	int f1;\n"
		     "};\n"
		     "\n"
		     "struct s2 {\n"
		     "	struct s1 f1;\n"
		     "	int f2;\n"
		     "	int *f3;\n"
		     "};\n\n", "c_dump");

cleanup:
	if (dump_buf_file)
		fclose(dump_buf_file);
	free(dump_buf);
	btf_dump__free(d);
	btf__free(btf1);
	btf__free(btf2);
}
tools/testing/selftests/bpf/prog_tests/btf_write.c | +43
··· 2 2 /* Copyright (c) 2020 Facebook */ 3 3 #include <test_progs.h> 4 4 #include <bpf/btf.h> 5 + #include "btf_helpers.h" 5 6 6 7 static int duration = 0; 7 8 ··· 40 39 ASSERT_EQ(t->size, 4, "int_sz"); 41 40 ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc"); 42 41 ASSERT_EQ(btf_int_bits(t), 32, "int_bits"); 42 + ASSERT_STREQ(btf_type_raw_dump(btf, 1), 43 + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "raw_dump"); 43 44 44 45 /* invalid int size */ 45 46 id = btf__add_int(btf, "bad sz int", 7, 0); ··· 62 59 t = btf__type_by_id(btf, 2); 63 60 ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind"); 64 61 ASSERT_EQ(t->type, 1, "ptr_type"); 62 + ASSERT_STREQ(btf_type_raw_dump(btf, 2), 63 + "[2] PTR '(anon)' type_id=1", "raw_dump"); 65 64 66 65 id = btf__add_const(btf, 5); /* points forward to restrict */ 67 66 ASSERT_EQ(id, 3, "const_id"); 68 67 t = btf__type_by_id(btf, 3); 69 68 ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind"); 70 69 ASSERT_EQ(t->type, 5, "const_type"); 70 + ASSERT_STREQ(btf_type_raw_dump(btf, 3), 71 + "[3] CONST '(anon)' type_id=5", "raw_dump"); 71 72 72 73 id = btf__add_volatile(btf, 3); 73 74 ASSERT_EQ(id, 4, "volatile_id"); 74 75 t = btf__type_by_id(btf, 4); 75 76 ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind"); 76 77 ASSERT_EQ(t->type, 3, "volatile_type"); 78 + ASSERT_STREQ(btf_type_raw_dump(btf, 4), 79 + "[4] VOLATILE '(anon)' type_id=3", "raw_dump"); 77 80 78 81 id = btf__add_restrict(btf, 4); 79 82 ASSERT_EQ(id, 5, "restrict_id"); 80 83 t = btf__type_by_id(btf, 5); 81 84 ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind"); 82 85 ASSERT_EQ(t->type, 4, "restrict_type"); 86 + ASSERT_STREQ(btf_type_raw_dump(btf, 5), 87 + "[5] RESTRICT '(anon)' type_id=4", "raw_dump"); 83 88 84 89 /* ARRAY */ 85 90 id = btf__add_array(btf, 1, 2, 10); /* int *[10] */ ··· 97 86 ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type"); 98 87 ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type"); 99 88 
ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems"); 89 + ASSERT_STREQ(btf_type_raw_dump(btf, 6), 90 + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", "raw_dump"); 100 91 101 92 /* STRUCT */ 102 93 err = btf__add_field(btf, "field", 1, 0, 0); ··· 126 113 ASSERT_EQ(m->type, 1, "f2_type"); 127 114 ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off"); 128 115 ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz"); 116 + ASSERT_STREQ(btf_type_raw_dump(btf, 7), 117 + "[7] STRUCT 's1' size=8 vlen=2\n" 118 + "\t'f1' type_id=1 bits_offset=0\n" 119 + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "raw_dump"); 129 120 130 121 /* UNION */ 131 122 id = btf__add_union(btf, "u1", 8); ··· 153 136 ASSERT_EQ(m->type, 1, "f1_type"); 154 137 ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); 155 138 ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz"); 139 + ASSERT_STREQ(btf_type_raw_dump(btf, 8), 140 + "[8] UNION 'u1' size=8 vlen=1\n" 141 + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", "raw_dump"); 156 142 157 143 /* ENUM */ 158 144 id = btf__add_enum(btf, "e1", 4); ··· 176 156 v = btf_enum(t) + 1; 177 157 ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); 178 158 ASSERT_EQ(v->val, 2, "v2_val"); 159 + ASSERT_STREQ(btf_type_raw_dump(btf, 9), 160 + "[9] ENUM 'e1' size=4 vlen=2\n" 161 + "\t'v1' val=1\n" 162 + "\t'v2' val=2", "raw_dump"); 179 163 180 164 /* FWDs */ 181 165 id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT); ··· 188 164 ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name"); 189 165 ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); 190 166 ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag"); 167 + ASSERT_STREQ(btf_type_raw_dump(btf, 10), 168 + "[10] FWD 'struct_fwd' fwd_kind=struct", "raw_dump"); 191 169 192 170 id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION); 193 171 ASSERT_EQ(id, 11, "union_fwd_id"); ··· 197 171 ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name"); 198 172 
ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); 199 173 ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag"); 174 + ASSERT_STREQ(btf_type_raw_dump(btf, 11), 175 + "[11] FWD 'union_fwd' fwd_kind=union", "raw_dump"); 200 176 201 177 id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM); 202 178 ASSERT_EQ(id, 12, "enum_fwd_id"); ··· 207 179 ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind"); 208 180 ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); 209 181 ASSERT_EQ(t->size, 4, "enum_fwd_sz"); 182 + ASSERT_STREQ(btf_type_raw_dump(btf, 12), 183 + "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); 210 184 211 185 /* TYPEDEF */ 212 186 id = btf__add_typedef(btf, "typedef1", 1); ··· 217 187 ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name"); 218 188 ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind"); 219 189 ASSERT_EQ(t->type, 1, "typedef_type"); 190 + ASSERT_STREQ(btf_type_raw_dump(btf, 13), 191 + "[13] TYPEDEF 'typedef1' type_id=1", "raw_dump"); 220 192 221 193 /* FUNC & FUNC_PROTO */ 222 194 id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15); ··· 228 196 ASSERT_EQ(t->type, 15, "func_type"); 229 197 ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind"); 230 198 ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen"); 199 + ASSERT_STREQ(btf_type_raw_dump(btf, 14), 200 + "[14] FUNC 'func1' type_id=15 linkage=global", "raw_dump"); 231 201 232 202 id = btf__add_func_proto(btf, 1); 233 203 ASSERT_EQ(id, 15, "func_proto_id"); ··· 248 214 p = btf_params(t) + 1; 249 215 ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name"); 250 216 ASSERT_EQ(p->type, 2, "p2_type"); 217 + ASSERT_STREQ(btf_type_raw_dump(btf, 15), 218 + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" 219 + "\t'p1' type_id=1\n" 220 + "\t'p2' type_id=2", "raw_dump"); 251 221 252 222 /* VAR */ 253 223 id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1); ··· 261 223 ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind"); 262 224 ASSERT_EQ(t->type, 1, "var_type"); 263 225 
ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type"); 226 + ASSERT_STREQ(btf_type_raw_dump(btf, 16), 227 + "[16] VAR 'var1' type_id=1, linkage=global-alloc", "raw_dump"); 264 228 265 229 /* DATASECT */ 266 230 id = btf__add_datasec(btf, "datasec1", 12); ··· 279 239 ASSERT_EQ(vi->type, 1, "v1_type"); 280 240 ASSERT_EQ(vi->offset, 4, "v1_off"); 281 241 ASSERT_EQ(vi->size, 8, "v1_sz"); 242 + ASSERT_STREQ(btf_type_raw_dump(btf, 17), 243 + "[17] DATASEC 'datasec1' size=12 vlen=1\n" 244 + "\ttype_id=1 offset=4 size=8", "raw_dump"); 282 245 283 246 btf__free(btf); 284 247 }
+43
tools/testing/selftests/bpf/prog_tests/hash_large_key.c
···
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_hash_large_key.skel.h"
+
+void test_hash_large_key(void)
+{
+	int err, value = 21, duration = 0, hash_map_fd;
+	struct test_hash_large_key *skel;
+
+	struct bigelement {
+		int a;
+		char b[4096];
+		long long c;
+	} key;
+	bzero(&key, sizeof(key));
+
+	skel = test_hash_large_key__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	hash_map_fd = bpf_map__fd(skel->maps.hash_map);
+	if (CHECK(hash_map_fd < 0, "bpf_map__fd", "failed\n"))
+		goto cleanup;
+
+	err = test_hash_large_key__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	err = bpf_map_update_elem(hash_map_fd, &key, &value, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "errno=%d\n", errno))
+		goto cleanup;
+
+	key.c = 1;
+	err = bpf_map_lookup_elem(hash_map_fd, &key, &value);
+	if (CHECK(err, "bpf_map_lookup_elem", "errno=%d\n", errno))
+		goto cleanup;
+
+	CHECK_FAIL(value != 42);
+
+cleanup:
+	test_hash_large_key__destroy(skel);
+}
+135
tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2020 Facebook */ 3 + 4 + #include <sys/types.h> 5 + #include <bpf/bpf.h> 6 + #include <bpf/libbpf.h> 7 + #include "test_progs.h" 8 + #include "network_helpers.h" 9 + #include "test_sk_storage_trace_itself.skel.h" 10 + #include "test_sk_storage_tracing.skel.h" 11 + 12 + #define LO_ADDR6 "::1" 13 + #define TEST_COMM "test_progs" 14 + 15 + struct sk_stg { 16 + __u32 pid; 17 + __u32 last_notclose_state; 18 + char comm[16]; 19 + }; 20 + 21 + static struct test_sk_storage_tracing *skel; 22 + static __u32 duration; 23 + static pid_t my_pid; 24 + 25 + static int check_sk_stg(int sk_fd, __u32 expected_state) 26 + { 27 + struct sk_stg sk_stg; 28 + int err; 29 + 30 + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_stg_map), &sk_fd, 31 + &sk_stg); 32 + if (!ASSERT_OK(err, "map_lookup(sk_stg_map)")) 33 + return -1; 34 + 35 + if (!ASSERT_EQ(sk_stg.last_notclose_state, expected_state, 36 + "last_notclose_state")) 37 + return -1; 38 + 39 + if (!ASSERT_EQ(sk_stg.pid, my_pid, "pid")) 40 + return -1; 41 + 42 + if (!ASSERT_STREQ(sk_stg.comm, skel->bss->task_comm, "task_comm")) 43 + return -1; 44 + 45 + return 0; 46 + } 47 + 48 + static void do_test(void) 49 + { 50 + int listen_fd = -1, passive_fd = -1, active_fd = -1, value = 1, err; 51 + char abyte; 52 + 53 + listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); 54 + if (CHECK(listen_fd == -1, "start_server", 55 + "listen_fd:%d errno:%d\n", listen_fd, errno)) 56 + return; 57 + 58 + active_fd = connect_to_fd(listen_fd, 0); 59 + if (CHECK(active_fd == -1, "connect_to_fd", "active_fd:%d errno:%d\n", 60 + active_fd, errno)) 61 + goto out; 62 + 63 + err = bpf_map_update_elem(bpf_map__fd(skel->maps.del_sk_stg_map), 64 + &active_fd, &value, 0); 65 + if (!ASSERT_OK(err, "map_update(del_sk_stg_map)")) 66 + goto out; 67 + 68 + passive_fd = accept(listen_fd, NULL, 0); 69 + if (CHECK(passive_fd == -1, "accept", "passive_fd:%d errno:%d\n", 70 + passive_fd, errno)) 71 
+ goto out; 72 + 73 + shutdown(active_fd, SHUT_WR); 74 + err = read(passive_fd, &abyte, 1); 75 + if (!ASSERT_OK(err, "read(passive_fd)")) 76 + goto out; 77 + 78 + shutdown(passive_fd, SHUT_WR); 79 + err = read(active_fd, &abyte, 1); 80 + if (!ASSERT_OK(err, "read(active_fd)")) 81 + goto out; 82 + 83 + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.del_sk_stg_map), 84 + &active_fd, &value); 85 + if (!ASSERT_ERR(err, "map_lookup(del_sk_stg_map)")) 86 + goto out; 87 + 88 + err = check_sk_stg(listen_fd, BPF_TCP_LISTEN); 89 + if (!ASSERT_OK(err, "listen_fd sk_stg")) 90 + goto out; 91 + 92 + err = check_sk_stg(active_fd, BPF_TCP_FIN_WAIT2); 93 + if (!ASSERT_OK(err, "active_fd sk_stg")) 94 + goto out; 95 + 96 + err = check_sk_stg(passive_fd, BPF_TCP_LAST_ACK); 97 + ASSERT_OK(err, "passive_fd sk_stg"); 98 + 99 + out: 100 + if (active_fd != -1) 101 + close(active_fd); 102 + if (passive_fd != -1) 103 + close(passive_fd); 104 + if (listen_fd != -1) 105 + close(listen_fd); 106 + } 107 + 108 + void test_sk_storage_tracing(void) 109 + { 110 + struct test_sk_storage_trace_itself *skel_itself; 111 + int err; 112 + 113 + my_pid = getpid(); 114 + 115 + skel_itself = test_sk_storage_trace_itself__open_and_load(); 116 + 117 + if (!ASSERT_NULL(skel_itself, "test_sk_storage_trace_itself")) { 118 + test_sk_storage_trace_itself__destroy(skel_itself); 119 + return; 120 + } 121 + 122 + skel = test_sk_storage_tracing__open_and_load(); 123 + if (!ASSERT_OK_PTR(skel, "test_sk_storage_tracing")) 124 + return; 125 + 126 + err = test_sk_storage_tracing__attach(skel); 127 + if (!ASSERT_OK(err, "test_sk_storage_tracing__attach")) { 128 + test_sk_storage_tracing__destroy(skel); 129 + return; 130 + } 131 + 132 + do_test(); 133 + 134 + test_sk_storage_tracing__destroy(skel); 135 + }
+6 -6
tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
···
 #define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-hdr-opt-test"
 
-struct bpf_test_option exp_passive_estab_in;
-struct bpf_test_option exp_active_estab_in;
-struct bpf_test_option exp_passive_fin_in;
-struct bpf_test_option exp_active_fin_in;
-struct hdr_stg exp_passive_hdr_stg;
-struct hdr_stg exp_active_hdr_stg = { .active = true, };
+static struct bpf_test_option exp_passive_estab_in;
+static struct bpf_test_option exp_active_estab_in;
+static struct bpf_test_option exp_passive_fin_in;
+static struct bpf_test_option exp_active_fin_in;
+static struct hdr_stg exp_passive_hdr_stg;
+static struct hdr_stg exp_active_hdr_stg = { .active = true, };
 
 static struct test_misc_tcp_hdr_options *misc_skel;
 static struct test_tcp_hdr_options *skel;
+141
tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <network_helpers.h> 4 + 5 + #include "test_tcpbpf.h" 6 + #include "test_tcpbpf_kern.skel.h" 7 + 8 + #define LO_ADDR6 "::1" 9 + #define CG_NAME "/tcpbpf-user-test" 10 + 11 + static __u32 duration; 12 + 13 + static void verify_result(struct tcpbpf_globals *result) 14 + { 15 + __u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | 16 + (1 << BPF_SOCK_OPS_RWND_INIT) | 17 + (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) | 18 + (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) | 19 + (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) | 20 + (1 << BPF_SOCK_OPS_NEEDS_ECN) | 21 + (1 << BPF_SOCK_OPS_STATE_CB) | 22 + (1 << BPF_SOCK_OPS_TCP_LISTEN_CB)); 23 + 24 + /* check global map */ 25 + CHECK(expected_events != result->event_map, "event_map", 26 + "unexpected event_map: actual 0x%08x != expected 0x%08x\n", 27 + result->event_map, expected_events); 28 + 29 + ASSERT_EQ(result->bytes_received, 501, "bytes_received"); 30 + ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked"); 31 + ASSERT_EQ(result->data_segs_in, 1, "data_segs_in"); 32 + ASSERT_EQ(result->data_segs_out, 1, "data_segs_out"); 33 + ASSERT_EQ(result->bad_cb_test_rv, 0x80, "bad_cb_test_rv"); 34 + ASSERT_EQ(result->good_cb_test_rv, 0, "good_cb_test_rv"); 35 + ASSERT_EQ(result->num_listen, 1, "num_listen"); 36 + 37 + /* 3 comes from one listening socket + both ends of the connection */ 38 + ASSERT_EQ(result->num_close_events, 3, "num_close_events"); 39 + 40 + /* check setsockopt for SAVE_SYN */ 41 + ASSERT_EQ(result->tcp_save_syn, 0, "tcp_save_syn"); 42 + 43 + /* check getsockopt for SAVED_SYN */ 44 + ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn"); 45 + } 46 + 47 + static void run_test(struct tcpbpf_globals *result) 48 + { 49 + int listen_fd = -1, cli_fd = -1, accept_fd = -1; 50 + char buf[1000]; 51 + int err = -1; 52 + int i, rv; 53 + 54 + listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); 55 + if (CHECK(listen_fd == -1, "start_server", 
"listen_fd:%d errno:%d\n", 56 + listen_fd, errno)) 57 + goto done; 58 + 59 + cli_fd = connect_to_fd(listen_fd, 0); 60 + if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)", 61 + "cli_fd:%d errno:%d\n", cli_fd, errno)) 62 + goto done; 63 + 64 + accept_fd = accept(listen_fd, NULL, NULL); 65 + if (CHECK(accept_fd == -1, "accept(listen_fd)", 66 + "accept_fd:%d errno:%d\n", accept_fd, errno)) 67 + goto done; 68 + 69 + /* Send 1000B of '+'s from cli_fd -> accept_fd */ 70 + for (i = 0; i < 1000; i++) 71 + buf[i] = '+'; 72 + 73 + rv = send(cli_fd, buf, 1000, 0); 74 + if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno)) 75 + goto done; 76 + 77 + rv = recv(accept_fd, buf, 1000, 0); 78 + if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno)) 79 + goto done; 80 + 81 + /* Send 500B of '.'s from accept_fd ->cli_fd */ 82 + for (i = 0; i < 500; i++) 83 + buf[i] = '.'; 84 + 85 + rv = send(accept_fd, buf, 500, 0); 86 + if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno)) 87 + goto done; 88 + 89 + rv = recv(cli_fd, buf, 500, 0); 90 + if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno)) 91 + goto done; 92 + 93 + /* 94 + * shutdown accept first to guarantee correct ordering for 95 + * bytes_received and bytes_acked when we go to verify the results. 
96 + */ 97 + shutdown(accept_fd, SHUT_WR); 98 + err = recv(cli_fd, buf, 1, 0); 99 + if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno)) 100 + goto done; 101 + 102 + shutdown(cli_fd, SHUT_WR); 103 + err = recv(accept_fd, buf, 1, 0); 104 + CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno); 105 + done: 106 + if (accept_fd != -1) 107 + close(accept_fd); 108 + if (cli_fd != -1) 109 + close(cli_fd); 110 + if (listen_fd != -1) 111 + close(listen_fd); 112 + 113 + if (!err) 114 + verify_result(result); 115 + } 116 + 117 + void test_tcpbpf_user(void) 118 + { 119 + struct test_tcpbpf_kern *skel; 120 + int cg_fd = -1; 121 + 122 + skel = test_tcpbpf_kern__open_and_load(); 123 + if (CHECK(!skel, "open and load skel", "failed")) 124 + return; 125 + 126 + cg_fd = test__join_cgroup(CG_NAME); 127 + if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")", 128 + "cg_fd:%d errno:%d", cg_fd, errno)) 129 + goto err; 130 + 131 + skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd); 132 + if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)")) 133 + goto err; 134 + 135 + run_test(&skel->bss->global); 136 + 137 + err: 138 + if (cg_fd != -1) 139 + close(cg_fd); 140 + test_tcpbpf_kern__destroy(skel); 141 + }
+190 -14
tools/testing/selftests/bpf/prog_tests/test_local_storage.c
··· 4 4 * Copyright (C) 2020 Google LLC. 5 5 */ 6 6 7 + #include <asm-generic/errno-base.h> 8 + #include <sys/stat.h> 7 9 #include <test_progs.h> 8 10 #include <linux/limits.h> 9 11 10 12 #include "local_storage.skel.h" 11 13 #include "network_helpers.h" 12 14 13 - int create_and_unlink_file(void) 15 + #ifndef __NR_pidfd_open 16 + #define __NR_pidfd_open 434 17 + #endif 18 + 19 + static inline int sys_pidfd_open(pid_t pid, unsigned int flags) 14 20 { 15 - char fname[PATH_MAX] = "/tmp/fileXXXXXX"; 16 - int fd; 21 + return syscall(__NR_pidfd_open, pid, flags); 22 + } 17 23 18 - fd = mkstemp(fname); 19 - if (fd < 0) 20 - return fd; 24 + static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out, 25 + loff_t *off_out, size_t len, 26 + unsigned int flags) 27 + { 28 + return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out, 29 + len, flags); 30 + } 21 31 22 - close(fd); 23 - unlink(fname); 24 - return 0; 32 + static unsigned int duration; 33 + 34 + #define TEST_STORAGE_VALUE 0xbeefdead 35 + 36 + struct storage { 37 + void *inode; 38 + unsigned int value; 39 + /* Lock ensures that spin locked versions of local stoage operations 40 + * also work, most operations in this tests are still single threaded 41 + */ 42 + struct bpf_spin_lock lock; 43 + }; 44 + 45 + /* Copies an rm binary to a temp file. 
dest is a mkstemp template */ 46 + static int copy_rm(char *dest) 47 + { 48 + int fd_in, fd_out = -1, ret = 0; 49 + struct stat stat; 50 + 51 + fd_in = open("/bin/rm", O_RDONLY); 52 + if (fd_in < 0) 53 + return -errno; 54 + 55 + fd_out = mkstemp(dest); 56 + if (fd_out < 0) { 57 + ret = -errno; 58 + goto out; 59 + } 60 + 61 + ret = fstat(fd_in, &stat); 62 + if (ret == -1) { 63 + ret = -errno; 64 + goto out; 65 + } 66 + 67 + ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0); 68 + if (ret == -1) { 69 + ret = -errno; 70 + goto out; 71 + } 72 + 73 + /* Set executable permission on the copied file */ 74 + ret = chmod(dest, 0100); 75 + if (ret == -1) 76 + ret = -errno; 77 + 78 + out: 79 + close(fd_in); 80 + close(fd_out); 81 + return ret; 82 + } 83 + 84 + /* Fork and exec the provided rm binary and return the exit code of the 85 + * forked process and its pid. 86 + */ 87 + static int run_self_unlink(int *monitored_pid, const char *rm_path) 88 + { 89 + int child_pid, child_status, ret; 90 + int null_fd; 91 + 92 + child_pid = fork(); 93 + if (child_pid == 0) { 94 + null_fd = open("/dev/null", O_WRONLY); 95 + dup2(null_fd, STDOUT_FILENO); 96 + dup2(null_fd, STDERR_FILENO); 97 + close(null_fd); 98 + 99 + *monitored_pid = getpid(); 100 + /* Use the copied /usr/bin/rm to delete itself 101 + * /tmp/copy_of_rm /tmp/copy_of_rm. 
102 + */ 103 + ret = execlp(rm_path, rm_path, rm_path, NULL); 104 + if (ret) 105 + exit(errno); 106 + } else if (child_pid > 0) { 107 + waitpid(child_pid, &child_status, 0); 108 + return WEXITSTATUS(child_status); 109 + } 110 + 111 + return -EINVAL; 112 + } 113 + 114 + static bool check_syscall_operations(int map_fd, int obj_fd) 115 + { 116 + struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } }, 117 + lookup_val = { .value = 0, .lock = { 0 } }; 118 + int err; 119 + 120 + /* Looking up an existing element should fail initially */ 121 + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 122 + BPF_F_LOCK); 123 + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", 124 + "err:%d errno:%d\n", err, errno)) 125 + return false; 126 + 127 + /* Create a new element */ 128 + err = bpf_map_update_elem(map_fd, &obj_fd, &val, 129 + BPF_NOEXIST | BPF_F_LOCK); 130 + if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, 131 + errno)) 132 + return false; 133 + 134 + /* Lookup the newly created element */ 135 + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 136 + BPF_F_LOCK); 137 + if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, 138 + errno)) 139 + return false; 140 + 141 + /* Check the value of the newly created element */ 142 + if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem", 143 + "value got = %x errno:%d", lookup_val.value, val.value)) 144 + return false; 145 + 146 + err = bpf_map_delete_elem(map_fd, &obj_fd); 147 + if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, 148 + errno)) 149 + return false; 150 + 151 + /* The lookup should fail, now that the element has been deleted */ 152 + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 153 + BPF_F_LOCK); 154 + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", 155 + "err:%d errno:%d\n", err, errno)) 156 + return false; 157 + 158 + return true; 25 159 } 26 160 27 161 void test_test_local_storage(void) 28 162 
{ 163 + char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX"; 164 + int err, serv_sk = -1, task_fd = -1, rm_fd = -1; 29 165 struct local_storage *skel = NULL; 30 - int err, duration = 0, serv_sk = -1; 31 166 32 167 skel = local_storage__open_and_load(); 33 168 if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) ··· 172 37 if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) 173 38 goto close_prog; 174 39 40 + task_fd = sys_pidfd_open(getpid(), 0); 41 + if (CHECK(task_fd < 0, "pidfd_open", 42 + "failed to get pidfd err:%d, errno:%d", task_fd, errno)) 43 + goto close_prog; 44 + 45 + if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map), 46 + task_fd)) 47 + goto close_prog; 48 + 49 + err = copy_rm(tmp_exec_path); 50 + if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) 51 + goto close_prog; 52 + 53 + rm_fd = open(tmp_exec_path, O_RDONLY); 54 + if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", 55 + tmp_exec_path, rm_fd, errno)) 56 + goto close_prog; 57 + 58 + if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), 59 + rm_fd)) 60 + goto close_prog; 61 + 62 + /* Sets skel->bss->monitored_pid to the pid of the forked child 63 + * forks a child process that executes tmp_exec_path and tries to 64 + * unlink its executable. This operation should be denied by the loaded 65 + * LSM program. 
66 + */ 67 + err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); 68 + if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) 69 + goto close_prog_unlink; 70 + 71 + /* Set the process being monitored to be the current process */ 175 72 skel->bss->monitored_pid = getpid(); 176 73 177 - err = create_and_unlink_file(); 178 - if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) 179 - goto close_prog; 74 + /* Remove the temporary created executable */ 75 + err = unlink(tmp_exec_path); 76 + if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path, 77 + errno)) 78 + goto close_prog_unlink; 180 79 181 80 CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", 182 81 "inode_local_storage not set\n"); ··· 222 53 CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", 223 54 "sk_local_storage not set\n"); 224 55 225 - close(serv_sk); 56 + if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), 57 + serv_sk)) 58 + goto close_prog; 226 59 60 + close_prog_unlink: 61 + unlink(tmp_exec_path); 227 62 close_prog: 63 + close(serv_sk); 64 + close(rm_fd); 65 + close(task_fd); 228 66 local_storage__destroy(skel); 229 67 }
+41
tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
···
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_pkt_end.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+	__u32 duration, retval;
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	if (CHECK(err || retval != 123, "test_run",
+		  "err %d errno %d retval %d duration %d\n",
+		  err, errno, retval, duration))
+		return -1;
+	return 0;
+}
+
+void test_test_skb_pkt_end(void)
+{
+	struct skb_pkt_end *skb_pkt_end_skel = NULL;
+	__u32 duration = 0;
+	int err;
+
+	skb_pkt_end_skel = skb_pkt_end__open_and_load();
+	if (CHECK(!skb_pkt_end_skel, "skb_pkt_end_skel_load", "skb_pkt_end skeleton failed\n"))
+		goto cleanup;
+
+	err = skb_pkt_end__attach(skb_pkt_end_skel);
+	if (CHECK(err, "skb_pkt_end_attach", "skb_pkt_end attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(skb_pkt_end_skel->progs.main_prog))
+		goto cleanup;
+
+cleanup:
+	skb_pkt_end__destroy(skb_pkt_end_skel);
+}
+69 -34
tools/testing/selftests/bpf/progs/local_storage.c
··· 4 4 * Copyright 2020 Google LLC. 5 5 */ 6 6 7 + #include "vmlinux.h" 7 8 #include <errno.h> 8 - #include <linux/bpf.h> 9 - #include <stdbool.h> 10 9 #include <bpf/bpf_helpers.h> 11 10 #include <bpf/bpf_tracing.h> 12 11 ··· 17 18 int inode_storage_result = -1; 18 19 int sk_storage_result = -1; 19 20 20 - struct dummy_storage { 21 + struct local_storage { 22 + struct inode *exec_inode; 21 23 __u32 value; 24 + struct bpf_spin_lock lock; 22 25 }; 23 26 24 27 struct { 25 28 __uint(type, BPF_MAP_TYPE_INODE_STORAGE); 26 29 __uint(map_flags, BPF_F_NO_PREALLOC); 27 30 __type(key, int); 28 - __type(value, struct dummy_storage); 31 + __type(value, struct local_storage); 29 32 } inode_storage_map SEC(".maps"); 30 33 31 34 struct { 32 35 __uint(type, BPF_MAP_TYPE_SK_STORAGE); 33 36 __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); 34 37 __type(key, int); 35 - __type(value, struct dummy_storage); 38 + __type(value, struct local_storage); 36 39 } sk_storage_map SEC(".maps"); 37 40 38 - /* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. 
39 - */ 40 - struct sock {} __attribute__((preserve_access_index)); 41 - struct sockaddr {} __attribute__((preserve_access_index)); 42 - struct socket { 43 - struct sock *sk; 44 - } __attribute__((preserve_access_index)); 45 - 46 - struct inode {} __attribute__((preserve_access_index)); 47 - struct dentry { 48 - struct inode *d_inode; 49 - } __attribute__((preserve_access_index)); 50 - struct file { 51 - struct inode *f_inode; 52 - } __attribute__((preserve_access_index)); 53 - 41 + struct { 42 + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); 43 + __uint(map_flags, BPF_F_NO_PREALLOC); 44 + __type(key, int); 45 + __type(value, struct local_storage); 46 + } task_storage_map SEC(".maps"); 54 47 55 48 SEC("lsm/inode_unlink") 56 49 int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) 57 50 { 58 51 __u32 pid = bpf_get_current_pid_tgid() >> 32; 59 - struct dummy_storage *storage; 52 + struct local_storage *storage; 53 + bool is_self_unlink; 54 + int err; 60 55 61 56 if (pid != monitored_pid) 62 57 return 0; 63 58 59 + storage = bpf_task_storage_get(&task_storage_map, 60 + bpf_get_current_task_btf(), 0, 0); 61 + if (storage) { 62 + /* Don't let an executable delete itself */ 63 + bpf_spin_lock(&storage->lock); 64 + is_self_unlink = storage->exec_inode == victim->d_inode; 65 + bpf_spin_unlock(&storage->lock); 66 + if (is_self_unlink) 67 + return -EPERM; 68 + } 69 + 64 70 storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, 65 - BPF_SK_STORAGE_GET_F_CREATE); 71 + BPF_LOCAL_STORAGE_GET_F_CREATE); 66 72 if (!storage) 67 73 return 0; 68 74 69 - if (storage->value == DUMMY_STORAGE_VALUE) 75 + bpf_spin_lock(&storage->lock); 76 + if (storage->value != DUMMY_STORAGE_VALUE) 70 77 inode_storage_result = -1; 78 + bpf_spin_unlock(&storage->lock); 71 79 72 - inode_storage_result = 73 - bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); 80 + err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); 81 + if (!err) 82 + 
inode_storage_result = err; 74 83 75 84 return 0; 76 85 } ··· 88 81 int addrlen) 89 82 { 90 83 __u32 pid = bpf_get_current_pid_tgid() >> 32; 91 - struct dummy_storage *storage; 84 + struct local_storage *storage; 85 + int err; 92 86 93 87 if (pid != monitored_pid) 94 88 return 0; 95 89 96 90 storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 97 - BPF_SK_STORAGE_GET_F_CREATE); 91 + BPF_LOCAL_STORAGE_GET_F_CREATE); 98 92 if (!storage) 99 93 return 0; 100 94 101 - if (storage->value == DUMMY_STORAGE_VALUE) 95 + bpf_spin_lock(&storage->lock); 96 + if (storage->value != DUMMY_STORAGE_VALUE) 102 97 sk_storage_result = -1; 98 + bpf_spin_unlock(&storage->lock); 103 99 104 - sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); 100 + err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); 101 + if (!err) 102 + sk_storage_result = err; 103 + 105 104 return 0; 106 105 } 107 106 ··· 116 103 int protocol, int kern) 117 104 { 118 105 __u32 pid = bpf_get_current_pid_tgid() >> 32; 119 - struct dummy_storage *storage; 106 + struct local_storage *storage; 120 107 121 108 if (pid != monitored_pid) 122 109 return 0; 123 110 124 111 storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 125 - BPF_SK_STORAGE_GET_F_CREATE); 112 + BPF_LOCAL_STORAGE_GET_F_CREATE); 126 113 if (!storage) 127 114 return 0; 128 115 116 + bpf_spin_lock(&storage->lock); 129 117 storage->value = DUMMY_STORAGE_VALUE; 118 + bpf_spin_unlock(&storage->lock); 130 119 131 120 return 0; 132 121 } ··· 137 122 int BPF_PROG(file_open, struct file *file) 138 123 { 139 124 __u32 pid = bpf_get_current_pid_tgid() >> 32; 140 - struct dummy_storage *storage; 125 + struct local_storage *storage; 141 126 142 127 if (pid != monitored_pid) 143 128 return 0; ··· 146 131 return 0; 147 132 148 133 storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, 149 - BPF_LOCAL_STORAGE_GET_F_CREATE); 134 + BPF_LOCAL_STORAGE_GET_F_CREATE); 150 135 if (!storage) 151 136 return 0; 152 137 138 + 
bpf_spin_lock(&storage->lock); 153 139 storage->value = DUMMY_STORAGE_VALUE; 140 + bpf_spin_unlock(&storage->lock); 154 141 return 0; 142 + } 143 + 144 + /* This uses the local storage to remember the inode of the binary that a 145 + * process was originally executing. 146 + */ 147 + SEC("lsm/bprm_committed_creds") 148 + void BPF_PROG(exec, struct linux_binprm *bprm) 149 + { 150 + struct local_storage *storage; 151 + 152 + storage = bpf_task_storage_get(&task_storage_map, 153 + bpf_get_current_task_btf(), 0, 154 + BPF_LOCAL_STORAGE_GET_F_CREATE); 155 + if (storage) { 156 + bpf_spin_lock(&storage->lock); 157 + storage->exec_inode = bprm->file->f_inode; 158 + bpf_spin_unlock(&storage->lock); 159 + } 155 160 }
+54
tools/testing/selftests/bpf/progs/skb_pkt_end.c
···
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_PRESERVE_ACCESS_INDEX
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#define NULL 0
+#define INLINE __always_inline
+
+#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
+
+#define ETH_IPV4_TCP_SIZE (14 + sizeof(struct iphdr) + sizeof(struct tcphdr))
+
+static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb)
+{
+	struct iphdr *ip = NULL;
+	struct ethhdr *eth;
+
+	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
+		goto out;
+
+	eth = (void *)(long)skb->data;
+	ip = (void *)(eth + 1);
+
+out:
+	return ip;
+}
+
+SEC("classifier/cls")
+int main_prog(struct __sk_buff *skb)
+{
+	struct iphdr *ip = NULL;
+	struct tcphdr *tcp;
+	__u8 proto = 0;
+
+	if (!(ip = get_iphdr(skb)))
+		goto out;
+
+	proto = ip->protocol;
+
+	if (proto != IPPROTO_TCP)
+		goto out;
+
+	tcp = (void *)(ip + 1);
+	if (tcp->dest != 0)
+		goto out;
+	if (!tcp)
+		goto out;
+
+	return tcp->urg_ptr;
+out:
+	return -1;
+}
+char _license[] SEC("license") = "GPL";
+44
tools/testing/selftests/bpf/progs/test_hash_large_key.c
···
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, struct bigelement);
+	__type(value, __u32);
+} hash_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bigelement);
+} key_map SEC(".maps");
+
+struct bigelement {
+	int a;
+	char b[4096];
+	long long c;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_hash_large_key_test(void *ctx)
+{
+	int zero = 0, err = 1, value = 42;
+	struct bigelement *key;
+
+	key = bpf_map_lookup_elem(&key_map, &zero);
+	if (!key)
+		return 0;
+
+	key->c = 1;
+	if (bpf_map_update_elem(&hash_map, key, &value, BPF_ANY))
+		return 0;
+
+	return 0;
+}
+29
tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
···
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("fentry/bpf_sk_storage_free")
+int BPF_PROG(trace_bpf_sk_storage_free, struct sock *sk)
+{
+	int *value;
+
+	value = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				   BPF_SK_STORAGE_GET_F_CREATE);
+
+	if (value)
+		*value = 1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+95
tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
···
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+struct sk_stg {
+	__u32 pid;
+	__u32 last_notclose_state;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+/* Testing delete */
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} del_sk_stg_map SEC(".maps");
+
+char task_comm[16] = "";
+
+SEC("tp_btf/inet_sock_set_state")
+int BPF_PROG(trace_inet_sock_set_state, struct sock *sk, int oldstate,
+	     int newstate)
+{
+	struct sk_stg *stg;
+
+	if (newstate == BPF_TCP_CLOSE)
+		return 0;
+
+	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				 BPF_SK_STORAGE_GET_F_CREATE);
+	if (!stg)
+		return 0;
+
+	stg->last_notclose_state = newstate;
+
+	bpf_sk_storage_delete(&del_sk_stg_map, sk);
+
+	return 0;
+}
+
+static void set_task_info(struct sock *sk)
+{
+	struct task_struct *task;
+	struct sk_stg *stg;
+
+	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				 BPF_SK_STORAGE_GET_F_CREATE);
+	if (!stg)
+		return;
+
+	stg->pid = bpf_get_current_pid_tgid();
+
+	task = (struct task_struct *)bpf_get_current_task();
+	bpf_core_read_str(&stg->comm, sizeof(stg->comm), &task->comm);
+	bpf_core_read_str(&task_comm, sizeof(task_comm), &task->comm);
+}
+
+SEC("fentry/inet_csk_listen_start")
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog)
+{
+	set_task_info(sk);
+
+	return 0;
+}
+
+SEC("fentry/tcp_connect")
+int BPF_PROG(trace_tcp_connect, struct sock *sk)
+{
+	set_task_info(sk);
+
+	return 0;
+}
+
+SEC("fexit/inet_csk_accept")
+int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+	     struct sock *accepted_sk)
+{
+	set_task_info(accepted_sk);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+13 -73
tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
···
 #include <bpf/bpf_endian.h>
 #include "test_tcpbpf.h"
 
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 4);
-	__type(key, __u32);
-	__type(value, struct tcpbpf_globals);
-} global_map SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 2);
-	__type(key, __u32);
-	__type(value, int);
-} sockopt_results SEC(".maps");
-
-static inline void update_event_map(int event)
-{
-	__u32 key = 0;
-	struct tcpbpf_globals g, *gp;
-
-	gp = bpf_map_lookup_elem(&global_map, &key);
-	if (gp == NULL) {
-		struct tcpbpf_globals g = {0};
-
-		g.event_map |= (1 << event);
-		bpf_map_update_elem(&global_map, &key, &g,
-				    BPF_ANY);
-	} else {
-		g = *gp;
-		g.event_map |= (1 << event);
-		bpf_map_update_elem(&global_map, &key, &g,
-				    BPF_ANY);
-	}
-}
-
+struct tcpbpf_globals global = {};
 int _version SEC("version") = 1;
 
 SEC("sockops")
···
 
 	op = (int) skops->op;
 
-	update_event_map(op);
+	global.event_map |= (1 << op);
 
 	switch (op) {
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 		/* Test failure to set largest cb flag (assumes not defined) */
-		bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+		global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
 		/* Set callback */
-		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+		global.good_cb_test_rv = bpf_sock_ops_cb_flags_set(skops,
 						 BPF_SOCK_OPS_STATE_CB_FLAG);
-		/* Update results */
-		{
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
-			g.bad_cb_test_rv = bad_call_rv;
-			g.good_cb_test_rv = good_call_rv;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
-		}
 		break;
 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
 		skops->sk_txhash = 0x12345f;
···
 
 				thdr = (struct tcphdr *)(header + offset);
 				v = thdr->syn;
-				__u32 key = 1;
 
-				bpf_map_update_elem(&sockopt_results, &key, &v,
-						    BPF_ANY);
+				global.tcp_saved_syn = v;
 			}
 		}
 		break;
···
 		break;
 	case BPF_SOCK_OPS_STATE_CB:
 		if (skops->args[1] == BPF_TCP_CLOSE) {
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
 			if (skops->args[0] == BPF_TCP_LISTEN) {
-				g.num_listen++;
+				global.num_listen++;
 			} else {
-				g.total_retrans = skops->total_retrans;
-				g.data_segs_in = skops->data_segs_in;
-				g.data_segs_out = skops->data_segs_out;
-				g.bytes_received = skops->bytes_received;
-				g.bytes_acked = skops->bytes_acked;
+				global.total_retrans = skops->total_retrans;
+				global.data_segs_in = skops->data_segs_in;
+				global.data_segs_out = skops->data_segs_out;
+				global.bytes_received = skops->bytes_received;
+				global.bytes_acked = skops->bytes_acked;
 			}
-			g.num_close_events++;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
+			global.num_close_events++;
 		}
 		break;
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
···
 		v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
 				   &save_syn, sizeof(save_syn));
 		/* Update global map w/ result of setsock opt */
-		__u32 key = 0;
-
-		bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+		global.tcp_save_syn = v;
 		break;
 	default:
 		rv = -1;
+7 -35
tools/testing/selftests/bpf/progs/test_tunnel_kern.c
···
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
-#include <linux/tcp.h>
 #include <linux/socket.h>
 #include <linux/pkt_cls.h>
 #include <linux/erspan.h>
···
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
···
 	key.tunnel_ttl = 64;
 	if (iph->protocol == IPPROTO_ICMP) {
 		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-	} else {
-		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
-			return TC_ACT_SHOT;
-
-		if (tcp->dest == bpf_htons(5200))
-			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-		else if (tcp->dest == bpf_htons(5201))
-			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
-		else
-			return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
···
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
-	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
···
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct ipv6hdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
-	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
 	key.tunnel_ttl = 64;
-
 	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
-		key.remote_ipv6[3] = bpf_htonl(1);
-	} else {
-		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
-			ERROR(iph->nexthdr);
-			return TC_ACT_SHOT;
-		}
-
-		if (tcp->dest == bpf_htons(5200)) {
-			key.remote_ipv6[3] = bpf_htonl(1);
-		} else if (tcp->dest == bpf_htons(5201)) {
-			key.remote_ipv6[3] = bpf_htonl(2);
-		} else {
-			ERROR(tcp->dest);
-			return TC_ACT_SHOT;
-		}
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
-50
tools/testing/selftests/bpf/tcp_client.py
···
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-serverPort = int(sys.argv[1])
-
-# create active socket
-sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try:
-    sock.connect(('::1', serverPort))
-except socket.error as e:
-    sys.exit(1)
-
-buf = b''
-n = 0
-while n < 1000:
-    buf += b'+'
-    n += 1
-
-sock.settimeout(1);
-n = send(sock, buf)
-n = read(sock, 500)
-sys.exit(0)
-80
tools/testing/selftests/bpf/tcp_server.py
···
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-SERVER_PORT = 12877
-MAX_PORTS = 2
-
-serverPort = SERVER_PORT
-serverSocket = None
-
-# create passive socket
-serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-
-try: serverSocket.bind(('::1', 0))
-except socket.error as msg:
-    print('bind fails: ' + str(msg))
-
-sn = serverSocket.getsockname()
-serverPort = sn[1]
-
-cmdStr = ("./tcp_client.py %d &") % (serverPort)
-os.system(cmdStr)
-
-buf = b''
-n = 0
-while n < 500:
-    buf += b'.'
-    n += 1
-
-serverSocket.listen(MAX_PORTS)
-readList = [serverSocket]
-
-while True:
-    readyRead, readyWrite, inError = \
-        select.select(readList, [], [], 2)
-
-    if len(readyRead) > 0:
-        waitCount = 0
-        for sock in readyRead:
-            if sock == serverSocket:
-                (clientSocket, address) = serverSocket.accept()
-                address = str(address[0])
-                readList.append(clientSocket)
-            else:
-                sock.settimeout(1);
-                s = read(sock, 1000)
-                n = send(sock, buf)
-                sock.close()
-                serverSocket.close()
-                sys.exit(0)
-    else:
-        print('Select timeout!')
-        sys.exit(1)
+2 -1
tools/testing/selftests/bpf/test_maps.c
···
 
 static void test_map_large(void)
 {
+
 	struct bigkey {
 		int a;
-		char b[116];
+		char b[4096];
 		long long c;
 	} key;
 	int fd, i, value;
+11
tools/testing/selftests/bpf/test_progs.h
···
 		___ok; \
 	})
 
+#define ASSERT_NEQ(actual, expected, name) ({ \
+	static int duration = 0; \
+	typeof(actual) ___act = (actual); \
+	typeof(expected) ___exp = (expected); \
+	bool ___ok = ___act != ___exp; \
+	CHECK(!___ok, (name), \
+	      "unexpected %s: actual %lld == expected %lld\n", \
+	      (name), (long long)(___act), (long long)(___exp)); \
+	___ok; \
+})
+
 #define ASSERT_STREQ(actual, expected, name) ({ \
 	static int duration = 0; \
 	const char *___act = actual; \
+2
tools/testing/selftests/bpf/test_tcpbpf.h
···
 	__u64 bytes_acked;
 	__u32 num_listen;
 	__u32 num_close_events;
+	__u32 tcp_save_syn;
+	__u32 tcp_saved_syn;
 };
 #endif
-165
tools/testing/selftests/bpf/test_tcpbpf_user.c
···
-// SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <sys/types.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-#include "cgroup_helpers.h"
-
-#include "test_tcpbpf.h"
-
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS		3
-
-#define EXPECT_EQ(expected, actual, fmt)			\
-	do {							\
-		if ((expected) != (actual)) {			\
-			printf("  Value of: " #actual "\n"	\
-			       "    Actual: %" fmt "\n"		\
-			       "  Expected: %" fmt "\n",	\
-			       (actual), (expected));		\
-			ret--;					\
-		}						\
-	} while (0)
-
-int verify_result(const struct tcpbpf_globals *result)
-{
-	__u32 expected_events;
-	int ret = 0;
-
-	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
-			   (1 << BPF_SOCK_OPS_RWND_INIT) |
-			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
-			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
-			   (1 << BPF_SOCK_OPS_STATE_CB) |
-			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-
-	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
-	EXPECT_EQ(501ULL, result->bytes_received, "llu");
-	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
-	EXPECT_EQ(1, result->data_segs_in, PRIu32);
-	EXPECT_EQ(1, result->data_segs_out, PRIu32);
-	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
-	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
-	EXPECT_EQ(1, result->num_listen, PRIu32);
-	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
-
-	return ret;
-}
-
-int verify_sockopt_result(int sock_map_fd)
-{
-	__u32 key = 0;
-	int ret = 0;
-	int res;
-	int rv;
-
-	/* check setsockopt for SAVE_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(0, res, "d");
-	key = 1;
-	/* check getsockopt for SAVED_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(1, res, "d");
-	return ret;
-}
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
-			const char *name)
-{
-	struct bpf_map *map;
-
-	map = bpf_object__find_map_by_name(obj, name);
-	if (!map) {
-		printf("%s:FAIL:map '%s' not found\n", test, name);
-		return -1;
-	}
-	return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
-	const char *file = "test_tcpbpf_kern.o";
-	int prog_fd, map_fd, sock_map_fd;
-	struct tcpbpf_globals g = {0};
-	const char *cg_path = "/foo";
-	int error = EXIT_FAILURE;
-	struct bpf_object *obj;
-	int cg_fd = -1;
-	int retry = 10;
-	__u32 key = 0;
-	int rv;
-
-	cg_fd = cgroup_setup_and_join(cg_path);
-	if (cg_fd < 0)
-		goto err;
-
-	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
-		printf("FAILED: load_bpf_file failed for: %s\n", file);
-		goto err;
-	}
-
-	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-	if (rv) {
-		printf("FAILED: bpf_prog_attach: %d (%s)\n",
-		       error, strerror(errno));
-		goto err;
-	}
-
-	if (system("./tcp_server.py")) {
-		printf("FAILED: TCP server\n");
-		goto err;
-	}
-
-	map_fd = bpf_find_map(__func__, obj, "global_map");
-	if (map_fd < 0)
-		goto err;
-
-	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
-	if (sock_map_fd < 0)
-		goto err;
-
-retry_lookup:
-	rv = bpf_map_lookup_elem(map_fd, &key, &g);
-	if (rv != 0) {
-		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
-		goto err;
-	}
-
-	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
-		printf("Unexpected number of close events (%d), retrying!\n",
-		       g.num_close_events);
-		usleep(100);
-		goto retry_lookup;
-	}
-
-	if (verify_result(&g)) {
-		printf("FAILED: Wrong stats\n");
-		goto err;
-	}
-
-	if (verify_sockopt_result(sock_map_fd)) {
-		printf("FAILED: Wrong sockopt stats\n");
-		goto err;
-	}
-
-	printf("PASSED!\n");
-	error = 0;
-err:
-	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
-	close(cg_fd);
-	cleanup_cgroup_environment();
-	return error;
-}
+39 -4
tools/testing/selftests/bpf/test_tunnel.sh
···
 # Root namespace with metadata-mode tunnel + BPF
 # Device names and addresses:
 # 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
 #
 # Namespace at_ns0 with native tunnel
 # Device names and addresses:
 # 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
 #
 #
 # End-to-end ping packet flow
···
 	ip addr add dev $DEV 10.1.1.200/24
 }
 
-add_ipip6tnl_tunnel()
+add_ip6tnl_tunnel()
 {
 	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
 	ip netns exec at_ns0 ip link set dev veth0 up
···
 	ip link add dev $DEV_NS type $TYPE \
 		local ::11 remote ::22
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
 	ip netns exec at_ns0 ip link set dev $DEV_NS up
 
 	# root namespace
 	ip link add dev $DEV type $TYPE external
 	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV 1::22/96
 	ip link set dev $DEV up
 }
···
 
 	check $TYPE
 	config_device
-	add_ipip6tnl_tunnel
+	add_ip6tnl_tunnel
 	ip link set dev veth1 mtu 1500
 	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
 	# underlay
···
 		return 1
 	fi
 	echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6ip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ip6ip6tnl00
+	DEV=ip6ip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip6 over ip6
+	ping6 $PING_ARG 1::11
+	check_err $?
+	ip netns exec at_ns0 ping6 $PING_ARG 1::22
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+		echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+		return 1
+	fi
+	echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
 }
 
 setup_xfrm_tunnel()
···
 	ip link del veth1 2> /dev/null
 	ip link del ipip11 2> /dev/null
 	ip link del ipip6tnl11 2> /dev/null
+	ip link del ip6ip6tnl11 2> /dev/null
 	ip link del gretap11 2> /dev/null
 	ip link del ip6gre11 2> /dev/null
 	ip link del ip6gretap11 2> /dev/null
···
 
 echo "Testing IPIP6 tunnel..."
 test_ipip6
+errors=$(( $errors + $? ))
+
+echo "Testing IP6IP6 tunnel..."
+test_ip6ip6
 errors=$(( $errors + $? ))
 
 echo "Testing IPSec tunnel..."
+42
tools/testing/selftests/bpf/verifier/ctx_skb.c
···
 	.errstr_unpriv = "R1 leaks addr",
 	.result = REJECT,
 },
+{
+	"pkt > pkt_end taken check",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,	//  0. r2 = *(u32 *)(r1 + data_end)
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,	//  1. r4 = *(u32 *)(r1 + data)
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_4),		//  2. r3 = r4
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42),		//  3. r3 += 42
+	BPF_MOV64_IMM(BPF_REG_1, 0),			//  4. r1 = 0
+	BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2),	//  5. if r3 > r2 goto 8
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14),		//  6. r4 += 14
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),		//  7. r1 = r4
+	BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1),	//  8. if r3 > r2 goto 10
+	BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9),	//  9. r2 = *(u16 *)(r1 + 9)
+	BPF_MOV64_IMM(BPF_REG_0, 0),			// 10. r0 = 0
+	BPF_EXIT_INSN(),				// 11. exit
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+},
+{
+	"pkt_end < pkt taken check",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,	//  0. r2 = *(u32 *)(r1 + data_end)
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,	//  1. r4 = *(u32 *)(r1 + data)
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_4),		//  2. r3 = r4
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42),		//  3. r3 += 42
+	BPF_MOV64_IMM(BPF_REG_1, 0),			//  4. r1 = 0
+	BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2),	//  5. if r3 > r2 goto 8
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14),		//  6. r4 += 14
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),		//  7. r1 = r4
+	BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, 1),	//  8. if r2 < r3 goto 10
+	BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9),	//  9. r2 = *(u16 *)(r1 + 9)
+	BPF_MOV64_IMM(BPF_REG_0, 0),			// 10. r0 = 0
+	BPF_EXIT_INSN(),				// 11. exit
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+},