Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: btf: Add pretty print support to the basic arraymap

This patch adds pretty print support to the basic arraymap.
Support for other bpf maps can be added later.

This patch adds new attrs to the BPF_MAP_CREATE command to allow
specifying the btf_fd, btf_key_id and btf_value_id. BPF_MAP_CREATE
can then associate the BTF with the map if the map type being
created supports BTF.

A BTF-supported map needs to implement two new map ops,
map_seq_show_elem() and map_check_btf(). This patch
implements these new map ops for the basic arraymap.

It also adds file_operations, bpffs_map_fops, to the pinned
map such that the pinned map can be opened and read.
After that, the user has an intuitive way to do
"cat bpffs/pathto/a-pinned-map" instead of getting
an error.

bpffs_map_fops should not be extended further to support
other operations. Other operations (e.g. write, key lookup, ...)
should be realized by userspace tools (e.g. bpftool) through
BPF_OBJ_GET_INFO_BY_FD, the map's lookup/update interface, etc.
Follow up patches will allow the userspace to obtain
the BTF from a map-fd.

Here is a sample output when reading a pinned arraymap
with the following map's value:

struct map_value {
int count_a;
int count_b;
};

cat /sys/fs/bpf/pinned_array_map:

0: {1,2}
1: {3,4}
2: {5,6}
...

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

authored by

Martin KaFai Lau and committed by
Daniel Borkmann
a26ca7c9 60197cfb

+254 -7
+17 -3
include/linux/bpf.h
··· 22 22 struct bpf_prog; 23 23 struct bpf_map; 24 24 struct sock; 25 + struct seq_file; 26 + struct btf; 25 27 26 28 /* map is generic key/value storage optionally accesible by eBPF programs */ 27 29 struct bpf_map_ops { ··· 45 43 void (*map_fd_put_ptr)(void *ptr); 46 44 u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); 47 45 u32 (*map_fd_sys_lookup_elem)(void *ptr); 46 + void (*map_seq_show_elem)(struct bpf_map *map, void *key, 47 + struct seq_file *m); 48 + int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, 49 + u32 key_type_id, u32 value_type_id); 48 50 }; 49 51 50 52 struct bpf_map { 51 - /* 1st cacheline with read-mostly members of which some 53 + /* The first two cachelines with read-mostly members of which some 52 54 * are also accessed in fast-path (e.g. ops, max_entries). 53 55 */ 54 56 const struct bpf_map_ops *ops ____cacheline_aligned; ··· 68 62 u32 pages; 69 63 u32 id; 70 64 int numa_node; 65 + u32 btf_key_id; 66 + u32 btf_value_id; 67 + struct btf *btf; 71 68 bool unpriv_array; 72 - /* 7 bytes hole */ 69 + /* 55 bytes hole */ 73 70 74 - /* 2nd cacheline with misc members to avoid false sharing 71 + /* The 3rd and 4th cacheline with misc members to avoid false sharing 75 72 * particularly with refcounting. 76 73 */ 77 74 struct user_struct *user ____cacheline_aligned; ··· 107 98 static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) 108 99 { 109 100 return container_of(map, struct bpf_offloaded_map, map); 101 + } 102 + 103 + static inline bool bpf_map_support_seq_show(const struct bpf_map *map) 104 + { 105 + return map->ops->map_seq_show_elem && map->ops->map_check_btf; 110 106 } 111 107 112 108 extern const struct bpf_map_ops bpf_map_offload_ops;
+3
include/uapi/linux/bpf.h
··· 280 280 */ 281 281 char map_name[BPF_OBJ_NAME_LEN]; 282 282 __u32 map_ifindex; /* ifindex of netdev to create on */ 283 + __u32 btf_fd; /* fd pointing to a BTF type data */ 284 + __u32 btf_key_id; /* BTF type_id of the key */ 285 + __u32 btf_value_id; /* BTF type_id of the value */ 283 286 }; 284 287 285 288 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+50
kernel/bpf/arraymap.c
··· 11 11 * General Public License for more details. 12 12 */ 13 13 #include <linux/bpf.h> 14 + #include <linux/btf.h> 14 15 #include <linux/err.h> 15 16 #include <linux/slab.h> 16 17 #include <linux/mm.h> 17 18 #include <linux/filter.h> 18 19 #include <linux/perf_event.h> 20 + #include <uapi/linux/btf.h> 19 21 20 22 #include "map_in_map.h" 21 23 ··· 338 336 bpf_map_area_free(array); 339 337 } 340 338 339 + static void array_map_seq_show_elem(struct bpf_map *map, void *key, 340 + struct seq_file *m) 341 + { 342 + void *value; 343 + 344 + rcu_read_lock(); 345 + 346 + value = array_map_lookup_elem(map, key); 347 + if (!value) { 348 + rcu_read_unlock(); 349 + return; 350 + } 351 + 352 + seq_printf(m, "%u: ", *(u32 *)key); 353 + btf_type_seq_show(map->btf, map->btf_value_id, value, m); 354 + seq_puts(m, "\n"); 355 + 356 + rcu_read_unlock(); 357 + } 358 + 359 + static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, 360 + u32 btf_key_id, u32 btf_value_id) 361 + { 362 + const struct btf_type *key_type, *value_type; 363 + u32 key_size, value_size; 364 + u32 int_data; 365 + 366 + key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 367 + if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) 368 + return -EINVAL; 369 + 370 + int_data = *(u32 *)(key_type + 1); 371 + /* bpf array can only take a u32 key. This check makes 372 + * sure that the btf matches the attr used during map_create. 
373 + */ 374 + if (BTF_INT_BITS(int_data) != 32 || key_size != 4 || 375 + BTF_INT_OFFSET(int_data)) 376 + return -EINVAL; 377 + 378 + value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 379 + if (!value_type || value_size > map->value_size) 380 + return -EINVAL; 381 + 382 + return 0; 383 + } 384 + 341 385 const struct bpf_map_ops array_map_ops = { 342 386 .map_alloc_check = array_map_alloc_check, 343 387 .map_alloc = array_map_alloc, ··· 393 345 .map_update_elem = array_map_update_elem, 394 346 .map_delete_elem = array_map_delete_elem, 395 347 .map_gen_lookup = array_map_gen_lookup, 348 + .map_seq_show_elem = array_map_seq_show_elem, 349 + .map_check_btf = array_map_check_btf, 396 350 }; 397 351 398 352 const struct bpf_map_ops percpu_array_map_ops = {
+153 -3
kernel/bpf/inode.c
··· 150 150 return 0; 151 151 } 152 152 153 + struct map_iter { 154 + void *key; 155 + bool done; 156 + }; 157 + 158 + static struct map_iter *map_iter(struct seq_file *m) 159 + { 160 + return m->private; 161 + } 162 + 163 + static struct bpf_map *seq_file_to_map(struct seq_file *m) 164 + { 165 + return file_inode(m->file)->i_private; 166 + } 167 + 168 + static void map_iter_free(struct map_iter *iter) 169 + { 170 + if (iter) { 171 + kfree(iter->key); 172 + kfree(iter); 173 + } 174 + } 175 + 176 + static struct map_iter *map_iter_alloc(struct bpf_map *map) 177 + { 178 + struct map_iter *iter; 179 + 180 + iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); 181 + if (!iter) 182 + goto error; 183 + 184 + iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); 185 + if (!iter->key) 186 + goto error; 187 + 188 + return iter; 189 + 190 + error: 191 + map_iter_free(iter); 192 + return NULL; 193 + } 194 + 195 + static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) 196 + { 197 + struct bpf_map *map = seq_file_to_map(m); 198 + void *key = map_iter(m)->key; 199 + 200 + if (map_iter(m)->done) 201 + return NULL; 202 + 203 + if (unlikely(v == SEQ_START_TOKEN)) 204 + goto done; 205 + 206 + if (map->ops->map_get_next_key(map, key, key)) { 207 + map_iter(m)->done = true; 208 + return NULL; 209 + } 210 + 211 + done: 212 + ++(*pos); 213 + return key; 214 + } 215 + 216 + static void *map_seq_start(struct seq_file *m, loff_t *pos) 217 + { 218 + if (map_iter(m)->done) 219 + return NULL; 220 + 221 + return *pos ? map_iter(m)->key : SEQ_START_TOKEN; 222 + } 223 + 224 + static void map_seq_stop(struct seq_file *m, void *v) 225 + { 226 + } 227 + 228 + static int map_seq_show(struct seq_file *m, void *v) 229 + { 230 + struct bpf_map *map = seq_file_to_map(m); 231 + void *key = map_iter(m)->key; 232 + 233 + if (unlikely(v == SEQ_START_TOKEN)) { 234 + seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); 235 + seq_puts(m, "# WARNING!! 
The output format will change\n"); 236 + } else { 237 + map->ops->map_seq_show_elem(map, key, m); 238 + } 239 + 240 + return 0; 241 + } 242 + 243 + static const struct seq_operations bpffs_map_seq_ops = { 244 + .start = map_seq_start, 245 + .next = map_seq_next, 246 + .show = map_seq_show, 247 + .stop = map_seq_stop, 248 + }; 249 + 250 + static int bpffs_map_open(struct inode *inode, struct file *file) 251 + { 252 + struct bpf_map *map = inode->i_private; 253 + struct map_iter *iter; 254 + struct seq_file *m; 255 + int err; 256 + 257 + iter = map_iter_alloc(map); 258 + if (!iter) 259 + return -ENOMEM; 260 + 261 + err = seq_open(file, &bpffs_map_seq_ops); 262 + if (err) { 263 + map_iter_free(iter); 264 + return err; 265 + } 266 + 267 + m = file->private_data; 268 + m->private = iter; 269 + 270 + return 0; 271 + } 272 + 273 + static int bpffs_map_release(struct inode *inode, struct file *file) 274 + { 275 + struct seq_file *m = file->private_data; 276 + 277 + map_iter_free(map_iter(m)); 278 + 279 + return seq_release(inode, file); 280 + } 281 + 282 + /* bpffs_map_fops should only implement the basic 283 + * read operation for a BPF map. The purpose is to 284 + * provide a simple user intuitive way to do 285 + * "cat bpffs/pathto/a-pinned-map". 286 + * 287 + * Other operations (e.g. write, lookup...) should be realized by 288 + * the userspace tools (e.g. bpftool) through the 289 + * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update 290 + * interface. 
291 + */ 292 + static const struct file_operations bpffs_map_fops = { 293 + .open = bpffs_map_open, 294 + .read = seq_read, 295 + .release = bpffs_map_release, 296 + }; 297 + 153 298 static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, 154 - const struct inode_operations *iops) 299 + const struct inode_operations *iops, 300 + const struct file_operations *fops) 155 301 { 156 302 struct inode *dir = dentry->d_parent->d_inode; 157 303 struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); ··· 305 159 return PTR_ERR(inode); 306 160 307 161 inode->i_op = iops; 162 + inode->i_fop = fops; 308 163 inode->i_private = raw; 309 164 310 165 bpf_dentry_finalize(dentry, inode, dir); ··· 314 167 315 168 static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) 316 169 { 317 - return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops); 170 + return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL); 318 171 } 319 172 320 173 static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) 321 174 { 322 - return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops); 175 + struct bpf_map *map = arg; 176 + 177 + return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, 178 + map->btf ? &bpffs_map_fops : NULL); 323 179 } 324 180 325 181 static struct dentry *
+31 -1
kernel/bpf/syscall.c
··· 27 27 #include <linux/cred.h> 28 28 #include <linux/timekeeping.h> 29 29 #include <linux/ctype.h> 30 + #include <linux/btf.h> 30 31 31 32 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ 32 33 (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ ··· 252 251 253 252 bpf_map_uncharge_memlock(map); 254 253 security_bpf_map_free(map); 254 + btf_put(map->btf); 255 255 /* implementation dependent freeing */ 256 256 map->ops->map_free(map); 257 257 } ··· 418 416 return 0; 419 417 } 420 418 421 - #define BPF_MAP_CREATE_LAST_FIELD map_ifindex 419 + #define BPF_MAP_CREATE_LAST_FIELD btf_value_id 422 420 /* called via syscall */ 423 421 static int map_create(union bpf_attr *attr) 424 422 { ··· 452 450 atomic_set(&map->refcnt, 1); 453 451 atomic_set(&map->usercnt, 1); 454 452 453 + if (bpf_map_support_seq_show(map) && 454 + (attr->btf_key_id || attr->btf_value_id)) { 455 + struct btf *btf; 456 + 457 + if (!attr->btf_key_id || !attr->btf_value_id) { 458 + err = -EINVAL; 459 + goto free_map_nouncharge; 460 + } 461 + 462 + btf = btf_get_by_fd(attr->btf_fd); 463 + if (IS_ERR(btf)) { 464 + err = PTR_ERR(btf); 465 + goto free_map_nouncharge; 466 + } 467 + 468 + err = map->ops->map_check_btf(map, btf, attr->btf_key_id, 469 + attr->btf_value_id); 470 + if (err) { 471 + btf_put(btf); 472 + goto free_map_nouncharge; 473 + } 474 + 475 + map->btf = btf; 476 + map->btf_key_id = attr->btf_key_id; 477 + map->btf_value_id = attr->btf_value_id; 478 + } 479 + 455 480 err = security_bpf_map_alloc(map); 456 481 if (err) 457 482 goto free_map_nouncharge; ··· 511 482 free_map_sec: 512 483 security_bpf_map_free(map); 513 484 free_map_nouncharge: 485 + btf_put(map->btf); 514 486 map->ops->map_free(map); 515 487 return err; 516 488 }