Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v6.3-rc4 932 lines 23 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* Copyright (c) 2019 Facebook */ 3#include <linux/rculist.h> 4#include <linux/list.h> 5#include <linux/hash.h> 6#include <linux/types.h> 7#include <linux/spinlock.h> 8#include <linux/bpf.h> 9#include <linux/btf.h> 10#include <linux/btf_ids.h> 11#include <linux/bpf_local_storage.h> 12#include <net/bpf_sk_storage.h> 13#include <net/sock.h> 14#include <uapi/linux/sock_diag.h> 15#include <uapi/linux/btf.h> 16#include <linux/rcupdate_trace.h> 17 18DEFINE_BPF_STORAGE_CACHE(sk_cache); 19 20static struct bpf_local_storage_data * 21bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) 22{ 23 struct bpf_local_storage *sk_storage; 24 struct bpf_local_storage_map *smap; 25 26 sk_storage = 27 rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held()); 28 if (!sk_storage) 29 return NULL; 30 31 smap = (struct bpf_local_storage_map *)map; 32 return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit); 33} 34 35static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) 36{ 37 struct bpf_local_storage_data *sdata; 38 39 sdata = bpf_sk_storage_lookup(sk, map, false); 40 if (!sdata) 41 return -ENOENT; 42 43 bpf_selem_unlink(SELEM(sdata), true); 44 45 return 0; 46} 47 48/* Called by __sk_destruct() & bpf_sk_storage_clone() */ 49void bpf_sk_storage_free(struct sock *sk) 50{ 51 struct bpf_local_storage *sk_storage; 52 bool free_sk_storage = false; 53 54 rcu_read_lock(); 55 sk_storage = rcu_dereference(sk->sk_bpf_storage); 56 if (!sk_storage) { 57 rcu_read_unlock(); 58 return; 59 } 60 61 raw_spin_lock_bh(&sk_storage->lock); 62 free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage); 63 raw_spin_unlock_bh(&sk_storage->lock); 64 rcu_read_unlock(); 65 66 if (free_sk_storage) 67 kfree_rcu(sk_storage, rcu); 68} 69 70static void bpf_sk_storage_map_free(struct bpf_map *map) 71{ 72 bpf_local_storage_map_free(map, &sk_cache, NULL); 73} 74 75static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) 76{ 77 return bpf_local_storage_map_alloc(attr, &sk_cache); 78} 79 80static int notsupp_get_next_key(struct bpf_map *map, void *key, 81 void *next_key) 82{ 83 return -ENOTSUPP; 84} 85 86static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) 87{ 88 struct bpf_local_storage_data *sdata; 89 struct socket *sock; 90 int fd, err; 91 92 fd = *(int *)key; 93 sock = sockfd_lookup(fd, &err); 94 if (sock) { 95 sdata = bpf_sk_storage_lookup(sock->sk, map, true); 96 sockfd_put(sock); 97 return sdata ? sdata->data : NULL; 98 } 99 100 return ERR_PTR(err); 101} 102 103static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, 104 void *value, u64 map_flags) 105{ 106 struct bpf_local_storage_data *sdata; 107 struct socket *sock; 108 int fd, err; 109 110 fd = *(int *)key; 111 sock = sockfd_lookup(fd, &err); 112 if (sock) { 113 sdata = bpf_local_storage_update( 114 sock->sk, (struct bpf_local_storage_map *)map, value, 115 map_flags, GFP_ATOMIC); 116 sockfd_put(sock); 117 return PTR_ERR_OR_ZERO(sdata); 118 } 119 120 return err; 121} 122 123static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) 124{ 125 struct socket *sock; 126 int fd, err; 127 128 fd = *(int *)key; 129 sock = sockfd_lookup(fd, &err); 130 if (sock) { 131 err = bpf_sk_storage_del(sock->sk, map); 132 sockfd_put(sock); 133 return err; 134 } 135 136 return err; 137} 138 139static struct bpf_local_storage_elem * 140bpf_sk_storage_clone_elem(struct sock *newsk, 141 struct bpf_local_storage_map *smap, 142 struct bpf_local_storage_elem *selem) 143{ 144 struct bpf_local_storage_elem *copy_selem; 145 146 copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC); 147 if (!copy_selem) 148 return NULL; 149 150 if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)) 151 copy_map_value_locked(&smap->map, SDATA(copy_selem)->data, 152 SDATA(selem)->data, true); 153 else 154 copy_map_value(&smap->map, SDATA(copy_selem)->data, 155 SDATA(selem)->data); 156 157 return copy_selem; 158} 159 160int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) 161{ 162 struct bpf_local_storage *new_sk_storage = NULL; 163 struct bpf_local_storage *sk_storage; 164 struct bpf_local_storage_elem *selem; 165 int ret = 0; 166 167 RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); 168 169 rcu_read_lock(); 170 sk_storage = rcu_dereference(sk->sk_bpf_storage); 171 172 if (!sk_storage || hlist_empty(&sk_storage->list)) 173 goto out; 174 175 hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { 176 struct bpf_local_storage_elem *copy_selem; 177 struct bpf_local_storage_map *smap; 178 struct bpf_map *map; 179 180 smap = rcu_dereference(SDATA(selem)->smap); 181 if (!(smap->map.map_flags & BPF_F_CLONE)) 182 continue; 183 184 /* Note that for lockless listeners adding new element 185 * here can race with cleanup in bpf_local_storage_map_free. 186 * Try to grab map refcnt to make sure that it's still 187 * alive and prevent concurrent removal. 188 */ 189 map = bpf_map_inc_not_zero(&smap->map); 190 if (IS_ERR(map)) 191 continue; 192 193 copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); 194 if (!copy_selem) { 195 ret = -ENOMEM; 196 bpf_map_put(map); 197 goto out; 198 } 199 200 if (new_sk_storage) { 201 bpf_selem_link_map(smap, copy_selem); 202 bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); 203 } else { 204 ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); 205 if (ret) { 206 kfree(copy_selem); 207 atomic_sub(smap->elem_size, 208 &newsk->sk_omem_alloc); 209 bpf_map_put(map); 210 goto out; 211 } 212 213 new_sk_storage = 214 rcu_dereference(copy_selem->local_storage); 215 } 216 bpf_map_put(map); 217 } 218 219out: 220 rcu_read_unlock(); 221 222 /* In case of an error, don't free anything explicitly here, the 223 * caller is responsible to call bpf_sk_storage_free. 224 */ 225 226 return ret; 227} 228 229/* *gfp_flags* is a hidden argument provided by the verifier */ 230BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, 231 void *, value, u64, flags, gfp_t, gfp_flags) 232{ 233 struct bpf_local_storage_data *sdata; 234 235 WARN_ON_ONCE(!bpf_rcu_lock_held()); 236 if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) 237 return (unsigned long)NULL; 238 239 sdata = bpf_sk_storage_lookup(sk, map, true); 240 if (sdata) 241 return (unsigned long)sdata->data; 242 243 if (flags == BPF_SK_STORAGE_GET_F_CREATE && 244 /* Cannot add new elem to a going away sk. 245 * Otherwise, the new elem may become a leak 246 * (and also other memory issues during map 247 * destruction). 248 */ 249 refcount_inc_not_zero(&sk->sk_refcnt)) { 250 sdata = bpf_local_storage_update( 251 sk, (struct bpf_local_storage_map *)map, value, 252 BPF_NOEXIST, gfp_flags); 253 /* sk must be a fullsock (guaranteed by verifier), 254 * so sock_gen_put() is unnecessary. 255 */ 256 sock_put(sk); 257 return IS_ERR(sdata) ? 258 (unsigned long)NULL : (unsigned long)sdata->data; 259 } 260 261 return (unsigned long)NULL; 262} 263 264BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) 265{ 266 WARN_ON_ONCE(!bpf_rcu_lock_held()); 267 if (!sk || !sk_fullsock(sk)) 268 return -EINVAL; 269 270 if (refcount_inc_not_zero(&sk->sk_refcnt)) { 271 int err; 272 273 err = bpf_sk_storage_del(sk, map); 274 sock_put(sk); 275 return err; 276 } 277 278 return -ENOENT; 279} 280 281static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, 282 void *owner, u32 size) 283{ 284 int optmem_max = READ_ONCE(sysctl_optmem_max); 285 struct sock *sk = (struct sock *)owner; 286 287 /* same check as in sock_kmalloc() */ 288 if (size <= optmem_max && 289 atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { 290 atomic_add(size, &sk->sk_omem_alloc); 291 return 0; 292 } 293 294 return -ENOMEM; 295} 296 297static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap, 298 void *owner, u32 size) 299{ 300 struct sock *sk = owner; 301 302 atomic_sub(size, &sk->sk_omem_alloc); 303} 304 305static struct bpf_local_storage __rcu ** 306bpf_sk_storage_ptr(void *owner) 307{ 308 struct sock *sk = owner; 309 310 return &sk->sk_bpf_storage; 311} 312 313const struct bpf_map_ops sk_storage_map_ops = { 314 .map_meta_equal = bpf_map_meta_equal, 315 .map_alloc_check = bpf_local_storage_map_alloc_check, 316 .map_alloc = bpf_sk_storage_map_alloc, 317 .map_free = bpf_sk_storage_map_free, 318 .map_get_next_key = notsupp_get_next_key, 319 .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, 320 .map_update_elem = bpf_fd_sk_storage_update_elem, 321 .map_delete_elem = bpf_fd_sk_storage_delete_elem, 322 .map_check_btf = bpf_local_storage_map_check_btf, 323 .map_btf_id = &bpf_local_storage_map_btf_id[0], 324 .map_local_storage_charge = bpf_sk_storage_charge, 325 .map_local_storage_uncharge = bpf_sk_storage_uncharge, 326 .map_owner_storage_ptr = bpf_sk_storage_ptr, 327}; 328 329const struct bpf_func_proto bpf_sk_storage_get_proto = { 330 .func = bpf_sk_storage_get, 331 .gpl_only = false, 332 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 333 .arg1_type = ARG_CONST_MAP_PTR, 334 .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 335 .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, 336 .arg4_type = ARG_ANYTHING, 337}; 338 339const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = { 340 .func = bpf_sk_storage_get, 341 .gpl_only = false, 342 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 343 .arg1_type = ARG_CONST_MAP_PTR, 344 .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */ 345 .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, 346 .arg4_type = ARG_ANYTHING, 347}; 348 349const struct bpf_func_proto bpf_sk_storage_delete_proto = { 350 .func = bpf_sk_storage_delete, 351 .gpl_only = false, 352 .ret_type = RET_INTEGER, 353 .arg1_type = ARG_CONST_MAP_PTR, 354 .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 355}; 356 357static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) 358{ 359 const struct btf *btf_vmlinux; 360 const struct btf_type *t; 361 const char *tname; 362 u32 btf_id; 363 364 if (prog->aux->dst_prog) 365 return false; 366 367 /* Ensure the tracing program is not tracing 368 * any bpf_sk_storage*() function and also 369 * use the bpf_sk_storage_(get|delete) helper. 370 */ 371 switch (prog->expected_attach_type) { 372 case BPF_TRACE_ITER: 373 case BPF_TRACE_RAW_TP: 374 /* bpf_sk_storage has no trace point */ 375 return true; 376 case BPF_TRACE_FENTRY: 377 case BPF_TRACE_FEXIT: 378 btf_vmlinux = bpf_get_btf_vmlinux(); 379 if (IS_ERR_OR_NULL(btf_vmlinux)) 380 return false; 381 btf_id = prog->aux->attach_btf_id; 382 t = btf_type_by_id(btf_vmlinux, btf_id); 383 tname = btf_name_by_offset(btf_vmlinux, t->name_off); 384 return !!strncmp(tname, "bpf_sk_storage", 385 strlen("bpf_sk_storage")); 386 default: 387 return false; 388 } 389 390 return false; 391} 392 393/* *gfp_flags* is a hidden argument provided by the verifier */ 394BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, 395 void *, value, u64, flags, gfp_t, gfp_flags) 396{ 397 WARN_ON_ONCE(!bpf_rcu_lock_held()); 398 if (in_hardirq() || in_nmi()) 399 return (unsigned long)NULL; 400 401 return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags, 402 gfp_flags); 403} 404 405BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, 406 struct sock *, sk) 407{ 408 WARN_ON_ONCE(!bpf_rcu_lock_held()); 409 if (in_hardirq() || in_nmi()) 410 return -EPERM; 411 412 return ____bpf_sk_storage_delete(map, sk); 413} 414 415const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = { 416 .func = bpf_sk_storage_get_tracing, 417 .gpl_only = false, 418 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 419 .arg1_type = ARG_CONST_MAP_PTR, 420 .arg2_type = ARG_PTR_TO_BTF_ID, 421 .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 422 .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, 423 .arg4_type = ARG_ANYTHING, 424 .allowed = bpf_sk_storage_tracing_allowed, 425}; 426 427const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = { 428 .func = bpf_sk_storage_delete_tracing, 429 .gpl_only = false, 430 .ret_type = RET_INTEGER, 431 .arg1_type = ARG_CONST_MAP_PTR, 432 .arg2_type = ARG_PTR_TO_BTF_ID, 433 .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 434 .allowed = bpf_sk_storage_tracing_allowed, 435}; 436 437struct bpf_sk_storage_diag { 438 u32 nr_maps; 439 struct bpf_map *maps[]; 440}; 441 442/* The reply will be like: 443 * INET_DIAG_BPF_SK_STORAGES (nla_nest) 444 * SK_DIAG_BPF_STORAGE (nla_nest) 445 * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) 446 * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) 447 * SK_DIAG_BPF_STORAGE (nla_nest) 448 * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) 449 * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) 450 * .... 451 */ 452static int nla_value_size(u32 value_size) 453{ 454 /* SK_DIAG_BPF_STORAGE (nla_nest) 455 * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) 456 * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) 457 */ 458 return nla_total_size(0) + nla_total_size(sizeof(u32)) + 459 nla_total_size_64bit(value_size); 460} 461 462void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) 463{ 464 u32 i; 465 466 if (!diag) 467 return; 468 469 for (i = 0; i < diag->nr_maps; i++) 470 bpf_map_put(diag->maps[i]); 471 472 kfree(diag); 473} 474EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free); 475 476static bool diag_check_dup(const struct bpf_sk_storage_diag *diag, 477 const struct bpf_map *map) 478{ 479 u32 i; 480 481 for (i = 0; i < diag->nr_maps; i++) { 482 if (diag->maps[i] == map) 483 return true; 484 } 485 486 return false; 487} 488 489struct bpf_sk_storage_diag * 490bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) 491{ 492 struct bpf_sk_storage_diag *diag; 493 struct nlattr *nla; 494 u32 nr_maps = 0; 495 int rem, err; 496 497 /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as 498 * the map_alloc_check() side also does. 499 */ 500 if (!bpf_capable()) 501 return ERR_PTR(-EPERM); 502 503 nla_for_each_nested(nla, nla_stgs, rem) { 504 if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) 505 nr_maps++; 506 } 507 508 diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL); 509 if (!diag) 510 return ERR_PTR(-ENOMEM); 511 512 nla_for_each_nested(nla, nla_stgs, rem) { 513 struct bpf_map *map; 514 int map_fd; 515 516 if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD) 517 continue; 518 519 map_fd = nla_get_u32(nla); 520 map = bpf_map_get(map_fd); 521 if (IS_ERR(map)) { 522 err = PTR_ERR(map); 523 goto err_free; 524 } 525 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) { 526 bpf_map_put(map); 527 err = -EINVAL; 528 goto err_free; 529 } 530 if (diag_check_dup(diag, map)) { 531 bpf_map_put(map); 532 err = -EEXIST; 533 goto err_free; 534 } 535 diag->maps[diag->nr_maps++] = map; 536 } 537 538 return diag; 539 540err_free: 541 bpf_sk_storage_diag_free(diag); 542 return ERR_PTR(err); 543} 544EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); 545 546static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) 547{ 548 struct nlattr *nla_stg, *nla_value; 549 struct bpf_local_storage_map *smap; 550 551 /* It cannot exceed max nlattr's payload */ 552 BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); 553 554 nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE); 555 if (!nla_stg) 556 return -EMSGSIZE; 557 558 smap = rcu_dereference(sdata->smap); 559 if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) 560 goto errout; 561 562 nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE, 563 smap->map.value_size, 564 SK_DIAG_BPF_STORAGE_PAD); 565 if (!nla_value) 566 goto errout; 567 568 if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)) 569 copy_map_value_locked(&smap->map, nla_data(nla_value), 570 sdata->data, true); 571 else 572 copy_map_value(&smap->map, nla_data(nla_value), sdata->data); 573 574 nla_nest_end(skb, nla_stg); 575 return 0; 576 577errout: 578 nla_nest_cancel(skb, nla_stg); 579 return -EMSGSIZE; 580} 581 582static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, 583 int stg_array_type, 584 unsigned int *res_diag_size) 585{ 586 /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ 587 unsigned int diag_size = nla_total_size(0); 588 struct bpf_local_storage *sk_storage; 589 struct bpf_local_storage_elem *selem; 590 struct bpf_local_storage_map *smap; 591 struct nlattr *nla_stgs; 592 unsigned int saved_len; 593 int err = 0; 594 595 rcu_read_lock(); 596 597 sk_storage = rcu_dereference(sk->sk_bpf_storage); 598 if (!sk_storage || hlist_empty(&sk_storage->list)) { 599 rcu_read_unlock(); 600 return 0; 601 } 602 603 nla_stgs = nla_nest_start(skb, stg_array_type); 604 if (!nla_stgs) 605 /* Continue to learn diag_size */ 606 err = -EMSGSIZE; 607 608 saved_len = skb->len; 609 hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { 610 smap = rcu_dereference(SDATA(selem)->smap); 611 diag_size += nla_value_size(smap->map.value_size); 612 613 if (nla_stgs && diag_get(SDATA(selem), skb)) 614 /* Continue to learn diag_size */ 615 err = -EMSGSIZE; 616 } 617 618 rcu_read_unlock(); 619 620 if (nla_stgs) { 621 if (saved_len == skb->len) 622 nla_nest_cancel(skb, nla_stgs); 623 else 624 nla_nest_end(skb, nla_stgs); 625 } 626 627 if (diag_size == nla_total_size(0)) { 628 *res_diag_size = 0; 629 return 0; 630 } 631 632 *res_diag_size = diag_size; 633 return err; 634} 635 636int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, 637 struct sock *sk, struct sk_buff *skb, 638 int stg_array_type, 639 unsigned int *res_diag_size) 640{ 641 /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ 642 unsigned int diag_size = nla_total_size(0); 643 struct bpf_local_storage *sk_storage; 644 struct bpf_local_storage_data *sdata; 645 struct nlattr *nla_stgs; 646 unsigned int saved_len; 647 int err = 0; 648 u32 i; 649 650 *res_diag_size = 0; 651 652 /* No map has been specified. Dump all. */ 653 if (!diag->nr_maps) 654 return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type, 655 res_diag_size); 656 657 rcu_read_lock(); 658 sk_storage = rcu_dereference(sk->sk_bpf_storage); 659 if (!sk_storage || hlist_empty(&sk_storage->list)) { 660 rcu_read_unlock(); 661 return 0; 662 } 663 664 nla_stgs = nla_nest_start(skb, stg_array_type); 665 if (!nla_stgs) 666 /* Continue to learn diag_size */ 667 err = -EMSGSIZE; 668 669 saved_len = skb->len; 670 for (i = 0; i < diag->nr_maps; i++) { 671 sdata = bpf_local_storage_lookup(sk_storage, 672 (struct bpf_local_storage_map *)diag->maps[i], 673 false); 674 675 if (!sdata) 676 continue; 677 678 diag_size += nla_value_size(diag->maps[i]->value_size); 679 680 if (nla_stgs && diag_get(sdata, skb)) 681 /* Continue to learn diag_size */ 682 err = -EMSGSIZE; 683 } 684 rcu_read_unlock(); 685 686 if (nla_stgs) { 687 if (saved_len == skb->len) 688 nla_nest_cancel(skb, nla_stgs); 689 else 690 nla_nest_end(skb, nla_stgs); 691 } 692 693 if (diag_size == nla_total_size(0)) { 694 *res_diag_size = 0; 695 return 0; 696 } 697 698 *res_diag_size = diag_size; 699 return err; 700} 701EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); 702 703struct bpf_iter_seq_sk_storage_map_info { 704 struct bpf_map *map; 705 unsigned int bucket_id; 706 unsigned skip_elems; 707}; 708 709static struct bpf_local_storage_elem * 710bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, 711 struct bpf_local_storage_elem *prev_selem) 712 __acquires(RCU) __releases(RCU) 713{ 714 struct bpf_local_storage *sk_storage; 715 struct bpf_local_storage_elem *selem; 716 u32 skip_elems = info->skip_elems; 717 struct bpf_local_storage_map *smap; 718 u32 bucket_id = info->bucket_id; 719 u32 i, count, n_buckets; 720 struct bpf_local_storage_map_bucket *b; 721 722 smap = (struct bpf_local_storage_map *)info->map; 723 n_buckets = 1U << smap->bucket_log; 724 if (bucket_id >= n_buckets) 725 return NULL; 726 727 /* try to find next selem in the same bucket */ 728 selem = prev_selem; 729 count = 0; 730 while (selem) { 731 selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)), 732 struct bpf_local_storage_elem, map_node); 733 if (!selem) { 734 /* not found, unlock and go to the next bucket */ 735 b = &smap->buckets[bucket_id++]; 736 rcu_read_unlock(); 737 skip_elems = 0; 738 break; 739 } 740 sk_storage = rcu_dereference(selem->local_storage); 741 if (sk_storage) { 742 info->skip_elems = skip_elems + count; 743 return selem; 744 } 745 count++; 746 } 747 748 for (i = bucket_id; i < (1U << smap->bucket_log); i++) { 749 b = &smap->buckets[i]; 750 rcu_read_lock(); 751 count = 0; 752 hlist_for_each_entry_rcu(selem, &b->list, map_node) { 753 sk_storage = rcu_dereference(selem->local_storage); 754 if (sk_storage && count >= skip_elems) { 755 info->bucket_id = i; 756 info->skip_elems = count; 757 return selem; 758 } 759 count++; 760 } 761 rcu_read_unlock(); 762 skip_elems = 0; 763 } 764 765 info->bucket_id = i; 766 info->skip_elems = 0; 767 return NULL; 768} 769 770static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) 771{ 772 struct bpf_local_storage_elem *selem; 773 774 selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); 775 if (!selem) 776 return NULL; 777 778 if (*pos == 0) 779 ++*pos; 780 return selem; 781} 782 783static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, 784 loff_t *pos) 785{ 786 struct bpf_iter_seq_sk_storage_map_info *info = seq->private; 787 788 ++*pos; 789 ++info->skip_elems; 790 return bpf_sk_storage_map_seq_find_next(seq->private, v); 791} 792 793struct bpf_iter__bpf_sk_storage_map { 794 __bpf_md_ptr(struct bpf_iter_meta *, meta); 795 __bpf_md_ptr(struct bpf_map *, map); 796 __bpf_md_ptr(struct sock *, sk); 797 __bpf_md_ptr(void *, value); 798}; 799 800DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, 801 struct bpf_map *map, struct sock *sk, 802 void *value) 803 804static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, 805 struct bpf_local_storage_elem *selem) 806{ 807 struct bpf_iter_seq_sk_storage_map_info *info = seq->private; 808 struct bpf_iter__bpf_sk_storage_map ctx = {}; 809 struct bpf_local_storage *sk_storage; 810 struct bpf_iter_meta meta; 811 struct bpf_prog *prog; 812 int ret = 0; 813 814 meta.seq = seq; 815 prog = bpf_iter_get_info(&meta, selem == NULL); 816 if (prog) { 817 ctx.meta = &meta; 818 ctx.map = info->map; 819 if (selem) { 820 sk_storage = rcu_dereference(selem->local_storage); 821 ctx.sk = sk_storage->owner; 822 ctx.value = SDATA(selem)->data; 823 } 824 ret = bpf_iter_run_prog(prog, &ctx); 825 } 826 827 return ret; 828} 829 830static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) 831{ 832 return __bpf_sk_storage_map_seq_show(seq, v); 833} 834 835static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) 836 __releases(RCU) 837{ 838 if (!v) 839 (void)__bpf_sk_storage_map_seq_show(seq, v); 840 else 841 rcu_read_unlock(); 842} 843 844static int bpf_iter_init_sk_storage_map(void *priv_data, 845 struct bpf_iter_aux_info *aux) 846{ 847 struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; 848 849 bpf_map_inc_with_uref(aux->map); 850 seq_info->map = aux->map; 851 return 0; 852} 853 854static void bpf_iter_fini_sk_storage_map(void *priv_data) 855{ 856 struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; 857 858 bpf_map_put_with_uref(seq_info->map); 859} 860 861static int bpf_iter_attach_map(struct bpf_prog *prog, 862 union bpf_iter_link_info *linfo, 863 struct bpf_iter_aux_info *aux) 864{ 865 struct bpf_map *map; 866 int err = -EINVAL; 867 868 if (!linfo->map.map_fd) 869 return -EBADF; 870 871 map = bpf_map_get_with_uref(linfo->map.map_fd); 872 if (IS_ERR(map)) 873 return PTR_ERR(map); 874 875 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) 876 goto put_map; 877 878 if (prog->aux->max_rdwr_access > map->value_size) { 879 err = -EACCES; 880 goto put_map; 881 } 882 883 aux->map = map; 884 return 0; 885 886put_map: 887 bpf_map_put_with_uref(map); 888 return err; 889} 890 891static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) 892{ 893 bpf_map_put_with_uref(aux->map); 894} 895 896static const struct seq_operations bpf_sk_storage_map_seq_ops = { 897 .start = bpf_sk_storage_map_seq_start, 898 .next = bpf_sk_storage_map_seq_next, 899 .stop = bpf_sk_storage_map_seq_stop, 900 .show = bpf_sk_storage_map_seq_show, 901}; 902 903static const struct bpf_iter_seq_info iter_seq_info = { 904 .seq_ops = &bpf_sk_storage_map_seq_ops, 905 .init_seq_private = bpf_iter_init_sk_storage_map, 906 .fini_seq_private = bpf_iter_fini_sk_storage_map, 907 .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), 908}; 909 910static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { 911 .target = "bpf_sk_storage_map", 912 .attach_target = bpf_iter_attach_map, 913 .detach_target = bpf_iter_detach_map, 914 .show_fdinfo = bpf_iter_map_show_fdinfo, 915 .fill_link_info = bpf_iter_map_fill_link_info, 916 .ctx_arg_info_size = 2, 917 .ctx_arg_info = { 918 { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), 919 PTR_TO_BTF_ID_OR_NULL }, 920 { offsetof(struct bpf_iter__bpf_sk_storage_map, value), 921 PTR_TO_BUF | PTR_MAYBE_NULL }, 922 }, 923 .seq_info = &iter_seq_info, 924}; 925 926static int __init bpf_sk_storage_map_iter_init(void) 927{ 928 bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = 929 btf_sock_ids[BTF_SOCK_TYPE_SOCK]; 930 return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); 931} 932late_initcall(bpf_sk_storage_map_iter_init);