Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Use bpf_mem_cache_alloc/free for bpf_local_storage

This patch uses bpf_mem_cache_alloc/free for allocating and freeing
bpf_local_storage for task and cgroup storage.

The changes are similar to the previous patch. A few things
worth mentioning for bpf_local_storage:

The local_storage is freed when the last selem is deleted.
Before deleting a selem from local_storage, it needs to retrieve the
local_storage->smap because the bpf_selem_unlink_storage_nolock()
may have set it to NULL. Note that local_storage->smap may have
already been NULL when the selem created this local_storage has
been removed. In this case, call_rcu will be used to free the
local_storage.
Also, the bpf_ma (true or false) value is needed before calling
bpf_local_storage_free(). The bpf_ma can either be obtained from
the local_storage->smap (if available) or any of its selem's smap.
A new helper check_storage_bpf_ma() is added to obtain
bpf_ma for a deleting bpf_local_storage.

When bpf_local_storage_alloc gets reused memory, all
fields either already hold the correct values or will be initialized.
'cache[]' must already be all NULLs. 'list' must be empty.
Others will be initialized.

Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20230322215246.1675516-4-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Martin KaFai Lau and committed by
Alexei Starovoitov
6ae9d5e9 08a7ce38

+117 -16
+1
include/linux/bpf_local_storage.h
··· 57 57 u16 elem_size; 58 58 u16 cache_idx; 59 59 struct bpf_mem_alloc selem_ma; 60 + struct bpf_mem_alloc storage_ma; 60 61 bool bpf_ma; 61 62 }; 62 63
+116 -16
kernel/bpf/bpf_local_storage.c
··· 111 111 return NULL; 112 112 } 113 113 114 + /* rcu tasks trace callback for bpf_ma == false */ 115 + static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) 116 + { 117 + struct bpf_local_storage *local_storage; 118 + 119 + /* If RCU Tasks Trace grace period implies RCU grace period, do 120 + * kfree(), else do kfree_rcu(). 121 + */ 122 + local_storage = container_of(rcu, struct bpf_local_storage, rcu); 123 + if (rcu_trace_implies_rcu_gp()) 124 + kfree(local_storage); 125 + else 126 + kfree_rcu(local_storage, rcu); 127 + } 128 + 114 129 static void bpf_local_storage_free_rcu(struct rcu_head *rcu) 115 130 { 116 131 struct bpf_local_storage *local_storage; 117 132 118 133 local_storage = container_of(rcu, struct bpf_local_storage, rcu); 119 - kfree(local_storage); 134 + bpf_mem_cache_raw_free(local_storage); 120 135 } 121 136 122 137 static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) 123 138 { 124 - /* If RCU Tasks Trace grace period implies RCU grace period, do 125 - * kfree(), else do kfree_rcu(). 
126 - */ 127 139 if (rcu_trace_implies_rcu_gp()) 128 140 bpf_local_storage_free_rcu(rcu); 129 141 else 130 142 call_rcu(rcu, bpf_local_storage_free_rcu); 131 143 } 132 144 133 - static void bpf_local_storage_free(struct bpf_local_storage *local_storage, 134 - bool reuse_now) 145 + /* Handle bpf_ma == false */ 146 + static void __bpf_local_storage_free(struct bpf_local_storage *local_storage, 147 + bool vanilla_rcu) 135 148 { 136 - if (!reuse_now) 149 + if (vanilla_rcu) 150 + kfree_rcu(local_storage, rcu); 151 + else 152 + call_rcu_tasks_trace(&local_storage->rcu, 153 + __bpf_local_storage_free_trace_rcu); 154 + } 155 + 156 + static void bpf_local_storage_free(struct bpf_local_storage *local_storage, 157 + struct bpf_local_storage_map *smap, 158 + bool bpf_ma, bool reuse_now) 159 + { 160 + if (!bpf_ma) { 161 + __bpf_local_storage_free(local_storage, reuse_now); 162 + return; 163 + } 164 + 165 + if (!reuse_now) { 137 166 call_rcu_tasks_trace(&local_storage->rcu, 138 167 bpf_local_storage_free_trace_rcu); 139 - else 168 + return; 169 + } 170 + 171 + if (smap) { 172 + migrate_disable(); 173 + bpf_mem_cache_free(&smap->storage_ma, local_storage); 174 + migrate_enable(); 175 + } else { 176 + /* smap could be NULL if the selem that triggered 177 + * this 'local_storage' creation had been long gone. 178 + * In this case, directly do call_rcu(). 179 + */ 140 180 call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); 181 + } 141 182 } 142 183 143 184 /* rcu tasks trace callback for bpf_ma == false */ ··· 301 260 return free_local_storage; 302 261 } 303 262 263 + static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, 264 + struct bpf_local_storage_map *storage_smap, 265 + struct bpf_local_storage_elem *selem) 266 + { 267 + 268 + struct bpf_local_storage_map *selem_smap; 269 + 270 + /* local_storage->smap may be NULL. If it is, get the bpf_ma 271 + * from any selem in the local_storage->list. 
The bpf_ma of all 272 + * local_storage and selem should have the same value 273 + * for the same map type. 274 + * 275 + * If the local_storage->list is already empty, the caller will not 276 + * care about the bpf_ma value also because the caller is not 277 + * responsibile to free the local_storage. 278 + */ 279 + 280 + if (storage_smap) 281 + return storage_smap->bpf_ma; 282 + 283 + if (!selem) { 284 + struct hlist_node *n; 285 + 286 + n = rcu_dereference_check(hlist_first_rcu(&local_storage->list), 287 + bpf_rcu_lock_held()); 288 + if (!n) 289 + return false; 290 + 291 + selem = hlist_entry(n, struct bpf_local_storage_elem, snode); 292 + } 293 + selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); 294 + 295 + return selem_smap->bpf_ma; 296 + } 297 + 304 298 static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, 305 299 bool reuse_now) 306 300 { 301 + struct bpf_local_storage_map *storage_smap; 307 302 struct bpf_local_storage *local_storage; 308 - bool free_local_storage = false; 303 + bool bpf_ma, free_local_storage = false; 309 304 unsigned long flags; 310 305 311 306 if (unlikely(!selem_linked_to_storage_lockless(selem))) ··· 350 273 351 274 local_storage = rcu_dereference_check(selem->local_storage, 352 275 bpf_rcu_lock_held()); 276 + storage_smap = rcu_dereference_check(local_storage->smap, 277 + bpf_rcu_lock_held()); 278 + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); 279 + 353 280 raw_spin_lock_irqsave(&local_storage->lock, flags); 354 281 if (likely(selem_linked_to_storage(selem))) 355 282 free_local_storage = bpf_selem_unlink_storage_nolock( ··· 361 280 raw_spin_unlock_irqrestore(&local_storage->lock, flags); 362 281 363 282 if (free_local_storage) 364 - bpf_local_storage_free(local_storage, reuse_now); 283 + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); 365 284 } 366 285 367 286 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, ··· 481 
400 if (err) 482 401 return err; 483 402 484 - storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), 485 - gfp_flags | __GFP_NOWARN); 403 + if (smap->bpf_ma) { 404 + migrate_disable(); 405 + storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags); 406 + migrate_enable(); 407 + } else { 408 + storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), 409 + gfp_flags | __GFP_NOWARN); 410 + } 411 + 486 412 if (!storage) { 487 413 err = -ENOMEM; 488 414 goto uncharge; ··· 535 447 return 0; 536 448 537 449 uncharge: 538 - bpf_local_storage_free(storage, true); 450 + bpf_local_storage_free(storage, smap, smap->bpf_ma, true); 539 451 mem_uncharge(smap, owner, sizeof(*storage)); 540 452 return err; 541 453 } ··· 748 660 749 661 void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) 750 662 { 663 + struct bpf_local_storage_map *storage_smap; 751 664 struct bpf_local_storage_elem *selem; 752 - bool free_storage = false; 665 + bool bpf_ma, free_storage = false; 753 666 struct hlist_node *n; 754 667 unsigned long flags; 668 + 669 + storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held()); 670 + bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL); 755 671 756 672 /* Neither the bpf_prog nor the bpf_map's syscall 757 673 * could be modifying the local_storage->list now. 
··· 784 692 raw_spin_unlock_irqrestore(&local_storage->lock, flags); 785 693 786 694 if (free_storage) 787 - bpf_local_storage_free(local_storage, true); 695 + bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true); 788 696 } 789 697 790 698 u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) ··· 847 755 err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false); 848 756 if (err) 849 757 goto free_smap; 758 + 759 + err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false); 760 + if (err) { 761 + bpf_mem_alloc_destroy(&smap->selem_ma); 762 + goto free_smap; 763 + } 850 764 } 851 765 852 766 smap->cache_idx = bpf_local_storage_cache_idx_get(cache); ··· 927 829 */ 928 830 synchronize_rcu(); 929 831 930 - if (smap->bpf_ma) 832 + if (smap->bpf_ma) { 931 833 bpf_mem_alloc_destroy(&smap->selem_ma); 834 + bpf_mem_alloc_destroy(&smap->storage_ma); 835 + } 932 836 kvfree(smap->buckets); 933 837 bpf_map_area_free(smap); 934 838 }