Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-mlx5-hw-counters-refactor'

Tariq Toukan says:

====================
net/mlx5: hw counters refactor

This is a patchset re-post, see:
https://lore.kernel.org/20240815054656.2210494-7-tariqt@nvidia.com

In this patchset, Cosmin refactors the hw counters and solves a perf
scaling issue.

Series generated against:
commit c824deb1a897 ("cxgb4: clip_tbl: Fix spelling mistake "wont" -> "won't"")

HW counters are central to mlx5 driver operations. They are hardware
objects created and used alongside most steering operations, and queried
from a variety of places. Most counters are queried in bulk from a
periodic task in fs_counters.c.

Counter performance is important and as such, a variety of improvements
have been done over the years. Currently, counters are allocated from
pools, which are bulk allocated to amortize the cost of firmware
commands. Counters are managed through an IDR, a doubly linked list and
two atomic single linked lists. Adding/removing counters is a complex
dance between user contexts requesting it and the mlx5_fc_stats_work
task which does most of the work.

Under high load (e.g. from connection tracking flow insertion/deletion),
the counter code becomes a bottleneck, as seen on flame graphs. Whenever
a counter is deleted, it gets added to a list and the wq task is
scheduled to run immediately to actually delete it. This is done via
mod_delayed_work which uses an internal spinlock. In some tests, waiting
for this spinlock took up to 66% of all samples.

This series refactors the counter code to use a more straightforward
approach, avoiding the mod_delayed_work problem and making the code
easier to understand. For that:

- patch #1 moves counters data structs to a more appropriate place.
- patch #2 simplifies the bulk query allocation scheme by using vmalloc.
- patch #3 replaces the IDR+3 lists with an xarray. This is the main
patch of the series, solving the spinlock congestion issue.
- patch #4 removes an unnecessary cacheline alignment causing a lot of
memory to be wasted.
- patches #5 and #6 are small cleanups enabled by the refactoring.
====================

Link: https://patch.msgid.link/20241001103709.58127-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+150 -281
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
··· 1026 1026 return ERR_PTR(-ENOMEM); 1027 1027 1028 1028 counter->is_shared = false; 1029 - counter->counter = mlx5_fc_create_ex(ct_priv->dev, true); 1029 + counter->counter = mlx5_fc_create(ct_priv->dev, true); 1030 1030 if (IS_ERR(counter->counter)) { 1031 1031 ct_dbg("Failed to create counter for ct entry"); 1032 1032 ret = PTR_ERR(counter->counter);
+148 -245
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
··· 32 32 33 33 #include <linux/mlx5/driver.h> 34 34 #include <linux/mlx5/fs.h> 35 - #include <linux/rbtree.h> 36 35 #include "mlx5_core.h" 37 36 #include "fs_core.h" 38 37 #include "fs_cmd.h" 39 38 40 39 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) 41 - #define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000) 42 40 /* Max number of counters to query in bulk read is 32K */ 43 41 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) 44 42 #define MLX5_INIT_COUNTERS_BULK 8 ··· 50 52 }; 51 53 52 54 struct mlx5_fc { 53 - struct list_head list; 54 - struct llist_node addlist; 55 - struct llist_node dellist; 56 - 57 - /* last{packets,bytes} members are used when calculating the delta since 58 - * last reading 59 - */ 60 - u64 lastpackets; 61 - u64 lastbytes; 62 - 63 - struct mlx5_fc_bulk *bulk; 64 55 u32 id; 65 56 bool aging; 57 + struct mlx5_fc_bulk *bulk; 58 + struct mlx5_fc_cache cache; 59 + /* last{packets,bytes} are used for calculating deltas since last reading. */ 60 + u64 lastpackets; 61 + u64 lastbytes; 62 + }; 66 63 67 - struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; 64 + struct mlx5_fc_pool { 65 + struct mlx5_core_dev *dev; 66 + struct mutex pool_lock; /* protects pool lists */ 67 + struct list_head fully_used; 68 + struct list_head partially_used; 69 + struct list_head unused; 70 + int available_fcs; 71 + int used_fcs; 72 + int threshold; 73 + }; 74 + 75 + struct mlx5_fc_stats { 76 + struct xarray counters; 77 + 78 + struct workqueue_struct *wq; 79 + struct delayed_work work; 80 + unsigned long sampling_interval; /* jiffies */ 81 + u32 *bulk_query_out; 82 + int bulk_query_len; 83 + bool bulk_query_alloc_failed; 84 + unsigned long next_bulk_query_alloc; 85 + struct mlx5_fc_pool fc_pool; 68 86 }; 69 87 70 88 static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); 71 89 static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); 72 90 static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool 
*fc_pool); 73 91 static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); 74 - 75 - /* locking scheme: 76 - * 77 - * It is the responsibility of the user to prevent concurrent calls or bad 78 - * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference 79 - * to struct mlx5_fc. 80 - * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a 81 - * dump (access to struct mlx5_fc) after a counter is destroyed. 82 - * 83 - * access to counter list: 84 - * - create (user context) 85 - * - mlx5_fc_create() only adds to an addlist to be used by 86 - * mlx5_fc_stats_work(). addlist is a lockless single linked list 87 - * that doesn't require any additional synchronization when adding single 88 - * node. 89 - * - spawn thread to do the actual destroy 90 - * 91 - * - destroy (user context) 92 - * - add a counter to lockless dellist 93 - * - spawn thread to do the actual del 94 - * 95 - * - dump (user context) 96 - * user should not call dump after destroy 97 - * 98 - * - query (single thread workqueue context) 99 - * destroy/dump - no conflict (see destroy) 100 - * query/dump - packets and bytes might be inconsistent (since update is not 101 - * atomic) 102 - * query/create - no conflict (see create) 103 - * since every create/destroy spawn the work, only after necessary time has 104 - * elapsed, the thread will actually query the hardware. 
105 - */ 106 - 107 - static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, 108 - u32 id) 109 - { 110 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 111 - unsigned long next_id = (unsigned long)id + 1; 112 - struct mlx5_fc *counter; 113 - unsigned long tmp; 114 - 115 - rcu_read_lock(); 116 - /* skip counters that are in idr, but not yet in counters list */ 117 - idr_for_each_entry_continue_ul(&fc_stats->counters_idr, 118 - counter, tmp, next_id) { 119 - if (!list_empty(&counter->list)) 120 - break; 121 - } 122 - rcu_read_unlock(); 123 - 124 - return counter ? &counter->list : &fc_stats->counters; 125 - } 126 - 127 - static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, 128 - struct mlx5_fc *counter) 129 - { 130 - struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id); 131 - 132 - list_add_tail(&counter->list, next); 133 - } 134 - 135 - static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, 136 - struct mlx5_fc *counter) 137 - { 138 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 139 - 140 - list_del(&counter->list); 141 - 142 - spin_lock(&fc_stats->counters_idr_lock); 143 - WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); 144 - spin_unlock(&fc_stats->counters_idr_lock); 145 - } 146 92 147 93 static int get_init_bulk_query_len(struct mlx5_core_dev *dev) 148 94 { ··· 116 174 cache->lastuse = jiffies; 117 175 } 118 176 119 - static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, 120 - struct mlx5_fc *first, 121 - u32 last_id) 177 + /* Synchronization notes 178 + * 179 + * Access to counter array: 180 + * - create - mlx5_fc_create() (user context) 181 + * - inserts the counter into the xarray. 182 + * 183 + * - destroy - mlx5_fc_destroy() (user context) 184 + * - erases the counter from the xarray and releases it. 185 + * 186 + * - query mlx5_fc_query(), mlx5_fc_query_cached{,_raw}() (user context) 187 + * - user should not access a counter after destroy. 
188 + * 189 + * - bulk query (single thread workqueue context) 190 + * - create: query relies on 'lastuse' to avoid updating counters added 191 + * around the same time as the current bulk cmd. 192 + * - destroy: destroyed counters will not be accessed, even if they are 193 + * destroyed during a bulk query command. 194 + */ 195 + static void mlx5_fc_stats_query_all_counters(struct mlx5_core_dev *dev) 122 196 { 123 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 124 - bool query_more_counters = (first->id <= last_id); 125 - int cur_bulk_len = fc_stats->bulk_query_len; 197 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 198 + u32 bulk_len = fc_stats->bulk_query_len; 199 + XA_STATE(xas, &fc_stats->counters, 0); 126 200 u32 *data = fc_stats->bulk_query_out; 127 - struct mlx5_fc *counter = first; 201 + struct mlx5_fc *counter; 202 + u32 last_bulk_id = 0; 203 + u64 bulk_query_time; 128 204 u32 bulk_base_id; 129 - int bulk_len; 130 205 int err; 131 206 132 - while (query_more_counters) { 133 - /* first id must be aligned to 4 when using bulk query */ 134 - bulk_base_id = counter->id & ~0x3; 135 - 136 - /* number of counters to query inc. the last counter */ 137 - bulk_len = min_t(int, cur_bulk_len, 138 - ALIGN(last_id - bulk_base_id + 1, 4)); 139 - 140 - err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, 141 - data); 142 - if (err) { 143 - mlx5_core_err(dev, "Error doing bulk query: %d\n", err); 144 - return; 145 - } 146 - query_more_counters = false; 147 - 148 - list_for_each_entry_from(counter, &fc_stats->counters, list) { 149 - int counter_index = counter->id - bulk_base_id; 150 - struct mlx5_fc_cache *cache = &counter->cache; 151 - 152 - if (counter->id >= bulk_base_id + bulk_len) { 153 - query_more_counters = true; 154 - break; 207 + xas_lock(&xas); 208 + xas_for_each(&xas, counter, U32_MAX) { 209 + if (xas_retry(&xas, counter)) 210 + continue; 211 + if (unlikely(counter->id >= last_bulk_id)) { 212 + /* Start new bulk query. 
*/ 213 + /* First id must be aligned to 4 when using bulk query. */ 214 + bulk_base_id = counter->id & ~0x3; 215 + last_bulk_id = bulk_base_id + bulk_len; 216 + /* The lock is released while querying the hw and reacquired after. */ 217 + xas_unlock(&xas); 218 + /* The same id needs to be processed again in the next loop iteration. */ 219 + xas_reset(&xas); 220 + bulk_query_time = jiffies; 221 + err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, data); 222 + if (err) { 223 + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); 224 + return; 155 225 } 156 - 157 - update_counter_cache(counter_index, data, cache); 226 + xas_lock(&xas); 227 + continue; 158 228 } 229 + /* Do not update counters added after bulk query was started. */ 230 + if (time_after64(bulk_query_time, counter->cache.lastuse)) 231 + update_counter_cache(counter->id - bulk_base_id, data, 232 + &counter->cache); 159 233 } 234 + xas_unlock(&xas); 160 235 } 161 236 162 237 static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter) ··· 184 225 185 226 static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) 186 227 { 187 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 228 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 188 229 189 230 if (counter->bulk) 190 231 mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); ··· 192 233 mlx5_fc_free(dev, counter); 193 234 } 194 235 195 - static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) 236 + static void mlx5_fc_stats_bulk_query_buf_realloc(struct mlx5_core_dev *dev, 237 + int bulk_query_len) 196 238 { 197 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 198 - int max_bulk_len = get_max_bulk_query_len(dev); 199 - unsigned long now = jiffies; 239 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 200 240 u32 *bulk_query_out_tmp; 201 - int max_out_len; 241 + int out_len; 202 242 203 - if (fc_stats->bulk_query_alloc_failed && 204 - time_before(now, 
fc_stats->next_bulk_query_alloc)) 205 - return; 206 - 207 - max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); 208 - bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL); 243 + out_len = mlx5_cmd_fc_get_bulk_query_out_len(bulk_query_len); 244 + bulk_query_out_tmp = kvzalloc(out_len, GFP_KERNEL); 209 245 if (!bulk_query_out_tmp) { 210 246 mlx5_core_warn_once(dev, 211 - "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n", 212 - max_bulk_len); 213 - fc_stats->bulk_query_alloc_failed = true; 214 - fc_stats->next_bulk_query_alloc = 215 - now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD; 247 + "Can't increase flow counters bulk query buffer size, alloc failed, bulk_query_len(%d)\n", 248 + bulk_query_len); 216 249 return; 217 250 } 218 251 219 - kfree(fc_stats->bulk_query_out); 252 + kvfree(fc_stats->bulk_query_out); 220 253 fc_stats->bulk_query_out = bulk_query_out_tmp; 221 - fc_stats->bulk_query_len = max_bulk_len; 222 - if (fc_stats->bulk_query_alloc_failed) { 223 - mlx5_core_info(dev, 224 - "Flow counters bulk query buffer size increased, bulk_size(%d)\n", 225 - max_bulk_len); 226 - fc_stats->bulk_query_alloc_failed = false; 227 - } 254 + fc_stats->bulk_query_len = bulk_query_len; 255 + mlx5_core_info(dev, 256 + "Flow counters bulk query buffer size increased, bulk_query_len(%d)\n", 257 + bulk_query_len); 258 + } 259 + 260 + static int mlx5_fc_num_counters(struct mlx5_fc_stats *fc_stats) 261 + { 262 + struct mlx5_fc *counter; 263 + int num_counters = 0; 264 + unsigned long id; 265 + 266 + xa_for_each(&fc_stats->counters, id, counter) 267 + num_counters++; 268 + return num_counters; 228 269 } 229 270 230 271 static void mlx5_fc_stats_work(struct work_struct *work) 231 272 { 232 - struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, 233 - priv.fc_stats.work.work); 234 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 235 - /* Take dellist first to ensure that counters cannot be deleted before 236 - * they 
are inserted. 237 - */ 238 - struct llist_node *dellist = llist_del_all(&fc_stats->dellist); 239 - struct llist_node *addlist = llist_del_all(&fc_stats->addlist); 240 - struct mlx5_fc *counter = NULL, *last = NULL, *tmp; 241 - unsigned long now = jiffies; 273 + struct mlx5_fc_stats *fc_stats = container_of(work, struct mlx5_fc_stats, 274 + work.work); 275 + struct mlx5_core_dev *dev = fc_stats->fc_pool.dev; 242 276 243 - if (addlist || !list_empty(&fc_stats->counters)) 244 - queue_delayed_work(fc_stats->wq, &fc_stats->work, 245 - fc_stats->sampling_interval); 277 + queue_delayed_work(fc_stats->wq, &fc_stats->work, fc_stats->sampling_interval); 246 278 247 - llist_for_each_entry(counter, addlist, addlist) { 248 - mlx5_fc_stats_insert(dev, counter); 249 - fc_stats->num_counters++; 250 - } 279 + /* Grow the bulk query buffer to max if not maxed and enough counters are present. */ 280 + if (unlikely(fc_stats->bulk_query_len < get_max_bulk_query_len(dev) && 281 + mlx5_fc_num_counters(fc_stats) > get_init_bulk_query_len(dev))) 282 + mlx5_fc_stats_bulk_query_buf_realloc(dev, get_max_bulk_query_len(dev)); 251 283 252 - llist_for_each_entry_safe(counter, tmp, dellist, dellist) { 253 - mlx5_fc_stats_remove(dev, counter); 254 - 255 - mlx5_fc_release(dev, counter); 256 - fc_stats->num_counters--; 257 - } 258 - 259 - if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) && 260 - fc_stats->num_counters > get_init_bulk_query_len(dev)) 261 - mlx5_fc_stats_bulk_query_size_increase(dev); 262 - 263 - if (time_before(now, fc_stats->next_query) || 264 - list_empty(&fc_stats->counters)) 265 - return; 266 - last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list); 267 - 268 - counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, 269 - list); 270 - if (counter) 271 - mlx5_fc_stats_query_counter_range(dev, counter, last->id); 272 - 273 - fc_stats->next_query = now + fc_stats->sampling_interval; 284 + mlx5_fc_stats_query_all_counters(dev); 274 285 } 275 286 276 287 
static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev) ··· 263 334 264 335 static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging) 265 336 { 266 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 337 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 267 338 struct mlx5_fc *counter; 268 339 269 340 if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) { ··· 275 346 return mlx5_fc_single_alloc(dev); 276 347 } 277 348 278 - struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging) 349 + struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) 279 350 { 280 351 struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging); 281 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 352 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 282 353 int err; 283 354 284 355 if (IS_ERR(counter)) 285 356 return counter; 286 357 287 - INIT_LIST_HEAD(&counter->list); 288 358 counter->aging = aging; 289 359 290 360 if (aging) { ··· 293 365 counter->lastbytes = counter->cache.bytes; 294 366 counter->lastpackets = counter->cache.packets; 295 367 296 - idr_preload(GFP_KERNEL); 297 - spin_lock(&fc_stats->counters_idr_lock); 298 - 299 - err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id, 300 - GFP_NOWAIT); 301 - 302 - spin_unlock(&fc_stats->counters_idr_lock); 303 - idr_preload_end(); 304 - if (err) 368 + err = xa_err(xa_store(&fc_stats->counters, id, counter, GFP_KERNEL)); 369 + if (err != 0) 305 370 goto err_out_alloc; 306 - 307 - llist_add(&counter->addlist, &fc_stats->addlist); 308 371 } 309 372 310 373 return counter; ··· 303 384 err_out_alloc: 304 385 mlx5_fc_release(dev, counter); 305 386 return ERR_PTR(err); 306 - } 307 - 308 - struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) 309 - { 310 - struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging); 311 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 312 - 313 - if (aging) 314 - mod_delayed_work(fc_stats->wq, 
&fc_stats->work, 0); 315 - return counter; 316 387 } 317 388 EXPORT_SYMBOL(mlx5_fc_create); 318 389 ··· 314 405 315 406 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) 316 407 { 317 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 408 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 318 409 319 410 if (!counter) 320 411 return; 321 412 322 - if (counter->aging) { 323 - llist_add(&counter->dellist, &fc_stats->dellist); 324 - mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); 325 - return; 326 - } 327 - 413 + if (counter->aging) 414 + xa_erase(&fc_stats->counters, counter->id); 328 415 mlx5_fc_release(dev, counter); 329 416 } 330 417 EXPORT_SYMBOL(mlx5_fc_destroy); 331 418 332 419 int mlx5_init_fc_stats(struct mlx5_core_dev *dev) 333 420 { 334 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 335 - int init_bulk_len; 336 - int init_out_len; 421 + struct mlx5_fc_stats *fc_stats; 337 422 338 - spin_lock_init(&fc_stats->counters_idr_lock); 339 - idr_init(&fc_stats->counters_idr); 340 - INIT_LIST_HEAD(&fc_stats->counters); 341 - init_llist_head(&fc_stats->addlist); 342 - init_llist_head(&fc_stats->dellist); 343 - 344 - init_bulk_len = get_init_bulk_query_len(dev); 345 - init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len); 346 - fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL); 347 - if (!fc_stats->bulk_query_out) 423 + fc_stats = kzalloc(sizeof(*fc_stats), GFP_KERNEL); 424 + if (!fc_stats) 348 425 return -ENOMEM; 349 - fc_stats->bulk_query_len = init_bulk_len; 426 + dev->priv.fc_stats = fc_stats; 427 + 428 + xa_init(&fc_stats->counters); 429 + 430 + /* Allocate initial (small) bulk query buffer. 
*/ 431 + mlx5_fc_stats_bulk_query_buf_realloc(dev, get_init_bulk_query_len(dev)); 432 + if (!fc_stats->bulk_query_out) 433 + goto err_bulk; 350 434 351 435 fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); 352 436 if (!fc_stats->wq) ··· 349 447 INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); 350 448 351 449 mlx5_fc_pool_init(&fc_stats->fc_pool, dev); 450 + queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); 352 451 return 0; 353 452 354 453 err_wq_create: 355 - kfree(fc_stats->bulk_query_out); 454 + kvfree(fc_stats->bulk_query_out); 455 + err_bulk: 456 + kfree(fc_stats); 356 457 return -ENOMEM; 357 458 } 358 459 359 460 void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) 360 461 { 361 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 362 - struct llist_node *tmplist; 462 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 363 463 struct mlx5_fc *counter; 364 - struct mlx5_fc *tmp; 464 + unsigned long id; 365 465 366 - cancel_delayed_work_sync(&dev->priv.fc_stats.work); 367 - destroy_workqueue(dev->priv.fc_stats.wq); 368 - dev->priv.fc_stats.wq = NULL; 466 + cancel_delayed_work_sync(&fc_stats->work); 467 + destroy_workqueue(fc_stats->wq); 468 + fc_stats->wq = NULL; 369 469 370 - tmplist = llist_del_all(&fc_stats->addlist); 371 - llist_for_each_entry_safe(counter, tmp, tmplist, addlist) 470 + xa_for_each(&fc_stats->counters, id, counter) { 471 + xa_erase(&fc_stats->counters, id); 372 472 mlx5_fc_release(dev, counter); 373 - 374 - list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) 375 - mlx5_fc_release(dev, counter); 473 + } 474 + xa_destroy(&fc_stats->counters); 376 475 377 476 mlx5_fc_pool_cleanup(&fc_stats->fc_pool); 378 - idr_destroy(&fc_stats->counters_idr); 379 - kfree(fc_stats->bulk_query_out); 477 + kvfree(fc_stats->bulk_query_out); 478 + kfree(fc_stats); 380 479 } 381 480 382 481 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, ··· 421 518 struct delayed_work *dwork, 422 519 
unsigned long delay) 423 520 { 424 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 521 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 425 522 426 523 queue_delayed_work(fc_stats->wq, dwork, delay); 427 524 } ··· 429 526 void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, 430 527 unsigned long interval) 431 528 { 432 - struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; 529 + struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; 433 530 434 531 fc_stats->sampling_interval = min_t(unsigned long, interval, 435 532 fc_stats->sampling_interval);
+1 -32
include/linux/mlx5/driver.h
··· 45 45 #include <linux/workqueue.h> 46 46 #include <linux/mempool.h> 47 47 #include <linux/interrupt.h> 48 - #include <linux/idr.h> 49 48 #include <linux/notifier.h> 50 49 #include <linux/refcount.h> 51 50 #include <linux/auxiliary_bus.h> ··· 473 474 u16 max_ec_vfs; 474 475 }; 475 476 476 - struct mlx5_fc_pool { 477 - struct mlx5_core_dev *dev; 478 - struct mutex pool_lock; /* protects pool lists */ 479 - struct list_head fully_used; 480 - struct list_head partially_used; 481 - struct list_head unused; 482 - int available_fcs; 483 - int used_fcs; 484 - int threshold; 485 - }; 486 - 487 - struct mlx5_fc_stats { 488 - spinlock_t counters_idr_lock; /* protects counters_idr */ 489 - struct idr counters_idr; 490 - struct list_head counters; 491 - struct llist_head addlist; 492 - struct llist_head dellist; 493 - 494 - struct workqueue_struct *wq; 495 - struct delayed_work work; 496 - unsigned long next_query; 497 - unsigned long sampling_interval; /* jiffies */ 498 - u32 *bulk_query_out; 499 - int bulk_query_len; 500 - size_t num_counters; 501 - bool bulk_query_alloc_failed; 502 - unsigned long next_bulk_query_alloc; 503 - struct mlx5_fc_pool fc_pool; 504 - }; 505 - 506 477 struct mlx5_events; 507 478 struct mlx5_mpfs; 508 479 struct mlx5_eswitch; ··· 599 630 struct mlx5_devcom_comp_dev *hca_devcom_comp; 600 631 struct mlx5_fw_reset *fw_reset; 601 632 struct mlx5_core_roce roce; 602 - struct mlx5_fc_stats fc_stats; 633 + struct mlx5_fc_stats *fc_stats; 603 634 struct mlx5_rl_table rl_table; 604 635 struct mlx5_ft_pool *ft_pool; 605 636
-3
include/linux/mlx5/fs.h
··· 298 298 299 299 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); 300 300 301 - /* As mlx5_fc_create() but doesn't queue stats refresh thread. */ 302 - struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging); 303 - 304 301 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); 305 302 u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); 306 303 void mlx5_fc_query_cached(struct mlx5_fc *counter,