Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/fq_impl: do not maintain a backlog-sorted list of flows

A sorted flow list is only needed to drop packets in the biggest flow when
hitting the overmemory condition.
By scanning flows only when needed, we can avoid paying the cost of
maintaining the list under normal conditions.
In order to avoid scanning lots of empty flows and touching too many cold
cache lines, a bitmap of flows with a backlog is maintained.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20201218184718.93650-3-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

authored by

Felix Fietkau and committed by
Johannes Berg
d7b64929 bf9009bf

+71 -54
+4 -6
include/net/fq.h
··· 19 19 * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ 20 20 * (deficit round robin) based round robin queuing similar to the one 21 21 * found in net/sched/sch_fq_codel.c 22 - * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of 23 - * fat flows and efficient head-dropping if packet limit is reached 24 22 * @queue: sk_buff queue to hold packets 25 23 * @backlog: number of bytes pending in the queue. The number of packets can be 26 24 * found in @queue.qlen ··· 27 29 struct fq_flow { 28 30 struct fq_tin *tin; 29 31 struct list_head flowchain; 30 - struct list_head backlogchain; 31 32 struct sk_buff_head queue; 32 33 u32 backlog; 33 34 int deficit; ··· 44 47 struct fq_tin { 45 48 struct list_head new_flows; 46 49 struct list_head old_flows; 50 + struct list_head tin_list; 47 51 struct fq_flow default_flow; 48 52 u32 backlog_bytes; 49 53 u32 backlog_packets; ··· 58 60 /** 59 61 * struct fq - main container for fair queuing purposes 60 62 * 61 - * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient 62 - * head-dropping when @backlog reaches @limit 63 63 * @limit: max number of packets that can be queued across all flows 64 64 * @backlog: number of packets queued across all flows 65 65 */ 66 66 struct fq { 67 67 struct fq_flow *flows; 68 - struct list_head backlogs; 68 + unsigned long *flows_bitmap; 69 + 70 + struct list_head tin_backlog; 69 71 spinlock_t lock; 70 72 u32 flows_cnt; 71 73 u32 limit;
+67 -46
include/net/fq_impl.h
··· 17 17 unsigned int bytes, unsigned int truesize) 18 18 { 19 19 struct fq_tin *tin = flow->tin; 20 + int idx; 20 21 21 22 tin->backlog_bytes -= bytes; 22 23 tin->backlog_packets -= packets; 23 24 flow->backlog -= bytes; 24 25 fq->backlog -= packets; 25 26 fq->memory_usage -= truesize; 27 + 28 + if (flow->backlog) 29 + return; 30 + 31 + if (flow == &tin->default_flow) { 32 + list_del_init(&tin->tin_list); 33 + return; 34 + } 35 + 36 + idx = flow - fq->flows; 37 + __clear_bit(idx, fq->flows_bitmap); 26 38 } 27 39 28 40 static void fq_adjust_removal(struct fq *fq, ··· 42 30 struct sk_buff *skb) 43 31 { 44 32 __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); 45 - } 46 - 47 - static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) 48 - { 49 - struct fq_flow *i; 50 - 51 - if (flow->backlog == 0) { 52 - list_del_init(&flow->backlogchain); 53 - } else { 54 - i = flow; 55 - 56 - list_for_each_entry_continue(i, &fq->backlogs, backlogchain) 57 - if (i->backlog < flow->backlog) 58 - break; 59 - 60 - list_move_tail(&flow->backlogchain, 61 - &i->backlogchain); 62 - } 63 33 } 64 34 65 35 static struct sk_buff *fq_flow_dequeue(struct fq *fq, ··· 56 62 return NULL; 57 63 58 64 fq_adjust_removal(fq, flow, skb); 59 - fq_rejigger_backlog(fq, flow); 60 65 61 66 return skb; 62 67 } ··· 83 90 } while (packets < pending); 84 91 85 92 __fq_adjust_removal(fq, flow, packets, bytes, truesize); 86 - fq_rejigger_backlog(fq, flow); 87 93 88 94 return packets; 89 95 } ··· 162 170 return flow; 163 171 } 164 172 165 - static void fq_recalc_backlog(struct fq *fq, 166 - struct fq_tin *tin, 167 - struct fq_flow *flow) 173 + static struct fq_flow *fq_find_fattest_flow(struct fq *fq) 168 174 { 169 - struct fq_flow *i; 175 + struct fq_tin *tin; 176 + struct fq_flow *flow = NULL; 177 + u32 len = 0; 178 + int i; 170 179 171 - if (list_empty(&flow->backlogchain)) 172 - list_add_tail(&flow->backlogchain, &fq->backlogs); 180 + for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) { 181 
+ struct fq_flow *cur = &fq->flows[i]; 182 + unsigned int cur_len; 173 183 174 - i = flow; 175 - list_for_each_entry_continue_reverse(i, &fq->backlogs, 176 - backlogchain) 177 - if (i->backlog > flow->backlog) 178 - break; 184 + cur_len = cur->backlog; 185 + if (cur_len <= len) 186 + continue; 179 187 180 - list_move(&flow->backlogchain, &i->backlogchain); 188 + flow = cur; 189 + len = cur_len; 190 + } 191 + 192 + list_for_each_entry(tin, &fq->tin_backlog, tin_list) { 193 + unsigned int cur_len = tin->default_flow.backlog; 194 + 195 + if (cur_len <= len) 196 + continue; 197 + 198 + flow = &tin->default_flow; 199 + len = cur_len; 200 + } 201 + 202 + return flow; 181 203 } 182 204 183 205 static void fq_tin_enqueue(struct fq *fq, ··· 206 200 207 201 flow = fq_flow_classify(fq, tin, idx, skb); 208 202 203 + if (!flow->backlog) { 204 + if (flow != &tin->default_flow) 205 + __set_bit(idx, fq->flows_bitmap); 206 + else if (list_empty(&tin->tin_list)) 207 + list_add(&tin->tin_list, &fq->tin_backlog); 208 + } 209 + 209 210 flow->tin = tin; 210 211 flow->backlog += skb->len; 211 212 tin->backlog_bytes += skb->len; 212 213 tin->backlog_packets++; 213 214 fq->memory_usage += skb->truesize; 214 215 fq->backlog++; 215 - 216 - fq_recalc_backlog(fq, tin, flow); 217 216 218 217 if (list_empty(&flow->flowchain)) { 219 218 flow->deficit = fq->quantum; ··· 229 218 __skb_queue_tail(&flow->queue, skb); 230 219 oom = (fq->memory_usage > fq->memory_limit); 231 220 while (fq->backlog > fq->limit || oom) { 232 - flow = list_first_entry_or_null(&fq->backlogs, 233 - struct fq_flow, 234 - backlogchain); 221 + flow = fq_find_fattest_flow(fq); 235 222 if (!flow) 236 223 return; 237 224 ··· 264 255 fq_adjust_removal(fq, flow, skb); 265 256 free_func(fq, tin, flow, skb); 266 257 } 267 - 268 - fq_rejigger_backlog(fq, flow); 269 258 } 270 259 271 260 static void fq_tin_filter(struct fq *fq, ··· 286 279 struct fq_flow *flow, 287 280 fq_skb_free_t free_func) 288 281 { 282 + struct fq_tin *tin = 
flow->tin; 289 283 struct sk_buff *skb; 290 284 291 285 while ((skb = fq_flow_dequeue(fq, flow))) 292 - free_func(fq, flow->tin, flow, skb); 286 + free_func(fq, tin, flow, skb); 293 287 294 - if (!list_empty(&flow->flowchain)) 288 + if (!list_empty(&flow->flowchain)) { 295 289 list_del_init(&flow->flowchain); 296 - 297 - if (!list_empty(&flow->backlogchain)) 298 - list_del_init(&flow->backlogchain); 290 + if (list_empty(&tin->new_flows) && 291 + list_empty(&tin->old_flows)) 292 + list_del_init(&tin->tin_list); 293 + } 299 294 300 295 flow->tin = NULL; 301 296 ··· 323 314 fq_flow_reset(fq, flow, free_func); 324 315 } 325 316 317 + WARN_ON_ONCE(!list_empty(&tin->tin_list)); 326 318 WARN_ON_ONCE(tin->backlog_bytes); 327 319 WARN_ON_ONCE(tin->backlog_packets); 328 320 } ··· 331 321 static void fq_flow_init(struct fq_flow *flow) 332 322 { 333 323 INIT_LIST_HEAD(&flow->flowchain); 334 - INIT_LIST_HEAD(&flow->backlogchain); 335 324 __skb_queue_head_init(&flow->queue); 336 325 } 337 326 ··· 338 329 { 339 330 INIT_LIST_HEAD(&tin->new_flows); 340 331 INIT_LIST_HEAD(&tin->old_flows); 332 + INIT_LIST_HEAD(&tin->tin_list); 341 333 fq_flow_init(&tin->default_flow); 342 334 } 343 335 ··· 347 337 int i; 348 338 349 339 memset(fq, 0, sizeof(fq[0])); 350 - INIT_LIST_HEAD(&fq->backlogs); 351 340 spin_lock_init(&fq->lock); 341 + INIT_LIST_HEAD(&fq->tin_backlog); 352 342 fq->flows_cnt = max_t(u32, flows_cnt, 1); 353 343 fq->quantum = 300; 354 344 fq->limit = 8192; ··· 357 347 fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); 358 348 if (!fq->flows) 359 349 return -ENOMEM; 350 + 351 + fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long), 352 + GFP_KERNEL); 353 + if (!fq->flows_bitmap) { 354 + kvfree(fq->flows); 355 + fq->flows = NULL; 356 + return -ENOMEM; 357 + } 360 358 361 359 for (i = 0; i < fq->flows_cnt; i++) 362 360 fq_flow_init(&fq->flows[i]); ··· 382 364 383 365 kvfree(fq->flows); 384 366 fq->flows = NULL; 367 + 368 + 
kfree(fq->flows_bitmap); 369 + fq->flows_bitmap = NULL; 385 370 } 386 371 387 372 #endif
-2
net/mac80211/tx.c
··· 3337 3337 if (head->len != orig_len) { 3338 3338 flow->backlog += head->len - orig_len; 3339 3339 tin->backlog_bytes += head->len - orig_len; 3340 - 3341 - fq_recalc_backlog(fq, tin, flow); 3342 3340 } 3343 3341 out: 3344 3342 spin_unlock_bh(&fq->lock);