jcs's openbsd hax
openbsd
at jcs 912 lines 24 kB view raw
1/* $OpenBSD: fq_codel.c,v 1.19 2025/11/13 20:07:22 bket Exp $ */ 2 3/* 4 * Copyright (c) 2017 Mike Belopuhov 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19/* 20 * K. Nichols, V. Jacobson, A. McGregor and J. Iyengar, Controlled Delay 21 * Active Queue Management, RFC 8289, January 2018 22 * 23 * Based on the algorithm by Kathleen Nichols and Van Jacobson with 24 * improvements from Dave Taht and Eric Dumazet. 25 */ 26 27/* 28 * T. Hoeiland-Joergensen, P. McKenney, D. Taht, J. Gettys and E. Dumazet, 29 * The Flow Queue CoDel Packet Scheduler and Active Queue Management 30 * Algorithm, RFC 8290, January 2018 31 * 32 * Based on the implementation by Rasool Al-Saadi, Centre for Advanced 33 * Internet Architectures, Swinburne University of Technology, Melbourne, 34 * Australia. 35 */ 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/mbuf.h> 40#include <sys/queue.h> 41 42#include <net/if.h> 43#include <net/if_var.h> 44#include <net/pfvar.h> 45#include <net/fq_codel.h> 46 47/* #define FQCODEL_DEBUG 1 */ 48 49#ifdef FQCODEL_DEBUG 50#define DPRINTF(x...) printf(x) 51#else 52#define DPRINTF(x...) 53#endif 54 55struct codel { 56 struct mbuf_list q; 57 58 unsigned int dropping:1; /* Dropping state */ 59 unsigned int backlog:31; /* Number of bytes in the queue */ 60 61 unsigned short drops; /* Free running counter of drops */ 62 unsigned short ldrops; /* Value from the previous run */ 63 64 int64_t start; /* The moment queue was above target */ 65 int64_t next; /* Next interval */ 66 int64_t delay; /* Delay incurred by the last packet */ 67}; 68 69struct codel_params { 70 int64_t target; 71 int64_t interval; 72 int64_t grace; 73 int quantum; 74 75 uint32_t *intervals; 76}; 77 78void codel_initparams(struct codel_params *, unsigned int, 79 unsigned int, int); 80void codel_freeparams(struct codel_params *); 81void codel_enqueue(struct codel *, int64_t, struct mbuf *); 82struct mbuf *codel_dequeue(struct codel *, struct codel_params *, int64_t, 83 struct mbuf_list *, uint64_t *, uint64_t *); 84struct mbuf *codel_commit(struct codel *, struct mbuf *); 85void codel_purge(struct codel *, struct mbuf_list *); 86 87struct flow { 88 struct codel cd; 89 int active:1; 90 int deficit:31; 91#ifdef FQCODEL_DEBUG 92 uint16_t id; 93#endif 94 SIMPLEQ_ENTRY(flow) flowentry; 95}; 96SIMPLEQ_HEAD(flowq, flow); 97 98struct fqcodel { 99 struct flowq newq; 100 struct flowq oldq; 101 102 struct flow *flows; 103 unsigned int qlength; 104 105 struct ifnet *ifp; 106 107 struct codel_params cparams; 108 109 unsigned int nflows; 110 unsigned int qlimit; 111 int quantum; 112 113 unsigned int flags; 114#define FQCF_FIXED_QUANTUM 0x1 115 116 /* stats */ 117 struct fqcodel_pktcntr xmit_cnt; 118 struct fqcodel_pktcntr drop_cnt; 119 120 struct mbuf_list pending_drops; 121}; 122 123unsigned int fqcodel_idx(unsigned int, const struct mbuf *); 124void *fqcodel_alloc(unsigned int, void *); 125void fqcodel_free(unsigned int, void *); 126struct mbuf *fqcodel_if_enq(struct ifqueue *, struct mbuf *); 127struct mbuf *fqcodel_if_deq_begin(struct ifqueue *, void **); 128void fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *); 129void fqcodel_if_purge(struct ifqueue *, struct mbuf_list *); 130 131struct mbuf *fqcodel_enq(struct fqcodel *, struct mbuf *); 132struct mbuf *fqcodel_deq_begin(struct fqcodel *, void **, 133 struct mbuf_list *); 134void fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *); 135void fqcodel_purge(struct fqcodel *, struct mbuf_list *); 136 137/* 138 * ifqueue glue. 139 */ 140 141static const struct ifq_ops fqcodel_ops = { 142 fqcodel_idx, 143 fqcodel_if_enq, 144 fqcodel_if_deq_begin, 145 fqcodel_if_deq_commit, 146 fqcodel_if_purge, 147 fqcodel_alloc, 148 fqcodel_free 149}; 150 151const struct ifq_ops *const ifq_fqcodel_ops = &fqcodel_ops; 152 153void *fqcodel_pf_alloc(struct ifnet *); 154int fqcodel_pf_addqueue(void *, struct pf_queuespec *); 155void fqcodel_pf_free(void *); 156int fqcodel_pf_qstats(struct pf_queuespec *, void *, int *); 157unsigned int fqcodel_pf_qlength(void *); 158struct mbuf *fqcodel_pf_enqueue(void *, struct mbuf *); 159struct mbuf *fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *); 160void fqcodel_pf_deq_commit(void *, struct mbuf *, void *); 161void fqcodel_pf_purge(void *, struct mbuf_list *); 162 163/* 164 * pf queue glue. 165 */ 166 167static const struct pfq_ops fqcodel_pf_ops = { 168 fqcodel_pf_alloc, 169 fqcodel_pf_addqueue, 170 fqcodel_pf_free, 171 fqcodel_pf_qstats, 172 fqcodel_pf_qlength, 173 fqcodel_pf_enqueue, 174 fqcodel_pf_deq_begin, 175 fqcodel_pf_deq_commit, 176 fqcodel_pf_purge 177}; 178 179const struct pfq_ops *const pfq_fqcodel_ops = &fqcodel_pf_ops; 180 181/* Default aggregate queue depth */ 182static const unsigned int fqcodel_qlimit = 1024; 183 184/* 185 * CoDel implementation 186 */ 187 188/* Delay target, 5ms */ 189static const int64_t codel_target = 5000000; 190 191/* First 399 "100 / sqrt(x)" intervals, ns precision */ 192static const uint32_t codel_intervals[] = { 193 100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447, 194 35355339, 33333333, 31622777, 30151134, 28867513, 27735010, 26726124, 195 25819889, 25000000, 24253563, 23570226, 22941573, 22360680, 21821789, 196 21320072, 20851441, 20412415, 20000000, 19611614, 19245009, 18898224, 197 18569534, 18257419, 17960530, 17677670, 17407766, 17149859, 16903085, 198 16666667, 16439899, 16222142, 16012815, 15811388, 15617376, 15430335, 199 15249857, 15075567, 14907120, 14744196, 14586499, 14433757, 14285714, 200 14142136, 14002801, 13867505, 13736056, 13608276, 13483997, 13363062, 201 13245324, 13130643, 13018891, 12909944, 12803688, 12700013, 12598816, 202 12500000, 12403473, 12309149, 12216944, 12126781, 12038585, 11952286, 203 11867817, 11785113, 11704115, 11624764, 11547005, 11470787, 11396058, 204 11322770, 11250879, 11180340, 11111111, 11043153, 10976426, 10910895, 205 10846523, 10783277, 10721125, 10660036, 10599979, 10540926, 10482848, 206 10425721, 10369517, 10314212, 10259784, 10206207, 10153462, 10101525, 207 10050378, 10000000, 9950372, 9901475, 9853293, 9805807, 9759001, 208 9712859, 9667365, 9622504, 9578263, 9534626, 9491580, 9449112, 209 9407209, 9365858, 9325048, 9284767, 9245003, 9205746, 9166985, 210 9128709, 9090909, 9053575, 9016696, 8980265, 8944272, 8908708, 211 8873565, 8838835, 8804509, 8770580, 8737041, 8703883, 8671100, 212 8638684, 8606630, 8574929, 8543577, 8512565, 8481889, 8451543, 213 8421519, 8391814, 8362420, 8333333, 8304548, 8276059, 8247861, 214 8219949, 8192319, 8164966, 8137885, 8111071, 8084521, 8058230, 215 8032193, 8006408, 7980869, 7955573, 7930516, 7905694, 7881104, 216 7856742, 7832604, 7808688, 7784989, 7761505, 7738232, 7715167, 217 7692308, 7669650, 7647191, 7624929, 7602859, 7580980, 7559289, 218 7537784, 7516460, 7495317, 7474351, 7453560, 7432941, 7412493, 219 7392213, 7372098, 7352146, 7332356, 7312724, 7293250, 7273930, 220 7254763, 7235746, 7216878, 7198158, 7179582, 7161149, 7142857, 221 7124705, 7106691, 7088812, 7071068, 7053456, 7035975, 7018624, 222 7001400, 6984303, 6967330, 6950480, 6933752, 6917145, 6900656, 223 6884284, 6868028, 6851887, 6835859, 6819943, 6804138, 6788442, 224 6772855, 6757374, 6741999, 6726728, 6711561, 6696495, 6681531, 225 6666667, 6651901, 6637233, 6622662, 6608186, 6593805, 6579517, 226 6565322, 6551218, 6537205, 6523281, 6509446, 6495698, 6482037, 227 6468462, 6454972, 6441566, 6428243, 6415003, 6401844, 6388766, 228 6375767, 6362848, 6350006, 6337243, 6324555, 6311944, 6299408, 229 6286946, 6274558, 6262243, 6250000, 6237829, 6225728, 6213698, 230 6201737, 6189845, 6178021, 6166264, 6154575, 6142951, 6131393, 231 6119901, 6108472, 6097108, 6085806, 6074567, 6063391, 6052275, 232 6041221, 6030227, 6019293, 6008418, 5997601, 5986843, 5976143, 233 5965500, 5954913, 5944383, 5933908, 5923489, 5913124, 5902813, 234 5892557, 5882353, 5872202, 5862104, 5852057, 5842062, 5832118, 235 5822225, 5812382, 5802589, 5792844, 5783149, 5773503, 5763904, 236 5754353, 5744850, 5735393, 5725983, 5716620, 5707301, 5698029, 237 5688801, 5679618, 5670480, 5661385, 5652334, 5643326, 5634362, 238 5625440, 5616560, 5607722, 5598925, 5590170, 5581456, 5572782, 239 5564149, 5555556, 5547002, 5538488, 5530013, 5521576, 5513178, 240 5504819, 5496497, 5488213, 5479966, 5471757, 5463584, 5455447, 241 5447347, 5439283, 5431254, 5423261, 5415304, 5407381, 5399492, 242 5391639, 5383819, 5376033, 5368281, 5360563, 5352877, 5345225, 243 5337605, 5330018, 5322463, 5314940, 5307449, 5299989, 5292561, 244 5285164, 5277798, 5270463, 5263158, 5255883, 5248639, 5241424, 245 5234239, 5227084, 5219958, 5212860, 5205792, 5198752, 5191741, 246 5184758, 5177804, 5170877, 5163978, 5157106, 5150262, 5143445, 247 5136655, 5129892, 5123155, 5116445, 5109761, 5103104, 5096472, 248 5089866, 5083286, 5076731, 5070201, 5063697, 5057217, 5050763, 249 5044333, 5037927, 5031546, 5025189, 5018856, 5012547, 5006262 250}; 251 252void 253codel_initparams(struct codel_params *cp, unsigned int target, 254 unsigned int interval, int quantum) 255{ 256 uint64_t mult; 257 unsigned int i; 258 259 /* 260 * Update observation intervals table according to the configured 261 * initial interval value. 262 */ 263 if (interval > codel_intervals[0]) { 264 /* 265 * Select either specified target or 5% of an interval 266 * (RFC 8289, section 4.3). 267 */ 268 cp->target = MAX(target, interval / 20); 269 cp->interval = interval; 270 271 /* The coefficient is scaled up by a 1000 */ 272 mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0]; 273 274 /* Prepare table of intervals */ 275 cp->intervals = mallocarray(nitems(codel_intervals), 276 sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO); 277 for (i = 0; i < nitems(codel_intervals); i++) 278 cp->intervals[i] = ((uint64_t)codel_intervals[i] * 279 mult) / 1000; 280 } else { 281 cp->target = MAX(target, codel_target); 282 cp->interval = codel_intervals[0]; 283 cp->intervals = (uint32_t *)codel_intervals; 284 } 285 286 cp->quantum = quantum; 287 288 /* Grace period for delta reuse (RFC 8289, section 5.5) */ 289 cp->grace = 16 * cp->interval; 290} 291 292void 293codel_freeparams(struct codel_params *cp) 294{ 295 if (cp->intervals != codel_intervals) 296 free(cp->intervals, M_DEVBUF, nitems(codel_intervals) * 297 sizeof(codel_intervals[0])); 298 cp->intervals = NULL; 299} 300 301static inline unsigned int 302codel_backlog(struct codel *cd) 303{ 304 return (cd->backlog); 305} 306 307static inline unsigned int 308codel_qlength(struct codel *cd) 309{ 310 return (ml_len(&cd->q)); 311} 312 313static inline int64_t 314codel_delay(struct codel *cd) 315{ 316 return (cd->delay); 317} 318 319void 320codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m) 321{ 322 m->m_pkthdr.ph_timestamp = now; 323 324 ml_enqueue(&cd->q, m); 325 cd->backlog += m->m_pkthdr.len; 326} 327 328/* 329 * Select the next interval according to the number of drops 330 * in the current one relative to the provided timestamp. 331 */ 332static inline void 333control_law(struct codel *cd, struct codel_params *cp, int64_t rts) 334{ 335 unsigned int idx; 336 337 idx = min(cd->drops, nitems(codel_intervals) - 1); 338 cd->next = rts + cp->intervals[idx]; 339} 340 341/* 342 * Pick the next enqueued packet and determine the queueing delay 343 * as well as whether or not it's a good candidate for dropping 344 * from the queue. 345 * 346 * The decision whether to drop the packet or not is made based 347 * on the queueing delay target of 5ms and on the current queue 348 * length in bytes which shouldn't be less than the amount of data 349 * that arrives in a typical interarrival time (MTU-sized packets 350 * arriving spaced by the amount of time it takes to send such a 351 * packet on the bottleneck). 352 */ 353static inline struct mbuf * 354codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now, 355 int *drop) 356{ 357 struct mbuf *m; 358 359 *drop = 0; 360 361 m = MBUF_LIST_FIRST(&cd->q); 362 if (m == NULL) { 363 KASSERT(cd->backlog == 0); 364 /* Empty queue, reset interval */ 365 cd->start = 0; 366 return (NULL); 367 } 368 369 if (now - m->m_pkthdr.ph_timestamp < cp->target || 370 cd->backlog <= cp->quantum) { 371 /* 372 * The minimum delay decreased below the target, reset 373 * the current observation interval. 374 */ 375 cd->start = 0; 376 return (m); 377 } 378 379 if (cd->start == 0) { 380 /* 381 * This is the first packet to be delayed for more than 382 * the target, start the first observation interval after 383 * a single RTT and see if the minimum delay goes below 384 * the target within the interval, otherwise punish the 385 * next packet. 386 */ 387 cd->start = now + cp->interval; 388 } else if (now > cd->start) { 389 *drop = 1; 390 } 391 return (m); 392} 393 394enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY }; 395 396static inline int 397codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop, 398 int state) 399{ 400 if (state == FIRSTDROP) 401 return (ACCEPTING); 402 403 if (cd->dropping) { 404 if (!drop) 405 return (RECOVERY); 406 else if (now >= cd->next) 407 return (state == DROPPING ? CONTROL : DROPPING); 408 } else if (drop) 409 return (FIRSTDROP); 410 411 if (m == NULL) 412 return (RECOVERY); 413 414 return (ACCEPTING); 415} 416 417struct mbuf * 418codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now, 419 struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes) 420{ 421 struct mbuf *m; 422 unsigned short delta; 423 int drop, state, done = 0; 424 425 state = INITIAL; 426 427 while (!done) { 428 m = codel_next_packet(cd, cp, now, &drop); 429 state = codel_state_change(cd, now, m, drop, state); 430 431 switch (state) { 432 case FIRSTDROP: 433 m = codel_commit(cd, m); 434 ml_enqueue(free_ml, m); 435 436 *dpkts += 1; 437 *dbytes += m->m_pkthdr.len; 438 439 cd->dropping = 1; 440 441 /* 442 * If we're still within the grace period and not 443 * meeting our minimal delay target we treat this 444 * as a continuation of the previous observation 445 * interval and shrink it further. Otherwise we 446 * start from the initial one. 447 */ 448 delta = cd->drops - cd->ldrops; 449 if (delta > 1 && (now < cd->next || 450 now - cd->next < cp->grace)) 451 cd->drops = delta; 452 else 453 cd->drops = 1; 454 control_law(cd, cp, now); 455 cd->ldrops = cd->drops; 456 457 /* fetches the next packet and goes to ACCEPTING */ 458 break; 459 case DROPPING: 460 m = codel_commit(cd, m); 461 ml_enqueue(free_ml, m); 462 cd->drops++; 463 464 *dpkts += 1; 465 *dbytes += m->m_pkthdr.len; 466 467 /* fetches the next packet and goes to CONTROL */ 468 break; 469 case CONTROL: 470 if (drop) { 471 control_law(cd, cp, cd->next); 472 continue; 473 } 474 /* FALLTHROUGH */ 475 case RECOVERY: 476 cd->dropping = 0; 477 /* FALLTHROUGH */ 478 case ACCEPTING: 479 done = 1; 480 break; 481 } 482 } 483 484 if (m != NULL) 485 cd->delay = now - m->m_pkthdr.ph_timestamp; 486 487 return (m); 488} 489 490struct mbuf * 491codel_commit(struct codel *cd, struct mbuf *m) 492{ 493 struct mbuf *n; 494 495 n = ml_dequeue(&cd->q); 496 if (m) 497 KASSERT(n == m); 498 KASSERT(n != NULL); 499 KASSERT(cd->backlog >= n->m_pkthdr.len); 500 cd->backlog -= n->m_pkthdr.len; 501 return (n); 502} 503 504void 505codel_purge(struct codel *cd, struct mbuf_list *ml) 506{ 507 ml_enlist(ml, &cd->q); 508 cd->backlog = 0; 509} 510 511/* 512 * FQ-CoDel implementation 513 */ 514 515static inline struct flow * 516classify_flow(struct fqcodel *fqc, struct mbuf *m) 517{ 518 unsigned int index = 0; 519 520 if (m->m_pkthdr.csum_flags & M_FLOWID) 521 index = m->m_pkthdr.ph_flowid % fqc->nflows; 522 523 DPRINTF("%s: %u\n", __func__, index); 524 525 return (&fqc->flows[index]); 526} 527 528struct mbuf * 529fqcodel_enq(struct fqcodel *fqc, struct mbuf *m) 530{ 531 struct flow *flow; 532 unsigned int backlog = 0; 533 int64_t now; 534 int i, ndrop; 535 536 flow = classify_flow(fqc, m); 537 if (flow == NULL) 538 return (m); 539 540 now = nsecuptime(); 541 codel_enqueue(&flow->cd, now, m); 542 fqc->qlength++; 543 544 if (!flow->active) { 545 SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry); 546 flow->deficit = fqc->quantum; 547 flow->active = 1; 548 DPRINTF("%s: flow %u active deficit %d\n", __func__, 549 flow->id, flow->deficit); 550 } 551 552 /* 553 * Flush pending_drops first to let PF account them individually. 554 * When batch dropping (below), we queue multiple packets but can 555 * only return one per enqueue call to maintain the interface 556 * contract. 557 */ 558 if (!ml_empty(&fqc->pending_drops)) 559 return (ml_dequeue(&fqc->pending_drops)); 560 561 /* 562 * If total queue length exceeds the limit, find the flow with the 563 * largest backlog and drop up to half of its packets, with a 564 * maximum of 64, from the head. Implements RFC 8290, section 4.1 565 * batch drop to handle overload efficiently. Dropped packets are 566 * queued in pending_drops. 567 */ 568 if (fqc->qlength > fqc->qlimit) { 569 for (i = 0; i < fqc->nflows; i++) { 570 if (codel_backlog(&fqc->flows[i].cd) > backlog) { 571 flow = &fqc->flows[i]; 572 backlog = codel_backlog(&flow->cd); 573 } 574 } 575 576 KASSERT(flow != NULL); 577 578 ndrop = MIN(MAX(codel_qlength(&flow->cd) / 2, 1), 64); 579 580 for (i = 0; i < ndrop; i++) { 581 m = codel_commit(&flow->cd, NULL); 582 if (m == NULL) 583 break; 584 fqc->drop_cnt.packets++; 585 fqc->drop_cnt.bytes += m->m_pkthdr.len; 586 fqc->qlength--; 587 ml_enqueue(&fqc->pending_drops, m); 588 } 589 590 DPRINTF("%s: batch-dropped %d/%d pkts from flow %u\n", __func__, 591 i, ndrop, flow->id); 592 593 return (ml_dequeue(&fqc->pending_drops)); 594 } 595 596 return (NULL); 597} 598 599static inline struct flowq * 600select_queue(struct fqcodel *fqc) 601{ 602 struct flowq *fq = NULL; 603 604 if (!SIMPLEQ_EMPTY(&fqc->newq)) 605 fq = &fqc->newq; 606 else if (!SIMPLEQ_EMPTY(&fqc->oldq)) 607 fq = &fqc->oldq; 608 return (fq); 609} 610 611static inline struct flow * 612first_flow(struct fqcodel *fqc, struct flowq **fq) 613{ 614 struct flow *flow; 615 616 while ((*fq = select_queue(fqc)) != NULL) { 617 while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) { 618 if (flow->deficit <= 0) { 619 flow->deficit += fqc->quantum; 620 SIMPLEQ_REMOVE_HEAD(*fq, flowentry); 621 SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, 622 flowentry); 623 DPRINTF("%s: flow %u deficit %d\n", __func__, 624 flow->id, flow->deficit); 625 } else 626 return (flow); 627 } 628 } 629 630 return (NULL); 631} 632 633static inline struct flow * 634next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq) 635{ 636 SIMPLEQ_REMOVE_HEAD(*fq, flowentry); 637 638 if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) { 639 /* A packet was dropped, starve the queue */ 640 SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry); 641 DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__, 642 flow->id, flow->deficit); 643 } else { 644 /* A packet was dropped on a starved queue, disable it */ 645 flow->active = 0; 646 DPRINTF("%s: flow %u inactive deficit %d\n", __func__, 647 flow->id, flow->deficit); 648 } 649 650 return (first_flow(fqc, fq)); 651} 652 653struct mbuf * 654fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep, 655 struct mbuf_list *free_ml) 656{ 657 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 658 struct flowq *fq; 659 struct flow *flow; 660 struct mbuf *m; 661 int64_t now; 662 663 if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0) 664 fqc->quantum = fqc->ifp->if_mtu + max_linkhdr; 665 666 now = nsecuptime(); 667 668 for (flow = first_flow(fqc, &fq); flow != NULL; 669 flow = next_flow(fqc, flow, &fq)) { 670 m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml, 671 &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes); 672 673 KASSERT(fqc->qlength >= ml_len(&ml)); 674 fqc->qlength -= ml_len(&ml); 675 676 ml_enlist(free_ml, &ml); 677 678 if (m != NULL) { 679 flow->deficit -= m->m_pkthdr.len; 680 DPRINTF("%s: flow %u deficit %d\n", __func__, 681 flow->id, flow->deficit); 682 *cookiep = flow; 683 return (m); 684 } 685 } 686 687 return (NULL); 688} 689 690void 691fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie) 692{ 693 struct flow *flow = cookie; 694 695 KASSERT(fqc->qlength > 0); 696 fqc->qlength--; 697 698 fqc->xmit_cnt.packets++; 699 fqc->xmit_cnt.bytes += m->m_pkthdr.len; 700 701 (void)codel_commit(&flow->cd, m); 702} 703 704void 705fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml) 706{ 707 unsigned int i; 708 709 for (i = 0; i < fqc->nflows; i++) 710 codel_purge(&fqc->flows[i].cd, ml); 711 ml_enlist(ml, &fqc->pending_drops); 712 fqc->qlength = 0; 713} 714 715struct mbuf * 716fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m) 717{ 718 return fqcodel_enq(ifq->ifq_q, m); 719} 720 721struct mbuf * 722fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep) 723{ 724 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); 725 struct mbuf *m; 726 727 m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml); 728 ifq_mfreeml(ifq, &free_ml); 729 return (m); 730} 731 732void 733fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) 734{ 735 return fqcodel_deq_commit(ifq->ifq_q, m, cookie); 736} 737 738void 739fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml) 740{ 741 return fqcodel_purge(ifq->ifq_q, ml); 742} 743 744void * 745fqcodel_pf_alloc(struct ifnet *ifp) 746{ 747 struct fqcodel *fqc; 748 749 fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO); 750 751 SIMPLEQ_INIT(&fqc->newq); 752 SIMPLEQ_INIT(&fqc->oldq); 753 ml_init(&fqc->pending_drops); 754 755 return (fqc); 756} 757 758int 759fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs) 760{ 761 struct ifnet *ifp = qs->kif->pfik_ifp; 762 struct fqcodel *fqc = arg; 763 764 if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > 0xffff) 765 return (EINVAL); 766 767 fqc->nflows = qs->flowqueue.flows; 768 fqc->quantum = qs->flowqueue.quantum; 769 if (qs->qlimit > 0) 770 fqc->qlimit = qs->qlimit; 771 else 772 fqc->qlimit = fqcodel_qlimit; 773 if (fqc->quantum > 0) 774 fqc->flags |= FQCF_FIXED_QUANTUM; 775 else 776 fqc->quantum = ifp->if_mtu + max_linkhdr; 777 778 codel_initparams(&fqc->cparams, qs->flowqueue.target, 779 qs->flowqueue.interval, fqc->quantum); 780 781 fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow), 782 M_DEVBUF, M_WAITOK | M_ZERO); 783 784#ifdef FQCODEL_DEBUG 785 { 786 unsigned int i; 787 788 for (i = 0; i < fqc->nflows; i++) 789 fqc->flows[i].id = i; 790 } 791#endif 792 793 fqc->ifp = ifp; 794 795 DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums " 796 "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit, 797 fqc->quantum, fqc->cparams.target / 1000000, 798 fqc->cparams.interval / 1000000); 799 800 return (0); 801} 802 803void 804fqcodel_pf_free(void *arg) 805{ 806 struct fqcodel *fqc = arg; 807 808 ml_purge(&fqc->pending_drops); 809 codel_freeparams(&fqc->cparams); 810 free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow)); 811 free(fqc, M_DEVBUF, sizeof(struct fqcodel)); 812} 813 814int 815fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes) 816{ 817 struct ifnet *ifp = qs->kif->pfik_ifp; 818 struct fqcodel_stats stats; 819 struct fqcodel *fqc; 820 int64_t delay; 821 unsigned int i; 822 int error = 0; 823 824 if (ifp == NULL) 825 return (EBADF); 826 827 if (*nbytes < sizeof(stats)) 828 return (EINVAL); 829 830 memset(&stats, 0, sizeof(stats)); 831 832 /* XXX: multi-q? */ 833 fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops); 834 if (fqc == NULL) 835 return (EBADF); 836 837 stats.xmit_cnt = fqc->xmit_cnt; 838 stats.drop_cnt = fqc->drop_cnt; 839 840 stats.qlength = ifq_len(&ifp->if_snd); 841 stats.qlimit = fqc->qlimit; 842 843 stats.flows = 0; 844 stats.delaysum = stats.delaysumsq = 0; 845 846 for (i = 0; i < fqc->nflows; i++) { 847 if (codel_qlength(&fqc->flows[i].cd) == 0) 848 continue; 849 /* Scale down to microseconds to avoid overflows */ 850 delay = codel_delay(&fqc->flows[i].cd) / 1000; 851 stats.delaysum += delay; 852 stats.delaysumsq += delay * delay; 853 stats.flows++; 854 } 855 856 ifq_q_leave(&ifp->if_snd, fqc); 857 858 if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) 859 return (error); 860 861 *nbytes = sizeof(stats); 862 return (0); 863} 864 865unsigned int 866fqcodel_pf_qlength(void *fqc) 867{ 868 return ((struct fqcodel *)fqc)->qlength; 869} 870 871struct mbuf * 872fqcodel_pf_enqueue(void *fqc, struct mbuf *m) 873{ 874 return fqcodel_enq(fqc, m); 875} 876 877struct mbuf * 878fqcodel_pf_deq_begin(void *fqc, void **cookiep, struct mbuf_list *free_ml) 879{ 880 return fqcodel_deq_begin(fqc, cookiep, free_ml); 881} 882 883void 884fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie) 885{ 886 return fqcodel_deq_commit(fqc, m, cookie); 887} 888 889void 890fqcodel_pf_purge(void *fqc, struct mbuf_list *ml) 891{ 892 return fqcodel_purge(fqc, ml); 893} 894 895unsigned int 896fqcodel_idx(unsigned int nqueues, const struct mbuf *m) 897{ 898 return (0); 899} 900 901void * 902fqcodel_alloc(unsigned int idx, void *arg) 903{ 904 /* Allocation is done in fqcodel_pf_alloc */ 905 return (arg); 906} 907 908void 909fqcodel_free(unsigned int idx, void *arg) 910{ 911 /* nothing to do here */ 912}