/* jcs's openbsd hax: local copy of OpenBSD sys/net/fq_codel.c */
1/* $OpenBSD: fq_codel.c,v 1.19 2025/11/13 20:07:22 bket Exp $ */
2
3/*
4 * Copyright (c) 2017 Mike Belopuhov
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/*
20 * K. Nichols, V. Jacobson, A. McGregor and J. Iyengar, Controlled Delay
21 * Active Queue Management, RFC 8289, January 2018
22 *
23 * Based on the algorithm by Kathleen Nichols and Van Jacobson with
24 * improvements from Dave Taht and Eric Dumazet.
25 */
26
27/*
28 * T. Hoeiland-Joergensen, P. McKenney, D. Taht, J. Gettys and E. Dumazet,
29 * The Flow Queue CoDel Packet Scheduler and Active Queue Management
30 * Algorithm, RFC 8290, January 2018
31 *
32 * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
33 * Internet Architectures, Swinburne University of Technology, Melbourne,
34 * Australia.
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/mbuf.h>
40#include <sys/queue.h>
41
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/pfvar.h>
45#include <net/fq_codel.h>
46
47/* #define FQCODEL_DEBUG 1 */
48
49#ifdef FQCODEL_DEBUG
50#define DPRINTF(x...) printf(x)
51#else
52#define DPRINTF(x...)
53#endif
54
/*
 * Per-flow CoDel (RFC 8289) queue state.
 */
struct codel {
	struct mbuf_list q;	/* FIFO of timestamped packets */

	unsigned int dropping:1;	/* Dropping state */
	unsigned int backlog:31;	/* Number of bytes in the queue */

	unsigned short drops;	/* Free running counter of drops */
	unsigned short ldrops;	/* Value from the previous run */

	int64_t start;	/* The moment queue was above target */
	int64_t next;	/* Next interval */
	int64_t delay;	/* Delay incurred by the last packet */
};
68
/*
 * CoDel tunables shared by all flows of one fq-codel instance.
 * Times are in nanoseconds (see codel_target and codel_intervals).
 */
struct codel_params {
	int64_t target;		/* acceptable queueing delay */
	int64_t interval;	/* initial observation interval */
	int64_t grace;		/* drop-state reuse grace period, 16x interval */
	int quantum;		/* backlog floor, one MTU-sized packet */

	uint32_t *intervals;	/* interval/sqrt(drops) lookup table */
};
77
78void codel_initparams(struct codel_params *, unsigned int,
79 unsigned int, int);
80void codel_freeparams(struct codel_params *);
81void codel_enqueue(struct codel *, int64_t, struct mbuf *);
82struct mbuf *codel_dequeue(struct codel *, struct codel_params *, int64_t,
83 struct mbuf_list *, uint64_t *, uint64_t *);
84struct mbuf *codel_commit(struct codel *, struct mbuf *);
85void codel_purge(struct codel *, struct mbuf_list *);
86
/*
 * One fq-codel flow: a CoDel queue plus its DRR scheduling state.
 */
struct flow {
	struct codel cd;	/* per-flow CoDel queue */
	int active:1;		/* linked on newq/oldq */
	int deficit:31;		/* DRR byte credit (RFC 8290) */
#ifdef FQCODEL_DEBUG
	uint16_t id;		/* flow index, for debug prints only */
#endif
	SIMPLEQ_ENTRY(flow) flowentry;
};
SIMPLEQ_HEAD(flowq, flow);
97
/*
 * Per-interface fq-codel instance: the two RFC 8290 flow lists, the
 * flow table and aggregate accounting.
 */
struct fqcodel {
	struct flowq newq;	/* serviced first (see select_queue) */
	struct flowq oldq;	/* serviced when newq is empty */

	struct flow *flows;	/* array of nflows flow queues */
	unsigned int qlength;	/* total packets across all flows */

	struct ifnet *ifp;	/* interface we are attached to */

	struct codel_params cparams;	/* shared CoDel tunables */

	unsigned int nflows;	/* size of the flows array */
	unsigned int qlimit;	/* aggregate packet limit */
	int quantum;		/* DRR byte credit added per round */

	unsigned int flags;
#define FQCF_FIXED_QUANTUM 0x1	/* quantum set by pf, don't track MTU */

	/* stats */
	struct fqcodel_pktcntr xmit_cnt;	/* transmitted pkts/bytes */
	struct fqcodel_pktcntr drop_cnt;	/* dropped pkts/bytes */

	/* batch-dropped packets not yet handed back to the caller */
	struct mbuf_list pending_drops;
};
122
123unsigned int fqcodel_idx(unsigned int, const struct mbuf *);
124void *fqcodel_alloc(unsigned int, void *);
125void fqcodel_free(unsigned int, void *);
126struct mbuf *fqcodel_if_enq(struct ifqueue *, struct mbuf *);
127struct mbuf *fqcodel_if_deq_begin(struct ifqueue *, void **);
128void fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *);
129void fqcodel_if_purge(struct ifqueue *, struct mbuf_list *);
130
131struct mbuf *fqcodel_enq(struct fqcodel *, struct mbuf *);
132struct mbuf *fqcodel_deq_begin(struct fqcodel *, void **,
133 struct mbuf_list *);
134void fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *);
135void fqcodel_purge(struct fqcodel *, struct mbuf_list *);
136
137/*
138 * ifqueue glue.
139 */
140
/* Positional initializer; slot order must match struct ifq_ops. */
static const struct ifq_ops fqcodel_ops = {
	fqcodel_idx,
	fqcodel_if_enq,
	fqcodel_if_deq_begin,
	fqcodel_if_deq_commit,
	fqcodel_if_purge,
	fqcodel_alloc,
	fqcodel_free
};

/* Exported handle to this discipline's ifqueue hooks. */
const struct ifq_ops *const ifq_fqcodel_ops = &fqcodel_ops;
152
153void *fqcodel_pf_alloc(struct ifnet *);
154int fqcodel_pf_addqueue(void *, struct pf_queuespec *);
155void fqcodel_pf_free(void *);
156int fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
157unsigned int fqcodel_pf_qlength(void *);
158struct mbuf *fqcodel_pf_enqueue(void *, struct mbuf *);
159struct mbuf *fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
160void fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
161void fqcodel_pf_purge(void *, struct mbuf_list *);
162
163/*
164 * pf queue glue.
165 */
166
/* Positional initializer; slot order must match struct pfq_ops. */
static const struct pfq_ops fqcodel_pf_ops = {
	fqcodel_pf_alloc,
	fqcodel_pf_addqueue,
	fqcodel_pf_free,
	fqcodel_pf_qstats,
	fqcodel_pf_qlength,
	fqcodel_pf_enqueue,
	fqcodel_pf_deq_begin,
	fqcodel_pf_deq_commit,
	fqcodel_pf_purge
};

/* Exported handle to this discipline's pf queue hooks. */
const struct pfq_ops *const pfq_fqcodel_ops = &fqcodel_pf_ops;
180
181/* Default aggregate queue depth */
182static const unsigned int fqcodel_qlimit = 1024;
183
184/*
185 * CoDel implementation
186 */
187
188/* Delay target, 5ms */
189static const int64_t codel_target = 5000000;
190
191/* First 399 "100 / sqrt(x)" intervals, ns precision */
192static const uint32_t codel_intervals[] = {
193 100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
194 35355339, 33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
195 25819889, 25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
196 21320072, 20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
197 18569534, 18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
198 16666667, 16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
199 15249857, 15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
200 14142136, 14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
201 13245324, 13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
202 12500000, 12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
203 11867817, 11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
204 11322770, 11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
205 10846523, 10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
206 10425721, 10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
207 10050378, 10000000, 9950372, 9901475, 9853293, 9805807, 9759001,
208 9712859, 9667365, 9622504, 9578263, 9534626, 9491580, 9449112,
209 9407209, 9365858, 9325048, 9284767, 9245003, 9205746, 9166985,
210 9128709, 9090909, 9053575, 9016696, 8980265, 8944272, 8908708,
211 8873565, 8838835, 8804509, 8770580, 8737041, 8703883, 8671100,
212 8638684, 8606630, 8574929, 8543577, 8512565, 8481889, 8451543,
213 8421519, 8391814, 8362420, 8333333, 8304548, 8276059, 8247861,
214 8219949, 8192319, 8164966, 8137885, 8111071, 8084521, 8058230,
215 8032193, 8006408, 7980869, 7955573, 7930516, 7905694, 7881104,
216 7856742, 7832604, 7808688, 7784989, 7761505, 7738232, 7715167,
217 7692308, 7669650, 7647191, 7624929, 7602859, 7580980, 7559289,
218 7537784, 7516460, 7495317, 7474351, 7453560, 7432941, 7412493,
219 7392213, 7372098, 7352146, 7332356, 7312724, 7293250, 7273930,
220 7254763, 7235746, 7216878, 7198158, 7179582, 7161149, 7142857,
221 7124705, 7106691, 7088812, 7071068, 7053456, 7035975, 7018624,
222 7001400, 6984303, 6967330, 6950480, 6933752, 6917145, 6900656,
223 6884284, 6868028, 6851887, 6835859, 6819943, 6804138, 6788442,
224 6772855, 6757374, 6741999, 6726728, 6711561, 6696495, 6681531,
225 6666667, 6651901, 6637233, 6622662, 6608186, 6593805, 6579517,
226 6565322, 6551218, 6537205, 6523281, 6509446, 6495698, 6482037,
227 6468462, 6454972, 6441566, 6428243, 6415003, 6401844, 6388766,
228 6375767, 6362848, 6350006, 6337243, 6324555, 6311944, 6299408,
229 6286946, 6274558, 6262243, 6250000, 6237829, 6225728, 6213698,
230 6201737, 6189845, 6178021, 6166264, 6154575, 6142951, 6131393,
231 6119901, 6108472, 6097108, 6085806, 6074567, 6063391, 6052275,
232 6041221, 6030227, 6019293, 6008418, 5997601, 5986843, 5976143,
233 5965500, 5954913, 5944383, 5933908, 5923489, 5913124, 5902813,
234 5892557, 5882353, 5872202, 5862104, 5852057, 5842062, 5832118,
235 5822225, 5812382, 5802589, 5792844, 5783149, 5773503, 5763904,
236 5754353, 5744850, 5735393, 5725983, 5716620, 5707301, 5698029,
237 5688801, 5679618, 5670480, 5661385, 5652334, 5643326, 5634362,
238 5625440, 5616560, 5607722, 5598925, 5590170, 5581456, 5572782,
239 5564149, 5555556, 5547002, 5538488, 5530013, 5521576, 5513178,
240 5504819, 5496497, 5488213, 5479966, 5471757, 5463584, 5455447,
241 5447347, 5439283, 5431254, 5423261, 5415304, 5407381, 5399492,
242 5391639, 5383819, 5376033, 5368281, 5360563, 5352877, 5345225,
243 5337605, 5330018, 5322463, 5314940, 5307449, 5299989, 5292561,
244 5285164, 5277798, 5270463, 5263158, 5255883, 5248639, 5241424,
245 5234239, 5227084, 5219958, 5212860, 5205792, 5198752, 5191741,
246 5184758, 5177804, 5170877, 5163978, 5157106, 5150262, 5143445,
247 5136655, 5129892, 5123155, 5116445, 5109761, 5103104, 5096472,
248 5089866, 5083286, 5076731, 5070201, 5063697, 5057217, 5050763,
249 5044333, 5037927, 5031546, 5025189, 5018856, 5012547, 5006262
250};
251
252void
253codel_initparams(struct codel_params *cp, unsigned int target,
254 unsigned int interval, int quantum)
255{
256 uint64_t mult;
257 unsigned int i;
258
259 /*
260 * Update observation intervals table according to the configured
261 * initial interval value.
262 */
263 if (interval > codel_intervals[0]) {
264 /*
265 * Select either specified target or 5% of an interval
266 * (RFC 8289, section 4.3).
267 */
268 cp->target = MAX(target, interval / 20);
269 cp->interval = interval;
270
271 /* The coefficient is scaled up by a 1000 */
272 mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];
273
274 /* Prepare table of intervals */
275 cp->intervals = mallocarray(nitems(codel_intervals),
276 sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
277 for (i = 0; i < nitems(codel_intervals); i++)
278 cp->intervals[i] = ((uint64_t)codel_intervals[i] *
279 mult) / 1000;
280 } else {
281 cp->target = MAX(target, codel_target);
282 cp->interval = codel_intervals[0];
283 cp->intervals = (uint32_t *)codel_intervals;
284 }
285
286 cp->quantum = quantum;
287
288 /* Grace period for delta reuse (RFC 8289, section 5.5) */
289 cp->grace = 16 * cp->interval;
290}
291
292void
293codel_freeparams(struct codel_params *cp)
294{
295 if (cp->intervals != codel_intervals)
296 free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
297 sizeof(codel_intervals[0]));
298 cp->intervals = NULL;
299}
300
/* Bytes currently sitting on the queue. */
static inline unsigned int
codel_backlog(struct codel *cd)
{
	return (cd->backlog);
}

/* Packets currently sitting on the queue. */
static inline unsigned int
codel_qlength(struct codel *cd)
{
	return (ml_len(&cd->q));
}

/* Sojourn time of the most recently dequeued packet. */
static inline int64_t
codel_delay(struct codel *cd)
{
	return (cd->delay);
}
318
319void
320codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
321{
322 m->m_pkthdr.ph_timestamp = now;
323
324 ml_enqueue(&cd->q, m);
325 cd->backlog += m->m_pkthdr.len;
326}
327
328/*
329 * Select the next interval according to the number of drops
330 * in the current one relative to the provided timestamp.
331 */
332static inline void
333control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
334{
335 unsigned int idx;
336
337 idx = min(cd->drops, nitems(codel_intervals) - 1);
338 cd->next = rts + cp->intervals[idx];
339}
340
341/*
342 * Pick the next enqueued packet and determine the queueing delay
343 * as well as whether or not it's a good candidate for dropping
344 * from the queue.
345 *
346 * The decision whether to drop the packet or not is made based
347 * on the queueing delay target of 5ms and on the current queue
348 * length in bytes which shouldn't be less than the amount of data
349 * that arrives in a typical interarrival time (MTU-sized packets
350 * arriving spaced by the amount of time it takes to send such a
351 * packet on the bottleneck).
352 */
static inline struct mbuf *
codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
    int *drop)
{
	struct mbuf *m;

	*drop = 0;

	m = MBUF_LIST_FIRST(&cd->q);
	if (m == NULL) {
		KASSERT(cd->backlog == 0);
		/* Empty queue, reset interval */
		cd->start = 0;
		return (NULL);
	}

	/*
	 * Head packet is below the delay target, or the backlog has
	 * shrunk to no more than one quantum's worth: there is no
	 * standing queue worth controlling.
	 */
	if (now - m->m_pkthdr.ph_timestamp < cp->target ||
	    cd->backlog <= cp->quantum) {
		/*
		 * The minimum delay decreased below the target, reset
		 * the current observation interval.
		 */
		cd->start = 0;
		return (m);
	}

	if (cd->start == 0) {
		/*
		 * This is the first packet to be delayed for more than
		 * the target, start the first observation interval after
		 * a single RTT and see if the minimum delay goes below
		 * the target within the interval, otherwise punish the
		 * next packet.
		 */
		cd->start = now + cp->interval;
	} else if (now > cd->start) {
		/* Still above target past the interval: drop candidate */
		*drop = 1;
	}
	return (m);
}
393
/* Dequeue state machine states; INITIAL only occurs before the first step. */
enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };

/*
 * Advance the dequeue state machine one step.  `drop' is
 * codel_next_packet()'s verdict on the head packet; `m' is NULL once
 * the queue has been emptied.
 */
static inline int
codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
    int state)
{
	/* The first drop is immediately followed by accepting a packet */
	if (state == FIRSTDROP)
		return (ACCEPTING);

	if (cd->dropping) {
		/* Delay fell below the target: leave the dropping state */
		if (!drop)
			return (RECOVERY);
		/* The next scheduled drop is due; CONTROL reschedules it */
		else if (now >= cd->next)
			return (state == DROPPING ? CONTROL : DROPPING);
	} else if (drop)
		return (FIRSTDROP);

	if (m == NULL)
		return (RECOVERY);

	return (ACCEPTING);
}
416
/*
 * Dequeue step of the CoDel algorithm: walk the queue from the head,
 * moving packets that overstayed onto free_ml (with drop accounting
 * via dpkts/dbytes) until a packet that may be transmitted is found.
 * The returned packet is only peeked at -- it stays on the queue
 * until the caller confirms transmission with codel_commit().
 */
struct mbuf *
codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
    struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
{
	struct mbuf *m;
	unsigned short delta;
	int drop, state, done = 0;

	state = INITIAL;

	while (!done) {
		m = codel_next_packet(cd, cp, now, &drop);
		state = codel_state_change(cd, now, m, drop, state);

		switch (state) {
		case FIRSTDROP:
			/* Enter the dropping state with a single drop */
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			cd->dropping = 1;

			/*
			 * If we're still within the grace period and not
			 * meeting our minimal delay target we treat this
			 * as a continuation of the previous observation
			 * interval and shrink it further.  Otherwise we
			 * start from the initial one.
			 */
			delta = cd->drops - cd->ldrops;
			if (delta > 1 && (now < cd->next ||
			    now - cd->next < cp->grace))
				cd->drops = delta;
			else
				cd->drops = 1;
			control_law(cd, cp, now);
			cd->ldrops = cd->drops;

			/* fetches the next packet and goes to ACCEPTING */
			break;
		case DROPPING:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);
			cd->drops++;

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			/* fetches the next packet and goes to CONTROL */
			break;
		case CONTROL:
			if (drop) {
				/*
				 * Reschedule from the previously scheduled
				 * drop time, not from now.
				 */
				control_law(cd, cp, cd->next);
				continue;
			}
			/* FALLTHROUGH */
		case RECOVERY:
			cd->dropping = 0;
			/* FALLTHROUGH */
		case ACCEPTING:
			done = 1;
			break;
		}
	}

	/* Record the sojourn time of the packet we are handing out */
	if (m != NULL)
		cd->delay = now - m->m_pkthdr.ph_timestamp;

	return (m);
}
489
490struct mbuf *
491codel_commit(struct codel *cd, struct mbuf *m)
492{
493 struct mbuf *n;
494
495 n = ml_dequeue(&cd->q);
496 if (m)
497 KASSERT(n == m);
498 KASSERT(n != NULL);
499 KASSERT(cd->backlog >= n->m_pkthdr.len);
500 cd->backlog -= n->m_pkthdr.len;
501 return (n);
502}
503
504void
505codel_purge(struct codel *cd, struct mbuf_list *ml)
506{
507 ml_enlist(ml, &cd->q);
508 cd->backlog = 0;
509}
510
511/*
512 * FQ-CoDel implementation
513 */
514
515static inline struct flow *
516classify_flow(struct fqcodel *fqc, struct mbuf *m)
517{
518 unsigned int index = 0;
519
520 if (m->m_pkthdr.csum_flags & M_FLOWID)
521 index = m->m_pkthdr.ph_flowid % fqc->nflows;
522
523 DPRINTF("%s: %u\n", __func__, index);
524
525 return (&fqc->flows[index]);
526}
527
/*
 * Enqueue a packet on its flow's CoDel queue.  Returns NULL when the
 * packet was accepted, or an mbuf the caller must treat as dropped:
 * either a packet previously parked on pending_drops or the first
 * packet of a batch drop performed here.
 */
struct mbuf *
fqcodel_enq(struct fqcodel *fqc, struct mbuf *m)
{
	struct flow *flow;
	unsigned int backlog = 0;
	int64_t now;
	int i, ndrop;

	flow = classify_flow(fqc, m);
	if (flow == NULL)
		return (m);

	now = nsecuptime();
	codel_enqueue(&flow->cd, now, m);
	fqc->qlength++;

	if (!flow->active) {
		/* First packet of an idle flow: schedule it as "new" */
		SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
		flow->deficit = fqc->quantum;
		flow->active = 1;
		DPRINTF("%s: flow %u active deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	/*
	 * Flush pending_drops first to let PF account them individually.
	 * When batch dropping (below), we queue multiple packets but can
	 * only return one per enqueue call to maintain the interface
	 * contract.
	 */
	if (!ml_empty(&fqc->pending_drops))
		return (ml_dequeue(&fqc->pending_drops));

	/*
	 * If total queue length exceeds the limit, find the flow with the
	 * largest backlog and drop up to half of its packets, with a
	 * maximum of 64, from the head. Implements RFC 8290, section 4.1
	 * batch drop to handle overload efficiently. Dropped packets are
	 * queued in pending_drops.
	 */
	if (fqc->qlength > fqc->qlimit) {
		/* Find the flow with the largest byte backlog */
		for (i = 0; i < fqc->nflows; i++) {
			if (codel_backlog(&fqc->flows[i].cd) > backlog) {
				flow = &fqc->flows[i];
				backlog = codel_backlog(&flow->cd);
			}
		}

		KASSERT(flow != NULL);

		ndrop = MIN(MAX(codel_qlength(&flow->cd) / 2, 1), 64);

		for (i = 0; i < ndrop; i++) {
			m = codel_commit(&flow->cd, NULL);
			if (m == NULL)
				break;
			fqc->drop_cnt.packets++;
			fqc->drop_cnt.bytes += m->m_pkthdr.len;
			fqc->qlength--;
			ml_enqueue(&fqc->pending_drops, m);
		}

		DPRINTF("%s: batch-dropped %d/%d pkts from flow %u\n", __func__,
		    i, ndrop, flow->id);

		/* Hand one drop back so this enqueue is seen as rejected */
		return (ml_dequeue(&fqc->pending_drops));
	}

	return (NULL);
}
598
599static inline struct flowq *
600select_queue(struct fqcodel *fqc)
601{
602 struct flowq *fq = NULL;
603
604 if (!SIMPLEQ_EMPTY(&fqc->newq))
605 fq = &fqc->newq;
606 else if (!SIMPLEQ_EMPTY(&fqc->oldq))
607 fq = &fqc->oldq;
608 return (fq);
609}
610
/*
 * DRR scan (RFC 8290, section 4.2): return the first flow that still
 * has deficit to spend, preferring the new list.  Flows that ran out
 * of credit get one quantum added and are rotated to the tail of the
 * old list.  Returns NULL when no flow is scheduled at all.
 */
static inline struct flow *
first_flow(struct fqcodel *fqc, struct flowq **fq)
{
	struct flow *flow;

	while ((*fq = select_queue(fqc)) != NULL) {
		while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
			if (flow->deficit <= 0) {
				/* Out of credit: top up and demote to oldq */
				flow->deficit += fqc->quantum;
				SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
				SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
				    flowentry);
				DPRINTF("%s: flow %u deficit %d\n", __func__,
				    flow->id, flow->deficit);
			} else
				return (flow);
		}
	}

	return (NULL);
}
632
/*
 * The given flow yielded no packet (its queue drained): rotate past
 * it and pick the next candidate.  A flow coming off the new list
 * while old flows are waiting is demoted to the old list tail for one
 * more pass (RFC 8290, section 4.2); otherwise it is deactivated
 * until a future enqueue re-links it.
 */
static inline struct flow *
next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
{
	SIMPLEQ_REMOVE_HEAD(*fq, flowentry);

	if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
		/* Give the flow one pass through the old list first */
		SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
		DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
		    flow->id, flow->deficit);
	} else {
		/* Nothing left anywhere for this flow, park it */
		flow->active = 0;
		DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	return (first_flow(fqc, fq));
}
652
/*
 * Run the DRR scheduler over the active flows and return the first
 * packet CoDel releases, or NULL when every queue is empty.  Packets
 * CoDel dropped along the way are collected on free_ml and accounted
 * in drop_cnt; *cookiep records the chosen flow for
 * fqcodel_deq_commit().
 */
struct mbuf *
fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
    struct mbuf_list *free_ml)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct flowq *fq;
	struct flow *flow;
	struct mbuf *m;
	int64_t now;

	/* Track the interface MTU unless pf configured a fixed quantum */
	if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
		fqc->quantum = fqc->ifp->if_mtu + max_linkhdr;

	now = nsecuptime();

	for (flow = first_flow(fqc, &fq); flow != NULL;
	    flow = next_flow(fqc, flow, &fq)) {
		m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
		    &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);

		/* Packets on ml were dropped by CoDel, uncount them */
		KASSERT(fqc->qlength >= ml_len(&ml));
		fqc->qlength -= ml_len(&ml);

		ml_enlist(free_ml, &ml);

		if (m != NULL) {
			/* Deficit is charged here, at deq_begin time */
			flow->deficit -= m->m_pkthdr.len;
			DPRINTF("%s: flow %u deficit %d\n", __func__,
			    flow->id, flow->deficit);
			*cookiep = flow;
			return (m);
		}
	}

	return (NULL);
}
689
690void
691fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie)
692{
693 struct flow *flow = cookie;
694
695 KASSERT(fqc->qlength > 0);
696 fqc->qlength--;
697
698 fqc->xmit_cnt.packets++;
699 fqc->xmit_cnt.bytes += m->m_pkthdr.len;
700
701 (void)codel_commit(&flow->cd, m);
702}
703
704void
705fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml)
706{
707 unsigned int i;
708
709 for (i = 0; i < fqc->nflows; i++)
710 codel_purge(&fqc->flows[i].cd, ml);
711 ml_enlist(ml, &fqc->pending_drops);
712 fqc->qlength = 0;
713}
714
715struct mbuf *
716fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m)
717{
718 return fqcodel_enq(ifq->ifq_q, m);
719}
720
721struct mbuf *
722fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep)
723{
724 struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
725 struct mbuf *m;
726
727 m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml);
728 ifq_mfreeml(ifq, &free_ml);
729 return (m);
730}
731
732void
733fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
734{
735 return fqcodel_deq_commit(ifq->ifq_q, m, cookie);
736}
737
738void
739fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml)
740{
741 return fqcodel_purge(ifq->ifq_q, ml);
742}
743
744void *
745fqcodel_pf_alloc(struct ifnet *ifp)
746{
747 struct fqcodel *fqc;
748
749 fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO);
750
751 SIMPLEQ_INIT(&fqc->newq);
752 SIMPLEQ_INIT(&fqc->oldq);
753 ml_init(&fqc->pending_drops);
754
755 return (fqc);
756}
757
/*
 * Configure an fq-codel pf queue from the parsed queue spec: flow
 * count, quantum, queue limit and the CoDel target/interval.
 * Returns 0 on success or EINVAL for an unusable flow count.
 */
int
fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel *fqc = arg;

	/* Reject zero flows or more than 65535 */
	if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > 0xffff)
		return (EINVAL);

	fqc->nflows = qs->flowqueue.flows;
	fqc->quantum = qs->flowqueue.quantum;
	if (qs->qlimit > 0)
		fqc->qlimit = qs->qlimit;
	else
		fqc->qlimit = fqcodel_qlimit;
	/* An explicit quantum is pinned; otherwise track the MTU */
	if (fqc->quantum > 0)
		fqc->flags |= FQCF_FIXED_QUANTUM;
	else
		fqc->quantum = ifp->if_mtu + max_linkhdr;

	codel_initparams(&fqc->cparams, qs->flowqueue.target,
	    qs->flowqueue.interval, fqc->quantum);

	fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow),
	    M_DEVBUF, M_WAITOK | M_ZERO);

#ifdef FQCODEL_DEBUG
	{
		unsigned int i;

		/* Label the flows so debug prints can identify them */
		for (i = 0; i < fqc->nflows; i++)
			fqc->flows[i].id = i;
	}
#endif

	fqc->ifp = ifp;

	DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums "
	    "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit,
	    fqc->quantum, fqc->cparams.target / 1000000,
	    fqc->cparams.interval / 1000000);

	return (0);
}
802
/*
 * Tear down an fq-codel pf queue: drop any packets still waiting on
 * pending_drops, release the rescaled interval table and the flow
 * array, then the fqcodel structure itself.
 */
void
fqcodel_pf_free(void *arg)
{
	struct fqcodel *fqc = arg;

	ml_purge(&fqc->pending_drops);
	codel_freeparams(&fqc->cparams);
	free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
	free(fqc, M_DEVBUF, sizeof(struct fqcodel));
}
813
/*
 * Copy a snapshot of the queue statistics out to userland.  Per-flow
 * delays are summed (and squared, in microseconds) over the currently
 * busy flows so userland can derive mean and standard deviation.
 * Returns EBADF when the interface or its fq-codel discipline is
 * gone, EINVAL when the supplied buffer is too small, or a copyout(9)
 * error.
 */
int
fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel_stats stats;
	struct fqcodel *fqc;
	int64_t delay;
	unsigned int i;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));

	/* XXX: multi-q? */
	fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops);
	if (fqc == NULL)
		return (EBADF);

	stats.xmit_cnt = fqc->xmit_cnt;
	stats.drop_cnt = fqc->drop_cnt;

	stats.qlength = ifq_len(&ifp->if_snd);
	stats.qlimit = fqc->qlimit;

	stats.flows = 0;
	stats.delaysum = stats.delaysumsq = 0;

	/* Only flows with packets queued contribute to the delay sums */
	for (i = 0; i < fqc->nflows; i++) {
		if (codel_qlength(&fqc->flows[i].cd) == 0)
			continue;
		/* Scale down to microseconds to avoid overflows */
		delay = codel_delay(&fqc->flows[i].cd) / 1000;
		stats.delaysum += delay;
		stats.delaysumsq += delay * delay;
		stats.flows++;
	}

	/* Leave the queue before the (possibly sleeping) copyout */
	ifq_q_leave(&ifp->if_snd, fqc);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}
864
865unsigned int
866fqcodel_pf_qlength(void *fqc)
867{
868 return ((struct fqcodel *)fqc)->qlength;
869}
870
871struct mbuf *
872fqcodel_pf_enqueue(void *fqc, struct mbuf *m)
873{
874 return fqcodel_enq(fqc, m);
875}
876
877struct mbuf *
878fqcodel_pf_deq_begin(void *fqc, void **cookiep, struct mbuf_list *free_ml)
879{
880 return fqcodel_deq_begin(fqc, cookiep, free_ml);
881}
882
883void
884fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie)
885{
886 return fqcodel_deq_commit(fqc, m, cookie);
887}
888
889void
890fqcodel_pf_purge(void *fqc, struct mbuf_list *ml)
891{
892 return fqcodel_purge(fqc, ml);
893}
894
/*
 * Single software queue: every packet maps to index 0.  Flow
 * separation happens inside fq-codel itself, not at the ifq level.
 */
unsigned int
fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
{
	return (0);
}
900
/*
 * The per-interface state was already allocated by fqcodel_pf_alloc;
 * simply hand that pointer back to the ifq layer.
 */
void *
fqcodel_alloc(unsigned int idx, void *arg)
{
	return (arg);
}
907
/*
 * Ownership stays with pf (see fqcodel_pf_free); there is nothing to
 * release on behalf of the ifq layer.
 */
void
fqcodel_free(unsigned int idx, void *arg)
{
}