
fq_codel: Fair Queue Codel AQM

Fair Queue Codel packet scheduler

Principles:

- Packets are classified on flows (by the internal classifier or an
external one).
- This is a stochastic model: as we use a hash, several flows might
be hashed to the same slot (see the sketch after this list).
- Each flow has a CoDel-managed queue.
- Flows are linked onto two (round-robin) lists, so that new flows
have priority over old ones.
- For a given flow, packets are not reordered (CoDel uses a FIFO).
- Head drops only.
- ECN capability is on by default.
- Very low memory footprint (64 bytes per flow).
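
To make the stochastic part concrete, here is a minimal userspace
sketch of the bucket-selection arithmetic this patch uses (the kernel
derives the hash with skb_flow_dissect() and jhash_3words(); the hash
constants below are illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	/* Scale a 32-bit flow hash uniformly into [0, flows_cnt)
	 * without a modulo, as fq_codel_hash() does:
	 * ((u64)hash * q->flows_cnt) >> 32
	 */
	static unsigned int bucket_of(uint32_t hash, uint32_t flows_cnt)
	{
		return ((uint64_t)hash * flows_cnt) >> 32;
	}

	int main(void)
	{
		/* Two distinct flow hashes can land in the same bucket;
		 * such collisions are why fairness is only stochastic.
		 */
		printf("%u\n", bucket_of(0x12345678, 1024));	/* 72 */
		printf("%u\n", bucket_of(0xdeadbeef, 1024));	/* 890 */
		return 0;
	}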

tc qdisc ... fq_codel [ limit PACKETS ] [ flows NUMBER ]
                      [ target TIME ] [ interval TIME ] [ noecn ]
                      [ quantum BYTES ]

Defaults: 1024 flows, 10240 packet limit, quantum: device MTU,
target: 5ms (CoDel default), interval: 100ms (CoDel default).
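
For example, to attach the qdisc with the defaults spelled out (the
device name eth0 and the Ethernet quantum of 1514 bytes are
illustrative):

	tc qdisc add dev eth0 root fq_codel limit 10240 flows 1024 \
		target 5ms interval 100ms quantum 1514
	tc -s qdisc show dev eth0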

Impressive results under load:

class htb 1:1 root leaf 10: prio 0 quantum 1514 rate 200000Kbit ceil 200000Kbit burst 1475b/8 mpu 0b overhead 0b cburst 1475b/8 mpu 0b overhead 0b level 0
Sent 43304920109 bytes 33063109 pkt (dropped 0, overlimits 0 requeues 0)
rate 201691Kbit 28595pps backlog 0b 312p requeues 0
lended: 33063109 borrowed: 0 giants: 0
tokens: -912 ctokens: -912

class fq_codel 10:1735 parent 10:
(dropped 1292, overlimits 0 requeues 0)
backlog 15140b 10p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 7.1ms
class fq_codel 10:4524 parent 10:
(dropped 1291, overlimits 0 requeues 0)
backlog 16654b 11p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 7.1ms
class fq_codel 10:4e74 parent 10:
(dropped 1290, overlimits 0 requeues 0)
backlog 6056b 4p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 6.4ms dropping drop_next 92.0ms
class fq_codel 10:628a parent 10:
(dropped 1289, overlimits 0 requeues 0)
backlog 7570b 5p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 5.4ms dropping drop_next 90.9ms
class fq_codel 10:a4b3 parent 10:
(dropped 302, overlimits 0 requeues 0)
backlog 16654b 11p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 7.1ms
class fq_codel 10:c3c2 parent 10:
(dropped 1284, overlimits 0 requeues 0)
backlog 13626b 9p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 5.9ms
class fq_codel 10:d331 parent 10:
(dropped 299, overlimits 0 requeues 0)
backlog 15140b 10p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 7.0ms
class fq_codel 10:d526 parent 10:
(dropped 12160, overlimits 0 requeues 0)
backlog 35870b 211p requeues 0
deficit 1508 count 12160 lastcount 1 ldelay 15.3ms dropping drop_next 247us
class fq_codel 10:e2c6 parent 10:
(dropped 1288, overlimits 0 requeues 0)
backlog 15140b 10p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 7.1ms
class fq_codel 10:eab5 parent 10:
(dropped 1285, overlimits 0 requeues 0)
backlog 16654b 11p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 5.9ms
class fq_codel 10:f220 parent 10:
(dropped 1289, overlimits 0 requeues 0)
backlog 15140b 10p requeues 0
deficit 1514 count 1 lastcount 1 ldelay 7.1ms

qdisc htb 1: root refcnt 6 r2q 10 default 1 direct_packets_stat 0 ver 3.17
Sent 43331086547 bytes 33092812 pkt (dropped 0, overlimits 66063544 requeues 71)
rate 201697Kbit 28602pps backlog 0b 260p requeues 71
qdisc fq_codel 10: parent 1:1 limit 10240p flows 65536 target 5.0ms interval 100.0ms ecn
Sent 43331086547 bytes 33092812 pkt (dropped 949359, overlimits 0 requeues 0)
rate 201697Kbit 28602pps backlog 189352b 260p requeues 0
maxpacket 1514 drop_overlimit 0 new_flow_count 5582 ecn_mark 125593
new_flows_len 0 old_flows_len 11

PING 172.30.42.18 (172.30.42.18) 56(84) bytes of data.
64 bytes from 172.30.42.18: icmp_req=1 ttl=64 time=0.227 ms
64 bytes from 172.30.42.18: icmp_req=2 ttl=64 time=0.165 ms
64 bytes from 172.30.42.18: icmp_req=3 ttl=64 time=0.166 ms
64 bytes from 172.30.42.18: icmp_req=4 ttl=64 time=0.151 ms
64 bytes from 172.30.42.18: icmp_req=5 ttl=64 time=0.164 ms
64 bytes from 172.30.42.18: icmp_req=6 ttl=64 time=0.172 ms
64 bytes from 172.30.42.18: icmp_req=7 ttl=64 time=0.175 ms
64 bytes from 172.30.42.18: icmp_req=8 ttl=64 time=0.183 ms
64 bytes from 172.30.42.18: icmp_req=9 ttl=64 time=0.158 ms
64 bytes from 172.30.42.18: icmp_req=10 ttl=64 time=0.200 ms

10 packets transmitted, 10 received, 0% packet loss, time 8999ms
rtt min/avg/max/mdev = 0.151/0.176/0.227/0.022 ms

Much better than SFQ because of the priority given to new flows, and a
fast path dirtying fewer cache lines.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>


 include/linux/pkt_sched.h |  54 ++
 net/sched/Kconfig         |  11 +
 net/sched/Makefile        |   1 +
 net/sched/sch_fq_codel.c  | 624 ++++++++++
 4 files changed, 690 insertions(+)

include/linux/pkt_sched.h
@@ -681,4 +681,58 @@
 	__u32	dropping;	/* are we in dropping state ? */
 };
 
+/* FQ_CODEL */
+
+enum {
+	TCA_FQ_CODEL_UNSPEC,
+	TCA_FQ_CODEL_TARGET,
+	TCA_FQ_CODEL_LIMIT,
+	TCA_FQ_CODEL_INTERVAL,
+	TCA_FQ_CODEL_ECN,
+	TCA_FQ_CODEL_FLOWS,
+	TCA_FQ_CODEL_QUANTUM,
+	__TCA_FQ_CODEL_MAX
+};
+
+#define TCA_FQ_CODEL_MAX	(__TCA_FQ_CODEL_MAX - 1)
+
+enum {
+	TCA_FQ_CODEL_XSTATS_QDISC,
+	TCA_FQ_CODEL_XSTATS_CLASS,
+};
+
+struct tc_fq_codel_qd_stats {
+	__u32	maxpacket;	/* largest packet we've seen so far */
+	__u32	drop_overlimit; /* number of time max qdisc
+				 * packet limit was hit
+				 */
+	__u32	ecn_mark;	/* number of packets we ECN marked
+				 * instead of being dropped
+				 */
+	__u32	new_flow_count;	/* number of time packets
+				 * created a 'new flow'
+				 */
+	__u32	new_flows_len;	/* count of flows in new list */
+	__u32	old_flows_len;	/* count of flows in old list */
+};
+
+struct tc_fq_codel_cl_stats {
+	__s32	deficit;
+	__u32	ldelay;		/* in-queue delay seen by most recently
+				 * dequeued packet
+				 */
+	__u32	count;
+	__u32	lastcount;
+	__u32	dropping;
+	__s32	drop_next;
+};
+
+struct tc_fq_codel_xstats {
+	__u32	type;
+	union {
+		struct tc_fq_codel_qd_stats qdisc_stats;
+		struct tc_fq_codel_cl_stats class_stats;
+	};
+};
+
 #endif
net/sched/Kconfig
@@ -261,6 +261,17 @@
 
 	  If unsure, say N.
 
+config NET_SCH_FQ_CODEL
+	tristate "Fair Queue Controlled Delay AQM (FQ_CODEL)"
+	help
+	  Say Y here if you want to use the FQ Controlled Delay (FQ_CODEL)
+	  packet scheduling algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_fq_codel.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
net/sched/Makefile
@@ -38,6 +38,7 @@
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
+obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
net/sched/sch_fq_codel.c (new file)
/*
 * Fair Queue CoDel discipline
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/flow_keys.h>
#include <net/codel.h>

/* Fair Queue CoDel.
 *
 * Principles :
 * Packets are classified (internal classifier or external) on flows.
 * This is a Stochastic model (as we use a hash, several flows
 * might be hashed on same slot)
 * Each flow has a CoDel managed queue.
 * Flows are linked onto two (Round Robin) lists,
 * so that new flows have priority on old ones.
 *
 * For a given flow, packets are not reordered (CoDel uses a FIFO)
 * head drops only.
 * ECN capability is on by default.
 * Low memory footprint (64 bytes per flow)
 */

struct fq_codel_flow {
	struct sk_buff	  *head;
	struct sk_buff	  *tail;
	struct list_head  flowchain;
	int		  deficit;
	u32		  dropped; /* number of drops (or ECN marks) on this flow */
	struct codel_vars cvars;
}; /* please try to keep this structure <= 64 bytes */

struct fq_codel_sched_data {
	struct tcf_proto *filter_list;	/* optional external classifier */
	struct fq_codel_flow *flows;	/* Flows table [flows_cnt] */
	u32		*backlogs;	/* backlog table [flows_cnt] */
	u32		flows_cnt;	/* number of flows */
	u32		perturbation;	/* hash perturbation */
	u32		quantum;	/* psched_mtu(qdisc_dev(sch)); */
	struct codel_params cparams;
	struct codel_stats cstats;
	u32		drop_overlimit;
	u32		new_flow_count;

	struct list_head new_flows;	/* list of new flows */
	struct list_head old_flows;	/* list of old flows */
};

static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
				  const struct sk_buff *skb)
{
	struct flow_keys keys;
	unsigned int hash;

	skb_flow_dissect(skb, &keys);
	hash = jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src ^ keys.ip_proto,
			    (__force u32)keys.ports, q->perturbation);
	return ((u64)hash * q->flows_cnt) >> 32;
}

static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->flows_cnt)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
		return fq_codel_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->flows_cnt)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

/* helper functions : might be changed when/if skb use a standard list_head */

/* remove one skb from head of slot queue */
static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
{
	struct sk_buff *skb = flow->head;

	flow->head = skb->next;
	skb->next = NULL;
	return skb;
}

/* add skb to flow queue (tail add) */
static inline void flow_queue_add(struct fq_codel_flow *flow,
				  struct sk_buff *skb)
{
	if (flow->head == NULL)
		flow->head = skb;
	else
		flow->tail->next = skb;
	flow->tail = skb;
	skb->next = NULL;
}

static unsigned int fq_codel_drop(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int maxbacklog = 0, idx = 0, i, len;
	struct fq_codel_flow *flow;

	/* Queue is full! Find the fat flow and drop packet from it.
	 * This might sound expensive, but with 1024 flows, we scan
	 * 4KB of memory, and we dont need to handle a complex tree
	 * in fast path (packet queue/enqueue) with many cache misses.
	 */
	for (i = 0; i < q->flows_cnt; i++) {
		if (q->backlogs[i] > maxbacklog) {
			maxbacklog = q->backlogs[i];
			idx = i;
		}
	}
	flow = &q->flows[idx];
	skb = dequeue_head(flow);
	len = qdisc_pkt_len(skb);
	q->backlogs[idx] -= len;
	kfree_skb(skb);
	sch->q.qlen--;
	sch->qstats.drops++;
	sch->qstats.backlog -= len;
	flow->dropped++;
	return idx;
}

static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	unsigned int idx;
	struct fq_codel_flow *flow;
	int uninitialized_var(ret);

	idx = fq_codel_classify(skb, sch, &ret);
	if (idx == 0) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}
	idx--;

	codel_set_enqueue_time(skb);
	flow = &q->flows[idx];
	flow_queue_add(flow, skb);
	q->backlogs[idx] += qdisc_pkt_len(skb);
	sch->qstats.backlog += qdisc_pkt_len(skb);

	if (list_empty(&flow->flowchain)) {
		list_add_tail(&flow->flowchain, &q->new_flows);
		codel_vars_init(&flow->cvars);
		q->new_flow_count++;
		flow->deficit = q->quantum;
		flow->dropped = 0;
	}
	if (++sch->q.qlen < sch->limit)
		return NET_XMIT_SUCCESS;

	q->drop_overlimit++;
	/* Return Congestion Notification only if we dropped a packet
	 * from this flow.
	 */
	if (fq_codel_drop(sch) == idx)
		return NET_XMIT_CN;

	/* As we dropped a packet, better let upper stack know this */
	qdisc_tree_decrease_qlen(sch, 1);
	return NET_XMIT_SUCCESS;
}

/* This is the specific function called from codel_dequeue()
 * to dequeue a packet from queue. Note: backlog is handled in
 * codel, we dont need to reduce it here.
 */
static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
{
	struct fq_codel_flow *flow;
	struct sk_buff *skb = NULL;

	flow = container_of(vars, struct fq_codel_flow, cvars);
	if (flow->head) {
		skb = dequeue_head(flow);
		sch->qstats.backlog -= qdisc_pkt_len(skb);
		sch->q.qlen--;
	}
	return skb;
}

static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	struct fq_codel_flow *flow;
	struct list_head *head;
	u32 prev_drop_count, prev_ecn_mark;

begin:
	head = &q->new_flows;
	if (list_empty(head)) {
		head = &q->old_flows;
		if (list_empty(head))
			return NULL;
	}
	flow = list_first_entry(head, struct fq_codel_flow, flowchain);

	if (flow->deficit <= 0) {
		flow->deficit += q->quantum;
		list_move_tail(&flow->flowchain, &q->old_flows);
		goto begin;
	}

	prev_drop_count = q->cstats.drop_count;
	prev_ecn_mark = q->cstats.ecn_mark;

	skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
			    dequeue, &q->backlogs[flow - q->flows]);

	flow->dropped += q->cstats.drop_count - prev_drop_count;
	flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;

	if (!skb) {
		/* force a pass through old_flows to prevent starvation */
		if ((head == &q->new_flows) && !list_empty(&q->old_flows))
			list_move_tail(&flow->flowchain, &q->old_flows);
		else
			list_del_init(&flow->flowchain);
		goto begin;
	}
	qdisc_bstats_update(sch, skb);
	flow->deficit -= qdisc_pkt_len(skb);
	/* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
	 * or HTB crashes. Defer it for next round.
	 */
	if (q->cstats.drop_count && sch->q.qlen) {
		qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
		q->cstats.drop_count = 0;
	}
	return skb;
}

static void fq_codel_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = fq_codel_dequeue(sch)) != NULL)
		kfree_skb(skb);
}

static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
	[TCA_FQ_CODEL_TARGET]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_LIMIT]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_INTERVAL]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_ECN]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_FLOWS]	= { .type = NLA_U32 },
	[TCA_FQ_CODEL_QUANTUM]	= { .type = NLA_U32 },
};

static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy);
	if (err < 0)
		return err;
	if (tb[TCA_FQ_CODEL_FLOWS]) {
		if (q->flows)
			return -EINVAL;
		q->flows_cnt = nla_get_u32(tb[TCA_FQ_CODEL_FLOWS]);
		if (!q->flows_cnt ||
		    q->flows_cnt > 65536)
			return -EINVAL;
	}
	sch_tree_lock(sch);

	if (tb[TCA_FQ_CODEL_TARGET]) {
		u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);

		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
	}

	if (tb[TCA_FQ_CODEL_INTERVAL]) {
		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);

		q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
	}

	if (tb[TCA_FQ_CODEL_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);

	if (tb[TCA_FQ_CODEL_ECN])
		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);

	if (tb[TCA_FQ_CODEL_QUANTUM])
		q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));

	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = fq_codel_dequeue(sch);

		kfree_skb(skb);
		q->cstats.drop_count++;
	}
	qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
	q->cstats.drop_count = 0;

	sch_tree_unlock(sch);
	return 0;
}

static void *fq_codel_zalloc(size_t sz)
{
	void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);

	if (!ptr)
		ptr = vzalloc(sz);
	return ptr;
}

static void fq_codel_free(void *addr)
{
	if (addr) {
		if (is_vmalloc_addr(addr))
			vfree(addr);
		else
			kfree(addr);
	}
}

static void fq_codel_destroy(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	fq_codel_free(q->backlogs);
	fq_codel_free(q->flows);
}

static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	int i;

	sch->limit = 10*1024;
	q->flows_cnt = 1024;
	q->quantum = psched_mtu(qdisc_dev(sch));
	q->perturbation = net_random();
	INIT_LIST_HEAD(&q->new_flows);
	INIT_LIST_HEAD(&q->old_flows);
	codel_params_init(&q->cparams);
	codel_stats_init(&q->cstats);
	q->cparams.ecn = true;

	if (opt) {
		int err = fq_codel_change(sch, opt);
		if (err)
			return err;
	}

	if (!q->flows) {
		q->flows = fq_codel_zalloc(q->flows_cnt *
					   sizeof(struct fq_codel_flow));
		if (!q->flows)
			return -ENOMEM;
		q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32));
		if (!q->backlogs) {
			fq_codel_free(q->flows);
			return -ENOMEM;
		}
		for (i = 0; i < q->flows_cnt; i++) {
			struct fq_codel_flow *flow = q->flows + i;

			INIT_LIST_HEAD(&flow->flowchain);
		}
	}
	if (sch->limit >= 1)
		sch->flags |= TCQ_F_CAN_BYPASS;
	else
		sch->flags &= ~TCQ_F_CAN_BYPASS;
	return 0;
}

static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
			codel_time_to_us(q->cparams.target)) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
			sch->limit) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
			codel_time_to_us(q->cparams.interval)) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
			q->cparams.ecn) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
			q->quantum) ||
	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
			q->flows_cnt))
		goto nla_put_failure;

	nla_nest_end(skb, opts);
	return skb->len;

nla_put_failure:
	return -1;
}

static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	struct tc_fq_codel_xstats st = {
		.type				= TCA_FQ_CODEL_XSTATS_QDISC,
		.qdisc_stats.maxpacket		= q->cstats.maxpacket,
		.qdisc_stats.drop_overlimit	= q->drop_overlimit,
		.qdisc_stats.ecn_mark		= q->cstats.ecn_mark,
		.qdisc_stats.new_flow_count	= q->new_flow_count,
	};
	struct list_head *pos;

	list_for_each(pos, &q->new_flows)
		st.qdisc_stats.new_flows_len++;

	list_for_each(pos, &q->old_flows)
		st.qdisc_stats.old_flows_len++;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}

static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
				   u32 classid)
{
	/* we cannot bypass queue discipline anymore */
	sch->flags &= ~TCQ_F_CAN_BYPASS;
	return 0;
}

static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
			       struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				     struct gnet_dump *d)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	u32 idx = cl - 1;
	struct gnet_stats_queue qs = { 0 };
	struct tc_fq_codel_xstats xstats;

	if (idx < q->flows_cnt) {
		const struct fq_codel_flow *flow = &q->flows[idx];
		const struct sk_buff *skb = flow->head;

		memset(&xstats, 0, sizeof(xstats));
		xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
		xstats.class_stats.deficit = flow->deficit;
		xstats.class_stats.ldelay =
			codel_time_to_us(flow->cvars.ldelay);
		xstats.class_stats.count = flow->cvars.count;
		xstats.class_stats.lastcount = flow->cvars.lastcount;
		xstats.class_stats.dropping = flow->cvars.dropping;
		if (flow->cvars.dropping) {
			codel_tdiff_t delta = flow->cvars.drop_next -
					      codel_get_time();

			xstats.class_stats.drop_next = (delta >= 0) ?
				codel_time_to_us(delta) :
				-codel_time_to_us(-delta);
		}
		while (skb) {
			qs.qlen++;
			skb = skb->next;
		}
		qs.backlog = q->backlogs[idx];
		qs.drops = flow->dropped;
	}
	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	if (idx < q->flows_cnt)
		return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
	return 0;
}

static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->flows_cnt; i++) {
		if (list_empty(&q->flows[i].flowchain) ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops fq_codel_class_ops = {
	.leaf		=	fq_codel_leaf,
	.get		=	fq_codel_get,
	.put		=	fq_codel_put,
	.tcf_chain	=	fq_codel_find_tcf,
	.bind_tcf	=	fq_codel_bind,
	.unbind_tcf	=	fq_codel_put,
	.dump		=	fq_codel_dump_class,
	.dump_stats	=	fq_codel_dump_class_stats,
	.walk		=	fq_codel_walk,
};

static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
	.cl_ops		=	&fq_codel_class_ops,
	.id		=	"fq_codel",
	.priv_size	=	sizeof(struct fq_codel_sched_data),
	.enqueue	=	fq_codel_enqueue,
	.dequeue	=	fq_codel_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	fq_codel_drop,
	.init		=	fq_codel_init,
	.reset		=	fq_codel_reset,
	.destroy	=	fq_codel_destroy,
	.change		=	fq_codel_change,
	.dump		=	fq_codel_dump,
	.dump_stats	=	fq_codel_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init fq_codel_module_init(void)
{
	return register_qdisc(&fq_codel_qdisc_ops);
}

static void __exit fq_codel_module_exit(void)
{
	unregister_qdisc(&fq_codel_qdisc_ops);
}

module_init(fq_codel_module_init)
module_exit(fq_codel_module_exit)
MODULE_AUTHOR("Eric Dumazet");
MODULE_LICENSE("GPL");