Repository: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (Linux kernel mirror, for testing)

net: sched: add Flow Queue PIE packet scheduler

Principles:
- Packets are classified into flows.
- This is a stochastic model (since a hash is used, several flows
  might be hashed to the same slot).
- Each flow has a PIE-managed queue.
- Flows are linked onto two (Round Robin) lists,
  so that new flows have priority over old ones.
- For a given flow, packets are not reordered.
- Drops during enqueue only.
- ECN capability is off by default.
- ECN threshold (if ECN is enabled) is at 10% by default.
- Uses timestamps to calculate queue delay by default.

Usage:
tc qdisc ... fq_pie [ limit PACKETS ] [ flows NUMBER ]
[ target TIME ] [ tupdate TIME ]
[ alpha NUMBER ] [ beta NUMBER ]
[ quantum BYTES ] [ memory_limit BYTES ]
[ ecnprob PERCENTAGE ] [ [no]ecn ]
[ [no]bytemode ] [ [no_]dq_rate_estimator ]

defaults:
limit: 10240 packets, flows: 1024
target: 15 ms, tupdate: 15 ms (in jiffies)
alpha: 1/8, beta: 5/4
quantum: device MTU, memory_limit: 32 MB
ecnprob: 10%, ecn: off
bytemode: off, dq_rate_estimator: off
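
For example, to attach fq_pie as the root qdisc on a device and inspect
its statistics (an illustrative invocation; the device name eth0 is
assumed):

    tc qdisc add dev eth0 root fq_pie limit 10240 flows 1024 ecn
    tc -s qdisc show dev eth0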

Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Signed-off-by: Sachin D. Patil <sdp.sachin@gmail.com>
Signed-off-by: V. Saicharan <vsaicharan1998@gmail.com>
Signed-off-by: Mohit Bhasi <mohitbhasi1998@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Signed-off-by: Gautam Ramakrishnan <gautamramk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Mohit P. Tahiliani, committed by David S. Miller
commit ec97ecf1 (parent 5205ea00)

5 files changed, 609 insertions(+)

include/net/pie.h (+2)
@@ -81,9 +81,11 @@
 /**
  * struct pie_skb_cb - contains private skb vars
  * @enqueue_time: timestamp when the packet is enqueued
+ * @mem_usage: size of the skb during enqueue
  */
 struct pie_skb_cb {
 	psched_time_t enqueue_time;
+	u32 mem_usage;
 };
 
 static inline void pie_params_init(struct pie_params *params)

include/uapi/linux/pkt_sched.h (+31)
@@ -971,6 +971,37 @@
 	__u32 ecn_mark;          /* packets marked with ecn*/
 };
 
+/* FQ PIE */
+enum {
+	TCA_FQ_PIE_UNSPEC,
+	TCA_FQ_PIE_LIMIT,
+	TCA_FQ_PIE_FLOWS,
+	TCA_FQ_PIE_TARGET,
+	TCA_FQ_PIE_TUPDATE,
+	TCA_FQ_PIE_ALPHA,
+	TCA_FQ_PIE_BETA,
+	TCA_FQ_PIE_QUANTUM,
+	TCA_FQ_PIE_MEMORY_LIMIT,
+	TCA_FQ_PIE_ECN_PROB,
+	TCA_FQ_PIE_ECN,
+	TCA_FQ_PIE_BYTEMODE,
+	TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
+	__TCA_FQ_PIE_MAX
+};
+#define TCA_FQ_PIE_MAX   (__TCA_FQ_PIE_MAX - 1)
+
+struct tc_fq_pie_xstats {
+	__u32 packets_in;	/* total number of packets enqueued */
+	__u32 dropped;		/* packets dropped due to fq_pie_action */
+	__u32 overlimit;	/* dropped due to lack of space in queue */
+	__u32 overmemory;	/* dropped due to lack of memory in queue */
+	__u32 ecn_mark;		/* packets marked with ecn */
+	__u32 new_flow_count;	/* count of new flows created by packets */
+	__u32 new_flows_len;	/* count of flows in new list */
+	__u32 old_flows_len;	/* count of flows in old list */
+	__u32 memory_usage;	/* total memory across all queues */
+};
+
 /* CBS */
 struct tc_cbs_qopt {
 	__u8 offload;
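
For illustration, a userspace consumer of these statistics might decode
the TCA_STATS_APP payload that the qdisc emits via gnet_stats_copy_app()
like this (a sketch; print_fq_pie_xstats is a hypothetical helper and the
surrounding netlink parsing is assumed):

#include <stdio.h>
#include <string.h>
#include <linux/pkt_sched.h>

/* Hypothetical helper: decode the xstats blob dumped by fq_pie */
static void print_fq_pie_xstats(const void *data, unsigned int len)
{
	struct tc_fq_pie_xstats st;

	if (len < sizeof(st))
		return;	/* truncated dump or older kernel */
	memcpy(&st, data, sizeof(st));
	printf("enqueued %u dropped %u overlimit %u overmemory %u ecn_mark %u\n",
	       st.packets_in, st.dropped, st.overlimit, st.overmemory,
	       st.ecn_mark);
	printf("new flows %u (list len %u), old list len %u, memory %u bytes\n",
	       st.new_flow_count, st.new_flows_len, st.old_flows_len,
	       st.memory_usage);
}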

net/sched/Kconfig (+13)
@@ -366,6 +366,19 @@
 
 	  If unsure, say N.
 
+config NET_SCH_FQ_PIE
+	depends on NET_SCH_PIE
+	tristate "Flow Queue Proportional Integral controller Enhanced (FQ-PIE)"
+	help
+	  Say Y here if you want to use the Flow Queue Proportional Integral
+	  controller Enhanced (FQ-PIE) packet scheduling algorithm.
+	  For more information, please see https://tools.ietf.org/html/rfc8033
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_fq_pie.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress/classifier-action Qdisc"
 	depends on NET_CLS_ACT
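
To make the new qdisc available, enable the option in the kernel
configuration (a minimal sketch, assuming a modular build; tc normally
auto-loads the module on first use):

    CONFIG_NET_SCH_PIE=m
    CONFIG_NET_SCH_FQ_PIE=m

    # or load it explicitly on the running kernel:
    modprobe sch_fq_pie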

net/sched/Makefile (+1)
@@ -59,6 +59,7 @@
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
+obj-$(CONFIG_NET_SCH_FQ_PIE)	+= sch_fq_pie.o
 obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
 obj-$(CONFIG_NET_SCH_ETF)	+= sch_etf.o
 obj-$(CONFIG_NET_SCH_TAPRIO)	+= sch_taprio.o

net/sched/sch_fq_pie.c (+562, new file)
// SPDX-License-Identifier: GPL-2.0-only
/* Flow Queue PIE discipline
 *
 * Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in>
 * Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com>
 * Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com>
 * Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com>
 * Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com>
 * Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com>
 */

#include <linux/jhash.h>
#include <linux/sizes.h>
#include <linux/vmalloc.h>
#include <net/pkt_cls.h>
#include <net/pie.h>

/* Flow Queue PIE
 *
 * Principles:
 *   - Packets are classified into flows.
 *   - This is a stochastic model (since a hash is used, several flows
 *     might be hashed to the same slot).
 *   - Each flow has a PIE-managed queue.
 *   - Flows are linked onto two (Round Robin) lists,
 *     so that new flows have priority over old ones.
 *   - For a given flow, packets are not reordered.
 *   - Drops during enqueue only.
 *   - ECN capability is off by default.
 *   - ECN threshold (if ECN is enabled) is at 10% by default.
 *   - Uses timestamps to calculate queue delay by default.
 */

/**
 * struct fq_pie_flow - contains data for each flow
 * @vars: pie vars associated with the flow
 * @deficit: number of remaining byte credits
 * @backlog: size of data in the flow
 * @qlen: number of packets in the flow
 * @flowchain: flowchain for the flow
 * @head: first packet in the flow
 * @tail: last packet in the flow
 */
struct fq_pie_flow {
	struct pie_vars vars;
	s32 deficit;
	u32 backlog;
	u32 qlen;
	struct list_head flowchain;
	struct sk_buff *head;
	struct sk_buff *tail;
};

struct fq_pie_sched_data {
	struct tcf_proto __rcu *filter_list; /* optional external classifier */
	struct tcf_block *block;
	struct fq_pie_flow *flows;
	struct Qdisc *sch;
	struct list_head old_flows;
	struct list_head new_flows;
	struct pie_params p_params;
	u32 ecn_prob;
	u32 flows_cnt;
	u32 quantum;
	u32 memory_limit;
	u32 new_flow_count;
	u32 memory_usage;
	u32 overmemory;
	struct pie_stats stats;
	struct timer_list adapt_timer;
};

static unsigned int fq_pie_hash(const struct fq_pie_sched_data *q,
				struct sk_buff *skb)
{
	return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}

static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
				    int *qerr)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct tcf_proto *filter;
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->flows_cnt)
		return TC_H_MIN(skb->priority);

	filter = rcu_dereference_bh(q->filter_list);
	if (!filter)
		return fq_pie_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tcf_classify(skb, filter, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->flows_cnt)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

/* add skb to flow queue (tail add) */
static inline void flow_queue_add(struct fq_pie_flow *flow,
				  struct sk_buff *skb)
{
	if (!flow->head)
		flow->head = skb;
	else
		flow->tail->next = skb;
	flow->tail = skb;
	skb->next = NULL;
}

static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct fq_pie_flow *sel_flow;
	int uninitialized_var(ret);
	u8 memory_limited = false;
	u8 enqueue = false;
	u32 pkt_len;
	u32 idx;

	/* Classifies packet into corresponding flow; fq_pie_classify()
	 * returns 0 when the filter asked for a drop, otherwise a
	 * 1-based flow index, which must be decremented before use.
	 */
	idx = fq_pie_classify(skb, sch, &ret);
	if (idx == 0) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
	}
	idx--;

	sel_flow = &q->flows[idx];

	/* Checks whether adding a new packet would exceed memory limit */
	get_pie_cb(skb)->mem_usage = skb->truesize;
	memory_limited = q->memory_usage > q->memory_limit + skb->truesize;

	/* Checks if the qdisc is full */
	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
		q->stats.overlimit++;
		goto out;
	} else if (unlikely(memory_limited)) {
		q->overmemory++;
	}

	if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars,
			    sel_flow->backlog, skb->len)) {
		enqueue = true;
	} else if (q->p_params.ecn &&
		   sel_flow->vars.prob <= (MAX_PROB / 100) * q->ecn_prob &&
		   INET_ECN_set_ce(skb)) {
		/* If packet is ecn capable, mark it if drop probability
		 * is lower than the parameter ecn_prob, else drop it.
		 */
		q->stats.ecn_mark++;
		enqueue = true;
	}

	if (enqueue) {
		/* Set enqueue time only when dq_rate_estimator is disabled. */
		if (!q->p_params.dq_rate_estimator)
			pie_set_enqueue_time(skb);

		pkt_len = qdisc_pkt_len(skb);
		q->stats.packets_in++;
		q->memory_usage += skb->truesize;
		sch->qstats.backlog += pkt_len;
		sch->q.qlen++;
		flow_queue_add(sel_flow, skb);
		if (list_empty(&sel_flow->flowchain)) {
			list_add_tail(&sel_flow->flowchain, &q->new_flows);
			q->new_flow_count++;
			sel_flow->deficit = q->quantum;
			sel_flow->qlen = 0;
			sel_flow->backlog = 0;
		}
		sel_flow->qlen++;
		sel_flow->backlog += pkt_len;
		return NET_XMIT_SUCCESS;
	}
out:
	q->stats.dropped++;
	sel_flow->vars.accu_prob = 0;
	sel_flow->vars.accu_prob_overflows = 0;
	__qdisc_drop(skb, to_free);
	qdisc_qstats_drop(sch);
	return NET_XMIT_CN;
}

static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
	[TCA_FQ_PIE_LIMIT]		= {.type = NLA_U32},
	[TCA_FQ_PIE_FLOWS]		= {.type = NLA_U32},
	[TCA_FQ_PIE_TARGET]		= {.type = NLA_U32},
	[TCA_FQ_PIE_TUPDATE]		= {.type = NLA_U32},
	[TCA_FQ_PIE_ALPHA]		= {.type = NLA_U32},
	[TCA_FQ_PIE_BETA]		= {.type = NLA_U32},
	[TCA_FQ_PIE_QUANTUM]		= {.type = NLA_U32},
	[TCA_FQ_PIE_MEMORY_LIMIT]	= {.type = NLA_U32},
	[TCA_FQ_PIE_ECN_PROB]		= {.type = NLA_U32},
	[TCA_FQ_PIE_ECN]		= {.type = NLA_U32},
	[TCA_FQ_PIE_BYTEMODE]		= {.type = NLA_U32},
	[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]	= {.type = NLA_U32},
};

static inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow)
{
	struct sk_buff *skb = flow->head;

	flow->head = skb->next;
	skb->next = NULL;
	return skb;
}

static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct fq_pie_flow *flow;
	struct list_head *head;
	u32 pkt_len;

begin:
	head = &q->new_flows;
	if (list_empty(head)) {
		head = &q->old_flows;
		if (list_empty(head))
			return NULL;
	}

	flow = list_first_entry(head, struct fq_pie_flow, flowchain);
	/* Flow has exhausted all its credits */
	if (flow->deficit <= 0) {
		flow->deficit += q->quantum;
		list_move_tail(&flow->flowchain, &q->old_flows);
		goto begin;
	}

	if (flow->head) {
		skb = dequeue_head(flow);
		pkt_len = qdisc_pkt_len(skb);
		sch->qstats.backlog -= pkt_len;
		sch->q.qlen--;
		qdisc_bstats_update(sch, skb);
	}

	if (!skb) {
		/* force a pass through old_flows to prevent starvation */
		if (head == &q->new_flows && !list_empty(&q->old_flows))
			list_move_tail(&flow->flowchain, &q->old_flows);
		else
			list_del_init(&flow->flowchain);
		goto begin;
	}

	flow->qlen--;
	flow->deficit -= pkt_len;
	flow->backlog -= pkt_len;
	q->memory_usage -= get_pie_cb(skb)->mem_usage;
	pie_process_dequeue(skb, &q->p_params, &flow->vars, flow->backlog);
	return skb;
}

static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_FQ_PIE_MAX + 1];
	unsigned int len_dropped = 0;
	unsigned int num_dropped = 0;
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack);
	if (err < 0)
		return err;

	sch_tree_lock(sch);
	if (tb[TCA_FQ_PIE_LIMIT]) {
		u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);

		q->p_params.limit = limit;
		sch->limit = limit;
	}
	if (tb[TCA_FQ_PIE_FLOWS]) {
		if (q->flows) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Number of flows cannot be changed");
			goto flow_error;
		}
		q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
		if (!q->flows_cnt || q->flows_cnt > 65536) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Number of flows must range in [1..65536]");
			goto flow_error;
		}
	}

	/* convert from microseconds to pschedtime */
	if (tb[TCA_FQ_PIE_TARGET]) {
		/* target is in us */
		u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);

		/* convert to pschedtime */
		q->p_params.target =
			PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
	}

	/* tupdate is in jiffies */
	if (tb[TCA_FQ_PIE_TUPDATE])
		q->p_params.tupdate =
			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));

	if (tb[TCA_FQ_PIE_ALPHA])
		q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);

	if (tb[TCA_FQ_PIE_BETA])
		q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);

	if (tb[TCA_FQ_PIE_QUANTUM])
		q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);

	if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
		q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);

	if (tb[TCA_FQ_PIE_ECN_PROB])
		q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);

	if (tb[TCA_FQ_PIE_ECN])
		q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);

	if (tb[TCA_FQ_PIE_BYTEMODE])
		q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);

	if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
		q->p_params.dq_rate_estimator =
			nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);

	/* Drop excess packets if new limit is lower */
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);

		/* account for the packet before freeing it */
		len_dropped += qdisc_pkt_len(skb);
		num_dropped += 1;
		kfree_skb(skb);
	}
	qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);

	sch_tree_unlock(sch);
	return 0;

flow_error:
	sch_tree_unlock(sch);
	return -EINVAL;
}

static void fq_pie_timer(struct timer_list *t)
{
	struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock; /* to lock qdisc for probability calculations */
	u32 idx;

	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);

	for (idx = 0; idx < q->flows_cnt; idx++)
		pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
					  q->flows[idx].backlog);

	/* reset the timer to fire after 'tupdate' jiffies. */
	if (q->p_params.tupdate)
		mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);

	spin_unlock(root_lock);
}

static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	int err;
	u32 idx;

	pie_params_init(&q->p_params);
	sch->limit = 10 * 1024;
	q->p_params.limit = sch->limit;
	q->quantum = psched_mtu(qdisc_dev(sch));
	q->sch = sch;
	q->ecn_prob = 10;
	q->flows_cnt = 1024;
	q->memory_limit = SZ_32M;

	INIT_LIST_HEAD(&q->new_flows);
	INIT_LIST_HEAD(&q->old_flows);

	/* Set up the timer before anything that can fail, so that an
	 * error path through fq_pie_destroy() can del_timer_sync() it.
	 */
	timer_setup(&q->adapt_timer, fq_pie_timer, 0);

	if (opt) {
		err = fq_pie_change(sch, opt, extack);

		if (err)
			return err;
	}

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		goto init_failure;

	q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_pie_flow),
			    GFP_KERNEL);
	if (!q->flows) {
		err = -ENOMEM;
		goto init_failure;
	}
	for (idx = 0; idx < q->flows_cnt; idx++) {
		struct fq_pie_flow *flow = q->flows + idx;

		INIT_LIST_HEAD(&flow->flowchain);
		pie_vars_init(&flow->vars);
	}

	mod_timer(&q->adapt_timer, jiffies + HZ / 2);

	return 0;

init_failure:
	q->flows_cnt = 0;

	return err;
}

static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (!opts)
		return -EMSGSIZE;

	/* convert target from pschedtime to us */
	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
	    nla_put_u32(skb, TCA_FQ_PIE_TARGET,
			((u32)PSCHED_TICKS2NS(q->p_params.target)) /
			NSEC_PER_USEC) ||
	    nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
			jiffies_to_usecs(q->p_params.tupdate)) ||
	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
	    nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
	    nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
	    nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
			q->p_params.dq_rate_estimator))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct tc_fq_pie_xstats st = {
		.packets_in	= q->stats.packets_in,
		.overlimit	= q->stats.overlimit,
		.overmemory	= q->overmemory,
		.dropped	= q->stats.dropped,
		.ecn_mark	= q->stats.ecn_mark,
		.new_flow_count	= q->new_flow_count,
		.memory_usage	= q->memory_usage,
	};
	struct list_head *pos;

	sch_tree_lock(sch);
	list_for_each(pos, &q->new_flows)
		st.new_flows_len++;

	list_for_each(pos, &q->old_flows)
		st.old_flows_len++;
	sch_tree_unlock(sch);

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void fq_pie_reset(struct Qdisc *sch)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	u32 idx;

	INIT_LIST_HEAD(&q->new_flows);
	INIT_LIST_HEAD(&q->old_flows);
	for (idx = 0; idx < q->flows_cnt; idx++) {
		struct fq_pie_flow *flow = q->flows + idx;

		/* Removes all packets from flow */
		rtnl_kfree_skbs(flow->head, flow->tail);
		flow->head = NULL;

		INIT_LIST_HEAD(&flow->flowchain);
		pie_vars_init(&flow->vars);
	}

	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
}

static void fq_pie_destroy(struct Qdisc *sch)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);

	tcf_block_put(q->block);
	del_timer_sync(&q->adapt_timer);
	kvfree(q->flows);
}

static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
	.id		= "fq_pie",
	.priv_size	= sizeof(struct fq_pie_sched_data),
	.enqueue	= fq_pie_qdisc_enqueue,
	.dequeue	= fq_pie_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= fq_pie_init,
	.destroy	= fq_pie_destroy,
	.reset		= fq_pie_reset,
	.change		= fq_pie_change,
	.dump		= fq_pie_dump,
	.dump_stats	= fq_pie_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init fq_pie_module_init(void)
{
	return register_qdisc(&fq_pie_qdisc_ops);
}

static void __exit fq_pie_module_exit(void)
{
	unregister_qdisc(&fq_pie_qdisc_ops);
}

module_init(fq_pie_module_init);
module_exit(fq_pie_module_exit);

MODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE)");
MODULE_AUTHOR("Mohit P. Tahiliani");
MODULE_LICENSE("GPL");