Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. Leaves ALWAYS have level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
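
/* Illustrative example (not part of the original source): with
 * TC_HTB_MAXDEPTH == 8, a hierarchy 1:1 -> 1:10 -> 1:100 gets
 *
 *	1:1	(root)		level 7	= TC_HTB_MAXDEPTH - 1
 *	1:10	(interior)	level 6	= parent's level - 1
 *	1:100	(leaf)		level 0	always
 *
 * so levels are counted from the root down, while every leaf sits at
 * level 0 no matter how deep it is in the configured tree.
 */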

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
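
/* Usage note (illustrative, not part of the original source): both knobs
 * are plain module parameters, so they can be set at load time, e.g.
 *
 *	modprobe sch_htb htb_hysteresis=1 htb_rate_est=1
 *
 * or flipped at runtime through sysfs (mode 0640 above) via
 * /sys/module/sch_htb/parameters/htb_hysteresis.
 */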

/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When class changes from state 1->2 and disconnects from
	 * parent's feed then we lose the ptr value and start from the
	 * first child again. Here we store the classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	u32			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
	struct tcf_block	*block;
	int			filter_cnt;

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct net_rate_estimator __rcu *rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_basic_sync bstats_bias;
	struct tc_htb_xstats	xstats;	/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			int		deficit[TC_HTB_MAXDEPTH];
			struct Qdisc	*q;
			struct netdev_queue *offload_queue;
		} leaf;
		struct htb_class_inner {
			struct htb_prio clprio[TC_HTB_NUMPRIO];
		} inner;
	};
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */

	unsigned int drops ____cacheline_aligned_in_smp;
	unsigned int		overlimits;
};

struct htb_level {
	struct rb_root	wait_pq;
	struct htb_prio hprio[TC_HTB_NUMPRIO];
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	int			defcls;		/* class where unclassified flows go to */
	int			rate2quantum;	/* quant = rate / rate2quantum */

	/* filters for qdisc itself */
	struct tcf_proto __rcu	*filter_list;
	struct tcf_block	*block;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int		warned;	/* only one warning */
	int			direct_qlen;
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
	struct qdisc_skb_head	direct_queue;
	u32			direct_pkts;
	u32			overlimits;

	struct qdisc_watchdog	watchdog;

	s64			now;	/* cached dequeue time */

	/* time of nearest event per level (row) */
	s64			near_ev_cache[TC_HTB_MAXDEPTH];

	int			row_mask[TC_HTB_MAXDEPTH];

	struct htb_level	hlevel[TC_HTB_MAXDEPTH];

	struct Qdisc		**direct_qdiscs;
	unsigned int		num_direct_qdiscs;

	bool			offload;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
	return (unsigned long)htb_find(handle, sch);
}

#define HTB_DIRECT ((struct htb_class *)-1L)

/**
 * htb_classify - classify a packet into class
 * @skb: the socket buffer
 * @sch: the active queue discipline
 * @qerr: pointer for returned status code
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases a leaf class is
 * returned. We allow direct class selection by classid in priority. Then
 * we examine filters in qdisc and in inner nodes (if higher filter points
 * to the inner node). If we end up with classid MAJOR:0 we enqueue the
 * skb into the special internal fifo (direct). These packets then go
 * directly thru. If we still have no valid leaf we try to use the
 * MAJOR:default leaf. If still unsuccessful, finish and return the
 * direct queue.
 */
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow selecting the class by setting skb->priority to a valid
	 * classid; note that nfmark can be used too by attaching filter fw
	 * with no rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl) {
		if (cl->level == 0)
			return cl;
		/* Start with inner filter chain if a non-leaf class is selected */
		tcf = rcu_dereference_bh(cl->filter_list);
	} else {
		tcf = rcu_dereference_bh(q->filter_list);
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = rcu_dereference_bh(cl->filter_list);
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
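
/* Illustrative configuration (not part of the original source): given the
 * lookup order above, a filter such as
 *
 *	tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
 *		match ip dport 80 0xffff flowid 1:10
 *
 * steers matching packets to leaf 1:10, while an application that sets
 * skb->priority (SO_PRIORITY) to a valid leaf classid, e.g. 0x00010010
 * for 1:10, bypasses the filters entirely, and anything left unmatched
 * falls back to the "default" minor id configured on the qdisc.
 */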

/**
 * htb_add_to_id_tree - adds class to the round robin list
 * @root: the root of the tree
 * @cl: the class to add
 * @prio: the given prio in the class
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such a list for the given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 * @q: the priority event queue
 * @cl: the class to add
 * @delay: delay in nanoseconds
 *
 * The class is added to the priority event queue to indicate that the class
 * will change its mode at time cl->pq_key (q->now + delay nanoseconds).
 * Make sure that the class is not already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 * @n: the current node in binary tree
 *
 * When we are past the last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 * @q: the priority event queue
 * @cl: the class to add
 * @mask: the given priorities in class in bitmap
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 * @q: the priority event queue
 * @cl: the class to remove
 * @mask: the given priorities in class in bitmap
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates the active class's feed chain
 * @q: the priority event queue
 * @cl: the class to activate
 *
 * The class is connected to ancestors and/or appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use, so
				 * reset the bit in mask as the parent is
				 * already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 * @q: the priority event queue
 * @cl: the class to deactivate
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. The class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing a child which is pointed to
				 * from the parent feed - forget the pointer
				 * but remember the classid
				 */
				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->inner.clprio[prio].ptr = NULL;
			}

			htb_safe_rb_erase(cl->node + prio,
					  &p->inner.clprio[prio].feed);

			if (!p->inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;
	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}
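
/* Illustrative numbers (not part of the original source): with hysteresis
 * off both watermarks are 0, so a class changes mode the moment its
 * (c)token count crosses zero. With htb_hysteresis=1, a class that can
 * still send is not moved to HTB_CANT_SEND until ctokens fall below
 * -cbuffer, and a class already in HTB_CANT_SEND must refill all the way
 * back to 0 before it may borrow again, trading up to one bucket's worth
 * of accuracy for fewer mode transitions (the speed gain noted below).
 */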

/**
 * htb_class_mode - computes and returns current class mode
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to the time difference
 * from now to the time when cl will change its state.
 * Also, it is worth noting that the class mode doesn't change simply
 * at cl->{c,}tokens == 0; rather there can be hysteresis in the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
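
/* Worked example (illustrative, not part of the original source): suppose
 * hysteresis is off, cl->ctokens = -5000 ns, cl->tokens = -20000 ns, and
 * 12000 ns of wall time have passed since t_c (*diff = 12000). Then
 * ctokens + *diff = 7000 >= 0, so the ceil still allows sending, but
 * tokens + *diff = -8000 < 0, so the class's own rate is exhausted:
 * htb_class_mode() returns HTB_MAY_BORROW with *diff set to 8000, i.e.
 * the time that must elapse before the class can send on its own again.
 */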

/**
 * htb_change_class_mode - changes class's mode
 * @q: the priority event queue
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed list linkage, change
 * the mode and add the class to the wait event queue if appropriate.
 * The new mode should be different from the old one and cl->pq_key has
 * to be valid if changing to a mode other than HTB_CAN_SEND (see
 * htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND) {
		cl->overlimits++;
		q->overlimits++;
	}

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Routine learns the (new) priority of the leaf and activates the feed
 * chain for that prio. It can be called on an already active leaf safely.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Make sure that the leaf is active. In other words, it can't be called
 * with a non-active leaf.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	int ret;
	unsigned int len = qdisc_pkt_len(skb);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__qdisc_enqueue_tail(skb, &q->direct_queue);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch, to_free);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
					to_free)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			qdisc_qstats_drop(sch);
			cl->drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}
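
/* Worked example (illustrative, not part of the original source): for a
 * class with rate 12500000 bytes/s (100 Mbit/s), psched_l2t_ns() converts
 * a 1500-byte packet to 1500 / 12500000 s = 120000 ns of token time, so
 * each such dequeue drains 120 us from cl->tokens, while elapsed wall
 * time (diff) refills the bucket 1:1 up to the cl->buffer cap. The clamp
 * to 1 - cl->mbuffer keeps a long-idle or badly backlogged class from
 * accumulating an unbounded token surplus or debt.
 */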

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 * @q: the priority event queue
 * @cl: the class to start iterating from
 * @level: the minimum level to account
 * @skb: the socket buffer
 *
 * Routine assumes that a packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to the ceil leaky bucket for
 * the leaf and all ancestors and to the rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 * @q: the priority event queue
 * @level: which wait_pq in 'q->hlevel'
 * @start: start jiffies
 *
 * Scans the event queue for pending events and applies them. Returns the
 * time of the next pending event (0 for no event in pq, q->now for too
 * many events). Note: only events with cl->pq_key <= q->now are applied.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warn("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from the id-tree where the class's id is >= id.
 * NULL if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}
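
/* Illustrative example (not part of the original source): in an id-tree
 * holding classids 0x10010, 0x10020 and 0x10030 (1:10, 1:20 and 1:30 in
 * tc notation), a lookup with id 0x10015 returns the node of 1:20, the
 * smallest classid >= id, while a lookup with id 0x10031 runs off the
 * right edge and returns NULL. This is how a stale last_ptr_id hint is
 * turned back into a valid round-robin position below.
 */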

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 * @hprio: the current one
 * @prio: which prio in class
 *
 * Find the leaf where the current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;
	sp->pptr = &hprio->ptr;
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at the right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			struct htb_prio *clp;

			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			clp = &cl->inner.clprio[prio];
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is an active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->leaf.q->dequeue(cl->leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->leaf.q);
		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
					 &q->hlevel[0].hprio[prio].ptr);
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->leaf.deficit[level] < 0) {
			cl->leaf.deficit[level] += cl->quantum;
			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}
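
/* DRR note (illustrative, not part of the original source): with
 * cl->quantum == 10000 and a fresh deficit of 0, dequeuing a 1500-byte
 * packet drives the deficit to -1500, so the round-robin pointer advances
 * and the deficit is topped up to 8500; when its turn comes around again
 * the leaf can emit roughly quantum/packet-size packets back-to-back
 * before the pointer moves on. A larger quantum therefore gives each
 * leaf longer bursts relative to its siblings at the same prio.
 */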

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_get_ns();
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	if (likely(next_event > q->now))
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		schedule_work(&q->work);
fin:
	return skb;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->inner, 0, sizeof(cl->inner));
			else {
				if (cl->leaf.q && !q->offload)
					qdisc_reset(cl->leaf.q);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__qdisc_reset_queue(&q->direct_queue);
	memset(q->hlevel, 0, sizeof(q->hlevel));
	memset(q->row_mask, 0, sizeof(q->row_mask));
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
	[TCA_HTB_RATE64] = { .type = NLA_U64 },
	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	rcu_read_lock();
	__netif_schedule(qdisc_root(sch));
	rcu_read_unlock();
}

static void htb_set_lockdep_class_child(struct Qdisc *q)
{
	static struct lock_class_key child_key;

	lockdep_set_class(qdisc_lock(q), &child_key);
}

static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	unsigned int ntx;
	bool offload;
	int err;

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);

	if (offload) {
		if (sch->parent != TC_H_ROOT) {
			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
			return -EOPNOTSUPP;
		}

		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
			return -EOPNOTSUPP;
		}

		q->num_direct_qdiscs = dev->real_num_tx_queues;
		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
					   sizeof(*q->direct_qdiscs),
					   GFP_KERNEL);
		if (!q->direct_qdiscs)
			return -ENOMEM;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	if (!offload)
		return 0;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *qdisc;

		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  TC_H_MAKE(sch->handle, 0), extack);
		if (!qdisc)
			return -ENOMEM;

		htb_set_lockdep_class_child(qdisc);
		q->direct_qdiscs[ntx] = qdisc;
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	}

	sch->flags |= TCQ_F_MQROOT;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_CREATE,
		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
		.classid = TC_H_MIN(q->defcls),
		.extack = extack,
	};
	err = htb_offload(dev, &offload_opt);
	if (err)
		return err;

	/* Defer this assignment, so that htb_destroy skips offload-related
	 * parts (especially calling ndo_setup_tc) on errors.
	 */
	q->offload = true;

	return 0;
}
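
/* Usage note (illustrative, not part of the original source): the options
 * parsed above correspond to a tc invocation such as
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 10 r2q 10
 *
 * where "default" fills q->defcls (unclassified traffic goes to 1:10),
 * "r2q" fills rate2quantum, and "offload" would set TCA_HTB_OFFLOAD and
 * require the hw-tc-offload feature on the device, as checked above.
 */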

static void htb_attach_offload(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct htb_sched *q = qdisc_priv(sch);
	unsigned int ntx;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];

		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
		qdisc_put(old);
		qdisc_hash_add(qdisc, false);
	}
	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);

		qdisc_put(old);
	}

	kfree(q->direct_qdiscs);
	q->direct_qdiscs = NULL;
}

static void htb_attach_software(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx;

	/* Resemble qdisc_graft behavior. */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);

		qdisc_refcount_inc(sch);

		qdisc_put(old);
	}
}

static void htb_attach(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);

	if (q->offload)
		htb_attach_offload(sch);
	else
		htb_attach_software(sch);
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	if (q->offload)
		sch->flags |= TCQ_F_OFFLOADED;
	else
		sch->flags &= ~TCQ_F_OFFLOADED;

	sch->qstats.overlimits = q->overlimits;
	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
	 * no change can happen on the qdisc parameters.
	 */

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
		goto nla_put_failure;
	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
	 * no change can happen on the class parameters.
	 */
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->leaf.q)
		tcm->tcm_info = cl->leaf.q->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	psched_ratecfg_getrate(&opt.rate, &cl->rate);
	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
		goto nla_put_failure;
	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;
	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static void htb_offload_aggregate_stats(struct htb_sched *q,
					struct htb_class *cl)
{
	u64 bytes = 0, packets = 0;
	struct htb_class *c;
	unsigned int i;

	gnet_stats_basic_sync_init(&cl->bstats);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
			struct htb_class *p = c;

			while (p && p->level < cl->level)
				p = p->parent;

			if (p != cl)
				continue;

			bytes += u64_stats_read(&c->bstats_bias.bytes);
			packets += u64_stats_read(&c->bstats_bias.packets);
			if (c->level == 0) {
				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
				packets += u64_stats_read(&c->leaf.q->bstats.packets);
			}
		}
	}
	_bstats_update(&cl->bstats, bytes, packets);
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct gnet_stats_queue qs = {
		.drops = cl->drops,
		.overlimits = cl->overlimits,
	};
	__u32 qlen = 0;

	if (!cl->level && cl->leaf.q)
		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);

	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
				    INT_MIN, INT_MAX);
	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
				     INT_MIN, INT_MAX);

	if (q->offload) {
		if (!cl->level) {
			if (cl->leaf.q)
				cl->bstats = cl->leaf.q->bstats;
			else
				gnet_stats_basic_sync_init(&cl->bstats);
			_bstats_update(&cl->bstats,
				       u64_stats_read(&cl->bstats_bias.bytes),
				       u64_stats_read(&cl->bstats_bias.packets));
		} else {
			htb_offload_aggregate_stats(q, cl);
		}
	}

	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static struct netdev_queue *
htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	int err;

	if (!q->offload)
		return sch->dev_queue;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_LEAF_QUERY_QUEUE,
		.classid = TC_H_MIN(tcm->tcm_parent),
	};
	err = htb_offload(dev, &offload_opt);
	if (err || offload_opt.qid >= dev->num_tx_queues)
		return NULL;
	return netdev_get_tx_queue(dev, offload_opt.qid);
}

static struct Qdisc *
htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
{
	struct net_device *dev = dev_queue->dev;
	struct Qdisc *old_q;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);
	old_q = dev_graft_qdisc(dev_queue, new_q);
	if (new_q)
		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return old_q;
}

static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
{
	struct netdev_queue *queue;

	queue = cl->leaf.offload_queue;
	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
		WARN_ON(cl->leaf.q->dev_queue != queue);

	return queue;
}

static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
				   struct htb_class *cl_new, bool destroying)
{
	struct netdev_queue *queue_old, *queue_new;
	struct net_device *dev = qdisc_dev(sch);

	queue_old = htb_offload_get_queue(cl_old);
	queue_new = htb_offload_get_queue(cl_new);

	if (!destroying) {
		struct Qdisc *qdisc;

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);
		qdisc = dev_graft_qdisc(queue_old, NULL);
		WARN_ON(qdisc != cl_old->leaf.q);
	}

	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
		cl_old->leaf.q->dev_queue = queue_new;
	cl_old->leaf.offload_queue = queue_new;

	if (!destroying) {
		struct Qdisc *qdisc;

		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
		if (dev->flags & IFF_UP)
			dev_activate(dev);
		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
	}
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct netdev_queue *dev_queue = sch->dev_queue;
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc *old_q;

	if (cl->level)
		return -EINVAL;

	if (q->offload)
		dev_queue = htb_offload_get_queue(cl);

	if (!new) {
		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					cl->common.classid, extack);
		if (!new)
			return -ENOBUFS;
	}

	if (q->offload) {
		htb_set_lockdep_class_child(new);
		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
		qdisc_refcount_inc(new);
		old_q = htb_graft_helper(dev_queue, new);
	}

	*old = qdisc_replace(sch, new, &cl->leaf.q);

	if (q->offload) {
		WARN_ON(old_q != *old);
		qdisc_put(old_q);
	}

	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	htb_deactivate(qdisc_priv(sch), cl);
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node,
				  &q->hlevel[parent->level].wait_pq);

	parent->level = 0;
	memset(&parent->inner, 0, sizeof(parent->inner));
	parent->leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = ktime_get_ns();
	parent->cmode = HTB_CAN_SEND;
	if (q->offload)
		parent->leaf.offload_queue = cl->leaf.offload_queue;
}

static void htb_parent_to_leaf_offload(struct Qdisc *sch,
				       struct netdev_queue *dev_queue,
				       struct Qdisc *new_q)
{
	struct Qdisc *old_q;

	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
	if (new_q)
		qdisc_refcount_inc(new_q);
	old_q = htb_graft_helper(dev_queue, new_q);
	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
}

static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
				     bool last_child, bool destroying,
				     struct netlink_ext_ack *extack)
{
	struct tc_htb_qopt_offload offload_opt;
	struct netdev_queue *dev_queue;
	struct Qdisc *q = cl->leaf.q;
	struct Qdisc *old;
	int err;

	if (cl->level)
		return -EINVAL;

	WARN_ON(!q);
	dev_queue = htb_offload_get_queue(cl);
	/* When destroying, caller qdisc_graft grafts the new qdisc and invokes
	 * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
	 * does not need to graft or qdisc_put the qdisc being destroyed.
	 */
	if (!destroying) {
		old = htb_graft_helper(dev_queue, NULL);
		/* Last qdisc grafted should be the same as cl->leaf.q when
		 * calling htb_delete.
		 */
		WARN_ON(old != q);
	}

	if (cl->parent) {
		_bstats_update(&cl->parent->bstats_bias,
			       u64_stats_read(&q->bstats.bytes),
			       u64_stats_read(&q->bstats.packets));
	}

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = !last_child ? TC_HTB_LEAF_DEL :
			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
			   TC_HTB_LEAF_DEL_LAST,
		.classid = cl->common.classid,
		.extack = extack,
	};
	err = htb_offload(qdisc_dev(sch), &offload_opt);

	if (!destroying) {
		if (!err)
			qdisc_put(old);
		else
			htb_graft_helper(dev_queue, old);
	}

	if (last_child)
		return err;

	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
		u32 classid = TC_H_MAJ(sch->handle) |
			      TC_H_MIN(offload_opt.classid);
		struct htb_class *moved_cl = htb_find(classid, sch);

		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
	}

	return err;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->leaf.q);
		qdisc_put(cl->leaf.q);
	}
	gen_kill_estimator(&cl->rate_est);
	tcf_block_put(cl->block);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *next;
	bool nonempty, changed;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after the htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filters need their target class alive to be able to call
	 * unbind_filter on it (without an Oops).
	 */
	tcf_block_put(q->block);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			tcf_block_put(cl->block);
			cl->block = NULL;
		}
	}

	do {
		nonempty = false;
		changed = false;
		for (i = 0; i < q->clhash.hashsize; i++) {
			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
						  common.hnode) {
				bool last_child;

				if (!q->offload) {
					htb_destroy_class(sch, cl);
					continue;
				}

				nonempty = true;

				if (cl->level)
					continue;

				changed = true;

				last_child = htb_parent_last_child(cl);
				htb_destroy_class_offload(sch, cl, last_child,
							  true, NULL);
				qdisc_class_hash_remove(&q->clhash,
							&cl->common);
				if (cl->parent)
					cl->parent->children--;
				if (last_child)
					htb_parent_to_leaf(sch, cl, NULL);
				htb_destroy_class(sch, cl);
			}
		}
	} while (changed);
	WARN_ON(nonempty);

	qdisc_class_hash_destroy(&q->clhash);
	__qdisc_reset_queue(&q->direct_queue);

	if (q->offload) {
		offload_opt = (struct tc_htb_qopt_offload) {
			.command = TC_HTB_DESTROY,
		};
		htb_offload(dev, &offload_opt);
	}

	if (!q->direct_qdiscs)
		return;
	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
		qdisc_put(q->direct_qdiscs[i]);
	kfree(q->direct_qdiscs);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg,
		      struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct Qdisc *new_q = NULL;
	int last_child = 0;
	int err;

	/* TODO: why don't we allow deleting a subtree? references? does the
	 * tc subsystem guarantee that in htb_destroy it holds no class
	 * refs, so that we can remove children safely there?
	 */
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl))
		last_child = 1;

	if (q->offload) {
		err = htb_destroy_class_offload(sch, cl, last_child, false,
						extack);
		if (err)
			return err;
	}

	if (last_child) {
		struct netdev_queue *dev_queue = sch->dev_queue;

		if (q->offload)
			dev_queue = htb_offload_get_queue(cl);

		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid,
					  NULL);
		if (q->offload) {
			if (new_q)
				htb_set_lockdep_class_child(new_q);
			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
		}
	}

	sch_tree_lock(sch);

	if (!cl->level)
		qdisc_purge_queue(cl->leaf.q);

	/* delete from hash and active; the remainder is in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node,
				  &q->hlevel[cl->level].wait_pq);

	if (last_child)
		htb_parent_to_leaf(sch, cl, new_q);

	sch_tree_unlock(sch);

	htb_destroy_class(sch, cl);
	return 0;
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg, struct netlink_ext_ack *extack)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct tc_htb_qopt_offload offload_opt;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct Qdisc *parent_qdisc = NULL;
	struct netdev_queue *dev_queue;
	struct tc_htb_opt *hopt;
	u64 rate64, ceil64;
	int warn = 0;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	if (q->offload) {
		/* Options not supported by the offload. */
		if (hopt->rate.overhead || hopt->ceil.overhead) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
			goto failure;
		}
		if (hopt->rate.mpu || hopt->ceil.mpu) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
			goto failure;
		}
		if (hopt->quantum) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
			goto failure;
		}
		if (hopt->prio) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
			goto failure;
		}
	}

	/* Keeping backward compatible with rate_table based iproute2 tc */
	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
					      NULL));

	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
					      NULL));

	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;

	if (!cl) {		/* new class */
		struct net_device *dev = qdisc_dev(sch);
		struct Qdisc *new_q, *old_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		gnet_stats_basic_sync_init(&cl->bstats);
		gnet_stats_basic_sync_init(&cl->bstats_bias);

		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
		if (err) {
			kfree(cl);
			goto failure;
		}
		if (htb_rate_est || tca[TCA_RATE]) {
			err = gen_new_estimator(&cl->bstats, NULL,
						&cl->rate_est,
						NULL,
						true,
						tca[TCA_RATE] ? : &est.nla);
			if (err)
				goto err_block_put;
		}

		cl->children = 0;
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		cl->common.classid = classid;

		/* Make sure nothing interrupts us in between of two
		 * ndo_setup_tc calls.
		 */
		ASSERT_RTNL();

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so it can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		if (!q->offload) {
			dev_queue = sch->dev_queue;
		} else if (!(parent && !parent->level)) {
			/* Assign a dev_queue to this classid. */
			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_LEAF_ALLOC_QUEUE,
				.classid = cl->common.classid,
				.parent_classid = parent ?
					TC_H_MIN(parent->common.classid) :
					TC_HTB_CLASSID_ROOT,
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err) {
				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
				       err);
				goto err_kill_estimator;
			}
			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
		} else { /* First child. */
			dev_queue = htb_offload_get_queue(parent);
			old_q = htb_graft_helper(dev_queue, NULL);
			WARN_ON(old_q != parent->leaf.q);
			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_LEAF_TO_INNER,
				.classid = cl->common.classid,
				.parent_classid =
					TC_H_MIN(parent->common.classid),
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err) {
				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
				       err);
				htb_graft_helper(dev_queue, old_q);
				goto err_kill_estimator;
			}
			_bstats_update(&parent->bstats_bias,
				       u64_stats_read(&old_q->bstats.bytes),
				       u64_stats_read(&old_q->bstats.packets));
			qdisc_put(old_q);
		}
		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  classid, NULL);
		if (q->offload) {
			if (new_q) {
				htb_set_lockdep_class_child(new_q);
				/* One ref for cl->leaf.q, the other for
				 * dev_queue->qdisc.
				 */
				qdisc_refcount_inc(new_q);
			}
			old_q = htb_graft_helper(dev_queue, new_q);
			/* No qdisc_put needed. */
			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
		}
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			/* turn parent into inner node */
			qdisc_purge_queue(parent->leaf.q);
			parent_qdisc = parent->leaf.q;
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->inner, 0, sizeof(parent->inner));
		}

		/* leaf (we) needs elementary qdisc */
		cl->leaf.q = new_q ? new_q : &noop_qdisc;
		if (q->offload)
			cl->leaf.offload_queue = dev_queue;

		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
		cl->t_c = ktime_get_ns();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
		if (cl->leaf.q != &noop_qdisc)
			qdisc_hash_add(cl->leaf.q, true);
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, NULL,
						    &cl->rate_est,
						    NULL,
						    true,
						    tca[TCA_RATE]);
			if (err)
				return err;
		}

		if (q->offload) {
			struct net_device *dev = qdisc_dev(sch);

			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_NODE_MODIFY,
				.classid = cl->common.classid,
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err)
				/* Estimator was replaced, and rollback may fail
				 * as well, so we don't try to recover it, and
				 * the estimator won't work properly with the
				 * offload anyway, because bstats are updated
				 * only when the stats are queried.
				 */
				return err;
		}

		sch_tree_lock(sch);
	}

	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);

	/* there used to be a nasty bug here, we have to check that the node
	 * really is a leaf before changing cl->leaf!
	 */
	if (!cl->level) {
		u64 quantum = cl->rate.rate_bytes_ps;

		do_div(quantum, q->rate2quantum);
		cl->quantum = min_t(u64, quantum, INT_MAX);

		if (!hopt->quantum && cl->quantum < 1000) {
			warn = -1;
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			warn = 1;
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}
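
	/* Worked example (illustrative, not part of the original source):
	 * with the default r2q of 10, a class with rate 12500000 bytes/s
	 * (100 Mbit/s) would get quantum 1250000, which is clamped to
	 * 200000 with a "big" quantum warning, while a 1200 bytes/s class
	 * would get quantum 120 and is bumped to the 1000-byte floor.
	 * Passing an explicit quantum in hopt, or tuning r2q at qdisc
	 * creation, avoids both clamps.
	 */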

	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

	sch_tree_unlock(sch);
	qdisc_put(parent_qdisc);

	if (warn)
		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
			cl->common.classid, (warn == -1 ? "small" : "big"));

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

err_kill_estimator:
	gen_kill_estimator(&cl->rate_est);
err_block_put:
	tcf_block_put(cl->block);
	kfree(cl);
failure:
	return err;
}

static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
				       struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	return cl ? cl->block : q->block;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least the tc_index filter uses this just to get
	 * the class for other reasons, so we have to allow for it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
				return;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.select_queue	=	htb_select_queue,
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.find		=	htb_search,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_block	=	htb_tcf_block,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	htb_init,
	.attach		=	htb_attach,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");