Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/string.h>
22#include <linux/errno.h>
23#include <linux/skbuff.h>
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
29#include <linux/hrtimer.h>
30#include <linux/slab.h>
31#include <linux/hashtable.h>
32
33#include <net/net_namespace.h>
34#include <net/sock.h>
35#include <net/netlink.h>
36#include <net/pkt_sched.h>
37#include <net/pkt_cls.h>
38
39/*
40
41 Short review.
42 -------------
43
44 This file consists of two interrelated parts:
45
46 1. queueing disciplines manager frontend.
47 2. traffic classes manager frontend.
48
49 Generally, queueing discipline ("qdisc") is a black box,
50 which is able to enqueue packets and to dequeue them (when
51 device is ready to send something) in order and at times
52 determined by algorithm hidden in it.
53
54 qdisc's are divided to two categories:
55 - "queues", which have no internal structure visible from outside.
56 - "schedulers", which split all the packets to "traffic classes",
57 using "packet classifiers" (look at cls_api.c)
58
59 In turn, classes may have child qdiscs (as rule, queues)
60 attached to them etc. etc. etc.
61
62 The goal of the routines in this file is to translate
63 information supplied by user in the form of handles
64 to more intelligible for kernel form, to make some sanity
65 checks and part of work, which is common to all qdiscs
66 and to provide rtnetlink notifications.
67
68 All real intelligent work is done inside qdisc modules.
69
70
71
72 Every discipline has two major routines: enqueue and dequeue.
73
74 ---dequeue
75
76 dequeue usually returns a skb to send. It is allowed to return NULL,
77 but it does not mean that queue is empty, it just means that
78 discipline does not want to send anything this time.
79 Queue is really empty if q->q.qlen == 0.
80 For complicated disciplines with multiple queues q->q is not
81 real packet queue, but however q->q.qlen must be valid.
82
83 ---enqueue
84
85 enqueue returns 0, if packet was enqueued successfully.
86 If packet (this one or another one) was dropped, it returns
87 not zero error code.
88 NET_XMIT_DROP - this packet dropped
89 Expected action: do not backoff, but wait until queue will clear.
90 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
91 Expected action: backoff or ignore
92
93 Auxiliary routines:
94
95 ---peek
96
97 like dequeue but without removing a packet from the queue
98
99 ---reset
100
101 returns qdisc to initial state: purge all buffers, clear all
102 timers, counters (except for statistics) etc.
103
104 ---init
105
106 initializes newly created qdisc.
107
108 ---destroy
109
110 destroys resources allocated by init and during lifetime of qdisc.
111
112 ---change
113
114 changes qdisc parameters.
115 */
116
117/* Protects list of registered TC modules. It is pure SMP lock. */
118static DEFINE_RWLOCK(qdisc_mod_lock);
119
120
121/************************************************
122 * Queueing disciplines manipulation. *
123 ************************************************/
124
125
126/* The list of all installed queueing disciplines. */
127
128static struct Qdisc_ops *qdisc_base;
129
130/* Register/unregister queueing discipline */
131
132int register_qdisc(struct Qdisc_ops *qops)
133{
134 struct Qdisc_ops *q, **qp;
135 int rc = -EEXIST;
136
137 write_lock(&qdisc_mod_lock);
138 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
139 if (!strcmp(qops->id, q->id))
140 goto out;
141
142 if (qops->enqueue == NULL)
143 qops->enqueue = noop_qdisc_ops.enqueue;
144 if (qops->peek == NULL) {
145 if (qops->dequeue == NULL)
146 qops->peek = noop_qdisc_ops.peek;
147 else
148 goto out_einval;
149 }
150 if (qops->dequeue == NULL)
151 qops->dequeue = noop_qdisc_ops.dequeue;
152
153 if (qops->cl_ops) {
154 const struct Qdisc_class_ops *cops = qops->cl_ops;
155
156 if (!(cops->find && cops->walk && cops->leaf))
157 goto out_einval;
158
159 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
160 goto out_einval;
161 }
162
163 qops->next = NULL;
164 *qp = qops;
165 rc = 0;
166out:
167 write_unlock(&qdisc_mod_lock);
168 return rc;
169
170out_einval:
171 rc = -EINVAL;
172 goto out;
173}
174EXPORT_SYMBOL(register_qdisc);
175
176int unregister_qdisc(struct Qdisc_ops *qops)
177{
178 struct Qdisc_ops *q, **qp;
179 int err = -ENOENT;
180
181 write_lock(&qdisc_mod_lock);
182 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
183 if (q == qops)
184 break;
185 if (q) {
186 *qp = q->next;
187 q->next = NULL;
188 err = 0;
189 }
190 write_unlock(&qdisc_mod_lock);
191 return err;
192}
193EXPORT_SYMBOL(unregister_qdisc);
194
195/* Get default qdisc if not otherwise specified */
196void qdisc_get_default(char *name, size_t len)
197{
198 read_lock(&qdisc_mod_lock);
199 strlcpy(name, default_qdisc_ops->id, len);
200 read_unlock(&qdisc_mod_lock);
201}
202
203static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204{
205 struct Qdisc_ops *q = NULL;
206
207 for (q = qdisc_base; q; q = q->next) {
208 if (!strcmp(name, q->id)) {
209 if (!try_module_get(q->owner))
210 q = NULL;
211 break;
212 }
213 }
214
215 return q;
216}
217
218/* Set new default qdisc to use */
219int qdisc_set_default(const char *name)
220{
221 const struct Qdisc_ops *ops;
222
223 if (!capable(CAP_NET_ADMIN))
224 return -EPERM;
225
226 write_lock(&qdisc_mod_lock);
227 ops = qdisc_lookup_default(name);
228 if (!ops) {
229 /* Not found, drop lock and try to load module */
230 write_unlock(&qdisc_mod_lock);
231 request_module("sch_%s", name);
232 write_lock(&qdisc_mod_lock);
233
234 ops = qdisc_lookup_default(name);
235 }
236
237 if (ops) {
238 /* Set new default */
239 module_put(default_qdisc_ops->owner);
240 default_qdisc_ops = ops;
241 }
242 write_unlock(&qdisc_mod_lock);
243
244 return ops ? 0 : -ENOENT;
245}
246
247#ifdef CONFIG_NET_SCH_DEFAULT
248/* Set default value from kernel config */
249static int __init sch_default_qdisc(void)
250{
251 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
252}
253late_initcall(sch_default_qdisc);
254#endif
255
256/* We know handle. Find qdisc among all qdisc's attached to device
257 * (root qdisc, all its children, children of children etc.)
258 * Note: caller either uses rtnl or rcu_read_lock()
259 */
260
261static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
262{
263 struct Qdisc *q;
264
265 if (!qdisc_dev(root))
266 return (root->handle == handle ? root : NULL);
267
268 if (!(root->flags & TCQ_F_BUILTIN) &&
269 root->handle == handle)
270 return root;
271
272 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
273 if (q->handle == handle)
274 return q;
275 }
276 return NULL;
277}
278
279void qdisc_hash_add(struct Qdisc *q, bool invisible)
280{
281 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
282 ASSERT_RTNL();
283 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
284 if (invisible)
285 q->flags |= TCQ_F_INVISIBLE;
286 }
287}
288EXPORT_SYMBOL(qdisc_hash_add);
289
290void qdisc_hash_del(struct Qdisc *q)
291{
292 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293 ASSERT_RTNL();
294 hash_del_rcu(&q->hash);
295 }
296}
297EXPORT_SYMBOL(qdisc_hash_del);
298
299struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
300{
301 struct Qdisc *q;
302
303 if (!handle)
304 return NULL;
305 q = qdisc_match_from_root(dev->qdisc, handle);
306 if (q)
307 goto out;
308
309 if (dev_ingress_queue(dev))
310 q = qdisc_match_from_root(
311 dev_ingress_queue(dev)->qdisc_sleeping,
312 handle);
313out:
314 return q;
315}
316
317struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318{
319 struct netdev_queue *nq;
320 struct Qdisc *q;
321
322 if (!handle)
323 return NULL;
324 q = qdisc_match_from_root(dev->qdisc, handle);
325 if (q)
326 goto out;
327
328 nq = dev_ingress_queue_rcu(dev);
329 if (nq)
330 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331out:
332 return q;
333}
334
335static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336{
337 unsigned long cl;
338 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
339
340 if (cops == NULL)
341 return NULL;
342 cl = cops->find(p, classid);
343
344 if (cl == 0)
345 return NULL;
346 return cops->leaf(p, cl);
347}
348
349/* Find queueing discipline by name */
350
351static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
352{
353 struct Qdisc_ops *q = NULL;
354
355 if (kind) {
356 read_lock(&qdisc_mod_lock);
357 for (q = qdisc_base; q; q = q->next) {
358 if (nla_strcmp(kind, q->id) == 0) {
359 if (!try_module_get(q->owner))
360 q = NULL;
361 break;
362 }
363 }
364 read_unlock(&qdisc_mod_lock);
365 }
366 return q;
367}
368
369/* The linklayer setting were not transferred from iproute2, in older
370 * versions, and the rate tables lookup systems have been dropped in
371 * the kernel. To keep backward compatible with older iproute2 tc
372 * utils, we detect the linklayer setting by detecting if the rate
373 * table were modified.
374 *
375 * For linklayer ATM table entries, the rate table will be aligned to
376 * 48 bytes, thus some table entries will contain the same value. The
377 * mpu (min packet unit) is also encoded into the old rate table, thus
378 * starting from the mpu, we find low and high table entries for
379 * mapping this cell. If these entries contain the same value, when
380 * the rate tables have been modified for linklayer ATM.
381 *
382 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
383 * and then roundup to the next cell, calc the table entry one below,
384 * and compare.
385 */
386static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
387{
388 int low = roundup(r->mpu, 48);
389 int high = roundup(low+1, 48);
390 int cell_low = low >> r->cell_log;
391 int cell_high = (high >> r->cell_log) - 1;
392
393 /* rtab is too inaccurate at rates > 100Mbit/s */
394 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
395 pr_debug("TC linklayer: Giving up ATM detection\n");
396 return TC_LINKLAYER_ETHERNET;
397 }
398
399 if ((cell_high > cell_low) && (cell_high < 256)
400 && (rtab[cell_low] == rtab[cell_high])) {
401 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
402 cell_low, cell_high, rtab[cell_high]);
403 return TC_LINKLAYER_ATM;
404 }
405 return TC_LINKLAYER_ETHERNET;
406}
407
408static struct qdisc_rate_table *qdisc_rtab_list;
409
410struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
411 struct nlattr *tab,
412 struct netlink_ext_ack *extack)
413{
414 struct qdisc_rate_table *rtab;
415
416 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
417 nla_len(tab) != TC_RTAB_SIZE) {
418 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
419 return NULL;
420 }
421
422 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
423 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
424 !memcmp(&rtab->data, nla_data(tab), 1024)) {
425 rtab->refcnt++;
426 return rtab;
427 }
428 }
429
430 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
431 if (rtab) {
432 rtab->rate = *r;
433 rtab->refcnt = 1;
434 memcpy(rtab->data, nla_data(tab), 1024);
435 if (r->linklayer == TC_LINKLAYER_UNAWARE)
436 r->linklayer = __detect_linklayer(r, rtab->data);
437 rtab->next = qdisc_rtab_list;
438 qdisc_rtab_list = rtab;
439 } else {
440 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
441 }
442 return rtab;
443}
444EXPORT_SYMBOL(qdisc_get_rtab);
445
446void qdisc_put_rtab(struct qdisc_rate_table *tab)
447{
448 struct qdisc_rate_table *rtab, **rtabp;
449
450 if (!tab || --tab->refcnt)
451 return;
452
453 for (rtabp = &qdisc_rtab_list;
454 (rtab = *rtabp) != NULL;
455 rtabp = &rtab->next) {
456 if (rtab == tab) {
457 *rtabp = rtab->next;
458 kfree(rtab);
459 return;
460 }
461 }
462}
463EXPORT_SYMBOL(qdisc_put_rtab);
464
465static LIST_HEAD(qdisc_stab_list);
466
467static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
468 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
469 [TCA_STAB_DATA] = { .type = NLA_BINARY },
470};
471
472static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
473 struct netlink_ext_ack *extack)
474{
475 struct nlattr *tb[TCA_STAB_MAX + 1];
476 struct qdisc_size_table *stab;
477 struct tc_sizespec *s;
478 unsigned int tsize = 0;
479 u16 *tab = NULL;
480 int err;
481
482 err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
483 extack);
484 if (err < 0)
485 return ERR_PTR(err);
486 if (!tb[TCA_STAB_BASE]) {
487 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
488 return ERR_PTR(-EINVAL);
489 }
490
491 s = nla_data(tb[TCA_STAB_BASE]);
492
493 if (s->tsize > 0) {
494 if (!tb[TCA_STAB_DATA]) {
495 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
496 return ERR_PTR(-EINVAL);
497 }
498 tab = nla_data(tb[TCA_STAB_DATA]);
499 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
500 }
501
502 if (tsize != s->tsize || (!tab && tsize > 0)) {
503 NL_SET_ERR_MSG(extack, "Invalid size of size table");
504 return ERR_PTR(-EINVAL);
505 }
506
507 list_for_each_entry(stab, &qdisc_stab_list, list) {
508 if (memcmp(&stab->szopts, s, sizeof(*s)))
509 continue;
510 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
511 continue;
512 stab->refcnt++;
513 return stab;
514 }
515
516 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
517 if (!stab)
518 return ERR_PTR(-ENOMEM);
519
520 stab->refcnt = 1;
521 stab->szopts = *s;
522 if (tsize > 0)
523 memcpy(stab->data, tab, tsize * sizeof(u16));
524
525 list_add_tail(&stab->list, &qdisc_stab_list);
526
527 return stab;
528}
529
530void qdisc_put_stab(struct qdisc_size_table *tab)
531{
532 if (!tab)
533 return;
534
535 if (--tab->refcnt == 0) {
536 list_del(&tab->list);
537 kfree_rcu(tab, rcu);
538 }
539}
540EXPORT_SYMBOL(qdisc_put_stab);
541
542static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
543{
544 struct nlattr *nest;
545
546 nest = nla_nest_start_noflag(skb, TCA_STAB);
547 if (nest == NULL)
548 goto nla_put_failure;
549 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
550 goto nla_put_failure;
551 nla_nest_end(skb, nest);
552
553 return skb->len;
554
555nla_put_failure:
556 return -1;
557}
558
559void __qdisc_calculate_pkt_len(struct sk_buff *skb,
560 const struct qdisc_size_table *stab)
561{
562 int pkt_len, slot;
563
564 pkt_len = skb->len + stab->szopts.overhead;
565 if (unlikely(!stab->szopts.tsize))
566 goto out;
567
568 slot = pkt_len + stab->szopts.cell_align;
569 if (unlikely(slot < 0))
570 slot = 0;
571
572 slot >>= stab->szopts.cell_log;
573 if (likely(slot < stab->szopts.tsize))
574 pkt_len = stab->data[slot];
575 else
576 pkt_len = stab->data[stab->szopts.tsize - 1] *
577 (slot / stab->szopts.tsize) +
578 stab->data[slot % stab->szopts.tsize];
579
580 pkt_len <<= stab->szopts.size_log;
581out:
582 if (unlikely(pkt_len < 1))
583 pkt_len = 1;
584 qdisc_skb_cb(skb)->pkt_len = pkt_len;
585}
586EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
587
588void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
589{
590 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
591 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
592 txt, qdisc->ops->id, qdisc->handle >> 16);
593 qdisc->flags |= TCQ_F_WARN_NONWC;
594 }
595}
596EXPORT_SYMBOL(qdisc_warn_nonwc);
597
598static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
599{
600 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
601 timer);
602
603 rcu_read_lock();
604 __netif_schedule(qdisc_root(wd->qdisc));
605 rcu_read_unlock();
606
607 return HRTIMER_NORESTART;
608}
609
610void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
611 clockid_t clockid)
612{
613 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
614 wd->timer.function = qdisc_watchdog;
615 wd->qdisc = qdisc;
616}
617EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
618
619void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
620{
621 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
622}
623EXPORT_SYMBOL(qdisc_watchdog_init);
624
625void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
626{
627 if (test_bit(__QDISC_STATE_DEACTIVATED,
628 &qdisc_root_sleeping(wd->qdisc)->state))
629 return;
630
631 if (wd->last_expires == expires)
632 return;
633
634 wd->last_expires = expires;
635 hrtimer_start(&wd->timer,
636 ns_to_ktime(expires),
637 HRTIMER_MODE_ABS_PINNED);
638}
639EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
640
641void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
642{
643 hrtimer_cancel(&wd->timer);
644}
645EXPORT_SYMBOL(qdisc_watchdog_cancel);
646
647static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
648{
649 struct hlist_head *h;
650 unsigned int i;
651
652 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
653
654 if (h != NULL) {
655 for (i = 0; i < n; i++)
656 INIT_HLIST_HEAD(&h[i]);
657 }
658 return h;
659}
660
661void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
662{
663 struct Qdisc_class_common *cl;
664 struct hlist_node *next;
665 struct hlist_head *nhash, *ohash;
666 unsigned int nsize, nmask, osize;
667 unsigned int i, h;
668
669 /* Rehash when load factor exceeds 0.75 */
670 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
671 return;
672 nsize = clhash->hashsize * 2;
673 nmask = nsize - 1;
674 nhash = qdisc_class_hash_alloc(nsize);
675 if (nhash == NULL)
676 return;
677
678 ohash = clhash->hash;
679 osize = clhash->hashsize;
680
681 sch_tree_lock(sch);
682 for (i = 0; i < osize; i++) {
683 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
684 h = qdisc_class_hash(cl->classid, nmask);
685 hlist_add_head(&cl->hnode, &nhash[h]);
686 }
687 }
688 clhash->hash = nhash;
689 clhash->hashsize = nsize;
690 clhash->hashmask = nmask;
691 sch_tree_unlock(sch);
692
693 kvfree(ohash);
694}
695EXPORT_SYMBOL(qdisc_class_hash_grow);
696
697int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
698{
699 unsigned int size = 4;
700
701 clhash->hash = qdisc_class_hash_alloc(size);
702 if (!clhash->hash)
703 return -ENOMEM;
704 clhash->hashsize = size;
705 clhash->hashmask = size - 1;
706 clhash->hashelems = 0;
707 return 0;
708}
709EXPORT_SYMBOL(qdisc_class_hash_init);
710
711void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
712{
713 kvfree(clhash->hash);
714}
715EXPORT_SYMBOL(qdisc_class_hash_destroy);
716
717void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
718 struct Qdisc_class_common *cl)
719{
720 unsigned int h;
721
722 INIT_HLIST_NODE(&cl->hnode);
723 h = qdisc_class_hash(cl->classid, clhash->hashmask);
724 hlist_add_head(&cl->hnode, &clhash->hash[h]);
725 clhash->hashelems++;
726}
727EXPORT_SYMBOL(qdisc_class_hash_insert);
728
729void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
730 struct Qdisc_class_common *cl)
731{
732 hlist_del(&cl->hnode);
733 clhash->hashelems--;
734}
735EXPORT_SYMBOL(qdisc_class_hash_remove);
736
737/* Allocate an unique handle from space managed by kernel
738 * Possible range is [8000-FFFF]:0000 (0x8000 values)
739 */
740static u32 qdisc_alloc_handle(struct net_device *dev)
741{
742 int i = 0x8000;
743 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
744
745 do {
746 autohandle += TC_H_MAKE(0x10000U, 0);
747 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
748 autohandle = TC_H_MAKE(0x80000000U, 0);
749 if (!qdisc_lookup(dev, autohandle))
750 return autohandle;
751 cond_resched();
752 } while (--i > 0);
753
754 return 0;
755}
756
757void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
758{
759 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
760 const struct Qdisc_class_ops *cops;
761 unsigned long cl;
762 u32 parentid;
763 bool notify;
764 int drops;
765
766 if (n == 0 && len == 0)
767 return;
768 drops = max_t(int, n, 0);
769 rcu_read_lock();
770 while ((parentid = sch->parent)) {
771 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
772 break;
773
774 if (sch->flags & TCQ_F_NOPARENT)
775 break;
776 /* Notify parent qdisc only if child qdisc becomes empty.
777 *
778 * If child was empty even before update then backlog
779 * counter is screwed and we skip notification because
780 * parent class is already passive.
781 *
782 * If the original child was offloaded then it is allowed
783 * to be seem as empty, so the parent is notified anyway.
784 */
785 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
786 !qdisc_is_offloaded);
787 /* TODO: perform the search on a per txq basis */
788 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
789 if (sch == NULL) {
790 WARN_ON_ONCE(parentid != TC_H_ROOT);
791 break;
792 }
793 cops = sch->ops->cl_ops;
794 if (notify && cops->qlen_notify) {
795 cl = cops->find(sch, parentid);
796 cops->qlen_notify(sch, cl);
797 }
798 sch->q.qlen -= n;
799 sch->qstats.backlog -= len;
800 __qdisc_qstats_drop(sch, drops);
801 }
802 rcu_read_unlock();
803}
804EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
805
806int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
807 void *type_data)
808{
809 struct net_device *dev = qdisc_dev(sch);
810 int err;
811
812 sch->flags &= ~TCQ_F_OFFLOADED;
813 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
814 return 0;
815
816 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
817 if (err == -EOPNOTSUPP)
818 return 0;
819
820 if (!err)
821 sch->flags |= TCQ_F_OFFLOADED;
822
823 return err;
824}
825EXPORT_SYMBOL(qdisc_offload_dump_helper);
826
827void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
828 struct Qdisc *new, struct Qdisc *old,
829 enum tc_setup_type type, void *type_data,
830 struct netlink_ext_ack *extack)
831{
832 bool any_qdisc_is_offloaded;
833 int err;
834
835 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
836 return;
837
838 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
839
840 /* Don't report error if the graft is part of destroy operation. */
841 if (!err || !new || new == &noop_qdisc)
842 return;
843
844 /* Don't report error if the parent, the old child and the new
845 * one are not offloaded.
846 */
847 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
848 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
849 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
850
851 if (any_qdisc_is_offloaded)
852 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
853}
854EXPORT_SYMBOL(qdisc_offload_graft_helper);
855
856static void qdisc_offload_graft_root(struct net_device *dev,
857 struct Qdisc *new, struct Qdisc *old,
858 struct netlink_ext_ack *extack)
859{
860 struct tc_root_qopt_offload graft_offload = {
861 .command = TC_ROOT_GRAFT,
862 .handle = new ? new->handle : 0,
863 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
864 (old && old->flags & TCQ_F_INGRESS),
865 };
866
867 qdisc_offload_graft_helper(dev, NULL, new, old,
868 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
869}
870
871static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
872 u32 portid, u32 seq, u16 flags, int event)
873{
874 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
875 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
876 struct tcmsg *tcm;
877 struct nlmsghdr *nlh;
878 unsigned char *b = skb_tail_pointer(skb);
879 struct gnet_dump d;
880 struct qdisc_size_table *stab;
881 u32 block_index;
882 __u32 qlen;
883
884 cond_resched();
885 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
886 if (!nlh)
887 goto out_nlmsg_trim;
888 tcm = nlmsg_data(nlh);
889 tcm->tcm_family = AF_UNSPEC;
890 tcm->tcm__pad1 = 0;
891 tcm->tcm__pad2 = 0;
892 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
893 tcm->tcm_parent = clid;
894 tcm->tcm_handle = q->handle;
895 tcm->tcm_info = refcount_read(&q->refcnt);
896 if (nla_put_string(skb, TCA_KIND, q->ops->id))
897 goto nla_put_failure;
898 if (q->ops->ingress_block_get) {
899 block_index = q->ops->ingress_block_get(q);
900 if (block_index &&
901 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
902 goto nla_put_failure;
903 }
904 if (q->ops->egress_block_get) {
905 block_index = q->ops->egress_block_get(q);
906 if (block_index &&
907 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
908 goto nla_put_failure;
909 }
910 if (q->ops->dump && q->ops->dump(q, skb) < 0)
911 goto nla_put_failure;
912 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
913 goto nla_put_failure;
914 qlen = qdisc_qlen_sum(q);
915
916 stab = rtnl_dereference(q->stab);
917 if (stab && qdisc_dump_stab(skb, stab) < 0)
918 goto nla_put_failure;
919
920 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
921 NULL, &d, TCA_PAD) < 0)
922 goto nla_put_failure;
923
924 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
925 goto nla_put_failure;
926
927 if (qdisc_is_percpu_stats(q)) {
928 cpu_bstats = q->cpu_bstats;
929 cpu_qstats = q->cpu_qstats;
930 }
931
932 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
933 &d, cpu_bstats, &q->bstats) < 0 ||
934 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
935 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
936 goto nla_put_failure;
937
938 if (gnet_stats_finish_copy(&d) < 0)
939 goto nla_put_failure;
940
941 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
942 return skb->len;
943
944out_nlmsg_trim:
945nla_put_failure:
946 nlmsg_trim(skb, b);
947 return -1;
948}
949
950static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
951{
952 if (q->flags & TCQ_F_BUILTIN)
953 return true;
954 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
955 return true;
956
957 return false;
958}
959
960static int qdisc_notify(struct net *net, struct sk_buff *oskb,
961 struct nlmsghdr *n, u32 clid,
962 struct Qdisc *old, struct Qdisc *new)
963{
964 struct sk_buff *skb;
965 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
966
967 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
968 if (!skb)
969 return -ENOBUFS;
970
971 if (old && !tc_qdisc_dump_ignore(old, false)) {
972 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
973 0, RTM_DELQDISC) < 0)
974 goto err_out;
975 }
976 if (new && !tc_qdisc_dump_ignore(new, false)) {
977 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
978 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
979 goto err_out;
980 }
981
982 if (skb->len)
983 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
984 n->nlmsg_flags & NLM_F_ECHO);
985
986err_out:
987 kfree_skb(skb);
988 return -EINVAL;
989}
990
991static void notify_and_destroy(struct net *net, struct sk_buff *skb,
992 struct nlmsghdr *n, u32 clid,
993 struct Qdisc *old, struct Qdisc *new)
994{
995 if (new || old)
996 qdisc_notify(net, skb, n, clid, old, new);
997
998 if (old)
999 qdisc_put(old);
1000}
1001
1002static void qdisc_clear_nolock(struct Qdisc *sch)
1003{
1004 sch->flags &= ~TCQ_F_NOLOCK;
1005 if (!(sch->flags & TCQ_F_CPUSTATS))
1006 return;
1007
1008 free_percpu(sch->cpu_bstats);
1009 free_percpu(sch->cpu_qstats);
1010 sch->cpu_bstats = NULL;
1011 sch->cpu_qstats = NULL;
1012 sch->flags &= ~TCQ_F_CPUSTATS;
1013}
1014
1015/* Graft qdisc "new" to class "classid" of qdisc "parent" or
1016 * to device "dev".
1017 *
1018 * When appropriate send a netlink notification using 'skb'
1019 * and "n".
1020 *
1021 * On success, destroy old qdisc.
1022 */
1023
1024static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1025 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1026 struct Qdisc *new, struct Qdisc *old,
1027 struct netlink_ext_ack *extack)
1028{
1029 struct Qdisc *q = old;
1030 struct net *net = dev_net(dev);
1031
1032 if (parent == NULL) {
1033 unsigned int i, num_q, ingress;
1034
1035 ingress = 0;
1036 num_q = dev->num_tx_queues;
1037 if ((q && q->flags & TCQ_F_INGRESS) ||
1038 (new && new->flags & TCQ_F_INGRESS)) {
1039 num_q = 1;
1040 ingress = 1;
1041 if (!dev_ingress_queue(dev)) {
1042 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1043 return -ENOENT;
1044 }
1045 }
1046
1047 if (dev->flags & IFF_UP)
1048 dev_deactivate(dev);
1049
1050 qdisc_offload_graft_root(dev, new, old, extack);
1051
1052 if (new && new->ops->attach)
1053 goto skip;
1054
1055 for (i = 0; i < num_q; i++) {
1056 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1057
1058 if (!ingress)
1059 dev_queue = netdev_get_tx_queue(dev, i);
1060
1061 old = dev_graft_qdisc(dev_queue, new);
1062 if (new && i > 0)
1063 qdisc_refcount_inc(new);
1064
1065 if (!ingress)
1066 qdisc_put(old);
1067 }
1068
1069skip:
1070 if (!ingress) {
1071 notify_and_destroy(net, skb, n, classid,
1072 dev->qdisc, new);
1073 if (new && !new->ops->attach)
1074 qdisc_refcount_inc(new);
1075 dev->qdisc = new ? : &noop_qdisc;
1076
1077 if (new && new->ops->attach)
1078 new->ops->attach(new);
1079 } else {
1080 notify_and_destroy(net, skb, n, classid, old, new);
1081 }
1082
1083 if (dev->flags & IFF_UP)
1084 dev_activate(dev);
1085 } else {
1086 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1087 unsigned long cl;
1088 int err;
1089
1090 /* Only support running class lockless if parent is lockless */
1091 if (new && (new->flags & TCQ_F_NOLOCK) &&
1092 parent && !(parent->flags & TCQ_F_NOLOCK))
1093 qdisc_clear_nolock(new);
1094
1095 if (!cops || !cops->graft)
1096 return -EOPNOTSUPP;
1097
1098 cl = cops->find(parent, classid);
1099 if (!cl) {
1100 NL_SET_ERR_MSG(extack, "Specified class not found");
1101 return -ENOENT;
1102 }
1103
1104 err = cops->graft(parent, cl, new, &old, extack);
1105 if (err)
1106 return err;
1107 notify_and_destroy(net, skb, n, classid, old, new);
1108 }
1109 return 0;
1110}
1111
1112static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1113 struct netlink_ext_ack *extack)
1114{
1115 u32 block_index;
1116
1117 if (tca[TCA_INGRESS_BLOCK]) {
1118 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1119
1120 if (!block_index) {
1121 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1122 return -EINVAL;
1123 }
1124 if (!sch->ops->ingress_block_set) {
1125 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1126 return -EOPNOTSUPP;
1127 }
1128 sch->ops->ingress_block_set(sch, block_index);
1129 }
1130 if (tca[TCA_EGRESS_BLOCK]) {
1131 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1132
1133 if (!block_index) {
1134 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1135 return -EINVAL;
1136 }
1137 if (!sch->ops->egress_block_set) {
1138 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1139 return -EOPNOTSUPP;
1140 }
1141 sch->ops->egress_block_set(sch, block_index);
1142 }
1143 return 0;
1144}
1145
1146/*
1147 Allocate and initialize new qdisc.
1148
1149 Parameters are passed via opt.
1150 */
1151
1152static struct Qdisc *qdisc_create(struct net_device *dev,
1153 struct netdev_queue *dev_queue,
1154 struct Qdisc *p, u32 parent, u32 handle,
1155 struct nlattr **tca, int *errp,
1156 struct netlink_ext_ack *extack)
1157{
1158 int err;
1159 struct nlattr *kind = tca[TCA_KIND];
1160 struct Qdisc *sch;
1161 struct Qdisc_ops *ops;
1162 struct qdisc_size_table *stab;
1163
1164 ops = qdisc_lookup_ops(kind);
1165#ifdef CONFIG_MODULES
1166 if (ops == NULL && kind != NULL) {
1167 char name[IFNAMSIZ];
1168 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1169 /* We dropped the RTNL semaphore in order to
1170 * perform the module load. So, even if we
1171 * succeeded in loading the module we have to
1172 * tell the caller to replay the request. We
1173 * indicate this using -EAGAIN.
1174 * We replay the request because the device may
1175 * go away in the mean time.
1176 */
1177 rtnl_unlock();
1178 request_module("sch_%s", name);
1179 rtnl_lock();
1180 ops = qdisc_lookup_ops(kind);
1181 if (ops != NULL) {
1182 /* We will try again qdisc_lookup_ops,
1183 * so don't keep a reference.
1184 */
1185 module_put(ops->owner);
1186 err = -EAGAIN;
1187 goto err_out;
1188 }
1189 }
1190 }
1191#endif
1192
1193 err = -ENOENT;
1194 if (!ops) {
1195 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1196 goto err_out;
1197 }
1198
1199 sch = qdisc_alloc(dev_queue, ops, extack);
1200 if (IS_ERR(sch)) {
1201 err = PTR_ERR(sch);
1202 goto err_out2;
1203 }
1204
1205 sch->parent = parent;
1206
1207 if (handle == TC_H_INGRESS) {
1208 sch->flags |= TCQ_F_INGRESS;
1209 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1210 } else {
1211 if (handle == 0) {
1212 handle = qdisc_alloc_handle(dev);
1213 if (handle == 0) {
1214 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1215 err = -ENOSPC;
1216 goto err_out3;
1217 }
1218 }
1219 if (!netif_is_multiqueue(dev))
1220 sch->flags |= TCQ_F_ONETXQUEUE;
1221 }
1222
1223 sch->handle = handle;
1224
1225 /* This exist to keep backward compatible with a userspace
1226 * loophole, what allowed userspace to get IFF_NO_QUEUE
1227 * facility on older kernels by setting tx_queue_len=0 (prior
1228 * to qdisc init), and then forgot to reinit tx_queue_len
1229 * before again attaching a qdisc.
1230 */
1231 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1232 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1233 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1234 }
1235
1236 err = qdisc_block_indexes_set(sch, tca, extack);
1237 if (err)
1238 goto err_out3;
1239
1240 if (ops->init) {
1241 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1242 if (err != 0)
1243 goto err_out5;
1244 }
1245
1246 if (tca[TCA_STAB]) {
1247 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1248 if (IS_ERR(stab)) {
1249 err = PTR_ERR(stab);
1250 goto err_out4;
1251 }
1252 rcu_assign_pointer(sch->stab, stab);
1253 }
1254 if (tca[TCA_RATE]) {
1255 seqcount_t *running;
1256
1257 err = -EOPNOTSUPP;
1258 if (sch->flags & TCQ_F_MQROOT) {
1259 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1260 goto err_out4;
1261 }
1262
1263 if (sch->parent != TC_H_ROOT &&
1264 !(sch->flags & TCQ_F_INGRESS) &&
1265 (!p || !(p->flags & TCQ_F_MQROOT)))
1266 running = qdisc_root_sleeping_running(sch);
1267 else
1268 running = &sch->running;
1269
1270 err = gen_new_estimator(&sch->bstats,
1271 sch->cpu_bstats,
1272 &sch->rate_est,
1273 NULL,
1274 running,
1275 tca[TCA_RATE]);
1276 if (err) {
1277 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1278 goto err_out4;
1279 }
1280 }
1281
1282 qdisc_hash_add(sch, false);
1283
1284 return sch;
1285
1286err_out5:
1287 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1288 if (ops->destroy)
1289 ops->destroy(sch);
1290err_out3:
1291 dev_put(dev);
1292 qdisc_free(sch);
1293err_out2:
1294 module_put(ops->owner);
1295err_out:
1296 *errp = err;
1297 return NULL;
1298
1299err_out4:
1300 /*
1301 * Any broken qdiscs that would require a ops->reset() here?
1302 * The qdisc was never in action so it shouldn't be necessary.
1303 */
1304 qdisc_put_stab(rtnl_dereference(sch->stab));
1305 if (ops->destroy)
1306 ops->destroy(sch);
1307 goto err_out3;
1308}
1309
1310static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1311 struct netlink_ext_ack *extack)
1312{
1313 struct qdisc_size_table *ostab, *stab = NULL;
1314 int err = 0;
1315
1316 if (tca[TCA_OPTIONS]) {
1317 if (!sch->ops->change) {
1318 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1319 return -EINVAL;
1320 }
1321 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1322 NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1323 return -EOPNOTSUPP;
1324 }
1325 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1326 if (err)
1327 return err;
1328 }
1329
1330 if (tca[TCA_STAB]) {
1331 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1332 if (IS_ERR(stab))
1333 return PTR_ERR(stab);
1334 }
1335
1336 ostab = rtnl_dereference(sch->stab);
1337 rcu_assign_pointer(sch->stab, stab);
1338 qdisc_put_stab(ostab);
1339
1340 if (tca[TCA_RATE]) {
1341 /* NB: ignores errors from replace_estimator
1342 because change can't be undone. */
1343 if (sch->flags & TCQ_F_MQROOT)
1344 goto out;
1345 gen_replace_estimator(&sch->bstats,
1346 sch->cpu_bstats,
1347 &sch->rate_est,
1348 NULL,
1349 qdisc_root_sleeping_running(sch),
1350 tca[TCA_RATE]);
1351 }
1352out:
1353 return 0;
1354}
1355
1356struct check_loop_arg {
1357 struct qdisc_walker w;
1358 struct Qdisc *p;
1359 int depth;
1360};
1361
1362static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1363 struct qdisc_walker *w);
1364
1365static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1366{
1367 struct check_loop_arg arg;
1368
1369 if (q->ops->cl_ops == NULL)
1370 return 0;
1371
1372 arg.w.stop = arg.w.skip = arg.w.count = 0;
1373 arg.w.fn = check_loop_fn;
1374 arg.depth = depth;
1375 arg.p = p;
1376 q->ops->cl_ops->walk(q, &arg.w);
1377 return arg.w.stop ? -ELOOP : 0;
1378}
1379
1380static int
1381check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1382{
1383 struct Qdisc *leaf;
1384 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1385 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1386
1387 leaf = cops->leaf(q, cl);
1388 if (leaf) {
1389 if (leaf == arg->p || arg->depth > 7)
1390 return -ELOOP;
1391 return check_loop(leaf, arg->p, arg->depth + 1);
1392 }
1393 return 0;
1394}
1395
1396const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1397 [TCA_KIND] = { .type = NLA_STRING },
1398 [TCA_RATE] = { .type = NLA_BINARY,
1399 .len = sizeof(struct tc_estimator) },
1400 [TCA_STAB] = { .type = NLA_NESTED },
1401 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
1402 [TCA_CHAIN] = { .type = NLA_U32 },
1403 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
1404 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
1405};
1406
1407/*
1408 * Delete/get qdisc.
1409 */
1410
1411static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1412 struct netlink_ext_ack *extack)
1413{
1414 struct net *net = sock_net(skb->sk);
1415 struct tcmsg *tcm = nlmsg_data(n);
1416 struct nlattr *tca[TCA_MAX + 1];
1417 struct net_device *dev;
1418 u32 clid;
1419 struct Qdisc *q = NULL;
1420 struct Qdisc *p = NULL;
1421 int err;
1422
1423 if ((n->nlmsg_type != RTM_GETQDISC) &&
1424 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1425 return -EPERM;
1426
1427 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1428 rtm_tca_policy, extack);
1429 if (err < 0)
1430 return err;
1431
1432 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1433 if (!dev)
1434 return -ENODEV;
1435
1436 clid = tcm->tcm_parent;
1437 if (clid) {
1438 if (clid != TC_H_ROOT) {
1439 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1440 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1441 if (!p) {
1442 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1443 return -ENOENT;
1444 }
1445 q = qdisc_leaf(p, clid);
1446 } else if (dev_ingress_queue(dev)) {
1447 q = dev_ingress_queue(dev)->qdisc_sleeping;
1448 }
1449 } else {
1450 q = dev->qdisc;
1451 }
1452 if (!q) {
1453 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1454 return -ENOENT;
1455 }
1456
1457 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1458 NL_SET_ERR_MSG(extack, "Invalid handle");
1459 return -EINVAL;
1460 }
1461 } else {
1462 q = qdisc_lookup(dev, tcm->tcm_handle);
1463 if (!q) {
1464 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1465 return -ENOENT;
1466 }
1467 }
1468
1469 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1470 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1471 return -EINVAL;
1472 }
1473
1474 if (n->nlmsg_type == RTM_DELQDISC) {
1475 if (!clid) {
1476 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1477 return -EINVAL;
1478 }
1479 if (q->handle == 0) {
1480 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1481 return -ENOENT;
1482 }
1483 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1484 if (err != 0)
1485 return err;
1486 } else {
1487 qdisc_notify(net, skb, n, clid, NULL, q);
1488 }
1489 return 0;
1490}
1491
1492/*
1493 * Create/change qdisc.
1494 */
1495
1496static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1497 struct netlink_ext_ack *extack)
1498{
1499 struct net *net = sock_net(skb->sk);
1500 struct tcmsg *tcm;
1501 struct nlattr *tca[TCA_MAX + 1];
1502 struct net_device *dev;
1503 u32 clid;
1504 struct Qdisc *q, *p;
1505 int err;
1506
1507 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1508 return -EPERM;
1509
1510replay:
1511 /* Reinit, just in case something touches this. */
1512 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1513 rtm_tca_policy, extack);
1514 if (err < 0)
1515 return err;
1516
1517 tcm = nlmsg_data(n);
1518 clid = tcm->tcm_parent;
1519 q = p = NULL;
1520
1521 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1522 if (!dev)
1523 return -ENODEV;
1524
1525
1526 if (clid) {
1527 if (clid != TC_H_ROOT) {
1528 if (clid != TC_H_INGRESS) {
1529 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1530 if (!p) {
1531 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1532 return -ENOENT;
1533 }
1534 q = qdisc_leaf(p, clid);
1535 } else if (dev_ingress_queue_create(dev)) {
1536 q = dev_ingress_queue(dev)->qdisc_sleeping;
1537 }
1538 } else {
1539 q = dev->qdisc;
1540 }
1541
1542 /* It may be default qdisc, ignore it */
1543 if (q && q->handle == 0)
1544 q = NULL;
1545
1546 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1547 if (tcm->tcm_handle) {
1548 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1549 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1550 return -EEXIST;
1551 }
1552 if (TC_H_MIN(tcm->tcm_handle)) {
1553 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1554 return -EINVAL;
1555 }
1556 q = qdisc_lookup(dev, tcm->tcm_handle);
1557 if (!q)
1558 goto create_n_graft;
1559 if (n->nlmsg_flags & NLM_F_EXCL) {
1560 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1561 return -EEXIST;
1562 }
1563 if (tca[TCA_KIND] &&
1564 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1565 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1566 return -EINVAL;
1567 }
1568 if (q == p ||
1569 (p && check_loop(q, p, 0))) {
1570 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1571 return -ELOOP;
1572 }
1573 qdisc_refcount_inc(q);
1574 goto graft;
1575 } else {
1576 if (!q)
1577 goto create_n_graft;
1578
1579 /* This magic test requires explanation.
1580 *
1581 * We know, that some child q is already
1582 * attached to this parent and have choice:
1583 * either to change it or to create/graft new one.
1584 *
1585 * 1. We are allowed to create/graft only
1586 * if CREATE and REPLACE flags are set.
1587 *
1588 * 2. If EXCL is set, requestor wanted to say,
1589 * that qdisc tcm_handle is not expected
1590 * to exist, so that we choose create/graft too.
1591 *
1592 * 3. The last case is when no flags are set.
1593 * Alas, it is sort of hole in API, we
1594 * cannot decide what to do unambiguously.
1595 * For now we select create/graft, if
1596 * user gave KIND, which does not match existing.
1597 */
1598 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1599 (n->nlmsg_flags & NLM_F_REPLACE) &&
1600 ((n->nlmsg_flags & NLM_F_EXCL) ||
1601 (tca[TCA_KIND] &&
1602 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1603 goto create_n_graft;
1604 }
1605 }
1606 } else {
1607 if (!tcm->tcm_handle) {
1608 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1609 return -EINVAL;
1610 }
1611 q = qdisc_lookup(dev, tcm->tcm_handle);
1612 }
1613
1614 /* Change qdisc parameters */
1615 if (!q) {
1616 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1617 return -ENOENT;
1618 }
1619 if (n->nlmsg_flags & NLM_F_EXCL) {
1620 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1621 return -EEXIST;
1622 }
1623 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1624 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1625 return -EINVAL;
1626 }
1627 err = qdisc_change(q, tca, extack);
1628 if (err == 0)
1629 qdisc_notify(net, skb, n, clid, NULL, q);
1630 return err;
1631
1632create_n_graft:
1633 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1634 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1635 return -ENOENT;
1636 }
1637 if (clid == TC_H_INGRESS) {
1638 if (dev_ingress_queue(dev)) {
1639 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1640 tcm->tcm_parent, tcm->tcm_parent,
1641 tca, &err, extack);
1642 } else {
1643 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1644 err = -ENOENT;
1645 }
1646 } else {
1647 struct netdev_queue *dev_queue;
1648
1649 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1650 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1651 else if (p)
1652 dev_queue = p->dev_queue;
1653 else
1654 dev_queue = netdev_get_tx_queue(dev, 0);
1655
1656 q = qdisc_create(dev, dev_queue, p,
1657 tcm->tcm_parent, tcm->tcm_handle,
1658 tca, &err, extack);
1659 }
1660 if (q == NULL) {
1661 if (err == -EAGAIN)
1662 goto replay;
1663 return err;
1664 }
1665
1666graft:
1667 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1668 if (err) {
1669 if (q)
1670 qdisc_put(q);
1671 return err;
1672 }
1673
1674 return 0;
1675}
1676
1677static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1678 struct netlink_callback *cb,
1679 int *q_idx_p, int s_q_idx, bool recur,
1680 bool dump_invisible)
1681{
1682 int ret = 0, q_idx = *q_idx_p;
1683 struct Qdisc *q;
1684 int b;
1685
1686 if (!root)
1687 return 0;
1688
1689 q = root;
1690 if (q_idx < s_q_idx) {
1691 q_idx++;
1692 } else {
1693 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1694 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1695 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1696 RTM_NEWQDISC) <= 0)
1697 goto done;
1698 q_idx++;
1699 }
1700
1701 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1702 * itself has already been dumped.
1703 *
1704 * If we've already dumped the top-level (ingress) qdisc above and the global
1705 * qdisc hashtable, we don't want to hit it again
1706 */
1707 if (!qdisc_dev(root) || !recur)
1708 goto out;
1709
1710 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1711 if (q_idx < s_q_idx) {
1712 q_idx++;
1713 continue;
1714 }
1715 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1716 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1717 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1718 RTM_NEWQDISC) <= 0)
1719 goto done;
1720 q_idx++;
1721 }
1722
1723out:
1724 *q_idx_p = q_idx;
1725 return ret;
1726done:
1727 ret = -1;
1728 goto out;
1729}
1730
1731static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1732{
1733 struct net *net = sock_net(skb->sk);
1734 int idx, q_idx;
1735 int s_idx, s_q_idx;
1736 struct net_device *dev;
1737 const struct nlmsghdr *nlh = cb->nlh;
1738 struct nlattr *tca[TCA_MAX + 1];
1739 int err;
1740
1741 s_idx = cb->args[0];
1742 s_q_idx = q_idx = cb->args[1];
1743
1744 idx = 0;
1745 ASSERT_RTNL();
1746
1747 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1748 rtm_tca_policy, cb->extack);
1749 if (err < 0)
1750 return err;
1751
1752 for_each_netdev(net, dev) {
1753 struct netdev_queue *dev_queue;
1754
1755 if (idx < s_idx)
1756 goto cont;
1757 if (idx > s_idx)
1758 s_q_idx = 0;
1759 q_idx = 0;
1760
1761 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1762 true, tca[TCA_DUMP_INVISIBLE]) < 0)
1763 goto done;
1764
1765 dev_queue = dev_ingress_queue(dev);
1766 if (dev_queue &&
1767 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1768 &q_idx, s_q_idx, false,
1769 tca[TCA_DUMP_INVISIBLE]) < 0)
1770 goto done;
1771
1772cont:
1773 idx++;
1774 }
1775
1776done:
1777 cb->args[0] = idx;
1778 cb->args[1] = q_idx;
1779
1780 return skb->len;
1781}
1782
1783
1784
1785/************************************************
1786 * Traffic classes manipulation. *
1787 ************************************************/
1788
1789static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1790 unsigned long cl,
1791 u32 portid, u32 seq, u16 flags, int event)
1792{
1793 struct tcmsg *tcm;
1794 struct nlmsghdr *nlh;
1795 unsigned char *b = skb_tail_pointer(skb);
1796 struct gnet_dump d;
1797 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1798
1799 cond_resched();
1800 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1801 if (!nlh)
1802 goto out_nlmsg_trim;
1803 tcm = nlmsg_data(nlh);
1804 tcm->tcm_family = AF_UNSPEC;
1805 tcm->tcm__pad1 = 0;
1806 tcm->tcm__pad2 = 0;
1807 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1808 tcm->tcm_parent = q->handle;
1809 tcm->tcm_handle = q->handle;
1810 tcm->tcm_info = 0;
1811 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1812 goto nla_put_failure;
1813 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1814 goto nla_put_failure;
1815
1816 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1817 NULL, &d, TCA_PAD) < 0)
1818 goto nla_put_failure;
1819
1820 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1821 goto nla_put_failure;
1822
1823 if (gnet_stats_finish_copy(&d) < 0)
1824 goto nla_put_failure;
1825
1826 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1827 return skb->len;
1828
1829out_nlmsg_trim:
1830nla_put_failure:
1831 nlmsg_trim(skb, b);
1832 return -1;
1833}
1834
1835static int tclass_notify(struct net *net, struct sk_buff *oskb,
1836 struct nlmsghdr *n, struct Qdisc *q,
1837 unsigned long cl, int event)
1838{
1839 struct sk_buff *skb;
1840 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1841 int err = 0;
1842
1843 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1844 if (!skb)
1845 return -ENOBUFS;
1846
1847 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1848 kfree_skb(skb);
1849 return -EINVAL;
1850 }
1851
1852 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1853 n->nlmsg_flags & NLM_F_ECHO);
1854 if (err > 0)
1855 err = 0;
1856 return err;
1857}
1858
1859static int tclass_del_notify(struct net *net,
1860 const struct Qdisc_class_ops *cops,
1861 struct sk_buff *oskb, struct nlmsghdr *n,
1862 struct Qdisc *q, unsigned long cl)
1863{
1864 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1865 struct sk_buff *skb;
1866 int err = 0;
1867
1868 if (!cops->delete)
1869 return -EOPNOTSUPP;
1870
1871 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1872 if (!skb)
1873 return -ENOBUFS;
1874
1875 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1876 RTM_DELTCLASS) < 0) {
1877 kfree_skb(skb);
1878 return -EINVAL;
1879 }
1880
1881 err = cops->delete(q, cl);
1882 if (err) {
1883 kfree_skb(skb);
1884 return err;
1885 }
1886
1887 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1888 n->nlmsg_flags & NLM_F_ECHO);
1889 if (err > 0)
1890 err = 0;
1891 return err;
1892}
1893
1894#ifdef CONFIG_NET_CLS
1895
1896struct tcf_bind_args {
1897 struct tcf_walker w;
1898 u32 classid;
1899 unsigned long cl;
1900};
1901
1902static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1903{
1904 struct tcf_bind_args *a = (void *)arg;
1905
1906 if (tp->ops->bind_class) {
1907 struct Qdisc *q = tcf_block_q(tp->chain->block);
1908
1909 sch_tree_lock(q);
1910 tp->ops->bind_class(n, a->classid, a->cl);
1911 sch_tree_unlock(q);
1912 }
1913 return 0;
1914}
1915
1916static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1917 unsigned long new_cl)
1918{
1919 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1920 struct tcf_block *block;
1921 struct tcf_chain *chain;
1922 unsigned long cl;
1923
1924 cl = cops->find(q, portid);
1925 if (!cl)
1926 return;
1927 block = cops->tcf_block(q, cl, NULL);
1928 if (!block)
1929 return;
1930 for (chain = tcf_get_next_chain(block, NULL);
1931 chain;
1932 chain = tcf_get_next_chain(block, chain)) {
1933 struct tcf_proto *tp;
1934
1935 for (tp = tcf_get_next_proto(chain, NULL, true);
1936 tp; tp = tcf_get_next_proto(chain, tp, true)) {
1937 struct tcf_bind_args arg = {};
1938
1939 arg.w.fn = tcf_node_bind;
1940 arg.classid = clid;
1941 arg.cl = new_cl;
1942 tp->ops->walk(tp, &arg.w, true);
1943 }
1944 }
1945}
1946
1947#else
1948
1949static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1950 unsigned long new_cl)
1951{
1952}
1953
1954#endif
1955
1956static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1957 struct netlink_ext_ack *extack)
1958{
1959 struct net *net = sock_net(skb->sk);
1960 struct tcmsg *tcm = nlmsg_data(n);
1961 struct nlattr *tca[TCA_MAX + 1];
1962 struct net_device *dev;
1963 struct Qdisc *q = NULL;
1964 const struct Qdisc_class_ops *cops;
1965 unsigned long cl = 0;
1966 unsigned long new_cl;
1967 u32 portid;
1968 u32 clid;
1969 u32 qid;
1970 int err;
1971
1972 if ((n->nlmsg_type != RTM_GETTCLASS) &&
1973 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1974 return -EPERM;
1975
1976 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1977 rtm_tca_policy, extack);
1978 if (err < 0)
1979 return err;
1980
1981 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1982 if (!dev)
1983 return -ENODEV;
1984
1985 /*
1986 parent == TC_H_UNSPEC - unspecified parent.
1987 parent == TC_H_ROOT - class is root, which has no parent.
1988 parent == X:0 - parent is root class.
1989 parent == X:Y - parent is a node in hierarchy.
1990 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1991
1992 handle == 0:0 - generate handle from kernel pool.
1993 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1994 handle == X:Y - clear.
1995 handle == X:0 - root class.
1996 */
1997
1998 /* Step 1. Determine qdisc handle X:0 */
1999
2000 portid = tcm->tcm_parent;
2001 clid = tcm->tcm_handle;
2002 qid = TC_H_MAJ(clid);
2003
2004 if (portid != TC_H_ROOT) {
2005 u32 qid1 = TC_H_MAJ(portid);
2006
2007 if (qid && qid1) {
2008 /* If both majors are known, they must be identical. */
2009 if (qid != qid1)
2010 return -EINVAL;
2011 } else if (qid1) {
2012 qid = qid1;
2013 } else if (qid == 0)
2014 qid = dev->qdisc->handle;
2015
2016 /* Now qid is genuine qdisc handle consistent
2017 * both with parent and child.
2018 *
2019 * TC_H_MAJ(portid) still may be unspecified, complete it now.
2020 */
2021 if (portid)
2022 portid = TC_H_MAKE(qid, portid);
2023 } else {
2024 if (qid == 0)
2025 qid = dev->qdisc->handle;
2026 }
2027
2028 /* OK. Locate qdisc */
2029 q = qdisc_lookup(dev, qid);
2030 if (!q)
2031 return -ENOENT;
2032
2033 /* An check that it supports classes */
2034 cops = q->ops->cl_ops;
2035 if (cops == NULL)
2036 return -EINVAL;
2037
2038 /* Now try to get class */
2039 if (clid == 0) {
2040 if (portid == TC_H_ROOT)
2041 clid = qid;
2042 } else
2043 clid = TC_H_MAKE(qid, clid);
2044
2045 if (clid)
2046 cl = cops->find(q, clid);
2047
2048 if (cl == 0) {
2049 err = -ENOENT;
2050 if (n->nlmsg_type != RTM_NEWTCLASS ||
2051 !(n->nlmsg_flags & NLM_F_CREATE))
2052 goto out;
2053 } else {
2054 switch (n->nlmsg_type) {
2055 case RTM_NEWTCLASS:
2056 err = -EEXIST;
2057 if (n->nlmsg_flags & NLM_F_EXCL)
2058 goto out;
2059 break;
2060 case RTM_DELTCLASS:
2061 err = tclass_del_notify(net, cops, skb, n, q, cl);
2062 /* Unbind the class with flilters with 0 */
2063 tc_bind_tclass(q, portid, clid, 0);
2064 goto out;
2065 case RTM_GETTCLASS:
2066 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2067 goto out;
2068 default:
2069 err = -EINVAL;
2070 goto out;
2071 }
2072 }
2073
2074 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2075 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2076 return -EOPNOTSUPP;
2077 }
2078
2079 new_cl = cl;
2080 err = -EOPNOTSUPP;
2081 if (cops->change)
2082 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2083 if (err == 0) {
2084 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2085 /* We just create a new class, need to do reverse binding. */
2086 if (cl != new_cl)
2087 tc_bind_tclass(q, portid, clid, new_cl);
2088 }
2089out:
2090 return err;
2091}
2092
2093struct qdisc_dump_args {
2094 struct qdisc_walker w;
2095 struct sk_buff *skb;
2096 struct netlink_callback *cb;
2097};
2098
2099static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2100 struct qdisc_walker *arg)
2101{
2102 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2103
2104 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2105 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2106 RTM_NEWTCLASS);
2107}
2108
2109static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2110 struct tcmsg *tcm, struct netlink_callback *cb,
2111 int *t_p, int s_t)
2112{
2113 struct qdisc_dump_args arg;
2114
2115 if (tc_qdisc_dump_ignore(q, false) ||
2116 *t_p < s_t || !q->ops->cl_ops ||
2117 (tcm->tcm_parent &&
2118 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2119 (*t_p)++;
2120 return 0;
2121 }
2122 if (*t_p > s_t)
2123 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2124 arg.w.fn = qdisc_class_dump;
2125 arg.skb = skb;
2126 arg.cb = cb;
2127 arg.w.stop = 0;
2128 arg.w.skip = cb->args[1];
2129 arg.w.count = 0;
2130 q->ops->cl_ops->walk(q, &arg.w);
2131 cb->args[1] = arg.w.count;
2132 if (arg.w.stop)
2133 return -1;
2134 (*t_p)++;
2135 return 0;
2136}
2137
2138static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2139 struct tcmsg *tcm, struct netlink_callback *cb,
2140 int *t_p, int s_t)
2141{
2142 struct Qdisc *q;
2143 int b;
2144
2145 if (!root)
2146 return 0;
2147
2148 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2149 return -1;
2150
2151 if (!qdisc_dev(root))
2152 return 0;
2153
2154 if (tcm->tcm_parent) {
2155 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2156 if (q && q != root &&
2157 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2158 return -1;
2159 return 0;
2160 }
2161 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2162 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2163 return -1;
2164 }
2165
2166 return 0;
2167}
2168
2169static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2170{
2171 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2172 struct net *net = sock_net(skb->sk);
2173 struct netdev_queue *dev_queue;
2174 struct net_device *dev;
2175 int t, s_t;
2176
2177 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2178 return 0;
2179 dev = dev_get_by_index(net, tcm->tcm_ifindex);
2180 if (!dev)
2181 return 0;
2182
2183 s_t = cb->args[0];
2184 t = 0;
2185
2186 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
2187 goto done;
2188
2189 dev_queue = dev_ingress_queue(dev);
2190 if (dev_queue &&
2191 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2192 &t, s_t) < 0)
2193 goto done;
2194
2195done:
2196 cb->args[0] = t;
2197
2198 dev_put(dev);
2199 return skb->len;
2200}
2201
2202#ifdef CONFIG_PROC_FS
2203static int psched_show(struct seq_file *seq, void *v)
2204{
2205 seq_printf(seq, "%08x %08x %08x %08x\n",
2206 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2207 1000000,
2208 (u32)NSEC_PER_SEC / hrtimer_resolution);
2209
2210 return 0;
2211}
2212
2213static int __net_init psched_net_init(struct net *net)
2214{
2215 struct proc_dir_entry *e;
2216
2217 e = proc_create_single("psched", 0, net->proc_net, psched_show);
2218 if (e == NULL)
2219 return -ENOMEM;
2220
2221 return 0;
2222}
2223
2224static void __net_exit psched_net_exit(struct net *net)
2225{
2226 remove_proc_entry("psched", net->proc_net);
2227}
2228#else
2229static int __net_init psched_net_init(struct net *net)
2230{
2231 return 0;
2232}
2233
2234static void __net_exit psched_net_exit(struct net *net)
2235{
2236}
2237#endif
2238
2239static struct pernet_operations psched_net_ops = {
2240 .init = psched_net_init,
2241 .exit = psched_net_exit,
2242};
2243
2244static int __init pktsched_init(void)
2245{
2246 int err;
2247
2248 err = register_pernet_subsys(&psched_net_ops);
2249 if (err) {
2250 pr_err("pktsched_init: "
2251 "cannot initialize per netns operations\n");
2252 return err;
2253 }
2254
2255 register_qdisc(&pfifo_fast_ops);
2256 register_qdisc(&pfifo_qdisc_ops);
2257 register_qdisc(&bfifo_qdisc_ops);
2258 register_qdisc(&pfifo_head_drop_qdisc_ops);
2259 register_qdisc(&mq_qdisc_ops);
2260 register_qdisc(&noqueue_qdisc_ops);
2261
2262 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2263 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2264 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2265 0);
2266 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2267 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2268 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2269 0);
2270
2271 return 0;
2272}
2273
2274subsys_initcall(pktsched_init);