Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net_sched: Add size table for qdiscs

Add size table functions for qdiscs and calculate packet size in
qdisc_enqueue().

Based on patch by Patrick McHardy
http://marc.info/?l=linux-netdev&m=115201979221729&w=2

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Jussi Kivilinna; committed by David S. Miller.
175f9c1b 0abf77e5

+199 -5
+20
include/linux/pkt_sched.h
··· 85 85 86 86 #define TC_RTAB_SIZE 1024 87 87 88 + struct tc_sizespec { 89 + unsigned char cell_log; 90 + unsigned char size_log; 91 + short cell_align; 92 + int overhead; 93 + unsigned int linklayer; 94 + unsigned int mpu; 95 + unsigned int mtu; 96 + unsigned int tsize; 97 + }; 98 + 99 + enum { 100 + TCA_STAB_UNSPEC, 101 + TCA_STAB_BASE, 102 + TCA_STAB_DATA, 103 + __TCA_STAB_MAX 104 + }; 105 + 106 + #define TCA_STAB_MAX (__TCA_STAB_MAX - 1) 107 + 88 108 /* FIFO section */ 89 109 90 110 struct tc_fifo_qopt
+1
include/linux/rtnetlink.h
··· 482 482 TCA_RATE, 483 483 TCA_FCNT, 484 484 TCA_STATS2, 485 + TCA_STAB, 485 486 __TCA_MAX 486 487 }; 487 488
+1
include/net/pkt_sched.h
··· 83 83 extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, 84 84 struct nlattr *tab); 85 85 extern void qdisc_put_rtab(struct qdisc_rate_table *tab); 86 + extern void qdisc_put_stab(struct qdisc_size_table *tab); 86 87 87 88 extern void __qdisc_run(struct Qdisc *q); 88 89
+24 -1
include/net/sch_generic.h
··· 29 29 __QDISC_STATE_SCHED, 30 30 }; 31 31 32 + struct qdisc_size_table { 33 + struct list_head list; 34 + struct tc_sizespec szopts; 35 + int refcnt; 36 + u16 data[]; 37 + }; 38 + 32 39 struct Qdisc 33 40 { 34 41 int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); ··· 46 39 #define TCQ_F_INGRESS 4 47 40 int padded; 48 41 struct Qdisc_ops *ops; 42 + struct qdisc_size_table *stab; 49 43 u32 handle; 50 44 u32 parent; 51 45 atomic_t refcnt; ··· 173 165 struct tcf_proto_ops *ops; 174 166 }; 175 167 168 + struct qdisc_skb_cb { 169 + unsigned int pkt_len; 170 + char data[]; 171 + }; 172 + 173 + static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb) 174 + { 175 + return (struct qdisc_skb_cb *)skb->cb; 176 + } 177 + 176 178 static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) 177 179 { 178 180 return &qdisc->q.lock; ··· 275 257 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, 276 258 struct netdev_queue *dev_queue, 277 259 struct Qdisc_ops *ops, u32 parentid); 260 + extern void qdisc_calculate_pkt_len(struct sk_buff *skb, 261 + struct qdisc_size_table *stab); 278 262 extern void tcf_destroy(struct tcf_proto *tp); 279 263 extern void tcf_destroy_chain(struct tcf_proto **fl); 280 264 ··· 328 308 329 309 static inline unsigned int qdisc_pkt_len(struct sk_buff *skb) 330 310 { 331 - return skb->len; 311 + return qdisc_skb_cb(skb)->pkt_len; 332 312 } 333 313 334 314 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 335 315 { 316 + if (sch->stab) 317 + qdisc_calculate_pkt_len(skb, sch->stab); 336 318 return sch->enqueue(skb, sch); 337 319 } 338 320 339 321 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) 340 322 { 323 + qdisc_skb_cb(skb)->pkt_len = skb->len; 341 324 return qdisc_enqueue(skb, sch); 342 325 } 343 326
+149 -2
net/sched/sch_api.c
··· 286 286 } 287 287 EXPORT_SYMBOL(qdisc_put_rtab); 288 288 289 + static LIST_HEAD(qdisc_stab_list); 290 + static DEFINE_SPINLOCK(qdisc_stab_lock); 291 + 292 + static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { 293 + [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, 294 + [TCA_STAB_DATA] = { .type = NLA_BINARY }, 295 + }; 296 + 297 + static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) 298 + { 299 + struct nlattr *tb[TCA_STAB_MAX + 1]; 300 + struct qdisc_size_table *stab; 301 + struct tc_sizespec *s; 302 + unsigned int tsize = 0; 303 + u16 *tab = NULL; 304 + int err; 305 + 306 + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); 307 + if (err < 0) 308 + return ERR_PTR(err); 309 + if (!tb[TCA_STAB_BASE]) 310 + return ERR_PTR(-EINVAL); 311 + 312 + s = nla_data(tb[TCA_STAB_BASE]); 313 + 314 + if (s->tsize > 0) { 315 + if (!tb[TCA_STAB_DATA]) 316 + return ERR_PTR(-EINVAL); 317 + tab = nla_data(tb[TCA_STAB_DATA]); 318 + tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 319 + } 320 + 321 + if (!s || tsize != s->tsize || (!tab && tsize > 0)) 322 + return ERR_PTR(-EINVAL); 323 + 324 + spin_lock(&qdisc_stab_lock); 325 + 326 + list_for_each_entry(stab, &qdisc_stab_list, list) { 327 + if (memcmp(&stab->szopts, s, sizeof(*s))) 328 + continue; 329 + if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) 330 + continue; 331 + stab->refcnt++; 332 + spin_unlock(&qdisc_stab_lock); 333 + return stab; 334 + } 335 + 336 + spin_unlock(&qdisc_stab_lock); 337 + 338 + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); 339 + if (!stab) 340 + return ERR_PTR(-ENOMEM); 341 + 342 + stab->refcnt = 1; 343 + stab->szopts = *s; 344 + if (tsize > 0) 345 + memcpy(stab->data, tab, tsize * sizeof(u16)); 346 + 347 + spin_lock(&qdisc_stab_lock); 348 + list_add_tail(&stab->list, &qdisc_stab_list); 349 + spin_unlock(&qdisc_stab_lock); 350 + 351 + return stab; 352 + } 353 + 354 + void qdisc_put_stab(struct qdisc_size_table *tab) 355 + { 356 
+ if (!tab) 357 + return; 358 + 359 + spin_lock(&qdisc_stab_lock); 360 + 361 + if (--tab->refcnt == 0) { 362 + list_del(&tab->list); 363 + kfree(tab); 364 + } 365 + 366 + spin_unlock(&qdisc_stab_lock); 367 + } 368 + EXPORT_SYMBOL(qdisc_put_stab); 369 + 370 + static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) 371 + { 372 + struct nlattr *nest; 373 + 374 + nest = nla_nest_start(skb, TCA_STAB); 375 + NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); 376 + nla_nest_end(skb, nest); 377 + 378 + return skb->len; 379 + 380 + nla_put_failure: 381 + return -1; 382 + } 383 + 384 + void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) 385 + { 386 + int pkt_len, slot; 387 + 388 + pkt_len = skb->len + stab->szopts.overhead; 389 + if (unlikely(!stab->szopts.tsize)) 390 + goto out; 391 + 392 + slot = pkt_len + stab->szopts.cell_align; 393 + if (unlikely(slot < 0)) 394 + slot = 0; 395 + 396 + slot >>= stab->szopts.cell_log; 397 + if (likely(slot < stab->szopts.tsize)) 398 + pkt_len = stab->data[slot]; 399 + else 400 + pkt_len = stab->data[stab->szopts.tsize - 1] * 401 + (slot / stab->szopts.tsize) + 402 + stab->data[slot % stab->szopts.tsize]; 403 + 404 + pkt_len <<= stab->szopts.size_log; 405 + out: 406 + if (unlikely(pkt_len < 1)) 407 + pkt_len = 1; 408 + qdisc_skb_cb(skb)->pkt_len = pkt_len; 409 + } 410 + EXPORT_SYMBOL(qdisc_calculate_pkt_len); 411 + 289 412 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 290 413 { 291 414 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, ··· 736 613 struct nlattr *kind = tca[TCA_KIND]; 737 614 struct Qdisc *sch; 738 615 struct Qdisc_ops *ops; 616 + struct qdisc_size_table *stab; 739 617 740 618 ops = qdisc_lookup_ops(kind); 741 619 #ifdef CONFIG_KMOD ··· 794 670 sch->handle = handle; 795 671 796 672 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { 673 + if (tca[TCA_STAB]) { 674 + stab = qdisc_get_stab(tca[TCA_STAB]); 
675 + if (IS_ERR(stab)) { 676 + err = PTR_ERR(stab); 677 + goto err_out3; 678 + } 679 + sch->stab = stab; 680 + } 797 681 if (tca[TCA_RATE]) { 798 682 err = gen_new_estimator(&sch->bstats, &sch->rate_est, 799 683 qdisc_root_lock(sch), ··· 823 691 return sch; 824 692 } 825 693 err_out3: 694 + qdisc_put_stab(sch->stab); 826 695 dev_put(dev); 827 696 kfree((char *) sch - sch->padded); 828 697 err_out2: ··· 835 702 836 703 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) 837 704 { 838 - if (tca[TCA_OPTIONS]) { 839 - int err; 705 + struct qdisc_size_table *stab = NULL; 706 + int err = 0; 840 707 708 + if (tca[TCA_OPTIONS]) { 841 709 if (sch->ops->change == NULL) 842 710 return -EINVAL; 843 711 err = sch->ops->change(sch, tca[TCA_OPTIONS]); 844 712 if (err) 845 713 return err; 846 714 } 715 + 716 + if (tca[TCA_STAB]) { 717 + stab = qdisc_get_stab(tca[TCA_STAB]); 718 + if (IS_ERR(stab)) 719 + return PTR_ERR(stab); 720 + } 721 + 722 + qdisc_put_stab(sch->stab); 723 + sch->stab = stab; 724 + 847 725 if (tca[TCA_RATE]) 848 726 gen_replace_estimator(&sch->bstats, &sch->rate_est, 849 727 qdisc_root_lock(sch), tca[TCA_RATE]); ··· 1137 993 if (q->ops->dump && q->ops->dump(q, skb) < 0) 1138 994 goto nla_put_failure; 1139 995 q->qstats.qlen = q->q.qlen; 996 + 997 + if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) 998 + goto nla_put_failure; 1140 999 1141 1000 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 1142 1001 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
+1
net/sched/sch_generic.c
··· 469 469 struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); 470 470 const struct Qdisc_ops *ops = qdisc->ops; 471 471 472 + qdisc_put_stab(qdisc->stab); 472 473 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 473 474 if (ops->reset) 474 475 ops->reset(qdisc);
+3 -2
net/sched/sch_netem.c
··· 84 84 85 85 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 86 86 { 87 - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct netem_skb_cb)); 88 - return (struct netem_skb_cb *)skb->cb; 87 + BUILD_BUG_ON(sizeof(skb->cb) < 88 + sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); 89 + return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 89 90 } 90 91 91 92 /* init_crandom - initialize correlated random number generator