Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.20-rc4 1831 lines 44 kB view raw
1/* 2 * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net> 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 2 7 * of the License, or (at your option) any later version. 8 * 9 * 2003-10-17 - Ported from altq 10 */ 11/* 12 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. 13 * 14 * Permission to use, copy, modify, and distribute this software and 15 * its documentation is hereby granted (including for commercial or 16 * for-profit use), provided that both the copyright notice and this 17 * permission notice appear in all copies of the software, derivative 18 * works, or modified versions, and any portions thereof. 19 * 20 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF 21 * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS 22 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED 23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 28 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 33 * DAMAGE. 34 * 35 * Carnegie Mellon encourages (but does not require) users of this 36 * software to return any improvements or extensions that they make, 37 * and to grant Carnegie Mellon the rights to redistribute these 38 * changes without encumbrance. 39 */ 40/* 41 * H-FSC is described in Proceedings of SIGCOMM'97, 42 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, 43 * Real-Time and Priority Service" 44 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. 45 * 46 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing. 47 * when a class has an upperlimit, the fit-time is computed from the 48 * upperlimit service curve. the link-sharing scheduler does not schedule 49 * a class whose fit-time exceeds the current time. 50 */ 51 52#include <linux/kernel.h> 53#include <linux/module.h> 54#include <linux/types.h> 55#include <linux/errno.h> 56#include <linux/jiffies.h> 57#include <linux/compiler.h> 58#include <linux/spinlock.h> 59#include <linux/skbuff.h> 60#include <linux/string.h> 61#include <linux/slab.h> 62#include <linux/timer.h> 63#include <linux/list.h> 64#include <linux/rbtree.h> 65#include <linux/init.h> 66#include <linux/netdevice.h> 67#include <linux/rtnetlink.h> 68#include <linux/pkt_sched.h> 69#include <net/pkt_sched.h> 70#include <net/pkt_cls.h> 71#include <asm/system.h> 72#include <asm/div64.h> 73 74#define HFSC_DEBUG 1 75 76/* 77 * kernel internal service curve representation: 78 * coordinates are given by 64 bit unsigned integers. 79 * x-axis: unit is clock count. 80 * y-axis: unit is byte. 81 * 82 * The service curve parameters are converted to the internal 83 * representation. The slope values are scaled to avoid overflow. 84 * the inverse slope values as well as the y-projection of the 1st 85 * segment are kept in order to to avoid 64-bit divide operations 86 * that are expensive on 32-bit architectures. 87 */ 88 89struct internal_sc 90{ 91 u64 sm1; /* scaled slope of the 1st segment */ 92 u64 ism1; /* scaled inverse-slope of the 1st segment */ 93 u64 dx; /* the x-projection of the 1st segment */ 94 u64 dy; /* the y-projection of the 1st segment */ 95 u64 sm2; /* scaled slope of the 2nd segment */ 96 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 97}; 98 99/* runtime service curve */ 100struct runtime_sc 101{ 102 u64 x; /* current starting position on x-axis */ 103 u64 y; /* current starting position on y-axis */ 104 u64 sm1; /* scaled slope of the 1st segment */ 105 u64 ism1; /* scaled inverse-slope of the 1st segment */ 106 u64 dx; /* the x-projection of the 1st segment */ 107 u64 dy; /* the y-projection of the 1st segment */ 108 u64 sm2; /* scaled slope of the 2nd segment */ 109 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 110}; 111 112enum hfsc_class_flags 113{ 114 HFSC_RSC = 0x1, 115 HFSC_FSC = 0x2, 116 HFSC_USC = 0x4 117}; 118 119struct hfsc_class 120{ 121 u32 classid; /* class id */ 122 unsigned int refcnt; /* usage count */ 123 124 struct gnet_stats_basic bstats; 125 struct gnet_stats_queue qstats; 126 struct gnet_stats_rate_est rate_est; 127 spinlock_t *stats_lock; 128 unsigned int level; /* class level in hierarchy */ 129 struct tcf_proto *filter_list; /* filter list */ 130 unsigned int filter_cnt; /* filter count */ 131 132 struct hfsc_sched *sched; /* scheduler data */ 133 struct hfsc_class *cl_parent; /* parent class */ 134 struct list_head siblings; /* sibling classes */ 135 struct list_head children; /* child classes */ 136 struct Qdisc *qdisc; /* leaf qdisc */ 137 138 struct rb_node el_node; /* qdisc's eligible tree member */ 139 struct rb_root vt_tree; /* active children sorted by cl_vt */ 140 struct rb_node vt_node; /* parent's vt_tree member */ 141 struct rb_root cf_tree; /* active children sorted by cl_f */ 142 struct rb_node cf_node; /* parent's cf_heap member */ 143 struct list_head hlist; /* hash list member */ 144 struct list_head dlist; /* drop list member */ 145 146 u64 cl_total; /* total work in bytes */ 147 u64 cl_cumul; /* cumulative work in bytes done by 148 real-time criteria */ 149 150 u64 cl_d; /* deadline*/ 151 u64 cl_e; /* eligible time */ 152 u64 cl_vt; /* virtual time */ 153 u64 cl_f; /* time when this class will fit for 154 link-sharing, max(myf, cfmin) */ 155 u64 cl_myf; /* my fit-time (calculated from this 156 class's own upperlimit curve) */ 157 u64 cl_myfadj; /* my fit-time adjustment (to cancel 158 history dependence) */ 159 u64 cl_cfmin; /* earliest children's fit-time (used 160 with cl_myf to obtain cl_f) */ 161 u64 cl_cvtmin; /* minimal virtual time among the 162 children fit for link-sharing 163 (monotonic within a period) */ 164 u64 cl_vtadj; /* intra-period cumulative vt 165 adjustment */ 166 u64 cl_vtoff; /* inter-period cumulative vt offset */ 167 u64 cl_cvtmax; /* max child's vt in the last period */ 168 u64 cl_cvtoff; /* cumulative cvtmax of all periods */ 169 u64 cl_pcvtoff; /* parent's cvtoff at initalization 170 time */ 171 172 struct internal_sc cl_rsc; /* internal real-time service curve */ 173 struct internal_sc cl_fsc; /* internal fair service curve */ 174 struct internal_sc cl_usc; /* internal upperlimit service curve */ 175 struct runtime_sc cl_deadline; /* deadline curve */ 176 struct runtime_sc cl_eligible; /* eligible curve */ 177 struct runtime_sc cl_virtual; /* virtual curve */ 178 struct runtime_sc cl_ulimit; /* upperlimit curve */ 179 180 unsigned long cl_flags; /* which curves are valid */ 181 unsigned long cl_vtperiod; /* vt period sequence number */ 182 unsigned long cl_parentperiod;/* parent's vt period sequence number*/ 183 unsigned long cl_nactive; /* number of active children */ 184}; 185 186#define HFSC_HSIZE 16 187 188struct hfsc_sched 189{ 190 u16 defcls; /* default class id */ 191 struct hfsc_class root; /* root class */ 192 struct list_head clhash[HFSC_HSIZE]; /* class hash */ 193 struct rb_root eligible; /* eligible tree */ 194 struct list_head droplist; /* active leaf class list (for 195 dropping) */ 196 struct sk_buff_head requeue; /* requeued packet */ 197 struct timer_list wd_timer; /* watchdog timer */ 198}; 199 200/* 201 * macros 202 */ 203#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY 204#include <linux/time.h> 205#undef PSCHED_GET_TIME 206#define PSCHED_GET_TIME(stamp) \ 207do { \ 208 struct timeval tv; \ 209 do_gettimeofday(&tv); \ 210 (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \ 211} while (0) 212#endif 213 214#if HFSC_DEBUG 215#define ASSERT(cond) \ 216do { \ 217 if (unlikely(!(cond))) \ 218 printk("assertion %s failed at %s:%i (%s)\n", \ 219 #cond, __FILE__, __LINE__, __FUNCTION__); \ 220} while (0) 221#else 222#define ASSERT(cond) 223#endif /* HFSC_DEBUG */ 224 225#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ 226 227 228/* 229 * eligible tree holds backlogged classes being sorted by their eligible times. 230 * there is one eligible tree per hfsc instance. 231 */ 232 233static void 234eltree_insert(struct hfsc_class *cl) 235{ 236 struct rb_node **p = &cl->sched->eligible.rb_node; 237 struct rb_node *parent = NULL; 238 struct hfsc_class *cl1; 239 240 while (*p != NULL) { 241 parent = *p; 242 cl1 = rb_entry(parent, struct hfsc_class, el_node); 243 if (cl->cl_e >= cl1->cl_e) 244 p = &parent->rb_right; 245 else 246 p = &parent->rb_left; 247 } 248 rb_link_node(&cl->el_node, parent, p); 249 rb_insert_color(&cl->el_node, &cl->sched->eligible); 250} 251 252static inline void 253eltree_remove(struct hfsc_class *cl) 254{ 255 rb_erase(&cl->el_node, &cl->sched->eligible); 256} 257 258static inline void 259eltree_update(struct hfsc_class *cl) 260{ 261 eltree_remove(cl); 262 eltree_insert(cl); 263} 264 265/* find the class with the minimum deadline among the eligible classes */ 266static inline struct hfsc_class * 267eltree_get_mindl(struct hfsc_sched *q, u64 cur_time) 268{ 269 struct hfsc_class *p, *cl = NULL; 270 struct rb_node *n; 271 272 for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) { 273 p = rb_entry(n, struct hfsc_class, el_node); 274 if (p->cl_e > cur_time) 275 break; 276 if (cl == NULL || p->cl_d < cl->cl_d) 277 cl = p; 278 } 279 return cl; 280} 281 282/* find the class with minimum eligible time among the eligible classes */ 283static inline struct hfsc_class * 284eltree_get_minel(struct hfsc_sched *q) 285{ 286 struct rb_node *n; 287 288 n = rb_first(&q->eligible); 289 if (n == NULL) 290 return NULL; 291 return rb_entry(n, struct hfsc_class, el_node); 292} 293 294/* 295 * vttree holds holds backlogged child classes being sorted by their virtual 296 * time. each intermediate class has one vttree. 297 */ 298static void 299vttree_insert(struct hfsc_class *cl) 300{ 301 struct rb_node **p = &cl->cl_parent->vt_tree.rb_node; 302 struct rb_node *parent = NULL; 303 struct hfsc_class *cl1; 304 305 while (*p != NULL) { 306 parent = *p; 307 cl1 = rb_entry(parent, struct hfsc_class, vt_node); 308 if (cl->cl_vt >= cl1->cl_vt) 309 p = &parent->rb_right; 310 else 311 p = &parent->rb_left; 312 } 313 rb_link_node(&cl->vt_node, parent, p); 314 rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree); 315} 316 317static inline void 318vttree_remove(struct hfsc_class *cl) 319{ 320 rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree); 321} 322 323static inline void 324vttree_update(struct hfsc_class *cl) 325{ 326 vttree_remove(cl); 327 vttree_insert(cl); 328} 329 330static inline struct hfsc_class * 331vttree_firstfit(struct hfsc_class *cl, u64 cur_time) 332{ 333 struct hfsc_class *p; 334 struct rb_node *n; 335 336 for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) { 337 p = rb_entry(n, struct hfsc_class, vt_node); 338 if (p->cl_f <= cur_time) 339 return p; 340 } 341 return NULL; 342} 343 344/* 345 * get the leaf class with the minimum vt in the hierarchy 346 */ 347static struct hfsc_class * 348vttree_get_minvt(struct hfsc_class *cl, u64 cur_time) 349{ 350 /* if root-class's cfmin is bigger than cur_time nothing to do */ 351 if (cl->cl_cfmin > cur_time) 352 return NULL; 353 354 while (cl->level > 0) { 355 cl = vttree_firstfit(cl, cur_time); 356 if (cl == NULL) 357 return NULL; 358 /* 359 * update parent's cl_cvtmin. 360 */ 361 if (cl->cl_parent->cl_cvtmin < cl->cl_vt) 362 cl->cl_parent->cl_cvtmin = cl->cl_vt; 363 } 364 return cl; 365} 366 367static void 368cftree_insert(struct hfsc_class *cl) 369{ 370 struct rb_node **p = &cl->cl_parent->cf_tree.rb_node; 371 struct rb_node *parent = NULL; 372 struct hfsc_class *cl1; 373 374 while (*p != NULL) { 375 parent = *p; 376 cl1 = rb_entry(parent, struct hfsc_class, cf_node); 377 if (cl->cl_f >= cl1->cl_f) 378 p = &parent->rb_right; 379 else 380 p = &parent->rb_left; 381 } 382 rb_link_node(&cl->cf_node, parent, p); 383 rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree); 384} 385 386static inline void 387cftree_remove(struct hfsc_class *cl) 388{ 389 rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree); 390} 391 392static inline void 393cftree_update(struct hfsc_class *cl) 394{ 395 cftree_remove(cl); 396 cftree_insert(cl); 397} 398 399/* 400 * service curve support functions 401 * 402 * external service curve parameters 403 * m: bps 404 * d: us 405 * internal service curve parameters 406 * sm: (bytes/psched_us) << SM_SHIFT 407 * ism: (psched_us/byte) << ISM_SHIFT 408 * dx: psched_us 409 * 410 * Clock source resolution (CONFIG_NET_SCH_CLK_*) 411 * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us. 412 * CPU: resolution is between 0.5us and 1us. 413 * GETTIMEOFDAY: resolution is exactly 1us. 414 * 415 * sm and ism are scaled in order to keep effective digits. 416 * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective 417 * digits in decimal using the following table. 418 * 419 * Note: We can afford the additional accuracy (altq hfsc keeps at most 420 * 3 effective digits) thanks to the fact that linux clock is bounded 421 * much more tightly. 422 * 423 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps 424 * ------------+------------------------------------------------------- 425 * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3 426 * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3 427 * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3 428 * 429 * 0.5us/byte 160 16 1.6 0.16 0.016 430 * us/byte 80 8 0.8 0.08 0.008 431 * 1.27us/byte 63 6.3 0.63 0.063 0.0063 432 */ 433#define SM_SHIFT 20 434#define ISM_SHIFT 18 435 436#define SM_MASK ((1ULL << SM_SHIFT) - 1) 437#define ISM_MASK ((1ULL << ISM_SHIFT) - 1) 438 439static inline u64 440seg_x2y(u64 x, u64 sm) 441{ 442 u64 y; 443 444 /* 445 * compute 446 * y = x * sm >> SM_SHIFT 447 * but divide it for the upper and lower bits to avoid overflow 448 */ 449 y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); 450 return y; 451} 452 453static inline u64 454seg_y2x(u64 y, u64 ism) 455{ 456 u64 x; 457 458 if (y == 0) 459 x = 0; 460 else if (ism == HT_INFINITY) 461 x = HT_INFINITY; 462 else { 463 x = (y >> ISM_SHIFT) * ism 464 + (((y & ISM_MASK) * ism) >> ISM_SHIFT); 465 } 466 return x; 467} 468 469/* Convert m (bps) into sm (bytes/psched us) */ 470static u64 471m2sm(u32 m) 472{ 473 u64 sm; 474 475 sm = ((u64)m << SM_SHIFT); 476 sm += PSCHED_JIFFIE2US(HZ) - 1; 477 do_div(sm, PSCHED_JIFFIE2US(HZ)); 478 return sm; 479} 480 481/* convert m (bps) into ism (psched us/byte) */ 482static u64 483m2ism(u32 m) 484{ 485 u64 ism; 486 487 if (m == 0) 488 ism = HT_INFINITY; 489 else { 490 ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT); 491 ism += m - 1; 492 do_div(ism, m); 493 } 494 return ism; 495} 496 497/* convert d (us) into dx (psched us) */ 498static u64 499d2dx(u32 d) 500{ 501 u64 dx; 502 503 dx = ((u64)d * PSCHED_JIFFIE2US(HZ)); 504 dx += USEC_PER_SEC - 1; 505 do_div(dx, USEC_PER_SEC); 506 return dx; 507} 508 509/* convert sm (bytes/psched us) into m (bps) */ 510static u32 511sm2m(u64 sm) 512{ 513 u64 m; 514 515 m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT; 516 return (u32)m; 517} 518 519/* convert dx (psched us) into d (us) */ 520static u32 521dx2d(u64 dx) 522{ 523 u64 d; 524 525 d = dx * USEC_PER_SEC; 526 do_div(d, PSCHED_JIFFIE2US(HZ)); 527 return (u32)d; 528} 529 530static void 531sc2isc(struct tc_service_curve *sc, struct internal_sc *isc) 532{ 533 isc->sm1 = m2sm(sc->m1); 534 isc->ism1 = m2ism(sc->m1); 535 isc->dx = d2dx(sc->d); 536 isc->dy = seg_x2y(isc->dx, isc->sm1); 537 isc->sm2 = m2sm(sc->m2); 538 isc->ism2 = m2ism(sc->m2); 539} 540 541/* 542 * initialize the runtime service curve with the given internal 543 * service curve starting at (x, y). 544 */ 545static void 546rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) 547{ 548 rtsc->x = x; 549 rtsc->y = y; 550 rtsc->sm1 = isc->sm1; 551 rtsc->ism1 = isc->ism1; 552 rtsc->dx = isc->dx; 553 rtsc->dy = isc->dy; 554 rtsc->sm2 = isc->sm2; 555 rtsc->ism2 = isc->ism2; 556} 557 558/* 559 * calculate the y-projection of the runtime service curve by the 560 * given x-projection value 561 */ 562static u64 563rtsc_y2x(struct runtime_sc *rtsc, u64 y) 564{ 565 u64 x; 566 567 if (y < rtsc->y) 568 x = rtsc->x; 569 else if (y <= rtsc->y + rtsc->dy) { 570 /* x belongs to the 1st segment */ 571 if (rtsc->dy == 0) 572 x = rtsc->x + rtsc->dx; 573 else 574 x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); 575 } else { 576 /* x belongs to the 2nd segment */ 577 x = rtsc->x + rtsc->dx 578 + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); 579 } 580 return x; 581} 582 583static u64 584rtsc_x2y(struct runtime_sc *rtsc, u64 x) 585{ 586 u64 y; 587 588 if (x <= rtsc->x) 589 y = rtsc->y; 590 else if (x <= rtsc->x + rtsc->dx) 591 /* y belongs to the 1st segment */ 592 y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); 593 else 594 /* y belongs to the 2nd segment */ 595 y = rtsc->y + rtsc->dy 596 + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); 597 return y; 598} 599 600/* 601 * update the runtime service curve by taking the minimum of the current 602 * runtime service curve and the service curve starting at (x, y). 603 */ 604static void 605rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) 606{ 607 u64 y1, y2, dx, dy; 608 u32 dsm; 609 610 if (isc->sm1 <= isc->sm2) { 611 /* service curve is convex */ 612 y1 = rtsc_x2y(rtsc, x); 613 if (y1 < y) 614 /* the current rtsc is smaller */ 615 return; 616 rtsc->x = x; 617 rtsc->y = y; 618 return; 619 } 620 621 /* 622 * service curve is concave 623 * compute the two y values of the current rtsc 624 * y1: at x 625 * y2: at (x + dx) 626 */ 627 y1 = rtsc_x2y(rtsc, x); 628 if (y1 <= y) { 629 /* rtsc is below isc, no change to rtsc */ 630 return; 631 } 632 633 y2 = rtsc_x2y(rtsc, x + isc->dx); 634 if (y2 >= y + isc->dy) { 635 /* rtsc is above isc, replace rtsc by isc */ 636 rtsc->x = x; 637 rtsc->y = y; 638 rtsc->dx = isc->dx; 639 rtsc->dy = isc->dy; 640 return; 641 } 642 643 /* 644 * the two curves intersect 645 * compute the offsets (dx, dy) using the reverse 646 * function of seg_x2y() 647 * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) 648 */ 649 dx = (y1 - y) << SM_SHIFT; 650 dsm = isc->sm1 - isc->sm2; 651 do_div(dx, dsm); 652 /* 653 * check if (x, y1) belongs to the 1st segment of rtsc. 654 * if so, add the offset. 655 */ 656 if (rtsc->x + rtsc->dx > x) 657 dx += rtsc->x + rtsc->dx - x; 658 dy = seg_x2y(dx, isc->sm1); 659 660 rtsc->x = x; 661 rtsc->y = y; 662 rtsc->dx = dx; 663 rtsc->dy = dy; 664 return; 665} 666 667static void 668init_ed(struct hfsc_class *cl, unsigned int next_len) 669{ 670 u64 cur_time; 671 672 PSCHED_GET_TIME(cur_time); 673 674 /* update the deadline curve */ 675 rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); 676 677 /* 678 * update the eligible curve. 679 * for concave, it is equal to the deadline curve. 680 * for convex, it is a linear curve with slope m2. 681 */ 682 cl->cl_eligible = cl->cl_deadline; 683 if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { 684 cl->cl_eligible.dx = 0; 685 cl->cl_eligible.dy = 0; 686 } 687 688 /* compute e and d */ 689 cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); 690 cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); 691 692 eltree_insert(cl); 693} 694 695static void 696update_ed(struct hfsc_class *cl, unsigned int next_len) 697{ 698 cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); 699 cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); 700 701 eltree_update(cl); 702} 703 704static inline void 705update_d(struct hfsc_class *cl, unsigned int next_len) 706{ 707 cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); 708} 709 710static inline void 711update_cfmin(struct hfsc_class *cl) 712{ 713 struct rb_node *n = rb_first(&cl->cf_tree); 714 struct hfsc_class *p; 715 716 if (n == NULL) { 717 cl->cl_cfmin = 0; 718 return; 719 } 720 p = rb_entry(n, struct hfsc_class, cf_node); 721 cl->cl_cfmin = p->cl_f; 722} 723 724static void 725init_vf(struct hfsc_class *cl, unsigned int len) 726{ 727 struct hfsc_class *max_cl; 728 struct rb_node *n; 729 u64 vt, f, cur_time; 730 int go_active; 731 732 cur_time = 0; 733 go_active = 1; 734 for (; cl->cl_parent != NULL; cl = cl->cl_parent) { 735 if (go_active && cl->cl_nactive++ == 0) 736 go_active = 1; 737 else 738 go_active = 0; 739 740 if (go_active) { 741 n = rb_last(&cl->cl_parent->vt_tree); 742 if (n != NULL) { 743 max_cl = rb_entry(n, struct hfsc_class,vt_node); 744 /* 745 * set vt to the average of the min and max 746 * classes. if the parent's period didn't 747 * change, don't decrease vt of the class. 748 */ 749 vt = max_cl->cl_vt; 750 if (cl->cl_parent->cl_cvtmin != 0) 751 vt = (cl->cl_parent->cl_cvtmin + vt)/2; 752 753 if (cl->cl_parent->cl_vtperiod != 754 cl->cl_parentperiod || vt > cl->cl_vt) 755 cl->cl_vt = vt; 756 } else { 757 /* 758 * first child for a new parent backlog period. 759 * add parent's cvtmax to cvtoff to make a new 760 * vt (vtoff + vt) larger than the vt in the 761 * last period for all children. 762 */ 763 vt = cl->cl_parent->cl_cvtmax; 764 cl->cl_parent->cl_cvtoff += vt; 765 cl->cl_parent->cl_cvtmax = 0; 766 cl->cl_parent->cl_cvtmin = 0; 767 cl->cl_vt = 0; 768 } 769 770 cl->cl_vtoff = cl->cl_parent->cl_cvtoff - 771 cl->cl_pcvtoff; 772 773 /* update the virtual curve */ 774 vt = cl->cl_vt + cl->cl_vtoff; 775 rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, 776 cl->cl_total); 777 if (cl->cl_virtual.x == vt) { 778 cl->cl_virtual.x -= cl->cl_vtoff; 779 cl->cl_vtoff = 0; 780 } 781 cl->cl_vtadj = 0; 782 783 cl->cl_vtperiod++; /* increment vt period */ 784 cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; 785 if (cl->cl_parent->cl_nactive == 0) 786 cl->cl_parentperiod++; 787 cl->cl_f = 0; 788 789 vttree_insert(cl); 790 cftree_insert(cl); 791 792 if (cl->cl_flags & HFSC_USC) { 793 /* class has upper limit curve */ 794 if (cur_time == 0) 795 PSCHED_GET_TIME(cur_time); 796 797 /* update the ulimit curve */ 798 rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time, 799 cl->cl_total); 800 /* compute myf */ 801 cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, 802 cl->cl_total); 803 cl->cl_myfadj = 0; 804 } 805 } 806 807 f = max(cl->cl_myf, cl->cl_cfmin); 808 if (f != cl->cl_f) { 809 cl->cl_f = f; 810 cftree_update(cl); 811 update_cfmin(cl->cl_parent); 812 } 813 } 814} 815 816static void 817update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) 818{ 819 u64 f; /* , myf_bound, delta; */ 820 int go_passive = 0; 821 822 if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC) 823 go_passive = 1; 824 825 for (; cl->cl_parent != NULL; cl = cl->cl_parent) { 826 cl->cl_total += len; 827 828 if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0) 829 continue; 830 831 if (go_passive && --cl->cl_nactive == 0) 832 go_passive = 1; 833 else 834 go_passive = 0; 835 836 if (go_passive) { 837 /* no more active child, going passive */ 838 839 /* update cvtmax of the parent class */ 840 if (cl->cl_vt > cl->cl_parent->cl_cvtmax) 841 cl->cl_parent->cl_cvtmax = cl->cl_vt; 842 843 /* remove this class from the vt tree */ 844 vttree_remove(cl); 845 846 cftree_remove(cl); 847 update_cfmin(cl->cl_parent); 848 849 continue; 850 } 851 852 /* 853 * update vt and f 854 */ 855 cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) 856 - cl->cl_vtoff + cl->cl_vtadj; 857 858 /* 859 * if vt of the class is smaller than cvtmin, 860 * the class was skipped in the past due to non-fit. 861 * if so, we need to adjust vtadj. 862 */ 863 if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { 864 cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; 865 cl->cl_vt = cl->cl_parent->cl_cvtmin; 866 } 867 868 /* update the vt tree */ 869 vttree_update(cl); 870 871 if (cl->cl_flags & HFSC_USC) { 872 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, 873 cl->cl_total); 874#if 0 875 /* 876 * This code causes classes to stay way under their 877 * limit when multiple classes are used at gigabit 878 * speed. needs investigation. -kaber 879 */ 880 /* 881 * if myf lags behind by more than one clock tick 882 * from the current time, adjust myfadj to prevent 883 * a rate-limited class from going greedy. 884 * in a steady state under rate-limiting, myf 885 * fluctuates within one clock tick. 886 */ 887 myf_bound = cur_time - PSCHED_JIFFIE2US(1); 888 if (cl->cl_myf < myf_bound) { 889 delta = cur_time - cl->cl_myf; 890 cl->cl_myfadj += delta; 891 cl->cl_myf += delta; 892 } 893#endif 894 } 895 896 f = max(cl->cl_myf, cl->cl_cfmin); 897 if (f != cl->cl_f) { 898 cl->cl_f = f; 899 cftree_update(cl); 900 update_cfmin(cl->cl_parent); 901 } 902 } 903} 904 905static void 906set_active(struct hfsc_class *cl, unsigned int len) 907{ 908 if (cl->cl_flags & HFSC_RSC) 909 init_ed(cl, len); 910 if (cl->cl_flags & HFSC_FSC) 911 init_vf(cl, len); 912 913 list_add_tail(&cl->dlist, &cl->sched->droplist); 914} 915 916static void 917set_passive(struct hfsc_class *cl) 918{ 919 if (cl->cl_flags & HFSC_RSC) 920 eltree_remove(cl); 921 922 list_del(&cl->dlist); 923 924 /* 925 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) 926 * needs to be called explicitly to remove a class from vttree. 927 */ 928} 929 930/* 931 * hack to get length of first packet in queue. 932 */ 933static unsigned int 934qdisc_peek_len(struct Qdisc *sch) 935{ 936 struct sk_buff *skb; 937 unsigned int len; 938 939 skb = sch->dequeue(sch); 940 if (skb == NULL) { 941 if (net_ratelimit()) 942 printk("qdisc_peek_len: non work-conserving qdisc ?\n"); 943 return 0; 944 } 945 len = skb->len; 946 if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { 947 if (net_ratelimit()) 948 printk("qdisc_peek_len: failed to requeue\n"); 949 qdisc_tree_decrease_qlen(sch, 1); 950 return 0; 951 } 952 return len; 953} 954 955static void 956hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) 957{ 958 unsigned int len = cl->qdisc->q.qlen; 959 960 qdisc_reset(cl->qdisc); 961 qdisc_tree_decrease_qlen(cl->qdisc, len); 962} 963 964static void 965hfsc_adjust_levels(struct hfsc_class *cl) 966{ 967 struct hfsc_class *p; 968 unsigned int level; 969 970 do { 971 level = 0; 972 list_for_each_entry(p, &cl->children, siblings) { 973 if (p->level >= level) 974 level = p->level + 1; 975 } 976 cl->level = level; 977 } while ((cl = cl->cl_parent) != NULL); 978} 979 980static inline unsigned int 981hfsc_hash(u32 h) 982{ 983 h ^= h >> 8; 984 h ^= h >> 4; 985 986 return h & (HFSC_HSIZE - 1); 987} 988 989static inline struct hfsc_class * 990hfsc_find_class(u32 classid, struct Qdisc *sch) 991{ 992 struct hfsc_sched *q = qdisc_priv(sch); 993 struct hfsc_class *cl; 994 995 list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) { 996 if (cl->classid == classid) 997 return cl; 998 } 999 return NULL; 1000} 1001 1002static void 1003hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc, 1004 u64 cur_time) 1005{ 1006 sc2isc(rsc, &cl->cl_rsc); 1007 rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); 1008 cl->cl_eligible = cl->cl_deadline; 1009 if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { 1010 cl->cl_eligible.dx = 0; 1011 cl->cl_eligible.dy = 0; 1012 } 1013 cl->cl_flags |= HFSC_RSC; 1014} 1015 1016static void 1017hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) 1018{ 1019 sc2isc(fsc, &cl->cl_fsc); 1020 rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); 1021 cl->cl_flags |= HFSC_FSC; 1022} 1023 1024static void 1025hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc, 1026 u64 cur_time) 1027{ 1028 sc2isc(usc, &cl->cl_usc); 1029 rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total); 1030 cl->cl_flags |= HFSC_USC; 1031} 1032 1033static int 1034hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 1035 struct rtattr **tca, unsigned long *arg) 1036{ 1037 struct hfsc_sched *q = qdisc_priv(sch); 1038 struct hfsc_class *cl = (struct hfsc_class *)*arg; 1039 struct hfsc_class *parent = NULL; 1040 struct rtattr *opt = tca[TCA_OPTIONS-1]; 1041 struct rtattr *tb[TCA_HFSC_MAX]; 1042 struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL; 1043 u64 cur_time; 1044 1045 if (opt == NULL || rtattr_parse_nested(tb, TCA_HFSC_MAX, opt)) 1046 return -EINVAL; 1047 1048 if (tb[TCA_HFSC_RSC-1]) { 1049 if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc)) 1050 return -EINVAL; 1051 rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]); 1052 if (rsc->m1 == 0 && rsc->m2 == 0) 1053 rsc = NULL; 1054 } 1055 1056 if (tb[TCA_HFSC_FSC-1]) { 1057 if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc)) 1058 return -EINVAL; 1059 fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]); 1060 if (fsc->m1 == 0 && fsc->m2 == 0) 1061 fsc = NULL; 1062 } 1063 1064 if (tb[TCA_HFSC_USC-1]) { 1065 if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc)) 1066 return -EINVAL; 1067 usc = RTA_DATA(tb[TCA_HFSC_USC-1]); 1068 if (usc->m1 == 0 && usc->m2 == 0) 1069 usc = NULL; 1070 } 1071 1072 if (cl != NULL) { 1073 if (parentid) { 1074 if (cl->cl_parent && cl->cl_parent->classid != parentid) 1075 return -EINVAL; 1076 if (cl->cl_parent == NULL && parentid != TC_H_ROOT) 1077 return -EINVAL; 1078 } 1079 PSCHED_GET_TIME(cur_time); 1080 1081 sch_tree_lock(sch); 1082 if (rsc != NULL) 1083 hfsc_change_rsc(cl, rsc, cur_time); 1084 if (fsc != NULL) 1085 hfsc_change_fsc(cl, fsc); 1086 if (usc != NULL) 1087 hfsc_change_usc(cl, usc, cur_time); 1088 1089 if (cl->qdisc->q.qlen != 0) { 1090 if (cl->cl_flags & HFSC_RSC) 1091 update_ed(cl, qdisc_peek_len(cl->qdisc)); 1092 if (cl->cl_flags & HFSC_FSC) 1093 update_vf(cl, 0, cur_time); 1094 } 1095 sch_tree_unlock(sch); 1096 1097#ifdef CONFIG_NET_ESTIMATOR 1098 if (tca[TCA_RATE-1]) 1099 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1100 cl->stats_lock, tca[TCA_RATE-1]); 1101#endif 1102 return 0; 1103 } 1104 1105 if (parentid == TC_H_ROOT) 1106 return -EEXIST; 1107 1108 parent = &q->root; 1109 if (parentid) { 1110 parent = hfsc_find_class(parentid, sch); 1111 if (parent == NULL) 1112 return -ENOENT; 1113 } 1114 1115 if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) 1116 return -EINVAL; 1117 if (hfsc_find_class(classid, sch)) 1118 return -EEXIST; 1119 1120 if (rsc == NULL && fsc == NULL) 1121 return -EINVAL; 1122 1123 cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL); 1124 if (cl == NULL) 1125 return -ENOBUFS; 1126 1127 if (rsc != NULL) 1128 hfsc_change_rsc(cl, rsc, 0); 1129 if (fsc != NULL) 1130 hfsc_change_fsc(cl, fsc); 1131 if (usc != NULL) 1132 hfsc_change_usc(cl, usc, 0); 1133 1134 cl->refcnt = 1; 1135 cl->classid = classid; 1136 cl->sched = q; 1137 cl->cl_parent = parent; 1138 cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); 1139 if (cl->qdisc == NULL) 1140 cl->qdisc = &noop_qdisc; 1141 cl->stats_lock = &sch->dev->queue_lock; 1142 INIT_LIST_HEAD(&cl->children); 1143 cl->vt_tree = RB_ROOT; 1144 cl->cf_tree = RB_ROOT; 1145 1146 sch_tree_lock(sch); 1147 list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]); 1148 list_add_tail(&cl->siblings, &parent->children); 1149 if (parent->level == 0) 1150 hfsc_purge_queue(sch, parent); 1151 hfsc_adjust_levels(parent); 1152 cl->cl_pcvtoff = parent->cl_cvtoff; 1153 sch_tree_unlock(sch); 1154 1155#ifdef CONFIG_NET_ESTIMATOR 1156 if (tca[TCA_RATE-1]) 1157 gen_new_estimator(&cl->bstats, &cl->rate_est, 1158 cl->stats_lock, tca[TCA_RATE-1]); 1159#endif 1160 *arg = (unsigned long)cl; 1161 return 0; 1162} 1163 1164static void 1165hfsc_destroy_filters(struct tcf_proto **fl) 1166{ 1167 struct tcf_proto *tp; 1168 1169 while ((tp = *fl) != NULL) { 1170 *fl = tp->next; 1171 tcf_destroy(tp); 1172 } 1173} 1174 1175static void 1176hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) 1177{ 1178 struct hfsc_sched *q = qdisc_priv(sch); 1179 1180 hfsc_destroy_filters(&cl->filter_list); 1181 qdisc_destroy(cl->qdisc); 1182#ifdef CONFIG_NET_ESTIMATOR 1183 gen_kill_estimator(&cl->bstats, &cl->rate_est); 1184#endif 1185 if (cl != &q->root) 1186 kfree(cl); 1187} 1188 1189static int 1190hfsc_delete_class(struct Qdisc *sch, unsigned long arg) 1191{ 1192 struct hfsc_sched *q = qdisc_priv(sch); 1193 struct hfsc_class *cl = (struct hfsc_class *)arg; 1194 1195 if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root) 1196 return -EBUSY; 1197 1198 sch_tree_lock(sch); 1199 1200 list_del(&cl->hlist); 1201 list_del(&cl->siblings); 1202 hfsc_adjust_levels(cl->cl_parent); 1203 hfsc_purge_queue(sch, cl); 1204 if (--cl->refcnt == 0) 1205 hfsc_destroy_class(sch, cl); 1206 1207 sch_tree_unlock(sch); 1208 return 0; 1209} 1210 1211static struct hfsc_class * 1212hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) 1213{ 1214 struct hfsc_sched *q = qdisc_priv(sch); 1215 struct hfsc_class *cl; 1216 struct tcf_result res; 1217 struct tcf_proto *tcf; 1218 int result; 1219 1220 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 && 1221 (cl = hfsc_find_class(skb->priority, sch)) != NULL) 1222 if (cl->level == 0) 1223 return cl; 1224 1225 *qerr = NET_XMIT_BYPASS; 1226 tcf = q->root.filter_list; 1227 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { 1228#ifdef CONFIG_NET_CLS_ACT 1229 switch (result) { 1230 case TC_ACT_QUEUED: 1231 case TC_ACT_STOLEN: 1232 *qerr = NET_XMIT_SUCCESS; 1233 case TC_ACT_SHOT: 1234 return NULL; 1235 } 1236#elif defined(CONFIG_NET_CLS_POLICE) 1237 if (result == TC_POLICE_SHOT) 1238 return NULL; 1239#endif 1240 if ((cl = (struct hfsc_class *)res.class) == NULL) { 1241 if ((cl = hfsc_find_class(res.classid, sch)) == NULL) 1242 break; /* filter selected invalid classid */ 1243 } 1244 1245 if (cl->level == 0) 1246 return cl; /* hit leaf class */ 1247 1248 /* apply inner filter chain */ 1249 tcf = cl->filter_list; 1250 } 1251 1252 /* classification failed, try default class */ 1253 cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); 1254 if (cl == NULL || cl->level > 0) 1255 return NULL; 1256 1257 return cl; 1258} 1259 1260static int 1261hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1262 struct Qdisc **old) 1263{ 1264 struct hfsc_class *cl = (struct hfsc_class *)arg; 1265 1266 if (cl == NULL) 1267 return -ENOENT; 1268 if (cl->level > 0) 1269 return -EINVAL; 1270 if (new == NULL) { 1271 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, 1272 cl->classid); 1273 if (new == NULL) 1274 new = &noop_qdisc; 1275 } 1276 1277 sch_tree_lock(sch); 1278 hfsc_purge_queue(sch, cl); 1279 *old = xchg(&cl->qdisc, new); 1280 sch_tree_unlock(sch); 1281 return 0; 1282} 1283 1284static struct Qdisc * 1285hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) 1286{ 1287 struct hfsc_class *cl = (struct hfsc_class *)arg; 1288 1289 if (cl != NULL && cl->level == 0) 1290 return cl->qdisc; 1291 1292 return NULL; 1293} 1294 1295static void 1296hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) 1297{ 1298 struct hfsc_class *cl = (struct hfsc_class *)arg; 1299 1300 if (cl->qdisc->q.qlen == 0) { 1301 update_vf(cl, 0, 0); 1302 set_passive(cl); 1303 } 1304} 1305 1306static unsigned long 1307hfsc_get_class(struct Qdisc *sch, u32 classid) 1308{ 1309 struct hfsc_class *cl = hfsc_find_class(classid, sch); 1310 1311 if (cl != NULL) 1312 cl->refcnt++; 1313 1314 return (unsigned long)cl; 1315} 1316 1317static void 1318hfsc_put_class(struct Qdisc *sch, unsigned long arg) 1319{ 1320 struct hfsc_class *cl = (struct hfsc_class *)arg; 1321 1322 if (--cl->refcnt == 0) 1323 hfsc_destroy_class(sch, cl); 1324} 1325 1326static unsigned long 1327hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) 1328{ 1329 struct hfsc_class *p = (struct hfsc_class *)parent; 1330 struct hfsc_class *cl = hfsc_find_class(classid, sch); 1331 1332 if (cl != NULL) { 1333 if (p != NULL && p->level <= cl->level) 1334 return 0; 1335 cl->filter_cnt++; 1336 } 1337 1338 return (unsigned long)cl; 1339} 1340 1341static void 1342hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) 1343{ 1344 struct hfsc_class *cl = (struct hfsc_class *)arg; 1345 1346 cl->filter_cnt--; 1347} 1348 1349static struct tcf_proto ** 1350hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) 1351{ 1352 struct hfsc_sched *q = qdisc_priv(sch); 1353 struct hfsc_class *cl = (struct hfsc_class *)arg; 1354 1355 if (cl == NULL) 1356 cl = &q->root; 1357 1358 return &cl->filter_list; 1359} 1360 1361static int 1362hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) 1363{ 1364 struct tc_service_curve tsc; 1365 1366 tsc.m1 = sm2m(sc->sm1); 1367 tsc.d = dx2d(sc->dx); 1368 tsc.m2 = sm2m(sc->sm2); 1369 RTA_PUT(skb, attr, sizeof(tsc), &tsc); 1370 1371 return skb->len; 1372 1373 rtattr_failure: 1374 return -1; 1375} 1376 1377static inline int 1378hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) 1379{ 1380 if ((cl->cl_flags & HFSC_RSC) && 1381 (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0)) 1382 goto rtattr_failure; 1383 1384 if ((cl->cl_flags & HFSC_FSC) && 1385 (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0)) 1386 goto rtattr_failure; 1387 1388 if ((cl->cl_flags & HFSC_USC) && 1389 (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0)) 1390 goto rtattr_failure; 1391 1392 return skb->len; 1393 1394 rtattr_failure: 1395 return -1; 1396} 1397 1398static int 1399hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, 1400 struct tcmsg *tcm) 1401{ 1402 struct hfsc_class *cl = (struct hfsc_class *)arg; 1403 unsigned char *b = skb->tail; 1404 struct rtattr *rta = (struct rtattr *)b; 1405 1406 tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; 1407 tcm->tcm_handle = cl->classid; 1408 if (cl->level == 0) 1409 tcm->tcm_info = cl->qdisc->handle; 1410 1411 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 1412 if (hfsc_dump_curves(skb, cl) < 0) 1413 goto rtattr_failure; 1414 rta->rta_len = skb->tail - b; 1415 return skb->len; 1416 1417 rtattr_failure: 1418 skb_trim(skb, b - skb->data); 1419 return -1; 1420} 1421 1422static int 1423hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, 1424 struct gnet_dump *d) 1425{ 1426 struct hfsc_class *cl = (struct hfsc_class *)arg; 1427 struct tc_hfsc_stats xstats; 1428 1429 cl->qstats.qlen = cl->qdisc->q.qlen; 1430 xstats.level = cl->level; 1431 xstats.period = cl->cl_vtperiod; 1432 xstats.work = cl->cl_total; 1433 xstats.rtwork = cl->cl_cumul; 1434 1435 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || 1436#ifdef CONFIG_NET_ESTIMATOR 1437 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || 1438#endif 1439 gnet_stats_copy_queue(d, &cl->qstats) < 0) 1440 return -1; 1441 1442 return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); 1443} 1444 1445 1446 1447static void 1448hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) 1449{ 1450 struct hfsc_sched *q = qdisc_priv(sch); 1451 struct hfsc_class *cl; 1452 unsigned int i; 1453 1454 if (arg->stop) 1455 return; 1456 1457 for (i = 0; i < HFSC_HSIZE; i++) { 1458 list_for_each_entry(cl, &q->clhash[i], hlist) { 1459 if (arg->count < arg->skip) { 1460 arg->count++; 1461 continue; 1462 } 1463 if (arg->fn(sch, (unsigned long)cl, arg) < 0) { 1464 arg->stop = 1; 1465 return; 1466 } 1467 arg->count++; 1468 } 1469 } 1470} 1471 1472static void 1473hfsc_watchdog(unsigned long arg) 1474{ 1475 struct Qdisc *sch = (struct Qdisc *)arg; 1476 1477 sch->flags &= ~TCQ_F_THROTTLED; 1478 netif_schedule(sch->dev); 1479} 1480 1481static void 1482hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time) 1483{ 1484 struct hfsc_sched *q = qdisc_priv(sch); 1485 struct hfsc_class *cl; 1486 u64 next_time = 0; 1487 long delay; 1488 1489 if ((cl = eltree_get_minel(q)) != NULL) 1490 next_time = cl->cl_e; 1491 if (q->root.cl_cfmin != 0) { 1492 if (next_time == 0 || next_time > q->root.cl_cfmin) 1493 next_time = q->root.cl_cfmin; 1494 } 1495 ASSERT(next_time != 0); 1496 delay = next_time - cur_time; 1497 delay = PSCHED_US2JIFFIE(delay); 1498 1499 sch->flags |= TCQ_F_THROTTLED; 1500 mod_timer(&q->wd_timer, jiffies + delay); 1501} 1502 1503static int 1504hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) 1505{ 1506 struct hfsc_sched *q = qdisc_priv(sch); 1507 struct tc_hfsc_qopt *qopt; 1508 unsigned int i; 1509 1510 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 1511 return -EINVAL; 1512 qopt = RTA_DATA(opt); 1513 1514 sch->stats_lock = &sch->dev->queue_lock; 1515 1516 q->defcls = qopt->defcls; 1517 for (i = 0; i < HFSC_HSIZE; i++) 1518 INIT_LIST_HEAD(&q->clhash[i]); 1519 q->eligible = RB_ROOT; 1520 INIT_LIST_HEAD(&q->droplist); 1521 skb_queue_head_init(&q->requeue); 1522 1523 q->root.refcnt = 1; 1524 q->root.classid = sch->handle; 1525 q->root.sched = q; 1526 q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, 1527 sch->handle); 1528 if (q->root.qdisc == NULL) 1529 q->root.qdisc = &noop_qdisc; 1530 q->root.stats_lock = &sch->dev->queue_lock; 1531 INIT_LIST_HEAD(&q->root.children); 1532 q->root.vt_tree = RB_ROOT; 1533 q->root.cf_tree = RB_ROOT; 1534 1535 list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); 1536 1537 init_timer(&q->wd_timer); 1538 q->wd_timer.function = hfsc_watchdog; 1539 q->wd_timer.data = (unsigned long)sch; 1540 1541 return 0; 1542} 1543 1544static int 1545hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt) 1546{ 1547 struct hfsc_sched *q = qdisc_priv(sch); 1548 struct tc_hfsc_qopt *qopt; 1549 1550 if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) 1551 return -EINVAL; 1552 qopt = RTA_DATA(opt); 1553 1554 sch_tree_lock(sch); 1555 q->defcls = qopt->defcls; 1556 sch_tree_unlock(sch); 1557 1558 return 0; 1559} 1560 1561static void 1562hfsc_reset_class(struct hfsc_class *cl) 1563{ 1564 cl->cl_total = 0; 1565 cl->cl_cumul = 0; 1566 cl->cl_d = 0; 1567 cl->cl_e = 0; 1568 cl->cl_vt = 0; 1569 cl->cl_vtadj = 0; 1570 cl->cl_vtoff = 0; 1571 cl->cl_cvtmin = 0; 1572 cl->cl_cvtmax = 0; 1573 cl->cl_cvtoff = 0; 1574 cl->cl_pcvtoff = 0; 1575 cl->cl_vtperiod = 0; 1576 cl->cl_parentperiod = 0; 1577 cl->cl_f = 0; 1578 cl->cl_myf = 0; 1579 cl->cl_myfadj = 0; 1580 cl->cl_cfmin = 0; 1581 cl->cl_nactive = 0; 1582 1583 cl->vt_tree = RB_ROOT; 1584 cl->cf_tree = RB_ROOT; 1585 qdisc_reset(cl->qdisc); 1586 1587 if (cl->cl_flags & HFSC_RSC) 1588 rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0); 1589 if (cl->cl_flags & HFSC_FSC) 1590 rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0); 1591 if (cl->cl_flags & HFSC_USC) 1592 rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0); 1593} 1594 1595static void 1596hfsc_reset_qdisc(struct Qdisc *sch) 1597{ 1598 struct hfsc_sched *q = qdisc_priv(sch); 1599 struct hfsc_class *cl; 1600 unsigned int i; 1601 1602 for (i = 0; i < HFSC_HSIZE; i++) { 1603 list_for_each_entry(cl, &q->clhash[i], hlist) 1604 hfsc_reset_class(cl); 1605 } 1606 __skb_queue_purge(&q->requeue); 1607 q->eligible = RB_ROOT; 1608 INIT_LIST_HEAD(&q->droplist); 1609 del_timer(&q->wd_timer); 1610 sch->flags &= ~TCQ_F_THROTTLED; 1611 sch->q.qlen = 0; 1612} 1613 1614static void 1615hfsc_destroy_qdisc(struct Qdisc *sch) 1616{ 1617 struct hfsc_sched *q = qdisc_priv(sch); 1618 struct hfsc_class *cl, *next; 1619 unsigned int i; 1620 1621 for (i = 0; i < HFSC_HSIZE; i++) { 1622 list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) 1623 hfsc_destroy_class(sch, cl); 1624 } 1625 __skb_queue_purge(&q->requeue); 1626 del_timer(&q->wd_timer); 1627} 1628 1629static int 1630hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) 1631{ 1632 struct hfsc_sched *q = qdisc_priv(sch); 1633 unsigned char *b = skb->tail; 1634 struct tc_hfsc_qopt qopt; 1635 1636 qopt.defcls = q->defcls; 1637 RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); 1638 return skb->len; 1639 1640 rtattr_failure: 1641 skb_trim(skb, b - skb->data); 1642 return -1; 1643} 1644 1645static int 1646hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 1647{ 1648 struct hfsc_class *cl; 1649 unsigned int len; 1650 int err; 1651 1652 cl = hfsc_classify(skb, sch, &err); 1653 if (cl == NULL) { 1654 if (err == NET_XMIT_BYPASS) 1655 sch->qstats.drops++; 1656 kfree_skb(skb); 1657 return err; 1658 } 1659 1660 len = skb->len; 1661 err = cl->qdisc->enqueue(skb, cl->qdisc); 1662 if (unlikely(err != NET_XMIT_SUCCESS)) { 1663 cl->qstats.drops++; 1664 sch->qstats.drops++; 1665 return err; 1666 } 1667 1668 if (cl->qdisc->q.qlen == 1) 1669 set_active(cl, len); 1670 1671 cl->bstats.packets++; 1672 cl->bstats.bytes += len; 1673 sch->bstats.packets++; 1674 sch->bstats.bytes += len; 1675 sch->q.qlen++; 1676 1677 return NET_XMIT_SUCCESS; 1678} 1679 1680static struct sk_buff * 1681hfsc_dequeue(struct Qdisc *sch) 1682{ 1683 struct hfsc_sched *q = qdisc_priv(sch); 1684 struct hfsc_class *cl; 1685 struct sk_buff *skb; 1686 u64 cur_time; 1687 unsigned int next_len; 1688 int realtime = 0; 1689 1690 if (sch->q.qlen == 0) 1691 return NULL; 1692 if ((skb = __skb_dequeue(&q->requeue))) 1693 goto out; 1694 1695 PSCHED_GET_TIME(cur_time); 1696 1697 /* 1698 * if there are eligible classes, use real-time criteria. 1699 * find the class with the minimum deadline among 1700 * the eligible classes. 1701 */ 1702 if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { 1703 realtime = 1; 1704 } else { 1705 /* 1706 * use link-sharing criteria 1707 * get the class with the minimum vt in the hierarchy 1708 */ 1709 cl = vttree_get_minvt(&q->root, cur_time); 1710 if (cl == NULL) { 1711 sch->qstats.overlimits++; 1712 hfsc_schedule_watchdog(sch, cur_time); 1713 return NULL; 1714 } 1715 } 1716 1717 skb = cl->qdisc->dequeue(cl->qdisc); 1718 if (skb == NULL) { 1719 if (net_ratelimit()) 1720 printk("HFSC: Non-work-conserving qdisc ?\n"); 1721 return NULL; 1722 } 1723 1724 update_vf(cl, skb->len, cur_time); 1725 if (realtime) 1726 cl->cl_cumul += skb->len; 1727 1728 if (cl->qdisc->q.qlen != 0) { 1729 if (cl->cl_flags & HFSC_RSC) { 1730 /* update ed */ 1731 next_len = qdisc_peek_len(cl->qdisc); 1732 if (realtime) 1733 update_ed(cl, next_len); 1734 else 1735 update_d(cl, next_len); 1736 } 1737 } else { 1738 /* the class becomes passive */ 1739 set_passive(cl); 1740 } 1741 1742 out: 1743 sch->flags &= ~TCQ_F_THROTTLED; 1744 sch->q.qlen--; 1745 1746 return skb; 1747} 1748 1749static int 1750hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) 1751{ 1752 struct hfsc_sched *q = qdisc_priv(sch); 1753 1754 __skb_queue_head(&q->requeue, skb); 1755 sch->q.qlen++; 1756 sch->qstats.requeues++; 1757 return NET_XMIT_SUCCESS; 1758} 1759 1760static unsigned int 1761hfsc_drop(struct Qdisc *sch) 1762{ 1763 struct hfsc_sched *q = qdisc_priv(sch); 1764 struct hfsc_class *cl; 1765 unsigned int len; 1766 1767 list_for_each_entry(cl, &q->droplist, dlist) { 1768 if (cl->qdisc->ops->drop != NULL && 1769 (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) { 1770 if (cl->qdisc->q.qlen == 0) { 1771 update_vf(cl, 0, 0); 1772 set_passive(cl); 1773 } else { 1774 list_move_tail(&cl->dlist, &q->droplist); 1775 } 1776 cl->qstats.drops++; 1777 sch->qstats.drops++; 1778 sch->q.qlen--; 1779 return len; 1780 } 1781 } 1782 return 0; 1783} 1784 1785static struct Qdisc_class_ops hfsc_class_ops = { 1786 .change = hfsc_change_class, 1787 .delete = hfsc_delete_class, 1788 .graft = hfsc_graft_class, 1789 .leaf = hfsc_class_leaf, 1790 .qlen_notify = hfsc_qlen_notify, 1791 .get = hfsc_get_class, 1792 .put = hfsc_put_class, 1793 .bind_tcf = hfsc_bind_tcf, 1794 .unbind_tcf = hfsc_unbind_tcf, 1795 .tcf_chain = hfsc_tcf_chain, 1796 .dump = hfsc_dump_class, 1797 .dump_stats = hfsc_dump_class_stats, 1798 .walk = hfsc_walk 1799}; 1800 1801static struct Qdisc_ops hfsc_qdisc_ops = { 1802 .id = "hfsc", 1803 .init = hfsc_init_qdisc, 1804 .change = hfsc_change_qdisc, 1805 .reset = hfsc_reset_qdisc, 1806 .destroy = hfsc_destroy_qdisc, 1807 .dump = hfsc_dump_qdisc, 1808 .enqueue = hfsc_enqueue, 1809 .dequeue = hfsc_dequeue, 1810 .requeue = hfsc_requeue, 1811 .drop = hfsc_drop, 1812 .cl_ops = &hfsc_class_ops, 1813 .priv_size = sizeof(struct hfsc_sched), 1814 .owner = THIS_MODULE 1815}; 1816 1817static int __init 1818hfsc_init(void) 1819{ 1820 return register_qdisc(&hfsc_qdisc_ops); 1821} 1822 1823static void __exit 1824hfsc_cleanup(void) 1825{ 1826 unregister_qdisc(&hfsc_qdisc_ops); 1827} 1828 1829MODULE_LICENSE("GPL"); 1830module_init(hfsc_init); 1831module_exit(hfsc_cleanup);