net_sched: Add qdisc __NET_XMIT_STOLEN flag

Patrick McHardy <kaber@trash.net> noticed:
"The other problem that affects all qdiscs supporting actions is
TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
even though the packet is not queued, corrupting upper qdiscs'
qlen counters."

and later explained:
"The reason why it translates it at all seems to be to not increase
the drops counter. Within a single qdisc this could be avoided by
other means easily, upper qdiscs would still increase the counter
when we return anything besides NET_XMIT_SUCCESS though.

This means we need a new NET_XMIT return value to indicate this to
the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
in dev_queue_xmit, similar to NET_XMIT_BYPASS."

David Miller <davem@davemloft.net> noticed:
"Maybe these NET_XMIT_* values being passed around should be a set of
bits. They could be composed of base meanings, combined with specific
attributes.

So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"

The attributes get masked out by the top-level ->enqueue() caller,
such that the base meanings are the only thing that make their
way up into the stack. If it's only about communication within the
qdisc tree, let's simply code it that way."

This patch attempts to realize these ideas.
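As an illustration of the scheme (a minimal userspace sketch, not part of the patch: the stand-in functions child_enqueue()/parent_enqueue() and the drops counter are hypothetical, while the constants and the net_xmit_drop_count() macro mirror the definitions added below):

#include <stdio.h>

/* Base meanings, as in include/linux/netdevice.h */
#define NET_XMIT_SUCCESS        0
#define NET_XMIT_DROP           1
#define NET_XMIT_MASK           0xFFFF          /* strips qdisc-internal attribute bits */

/* Attribute bit that only travels inside the qdisc tree */
#define __NET_XMIT_STOLEN       0x00010000
#define net_xmit_drop_count(e)  ((e) & __NET_XMIT_STOLEN ? 0 : 1)

static int drops;       /* stand-in for a parent qdisc's qstats.drops */

/* Child qdisc whose action steals the packet: not queued, but not a real drop either. */
static int child_enqueue(void)
{
        return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
}

/* Parent qdisc: bumps its drop counter only when the STOLEN bit is not set. */
static int parent_enqueue(void)
{
        int ret = child_enqueue();

        if (ret != NET_XMIT_SUCCESS && net_xmit_drop_count(ret))
                drops++;
        return ret;
}

int main(void)
{
        /* The top-level caller masks the attribute bits off, so only the base
           NET_XMIT_* value reaches the rest of the stack. */
        int ret = parent_enqueue() & NET_XMIT_MASK;

        printf("stack sees %d, parent drops = %d\n", ret, drops);      /* prints 0, 0 */
        return 0;
}

With the STOLEN bit set, the parent skips its drops counter and the masked value seen by the stack is still NET_XMIT_SUCCESS; a plain NET_XMIT_DROP from the child would instead be counted by the parent and passed up unchanged.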

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Jarek Poplawski and committed by David S. Miller (378a2f09, 6e583ce5)

12 files changed: +68 -34
include/linux/netdevice.h  (+1)

···
 #define NET_XMIT_BYPASS    4       /* packet does not leave via dequeue;
                                       (TC use only - dev_queue_xmit
                                       returns this as NET_XMIT_SUCCESS) */
+#define NET_XMIT_MASK      0xFFFF  /* qdisc flags in net/sch_generic.h */

 /* Backlog congestion levels */
 #define NET_RX_SUCCESS     0       /* keep 'em coming, baby */
include/net/sch_generic.h  (+13 -1)

···
         return qdisc_skb_cb(skb)->pkt_len;
 }

+#ifdef CONFIG_NET_CLS_ACT
+/* additional qdisc xmit flags */
+enum net_xmit_qdisc_t {
+        __NET_XMIT_STOLEN = 0x00010000,
+};
+
+#define net_xmit_drop_count(e)  ((e) & __NET_XMIT_STOLEN ? 0 : 1)
+
+#else
+#define net_xmit_drop_count(e)  (1)
+#endif
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
···
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
         qdisc_skb_cb(skb)->pkt_len = skb->len;
-        return qdisc_enqueue(skb, sch);
+        return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }

 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
net/sched/sch_atm.c  (+7 -5)

···
         case TC_ACT_QUEUED:
         case TC_ACT_STOLEN:
                 kfree_skb(skb);
-                return NET_XMIT_SUCCESS;
+                return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
         case TC_ACT_SHOT:
                 kfree_skb(skb);
                 goto drop;
···
         ret = qdisc_enqueue(skb, flow->q);
         if (ret != 0) {
 drop: __maybe_unused
-                sch->qstats.drops++;
-                if (flow)
-                        flow->qstats.drops++;
+                if (net_xmit_drop_count(ret)) {
+                        sch->qstats.drops++;
+                        if (flow)
+                                flow->qstats.drops++;
+                }
                 return ret;
         }
         sch->bstats.bytes += qdisc_pkt_len(skb);
···
         if (!ret) {
                 sch->q.qlen++;
                 sch->qstats.requeues++;
-        } else {
+        } else if (net_xmit_drop_count(ret)) {
                 sch->qstats.drops++;
                 p->link.qstats.drops++;
         }
net/sched/sch_cbq.c  (+15 -8)

···
         switch (result) {
         case TC_ACT_QUEUED:
         case TC_ACT_STOLEN:
-                *qerr = NET_XMIT_SUCCESS;
+                *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
         case TC_ACT_SHOT:
                 return NULL;
         case TC_ACT_RECLASSIFY:
···
                 return ret;
         }

-        sch->qstats.drops++;
-        cbq_mark_toplevel(q, cl);
-        cl->qstats.drops++;
+        if (net_xmit_drop_count(ret)) {
+                sch->qstats.drops++;
+                cbq_mark_toplevel(q, cl);
+                cl->qstats.drops++;
+        }
         return ret;
 }
···
                 cbq_activate_class(cl);
                 return 0;
         }
-        sch->qstats.drops++;
-        cl->qstats.drops++;
+        if (net_xmit_drop_count(ret)) {
+                sch->qstats.drops++;
+                cl->qstats.drops++;
+        }
         return ret;
 }
···
         q->rx_class = NULL;

         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+                int ret;

                 cbq_mark_toplevel(q, cl);

                 q->rx_class = cl;
                 cl->q->__parent = sch;

-                if (qdisc_enqueue(skb, cl->q) == 0) {
+                ret = qdisc_enqueue(skb, cl->q);
+                if (ret == NET_XMIT_SUCCESS) {
                         sch->q.qlen++;
                         sch->bstats.packets++;
                         sch->bstats.bytes += qdisc_pkt_len(skb);
···
                         cbq_activate_class(cl);
                         return 0;
                 }
-                sch->qstats.drops++;
+                if (net_xmit_drop_count(ret))
+                        sch->qstats.drops++;
                 return 0;
         }
net/sched/sch_dsmark.c  (+5 -3)

···
         case TC_ACT_QUEUED:
         case TC_ACT_STOLEN:
                 kfree_skb(skb);
-                return NET_XMIT_SUCCESS;
+                return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;

         case TC_ACT_SHOT:
                 goto drop;
···
         err = qdisc_enqueue(skb, p->q);
         if (err != NET_XMIT_SUCCESS) {
-                sch->qstats.drops++;
+                if (net_xmit_drop_count(err))
+                        sch->qstats.drops++;
                 return err;
         }
···
         err = p->q->ops->requeue(skb, p->q);
         if (err != NET_XMIT_SUCCESS) {
-                sch->qstats.drops++;
+                if (net_xmit_drop_count(err))
+                        sch->qstats.drops++;
                 return err;
         }
net/sched/sch_hfsc.c  (+5 -3)

···
         switch (result) {
         case TC_ACT_QUEUED:
         case TC_ACT_STOLEN:
-                *qerr = NET_XMIT_SUCCESS;
+                *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
         case TC_ACT_SHOT:
                 return NULL;
         }
···
         err = qdisc_enqueue(skb, cl->qdisc);
         if (unlikely(err != NET_XMIT_SUCCESS)) {
-                cl->qstats.drops++;
-                sch->qstats.drops++;
+                if (net_xmit_drop_count(err)) {
+                        cl->qstats.drops++;
+                        sch->qstats.drops++;
+                }
                 return err;
         }
net/sched/sch_htb.c  (+11 -7)

···
         switch (result) {
         case TC_ACT_QUEUED:
         case TC_ACT_STOLEN:
-                *qerr = NET_XMIT_SUCCESS;
+                *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
         case TC_ACT_SHOT:
                 return NULL;
         }
···
                 kfree_skb(skb);
                 return ret;
 #endif
-        } else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-                sch->qstats.drops++;
-                cl->qstats.drops++;
+        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+                if (net_xmit_drop_count(ret)) {
+                        sch->qstats.drops++;
+                        cl->qstats.drops++;
+                }
                 return NET_XMIT_DROP;
         } else {
                 cl->bstats.packets +=
···
                 kfree_skb(skb);
                 return ret;
 #endif
-        } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+        } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
                    NET_XMIT_SUCCESS) {
-                sch->qstats.drops++;
-                cl->qstats.drops++;
+                if (net_xmit_drop_count(ret)) {
+                        sch->qstats.drops++;
+                        cl->qstats.drops++;
+                }
                 return NET_XMIT_DROP;
         } else
                 htb_activate(q, cl);
net/sched/sch_netem.c  (+2 -1)

···
                 sch->q.qlen++;
                 sch->bstats.bytes += qdisc_pkt_len(skb);
                 sch->bstats.packets++;
-        } else
+        } else if (net_xmit_drop_count(ret)) {
                 sch->qstats.drops++;
+        }

         pr_debug("netem: enqueue ret %d\n", ret);
         return ret;
net/sched/sch_prio.c  (+5 -3)

···
         switch (err) {
         case TC_ACT_STOLEN:
         case TC_ACT_QUEUED:
-                *qerr = NET_XMIT_SUCCESS;
+                *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
         case TC_ACT_SHOT:
                 return NULL;
         }
···
                 sch->q.qlen++;
                 return NET_XMIT_SUCCESS;
         }
-        sch->qstats.drops++;
+        if (net_xmit_drop_count(ret))
+                sch->qstats.drops++;
         return ret;
 }
···
                 sch->qstats.requeues++;
                 return 0;
         }
-        sch->qstats.drops++;
+        if (net_xmit_drop_count(ret))
+                sch->qstats.drops++;
         return NET_XMIT_DROP;
 }
net/sched/sch_red.c  (+1 -1)

···
                 sch->bstats.bytes += qdisc_pkt_len(skb);
                 sch->bstats.packets++;
                 sch->q.qlen++;
-        } else {
+        } else if (net_xmit_drop_count(ret)) {
                 q->stats.pdrop++;
                 sch->qstats.drops++;
         }
net/sched/sch_sfq.c  (+1 -1)

···
         switch (result) {
         case TC_ACT_STOLEN:
         case TC_ACT_QUEUED:
-                *qerr = NET_XMIT_SUCCESS;
+                *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
         case TC_ACT_SHOT:
                 return 0;
         }
net/sched/sch_tbf.c  (+2 -1)

···
         ret = qdisc_enqueue(skb, q->qdisc);
         if (ret != 0) {
-                sch->qstats.drops++;
+                if (net_xmit_drop_count(ret))
+                        sch->qstats.drops++;
                 return ret;
         }