net_sched: Add qdisc __NET_XMIT_STOLEN flag

Patrick McHardy <kaber@trash.net> noticed:
"The other problem that affects all qdiscs supporting actions is
TC_ACT_QUEUED/TC_ACT_STOLEN getting mapped to NET_XMIT_SUCCESS
even though the packet is not queued, corrupting upper qdiscs'
qlen counters."

and later explained:
"The reason why it translates it at all seems to be to not increase
the drops counter. Within a single qdisc this could be avoided by
other means easily, upper qdiscs would still increase the counter
when we return anything besides NET_XMIT_SUCCESS though.

This means we need a new NET_XMIT return value to indicate this to
the upper qdiscs. So I'd suggest to introduce NET_XMIT_STOLEN,
return that to upper qdiscs and translate it to NET_XMIT_SUCCESS
in dev_queue_xmit, similar to NET_XMIT_BYPASS."

David Miller <davem@davemloft.net> noticed:
"Maybe these NET_XMIT_* values being passed around should be a set of
bits. They could be composed of base meanings, combined with specific
attributes.

So you could say "NET_XMIT_DROP | __NET_XMIT_NO_DROP_COUNT"

The attributes get masked out by the top-level ->enqueue() caller,
such that the base meanings are the only thing that make their
way up into the stack. If it's only about communication within the
qdisc tree, let's simply code it that way."

This patch implements these ideas.
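
Below is a minimal stand-alone sketch (ordinary user-space C, not the kernel code itself) of the scheme: base NET_XMIT_* values stay in the low 16 bits, __NET_XMIT_STOLEN rides above NET_XMIT_MASK, intermediate qdiscs consult net_xmit_drop_count() before bumping their drops counters, and the top-level enqueue caller masks the attribute bits off so only the base meaning reaches the stack. The leaf/parent helpers are invented for illustration; the constant values and the net_xmit_drop_count() macro are the ones added by this patch.

/* Stand-alone illustration; constants match the patch, helpers are made up. */
#include <stdio.h>

#define NET_XMIT_SUCCESS	0
#define NET_XMIT_DROP		1
#define NET_XMIT_MASK		0xFFFF		/* base meanings live below this */

#define __NET_XMIT_STOLEN	0x00010000	/* qdisc-internal attribute bit */

/* a "stolen" packet must not be counted as a drop by upper qdiscs */
#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)

/* leaf qdisc: an action consumed the packet, report that upward */
static int leaf_enqueue(void)
{
	return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
}

/* parent qdisc: bump its drops counter only for real drops */
static int parent_enqueue(unsigned int *drops)
{
	int ret = leaf_enqueue();

	if (ret != NET_XMIT_SUCCESS && net_xmit_drop_count(ret))
		(*drops)++;
	return ret;
}

int main(void)
{
	unsigned int drops = 0;
	/* the top-level caller strips the qdisc-internal bits */
	int ret = parent_enqueue(&drops) & NET_XMIT_MASK;

	/* prints "stack sees 0, drops 0": success, nothing miscounted */
	printf("stack sees %d, drops %u\n", ret, drops);
	return 0;
}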

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by Jarek Poplawski and committed by David S. Miller 378a2f09 6e583ce5

+68 -34
+1 -0
include/linux/netdevice.h
 #define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
					   (TC use only - dev_queue_xmit
					   returns this as NET_XMIT_SUCCESS) */
+#define NET_XMIT_MASK		0xFFFF	/* qdisc flags in net/sch_generic.h */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
+13 -1
include/net/sch_generic.h
 	return qdisc_skb_cb(skb)->pkt_len;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+/* additional qdisc xmit flags */
+enum net_xmit_qdisc_t {
+	__NET_XMIT_STOLEN = 0x00010000,
+};
+
+#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
+
+#else
+#define net_xmit_drop_count(e)	(1)
+#endif
+
 static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
···
 static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 {
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	return qdisc_enqueue(skb, sch);
+	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
+7 -5
net/sched/sch_atm.c
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			kfree_skb(skb);
 			goto drop;
···
 	ret = qdisc_enqueue(skb, flow->q);
 	if (ret != 0) {
 drop: __maybe_unused
-		sch->qstats.drops++;
-		if (flow)
-			flow->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			if (flow)
+				flow->qstats.drops++;
+		}
 		return ret;
 	}
 	sch->bstats.bytes += qdisc_pkt_len(skb);
···
 	if (!ret) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 		p->link.qstats.drops++;
 	}
+15 -8
net/sched/sch_cbq.c
 		switch (result) {
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
-			*qerr = NET_XMIT_SUCCESS;
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 		case TC_ACT_SHOT:
 			return NULL;
 		case TC_ACT_RECLASSIFY:
···
 		return ret;
 	}
 
-	sch->qstats.drops++;
-	cbq_mark_toplevel(q, cl);
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cbq_mark_toplevel(q, cl);
+		cl->qstats.drops++;
+	}
 	return ret;
 }
···
 			cbq_activate_class(cl);
 		return 0;
 	}
-	sch->qstats.drops++;
-	cl->qstats.drops++;
+	if (net_xmit_drop_count(ret)) {
+		sch->qstats.drops++;
+		cl->qstats.drops++;
+	}
 	return ret;
 }
 
···
 	q->rx_class = NULL;
 
 	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+		int ret;
 
 		cbq_mark_toplevel(q, cl);
 
 		q->rx_class = cl;
 		cl->q->__parent = sch;
 
-		if (qdisc_enqueue(skb, cl->q) == 0) {
+		ret = qdisc_enqueue(skb, cl->q);
+		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
 			sch->bstats.packets++;
 			sch->bstats.bytes += qdisc_pkt_len(skb);
···
 				cbq_activate_class(cl);
 			return 0;
 		}
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return 0;
 	}
 
+5 -3
net/sched/sch_dsmark.c
 		case TC_ACT_QUEUED:
 		case TC_ACT_STOLEN:
 			kfree_skb(skb);
-			return NET_XMIT_SUCCESS;
+			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 
 		case TC_ACT_SHOT:
 			goto drop;
···
 
 	err = qdisc_enqueue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
 
···
 
 	err = p->q->ops->requeue(skb, p->q);
 	if (err != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err))
+			sch->qstats.drops++;
 		return err;
 	}
+5 -3
net/sched/sch_hfsc.c
 	switch (result) {
 	case TC_ACT_QUEUED:
 	case TC_ACT_STOLEN:
-		*qerr = NET_XMIT_SUCCESS;
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 	case TC_ACT_SHOT:
 		return NULL;
 	}
···
 
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
-		cl->qstats.drops++;
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
 		return err;
 	}
 
+11 -7
net/sched/sch_htb.c
 	switch (result) {
 	case TC_ACT_QUEUED:
 	case TC_ACT_STOLEN:
-		*qerr = NET_XMIT_SUCCESS;
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 	case TC_ACT_SHOT:
 		return NULL;
 	}
···
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (qdisc_enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+	} else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else {
 		cl->bstats.packets +=
···
 		kfree_skb(skb);
 		return ret;
 #endif
-	} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
 		   NET_XMIT_SUCCESS) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
+		if (net_xmit_drop_count(ret)) {
+			sch->qstats.drops++;
+			cl->qstats.drops++;
+		}
 		return NET_XMIT_DROP;
 	} else
 		htb_activate(q, cl);
+2 -1
net/sched/sch_netem.c
 		sch->q.qlen++;
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
-	} else
+	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
+	}
 
 	pr_debug("netem: enqueue ret %d\n", ret);
 	return ret;
+5 -3
net/sched/sch_prio.c
 	switch (err) {
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
-		*qerr = NET_XMIT_SUCCESS;
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 	case TC_ACT_SHOT:
 		return NULL;
 	}
···
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return ret;
 }
 
···
 		sch->qstats.requeues++;
 		return 0;
 	}
-	sch->qstats.drops++;
+	if (net_xmit_drop_count(ret))
+		sch->qstats.drops++;
 	return NET_XMIT_DROP;
 }
 
+1 -1
net/sched/sch_red.c
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.packets++;
 		sch->q.qlen++;
-	} else {
+	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
 		sch->qstats.drops++;
 	}
+1 -1
net/sched/sch_sfq.c
 	switch (result) {
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
-		*qerr = NET_XMIT_SUCCESS;
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 	case TC_ACT_SHOT:
 		return 0;
 	}
+2 -1
net/sched/sch_tbf.c
 
 	ret = qdisc_enqueue(skb, q->qdisc);
 	if (ret != 0) {
-		sch->qstats.drops++;
+		if (net_xmit_drop_count(ret))
+			sch->qstats.drops++;
 		return ret;
 	}
 