Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net_sched-reduce-the-number-of-qdisc-resets'

Cong Wang says:

====================
net_sched: reduce the number of qdisc resets

This patchset aims to reduce the number of qdisc resets during
qdisc tear down. Patches 1~3 are preparation for the following
patches; in particular, patch 2 and patch 3 add a few tracepoints
so that we can observe the whole lifetime of qdiscs. Patch 4
and patch 5 are the ones that do the actual work. Please find more
details in each patch description.

Vaclav Zindulka tested this patchset: with his large ruleset of
over 13k defined qdiscs, teardown time went from 22s to 520ms.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+110 -43
+75
include/trace/events/qdisc.h
··· 8 8 #include <linux/netdevice.h> 9 9 #include <linux/tracepoint.h> 10 10 #include <linux/ftrace.h> 11 + #include <linux/pkt_sched.h> 12 + #include <net/sch_generic.h> 11 13 12 14 TRACE_EVENT(qdisc_dequeue, 13 15 ··· 44 42 TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", 45 43 __entry->ifindex, __entry->handle, __entry->parent, 46 44 __entry->txq_state, __entry->packets, __entry->skbaddr ) 45 + ); 46 + 47 + TRACE_EVENT(qdisc_reset, 48 + 49 + TP_PROTO(struct Qdisc *q), 50 + 51 + TP_ARGS(q), 52 + 53 + TP_STRUCT__entry( 54 + __string( dev, qdisc_dev(q) ) 55 + __string( kind, q->ops->id ) 56 + __field( u32, parent ) 57 + __field( u32, handle ) 58 + ), 59 + 60 + TP_fast_assign( 61 + __assign_str(dev, qdisc_dev(q)); 62 + __assign_str(kind, q->ops->id); 63 + __entry->parent = q->parent; 64 + __entry->handle = q->handle; 65 + ), 66 + 67 + TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), 68 + __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), 69 + TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) 70 + ); 71 + 72 + TRACE_EVENT(qdisc_destroy, 73 + 74 + TP_PROTO(struct Qdisc *q), 75 + 76 + TP_ARGS(q), 77 + 78 + TP_STRUCT__entry( 79 + __string( dev, qdisc_dev(q) ) 80 + __string( kind, q->ops->id ) 81 + __field( u32, parent ) 82 + __field( u32, handle ) 83 + ), 84 + 85 + TP_fast_assign( 86 + __assign_str(dev, qdisc_dev(q)); 87 + __assign_str(kind, q->ops->id); 88 + __entry->parent = q->parent; 89 + __entry->handle = q->handle; 90 + ), 91 + 92 + TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev), 93 + __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent), 94 + TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle)) 95 + ); 96 + 97 + TRACE_EVENT(qdisc_create, 98 + 99 + TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent), 100 + 101 + TP_ARGS(ops, dev, parent), 102 + 103 + TP_STRUCT__entry( 104 + __string( 
dev, dev->name ) 105 + __string( kind, ops->id ) 106 + __field( u32, parent ) 107 + ), 108 + 109 + TP_fast_assign( 110 + __assign_str(dev, dev->name); 111 + __assign_str(kind, ops->id); 112 + __entry->parent = parent; 113 + ), 114 + 115 + TP_printk("dev=%s kind=%s parent=%x:%x", 116 + __get_str(dev), __get_str(kind), 117 + TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent)) 47 118 ); 48 119 49 120 #endif /* _TRACE_QDISC_H */
+3
net/sched/sch_api.c
··· 32 32 #include <net/pkt_sched.h> 33 33 #include <net/pkt_cls.h> 34 34 35 + #include <trace/events/qdisc.h> 36 + 35 37 /* 36 38 37 39 Short review. ··· 1285 1283 } 1286 1284 1287 1285 qdisc_hash_add(sch, false); 1286 + trace_qdisc_create(ops, dev, parent); 1288 1287 1289 1288 return sch; 1290 1289
+32 -43
net/sched/sch_generic.c
··· 896 896 } 897 897 sch->parent = parentid; 898 898 899 - if (!ops->init || ops->init(sch, NULL, extack) == 0) 899 + if (!ops->init || ops->init(sch, NULL, extack) == 0) { 900 + trace_qdisc_create(ops, dev_queue->dev, parentid); 900 901 return sch; 902 + } 901 903 902 904 qdisc_put(sch); 903 905 return NULL; ··· 912 910 { 913 911 const struct Qdisc_ops *ops = qdisc->ops; 914 912 struct sk_buff *skb, *tmp; 913 + 914 + trace_qdisc_reset(qdisc); 915 915 916 916 if (ops->reset) 917 917 ops->reset(qdisc); ··· 953 949 static void qdisc_destroy(struct Qdisc *qdisc) 954 950 { 955 951 const struct Qdisc_ops *ops = qdisc->ops; 956 - struct sk_buff *skb, *tmp; 957 952 958 953 #ifdef CONFIG_NET_SCHED 959 954 qdisc_hash_del(qdisc); ··· 960 957 qdisc_put_stab(rtnl_dereference(qdisc->stab)); 961 958 #endif 962 959 gen_kill_estimator(&qdisc->rate_est); 963 - if (ops->reset) 964 - ops->reset(qdisc); 960 + 961 + qdisc_reset(qdisc); 962 + 965 963 if (ops->destroy) 966 964 ops->destroy(qdisc); 967 965 968 966 module_put(ops->owner); 969 967 dev_put(qdisc_dev(qdisc)); 970 968 971 - skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) { 972 - __skb_unlink(skb, &qdisc->gso_skb); 973 - kfree_skb_list(skb); 974 - } 975 - 976 - skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) { 977 - __skb_unlink(skb, &qdisc->skb_bad_txq); 978 - kfree_skb_list(skb); 979 - } 969 + trace_qdisc_destroy(qdisc); 980 970 981 971 call_rcu(&qdisc->rcu, qdisc_free_cb); 982 972 } ··· 1128 1132 } 1129 1133 EXPORT_SYMBOL(dev_activate); 1130 1134 1135 + static void qdisc_deactivate(struct Qdisc *qdisc) 1136 + { 1137 + bool nolock = qdisc->flags & TCQ_F_NOLOCK; 1138 + 1139 + if (qdisc->flags & TCQ_F_BUILTIN) 1140 + return; 1141 + if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state)) 1142 + return; 1143 + 1144 + if (nolock) 1145 + spin_lock_bh(&qdisc->seqlock); 1146 + spin_lock_bh(qdisc_lock(qdisc)); 1147 + 1148 + set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); 1149 + 1150 + qdisc_reset(qdisc); 1151 + 1152 + 
spin_unlock_bh(qdisc_lock(qdisc)); 1153 + if (nolock) 1154 + spin_unlock_bh(&qdisc->seqlock); 1155 + } 1156 + 1131 1157 static void dev_deactivate_queue(struct net_device *dev, 1132 1158 struct netdev_queue *dev_queue, 1133 1159 void *_qdisc_default) ··· 1159 1141 1160 1142 qdisc = rtnl_dereference(dev_queue->qdisc); 1161 1143 if (qdisc) { 1162 - bool nolock = qdisc->flags & TCQ_F_NOLOCK; 1163 - 1164 - if (nolock) 1165 - spin_lock_bh(&qdisc->seqlock); 1166 - spin_lock_bh(qdisc_lock(qdisc)); 1167 - 1168 - if (!(qdisc->flags & TCQ_F_BUILTIN)) 1169 - set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); 1170 - 1144 + qdisc_deactivate(qdisc); 1171 1145 rcu_assign_pointer(dev_queue->qdisc, qdisc_default); 1172 - qdisc_reset(qdisc); 1173 - 1174 - spin_unlock_bh(qdisc_lock(qdisc)); 1175 - if (nolock) 1176 - spin_unlock_bh(&qdisc->seqlock); 1177 1146 } 1178 1147 } 1179 1148 ··· 1189 1184 return true; 1190 1185 } 1191 1186 return false; 1192 - } 1193 - 1194 - static void dev_qdisc_reset(struct net_device *dev, 1195 - struct netdev_queue *dev_queue, 1196 - void *none) 1197 - { 1198 - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; 1199 - 1200 - if (qdisc) 1201 - qdisc_reset(qdisc); 1202 1187 } 1203 1188 1204 1189 /** ··· 1227 1232 */ 1228 1233 schedule_timeout_uninterruptible(1); 1229 1234 } 1230 - /* The new qdisc is assigned at this point so we can safely 1231 - * unwind stale skb lists and qdisc statistics 1232 - */ 1233 - netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL); 1234 - if (dev_ingress_queue(dev)) 1235 - dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL); 1236 1235 } 1237 1236 } 1238 1237