Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.1-rc2 418 lines 10 kB view raw
1/* 2 * net/sched/sch_mqprio.c 3 * 4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * version 2 as published by the Free Software Foundation. 9 */ 10 11#include <linux/types.h> 12#include <linux/slab.h> 13#include <linux/kernel.h> 14#include <linux/string.h> 15#include <linux/errno.h> 16#include <linux/skbuff.h> 17#include <net/netlink.h> 18#include <net/pkt_sched.h> 19#include <net/sch_generic.h> 20 21struct mqprio_sched { 22 struct Qdisc **qdiscs; 23 int hw_owned; 24}; 25 26static void mqprio_destroy(struct Qdisc *sch) 27{ 28 struct net_device *dev = qdisc_dev(sch); 29 struct mqprio_sched *priv = qdisc_priv(sch); 30 unsigned int ntx; 31 32 if (priv->qdiscs) { 33 for (ntx = 0; 34 ntx < dev->num_tx_queues && priv->qdiscs[ntx]; 35 ntx++) 36 qdisc_destroy(priv->qdiscs[ntx]); 37 kfree(priv->qdiscs); 38 } 39 40 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) 41 dev->netdev_ops->ndo_setup_tc(dev, 0); 42 else 43 netdev_set_num_tc(dev, 0); 44} 45 46static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) 47{ 48 int i, j; 49 50 /* Verify num_tc is not out of max range */ 51 if (qopt->num_tc > TC_MAX_QUEUE) 52 return -EINVAL; 53 54 /* Verify priority mapping uses valid tcs */ 55 for (i = 0; i < TC_BITMASK + 1; i++) { 56 if (qopt->prio_tc_map[i] >= qopt->num_tc) 57 return -EINVAL; 58 } 59 60 /* net_device does not support requested operation */ 61 if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) 62 return -EINVAL; 63 64 /* if hw owned qcount and qoffset are taken from LLD so 65 * no reason to verify them here 66 */ 67 if (qopt->hw) 68 return 0; 69 70 for (i = 0; i < qopt->num_tc; i++) { 71 unsigned int last = qopt->offset[i] + qopt->count[i]; 72 73 /* Verify the queue count is in tx range being equal to the 74 * real_num_tx_queues indicates the last queue is in use. 75 */ 76 if (qopt->offset[i] >= dev->real_num_tx_queues || 77 !qopt->count[i] || 78 last > dev->real_num_tx_queues) 79 return -EINVAL; 80 81 /* Verify that the offset and counts do not overlap */ 82 for (j = i + 1; j < qopt->num_tc; j++) { 83 if (last > qopt->offset[j]) 84 return -EINVAL; 85 } 86 } 87 88 return 0; 89} 90 91static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) 92{ 93 struct net_device *dev = qdisc_dev(sch); 94 struct mqprio_sched *priv = qdisc_priv(sch); 95 struct netdev_queue *dev_queue; 96 struct Qdisc *qdisc; 97 int i, err = -EOPNOTSUPP; 98 struct tc_mqprio_qopt *qopt = NULL; 99 100 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); 101 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); 102 103 if (sch->parent != TC_H_ROOT) 104 return -EOPNOTSUPP; 105 106 if (!netif_is_multiqueue(dev)) 107 return -EOPNOTSUPP; 108 109 if (nla_len(opt) < sizeof(*qopt)) 110 return -EINVAL; 111 112 qopt = nla_data(opt); 113 if (mqprio_parse_opt(dev, qopt)) 114 return -EINVAL; 115 116 /* pre-allocate qdisc, attachment can't fail */ 117 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), 118 GFP_KERNEL); 119 if (priv->qdiscs == NULL) { 120 err = -ENOMEM; 121 goto err; 122 } 123 124 for (i = 0; i < dev->num_tx_queues; i++) { 125 dev_queue = netdev_get_tx_queue(dev, i); 126 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, 127 TC_H_MAKE(TC_H_MAJ(sch->handle), 128 TC_H_MIN(i + 1))); 129 if (qdisc == NULL) { 130 err = -ENOMEM; 131 goto err; 132 } 133 priv->qdiscs[i] = qdisc; 134 } 135 136 /* If the mqprio options indicate that hardware should own 137 * the queue mapping then run ndo_setup_tc otherwise use the 138 * supplied and verified mapping 139 */ 140 if (qopt->hw) { 141 priv->hw_owned = 1; 142 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); 143 if (err) 144 goto err; 145 } else { 146 netdev_set_num_tc(dev, qopt->num_tc); 147 for (i = 0; i < qopt->num_tc; i++) 148 netdev_set_tc_queue(dev, i, 149 qopt->count[i], qopt->offset[i]); 150 } 151 152 /* Always use supplied priority mappings */ 153 for (i = 0; i < TC_BITMASK + 1; i++) 154 netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); 155 156 sch->flags |= TCQ_F_MQROOT; 157 return 0; 158 159err: 160 mqprio_destroy(sch); 161 return err; 162} 163 164static void mqprio_attach(struct Qdisc *sch) 165{ 166 struct net_device *dev = qdisc_dev(sch); 167 struct mqprio_sched *priv = qdisc_priv(sch); 168 struct Qdisc *qdisc; 169 unsigned int ntx; 170 171 /* Attach underlying qdisc */ 172 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 173 qdisc = priv->qdiscs[ntx]; 174 qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); 175 if (qdisc) 176 qdisc_destroy(qdisc); 177 } 178 kfree(priv->qdiscs); 179 priv->qdiscs = NULL; 180} 181 182static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, 183 unsigned long cl) 184{ 185 struct net_device *dev = qdisc_dev(sch); 186 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); 187 188 if (ntx >= dev->num_tx_queues) 189 return NULL; 190 return netdev_get_tx_queue(dev, ntx); 191} 192 193static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, 194 struct Qdisc **old) 195{ 196 struct net_device *dev = qdisc_dev(sch); 197 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); 198 199 if (!dev_queue) 200 return -EINVAL; 201 202 if (dev->flags & IFF_UP) 203 dev_deactivate(dev); 204 205 *old = dev_graft_qdisc(dev_queue, new); 206 207 if (dev->flags & IFF_UP) 208 dev_activate(dev); 209 210 return 0; 211} 212 213static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) 214{ 215 struct net_device *dev = qdisc_dev(sch); 216 struct mqprio_sched *priv = qdisc_priv(sch); 217 unsigned char *b = skb_tail_pointer(skb); 218 struct tc_mqprio_qopt opt = { 0 }; 219 struct Qdisc *qdisc; 220 unsigned int i; 221 222 sch->q.qlen = 0; 223 memset(&sch->bstats, 0, sizeof(sch->bstats)); 224 memset(&sch->qstats, 0, sizeof(sch->qstats)); 225 226 for (i = 0; i < dev->num_tx_queues; i++) { 227 qdisc = netdev_get_tx_queue(dev, i)->qdisc; 228 spin_lock_bh(qdisc_lock(qdisc)); 229 sch->q.qlen += qdisc->q.qlen; 230 sch->bstats.bytes += qdisc->bstats.bytes; 231 sch->bstats.packets += qdisc->bstats.packets; 232 sch->qstats.qlen += qdisc->qstats.qlen; 233 sch->qstats.backlog += qdisc->qstats.backlog; 234 sch->qstats.drops += qdisc->qstats.drops; 235 sch->qstats.requeues += qdisc->qstats.requeues; 236 sch->qstats.overlimits += qdisc->qstats.overlimits; 237 spin_unlock_bh(qdisc_lock(qdisc)); 238 } 239 240 opt.num_tc = netdev_get_num_tc(dev); 241 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); 242 opt.hw = priv->hw_owned; 243 244 for (i = 0; i < netdev_get_num_tc(dev); i++) { 245 opt.count[i] = dev->tc_to_txq[i].count; 246 opt.offset[i] = dev->tc_to_txq[i].offset; 247 } 248 249 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 250 251 return skb->len; 252nla_put_failure: 253 nlmsg_trim(skb, b); 254 return -1; 255} 256 257static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) 258{ 259 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); 260 261 if (!dev_queue) 262 return NULL; 263 264 return dev_queue->qdisc_sleeping; 265} 266 267static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) 268{ 269 struct net_device *dev = qdisc_dev(sch); 270 unsigned int ntx = TC_H_MIN(classid); 271 272 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) 273 return 0; 274 return ntx; 275} 276 277static void mqprio_put(struct Qdisc *sch, unsigned long cl) 278{ 279} 280 281static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, 282 struct sk_buff *skb, struct tcmsg *tcm) 283{ 284 struct net_device *dev = qdisc_dev(sch); 285 286 if (cl <= netdev_get_num_tc(dev)) { 287 tcm->tcm_parent = TC_H_ROOT; 288 tcm->tcm_info = 0; 289 } else { 290 int i; 291 struct netdev_queue *dev_queue; 292 293 dev_queue = mqprio_queue_get(sch, cl); 294 tcm->tcm_parent = 0; 295 for (i = 0; i < netdev_get_num_tc(dev); i++) { 296 struct netdev_tc_txq tc = dev->tc_to_txq[i]; 297 int q_idx = cl - netdev_get_num_tc(dev); 298 299 if (q_idx > tc.offset && 300 q_idx <= tc.offset + tc.count) { 301 tcm->tcm_parent = 302 TC_H_MAKE(TC_H_MAJ(sch->handle), 303 TC_H_MIN(i + 1)); 304 break; 305 } 306 } 307 tcm->tcm_info = dev_queue->qdisc_sleeping->handle; 308 } 309 tcm->tcm_handle |= TC_H_MIN(cl); 310 return 0; 311} 312 313static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, 314 struct gnet_dump *d) 315 __releases(d->lock) 316 __acquires(d->lock) 317{ 318 struct net_device *dev = qdisc_dev(sch); 319 320 if (cl <= netdev_get_num_tc(dev)) { 321 int i; 322 struct Qdisc *qdisc; 323 struct gnet_stats_queue qstats = {0}; 324 struct gnet_stats_basic_packed bstats = {0}; 325 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; 326 327 /* Drop lock here it will be reclaimed before touching 328 * statistics this is required because the d->lock we 329 * hold here is the look on dev_queue->qdisc_sleeping 330 * also acquired below. 331 */ 332 spin_unlock_bh(d->lock); 333 334 for (i = tc.offset; i < tc.offset + tc.count; i++) { 335 qdisc = netdev_get_tx_queue(dev, i)->qdisc; 336 spin_lock_bh(qdisc_lock(qdisc)); 337 bstats.bytes += qdisc->bstats.bytes; 338 bstats.packets += qdisc->bstats.packets; 339 qstats.qlen += qdisc->qstats.qlen; 340 qstats.backlog += qdisc->qstats.backlog; 341 qstats.drops += qdisc->qstats.drops; 342 qstats.requeues += qdisc->qstats.requeues; 343 qstats.overlimits += qdisc->qstats.overlimits; 344 spin_unlock_bh(qdisc_lock(qdisc)); 345 } 346 /* Reclaim root sleeping lock before completing stats */ 347 spin_lock_bh(d->lock); 348 if (gnet_stats_copy_basic(d, &bstats) < 0 || 349 gnet_stats_copy_queue(d, &qstats) < 0) 350 return -1; 351 } else { 352 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); 353 354 sch = dev_queue->qdisc_sleeping; 355 sch->qstats.qlen = sch->q.qlen; 356 if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || 357 gnet_stats_copy_queue(d, &sch->qstats) < 0) 358 return -1; 359 } 360 return 0; 361} 362 363static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) 364{ 365 struct net_device *dev = qdisc_dev(sch); 366 unsigned long ntx; 367 368 if (arg->stop) 369 return; 370 371 /* Walk hierarchy with a virtual class per tc */ 372 arg->count = arg->skip; 373 for (ntx = arg->skip; 374 ntx < dev->num_tx_queues + netdev_get_num_tc(dev); 375 ntx++) { 376 if (arg->fn(sch, ntx + 1, arg) < 0) { 377 arg->stop = 1; 378 break; 379 } 380 arg->count++; 381 } 382} 383 384static const struct Qdisc_class_ops mqprio_class_ops = { 385 .graft = mqprio_graft, 386 .leaf = mqprio_leaf, 387 .get = mqprio_get, 388 .put = mqprio_put, 389 .walk = mqprio_walk, 390 .dump = mqprio_dump_class, 391 .dump_stats = mqprio_dump_class_stats, 392}; 393 394static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { 395 .cl_ops = &mqprio_class_ops, 396 .id = "mqprio", 397 .priv_size = sizeof(struct mqprio_sched), 398 .init = mqprio_init, 399 .destroy = mqprio_destroy, 400 .attach = mqprio_attach, 401 .dump = mqprio_dump, 402 .owner = THIS_MODULE, 403}; 404 405static int __init mqprio_module_init(void) 406{ 407 return register_qdisc(&mqprio_qdisc_ops); 408} 409 410static void __exit mqprio_module_exit(void) 411{ 412 unregister_qdisc(&mqprio_qdisc_ops); 413} 414 415module_init(mqprio_module_init); 416module_exit(mqprio_module_exit); 417 418MODULE_LICENSE("GPL");