
Merge branch 'xmit_list'

David Miller says:

====================
net: Make dev_hard_start_xmit() work fundamentally on lists

After this patch set, dev_hard_start_xmit() will work fundamentally on
any and all SKB lists.

This opens the path for a clean implementation of pulling multiple
packets out during qdisc_restart(), and then passing that blob in one
shot to dev_hard_start_xmit().
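
In this scheme a "list" is nothing more than the usual chain through
skb->next. As an illustrative sketch (not verbatim from the patches),
three validated packets stitched together and handed to the driver in
a single call would look like:

	skb1->next = skb2;
	skb2->next = skb3;
	skb3->next = NULL;

	skb = dev_hard_start_xmit(skb1, dev, txq, &ret);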

There were two main architectural blockers to this:

1) GSO handling: we kept the original GSO head SKB around simply
because dev_hard_start_xmit() had no way to communicate to the
caller how far into the segmented list it was able to go. Now it
can, so the GSO head can be liberated immediately (see the sketch
after this list).

All of the special GSO head SKB destructor et al. handling goes
away too.

2) Validation of VLAN, CSUM, and segmentation characteristics was being
performed inside of dev_hard_start_xmit(). If we want to truly batch,
we have to let the higher levels do this. In particular, this is
now dequeue_skb()'s job.
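
To make (1) concrete: the new validate_xmit_skb() (see the
net/core/dev.c hunk below) frees the GSO head the moment segmentation
succeeds, instead of parking it with a special destructor:

	if (netif_needs_gso(skb, features)) {
		struct sk_buff *segs;

		segs = skb_gso_segment(skb, features);
		kfree_skb(skb);		/* head SKB freed immediately */
		if (IS_ERR(segs))
			segs = NULL;
		skb = segs;		/* carry on with the segment list */
	}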

And with those two issues out of the way, it should now be trivial to
build experiments on top of this patch set; all of the framework is in
place. You could do something as simple as:

	skb = q->dequeue(q);
	if (skb)
		skb = validate_xmit_skb(skb, qdisc_dev(q));
	if (skb) {
		struct sk_buff *new, *head = skb;
		int limit = 5;

		do {
			new = q->dequeue(q);
			if (new)
				new = validate_xmit_skb(new, qdisc_dev(q));
			if (new) {
				skb->next = new;
				skb = new;
			}
		} while (new && --limit);
		skb = head;
	}

inside of the else branch of dequeue_skb().
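
On the driver-facing side, sch_direct_xmit() (see the
net/sched/sch_generic.c hunk below) now gets the unsent remainder
handed back instead of a bare return code:

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_xmit_frozen_or_stopped(txq))
		skb = dev_hard_start_xmit(skb, dev, txq, &ret);
	HARD_TX_UNLOCK(dev, txq);

so on !dev_xmit_complete(ret) the returned skb is the head of whatever
the device did not accept, ready to be requeued.
====================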

Signed-off-by: David S. Miller <davem@davemloft.net>

+144 -176
+4 -2
drivers/net/wan/dlci.c
···
 {
         struct dlci_local *dlp = netdev_priv(dev);
 
-        if (skb)
-                netdev_start_xmit(skb, dlp->slave);
+        if (skb) {
+                struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
+                netdev_start_xmit(skb, dlp->slave, txq, false);
+        }
         return NETDEV_TX_OK;
 }
+14 -6
include/linux/netdevice.h
···
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
                          struct netdev_phys_port_id *ppid);
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-                        struct netdev_queue *txq);
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+                                    struct netdev_queue *txq, int *ret);
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
···
 #endif
 
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
-                                              struct sk_buff *skb, struct net_device *dev)
+                                              struct sk_buff *skb, struct net_device *dev,
+                                              bool more)
 {
-        skb->xmit_more = 0;
+        skb->xmit_more = more ? 1 : 0;
         return ops->ndo_start_xmit(skb, dev);
 }
 
-static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
+                                            struct netdev_queue *txq, bool more)
 {
         const struct net_device_ops *ops = dev->netdev_ops;
+        int rc;
 
-        return __netdev_start_xmit(ops, skb, dev);
+        rc = __netdev_start_xmit(ops, skb, dev, more);
+        if (rc == NETDEV_TX_OK)
+                txq_trans_update(txq);
+
+        return rc;
 }
 
 int netdev_class_create_file_ns(struct class_attribute *class_attr,
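
The new bool more argument is what makes deferred doorbells possible:
it is latched into skb->xmit_more before ->ndo_start_xmit() runs, so a
driver can postpone its (expensive) MMIO kick until the last packet of
a burst. A hypothetical driver sketch (the mydrv_* helpers are
invented for illustration, not part of this series):

	static netdev_tx_t mydrv_ndo_start_xmit(struct sk_buff *skb,
						struct net_device *dev)
	{
		struct mydrv_priv *priv = netdev_priv(dev);

		/* Post the packet to the TX ring (hypothetical helper). */
		mydrv_queue_tx_descriptor(priv, skb);

		/* Only ring the doorbell when the stack has nothing
		 * further queued for us right now.
		 */
		if (!skb->xmit_more)
			mydrv_ring_tx_doorbell(priv);

		return NETDEV_TX_OK;
	}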
+1 -1
net/atm/mpc.c
···
 }
 
 non_ip:
-        return __netdev_start_xmit(mpc->old_ops, skb, dev);
+        return __netdev_start_xmit(mpc->old_ops, skb, dev, false);
 }
 
 static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
+114 -153
net/core/dev.c
···
         return 0;
 }
 
-struct dev_gso_cb {
-        void (*destructor)(struct sk_buff *skb);
-};
-
-#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
-
-static void dev_gso_skb_destructor(struct sk_buff *skb)
-{
-        struct dev_gso_cb *cb;
-
-        kfree_skb_list(skb->next);
-        skb->next = NULL;
-
-        cb = DEV_GSO_CB(skb);
-        if (cb->destructor)
-                cb->destructor(skb);
-}
-
-/**
- * dev_gso_segment - Perform emulated hardware segmentation on skb.
- * @skb: buffer to segment
- * @features: device features as applicable to this skb
- *
- * This function segments the given skb and stores the list of segments
- * in skb->next.
- */
-static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
-{
-        struct sk_buff *segs;
-
-        segs = skb_gso_segment(skb, features);
-
-        /* Verifying header integrity only. */
-        if (!segs)
-                return 0;
-
-        if (IS_ERR(segs))
-                return PTR_ERR(segs);
-
-        skb->next = segs;
-        DEV_GSO_CB(skb)->destructor = skb->destructor;
-        skb->destructor = dev_gso_skb_destructor;
-
-        return 0;
-}
-
 /* If MPLS offload request, verify we are testing hardware MPLS features
  * instead of standard features for the netdev.
  */
···
 }
 EXPORT_SYMBOL(netif_skb_features);
 
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-                        struct netdev_queue *txq)
+static int xmit_one(struct sk_buff *skb, struct net_device *dev,
+                    struct netdev_queue *txq, bool more)
 {
-        int rc = NETDEV_TX_OK;
-        unsigned int skb_len;
+        unsigned int len;
+        int rc;
 
-        if (likely(!skb->next)) {
-                netdev_features_t features;
+        if (!list_empty(&ptype_all))
+                dev_queue_xmit_nit(skb, dev);
 
-                /*
-                 * If device doesn't need skb->dst, release it right now while
-                 * its hot in this cpu cache
-                 */
-                if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
-                        skb_dst_drop(skb);
+        len = skb->len;
+        trace_net_dev_start_xmit(skb, dev);
+        rc = netdev_start_xmit(skb, dev, txq, more);
+        trace_net_dev_xmit(skb, rc, dev, len);
 
-                features = netif_skb_features(skb);
-
-                if (vlan_tx_tag_present(skb) &&
-                    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
-                        skb = __vlan_put_tag(skb, skb->vlan_proto,
-                                             vlan_tx_tag_get(skb));
-                        if (unlikely(!skb))
-                                goto out;
-
-                        skb->vlan_tci = 0;
-                }
-
-                /* If encapsulation offload request, verify we are testing
-                 * hardware encapsulation features instead of standard
-                 * features for the netdev
-                 */
-                if (skb->encapsulation)
-                        features &= dev->hw_enc_features;
-
-                if (netif_needs_gso(skb, features)) {
-                        if (unlikely(dev_gso_segment(skb, features)))
-                                goto out_kfree_skb;
-                        if (skb->next)
-                                goto gso;
-                } else {
-                        if (skb_needs_linearize(skb, features) &&
-                            __skb_linearize(skb))
-                                goto out_kfree_skb;
-
-                        /* If packet is not checksummed and device does not
-                         * support checksumming for this protocol, complete
-                         * checksumming here.
-                         */
-                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                                if (skb->encapsulation)
-                                        skb_set_inner_transport_header(skb,
-                                                skb_checksum_start_offset(skb));
-                                else
-                                        skb_set_transport_header(skb,
-                                                skb_checksum_start_offset(skb));
-                                if (!(features & NETIF_F_ALL_CSUM) &&
-                                    skb_checksum_help(skb))
-                                        goto out_kfree_skb;
-                        }
-                }
-
-                if (!list_empty(&ptype_all))
-                        dev_queue_xmit_nit(skb, dev);
-
-                skb_len = skb->len;
-                trace_net_dev_start_xmit(skb, dev);
-                rc = netdev_start_xmit(skb, dev);
-                trace_net_dev_xmit(skb, rc, dev, skb_len);
-                if (rc == NETDEV_TX_OK)
-                        txq_trans_update(txq);
-                return rc;
-        }
-
-gso:
-        do {
-                struct sk_buff *nskb = skb->next;
-
-                skb->next = nskb->next;
-                nskb->next = NULL;
-
-                if (!list_empty(&ptype_all))
-                        dev_queue_xmit_nit(nskb, dev);
-
-                skb_len = nskb->len;
-                trace_net_dev_start_xmit(nskb, dev);
-                rc = netdev_start_xmit(nskb, dev);
-                trace_net_dev_xmit(nskb, rc, dev, skb_len);
-                if (unlikely(rc != NETDEV_TX_OK)) {
-                        if (rc & ~NETDEV_TX_MASK)
-                                goto out_kfree_gso_skb;
-                        nskb->next = skb->next;
-                        skb->next = nskb;
-                        return rc;
-                }
-                txq_trans_update(txq);
-                if (unlikely(netif_xmit_stopped(txq) && skb->next))
-                        return NETDEV_TX_BUSY;
-        } while (skb->next);
-
-out_kfree_gso_skb:
-        if (likely(skb->next == NULL)) {
-                skb->destructor = DEV_GSO_CB(skb)->destructor;
-                consume_skb(skb);
-                return rc;
-        }
-out_kfree_skb:
-        kfree_skb(skb);
-out:
         return rc;
 }
-EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
+
+struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
+                                    struct netdev_queue *txq, int *ret)
+{
+        struct sk_buff *skb = first;
+        int rc = NETDEV_TX_OK;
+
+        while (skb) {
+                struct sk_buff *next = skb->next;
+
+                skb->next = NULL;
+                rc = xmit_one(skb, dev, txq, next != NULL);
+                if (unlikely(!dev_xmit_complete(rc))) {
+                        skb->next = next;
+                        goto out;
+                }
+
+                skb = next;
+                if (netif_xmit_stopped(txq) && skb) {
+                        rc = NETDEV_TX_BUSY;
+                        break;
+                }
+        }
+
+out:
+        *ret = rc;
+        return skb;
+}
+
+struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features)
+{
+        if (vlan_tx_tag_present(skb) &&
+            !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+                skb = __vlan_put_tag(skb, skb->vlan_proto,
+                                     vlan_tx_tag_get(skb));
+                if (skb)
+                        skb->vlan_tci = 0;
+        }
+        return skb;
+}
+
+struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+{
+        netdev_features_t features;
+
+        if (skb->next)
+                return skb;
+
+        /* If device doesn't need skb->dst, release it right now while
+         * its hot in this cpu cache
+         */
+        if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+                skb_dst_drop(skb);
+
+        features = netif_skb_features(skb);
+        skb = validate_xmit_vlan(skb, features);
+        if (unlikely(!skb))
+                goto out_null;
+
+        /* If encapsulation offload request, verify we are testing
+         * hardware encapsulation features instead of standard
+         * features for the netdev
+         */
+        if (skb->encapsulation)
+                features &= dev->hw_enc_features;
+
+        if (netif_needs_gso(skb, features)) {
+                struct sk_buff *segs;
+
+                segs = skb_gso_segment(skb, features);
+                kfree_skb(skb);
+                if (IS_ERR(segs))
+                        segs = NULL;
+                skb = segs;
+        } else {
+                if (skb_needs_linearize(skb, features) &&
+                    __skb_linearize(skb))
+                        goto out_kfree_skb;
+
+                /* If packet is not checksummed and device does not
+                 * support checksumming for this protocol, complete
+                 * checksumming here.
+                 */
+                if (skb->ip_summed == CHECKSUM_PARTIAL) {
+                        if (skb->encapsulation)
+                                skb_set_inner_transport_header(skb,
+                                        skb_checksum_start_offset(skb));
+                        else
+                                skb_set_transport_header(skb,
+                                        skb_checksum_start_offset(skb));
+                        if (!(features & NETIF_F_ALL_CSUM) &&
+                            skb_checksum_help(skb))
+                                goto out_kfree_skb;
+                }
+        }
+
+        return skb;
+
+out_kfree_skb:
+        kfree_skb(skb);
+out_null:
+        return NULL;
+}
 
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
···
 
         if (!netif_xmit_stopped(txq)) {
                 __this_cpu_inc(xmit_recursion);
-                rc = dev_hard_start_xmit(skb, dev, txq);
+                skb = dev_hard_start_xmit(skb, dev, txq, &rc);
                 __this_cpu_dec(xmit_recursion);
                 if (dev_xmit_complete(rc)) {
                         HARD_TX_UNLOCK(dev, txq);
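
The resulting calling convention: dev_hard_start_xmit() consumes the
list one skb at a time via xmit_one(), passing more = true whenever
another packet follows, and returns the first skb it could not send.
A caller-side sketch (assuming skb was already run through
validate_xmit_skb()):

	int ret;

	skb = dev_hard_start_xmit(skb, dev, txq, &ret);
	if (skb) {
		/* skb heads the unsent remainder; ret (e.g.
		 * NETDEV_TX_BUSY) says why we stopped, so the
		 * caller can requeue from here.
		 */
	}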
+1 -3
net/core/netpoll.c
···
                         skb->vlan_tci = 0;
                 }
 
-                status = netdev_start_xmit(skb, dev);
-                if (status == NETDEV_TX_OK)
-                        txq_trans_update(txq);
+                status = netdev_start_xmit(skb, dev, txq, false);
 
 out:
         return status;
+1 -2
net/core/pktgen.c
···
                 goto unlock;
         }
         atomic_inc(&(pkt_dev->skb->users));
-        ret = netdev_start_xmit(pkt_dev->skb, odev);
+        ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
 
         switch (ret) {
         case NETDEV_TX_OK:
-                txq_trans_update(txq);
                 pkt_dev->last_ok = 1;
                 pkt_dev->sofar++;
                 pkt_dev->seq_num++;
+2 -5
net/packet/af_packet.c
···
         local_bh_disable();
 
         HARD_TX_LOCK(dev, txq, smp_processor_id());
-        if (!netif_xmit_frozen_or_drv_stopped(txq)) {
-                ret = netdev_start_xmit(skb, dev);
-                if (ret == NETDEV_TX_OK)
-                        txq_trans_update(txq);
-        }
+        if (!netif_xmit_frozen_or_drv_stopped(txq))
+                ret = netdev_start_xmit(skb, dev, txq, false);
         HARD_TX_UNLOCK(dev, txq);
 
         local_bh_enable();
+5 -2
net/sched/sch_generic.c
···
         } else
                 skb = NULL;
 } else {
-        if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+        if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
                 skb = q->dequeue(q);
+                if (skb)
+                        skb = validate_xmit_skb(skb, qdisc_dev(q));
+        }
 }
 
 return skb;
···
 
         HARD_TX_LOCK(dev, txq, smp_processor_id());
         if (!netif_xmit_frozen_or_stopped(txq))
-                ret = dev_hard_start_xmit(skb, dev, txq);
+                skb = dev_hard_start_xmit(skb, dev, txq, &ret);
 
         HARD_TX_UNLOCK(dev, txq);
+2 -2
net/sched/sch_teql.c
···
         unsigned int length = qdisc_pkt_len(skb);
 
         if (!netif_xmit_frozen_or_stopped(slave_txq) &&
-            netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
-                txq_trans_update(slave_txq);
+            netdev_start_xmit(skb, slave, slave_txq, false) ==
+            NETDEV_TX_OK) {
                 __netif_tx_unlock(slave_txq);
                 master->slaves = NEXT_SLAVE(q);
                 netif_wake_queue(dev);