Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

veth: Rework veth_xdp_rcv_skb in order to accept non-linear skb

Introduce the veth_convert_skb_to_xdp_buff routine in order to
convert a non-linear skb into an xdp buffer. If the received skb
is cloned or shared, veth_convert_skb_to_xdp_buff will copy it
into a new skb composed of order-0 pages for the linear and the
fragmented areas. Moreover, veth_convert_skb_to_xdp_buff guarantees
we have enough headroom for xdp.
This is a preliminary patch to allow attaching xdp programs with frags
support to veth devices.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/8d228b106bc1903571afd1d77e797bffe9a5ea7c.1646989407.git.lorenzo@kernel.org

Authored by Lorenzo Bianconi and committed by Daniel Borkmann.
718a18a0 5142239a

+135 -69
+134 -69
drivers/net/veth.c
··· 433 433 { 434 434 } 435 435 436 - static struct sk_buff *veth_build_skb(void *head, int headroom, int len, 437 - int buflen) 438 - { 439 - struct sk_buff *skb; 440 - 441 - skb = build_skb(head, buflen); 442 - if (!skb) 443 - return NULL; 444 - 445 - skb_reserve(skb, headroom); 446 - skb_put(skb, len); 447 - 448 - return skb; 449 - } 450 - 451 436 static int veth_select_rxq(struct net_device *dev) 452 437 { 453 438 return smp_processor_id() % dev->real_num_rx_queues; ··· 680 695 } 681 696 } 682 697 698 + static void veth_xdp_get(struct xdp_buff *xdp) 699 + { 700 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 701 + int i; 702 + 703 + get_page(virt_to_page(xdp->data)); 704 + if (likely(!xdp_buff_has_frags(xdp))) 705 + return; 706 + 707 + for (i = 0; i < sinfo->nr_frags; i++) 708 + __skb_frag_ref(&sinfo->frags[i]); 709 + } 710 + 711 + static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, 712 + struct xdp_buff *xdp, 713 + struct sk_buff **pskb) 714 + { 715 + struct sk_buff *skb = *pskb; 716 + u32 frame_sz; 717 + 718 + if (skb_shared(skb) || skb_head_is_locked(skb) || 719 + skb_shinfo(skb)->nr_frags) { 720 + u32 size, len, max_head_size, off; 721 + struct sk_buff *nskb; 722 + struct page *page; 723 + int i, head_off; 724 + 725 + /* We need a private copy of the skb and data buffers since 726 + * the ebpf program can modify it. We segment the original skb 727 + * into order-0 pages without linearize it. 
728 + * 729 + * Make sure we have enough space for linear and paged area 730 + */ 731 + max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - 732 + VETH_XDP_HEADROOM); 733 + if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size) 734 + goto drop; 735 + 736 + /* Allocate skb head */ 737 + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); 738 + if (!page) 739 + goto drop; 740 + 741 + nskb = build_skb(page_address(page), PAGE_SIZE); 742 + if (!nskb) { 743 + put_page(page); 744 + goto drop; 745 + } 746 + 747 + skb_reserve(nskb, VETH_XDP_HEADROOM); 748 + size = min_t(u32, skb->len, max_head_size); 749 + if (skb_copy_bits(skb, 0, nskb->data, size)) { 750 + consume_skb(nskb); 751 + goto drop; 752 + } 753 + skb_put(nskb, size); 754 + 755 + skb_copy_header(nskb, skb); 756 + head_off = skb_headroom(nskb) - skb_headroom(skb); 757 + skb_headers_offset_update(nskb, head_off); 758 + 759 + /* Allocate paged area of new skb */ 760 + off = size; 761 + len = skb->len - off; 762 + 763 + for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { 764 + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); 765 + if (!page) { 766 + consume_skb(nskb); 767 + goto drop; 768 + } 769 + 770 + size = min_t(u32, len, PAGE_SIZE); 771 + skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE); 772 + if (skb_copy_bits(skb, off, page_address(page), 773 + size)) { 774 + consume_skb(nskb); 775 + goto drop; 776 + } 777 + 778 + len -= size; 779 + off += size; 780 + } 781 + 782 + consume_skb(skb); 783 + skb = nskb; 784 + } else if (skb_headroom(skb) < XDP_PACKET_HEADROOM && 785 + pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) { 786 + goto drop; 787 + } 788 + 789 + /* SKB "head" area always have tailroom for skb_shared_info */ 790 + frame_sz = skb_end_pointer(skb) - skb->head; 791 + frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 792 + xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 793 + xdp_prepare_buff(xdp, skb->head, skb_headroom(skb), 794 + skb_headlen(skb), true); 795 + 796 + if (skb_is_nonlinear(skb)) { 797 
+ skb_shinfo(skb)->xdp_frags_size = skb->data_len; 798 + xdp_buff_set_frags_flag(xdp); 799 + } else { 800 + xdp_buff_clear_frags_flag(xdp); 801 + } 802 + *pskb = skb; 803 + 804 + return 0; 805 + drop: 806 + consume_skb(skb); 807 + *pskb = NULL; 808 + 809 + return -ENOMEM; 810 + } 811 + 683 812 static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, 684 813 struct sk_buff *skb, 685 814 struct veth_xdp_tx_bq *bq, 686 815 struct veth_stats *stats) 687 816 { 688 - u32 pktlen, headroom, act, metalen, frame_sz; 689 817 void *orig_data, *orig_data_end; 690 818 struct bpf_prog *xdp_prog; 691 - int mac_len, delta, off; 692 819 struct xdp_buff xdp; 820 + u32 act, metalen; 821 + int off; 693 822 694 823 skb_prepare_for_gro(skb); 695 824 ··· 814 715 goto out; 815 716 } 816 717 817 - mac_len = skb->data - skb_mac_header(skb); 818 - pktlen = skb->len + mac_len; 819 - headroom = skb_headroom(skb) - mac_len; 820 - 821 - if (skb_shared(skb) || skb_head_is_locked(skb) || 822 - skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) { 823 - struct sk_buff *nskb; 824 - int size, head_off; 825 - void *head, *start; 826 - struct page *page; 827 - 828 - size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) + 829 - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 830 - if (size > PAGE_SIZE) 831 - goto drop; 832 - 833 - page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); 834 - if (!page) 835 - goto drop; 836 - 837 - head = page_address(page); 838 - start = head + VETH_XDP_HEADROOM; 839 - if (skb_copy_bits(skb, -mac_len, start, pktlen)) { 840 - page_frag_free(head); 841 - goto drop; 842 - } 843 - 844 - nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len, 845 - skb->len, PAGE_SIZE); 846 - if (!nskb) { 847 - page_frag_free(head); 848 - goto drop; 849 - } 850 - 851 - skb_copy_header(nskb, skb); 852 - head_off = skb_headroom(nskb) - skb_headroom(skb); 853 - skb_headers_offset_update(nskb, head_off); 854 - consume_skb(skb); 855 - skb = nskb; 856 - } 857 - 858 - /* SKB "head" area always have 
tailroom for skb_shared_info */ 859 - frame_sz = skb_end_pointer(skb) - skb->head; 860 - frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 861 - xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq); 862 - xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true); 718 + __skb_push(skb, skb->data - skb_mac_header(skb)); 719 + if (veth_convert_skb_to_xdp_buff(rq, &xdp, &skb)) 720 + goto drop; 863 721 864 722 orig_data = xdp.data; 865 723 orig_data_end = xdp.data_end; ··· 827 771 case XDP_PASS: 828 772 break; 829 773 case XDP_TX: 830 - get_page(virt_to_page(xdp.data)); 774 + veth_xdp_get(&xdp); 831 775 consume_skb(skb); 832 776 xdp.rxq->mem = rq->xdp_mem; 833 777 if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) { ··· 839 783 rcu_read_unlock(); 840 784 goto xdp_xmit; 841 785 case XDP_REDIRECT: 842 - get_page(virt_to_page(xdp.data)); 786 + veth_xdp_get(&xdp); 843 787 consume_skb(skb); 844 788 xdp.rxq->mem = rq->xdp_mem; 845 789 if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { ··· 862 806 rcu_read_unlock(); 863 807 864 808 /* check if bpf_xdp_adjust_head was used */ 865 - delta = orig_data - xdp.data; 866 - off = mac_len + delta; 809 + off = orig_data - xdp.data; 867 810 if (off > 0) 868 811 __skb_push(skb, off); 869 812 else if (off < 0) 870 813 __skb_pull(skb, -off); 871 - skb->mac_header -= delta; 814 + 815 + skb_reset_mac_header(skb); 872 816 873 817 /* check if bpf_xdp_adjust_tail was used */ 874 818 off = xdp.data_end - orig_data_end; 875 819 if (off != 0) 876 820 __skb_put(skb, off); /* positive on grow, negative on shrink */ 821 + 822 + /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers 823 + * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. 
824 + */ 825 + if (xdp_buff_has_frags(&xdp)) 826 + skb->data_len = skb_shinfo(skb)->xdp_frags_size; 827 + else 828 + skb->data_len = 0; 829 + 877 830 skb->protocol = eth_type_trans(skb, rq->dev); 878 831 879 832 metalen = xdp.data - xdp.data_meta; ··· 898 833 return NULL; 899 834 err_xdp: 900 835 rcu_read_unlock(); 901 - page_frag_free(xdp.data); 836 + xdp_return_buff(&xdp); 902 837 xdp_xmit: 903 838 return NULL; 904 839 }
+1
net/core/xdp.c
··· 528 528 out: 529 529 __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); 530 530 } 531 + EXPORT_SYMBOL_GPL(xdp_return_buff); 531 532 532 533 /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ 533 534 void __xdp_release_frame(void *data, struct xdp_mem_info *mem)