Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/xsk: transmit and receive multi-buffer packets

Add the ability to send and receive packets that are larger than the
size of a umem frame, using the AF_XDP/XDP multi-buffer
support. There are three pieces of code that need to be changed to
achieve this: the Rx path, the Tx path, and the validation logic.

Both the Rx and Tx paths could previously only deal with a single fragment per
packet. The Tx path is extended with a new function called
pkt_nb_frags() that can be used to retrieve the number of fragments a
packet will consume. We then create that many fragments in a loop and
fill the N-1 first ones to the max size limit to use the buffer space
efficiently, and the Nth one with whatever data that is left. This
goes on until we have filled in at most BATCH_SIZE worth of
descriptors and fragments. If we detect that the next packet would
lead to BATCH_SIZE number of fragments sent being exceeded, we do not
send this packet and finish the batch. This packet is instead sent in
the next iteration of BATCH_SIZE fragments.

For Rx, we loop over all fragments we receive as usual, but for every
descriptor that we receive we call a new validation function called
is_frag_valid() to validate the consistency of this fragment. The code
then checks if the packet continues in the next frame. If so, it loops
over the next fragment and performs the same validation. Once we have
received the last fragment of the packet we also call the function
is_pkt_valid() to validate the packet as a whole. If we get to the end
of the batch and we are not at the end of the current packet, we back
out the partial packet and end the loop. Once we get into the receive
loop next time, we start over from the beginning of that packet. This
way, the code becomes simpler at the cost of some performance.

The validation function is_frag_valid() checks that the sequence and
packet numbers are correct at the start and end of each fragment.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20230719132421.584801-19-maciej.fijalkowski@intel.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Magnus Karlsson and committed by
Alexei Starovoitov
17f1034d 49ca37d0

+143 -38
+3
tools/include/uapi/linux/if_xdp.h
··· 106 106 __u32 options; 107 107 }; 108 108 109 + /* Flag indicating packet constitutes of multiple buffers*/ 110 + #define XDP_PKT_CONTD (1 << 0) 111 + 109 112 /* UMEM descriptor is __u64 */ 110 113 111 114 #endif /* _LINUX_IF_XDP_H */
+138 -37
tools/testing/selftests/bpf/xskxceiver.c
··· 533 533 return pkt_stream; 534 534 } 535 535 536 + static bool pkt_continues(const struct xdp_desc *desc) 537 + { 538 + return desc->options & XDP_PKT_CONTD; 539 + } 540 + 536 541 static u32 ceil_u32(u32 a, u32 b) 537 542 { 538 543 return (a + b - 1) / b; ··· 554 549 { 555 550 pkt->offset = offset; 556 551 pkt->len = len; 557 - if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) 552 + if (len > MAX_ETH_JUMBO_SIZE) 558 553 pkt->valid = false; 559 554 else 560 555 pkt->valid = true; ··· 640 635 if (!pkt->valid) 641 636 return pkt->offset; 642 637 return pkt->offset + umem_alloc_buffer(umem); 638 + } 639 + 640 + static void pkt_stream_cancel(struct pkt_stream *pkt_stream) 641 + { 642 + pkt_stream->current_pkt_nb--; 643 643 } 644 644 645 645 static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, ··· 775 765 return true; 776 766 } 777 767 778 - static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) 768 + static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, 769 + u32 bytes_processed) 779 770 { 780 - void *data = xsk_umem__get_data(buffer, addr); 781 - u32 seqnum, pkt_data; 771 + u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum; 772 + void *data = xsk_umem__get_data(umem->buffer, addr); 782 773 783 - if (!pkt) { 784 - ksft_print_msg("[%s] too many packets received\n", __func__); 774 + addr -= umem->base_addr; 775 + 776 + if (addr >= umem->num_frames * umem->frame_size || 777 + addr + len > umem->num_frames * umem->frame_size) { 778 + ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); 779 + return false; 780 + } 781 + if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { 782 + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); 783 + return false; 784 + } 785 + 786 + pkt_data = data; 787 + if (!bytes_processed) { 788 + pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data); 
789 + len -= PKT_HDR_SIZE; 790 + } else { 791 + bytes_processed -= PKT_HDR_SIZE; 792 + } 793 + 794 + expected_seqnum = bytes_processed / sizeof(*pkt_data); 795 + seqnum = ntohl(*pkt_data) & 0xffff; 796 + pkt_nb = ntohl(*pkt_data) >> 16; 797 + 798 + if (expected_pkt_nb != pkt_nb) { 799 + ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n", 800 + __func__, expected_pkt_nb, pkt_nb); 801 + goto error; 802 + } 803 + if (expected_seqnum != seqnum) { 804 + ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n", 805 + __func__, expected_seqnum, seqnum); 785 806 goto error; 786 807 } 787 808 788 - if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { 789 - /* Do not try to verify packets that are smaller than minimum size. */ 790 - return true; 791 - } 792 - 793 - if (pkt->len != len) { 794 - ksft_print_msg("[%s] expected length [%d], got length [%d]\n", 795 - __func__, pkt->len, len); 796 - goto error; 797 - } 798 - 799 - pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); 800 - seqnum = pkt_data >> 16; 801 - 802 - if (pkt->pkt_nb != seqnum) { 803 - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", 804 - __func__, pkt->pkt_nb, seqnum); 809 + words_to_end = len / sizeof(*pkt_data) - 1; 810 + pkt_data += words_to_end; 811 + seqnum = ntohl(*pkt_data) & 0xffff; 812 + expected_seqnum += words_to_end; 813 + if (expected_seqnum != seqnum) { 814 + ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n", 815 + __func__, expected_seqnum, seqnum); 805 816 goto error; 806 817 } 807 818 808 819 return true; 809 820 810 821 error: 811 - pkt_dump(data, len, true); 822 + pkt_dump(data, len, !bytes_processed); 812 823 return false; 824 + } 825 + 826 + static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) 827 + { 828 + if (!pkt) { 829 + ksft_print_msg("[%s] too many packets received\n", __func__); 830 + return false; 831 + } 832 + 833 + if (pkt->len != len) { 834 + ksft_print_msg("[%s] expected packet length [%d], got 
length [%d]\n", 835 + __func__, pkt->len, len); 836 + pkt_dump(xsk_umem__get_data(buffer, addr), len, true); 837 + return false; 838 + } 839 + 840 + return true; 813 841 } 814 842 815 843 static void kick_tx(struct xsk_socket_info *xsk) ··· 902 854 { 903 855 struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; 904 856 struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; 905 - u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; 906 857 struct xsk_socket_info *xsk = test->ifobj_rx->xsk; 858 + u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; 907 859 struct ifobject *ifobj = test->ifobj_rx; 908 860 struct xsk_umem_info *umem = xsk->umem; 909 861 struct pkt *pkt; ··· 916 868 917 869 pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); 918 870 while (pkt) { 871 + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; 872 + u64 first_addr; 873 + 919 874 ret = gettimeofday(&tv_now, NULL); 920 875 if (ret) 921 876 exit_with_error(errno); ··· 964 913 } 965 914 } 966 915 967 - for (i = 0; i < rcvd; i++) { 916 + while (frags_processed < rcvd) { 968 917 const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); 969 918 u64 addr = desc->addr, orig; 970 919 971 920 orig = xsk_umem__extract_addr(addr); 972 921 addr = xsk_umem__add_offset_to_addr(addr); 973 922 974 - if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || 923 + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || 975 924 !is_offset_correct(umem, pkt, addr) || 976 925 (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) 977 926 return TEST_FAILURE; 978 927 928 + if (!nb_frags++) 929 + first_addr = addr; 930 + frags_processed++; 931 + pkt_len += desc->len; 979 932 if (ifobj->use_fill_ring) 980 933 *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; 934 + 935 + if (pkt_continues(desc)) 936 + continue; 937 + 938 + /* The complete packet has been received */ 939 + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || 940 + 
!is_offset_correct(umem, pkt, addr)) 941 + return TEST_FAILURE; 942 + 981 943 pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); 944 + nb_frags = 0; 945 + pkt_len = 0; 946 + } 947 + 948 + if (nb_frags) { 949 + /* In the middle of a packet. Start over from beginning of packet. */ 950 + idx_rx -= nb_frags; 951 + xsk_ring_cons__cancel(&xsk->rx, nb_frags); 952 + if (ifobj->use_fill_ring) { 953 + idx_fq -= nb_frags; 954 + xsk_ring_prod__cancel(&umem->fq, nb_frags); 955 + } 956 + frags_processed -= nb_frags; 982 957 } 983 958 984 959 if (ifobj->use_fill_ring) 985 - xsk_ring_prod__submit(&umem->fq, rcvd); 960 + xsk_ring_prod__submit(&umem->fq, frags_processed); 986 961 if (ifobj->release_rx) 987 - xsk_ring_cons__release(&xsk->rx, rcvd); 962 + xsk_ring_cons__release(&xsk->rx, frags_processed); 988 963 989 964 pthread_mutex_lock(&pacing_mutex); 990 965 pkts_in_flight -= pkts_sent; ··· 1023 946 1024 947 static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) 1025 948 { 949 + u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; 950 + struct pkt_stream *pkt_stream = ifobject->pkt_stream; 1026 951 struct xsk_socket_info *xsk = ifobject->xsk; 1027 952 struct xsk_umem_info *umem = ifobject->umem; 1028 - u32 i, idx = 0, valid_pkts = 0, buffer_len; 1029 953 bool use_poll = ifobject->use_poll; 1030 954 int ret; 1031 955 1032 - buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len); 956 + buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); 1033 957 /* pkts_in_flight might be negative if many invalid packets are sent */ 1034 958 if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { 1035 959 kick_tx(xsk); ··· 1061 983 } 1062 984 1063 985 for (i = 0; i < BATCH_SIZE; i++) { 1064 - struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); 1065 - struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream); 986 + struct pkt *pkt = 
pkt_stream_get_next_tx_pkt(pkt_stream); 987 + u32 nb_frags, bytes_written = 0; 1066 988 1067 989 if (!pkt) 1068 990 break; 1069 991 1070 - tx_desc->addr = pkt_get_addr(pkt, umem); 1071 - tx_desc->len = pkt->len; 992 + nb_frags = pkt_nb_frags(umem->frame_size, pkt); 993 + if (nb_frags > BATCH_SIZE - i) { 994 + pkt_stream_cancel(pkt_stream); 995 + xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i); 996 + break; 997 + } 998 + 1072 999 if (pkt->valid) { 1073 1000 valid_pkts++; 1074 - pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0); 1001 + valid_frags += nb_frags; 1002 + } 1003 + 1004 + while (nb_frags--) { 1005 + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); 1006 + 1007 + tx_desc->addr = pkt_get_addr(pkt, ifobject->umem); 1008 + if (nb_frags) { 1009 + tx_desc->len = umem->frame_size; 1010 + tx_desc->options = XDP_PKT_CONTD; 1011 + i++; 1012 + } else { 1013 + tx_desc->len = pkt->len - bytes_written; 1014 + tx_desc->options = 0; 1015 + } 1016 + if (pkt->valid) 1017 + pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 1018 + bytes_written); 1019 + bytes_written += tx_desc->len; 1075 1020 } 1076 1021 } 1077 1022 ··· 1103 1002 pthread_mutex_unlock(&pacing_mutex); 1104 1003 1105 1004 xsk_ring_prod__submit(&xsk->tx, i); 1106 - xsk->outstanding_tx += valid_pkts; 1005 + xsk->outstanding_tx += valid_frags; 1107 1006 1108 1007 if (use_poll) { 1109 1008 ret = poll(fds, 1, POLL_TMOUT);
+2 -1
tools/testing/selftests/bpf/xskxceiver.h
··· 38 38 #define MAX_TEARDOWN_ITER 10 39 39 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ 40 40 #define MIN_PKT_SIZE 64 41 + #define MAX_ETH_JUMBO_SIZE 9000 41 42 #define USLEEP_MAX 10000 42 43 #define SOCK_RECONF_CTR 10 43 44 #define BATCH_SIZE 64 ··· 48 47 #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) 49 48 #define RX_FULL_RXQSIZE 32 50 49 #define UMEM_HEADROOM_TEST_SIZE 128 51 - #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) 50 + #define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1) 52 51 #define HUGEPAGE_SIZE (2 * 1024 * 1024) 53 52 #define PKT_DUMP_NB_TO_PRINT 16 54 53