Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: atlantic: implement UDP GSO offload

atlantic hardware does support UDP hardware segmentation offload.
This allows the user to specify one large contiguous buffer of data,
which is then split automatically into multiple UDP packets
of the specified size.

Bulk sending of large UDP streams lowers CPU usage and increases
bandwidth.

We took measurements both with the udpgso_bench_tx test tool and with a
modified iperf3 measurement tool (4 streams, multithreaded, 200-byte packet
size) over an AQC<->AQC 10G link. Flow control was disabled to prevent the
RX side from affecting the measurements.

No UDP GSO:
iperf3 -c 10.0.1.2 -u -b0 -l 200 -P4 --multithread
UDP GSO:
iperf3 -c 10.0.1.2 -u -b0 -l 12600 --udp-lso 200 -P4 --multithread

Mode          CPU    iperf speed   Line speed   Packets per second
-------------------------------------------------------------------
NO UDP GSO    350%   3.07 Gbps     3.8 Gbps     1,919,419
SW UDP GSO    200%   5.55 Gbps     6.4 Gbps     3,286,144
HW UDP GSO    90%    6.80 Gbps     8.4 Gbps     4,273,117

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Igor Russkikh and committed by
David S. Miller
822cd114 8009bb19

+55 -32
+15
Documentation/networking/device_drivers/aquantia/atlantic.txt
··· 325 325 Example: 326 326 ethtool -N eth0 flow-type udp4 action 0 loc 32 327 327 328 + UDP GSO hardware offload 329 + --------------------------------- 330 + UDP GSO allows to boost UDP tx rates by offloading UDP headers allocation 331 + into hardware. A special userspace socket option is required for this, 332 + could be validated with /kernel/tools/testing/selftests/net/ 333 + 334 + udpgso_bench_tx -u -4 -D 10.0.1.1 -s 6300 -S 100 335 + 336 + Will cause sending out of 100 byte sized UDP packets formed from single 337 + 6300 bytes user buffer. 338 + 339 + UDP GSO is configured by: 340 + 341 + ethtool -K eth0 tx-udp-segmentation on 342 + 328 343 Private flags (testing) 329 344 --------------------------------- 330 345
+28 -24
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
··· 309 309 self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM | 310 310 NETIF_F_RXHASH | NETIF_F_SG | 311 311 NETIF_F_LRO | NETIF_F_TSO; 312 + self->ndev->gso_partial_features = NETIF_F_GSO_UDP_L4; 312 313 self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; 313 314 self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 314 315 ··· 473 472 { 474 473 unsigned int nr_frags = skb_shinfo(skb)->nr_frags; 475 474 struct aq_ring_buff_s *first = NULL; 475 + u8 ipver = ip_hdr(skb)->version; 476 476 struct aq_ring_buff_s *dx_buff; 477 477 bool need_context_tag = false; 478 478 unsigned int frag_count = 0U; 479 479 unsigned int ret = 0U; 480 480 unsigned int dx; 481 + u8 l4proto = 0; 482 + 483 + if (ipver == 4) 484 + l4proto = ip_hdr(skb)->protocol; 485 + else if (ipver == 6) 486 + l4proto = ipv6_hdr(skb)->nexthdr; 481 487 482 488 dx = ring->sw_tail; 483 489 dx_buff = &ring->buff_ring[dx]; ··· 492 484 493 485 if (unlikely(skb_is_gso(skb))) { 494 486 dx_buff->mss = skb_shinfo(skb)->gso_size; 495 - dx_buff->is_gso = 1U; 487 + if (l4proto == IPPROTO_TCP) { 488 + dx_buff->is_gso_tcp = 1U; 489 + dx_buff->len_l4 = tcp_hdrlen(skb); 490 + } else if (l4proto == IPPROTO_UDP) { 491 + dx_buff->is_gso_udp = 1U; 492 + dx_buff->len_l4 = sizeof(struct udphdr); 493 + /* UDP GSO Hardware does not replace packet length. */ 494 + udp_hdr(skb)->len = htons(dx_buff->mss + 495 + dx_buff->len_l4); 496 + } else { 497 + WARN_ONCE(true, "Bad GSO mode"); 498 + goto exit; 499 + } 496 500 dx_buff->len_pkt = skb->len; 497 501 dx_buff->len_l2 = ETH_HLEN; 498 - dx_buff->len_l3 = ip_hdrlen(skb); 499 - dx_buff->len_l4 = tcp_hdrlen(skb); 502 + dx_buff->len_l3 = skb_network_header_len(skb); 500 503 dx_buff->eop_index = 0xffffU; 501 - dx_buff->is_ipv6 = 502 - (ip_hdr(skb)->version == 6) ? 
1U : 0U; 504 + dx_buff->is_ipv6 = (ipver == 6); 503 505 need_context_tag = true; 504 506 } 505 507 ··· 543 525 ++ret; 544 526 545 527 if (skb->ip_summed == CHECKSUM_PARTIAL) { 546 - dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ? 547 - 1U : 0U; 548 - 549 - if (ip_hdr(skb)->version == 4) { 550 - dx_buff->is_tcp_cso = 551 - (ip_hdr(skb)->protocol == IPPROTO_TCP) ? 552 - 1U : 0U; 553 - dx_buff->is_udp_cso = 554 - (ip_hdr(skb)->protocol == IPPROTO_UDP) ? 555 - 1U : 0U; 556 - } else if (ip_hdr(skb)->version == 6) { 557 - dx_buff->is_tcp_cso = 558 - (ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ? 559 - 1U : 0U; 560 - dx_buff->is_udp_cso = 561 - (ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ? 562 - 1U : 0U; 563 - } 528 + dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol); 529 + dx_buff->is_tcp_cso = (l4proto == IPPROTO_TCP); 530 + dx_buff->is_udp_cso = (l4proto == IPPROTO_UDP); 564 531 } 565 532 566 533 for (; nr_frags--; ++frag_count) { ··· 600 597 --ret, dx = aq_ring_next_dx(ring, dx)) { 601 598 dx_buff = &ring->buff_ring[dx]; 602 599 603 - if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) { 600 + if (!(dx_buff->is_gso_tcp || dx_buff->is_gso_udp) && 601 + !dx_buff->is_vlan && dx_buff->pa) { 604 602 if (unlikely(dx_buff->is_sop)) { 605 603 dma_unmap_single(aq_nic_get_dev(self), 606 604 dx_buff->pa,
+4 -3
drivers/net/ethernet/aquantia/atlantic/aq_ring.h
··· 65 65 }; 66 66 union { 67 67 struct { 68 - u16 len; 68 + u32 len:16; 69 69 u32 is_ip_cso:1; 70 70 u32 is_udp_cso:1; 71 71 u32 is_tcp_cso:1; 72 72 u32 is_cso_err:1; 73 73 u32 is_sop:1; 74 74 u32 is_eop:1; 75 - u32 is_gso:1; 75 + u32 is_gso_tcp:1; 76 + u32 is_gso_udp:1; 76 77 u32 is_mapped:1; 77 78 u32 is_cleaned:1; 78 79 u32 is_error:1; 79 80 u32 is_vlan:1; 80 - u32 rsvd3:5; 81 + u32 rsvd3:4; 81 82 u16 eop_index; 82 83 u16 rsvd4; 83 84 };
+1 -1
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
··· 454 454 455 455 buff = &ring->buff_ring[ring->sw_tail]; 456 456 457 - if (buff->is_gso) { 457 + if (buff->is_gso_tcp) { 458 458 txd->ctl |= (buff->len_l3 << 31) | 459 459 (buff->len_l2 << 24) | 460 460 HW_ATL_A0_TXD_CTL_CMD_TCP |
+7 -4
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
··· 43 43 NETIF_F_NTUPLE | \ 44 44 NETIF_F_HW_VLAN_CTAG_FILTER | \ 45 45 NETIF_F_HW_VLAN_CTAG_RX | \ 46 - NETIF_F_HW_VLAN_CTAG_TX, \ 46 + NETIF_F_HW_VLAN_CTAG_TX | \ 47 + NETIF_F_GSO_UDP_L4 | \ 48 + NETIF_F_GSO_PARTIAL, \ 47 49 .hw_priv_flags = IFF_UNICAST_FLT, \ 48 50 .flow_control = true, \ 49 51 .mtu = HW_ATL_B0_MTU_JUMBO, \ ··· 535 533 536 534 buff = &ring->buff_ring[ring->sw_tail]; 537 535 538 - if (buff->is_gso) { 539 - txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP; 536 + if (buff->is_gso_tcp || buff->is_gso_udp) { 537 + if (buff->is_gso_tcp) 538 + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP; 540 539 txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; 541 540 txd->ctl |= (buff->len_l3 << 31) | 542 541 (buff->len_l2 << 24); ··· 557 554 txd->ctl |= buff->vlan_tx_tag << 4; 558 555 is_vlan = true; 559 556 } 560 - if (!buff->is_gso && !buff->is_vlan) { 557 + if (!buff->is_gso_tcp && !buff->is_gso_udp && !buff->is_vlan) { 561 558 buff_pa_len = buff->len; 562 559 563 560 txd->buf_addr = buff->pa;