Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'udp_tunnel-gro-optimizations'

Paolo Abeni says:

====================
udp_tunnel: GRO optimizations

The UDP tunnel GRO stage is a source of measurable overhead for workloads
based on UDP-encapsulated traffic: each incoming packet requires a full
UDP socket lookup and an indirect call.

In the most common setups a single UDP tunnel device is used. In such
case we can optimize both the lookup and the indirect call.

Patch 1 tracks per netns the active UDP tunnels and replaces the socket
lookup with a single destination port comparison when possible.

Patch 2 tracks the different types of UDP tunnels and replaces the
indirect call with a static one when there is a single UDP tunnel type
active.

I measure ~10% performance improvement in TCP over UDP tunnel stream
tests on top of this series.

v4: https://lore.kernel.org/cover.1741718157.git.pabeni@redhat.com
v3: https://lore.kernel.org/cover.1741632298.git.pabeni@redhat.com
v2: https://lore.kernel.org/cover.1741338765.git.pabeni@redhat.com
v1: https://lore.kernel.org/cover.1741275846.git.pabeni@redhat.com
====================

Link: https://patch.msgid.link/cover.1744040675.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+247 -2
+16
include/linux/udp.h
··· 101 101 102 102 /* Cache friendly copy of sk->sk_peek_off >= 0 */ 103 103 bool peeking_with_offset; 104 + 105 + /* 106 + * Accounting for the tunnel GRO fastpath. 107 + * Unprotected by compilers guard, as it uses space available in 108 + * the last UDP socket cacheline. 109 + */ 110 + struct hlist_node tunnel_list; 104 111 }; 105 112 106 113 #define udp_test_bit(nr, sk) \ ··· 225 218 #endif 226 219 227 220 #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) 221 + 222 + static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) 223 + { 224 + #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) 225 + return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk); 226 + #else 227 + return NULL; 228 + #endif 229 + } 228 230 229 231 #endif /* _LINUX_UDP_H */
+11
include/net/netns/ipv4.h
··· 47 47 }; 48 48 #endif 49 49 50 + struct udp_tunnel_gro { 51 + struct sock __rcu *sk; 52 + struct hlist_head list; 53 + }; 54 + 50 55 struct netns_ipv4 { 51 56 /* Cacheline organization can be found documented in 52 57 * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. ··· 89 84 90 85 struct inet_timewait_death_row tcp_death_row; 91 86 struct udp_table *udp_table; 87 + 88 + #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) 89 + /* Not in a pernet subsys because need to be available at GRO stage */ 90 + struct udp_tunnel_gro udp_tunnel_gro[2]; 91 + #endif 92 92 93 93 #ifdef CONFIG_SYSCTL 94 94 struct ctl_table_header *forw_hdr; ··· 287 277 struct hlist_head *inet_addr_lst; 288 278 struct delayed_work addr_chk_work; 289 279 }; 280 + 290 281 #endif
+1
include/net/udp.h
··· 290 290 struct udp_sock *up = udp_sk(sk); 291 291 292 292 skb_queue_head_init(&up->reader_queue); 293 + INIT_HLIST_NODE(&up->tunnel_list); 293 294 up->forward_threshold = sk->sk_rcvbuf >> 2; 294 295 set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); 295 296 }
+16
include/net/udp_tunnel.h
··· 191 191 } 192 192 #endif 193 193 194 + #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) 195 + void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); 196 + void udp_tunnel_update_gro_rcv(struct sock *sk, bool add); 197 + #else 198 + static inline void udp_tunnel_update_gro_lookup(struct net *net, 199 + struct sock *sk, bool add) {} 200 + static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {} 201 + #endif 202 + 203 + static inline void udp_tunnel_cleanup_gro(struct sock *sk) 204 + { 205 + udp_tunnel_update_gro_rcv(sk, false); 206 + udp_tunnel_update_gro_lookup(sock_net(sk), sk, false); 207 + } 208 + 194 209 static inline void udp_tunnel_encap_enable(struct sock *sk) 195 210 { 196 211 if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) ··· 215 200 if (READ_ONCE(sk->sk_family) == PF_INET6) 216 201 ipv6_stub->udpv6_encap_enable(); 217 202 #endif 203 + udp_tunnel_update_gro_rcv(sk, true); 218 204 udp_encap_enable(); 219 205 } 220 206
+12 -1
net/ipv4/udp.c
··· 2897 2897 if (encap_destroy) 2898 2898 encap_destroy(sk); 2899 2899 } 2900 - if (udp_test_bit(ENCAP_ENABLED, sk)) 2900 + if (udp_test_bit(ENCAP_ENABLED, sk)) { 2901 2901 static_branch_dec(&udp_encap_needed_key); 2902 + udp_tunnel_cleanup_gro(sk); 2903 + } 2902 2904 } 2903 2905 } 2904 2906 ··· 3812 3810 3813 3811 static int __net_init udp_pernet_init(struct net *net) 3814 3812 { 3813 + #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) 3814 + int i; 3815 + 3816 + /* No tunnel is configured */ 3817 + for (i = 0; i < ARRAY_SIZE(net->ipv4.udp_tunnel_gro); ++i) { 3818 + INIT_HLIST_HEAD(&net->ipv4.udp_tunnel_gro[i].list); 3819 + RCU_INIT_POINTER(net->ipv4.udp_tunnel_gro[i].sk, NULL); 3820 + } 3821 + #endif 3815 3822 udp_sysctl_init(net); 3816 3823 udp_set_table(net); 3817 3824
+171 -1
net/ipv4/udp_offload.c
··· 12 12 #include <net/udp.h> 13 13 #include <net/protocol.h> 14 14 #include <net/inet_common.h> 15 + #include <net/udp_tunnel.h> 16 + 17 + #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) 18 + 19 + /* 20 + * Dummy GRO tunnel callback, exists mainly to avoid dangling/NULL 21 + * values for the udp tunnel static call. 22 + */ 23 + static struct sk_buff *dummy_gro_rcv(struct sock *sk, 24 + struct list_head *head, 25 + struct sk_buff *skb) 26 + { 27 + NAPI_GRO_CB(skb)->flush = 1; 28 + return NULL; 29 + } 30 + 31 + typedef struct sk_buff *(*udp_tunnel_gro_rcv_t)(struct sock *sk, 32 + struct list_head *head, 33 + struct sk_buff *skb); 34 + 35 + struct udp_tunnel_type_entry { 36 + udp_tunnel_gro_rcv_t gro_receive; 37 + refcount_t count; 38 + }; 39 + 40 + #define UDP_MAX_TUNNEL_TYPES (IS_ENABLED(CONFIG_GENEVE) + \ 41 + IS_ENABLED(CONFIG_VXLAN) * 2 + \ 42 + IS_ENABLED(CONFIG_NET_FOU) * 2 + \ 43 + IS_ENABLED(CONFIG_XFRM) * 2) 44 + 45 + DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv); 46 + static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call); 47 + static struct mutex udp_tunnel_gro_type_lock; 48 + static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES]; 49 + static unsigned int udp_tunnel_gro_type_nr; 50 + static DEFINE_SPINLOCK(udp_tunnel_gro_lock); 51 + 52 + void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add) 53 + { 54 + bool is_ipv6 = sk->sk_family == AF_INET6; 55 + struct udp_sock *tup, *up = udp_sk(sk); 56 + struct udp_tunnel_gro *udp_tunnel_gro; 57 + 58 + spin_lock(&udp_tunnel_gro_lock); 59 + udp_tunnel_gro = &net->ipv4.udp_tunnel_gro[is_ipv6]; 60 + if (add) 61 + hlist_add_head(&up->tunnel_list, &udp_tunnel_gro->list); 62 + else if (up->tunnel_list.pprev) 63 + hlist_del_init(&up->tunnel_list); 64 + 65 + if (udp_tunnel_gro->list.first && 66 + !udp_tunnel_gro->list.first->next) { 67 + tup = hlist_entry(udp_tunnel_gro->list.first, struct udp_sock, 68 + tunnel_list); 69 + 70 + rcu_assign_pointer(udp_tunnel_gro->sk, (struct sock *)tup); 71 + } else { 72 + RCU_INIT_POINTER(udp_tunnel_gro->sk, NULL); 73 + } 74 + 75 + spin_unlock(&udp_tunnel_gro_lock); 76 + } 77 + EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_lookup); 78 + 79 + void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) 80 + { 81 + struct udp_tunnel_type_entry *cur = NULL; 82 + struct udp_sock *up = udp_sk(sk); 83 + int i, old_gro_type_nr; 84 + 85 + if (!UDP_MAX_TUNNEL_TYPES || !up->gro_receive) 86 + return; 87 + 88 + mutex_lock(&udp_tunnel_gro_type_lock); 89 + 90 + /* Check if the static call is permanently disabled. */ 91 + if (udp_tunnel_gro_type_nr > UDP_MAX_TUNNEL_TYPES) 92 + goto out; 93 + 94 + for (i = 0; i < udp_tunnel_gro_type_nr; i++) 95 + if (udp_tunnel_gro_types[i].gro_receive == up->gro_receive) 96 + cur = &udp_tunnel_gro_types[i]; 97 + 98 + old_gro_type_nr = udp_tunnel_gro_type_nr; 99 + if (add) { 100 + /* 101 + * Update the matching entry, if found, or add a new one 102 + * if needed 103 + */ 104 + if (cur) { 105 + refcount_inc(&cur->count); 106 + goto out; 107 + } 108 + 109 + if (unlikely(udp_tunnel_gro_type_nr == UDP_MAX_TUNNEL_TYPES)) { 110 + pr_err_once("Too many UDP tunnel types, please increase UDP_MAX_TUNNEL_TYPES\n"); 111 + /* Ensure static call will never be enabled */ 112 + udp_tunnel_gro_type_nr = UDP_MAX_TUNNEL_TYPES + 1; 113 + } else { 114 + cur = &udp_tunnel_gro_types[udp_tunnel_gro_type_nr++]; 115 + refcount_set(&cur->count, 1); 116 + cur->gro_receive = up->gro_receive; 117 + } 118 + } else { 119 + /* 120 + * The stack cleanups only successfully added tunnel, the 121 + * lookup on removal should never fail. 122 + */ 123 + if (WARN_ON_ONCE(!cur)) 124 + goto out; 125 + 126 + if (!refcount_dec_and_test(&cur->count)) 127 + goto out; 128 + 129 + /* Avoid gaps, so that the enable tunnel has always id 0 */ 130 + *cur = udp_tunnel_gro_types[--udp_tunnel_gro_type_nr]; 131 + } 132 + 133 + if (udp_tunnel_gro_type_nr == 1) { 134 + static_call_update(udp_tunnel_gro_rcv, 135 + udp_tunnel_gro_types[0].gro_receive); 136 + static_branch_enable(&udp_tunnel_static_call); 137 + } else if (old_gro_type_nr == 1) { 138 + static_branch_disable(&udp_tunnel_static_call); 139 + static_call_update(udp_tunnel_gro_rcv, dummy_gro_rcv); 140 + } 141 + 142 + out: 143 + mutex_unlock(&udp_tunnel_gro_type_lock); 144 + } 145 + EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv); 146 + 147 + static void udp_tunnel_gro_init(void) 148 + { 149 + mutex_init(&udp_tunnel_gro_type_lock); 150 + } 151 + 152 + static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, 153 + struct list_head *head, 154 + struct sk_buff *skb) 155 + { 156 + if (static_branch_likely(&udp_tunnel_static_call)) { 157 + if (unlikely(gro_recursion_inc_test(skb))) { 158 + NAPI_GRO_CB(skb)->flush |= 1; 159 + return NULL; 160 + } 161 + return static_call(udp_tunnel_gro_rcv)(sk, head, skb); 162 + } 163 + return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); 164 + } 165 + 166 + #else 167 + 168 + static void udp_tunnel_gro_init(void) {} 169 + 170 + static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, 171 + struct list_head *head, 172 + struct sk_buff *skb) 173 + { 174 + return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); 175 + } 176 + 177 + #endif 15 178 16 179 static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, 17 180 netdev_features_t features, ··· 785 622 786 623 skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ 787 624 skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); 788 - pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); 625 + pp = udp_tunnel_gro_rcv(sk, head, skb); 789 626 790 627 out: 791 628 skb_gro_flush_final(skb, pp, flush); ··· 798 635 { 799 636 const struct iphdr *iph = skb_gro_network_header(skb); 800 637 struct net *net = dev_net_rcu(skb->dev); 638 + struct sock *sk; 801 639 int iif, sdif; 640 + 641 + sk = udp_tunnel_sk(net, false); 642 + if (sk && dport == htons(sk->sk_num)) 643 + return sk; 802 644 803 645 inet_get_iif_sdif(skb, &iif, &sdif); 804 646 ··· 935 767 .gro_complete = udp4_gro_complete, 936 768 }, 937 769 }; 770 + 771 + udp_tunnel_gro_init(); 938 772 return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); 939 773 }
+13
net/ipv4/udp_tunnel_core.c
··· 58 58 } 59 59 EXPORT_SYMBOL(udp_sock_create4); 60 60 61 + static bool sk_saddr_any(struct sock *sk) 62 + { 63 + #if IS_ENABLED(CONFIG_IPV6) 64 + return ipv6_addr_any(&sk->sk_v6_rcv_saddr); 65 + #else 66 + return !sk->sk_rcv_saddr; 67 + #endif 68 + } 69 + 61 70 void setup_udp_tunnel_sock(struct net *net, struct socket *sock, 62 71 struct udp_tunnel_sock_cfg *cfg) 63 72 { ··· 89 80 udp_sk(sk)->gro_complete = cfg->gro_complete; 90 81 91 82 udp_tunnel_encap_enable(sk); 83 + 84 + if (!sk->sk_dport && !sk->sk_bound_dev_if && sk_saddr_any(sk) && 85 + sk->sk_kern_sock) 86 + udp_tunnel_update_gro_lookup(net, sk, true); 92 87 } 93 88 EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); 94 89
+2
net/ipv6/udp.c
··· 46 46 #include <net/tcp_states.h> 47 47 #include <net/ip6_checksum.h> 48 48 #include <net/ip6_tunnel.h> 49 + #include <net/udp_tunnel.h> 49 50 #include <net/xfrm.h> 50 51 #include <net/inet_hashtables.h> 51 52 #include <net/inet6_hashtables.h> ··· 1826 1825 if (udp_test_bit(ENCAP_ENABLED, sk)) { 1827 1826 static_branch_dec(&udpv6_encap_needed_key); 1828 1827 udp_encap_disable(); 1828 + udp_tunnel_cleanup_gro(sk); 1829 1829 } 1830 1830 } 1831 1831 }
+5
net/ipv6/udp_offload.c
··· 118 118 { 119 119 const struct ipv6hdr *iph = skb_gro_network_header(skb); 120 120 struct net *net = dev_net_rcu(skb->dev); 121 + struct sock *sk; 121 122 int iif, sdif; 123 + 124 + sk = udp_tunnel_sk(net, true); 125 + if (sk && dport == htons(sk->sk_num)) 126 + return sk; 122 127 123 128 inet6_get_iif_sdif(skb, &iif, &sdif); 124 129