Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: Add GRO support for UDP encapsulating protocols

Add GRO handlers for protocols that do UDP encapsulation, with the intent of
being able to coalesce packets which encapsulate packets belonging to
the same TCP session.

For GRO purposes, the destination UDP port takes the role of the ether type
field in the ethernet header or the next protocol in the IP header.

The UDP GRO handler will only attempt to coalesce packets whose destination
port is registered to have a GRO handler.

Use a mark on the skb GRO CB data to disallow (flush) running the UDP GRO receive
code twice on a packet. This solves the problem of UDP-encapsulated packets whose
inner VM packet is UDP and happens to carry a port which has registered offloads.

Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Or Gerlitz and committed by
David S. Miller
b582ef09 2618abb7

+156 -1
+9 -1
include/linux/netdevice.h
··· 1675 1675 unsigned long age; 1676 1676 1677 1677 /* Used in ipv6_gro_receive() */ 1678 - int proto; 1678 + u16 proto; 1679 + 1680 + /* Used in udp_gro_receive */ 1681 + u16 udp_mark; 1679 1682 1680 1683 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 1681 1684 __wsum csum; ··· 1715 1712 __be16 type; /* This is really htons(ether_type). */ 1716 1713 struct offload_callbacks callbacks; 1717 1714 struct list_head list; 1715 + }; 1716 + 1717 + struct udp_offload { 1718 + __be16 port; 1719 + struct offload_callbacks callbacks; 1718 1720 }; 1719 1721 1720 1722 /* often modified stats are per cpu, other are shared (netdev->stats) */
+3
include/net/protocol.h
··· 108 108 void inet_register_protosw(struct inet_protosw *p); 109 109 void inet_unregister_protosw(struct inet_protosw *p); 110 110 111 + int udp_add_offload(struct udp_offload *prot); 112 + void udp_del_offload(struct udp_offload *prot); 113 + 111 114 #if IS_ENABLED(CONFIG_IPV6) 112 115 int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num); 113 116 int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num);
+1
net/core/dev.c
··· 3893 3893 NAPI_GRO_CB(skb)->same_flow = 0; 3894 3894 NAPI_GRO_CB(skb)->flush = 0; 3895 3895 NAPI_GRO_CB(skb)->free = 0; 3896 + NAPI_GRO_CB(skb)->udp_mark = 0; 3896 3897 3897 3898 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); 3898 3899 break;
+143
net/ipv4/udp_offload.c
··· 14 14 #include <net/udp.h> 15 15 #include <net/protocol.h> 16 16 17 + static DEFINE_SPINLOCK(udp_offload_lock); 18 + static struct udp_offload_priv *udp_offload_base __read_mostly; 19 + 20 + struct udp_offload_priv { 21 + struct udp_offload *offload; 22 + struct rcu_head rcu; 23 + struct udp_offload_priv __rcu *next; 24 + }; 25 + 17 26 static int udp4_ufo_send_check(struct sk_buff *skb) 18 27 { 19 28 if (!pskb_may_pull(skb, sizeof(struct udphdr))) ··· 98 89 return segs; 99 90 } 100 91 92 + int udp_add_offload(struct udp_offload *uo) 93 + { 94 + struct udp_offload_priv **head = &udp_offload_base; 95 + struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL); 96 + 97 + if (!new_offload) 98 + return -ENOMEM; 99 + 100 + new_offload->offload = uo; 101 + 102 + spin_lock(&udp_offload_lock); 103 + rcu_assign_pointer(new_offload->next, rcu_dereference(*head)); 104 + rcu_assign_pointer(*head, rcu_dereference(new_offload)); 105 + spin_unlock(&udp_offload_lock); 106 + 107 + return 0; 108 + } 109 + EXPORT_SYMBOL(udp_add_offload); 110 + 111 + static void udp_offload_free_routine(struct rcu_head *head) 112 + { 113 + struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); 114 + kfree(ou_priv); 115 + } 116 + 117 + void udp_del_offload(struct udp_offload *uo) 118 + { 119 + struct udp_offload_priv __rcu **head = &udp_offload_base; 120 + struct udp_offload_priv *uo_priv; 121 + 122 + spin_lock(&udp_offload_lock); 123 + 124 + uo_priv = rcu_dereference(*head); 125 + for (; uo_priv != NULL; 126 + uo_priv = rcu_dereference(*head)) { 127 + 128 + if (uo_priv->offload == uo) { 129 + rcu_assign_pointer(*head, rcu_dereference(uo_priv->next)); 130 + goto unlock; 131 + } 132 + head = &uo_priv->next; 133 + } 134 + pr_warn("udp_del_offload: didn't find offload for port %d\n", htons(uo->port)); 135 + unlock: 136 + spin_unlock(&udp_offload_lock); 137 + if (uo_priv != NULL) 138 + call_rcu(&uo_priv->rcu, udp_offload_free_routine); 139 + } 140 + 
EXPORT_SYMBOL(udp_del_offload); 141 + 142 + static struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb) 143 + { 144 + struct udp_offload_priv *uo_priv; 145 + struct sk_buff *p, **pp = NULL; 146 + struct udphdr *uh, *uh2; 147 + unsigned int hlen, off; 148 + int flush = 1; 149 + 150 + if (NAPI_GRO_CB(skb)->udp_mark || 151 + (!skb->encapsulation && skb->ip_summed != CHECKSUM_COMPLETE)) 152 + goto out; 153 + 154 + /* mark that this skb passed once through the udp gro layer */ 155 + NAPI_GRO_CB(skb)->udp_mark = 1; 156 + 157 + off = skb_gro_offset(skb); 158 + hlen = off + sizeof(*uh); 159 + uh = skb_gro_header_fast(skb, off); 160 + if (skb_gro_header_hard(skb, hlen)) { 161 + uh = skb_gro_header_slow(skb, hlen, off); 162 + if (unlikely(!uh)) 163 + goto out; 164 + } 165 + 166 + rcu_read_lock(); 167 + uo_priv = rcu_dereference(udp_offload_base); 168 + for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { 169 + if (uo_priv->offload->port == uh->dest && 170 + uo_priv->offload->callbacks.gro_receive) 171 + goto unflush; 172 + } 173 + goto out_unlock; 174 + 175 + unflush: 176 + flush = 0; 177 + 178 + for (p = *head; p; p = p->next) { 179 + if (!NAPI_GRO_CB(p)->same_flow) 180 + continue; 181 + 182 + uh2 = (struct udphdr *)(p->data + off); 183 + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { 184 + NAPI_GRO_CB(p)->same_flow = 0; 185 + continue; 186 + } 187 + } 188 + 189 + skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ 190 + pp = uo_priv->offload->callbacks.gro_receive(head, skb); 191 + 192 + out_unlock: 193 + rcu_read_unlock(); 194 + out: 195 + NAPI_GRO_CB(skb)->flush |= flush; 196 + return pp; 197 + } 198 + 199 + static int udp_gro_complete(struct sk_buff *skb, int nhoff) 200 + { 201 + struct udp_offload_priv *uo_priv; 202 + __be16 newlen = htons(skb->len - nhoff); 203 + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); 204 + int err = -ENOSYS; 205 + 206 + uh->len = newlen; 207 + 208 + 
rcu_read_lock(); 209 + 210 + uo_priv = rcu_dereference(udp_offload_base); 211 + for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { 212 + if (uo_priv->offload->port == uh->dest && 213 + uo_priv->offload->callbacks.gro_complete) 214 + break; 215 + } 216 + 217 + if (uo_priv != NULL) 218 + err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); 219 + 220 + rcu_read_unlock(); 221 + return err; 222 + } 223 + 101 224 static const struct net_offload udpv4_offload = { 102 225 .callbacks = { 103 226 .gso_send_check = udp4_ufo_send_check, 104 227 .gso_segment = udp4_ufo_fragment, 228 + .gro_receive = udp_gro_receive, 229 + .gro_complete = udp_gro_complete, 105 230 }, 106 231 }; 107 232