Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: x_tables: don't extract flow keys on early demuxed sks in socket match

Currently in xt_socket, we take advantage of early demuxed sockets
since commit 00028aa37098 ("netfilter: xt_socket: use IP early demux")
in order to avoid a second socket lookup in the fast path, but we
only make partial use of this:

We still unnecessarily parse headers, extract proto, {s,d}addr and
{s,d}ports from the skb data, access possible conntrack information,
etc., even though we do not even call into the socket lookup via
xt_socket_get_sock_{v4,v6}() due to the skb->sk hit, meaning those
cycles can be spared.

After this patch, we only proceed with the slower, manual lookup path
when we have a skb->sk miss, thus the time to match verdict for early
demuxed sockets will improve further, which might be interesting e.g.
for use cases such as those mentioned in 681f130f39e1 ("netfilter:
xt_socket: add XT_SOCKET_NOWILDCARD flag").

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Daniel Borkmann and committed by
Pablo Neira Ayuso
d64d80a2 0b67c43c

+50 -45
+50 -45
net/netfilter/xt_socket.c
··· 143 143 } 144 144 } 145 145 146 - static bool 147 - socket_match(const struct sk_buff *skb, struct xt_action_param *par, 148 - const struct xt_socket_mtinfo1 *info) 146 + static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, 147 + const struct net_device *indev) 149 148 { 150 149 const struct iphdr *iph = ip_hdr(skb); 151 - struct udphdr _hdr, *hp = NULL; 152 - struct sock *sk = skb->sk; 153 150 __be32 uninitialized_var(daddr), uninitialized_var(saddr); 154 151 __be16 uninitialized_var(dport), uninitialized_var(sport); 155 152 u8 uninitialized_var(protocol); ··· 156 159 #endif 157 160 158 161 if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { 162 + struct udphdr _hdr, *hp; 163 + 159 164 hp = skb_header_pointer(skb, ip_hdrlen(skb), 160 165 sizeof(_hdr), &_hdr); 161 166 if (hp == NULL) 162 - return false; 167 + return NULL; 163 168 164 169 protocol = iph->protocol; 165 170 saddr = iph->saddr; ··· 171 172 172 173 } else if (iph->protocol == IPPROTO_ICMP) { 173 174 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, 174 - &sport, &dport)) 175 - return false; 175 + &sport, &dport)) 176 + return NULL; 176 177 } else { 177 - return false; 178 + return NULL; 178 179 } 179 180 180 181 #ifdef XT_SOCKET_HAVE_CONNTRACK 181 - /* Do the lookup with the original socket address in case this is a 182 - * reply packet of an established SNAT-ted connection. */ 183 - 182 + /* Do the lookup with the original socket address in 183 + * case this is a reply packet of an established 184 + * SNAT-ted connection. 
185 + */ 184 186 ct = nf_ct_get(skb, &ctinfo); 185 187 if (ct && !nf_ct_is_untracked(ct) && 186 188 ((iph->protocol != IPPROTO_ICMP && ··· 197 197 } 198 198 #endif 199 199 200 + return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, 201 + sport, dport, indev); 202 + } 203 + 204 + static bool 205 + socket_match(const struct sk_buff *skb, struct xt_action_param *par, 206 + const struct xt_socket_mtinfo1 *info) 207 + { 208 + struct sock *sk = skb->sk; 209 + 200 210 if (!sk) 201 - sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol, 202 - saddr, daddr, sport, dport, 203 - par->in); 211 + sk = xt_socket_lookup_slow_v4(skb, par->in); 204 212 if (sk) { 205 213 bool wildcard; 206 214 bool transparent = true; ··· 233 225 sk = NULL; 234 226 } 235 227 236 - pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", 237 - protocol, &saddr, ntohs(sport), 238 - &daddr, ntohs(dport), 239 - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); 240 - 241 - return (sk != NULL); 228 + return sk != NULL; 242 229 } 243 230 244 231 static bool ··· 330 327 return NULL; 331 328 } 332 329 333 - static bool 334 - socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 330 + static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, 331 + const struct net_device *indev) 335 332 { 336 - struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); 337 - struct udphdr _hdr, *hp = NULL; 338 - struct sock *sk = skb->sk; 339 - const struct in6_addr *daddr = NULL, *saddr = NULL; 340 333 __be16 uninitialized_var(dport), uninitialized_var(sport); 341 - int thoff = 0, uninitialized_var(tproto); 342 - const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 334 + const struct in6_addr *daddr = NULL, *saddr = NULL; 335 + struct ipv6hdr *iph = ipv6_hdr(skb); 336 + int thoff = 0, tproto; 343 337 344 338 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); 345 339 if (tproto < 0) { 346 340 pr_debug("unable to find transport header in IPv6 
packet, dropping\n"); 347 - return NF_DROP; 341 + return NULL; 348 342 } 349 343 350 344 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { 351 - hp = skb_header_pointer(skb, thoff, 352 - sizeof(_hdr), &_hdr); 345 + struct udphdr _hdr, *hp; 346 + 347 + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); 353 348 if (hp == NULL) 354 - return false; 349 + return NULL; 355 350 356 351 saddr = &iph->saddr; 357 352 sport = hp->source; ··· 357 356 dport = hp->dest; 358 357 359 358 } else if (tproto == IPPROTO_ICMPV6) { 359 + struct ipv6hdr ipv6_var; 360 + 360 361 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, 361 362 &sport, &dport, &ipv6_var)) 362 - return false; 363 + return NULL; 363 364 } else { 364 - return false; 365 + return NULL; 365 366 } 366 367 368 + return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, 369 + sport, dport, indev); 370 + } 371 + 372 + static bool 373 + socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) 374 + { 375 + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; 376 + struct sock *sk = skb->sk; 377 + 367 378 if (!sk) 368 - sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto, 369 - saddr, daddr, sport, dport, 370 - par->in); 379 + sk = xt_socket_lookup_slow_v6(skb, par->in); 371 380 if (sk) { 372 381 bool wildcard; 373 382 bool transparent = true; ··· 402 391 sk = NULL; 403 392 } 404 393 405 - pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " 406 - "(orig %pI6:%hu) sock %p\n", 407 - tproto, saddr, ntohs(sport), 408 - daddr, ntohs(dport), 409 - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); 410 - 411 - return (sk != NULL); 394 + return sk != NULL; 412 395 } 413 396 #endif 414 397