Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: tproxy: do not assign timewait sockets to skb->sk

Assigning a socket in timewait state to skb->sk can trigger a
kernel oops, e.g. in nfnetlink_log, which does:

if (skb->sk) {
read_lock_bh(&skb->sk->sk_callback_lock);
if (skb->sk->sk_socket && skb->sk->sk_socket->file) ...

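In the timewait case, accessing sk->sk_callback_lock and sk->sk_socket
is invalid, because a timewait socket is a struct inet_timewait_sock
rather than a full struct sock and does not have these fields.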

Either all of these spots will need to add a test for sk->sk_state != TCP_TIME_WAIT,
or xt_TPROXY must not assign a timewait socket to skb->sk.

This patch does the latter.

If a TW socket is found, assign the tproxy nfmark, but skip the skb->sk assignment,
thus mimicking the behaviour of a '-m socket .. -j MARK/ACCEPT' re-routing rule.

The 'SYN to TW socket' case is left unchanged -- we try to redirect to the
listener socket.

Cc: Balazs Scheidler <bazsi@balabit.hu>
Cc: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Florian Westphal <fwestphal@astaro.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>

authored by

Florian Westphal and committed by
Patrick McHardy
d503b30b de9963f0

+43 -29
+1 -11
include/net/netfilter/nf_tproxy_core.h
··· 201 201 } 202 202 #endif 203 203 204 - static inline void 205 - nf_tproxy_put_sock(struct sock *sk) 206 - { 207 - /* TIME_WAIT inet sockets have to be handled differently */ 208 - if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) 209 - inet_twsk_put(inet_twsk(sk)); 210 - else 211 - sock_put(sk); 212 - } 213 - 214 204 /* assign a socket to the skb -- consumes sk */ 215 - int 205 + void 216 206 nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); 217 207 218 208 #endif
+11 -14
net/netfilter/nf_tproxy_core.c
··· 28 28 skb->destructor = NULL; 29 29 30 30 if (sk) 31 - nf_tproxy_put_sock(sk); 31 + sock_put(sk); 32 32 } 33 33 34 34 /* consumes sk */ 35 - int 35 + void 36 36 nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 37 37 { 38 - bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? 39 - inet_twsk(sk)->tw_transparent : 40 - inet_sk(sk)->transparent; 38 + /* assigning tw sockets complicates things; most 39 + * skb->sk->X checks would have to test sk->sk_state first */ 40 + if (sk->sk_state == TCP_TIME_WAIT) { 41 + inet_twsk_put(inet_twsk(sk)); 42 + return; 43 + } 41 44 42 - if (transparent) { 43 - skb_orphan(skb); 44 - skb->sk = sk; 45 - skb->destructor = nf_tproxy_destructor; 46 - return 1; 47 - } else 48 - nf_tproxy_put_sock(sk); 49 - 50 - return 0; 45 + skb_orphan(skb); 46 + skb->sk = sk; 47 + skb->destructor = nf_tproxy_destructor; 51 48 } 52 49 EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); 53 50
+20 -2
net/netfilter/xt_TPROXY.c
··· 33 33 #include <net/netfilter/nf_tproxy_core.h> 34 34 #include <linux/netfilter/xt_TPROXY.h> 35 35 36 + static bool tproxy_sk_is_transparent(struct sock *sk) 37 + { 38 + if (sk->sk_state != TCP_TIME_WAIT) { 39 + if (inet_sk(sk)->transparent) 40 + return true; 41 + sock_put(sk); 42 + } else { 43 + if (inet_twsk(sk)->tw_transparent) 44 + return true; 45 + inet_twsk_put(inet_twsk(sk)); 46 + } 47 + return false; 48 + } 49 + 36 50 static inline __be32 37 51 tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) 38 52 { ··· 155 141 skb->dev, NFT_LOOKUP_LISTENER); 156 142 157 143 /* NOTE: assign_sock consumes our sk reference */ 158 - if (sk && nf_tproxy_assign_sock(skb, sk)) { 144 + if (sk && tproxy_sk_is_transparent(sk)) { 159 145 /* This should be in a separate target, but we don't do multiple 160 146 targets on the same rule yet */ 161 147 skb->mark = (skb->mark & ~mark_mask) ^ mark_value; ··· 163 149 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", 164 150 iph->protocol, &iph->daddr, ntohs(hp->dest), 165 151 &laddr, ntohs(lport), skb->mark); 152 + 153 + nf_tproxy_assign_sock(skb, sk); 166 154 return NF_ACCEPT; 167 155 } 168 156 ··· 322 306 par->in, NFT_LOOKUP_LISTENER); 323 307 324 308 /* NOTE: assign_sock consumes our sk reference */ 325 - if (sk && nf_tproxy_assign_sock(skb, sk)) { 309 + if (sk && tproxy_sk_is_transparent(sk)) { 326 310 /* This should be in a separate target, but we don't do multiple 327 311 targets on the same rule yet */ 328 312 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; ··· 330 314 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", 331 315 tproto, &iph->saddr, ntohs(hp->source), 332 316 laddr, ntohs(lport), skb->mark); 317 + 318 + nf_tproxy_assign_sock(skb, sk); 333 319 return NF_ACCEPT; 334 320 } 335 321
+11 -2
net/netfilter/xt_socket.c
··· 35 35 #include <net/netfilter/nf_conntrack.h> 36 36 #endif 37 37 38 + static void 39 + xt_socket_put_sk(struct sock *sk) 40 + { 41 + if (sk->sk_state == TCP_TIME_WAIT) 42 + inet_twsk_put(inet_twsk(sk)); 43 + else 44 + sock_put(sk); 45 + } 46 + 38 47 static int 39 48 extract_icmp4_fields(const struct sk_buff *skb, 40 49 u8 *protocol, ··· 173 164 (sk->sk_state == TCP_TIME_WAIT && 174 165 inet_twsk(sk)->tw_transparent)); 175 166 176 - nf_tproxy_put_sock(sk); 167 + xt_socket_put_sk(sk); 177 168 178 169 if (wildcard || !transparent) 179 170 sk = NULL; ··· 307 298 (sk->sk_state == TCP_TIME_WAIT && 308 299 inet_twsk(sk)->tw_transparent)); 309 300 310 - nf_tproxy_put_sock(sk); 301 + xt_socket_put_sk(sk); 311 302 312 303 if (wildcard || !transparent) 313 304 sk = NULL;