Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rxrpc: Fix ICMP/ICMP6 error handling

Because rxrpc pretends to be a tunnel on top of a UDP/UDP6 socket, allowing
it to siphon off UDP packets early in the handling of received UDP packets
thereby avoiding the packet going through the UDP receive queue, it doesn't
get ICMP packets through the UDP ->sk_error_report() callback. In fact, it
doesn't appear that there's any usable option for getting hold of ICMP
packets.

Fix this by adding a new UDP encap hook to distribute error messages for
UDP tunnels. If the hook is set, then the tunnel driver will be able to
see ICMP packets. The hook provides the offset into the packet of the UDP
header of the original packet that caused the notification.

An alternative would be to call the ->error_handler() hook - but that
requires that the skbuff be cloned (as ip_icmp_error() or ipv6_icmp_error()
do, though that isn't really necessary or desirable in rxrpc's case as we
want to parse them there and then, not queue them).

Changes
=======
ver #3)
- Fixed an uninitialised variable.

ver #2)
- Fixed some missing CONFIG_AF_RXRPC_IPV6 conditionals.

Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook")
Signed-off-by: David Howells <dhowells@redhat.com>

+272 -40
+1
include/linux/udp.h
··· 70 70 * For encapsulation sockets. 71 71 */ 72 72 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); 73 + void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); 73 74 int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); 74 75 void (*encap_destroy)(struct sock *sk); 75 76
+4
include/net/udp_tunnel.h
··· 67 67 typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); 68 68 typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, 69 69 struct sk_buff *skb); 70 + typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, 71 + struct sk_buff *skb, 72 + unsigned int udp_offset); 70 73 typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); 71 74 typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, 72 75 struct list_head *head, ··· 83 80 __u8 encap_type; 84 81 udp_tunnel_encap_rcv_t encap_rcv; 85 82 udp_tunnel_encap_err_lookup_t encap_err_lookup; 83 + udp_tunnel_encap_err_rcv_t encap_err_rcv; 86 84 udp_tunnel_encap_destroy_t encap_destroy; 87 85 udp_tunnel_gro_receive_t gro_receive; 88 86 udp_tunnel_gro_complete_t gro_complete;
+2
net/ipv4/udp.c
··· 783 783 */ 784 784 if (tunnel) { 785 785 /* ...not for tunnels though: we don't have a sending socket */ 786 + if (udp_sk(sk)->encap_err_rcv) 787 + udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2); 786 788 goto out; 787 789 } 788 790 if (!inet->recverr) {
+1
net/ipv4/udp_tunnel_core.c
··· 72 72 73 73 udp_sk(sk)->encap_type = cfg->encap_type; 74 74 udp_sk(sk)->encap_rcv = cfg->encap_rcv; 75 + udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv; 75 76 udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; 76 77 udp_sk(sk)->encap_destroy = cfg->encap_destroy; 77 78 udp_sk(sk)->gro_receive = cfg->gro_receive;
+4 -1
net/ipv6/udp.c
··· 616 616 } 617 617 618 618 /* Tunnels don't have an application socket: don't pass errors back */ 619 - if (tunnel) 619 + if (tunnel) { 620 + if (udp_sk(sk)->encap_err_rcv) 621 + udp_sk(sk)->encap_err_rcv(sk, skb, offset); 620 622 goto out; 623 + } 621 624 622 625 if (!np->recverr) { 623 626 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
+1
net/rxrpc/ar-internal.h
··· 982 982 /* 983 983 * peer_event.c 984 984 */ 985 + void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); 985 986 void rxrpc_error_report(struct sock *); 986 987 void rxrpc_peer_keepalive_worker(struct work_struct *); 987 988
+1
net/rxrpc/local_object.c
··· 137 137 138 138 tuncfg.encap_type = UDP_ENCAP_RXRPC; 139 139 tuncfg.encap_rcv = rxrpc_input_packet; 140 + tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; 140 141 tuncfg.sk_user_data = local; 141 142 setup_udp_tunnel_sock(net, local->socket, &tuncfg); 142 143
+258 -39
net/rxrpc/peer_event.c
··· 16 16 #include <net/sock.h> 17 17 #include <net/af_rxrpc.h> 18 18 #include <net/ip.h> 19 + #include <net/icmp.h> 19 20 #include "ar-internal.h" 20 21 22 + static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int); 21 23 static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); 22 24 static void rxrpc_distribute_error(struct rxrpc_peer *, int, 23 25 enum rxrpc_call_completion); 24 26 25 27 /* 26 - * Find the peer associated with an ICMP packet. 28 + * Find the peer associated with an ICMPv4 packet. 27 29 */ 28 30 static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, 29 - const struct sk_buff *skb, 31 + struct sk_buff *skb, 32 + unsigned int udp_offset, 33 + unsigned int *info, 30 34 struct sockaddr_rxrpc *srx) 35 + { 36 + struct iphdr *ip, *ip0 = ip_hdr(skb); 37 + struct icmphdr *icmp = icmp_hdr(skb); 38 + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); 39 + 40 + _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code); 41 + 42 + switch (icmp->type) { 43 + case ICMP_DEST_UNREACH: 44 + *info = ntohs(icmp->un.frag.mtu); 45 + fallthrough; 46 + case ICMP_TIME_EXCEEDED: 47 + case ICMP_PARAMETERPROB: 48 + ip = (struct iphdr *)((void *)icmp + 8); 49 + break; 50 + default: 51 + return NULL; 52 + } 53 + 54 + memset(srx, 0, sizeof(*srx)); 55 + srx->transport_type = local->srx.transport_type; 56 + srx->transport_len = local->srx.transport_len; 57 + srx->transport.family = local->srx.transport.family; 58 + 59 + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice 60 + * versa? 
61 + */ 62 + switch (srx->transport.family) { 63 + case AF_INET: 64 + srx->transport_len = sizeof(srx->transport.sin); 65 + srx->transport.family = AF_INET; 66 + srx->transport.sin.sin_port = udp->dest; 67 + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, 68 + sizeof(struct in_addr)); 69 + break; 70 + 71 + #ifdef CONFIG_AF_RXRPC_IPV6 72 + case AF_INET6: 73 + srx->transport_len = sizeof(srx->transport.sin); 74 + srx->transport.family = AF_INET; 75 + srx->transport.sin.sin_port = udp->dest; 76 + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, 77 + sizeof(struct in_addr)); 78 + break; 79 + #endif 80 + 81 + default: 82 + WARN_ON_ONCE(1); 83 + return NULL; 84 + } 85 + 86 + _net("ICMP {%pISp}", &srx->transport); 87 + return rxrpc_lookup_peer_rcu(local, srx); 88 + } 89 + 90 + #ifdef CONFIG_AF_RXRPC_IPV6 91 + /* 92 + * Find the peer associated with an ICMPv6 packet. 93 + */ 94 + static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local, 95 + struct sk_buff *skb, 96 + unsigned int udp_offset, 97 + unsigned int *info, 98 + struct sockaddr_rxrpc *srx) 99 + { 100 + struct icmp6hdr *icmp = icmp6_hdr(skb); 101 + struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb); 102 + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); 103 + 104 + _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code); 105 + 106 + switch (icmp->icmp6_type) { 107 + case ICMPV6_DEST_UNREACH: 108 + *info = ntohl(icmp->icmp6_mtu); 109 + fallthrough; 110 + case ICMPV6_PKT_TOOBIG: 111 + case ICMPV6_TIME_EXCEED: 112 + case ICMPV6_PARAMPROB: 113 + ip = (struct ipv6hdr *)((void *)icmp + 8); 114 + break; 115 + default: 116 + return NULL; 117 + } 118 + 119 + memset(srx, 0, sizeof(*srx)); 120 + srx->transport_type = local->srx.transport_type; 121 + srx->transport_len = local->srx.transport_len; 122 + srx->transport.family = local->srx.transport.family; 123 + 124 + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice 125 + * versa? 
126 + */ 127 + switch (srx->transport.family) { 128 + case AF_INET: 129 + _net("Rx ICMP6 on v4 sock"); 130 + srx->transport_len = sizeof(srx->transport.sin); 131 + srx->transport.family = AF_INET; 132 + srx->transport.sin.sin_port = udp->dest; 133 + memcpy(&srx->transport.sin.sin_addr, 134 + &ip->daddr.s6_addr32[3], sizeof(struct in_addr)); 135 + break; 136 + case AF_INET6: 137 + _net("Rx ICMP6"); 138 + srx->transport.sin.sin_port = udp->dest; 139 + memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr, 140 + sizeof(struct in6_addr)); 141 + break; 142 + default: 143 + WARN_ON_ONCE(1); 144 + return NULL; 145 + } 146 + 147 + _net("ICMP {%pISp}", &srx->transport); 148 + return rxrpc_lookup_peer_rcu(local, srx); 149 + } 150 + #endif /* CONFIG_AF_RXRPC_IPV6 */ 151 + 152 + /* 153 + * Handle an error received on the local endpoint as a tunnel. 154 + */ 155 + void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, 156 + unsigned int udp_offset) 157 + { 158 + struct sock_extended_err ee; 159 + struct sockaddr_rxrpc srx; 160 + struct rxrpc_local *local; 161 + struct rxrpc_peer *peer; 162 + unsigned int info = 0; 163 + int err; 164 + u8 version = ip_hdr(skb)->version; 165 + u8 type = icmp_hdr(skb)->type; 166 + u8 code = icmp_hdr(skb)->code; 167 + 168 + rcu_read_lock(); 169 + local = rcu_dereference_sk_user_data(sk); 170 + if (unlikely(!local)) { 171 + rcu_read_unlock(); 172 + return; 173 + } 174 + 175 + rxrpc_new_skb(skb, rxrpc_skb_received); 176 + 177 + switch (ip_hdr(skb)->version) { 178 + case IPVERSION: 179 + peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset, 180 + &info, &srx); 181 + break; 182 + #ifdef CONFIG_AF_RXRPC_IPV6 183 + case 6: 184 + peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset, 185 + &info, &srx); 186 + break; 187 + #endif 188 + default: 189 + rcu_read_unlock(); 190 + return; 191 + } 192 + 193 + if (peer && !rxrpc_get_peer_maybe(peer)) 194 + peer = NULL; 195 + if (!peer) { 196 + rcu_read_unlock(); 197 + return; 198 + } 199 + 200 + 
memset(&ee, 0, sizeof(ee)); 201 + 202 + switch (version) { 203 + case IPVERSION: 204 + switch (type) { 205 + case ICMP_DEST_UNREACH: 206 + switch (code) { 207 + case ICMP_FRAG_NEEDED: 208 + rxrpc_adjust_mtu(peer, info); 209 + rcu_read_unlock(); 210 + rxrpc_put_peer(peer); 211 + return; 212 + default: 213 + break; 214 + } 215 + 216 + err = EHOSTUNREACH; 217 + if (code <= NR_ICMP_UNREACH) { 218 + /* Might want to do something different with 219 + * non-fatal errors 220 + */ 221 + //harderr = icmp_err_convert[code].fatal; 222 + err = icmp_err_convert[code].errno; 223 + } 224 + break; 225 + 226 + case ICMP_TIME_EXCEEDED: 227 + err = EHOSTUNREACH; 228 + break; 229 + default: 230 + err = EPROTO; 231 + break; 232 + } 233 + 234 + ee.ee_origin = SO_EE_ORIGIN_ICMP; 235 + ee.ee_type = type; 236 + ee.ee_code = code; 237 + ee.ee_errno = err; 238 + break; 239 + 240 + #ifdef CONFIG_AF_RXRPC_IPV6 241 + case 6: 242 + switch (type) { 243 + case ICMPV6_PKT_TOOBIG: 244 + rxrpc_adjust_mtu(peer, info); 245 + rcu_read_unlock(); 246 + rxrpc_put_peer(peer); 247 + return; 248 + } 249 + 250 + icmpv6_err_convert(type, code, &err); 251 + 252 + if (err == EACCES) 253 + err = EHOSTUNREACH; 254 + 255 + ee.ee_origin = SO_EE_ORIGIN_ICMP6; 256 + ee.ee_type = type; 257 + ee.ee_code = code; 258 + ee.ee_errno = err; 259 + break; 260 + #endif 261 + } 262 + 263 + trace_rxrpc_rx_icmp(peer, &ee, &srx); 264 + 265 + rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR); 266 + rcu_read_unlock(); 267 + rxrpc_put_peer(peer); 268 + } 269 + 270 + /* 271 + * Find the peer associated with a local error. 
272 + */ 273 + static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, 274 + const struct sk_buff *skb, 275 + struct sockaddr_rxrpc *srx) 31 276 { 32 277 struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); 33 278 ··· 283 38 srx->transport_len = local->srx.transport_len; 284 39 srx->transport.family = local->srx.transport.family; 285 40 286 - /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice 287 - * versa? 288 - */ 289 41 switch (srx->transport.family) { 290 42 case AF_INET: 291 43 srx->transport_len = sizeof(srx->transport.sin); ··· 346 104 /* 347 105 * Handle an MTU/fragmentation problem. 348 106 */ 349 - static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) 107 + static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) 350 108 { 351 - u32 mtu = serr->ee.ee_info; 352 - 353 109 _net("Rx ICMP Fragmentation Needed (%d)", mtu); 354 110 355 111 /* wind down the local interface MTU */ ··· 388 148 struct sock_exterr_skb *serr; 389 149 struct sockaddr_rxrpc srx; 390 150 struct rxrpc_local *local; 391 - struct rxrpc_peer *peer; 151 + struct rxrpc_peer *peer = NULL; 392 152 struct sk_buff *skb; 393 153 394 154 rcu_read_lock(); ··· 412 172 } 413 173 rxrpc_new_skb(skb, rxrpc_skb_received); 414 174 serr = SKB_EXT_ERR(skb); 415 - if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { 416 - _leave("UDP empty message"); 417 - rcu_read_unlock(); 418 - rxrpc_free_skb(skb, rxrpc_skb_freed); 419 - return; 175 + 176 + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) { 177 + peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); 178 + if (peer && !rxrpc_get_peer_maybe(peer)) 179 + peer = NULL; 180 + if (peer) { 181 + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); 182 + rxrpc_store_error(peer, serr); 183 + } 420 184 } 421 185 422 - peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); 423 - if (peer && !rxrpc_get_peer_maybe(peer)) 424 - peer = NULL; 425 - if (!peer) { 426 - rcu_read_unlock(); 427 
- rxrpc_free_skb(skb, rxrpc_skb_freed); 428 - _leave(" [no peer]"); 429 - return; 430 - } 431 - 432 - trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); 433 - 434 - if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && 435 - serr->ee.ee_type == ICMP_DEST_UNREACH && 436 - serr->ee.ee_code == ICMP_FRAG_NEEDED)) { 437 - rxrpc_adjust_mtu(peer, serr); 438 - rcu_read_unlock(); 439 - rxrpc_free_skb(skb, rxrpc_skb_freed); 440 - rxrpc_put_peer(peer); 441 - _leave(" [MTU update]"); 442 - return; 443 - } 444 - 445 - rxrpc_store_error(peer, serr); 446 186 rcu_read_unlock(); 447 187 rxrpc_free_skb(skb, rxrpc_skb_freed); 448 188 rxrpc_put_peer(peer); 449 - 450 189 _leave(""); 451 190 } 452 191