Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

icmp: support rfc 4884

Add setsockopt SOL_IP/IP_RECVERR_RFC4884 to return the offset to an
extension struct if present.

ICMP messages may include an extension structure after the original
datagram. RFC 4884 standardized this behavior. It stores the offset
to the extension header, in 32-bit words, in u8 icmphdr.un.reserved[1].

The field is valid only for ICMP types destination unreachable, time
exceeded and parameter problem, if the length is at least 128 bytes and
the entire packet does not exceed 576 bytes.

Return the offset to the start of the extension struct when reading an
ICMP error from the error queue, if it matches the above constraints.

Do not return the raw u8 field. Return the offset from the start of
the user buffer, in bytes. The kernel does not return the network and
transport headers, so subtract those.

Also validate the headers. Return the offset regardless of validation,
as an invalid extension must still not be misinterpreted as part of
the original datagram. Note that !invalid does not imply valid. If
the extension version does not match, no validation can take place,
for instance.

For backward compatibility, make this optional, set by setsockopt
SOL_IP/IP_RECVERR_RFC4884. For API example and feature test, see
github.com/wdebruij/kerneltools/blob/master/tests/recv_icmp_v2.c

For forward compatibility, reserve only setsockopt value 1, leaving
other bits for additional icmp extensions.

Changes
v1->v2:
- convert word offset to byte offset from start of user buffer
- return in ee_data as u8 may be insufficient
- define extension struct and object header structs
- return len only if constraints met
- if returning len, also validate

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Willem de Bruijn and committed by
David S. Miller
eba75c58 930bc4cc

+124 -1
+4
include/linux/icmp.h
··· 15 15 16 16 #include <linux/skbuff.h> 17 17 #include <uapi/linux/icmp.h> 18 + #include <uapi/linux/errqueue.h> 18 19 19 20 static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb) 20 21 { ··· 35 34 36 35 return false; 37 36 } 37 + 38 + void ip_icmp_error_rfc4884(const struct sk_buff *skb, 39 + struct sock_ee_data_rfc4884 *out); 38 40 39 41 #endif /* _LINUX_ICMP_H */
+1
include/net/inet_sock.h
··· 225 225 mc_all:1, 226 226 nodefrag:1; 227 227 __u8 bind_address_no_port:1, 228 + recverr_rfc4884:1, 228 229 defer_connect:1; /* Indicates that fastopen_connect is set 229 230 * and cookie exists so we defer connect 230 231 * until first data frame is written
+13 -1
include/uapi/linux/errqueue.h
··· 5 5 #include <linux/types.h> 6 6 #include <linux/time_types.h> 7 7 8 + /* RFC 4884: return offset to extension struct + validation */ 9 + struct sock_ee_data_rfc4884 { 10 + __u16 len; 11 + __u8 flags; 12 + __u8 reserved; 13 + }; 14 + 8 15 struct sock_extended_err { 9 16 __u32 ee_errno; 10 17 __u8 ee_origin; ··· 19 12 __u8 ee_code; 20 13 __u8 ee_pad; 21 14 __u32 ee_info; 22 - __u32 ee_data; 15 + union { 16 + __u32 ee_data; 17 + struct sock_ee_data_rfc4884 ee_rfc4884; 18 + }; 23 19 }; 24 20 25 21 #define SO_EE_ORIGIN_NONE 0 ··· 40 30 41 31 #define SO_EE_CODE_TXTIME_INVALID_PARAM 1 42 32 #define SO_EE_CODE_TXTIME_MISSED 2 33 + 34 + #define SO_EE_RFC4884_FLAG_INVALID 1 43 35 44 36 /** 45 37 * struct scm_timestamping - timestamps exposed through cmsg
+22
include/uapi/linux/icmp.h
··· 19 19 #define _UAPI_LINUX_ICMP_H 20 20 21 21 #include <linux/types.h> 22 + #include <asm/byteorder.h> 22 23 23 24 #define ICMP_ECHOREPLY 0 /* Echo Reply */ 24 25 #define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ ··· 96 95 __u32 data; 97 96 }; 98 97 98 + /* RFC 4884 extension struct: one per message */ 99 + struct icmp_ext_hdr { 100 + #if defined(__LITTLE_ENDIAN_BITFIELD) 101 + __u8 reserved1:4, 102 + version:4; 103 + #elif defined(__BIG_ENDIAN_BITFIELD) 104 + __u8 version:4, 105 + reserved1:4; 106 + #else 107 + #error "Please fix <asm/byteorder.h>" 108 + #endif 109 + __u8 reserved2; 110 + __sum16 checksum; 111 + }; 112 + 113 + /* RFC 4884 extension object header: one for each object */ 114 + struct icmp_extobj_hdr { 115 + __be16 length; 116 + __u8 class_num; 117 + __u8 class_type; 118 + }; 99 119 100 120 #endif /* _UAPI_LINUX_ICMP_H */
+1
include/uapi/linux/in.h
··· 123 123 #define IP_CHECKSUM 23 124 124 #define IP_BIND_ADDRESS_NO_PORT 24 125 125 #define IP_RECVFRAGSIZE 25 126 + #define IP_RECVERR_RFC4884 26 126 127 127 128 /* IP_MTU_DISCOVER values */ 128 129 #define IP_PMTUDISC_DONT 0 /* Never send DF frames */
+71
net/ipv4/icmp.c
··· 1116 1116 goto drop; 1117 1117 } 1118 1118 1119 + static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off) 1120 + { 1121 + struct icmp_extobj_hdr *objh, _objh; 1122 + struct icmp_ext_hdr *exth, _exth; 1123 + u16 olen; 1124 + 1125 + exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth); 1126 + if (!exth) 1127 + return false; 1128 + if (exth->version != 2) 1129 + return true; 1130 + 1131 + if (exth->checksum && 1132 + csum_fold(skb_checksum(skb, off, skb->len - off, 0))) 1133 + return false; 1134 + 1135 + off += sizeof(_exth); 1136 + while (off < skb->len) { 1137 + objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh); 1138 + if (!objh) 1139 + return false; 1140 + 1141 + olen = ntohs(objh->length); 1142 + if (olen < sizeof(_objh)) 1143 + return false; 1144 + 1145 + off += olen; 1146 + if (off > skb->len) 1147 + return false; 1148 + } 1149 + 1150 + return true; 1151 + } 1152 + 1153 + void ip_icmp_error_rfc4884(const struct sk_buff *skb, 1154 + struct sock_ee_data_rfc4884 *out) 1155 + { 1156 + int hlen, off; 1157 + 1158 + switch (icmp_hdr(skb)->type) { 1159 + case ICMP_DEST_UNREACH: 1160 + case ICMP_TIME_EXCEEDED: 1161 + case ICMP_PARAMETERPROB: 1162 + break; 1163 + default: 1164 + return; 1165 + } 1166 + 1167 + /* outer headers up to inner iph. 
skb->data is at inner payload */ 1168 + hlen = -skb_transport_offset(skb) - sizeof(struct icmphdr); 1169 + 1170 + /* per rfc 791: maximum packet length of 576 bytes */ 1171 + if (hlen + skb->len > 576) 1172 + return; 1173 + 1174 + /* per rfc 4884: minimal datagram length of 128 bytes */ 1175 + off = icmp_hdr(skb)->un.reserved[1] * sizeof(u32); 1176 + if (off < 128) 1177 + return; 1178 + 1179 + /* kernel has stripped headers: return payload offset in bytes */ 1180 + off -= hlen; 1181 + if (off + sizeof(struct icmp_ext_hdr) > skb->len) 1182 + return; 1183 + 1184 + out->len = off; 1185 + 1186 + if (!ip_icmp_error_rfc4884_validate(skb, off)) 1187 + out->flags |= SO_EE_RFC4884_FLAG_INVALID; 1188 + } 1189 + 1119 1190 int icmp_err(struct sk_buff *skb, u32 info) 1120 1191 { 1121 1192 struct iphdr *iph = (struct iphdr *)skb->data;
+12
net/ipv4/ip_sockglue.c
··· 411 411 serr->port = port; 412 412 413 413 if (skb_pull(skb, payload - skb->data)) { 414 + if (inet_sk(sk)->recverr_rfc4884) 415 + ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); 416 + 414 417 skb_reset_transport_header(skb); 415 418 if (sock_queue_err_skb(sk, skb) == 0) 416 419 return; ··· 907 904 case IP_RECVORIGDSTADDR: 908 905 case IP_CHECKSUM: 909 906 case IP_RECVFRAGSIZE: 907 + case IP_RECVERR_RFC4884: 910 908 if (optlen >= sizeof(int)) { 911 909 if (get_user(val, (int __user *) optval)) 912 910 return -EFAULT; ··· 1066 1062 inet->recverr = !!val; 1067 1063 if (!val) 1068 1064 skb_queue_purge(&sk->sk_error_queue); 1065 + break; 1066 + case IP_RECVERR_RFC4884: 1067 + if (val < 0 || val > 1) 1068 + goto e_inval; 1069 + inet->recverr_rfc4884 = !!val; 1069 1070 break; 1070 1071 case IP_MULTICAST_TTL: 1071 1072 if (sk->sk_type == SOCK_STREAM) ··· 1619 1610 } 1620 1611 case IP_RECVERR: 1621 1612 val = inet->recverr; 1613 + break; 1614 + case IP_RECVERR_RFC4884: 1615 + val = inet->recverr_rfc4884; 1622 1616 break; 1623 1617 case IP_MULTICAST_TTL: 1624 1618 val = inet->mc_ttl;