Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: add espintcp (RFC 8229)

TCP encapsulation of IKE and IPsec messages (RFC 8229) is implemented
as a TCP ULP, overriding in particular the sendmsg and recvmsg
operations. A Stream Parser is used to extract messages out of the TCP
stream using the first 2 bytes as length marker. Received IKE messages
are put on "ike_queue", waiting to be dequeued by the custom recvmsg
implementation. Received ESP messages are sent to XFRM, like with UDP
encapsulation.

Some of this code is taken from the original submission by Herbert
Xu. Currently, only IPv4 is supported, like for UDP encapsulation.

Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

Authored by Sabrina Dubroca and committed by Steffen Klassert.
Commit: e27cca96 (parent: eecd227a)

+760 -3
+39
include/net/espintcp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _NET_ESPINTCP_H 3 + #define _NET_ESPINTCP_H 4 + 5 + #include <net/strparser.h> 6 + #include <linux/skmsg.h> 7 + 8 + void __init espintcp_init(void); 9 + 10 + int espintcp_push_skb(struct sock *sk, struct sk_buff *skb); 11 + int espintcp_queue_out(struct sock *sk, struct sk_buff *skb); 12 + bool tcp_is_ulp_esp(struct sock *sk); 13 + 14 + struct espintcp_msg { 15 + struct sk_buff *skb; 16 + struct sk_msg skmsg; 17 + int offset; 18 + int len; 19 + }; 20 + 21 + struct espintcp_ctx { 22 + struct strparser strp; 23 + struct sk_buff_head ike_queue; 24 + struct sk_buff_head out_queue; 25 + struct espintcp_msg partial; 26 + void (*saved_data_ready)(struct sock *sk); 27 + void (*saved_write_space)(struct sock *sk); 28 + struct work_struct work; 29 + bool tx_running; 30 + }; 31 + 32 + static inline struct espintcp_ctx *espintcp_getctx(const struct sock *sk) 33 + { 34 + struct inet_connection_sock *icsk = inet_csk(sk); 35 + 36 + /* RCU is only needed for diag */ 37 + return (__force void *)icsk->icsk_ulp_data; 38 + } 39 + #endif
+1
include/net/xfrm.h
··· 193 193 194 194 /* Data for encapsulator */ 195 195 struct xfrm_encap_tmpl *encap; 196 + struct sock __rcu *encap_sk; 196 197 197 198 /* Data for care-of address */ 198 199 xfrm_address_t *coaddr;
+1
include/uapi/linux/udp.h
··· 42 42 #define UDP_ENCAP_GTP0 4 /* GSM TS 09.60 */ 43 43 #define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */ 44 44 #define UDP_ENCAP_RXRPC 6 45 + #define TCP_ENCAP_ESPINTCP 7 /* Yikes, this is really xfrm encap types. */ 45 46 46 47 #endif /* _UAPI_LINUX_UDP_H */
+11
net/ipv4/Kconfig
··· 378 378 379 379 If unsure, say N. 380 380 381 + config INET_ESPINTCP 382 + bool "IP: ESP in TCP encapsulation (RFC 8229)" 383 + depends on XFRM && INET_ESP 384 + select STREAM_PARSER 385 + select NET_SOCK_MSG 386 + help 387 + Support for RFC 8229 encapsulation of ESP and IKE over 388 + TCP/IPv4 sockets. 389 + 390 + If unsure, say N. 391 + 381 392 config INET_IPCOMP 382 393 tristate "IP: IPComp transformation" 383 394 select INET_XFRM_TUNNEL
+188 -3
net/ipv4/esp4.c
··· 18 18 #include <net/icmp.h> 19 19 #include <net/protocol.h> 20 20 #include <net/udp.h> 21 + #include <net/tcp.h> 22 + #include <net/espintcp.h> 21 23 22 24 #include <linux/highmem.h> 23 25 ··· 119 117 put_page(sg_page(sg)); 120 118 } 121 119 120 + #ifdef CONFIG_INET_ESPINTCP 121 + struct esp_tcp_sk { 122 + struct sock *sk; 123 + struct rcu_head rcu; 124 + }; 125 + 126 + static void esp_free_tcp_sk(struct rcu_head *head) 127 + { 128 + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); 129 + 130 + sock_put(esk->sk); 131 + kfree(esk); 132 + } 133 + 134 + static struct sock *esp_find_tcp_sk(struct xfrm_state *x) 135 + { 136 + struct xfrm_encap_tmpl *encap = x->encap; 137 + struct esp_tcp_sk *esk; 138 + __be16 sport, dport; 139 + struct sock *nsk; 140 + struct sock *sk; 141 + 142 + sk = rcu_dereference(x->encap_sk); 143 + if (sk && sk->sk_state == TCP_ESTABLISHED) 144 + return sk; 145 + 146 + spin_lock_bh(&x->lock); 147 + sport = encap->encap_sport; 148 + dport = encap->encap_dport; 149 + nsk = rcu_dereference_protected(x->encap_sk, 150 + lockdep_is_held(&x->lock)); 151 + if (sk && sk == nsk) { 152 + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); 153 + if (!esk) { 154 + spin_unlock_bh(&x->lock); 155 + return ERR_PTR(-ENOMEM); 156 + } 157 + RCU_INIT_POINTER(x->encap_sk, NULL); 158 + esk->sk = sk; 159 + call_rcu(&esk->rcu, esp_free_tcp_sk); 160 + } 161 + spin_unlock_bh(&x->lock); 162 + 163 + sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4, 164 + dport, x->props.saddr.a4, sport, 0); 165 + if (!sk) 166 + return ERR_PTR(-ENOENT); 167 + 168 + if (!tcp_is_ulp_esp(sk)) { 169 + sock_put(sk); 170 + return ERR_PTR(-EINVAL); 171 + } 172 + 173 + spin_lock_bh(&x->lock); 174 + nsk = rcu_dereference_protected(x->encap_sk, 175 + lockdep_is_held(&x->lock)); 176 + if (encap->encap_sport != sport || 177 + encap->encap_dport != dport) { 178 + sock_put(sk); 179 + sk = nsk ?: ERR_PTR(-EREMCHG); 180 + } else if (sk == nsk) { 181 + sock_put(sk); 182 + } 
else { 183 + rcu_assign_pointer(x->encap_sk, sk); 184 + } 185 + spin_unlock_bh(&x->lock); 186 + 187 + return sk; 188 + } 189 + 190 + static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) 191 + { 192 + struct sock *sk; 193 + int err; 194 + 195 + rcu_read_lock(); 196 + 197 + sk = esp_find_tcp_sk(x); 198 + err = PTR_ERR_OR_ZERO(sk); 199 + if (err) 200 + goto out; 201 + 202 + bh_lock_sock(sk); 203 + if (sock_owned_by_user(sk)) 204 + err = espintcp_queue_out(sk, skb); 205 + else 206 + err = espintcp_push_skb(sk, skb); 207 + bh_unlock_sock(sk); 208 + 209 + out: 210 + rcu_read_unlock(); 211 + return err; 212 + } 213 + 214 + static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, 215 + struct sk_buff *skb) 216 + { 217 + struct dst_entry *dst = skb_dst(skb); 218 + struct xfrm_state *x = dst->xfrm; 219 + 220 + return esp_output_tcp_finish(x, skb); 221 + } 222 + 223 + static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) 224 + { 225 + int err; 226 + 227 + local_bh_disable(); 228 + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); 229 + local_bh_enable(); 230 + 231 + /* EINPROGRESS just happens to do the right thing. It 232 + * actually means that the skb has been consumed and 233 + * isn't coming back. 
234 + */ 235 + return err ?: -EINPROGRESS; 236 + } 237 + #else 238 + static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) 239 + { 240 + kfree_skb(skb); 241 + 242 + return -EOPNOTSUPP; 243 + } 244 + #endif 245 + 122 246 static void esp_output_done(struct crypto_async_request *base, int err) 123 247 { 124 248 struct sk_buff *skb = base->data; ··· 275 147 secpath_reset(skb); 276 148 xfrm_dev_resume(skb); 277 149 } else { 278 - xfrm_output_resume(skb, err); 150 + if (!err && 151 + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) 152 + esp_output_tail_tcp(x, skb); 153 + else 154 + xfrm_output_resume(skb, err); 279 155 } 280 156 } 281 157 ··· 368 236 unsigned int len; 369 237 370 238 len = skb->len + esp->tailen - skb_transport_offset(skb); 371 - if (len + sizeof(struct iphdr) >= IP_MAX_MTU) 239 + if (len + sizeof(struct iphdr) > IP_MAX_MTU) 372 240 return ERR_PTR(-EMSGSIZE); 373 241 374 242 uh = (struct udphdr *)esp->esph; ··· 387 255 388 256 return (struct ip_esp_hdr *)(uh + 1); 389 257 } 258 + 259 + #ifdef CONFIG_INET_ESPINTCP 260 + static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, 261 + struct sk_buff *skb, 262 + struct esp_info *esp) 263 + { 264 + __be16 *lenp = (void *)esp->esph; 265 + struct ip_esp_hdr *esph; 266 + unsigned int len; 267 + struct sock *sk; 268 + 269 + len = skb->len + esp->tailen - skb_transport_offset(skb); 270 + if (len > IP_MAX_MTU) 271 + return ERR_PTR(-EMSGSIZE); 272 + 273 + rcu_read_lock(); 274 + sk = esp_find_tcp_sk(x); 275 + rcu_read_unlock(); 276 + 277 + if (IS_ERR(sk)) 278 + return ERR_CAST(sk); 279 + 280 + *lenp = htons(len); 281 + esph = (struct ip_esp_hdr *)(lenp + 1); 282 + 283 + return esph; 284 + } 285 + #else 286 + static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, 287 + struct sk_buff *skb, 288 + struct esp_info *esp) 289 + { 290 + return ERR_PTR(-EOPNOTSUPP); 291 + } 292 + #endif 390 293 391 294 static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, 
392 295 struct esp_info *esp) ··· 443 276 case UDP_ENCAP_ESPINUDP_NON_IKE: 444 277 esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport); 445 278 break; 279 + case TCP_ENCAP_ESPINTCP: 280 + esph = esp_output_tcp_encap(x, skb, esp); 281 + break; 446 282 } 447 283 448 284 if (IS_ERR(esph)) ··· 466 296 struct sk_buff *trailer; 467 297 int tailen = esp->tailen; 468 298 469 - /* this is non-NULL only with UDP Encapsulation */ 299 + /* this is non-NULL only with TCP/UDP Encapsulation */ 470 300 if (x->encap) { 471 301 int err = esp_output_encap(x, skb, esp); 472 302 ··· 661 491 if (sg != dsg) 662 492 esp_ssg_unref(x, tmp); 663 493 494 + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) 495 + err = esp_output_tail_tcp(x, skb); 496 + 664 497 error_free: 665 498 kfree(tmp); 666 499 error: ··· 790 617 791 618 if (x->encap) { 792 619 struct xfrm_encap_tmpl *encap = x->encap; 620 + struct tcphdr *th = (void *)(skb_network_header(skb) + ihl); 793 621 struct udphdr *uh = (void *)(skb_network_header(skb) + ihl); 794 622 __be16 source; 795 623 796 624 switch (x->encap->encap_type) { 625 + case TCP_ENCAP_ESPINTCP: 626 + source = th->source; 627 + break; 797 628 case UDP_ENCAP_ESPINUDP: 798 629 case UDP_ENCAP_ESPINUDP_NON_IKE: 799 630 source = uh->source; ··· 1194 1017 case UDP_ENCAP_ESPINUDP_NON_IKE: 1195 1018 x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); 1196 1019 break; 1020 + #ifdef CONFIG_INET_ESPINTCP 1021 + case TCP_ENCAP_ESPINTCP: 1022 + /* only the length field, TCP encap is done by 1023 + * the socket 1024 + */ 1025 + x->props.header_len += 2; 1026 + break; 1027 + #endif 1197 1028 } 1198 1029 } 1199 1030
+1
net/xfrm/Makefile
··· 11 11 obj-$(CONFIG_XFRM_USER) += xfrm_user.o 12 12 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o 13 13 obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o 14 + obj-$(CONFIG_INET_ESPINTCP) += espintcp.o
+509
net/xfrm/espintcp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <net/tcp.h> 3 + #include <net/strparser.h> 4 + #include <net/xfrm.h> 5 + #include <net/esp.h> 6 + #include <net/espintcp.h> 7 + #include <linux/skmsg.h> 8 + #include <net/inet_common.h> 9 + 10 + static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, 11 + struct sock *sk) 12 + { 13 + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf || 14 + !sk_rmem_schedule(sk, skb, skb->truesize)) { 15 + kfree_skb(skb); 16 + return; 17 + } 18 + 19 + skb_set_owner_r(skb, sk); 20 + 21 + memset(skb->cb, 0, sizeof(skb->cb)); 22 + skb_queue_tail(&ctx->ike_queue, skb); 23 + ctx->saved_data_ready(sk); 24 + } 25 + 26 + static void handle_esp(struct sk_buff *skb, struct sock *sk) 27 + { 28 + skb_reset_transport_header(skb); 29 + memset(skb->cb, 0, sizeof(skb->cb)); 30 + 31 + rcu_read_lock(); 32 + skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); 33 + local_bh_disable(); 34 + xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); 35 + local_bh_enable(); 36 + rcu_read_unlock(); 37 + } 38 + 39 + static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) 40 + { 41 + struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx, 42 + strp); 43 + struct strp_msg *rxm = strp_msg(skb); 44 + u32 nonesp_marker; 45 + int err; 46 + 47 + err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker, 48 + sizeof(nonesp_marker)); 49 + if (err < 0) { 50 + kfree_skb(skb); 51 + return; 52 + } 53 + 54 + /* remove header, leave non-ESP marker/SPI */ 55 + if (!__pskb_pull(skb, rxm->offset + 2)) { 56 + kfree_skb(skb); 57 + return; 58 + } 59 + 60 + if (pskb_trim(skb, rxm->full_len - 2) != 0) { 61 + kfree_skb(skb); 62 + return; 63 + } 64 + 65 + if (nonesp_marker == 0) 66 + handle_nonesp(ctx, skb, strp->sk); 67 + else 68 + handle_esp(skb, strp->sk); 69 + } 70 + 71 + static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) 72 + { 73 + struct strp_msg *rxm = strp_msg(skb); 74 + __be16 blen; 75 + u16 
len; 76 + int err; 77 + 78 + if (skb->len < rxm->offset + 2) 79 + return 0; 80 + 81 + err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen)); 82 + if (err < 0) 83 + return err; 84 + 85 + len = be16_to_cpu(blen); 86 + if (len < 6) 87 + return -EINVAL; 88 + 89 + return len; 90 + } 91 + 92 + static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 93 + int nonblock, int flags, int *addr_len) 94 + { 95 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 96 + struct sk_buff *skb; 97 + int err = 0; 98 + int copied; 99 + int off = 0; 100 + 101 + flags |= nonblock ? MSG_DONTWAIT : 0; 102 + 103 + skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, NULL, &off, &err); 104 + if (!skb) 105 + return err; 106 + 107 + copied = len; 108 + if (copied > skb->len) 109 + copied = skb->len; 110 + else if (copied < skb->len) 111 + msg->msg_flags |= MSG_TRUNC; 112 + 113 + err = skb_copy_datagram_msg(skb, 0, msg, copied); 114 + if (unlikely(err)) { 115 + kfree_skb(skb); 116 + return err; 117 + } 118 + 119 + if (flags & MSG_TRUNC) 120 + copied = skb->len; 121 + kfree_skb(skb); 122 + return copied; 123 + } 124 + 125 + int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) 126 + { 127 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 128 + 129 + if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog) 130 + return -ENOBUFS; 131 + 132 + __skb_queue_tail(&ctx->out_queue, skb); 133 + 134 + return 0; 135 + } 136 + EXPORT_SYMBOL_GPL(espintcp_queue_out); 137 + 138 + /* espintcp length field is 2B and length includes the length field's size */ 139 + #define MAX_ESPINTCP_MSG (((1 << 16) - 1) - 2) 140 + 141 + static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg, 142 + int flags) 143 + { 144 + do { 145 + int ret; 146 + 147 + ret = skb_send_sock_locked(sk, emsg->skb, 148 + emsg->offset, emsg->len); 149 + if (ret < 0) 150 + return ret; 151 + 152 + emsg->len -= ret; 153 + emsg->offset += ret; 154 + } while (emsg->len > 0); 155 + 156 + 
kfree_skb(emsg->skb); 157 + memset(emsg, 0, sizeof(*emsg)); 158 + 159 + return 0; 160 + } 161 + 162 + static int espintcp_sendskmsg_locked(struct sock *sk, 163 + struct espintcp_msg *emsg, int flags) 164 + { 165 + struct sk_msg *skmsg = &emsg->skmsg; 166 + struct scatterlist *sg; 167 + int done = 0; 168 + int ret; 169 + 170 + flags |= MSG_SENDPAGE_NOTLAST; 171 + sg = &skmsg->sg.data[skmsg->sg.start]; 172 + do { 173 + size_t size = sg->length - emsg->offset; 174 + int offset = sg->offset + emsg->offset; 175 + struct page *p; 176 + 177 + emsg->offset = 0; 178 + 179 + if (sg_is_last(sg)) 180 + flags &= ~MSG_SENDPAGE_NOTLAST; 181 + 182 + p = sg_page(sg); 183 + retry: 184 + ret = do_tcp_sendpages(sk, p, offset, size, flags); 185 + if (ret < 0) { 186 + emsg->offset = offset - sg->offset; 187 + skmsg->sg.start += done; 188 + return ret; 189 + } 190 + 191 + if (ret != size) { 192 + offset += ret; 193 + size -= ret; 194 + goto retry; 195 + } 196 + 197 + done++; 198 + put_page(p); 199 + sk_mem_uncharge(sk, sg->length); 200 + sg = sg_next(sg); 201 + } while (sg); 202 + 203 + memset(emsg, 0, sizeof(*emsg)); 204 + 205 + return 0; 206 + } 207 + 208 + static int espintcp_push_msgs(struct sock *sk) 209 + { 210 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 211 + struct espintcp_msg *emsg = &ctx->partial; 212 + int err; 213 + 214 + if (!emsg->len) 215 + return 0; 216 + 217 + if (ctx->tx_running) 218 + return -EAGAIN; 219 + ctx->tx_running = 1; 220 + 221 + if (emsg->skb) 222 + err = espintcp_sendskb_locked(sk, emsg, 0); 223 + else 224 + err = espintcp_sendskmsg_locked(sk, emsg, 0); 225 + if (err == -EAGAIN) { 226 + ctx->tx_running = 0; 227 + return 0; 228 + } 229 + if (!err) 230 + memset(emsg, 0, sizeof(*emsg)); 231 + 232 + ctx->tx_running = 0; 233 + 234 + return err; 235 + } 236 + 237 + int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) 238 + { 239 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 240 + struct espintcp_msg *emsg = &ctx->partial; 241 + unsigned int len; 
242 + int offset; 243 + 244 + if (sk->sk_state != TCP_ESTABLISHED) { 245 + kfree_skb(skb); 246 + return -ECONNRESET; 247 + } 248 + 249 + offset = skb_transport_offset(skb); 250 + len = skb->len - offset; 251 + 252 + espintcp_push_msgs(sk); 253 + 254 + if (emsg->len) { 255 + kfree_skb(skb); 256 + return -ENOBUFS; 257 + } 258 + 259 + skb_set_owner_w(skb, sk); 260 + 261 + emsg->offset = offset; 262 + emsg->len = len; 263 + emsg->skb = skb; 264 + 265 + espintcp_push_msgs(sk); 266 + 267 + return 0; 268 + } 269 + EXPORT_SYMBOL_GPL(espintcp_push_skb); 270 + 271 + static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) 272 + { 273 + long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 274 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 275 + struct espintcp_msg *emsg = &ctx->partial; 276 + struct iov_iter pfx_iter; 277 + struct kvec pfx_iov = {}; 278 + size_t msglen = size + 2; 279 + char buf[2] = {0}; 280 + int err, end; 281 + 282 + if (msg->msg_flags) 283 + return -EOPNOTSUPP; 284 + 285 + if (size > MAX_ESPINTCP_MSG) 286 + return -EMSGSIZE; 287 + 288 + if (msg->msg_controllen) 289 + return -EOPNOTSUPP; 290 + 291 + lock_sock(sk); 292 + 293 + err = espintcp_push_msgs(sk); 294 + if (err < 0) { 295 + err = -ENOBUFS; 296 + goto unlock; 297 + } 298 + 299 + sk_msg_init(&emsg->skmsg); 300 + while (1) { 301 + /* only -ENOMEM is possible since we don't coalesce */ 302 + err = sk_msg_alloc(sk, &emsg->skmsg, msglen, 0); 303 + if (!err) 304 + break; 305 + 306 + err = sk_stream_wait_memory(sk, &timeo); 307 + if (err) 308 + goto fail; 309 + } 310 + 311 + *((__be16 *)buf) = cpu_to_be16(msglen); 312 + pfx_iov.iov_base = buf; 313 + pfx_iov.iov_len = sizeof(buf); 314 + iov_iter_kvec(&pfx_iter, WRITE, &pfx_iov, 1, pfx_iov.iov_len); 315 + 316 + err = sk_msg_memcopy_from_iter(sk, &pfx_iter, &emsg->skmsg, 317 + pfx_iov.iov_len); 318 + if (err < 0) 319 + goto fail; 320 + 321 + err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, &emsg->skmsg, size); 322 + if (err 
< 0) 323 + goto fail; 324 + 325 + end = emsg->skmsg.sg.end; 326 + emsg->len = size; 327 + sk_msg_iter_var_prev(end); 328 + sg_mark_end(sk_msg_elem(&emsg->skmsg, end)); 329 + 330 + tcp_rate_check_app_limited(sk); 331 + 332 + err = espintcp_push_msgs(sk); 333 + /* this message could be partially sent, keep it */ 334 + if (err < 0) 335 + goto unlock; 336 + release_sock(sk); 337 + 338 + return size; 339 + 340 + fail: 341 + sk_msg_free(sk, &emsg->skmsg); 342 + memset(emsg, 0, sizeof(*emsg)); 343 + unlock: 344 + release_sock(sk); 345 + return err; 346 + } 347 + 348 + static struct proto espintcp_prot __ro_after_init; 349 + static struct proto_ops espintcp_ops __ro_after_init; 350 + 351 + static void espintcp_data_ready(struct sock *sk) 352 + { 353 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 354 + 355 + strp_data_ready(&ctx->strp); 356 + } 357 + 358 + static void espintcp_tx_work(struct work_struct *work) 359 + { 360 + struct espintcp_ctx *ctx = container_of(work, 361 + struct espintcp_ctx, work); 362 + struct sock *sk = ctx->strp.sk; 363 + 364 + lock_sock(sk); 365 + if (!ctx->tx_running) 366 + espintcp_push_msgs(sk); 367 + release_sock(sk); 368 + } 369 + 370 + static void espintcp_write_space(struct sock *sk) 371 + { 372 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 373 + 374 + schedule_work(&ctx->work); 375 + ctx->saved_write_space(sk); 376 + } 377 + 378 + static void espintcp_destruct(struct sock *sk) 379 + { 380 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 381 + 382 + kfree(ctx); 383 + } 384 + 385 + bool tcp_is_ulp_esp(struct sock *sk) 386 + { 387 + return sk->sk_prot == &espintcp_prot; 388 + } 389 + EXPORT_SYMBOL_GPL(tcp_is_ulp_esp); 390 + 391 + static int espintcp_init_sk(struct sock *sk) 392 + { 393 + struct inet_connection_sock *icsk = inet_csk(sk); 394 + struct strp_callbacks cb = { 395 + .rcv_msg = espintcp_rcv, 396 + .parse_msg = espintcp_parse, 397 + }; 398 + struct espintcp_ctx *ctx; 399 + int err; 400 + 401 + /* sockmap is not compatible with 
espintcp */ 402 + if (sk->sk_user_data) 403 + return -EBUSY; 404 + 405 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 406 + if (!ctx) 407 + return -ENOMEM; 408 + 409 + err = strp_init(&ctx->strp, sk, &cb); 410 + if (err) 411 + goto free; 412 + 413 + __sk_dst_reset(sk); 414 + 415 + strp_check_rcv(&ctx->strp); 416 + skb_queue_head_init(&ctx->ike_queue); 417 + skb_queue_head_init(&ctx->out_queue); 418 + sk->sk_prot = &espintcp_prot; 419 + sk->sk_socket->ops = &espintcp_ops; 420 + ctx->saved_data_ready = sk->sk_data_ready; 421 + ctx->saved_write_space = sk->sk_write_space; 422 + sk->sk_data_ready = espintcp_data_ready; 423 + sk->sk_write_space = espintcp_write_space; 424 + sk->sk_destruct = espintcp_destruct; 425 + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); 426 + INIT_WORK(&ctx->work, espintcp_tx_work); 427 + 428 + /* avoid using task_frag */ 429 + sk->sk_allocation = GFP_ATOMIC; 430 + 431 + return 0; 432 + 433 + free: 434 + kfree(ctx); 435 + return err; 436 + } 437 + 438 + static void espintcp_release(struct sock *sk) 439 + { 440 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 441 + struct sk_buff_head queue; 442 + struct sk_buff *skb; 443 + 444 + __skb_queue_head_init(&queue); 445 + skb_queue_splice_init(&ctx->out_queue, &queue); 446 + 447 + while ((skb = __skb_dequeue(&queue))) 448 + espintcp_push_skb(sk, skb); 449 + 450 + tcp_release_cb(sk); 451 + } 452 + 453 + static void espintcp_close(struct sock *sk, long timeout) 454 + { 455 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 456 + struct espintcp_msg *emsg = &ctx->partial; 457 + 458 + strp_stop(&ctx->strp); 459 + 460 + sk->sk_prot = &tcp_prot; 461 + barrier(); 462 + 463 + cancel_work_sync(&ctx->work); 464 + strp_done(&ctx->strp); 465 + 466 + skb_queue_purge(&ctx->out_queue); 467 + skb_queue_purge(&ctx->ike_queue); 468 + 469 + if (emsg->len) { 470 + if (emsg->skb) 471 + kfree_skb(emsg->skb); 472 + else 473 + sk_msg_free(sk, &emsg->skmsg); 474 + } 475 + 476 + tcp_close(sk, timeout); 477 + } 478 + 479 + static 
__poll_t espintcp_poll(struct file *file, struct socket *sock, 480 + poll_table *wait) 481 + { 482 + __poll_t mask = datagram_poll(file, sock, wait); 483 + struct sock *sk = sock->sk; 484 + struct espintcp_ctx *ctx = espintcp_getctx(sk); 485 + 486 + if (!skb_queue_empty(&ctx->ike_queue)) 487 + mask |= EPOLLIN | EPOLLRDNORM; 488 + 489 + return mask; 490 + } 491 + 492 + static struct tcp_ulp_ops espintcp_ulp __read_mostly = { 493 + .name = "espintcp", 494 + .owner = THIS_MODULE, 495 + .init = espintcp_init_sk, 496 + }; 497 + 498 + void __init espintcp_init(void) 499 + { 500 + memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot)); 501 + memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops)); 502 + espintcp_prot.sendmsg = espintcp_sendmsg; 503 + espintcp_prot.recvmsg = espintcp_recvmsg; 504 + espintcp_prot.close = espintcp_close; 505 + espintcp_prot.release_cb = espintcp_release; 506 + espintcp_ops.poll = espintcp_poll; 507 + 508 + tcp_register_ulp(&espintcp_ulp); 509 + }
+7
net/xfrm/xfrm_policy.c
··· 39 39 #ifdef CONFIG_XFRM_STATISTICS 40 40 #include <net/snmp.h> 41 41 #endif 42 + #ifdef CONFIG_INET_ESPINTCP 43 + #include <net/espintcp.h> 44 + #endif 42 45 43 46 #include "xfrm_hash.h" 44 47 ··· 4159 4156 xfrm_dev_init(); 4160 4157 seqcount_init(&xfrm_policy_hash_generation); 4161 4158 xfrm_input_init(); 4159 + 4160 + #ifdef CONFIG_INET_ESPINTCP 4161 + espintcp_init(); 4162 + #endif 4162 4163 4163 4164 RCU_INIT_POINTER(xfrm_if_cb, NULL); 4164 4165 synchronize_rcu();
+3
net/xfrm/xfrm_state.c
··· 670 670 net->xfrm.state_num--; 671 671 spin_unlock(&net->xfrm.xfrm_state_lock); 672 672 673 + if (x->encap_sk) 674 + sock_put(rcu_dereference_raw(x->encap_sk)); 675 + 673 676 xfrm_dev_state_delete(x); 674 677 675 678 /* All xfrm_state objects are created by xfrm_state_alloc.