Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2#include <linux/module.h>
3#include <linux/errno.h>
4#include <linux/socket.h>
5#include <linux/skbuff.h>
6#include <linux/ip.h>
7#include <linux/icmp.h>
8#include <linux/udp.h>
9#include <linux/types.h>
10#include <linux/kernel.h>
11#include <net/genetlink.h>
12#include <net/gro.h>
13#include <net/gue.h>
14#include <net/fou.h>
15#include <net/ip.h>
16#include <net/protocol.h>
17#include <net/udp.h>
18#include <net/udp_tunnel.h>
19#include <net/xfrm.h>
20#include <uapi/linux/fou.h>
21#include <uapi/linux/genetlink.h>
22
/* Per-port FOU state, one per listening UDP tunnel socket.
 * Created by fou_create(), linked on the per-netns fou_net->fou_list,
 * and freed via kfree_rcu() in fou_release().
 */
struct fou {
	struct socket *sock;	/* the listening UDP tunnel socket */
	u8 protocol;		/* inner IP protocol (FOU_ENCAP_DIRECT only) */
	u8 flags;		/* FOU_F_* flags */
	__be16 port;		/* local UDP port, network byte order */
	u8 family;		/* AF_INET or AF_INET6 */
	u16 type;		/* FOU_ENCAP_DIRECT or FOU_ENCAP_GUE */
	struct list_head list;	/* entry on fou_net->fou_list */
	struct rcu_head rcu;	/* for deferred free */
};
33
34#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
35
/* Configuration parsed from netlink attributes, used both to create a
 * new FOU port and to look up an existing one (see fou_cfg_cmp()).
 */
struct fou_cfg {
	u16 type;			/* FOU_ENCAP_* encapsulation type */
	u8 protocol;			/* inner protocol for direct encap */
	u8 flags;			/* FOU_F_* flags */
	struct udp_port_cfg udp_config;	/* address/port/device binding */
};
42
43static unsigned int fou_net_id;
44
/* Per-network-namespace state: all configured FOU ports in the netns. */
struct fou_net {
	struct list_head fou_list;	/* list of struct fou */
	struct mutex fou_lock;		/* protects fou_list */
};
49
/* Return the FOU state attached to a tunnel socket via
 * setup_udp_tunnel_sock() (tunnel_cfg.sk_user_data), or NULL.
 */
static inline struct fou *fou_from_sock(struct sock *sk)
{
	return sk->sk_user_data;
}
54
/* Strip 'len' bytes of outer headers from the front of the packet and
 * fix up the outer L3 length field accordingly.  Returns non-zero
 * (from iptunnel_pull_offloads()) on failure; the caller then drops.
 */
static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
{
	/* Remove 'len' bytes from the packet (UDP header and
	 * FOU header if present).
	 */
	if (fou->family == AF_INET)
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
	else
		ipv6_hdr(skb)->payload_len =
		    htons(ntohs(ipv6_hdr(skb)->payload_len) - len);

	__skb_pull(skb, len);
	/* Fold the pulled bytes out of skb->csum (CHECKSUM_COMPLETE case);
	 * transport_header still points at the old UDP header here.
	 */
	skb_postpull_rcsum(skb, udp_hdr(skb), len);
	skb_reset_transport_header(skb);
	return iptunnel_pull_offloads(skb);
}
71
72static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
73{
74 struct fou *fou = fou_from_sock(sk);
75
76 if (!fou)
77 return 1;
78
79 if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
80 goto drop;
81
82 return -fou->protocol;
83
84drop:
85 kfree_skb(skb);
86 return 0;
87}
88
/* Apply the GUE remote checksum offload option on the receive path.
 *
 * @data points at the option: two big-endian u16 values giving the
 * checksum start and checksum field offset relative to the end of the
 * GUE header.  Returns the (possibly relocated) GUE header pointer on
 * success, or NULL if the required bytes could not be linearized.
 */
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
				  void *data, size_t hdrlen, u8 ipproto,
				  bool nopartial)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	/* Need the linear area to cover both the checksum field and the
	 * start of the checksummed region.
	 */
	size_t plen = sizeof(struct udphdr) + hdrlen +
	    max_t(size_t, offset + sizeof(u16), start);

	/* Already handled (e.g. by hardware or the GRO path). */
	if (skb->remcsum_offload)
		return guehdr;

	if (!pskb_may_pull(skb, plen))
		return NULL;
	/* pskb_may_pull() may have reallocated the header; recompute. */
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	skb_remcsum_process(skb, (void *)guehdr + hdrlen,
			    start, offset, nopartial);

	return guehdr;
}
111
/* GUE control messages are not implemented; consume and drop the skb. */
static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
	/* No support yet */
	kfree_skb(skb);
	return 0;
}
118
/* encap_rcv handler for GUE sockets.
 *
 * Parses a version-0 GUE header (or recognizes version-1 direct IP
 * encapsulation), strips the outer UDP + GUE headers and returns the
 * negative inner protocol number so the IP stack resubmits the inner
 * packet.  Returns 1 to fall back to normal UDP processing when no FOU
 * state is attached, 0 when the packet was consumed.
 */
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);
	size_t len, optlen, hdrlen;
	struct guehdr *guehdr;
	void *data;
	u16 doffset = 0;
	u8 proto_ctype;

	if (!fou)
		return 1;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);
	if (!pskb_may_pull(skb, len))
		goto drop;

	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	switch (guehdr->version) {
	case 0: /* Full GUE header present */
		break;

	case 1: {
		/* Direct encapsulation of IPv4 or IPv6 */

		int prot;

		/* First nibble after UDP is the inner IP version. */
		switch (((struct iphdr *)guehdr)->version) {
		case 4:
			prot = IPPROTO_IPIP;
			break;
		case 6:
			prot = IPPROTO_IPV6;
			break;
		default:
			goto drop;
		}

		if (fou_recv_pull(skb, fou, sizeof(struct udphdr)))
			goto drop;

		return -prot;
	}

	default: /* Undefined version */
		goto drop;
	}

	/* hlen counts 32-bit words of optional fields. */
	optlen = guehdr->hlen << 2;
	len += optlen;

	if (!pskb_may_pull(skb, len))
		goto drop;

	/* guehdr may change after pull */
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	if (validate_gue_flags(guehdr, optlen))
		goto drop;

	hdrlen = sizeof(struct guehdr) + optlen;

	/* Shrink the outer L3 length field by UDP + GUE header size. */
	if (fou->family == AF_INET)
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
	else
		ipv6_hdr(skb)->payload_len =
		    htons(ntohs(ipv6_hdr(skb)->payload_len) - len);

	/* Pull csum through the guehdr now . This can be used if
	 * there is a remote checksum offload.
	 */
	skb_postpull_rcsum(skb, udp_hdr(skb), len);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_remcsum(skb, guehdr, data + doffset,
					     hdrlen, guehdr->proto_ctype,
					     !!(fou->flags &
						FOU_F_REMCSUM_NOPARTIAL));
			if (!guehdr)
				goto drop;

			/* gue_remcsum() may have relocated the header. */
			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	if (unlikely(guehdr->control))
		return gue_control_message(skb, guehdr);

	/* Save the inner protocol before the header is pulled away. */
	proto_ctype = guehdr->proto_ctype;
	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
	skb_reset_transport_header(skb);

	if (iptunnel_pull_offloads(skb))
		goto drop;

	return -proto_ctype;

drop:
	kfree_skb(skb);
	return 0;
}
229
230static struct sk_buff *fou_gro_receive(struct sock *sk,
231 struct list_head *head,
232 struct sk_buff *skb)
233{
234 const struct net_offload __rcu **offloads;
235 u8 proto = fou_from_sock(sk)->protocol;
236 const struct net_offload *ops;
237 struct sk_buff *pp = NULL;
238
239 /* We can clear the encap_mark for FOU as we are essentially doing
240 * one of two possible things. We are either adding an L4 tunnel
241 * header to the outer L3 tunnel header, or we are simply
242 * treating the GRE tunnel header as though it is a UDP protocol
243 * specific header such as VXLAN or GENEVE.
244 */
245 NAPI_GRO_CB(skb)->encap_mark = 0;
246
247 /* Flag this frame as already having an outer encap header */
248 NAPI_GRO_CB(skb)->is_fou = 1;
249
250 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
251 ops = rcu_dereference(offloads[proto]);
252 if (!ops || !ops->callbacks.gro_receive)
253 goto out;
254
255 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
256
257out:
258 return pp;
259}
260
261static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
262 int nhoff)
263{
264 const struct net_offload __rcu **offloads;
265 u8 proto = fou_from_sock(sk)->protocol;
266 const struct net_offload *ops;
267 int err = -ENOSYS;
268
269 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
270 ops = rcu_dereference(offloads[proto]);
271 if (WARN_ON(!ops || !ops->callbacks.gro_complete))
272 goto out;
273
274 err = ops->callbacks.gro_complete(skb, nhoff);
275
276 skb_set_inner_mac_header(skb, nhoff);
277
278out:
279 return err;
280}
281
/* GRO-path handling of the GUE remote checksum offload option.
 * Returns the (possibly relocated) GUE header, or NULL when the
 * packet's checksum has not been validated so the option cannot be
 * applied safely.
 */
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
				      struct guehdr *guehdr, void *data,
				      size_t hdrlen, struct gro_remcsum *grc,
				      bool nopartial)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);

	/* Option already applied (e.g. by hardware). */
	if (skb->remcsum_offload)
		return guehdr;

	if (!NAPI_GRO_CB(skb)->csum_valid)
		return NULL;

	guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen,
					 start, offset, grc, nopartial);

	/* Mark done so the non-GRO receive path won't re-apply it. */
	skb->remcsum_offload = 1;

	return guehdr;
}
304
/* GRO receive handler for GUE sockets.
 *
 * Parses the GUE header from the GRO header area, applies the remote
 * checksum offload option if present, flow-matches held packets on the
 * full GUE header, then hands off to the inner protocol's gro_receive.
 * Version 1 (direct IP encapsulation) skips straight to the inner
 * protocol dispatch.
 */
static struct sk_buff *gue_gro_receive(struct sock *sk,
				       struct list_head *head,
				       struct sk_buff *skb)
{
	const struct net_offload __rcu **offloads;
	const struct net_offload *ops;
	struct sk_buff *pp = NULL;
	struct sk_buff *p;
	struct guehdr *guehdr;
	size_t len, optlen, hdrlen, off;
	void *data;
	u16 doffset = 0;
	int flush = 1;
	struct fou *fou = fou_from_sock(sk);
	struct gro_remcsum grc;
	u8 proto;

	skb_gro_remcsum_init(&grc);

	off = skb_gro_offset(skb);
	len = off + sizeof(*guehdr);

	guehdr = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	switch (guehdr->version) {
	case 0:
		break;
	case 1:
		/* Direct IP encapsulation: inner IP version nibble. */
		switch (((struct iphdr *)guehdr)->version) {
		case 4:
			proto = IPPROTO_IPIP;
			break;
		case 6:
			proto = IPPROTO_IPV6;
			break;
		default:
			goto out;
		}
		goto next_proto;
	default:
		goto out;
	}

	/* hlen counts 32-bit words of optional fields. */
	optlen = guehdr->hlen << 2;
	len += optlen;

	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	if (unlikely(guehdr->control) || guehdr->version != 0 ||
	    validate_gue_flags(guehdr, optlen))
		goto out;

	hdrlen = sizeof(*guehdr) + optlen;

	/* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
	 * this is needed if there is a remote checkcsum offload.
	 */
	skb_gro_postpull_rcsum(skb, guehdr, hdrlen);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_gro_remcsum(skb, off, guehdr,
						 data + doffset, hdrlen, &grc,
						 !!(fou->flags &
						    FOU_F_REMCSUM_NOPARTIAL));

			if (!guehdr)
				goto out;

			/* gue_gro_remcsum() may have relocated the header. */
			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	skb_gro_pull(skb, hdrlen);

	/* Flow-match held packets on the complete GUE header. */
	list_for_each_entry(p, head, list) {
		const struct guehdr *guehdr2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		guehdr2 = (struct guehdr *)(p->data + off);

		/* Compare base GUE header to be equal (covers
		 * hlen, version, proto_ctype, and flags.
		 */
		if (guehdr->word != guehdr2->word) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Compare optional fields are the same. */
		if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
					   guehdr->hlen << 2)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	proto = guehdr->proto_ctype;

next_proto:

	/* We can clear the encap_mark for GUE as we are essentially doing
	 * one of two possible things. We are either adding an L4 tunnel
	 * header to the outer L3 tunnel header, or we are simply
	 * treating the GRE tunnel header as though it is a UDP protocol
	 * specific header such as VXLAN or GENEVE.
	 */
	NAPI_GRO_CB(skb)->encap_mark = 0;

	/* Flag this frame as already having an outer encap header */
	NAPI_GRO_CB(skb)->is_fou = 1;

	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
		goto out;

	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
	flush = 0;

out:
	/* Finalize remcsum state and flush decision for this skb. */
	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);

	return pp;
}
449
/* GRO complete handler for GUE: recompute the GUE header length for
 * this version and invoke the inner protocol's gro_complete past it.
 * Returns -ENOENT for unknown versions/inner protocols.
 */
static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
{
	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
	const struct net_offload __rcu **offloads;
	const struct net_offload *ops;
	unsigned int guehlen = 0;
	u8 proto;
	int err = -ENOENT;

	switch (guehdr->version) {
	case 0:
		proto = guehdr->proto_ctype;
		guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
		break;
	case 1:
		/* Direct IP encapsulation: no GUE header to skip. */
		switch (((struct iphdr *)guehdr)->version) {
		case 4:
			proto = IPPROTO_IPIP;
			break;
		case 6:
			proto = IPPROTO_IPV6;
			break;
		default:
			return err;
		}
		break;
	default:
		return err;
	}

	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out;

	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);

	skb_set_inner_mac_header(skb, nhoff + guehlen);

out:
	return err;
}
492
493static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg)
494{
495 struct sock *sk = fou->sock->sk;
496 struct udp_port_cfg *udp_cfg = &cfg->udp_config;
497
498 if (fou->family != udp_cfg->family ||
499 fou->port != udp_cfg->local_udp_port ||
500 sk->sk_dport != udp_cfg->peer_udp_port ||
501 sk->sk_bound_dev_if != udp_cfg->bind_ifindex)
502 return false;
503
504 if (fou->family == AF_INET) {
505 if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr ||
506 sk->sk_daddr != udp_cfg->peer_ip.s_addr)
507 return false;
508 else
509 return true;
510#if IS_ENABLED(CONFIG_IPV6)
511 } else {
512 if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) ||
513 ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6))
514 return false;
515 else
516 return true;
517#endif
518 }
519
520 return false;
521}
522
523static int fou_add_to_port_list(struct net *net, struct fou *fou,
524 struct fou_cfg *cfg)
525{
526 struct fou_net *fn = net_generic(net, fou_net_id);
527 struct fou *fout;
528
529 mutex_lock(&fn->fou_lock);
530 list_for_each_entry(fout, &fn->fou_list, list) {
531 if (fou_cfg_cmp(fout, cfg)) {
532 mutex_unlock(&fn->fou_lock);
533 return -EALREADY;
534 }
535 }
536
537 list_add(&fou->list, &fn->fou_list);
538 mutex_unlock(&fn->fou_lock);
539
540 return 0;
541}
542
/* Unlink a FOU port and release its socket.  Callers hold the
 * per-netns fou_lock for the list_del().  The struct fou itself is
 * freed only after an RCU grace period so concurrent receive-path
 * readers of sk_user_data stay safe.
 */
static void fou_release(struct fou *fou)
{
	struct socket *sock = fou->sock;

	list_del(&fou->list);
	udp_tunnel_sock_release(sock);

	kfree_rcu(fou, rcu);
}
552
/* Create a FOU/GUE listener from a parsed configuration.
 *
 * Opens a kernel UDP socket, attaches a struct fou as its encap state
 * and installs the receive/GRO callbacks matching cfg->type.  On
 * success the new port is linked into the per-netns list and, when
 * @sockp is non-NULL, the socket is returned through it.  On failure
 * all partially acquired resources are released.
 */
static int fou_create(struct net *net, struct fou_cfg *cfg,
		      struct socket **sockp)
{
	struct socket *sock = NULL;
	struct fou *fou = NULL;
	struct sock *sk;
	struct udp_tunnel_sock_cfg tunnel_cfg;
	int err;

	/* Open UDP socket */
	err = udp_sock_create(net, &cfg->udp_config, &sock);
	if (err < 0)
		goto error;

	/* Allocate FOU port structure */
	fou = kzalloc(sizeof(*fou), GFP_KERNEL);
	if (!fou) {
		err = -ENOMEM;
		goto error;
	}

	sk = sock->sk;

	fou->port = cfg->udp_config.local_udp_port;
	fou->family = cfg->udp_config.family;
	fou->flags = cfg->flags;
	fou->type = cfg->type;
	fou->sock = sock;

	memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.sk_user_data = fou;
	tunnel_cfg.encap_destroy = NULL;

	/* Initial for fou type */
	switch (cfg->type) {
	case FOU_ENCAP_DIRECT:
		tunnel_cfg.encap_rcv = fou_udp_recv;
		tunnel_cfg.gro_receive = fou_gro_receive;
		tunnel_cfg.gro_complete = fou_gro_complete;
		fou->protocol = cfg->protocol;
		break;
	case FOU_ENCAP_GUE:
		tunnel_cfg.encap_rcv = gue_udp_recv;
		tunnel_cfg.gro_receive = gue_gro_receive;
		tunnel_cfg.gro_complete = gue_gro_complete;
		break;
	default:
		err = -EINVAL;
		goto error;
	}

	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);

	/* Receive-path allocations on this socket must not sleep. */
	sk->sk_allocation = GFP_ATOMIC;

	err = fou_add_to_port_list(net, fou, cfg);
	if (err)
		goto error;

	if (sockp)
		*sockp = sock;

	return 0;

error:
	/* kfree(NULL) is a no-op, so this covers every failure point. */
	kfree(fou);
	if (sock)
		udp_tunnel_sock_release(sock);

	return err;
}
625
626static int fou_destroy(struct net *net, struct fou_cfg *cfg)
627{
628 struct fou_net *fn = net_generic(net, fou_net_id);
629 int err = -EINVAL;
630 struct fou *fou;
631
632 mutex_lock(&fn->fou_lock);
633 list_for_each_entry(fou, &fn->fou_list, list) {
634 if (fou_cfg_cmp(fou, cfg)) {
635 fou_release(fou);
636 err = 0;
637 break;
638 }
639 }
640 mutex_unlock(&fn->fou_lock);
641
642 return err;
643}
644
645static struct genl_family fou_nl_family;
646
/* Netlink attribute validation policy for the FOU genetlink family. */
static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
	[FOU_ATTR_PORT] = { .type = NLA_U16, },
	[FOU_ATTR_AF] = { .type = NLA_U8, },
	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
	[FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
	[FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
	[FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), },
	[FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), },
	[FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
	[FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
};
660
/* Translate FOU netlink attributes into a struct fou_cfg.
 *
 * Defaults to AF_INET when FOU_ATTR_AF is absent.  A peer address
 * requires a peer port, and binding to an interface requires a local
 * address (-EINVAL otherwise).  Returns 0 or a negative errno.
 */
static int parse_nl_config(struct genl_info *info,
			   struct fou_cfg *cfg)
{
	bool has_local = false, has_peer = false;
	struct nlattr *attr;
	int ifindex;
	__be16 port;

	memset(cfg, 0, sizeof(*cfg));

	cfg->udp_config.family = AF_INET;

	if (info->attrs[FOU_ATTR_AF]) {
		u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);

		switch (family) {
		case AF_INET:
			break;
		case AF_INET6:
			cfg->udp_config.ipv6_v6only = 1;
			break;
		default:
			return -EAFNOSUPPORT;
		}

		cfg->udp_config.family = family;
	}

	if (info->attrs[FOU_ATTR_PORT]) {
		port = nla_get_be16(info->attrs[FOU_ATTR_PORT]);
		cfg->udp_config.local_udp_port = port;
	}

	if (info->attrs[FOU_ATTR_IPPROTO])
		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);

	if (info->attrs[FOU_ATTR_TYPE])
		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);

	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;

	/* Addresses are read from the family-matching attributes only. */
	if (cfg->udp_config.family == AF_INET) {
		if (info->attrs[FOU_ATTR_LOCAL_V4]) {
			attr = info->attrs[FOU_ATTR_LOCAL_V4];
			cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr);
			has_local = true;
		}

		if (info->attrs[FOU_ATTR_PEER_V4]) {
			attr = info->attrs[FOU_ATTR_PEER_V4];
			cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr);
			has_peer = true;
		}
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		if (info->attrs[FOU_ATTR_LOCAL_V6]) {
			attr = info->attrs[FOU_ATTR_LOCAL_V6];
			cfg->udp_config.local_ip6 = nla_get_in6_addr(attr);
			has_local = true;
		}

		if (info->attrs[FOU_ATTR_PEER_V6]) {
			attr = info->attrs[FOU_ATTR_PEER_V6];
			cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr);
			has_peer = true;
		}
#endif
	}

	if (has_peer) {
		if (info->attrs[FOU_ATTR_PEER_PORT]) {
			port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]);
			cfg->udp_config.peer_udp_port = port;
		} else {
			return -EINVAL;
		}
	}

	if (info->attrs[FOU_ATTR_IFINDEX]) {
		/* Device binding only makes sense with a local address. */
		if (!has_local)
			return -EINVAL;

		ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]);

		cfg->udp_config.bind_ifindex = ifindex;
	}

	return 0;
}
751
752static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
753{
754 struct net *net = genl_info_net(info);
755 struct fou_cfg cfg;
756 int err;
757
758 err = parse_nl_config(info, &cfg);
759 if (err)
760 return err;
761
762 return fou_create(net, &cfg, NULL);
763}
764
765static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
766{
767 struct net *net = genl_info_net(info);
768 struct fou_cfg cfg;
769 int err;
770
771 err = parse_nl_config(info, &cfg);
772 if (err)
773 return err;
774
775 return fou_destroy(net, &cfg);
776}
777
778static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
779{
780 struct sock *sk = fou->sock->sk;
781
782 if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
783 nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
784 nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) ||
785 nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
786 nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) ||
787 nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if))
788 return -1;
789
790 if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
791 if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
792 return -1;
793
794 if (fou->sock->sk->sk_family == AF_INET) {
795 if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr))
796 return -1;
797
798 if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr))
799 return -1;
800#if IS_ENABLED(CONFIG_IPV6)
801 } else {
802 if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6,
803 &sk->sk_v6_rcv_saddr))
804 return -1;
805
806 if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr))
807 return -1;
808#endif
809 }
810
811 return 0;
812}
813
814static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
815 u32 flags, struct sk_buff *skb, u8 cmd)
816{
817 void *hdr;
818
819 hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
820 if (!hdr)
821 return -ENOMEM;
822
823 if (fou_fill_info(fou, skb) < 0)
824 goto nla_put_failure;
825
826 genlmsg_end(skb, hdr);
827 return 0;
828
829nla_put_failure:
830 genlmsg_cancel(skb, hdr);
831 return -EMSGSIZE;
832}
833
/* FOU_CMD_GET doit handler: look up a port matching the supplied
 * attributes and reply with a single message describing it.  Requires
 * a non-zero local port and a valid address family.
 */
static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = genl_info_net(info);
	struct fou_net *fn = net_generic(net, fou_net_id);
	struct sk_buff *msg;
	struct fou_cfg cfg;
	struct fou *fout;
	__be16 port;
	u8 family;
	int ret;

	ret = parse_nl_config(info, &cfg);
	if (ret)
		return ret;
	port = cfg.udp_config.local_udp_port;
	if (port == 0)
		return -EINVAL;

	family = cfg.udp_config.family;
	if (family != AF_INET && family != AF_INET6)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	/* -ESRCH unless a configured port matches. */
	ret = -ESRCH;
	mutex_lock(&fn->fou_lock);
	list_for_each_entry(fout, &fn->fou_list, list) {
		if (fou_cfg_cmp(fout, &cfg)) {
			ret = fou_dump_info(fout, info->snd_portid,
					    info->snd_seq, 0, msg,
					    info->genlhdr->cmd);
			break;
		}
	}
	mutex_unlock(&fn->fou_lock);
	if (ret < 0)
		goto out_free;

	return genlmsg_reply(msg, info);

out_free:
	nlmsg_free(msg);
	return ret;
}
880
881static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
882{
883 struct net *net = sock_net(skb->sk);
884 struct fou_net *fn = net_generic(net, fou_net_id);
885 struct fou *fout;
886 int idx = 0, ret;
887
888 mutex_lock(&fn->fou_lock);
889 list_for_each_entry(fout, &fn->fou_list, list) {
890 if (idx++ < cb->args[0])
891 continue;
892 ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
893 cb->nlh->nlmsg_seq, NLM_F_MULTI,
894 skb, FOU_CMD_GET);
895 if (ret)
896 break;
897 }
898 mutex_unlock(&fn->fou_lock);
899
900 cb->args[0] = idx;
901 return skb->len;
902}
903
/* Genetlink command table; ADD and DEL require CAP_NET_ADMIN. */
static const struct genl_small_ops fou_nl_ops[] = {
	{
		.cmd = FOU_CMD_ADD,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = fou_nl_cmd_add_port,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = FOU_CMD_DEL,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = fou_nl_cmd_rm_port,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = FOU_CMD_GET,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = fou_nl_cmd_get_port,
		.dumpit = fou_nl_dump,
	},
};
924
/* FOU genetlink family; netnsok so each netns manages its own ports. */
static struct genl_family fou_nl_family __ro_after_init = {
	.hdrsize = 0,
	.name = FOU_GENL_NAME,
	.version = FOU_GENL_VERSION,
	.maxattr = FOU_ATTR_MAX,
	.policy = fou_nl_policy,
	.netnsok = true,
	.module = THIS_MODULE,
	.small_ops = fou_nl_ops,
	.n_small_ops = ARRAY_SIZE(fou_nl_ops),
};
936
/* Outer header overhead added by plain FOU: just the UDP header. */
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct udphdr);
}
EXPORT_SYMBOL(fou_encap_hlen);
942
943size_t gue_encap_hlen(struct ip_tunnel_encap *e)
944{
945 size_t len;
946 bool need_priv = false;
947
948 len = sizeof(struct udphdr) + sizeof(struct guehdr);
949
950 if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
951 len += GUE_PLEN_REMCSUM;
952 need_priv = true;
953 }
954
955 len += need_priv ? GUE_LEN_PRIV : 0;
956
957 return len;
958}
959EXPORT_SYMBOL(gue_encap_hlen);
960
961int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
962 u8 *protocol, __be16 *sport, int type)
963{
964 int err;
965
966 err = iptunnel_handle_offloads(skb, type);
967 if (err)
968 return err;
969
970 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
971 skb, 0, 0, false);
972
973 return 0;
974}
975EXPORT_SYMBOL(__fou_build_header);
976
/* Build the GUE header in front of the packet.
 *
 * Adds the remote checksum offload option when requested and the skb's
 * checksum is CHECKSUM_PARTIAL.  The UDP source port is chosen before
 * skb_push() so the flow hash is computed over the inner headers.
 * *protocol is consumed as the GUE proto_ctype; the caller later sets
 * the outer protocol to UDP.  Returns 0 or a negative errno.
 */
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		       u8 *protocol, __be16 *sport, int type)
{
	struct guehdr *guehdr;
	size_t hdrlen, optlen = 0;
	void *data;
	bool need_priv = false;
	int err;

	if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		optlen += GUE_PLEN_REMCSUM;
		type |= SKB_GSO_TUNNEL_REMCSUM;
		need_priv = true;
	}

	/* The private flags word precedes any private options. */
	optlen += need_priv ? GUE_LEN_PRIV : 0;

	err = iptunnel_handle_offloads(skb, type);
	if (err)
		return err;

	/* Get source port (based on flow hash) before skb_push */
	*sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
						skb, 0, 0, false);

	hdrlen = sizeof(struct guehdr) + optlen;

	skb_push(skb, hdrlen);

	guehdr = (struct guehdr *)skb->data;

	guehdr->control = 0;
	guehdr->version = 0;
	guehdr->hlen = optlen >> 2;	/* option length in 32-bit words */
	guehdr->flags = 0;
	guehdr->proto_ctype = *protocol;

	data = &guehdr[1];

	if (need_priv) {
		__be32 *flags = data;

		guehdr->flags |= GUE_FLAG_PRIV;
		*flags = 0;
		data += GUE_LEN_PRIV;

		if (type & SKB_GSO_TUNNEL_REMCSUM) {
			u16 csum_start = skb_checksum_start_offset(skb);
			__be16 *pd = data;

			/* Checksum must start within the inner packet. */
			if (csum_start < hdrlen)
				return -EINVAL;

			/* Offsets are relative to the end of the GUE hdr. */
			csum_start -= hdrlen;
			pd[0] = htons(csum_start);
			pd[1] = htons(csum_start + skb->csum_offset);

			if (!skb_is_gso(skb)) {
				skb->ip_summed = CHECKSUM_NONE;
				skb->encapsulation = 0;
			}

			*flags |= GUE_PFLAG_REMCSUM;
			data += GUE_PLEN_REMCSUM;
		}

	}

	return 0;
}
EXPORT_SYMBOL(__gue_build_header);
1049
1050#ifdef CONFIG_NET_FOU_IP_TUNNELS
1051
/* Push and fill the outer UDP header, then set *protocol to
 * IPPROTO_UDP for the outer IP header.  uh->len is written before
 * udp_set_csum() since the checksum covers the length field.
 */
static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
{
	struct udphdr *uh;

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	uh = udp_hdr(skb);

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	/* Zero checksum unless TUNNEL_ENCAP_FLAG_CSUM was requested. */
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;
}
1070
1071static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1072 u8 *protocol, struct flowi4 *fl4)
1073{
1074 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1075 SKB_GSO_UDP_TUNNEL;
1076 __be16 sport;
1077 int err;
1078
1079 err = __fou_build_header(skb, e, protocol, &sport, type);
1080 if (err)
1081 return err;
1082
1083 fou_build_udp(skb, e, fl4, protocol, sport);
1084
1085 return 0;
1086}
1087
1088static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
1089 u8 *protocol, struct flowi4 *fl4)
1090{
1091 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
1092 SKB_GSO_UDP_TUNNEL;
1093 __be16 sport;
1094 int err;
1095
1096 err = __gue_build_header(skb, e, protocol, &sport, type);
1097 if (err)
1098 return err;
1099
1100 fou_build_udp(skb, e, fl4, protocol, sport);
1101
1102 return 0;
1103}
1104
1105static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
1106{
1107 const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
1108
1109 if (ipprot && ipprot->err_handler) {
1110 if (!ipprot->err_handler(skb, info))
1111 return 0;
1112 }
1113
1114 return -ENOENT;
1115}
1116
/* ICMP error handler for GUE-encapsulated tunnels.
 *
 * Parses the GUE header of the offending packet carried in the ICMP
 * payload and forwards the error to the inner protocol's handler.
 * The transport header is temporarily repointed at the inner packet
 * and restored before returning.  Returns 0 when the error was
 * delivered, a negative errno otherwise.
 */
static int gue_err(struct sk_buff *skb, u32 info)
{
	int transport_offset = skb_transport_offset(skb);
	struct guehdr *guehdr;
	size_t len, optlen;
	int ret;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);
	if (!pskb_may_pull(skb, transport_offset + len))
		return -EINVAL;

	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	switch (guehdr->version) {
	case 0: /* Full GUE header present */
		break;
	case 1: {
		/* Direct encapsulation of IPv4 or IPv6 */
		skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));

		switch (((struct iphdr *)guehdr)->version) {
		case 4:
			ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
			goto out;
#if IS_ENABLED(CONFIG_IPV6)
		case 6:
			ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
			goto out;
#endif
		default:
			ret = -EOPNOTSUPP;
			goto out;
		}
	}
	default: /* Undefined version */
		return -EOPNOTSUPP;
	}

	if (guehdr->control)
		return -ENOENT;

	optlen = guehdr->hlen << 2;

	if (!pskb_may_pull(skb, transport_offset + len + optlen))
		return -EINVAL;

	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
	if (validate_gue_flags(guehdr, optlen))
		return -EINVAL;

	/* Handling exceptions for direct UDP encapsulation in GUE would lead to
	 * recursion. Besides, this kind of encapsulation can't even be
	 * configured currently. Discard this.
	 */
	if (guehdr->proto_ctype == IPPROTO_UDP ||
	    guehdr->proto_ctype == IPPROTO_UDPLITE)
		return -EOPNOTSUPP;

	/* Point the transport header at the inner packet for the handler. */
	skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
	ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);

out:
	/* Restore the original transport header offset. */
	skb_set_transport_header(skb, transport_offset);
	return ret;
}
1182
1183
/* Encap ops registered with the IP tunnel core for TUNNEL_ENCAP_FOU. */
static const struct ip_tunnel_encap_ops fou_iptun_ops = {
	.encap_hlen = fou_encap_hlen,
	.build_header = fou_build_header,
	.err_handler = gue_err,
};

/* Encap ops for TUNNEL_ENCAP_GUE. */
static const struct ip_tunnel_encap_ops gue_iptun_ops = {
	.encap_hlen = gue_encap_hlen,
	.build_header = gue_build_header,
	.err_handler = gue_err,
};
1195
1196static int ip_tunnel_encap_add_fou_ops(void)
1197{
1198 int ret;
1199
1200 ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1201 if (ret < 0) {
1202 pr_err("can't add fou ops\n");
1203 return ret;
1204 }
1205
1206 ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
1207 if (ret < 0) {
1208 pr_err("can't add gue ops\n");
1209 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
1210 return ret;
1211 }
1212
1213 return 0;
1214}
1215
/* Unregister the FOU and GUE encap ops added at module init. */
static void ip_tunnel_encap_del_fou_ops(void)
{
	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
}
1221
1222#else
1223
/* CONFIG_NET_FOU_IP_TUNNELS disabled: registration is a no-op. */
static int ip_tunnel_encap_add_fou_ops(void)
{
	return 0;
}
1228
/* CONFIG_NET_FOU_IP_TUNNELS disabled: nothing to unregister. */
static void ip_tunnel_encap_del_fou_ops(void)
{
}
1232
1233#endif
1234
1235static __net_init int fou_init_net(struct net *net)
1236{
1237 struct fou_net *fn = net_generic(net, fou_net_id);
1238
1239 INIT_LIST_HEAD(&fn->fou_list);
1240 mutex_init(&fn->fou_lock);
1241 return 0;
1242}
1243
/* Per-netns teardown: release every FOU port left in this namespace.
 * _safe iteration because fou_release() unlinks each entry.
 */
static __net_exit void fou_exit_net(struct net *net)
{
	struct fou_net *fn = net_generic(net, fou_net_id);
	struct fou *fou, *next;

	/* Close all the FOU sockets */
	mutex_lock(&fn->fou_lock);
	list_for_each_entry_safe(fou, next, &fn->fou_list, list)
		fou_release(fou);
	mutex_unlock(&fn->fou_lock);
}
1255
/* Pernet registration; the core allocates a struct fou_net per netns. */
static struct pernet_operations fou_net_ops = {
	.init = fou_init_net,
	.exit = fou_exit_net,
	.id = &fou_net_id,
	.size = sizeof(struct fou_net),
};
1262
/* Module init: register pernet state, the genetlink family and the
 * tunnel encap ops, unwinding in reverse order on failure.
 */
static int __init fou_init(void)
{
	int ret;

	ret = register_pernet_device(&fou_net_ops);
	if (ret)
		goto exit;

	ret = genl_register_family(&fou_nl_family);
	if (ret < 0)
		goto unregister;

	ret = ip_tunnel_encap_add_fou_ops();
	if (ret == 0)
		return 0;

	genl_unregister_family(&fou_nl_family);
unregister:
	unregister_pernet_device(&fou_net_ops);
exit:
	return ret;
}
1285
/* Module exit: tear down in reverse order of fou_init(). */
static void __exit fou_fini(void)
{
	ip_tunnel_encap_del_fou_ops();
	genl_unregister_family(&fou_nl_family);
	unregister_pernet_device(&fou_net_ops);
}
1292
1293module_init(fou_init);
1294module_exit(fou_fini);
1295MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
1296MODULE_LICENSE("GPL");
1297MODULE_DESCRIPTION("Foo over UDP");