Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v5.1-rc5 1198 lines 26 kB view raw
1#include <linux/module.h> 2#include <linux/errno.h> 3#include <linux/socket.h> 4#include <linux/skbuff.h> 5#include <linux/ip.h> 6#include <linux/icmp.h> 7#include <linux/udp.h> 8#include <linux/types.h> 9#include <linux/kernel.h> 10#include <net/genetlink.h> 11#include <net/gue.h> 12#include <net/fou.h> 13#include <net/ip.h> 14#include <net/protocol.h> 15#include <net/udp.h> 16#include <net/udp_tunnel.h> 17#include <net/xfrm.h> 18#include <uapi/linux/fou.h> 19#include <uapi/linux/genetlink.h> 20 21struct fou { 22 struct socket *sock; 23 u8 protocol; 24 u8 flags; 25 __be16 port; 26 u8 family; 27 u16 type; 28 struct list_head list; 29 struct rcu_head rcu; 30}; 31 32#define FOU_F_REMCSUM_NOPARTIAL BIT(0) 33 34struct fou_cfg { 35 u16 type; 36 u8 protocol; 37 u8 flags; 38 struct udp_port_cfg udp_config; 39}; 40 41static unsigned int fou_net_id; 42 43struct fou_net { 44 struct list_head fou_list; 45 struct mutex fou_lock; 46}; 47 48static inline struct fou *fou_from_sock(struct sock *sk) 49{ 50 return sk->sk_user_data; 51} 52 53static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) 54{ 55 /* Remove 'len' bytes from the packet (UDP header and 56 * FOU header if present). 57 */ 58 if (fou->family == AF_INET) 59 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); 60 else 61 ipv6_hdr(skb)->payload_len = 62 htons(ntohs(ipv6_hdr(skb)->payload_len) - len); 63 64 __skb_pull(skb, len); 65 skb_postpull_rcsum(skb, udp_hdr(skb), len); 66 skb_reset_transport_header(skb); 67 return iptunnel_pull_offloads(skb); 68} 69 70static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) 71{ 72 struct fou *fou = fou_from_sock(sk); 73 74 if (!fou) 75 return 1; 76 77 if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) 78 goto drop; 79 80 return -fou->protocol; 81 82drop: 83 kfree_skb(skb); 84 return 0; 85} 86 87static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, 88 void *data, size_t hdrlen, u8 ipproto, 89 bool nopartial) 90{ 91 __be16 *pd = data; 92 size_t start = ntohs(pd[0]); 93 size_t offset = ntohs(pd[1]); 94 size_t plen = sizeof(struct udphdr) + hdrlen + 95 max_t(size_t, offset + sizeof(u16), start); 96 97 if (skb->remcsum_offload) 98 return guehdr; 99 100 if (!pskb_may_pull(skb, plen)) 101 return NULL; 102 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 103 104 skb_remcsum_process(skb, (void *)guehdr + hdrlen, 105 start, offset, nopartial); 106 107 return guehdr; 108} 109 110static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr) 111{ 112 /* No support yet */ 113 kfree_skb(skb); 114 return 0; 115} 116 117static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) 118{ 119 struct fou *fou = fou_from_sock(sk); 120 size_t len, optlen, hdrlen; 121 struct guehdr *guehdr; 122 void *data; 123 u16 doffset = 0; 124 125 if (!fou) 126 return 1; 127 128 len = sizeof(struct udphdr) + sizeof(struct guehdr); 129 if (!pskb_may_pull(skb, len)) 130 goto drop; 131 132 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 133 134 switch (guehdr->version) { 135 case 0: /* Full GUE header present */ 136 break; 137 138 case 1: { 139 /* Direct encasulation of IPv4 or IPv6 */ 140 141 int prot; 142 143 switch (((struct iphdr *)guehdr)->version) { 144 case 4: 145 prot = IPPROTO_IPIP; 146 break; 147 case 6: 148 prot = IPPROTO_IPV6; 149 break; 150 default: 151 goto drop; 152 } 153 154 if (fou_recv_pull(skb, fou, sizeof(struct udphdr))) 155 goto drop; 156 157 return -prot; 158 } 159 160 default: /* Undefined version */ 161 goto drop; 162 } 163 164 optlen = guehdr->hlen << 2; 165 len += optlen; 166 167 if (!pskb_may_pull(skb, len)) 168 goto drop; 169 170 /* guehdr may change after pull */ 171 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 172 173 hdrlen = sizeof(struct guehdr) + optlen; 174 175 if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen)) 176 goto drop; 177 178 hdrlen = sizeof(struct guehdr) + optlen; 179 180 if (fou->family == AF_INET) 181 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); 182 else 183 ipv6_hdr(skb)->payload_len = 184 htons(ntohs(ipv6_hdr(skb)->payload_len) - len); 185 186 /* Pull csum through the guehdr now . This can be used if 187 * there is a remote checksum offload. 188 */ 189 skb_postpull_rcsum(skb, udp_hdr(skb), len); 190 191 data = &guehdr[1]; 192 193 if (guehdr->flags & GUE_FLAG_PRIV) { 194 __be32 flags = *(__be32 *)(data + doffset); 195 196 doffset += GUE_LEN_PRIV; 197 198 if (flags & GUE_PFLAG_REMCSUM) { 199 guehdr = gue_remcsum(skb, guehdr, data + doffset, 200 hdrlen, guehdr->proto_ctype, 201 !!(fou->flags & 202 FOU_F_REMCSUM_NOPARTIAL)); 203 if (!guehdr) 204 goto drop; 205 206 data = &guehdr[1]; 207 208 doffset += GUE_PLEN_REMCSUM; 209 } 210 } 211 212 if (unlikely(guehdr->control)) 213 return gue_control_message(skb, guehdr); 214 215 __skb_pull(skb, sizeof(struct udphdr) + hdrlen); 216 skb_reset_transport_header(skb); 217 218 if (iptunnel_pull_offloads(skb)) 219 goto drop; 220 221 return -guehdr->proto_ctype; 222 223drop: 224 kfree_skb(skb); 225 return 0; 226} 227 228static struct sk_buff *fou_gro_receive(struct sock *sk, 229 struct list_head *head, 230 struct sk_buff *skb) 231{ 232 u8 proto = fou_from_sock(sk)->protocol; 233 const struct net_offload **offloads; 234 const struct net_offload *ops; 235 struct sk_buff *pp = NULL; 236 237 /* We can clear the encap_mark for FOU as we are essentially doing 238 * one of two possible things. We are either adding an L4 tunnel 239 * header to the outer L3 tunnel header, or we are are simply 240 * treating the GRE tunnel header as though it is a UDP protocol 241 * specific header such as VXLAN or GENEVE. 242 */ 243 NAPI_GRO_CB(skb)->encap_mark = 0; 244 245 /* Flag this frame as already having an outer encap header */ 246 NAPI_GRO_CB(skb)->is_fou = 1; 247 248 rcu_read_lock(); 249 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 250 ops = rcu_dereference(offloads[proto]); 251 if (!ops || !ops->callbacks.gro_receive) 252 goto out_unlock; 253 254 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); 255 256out_unlock: 257 rcu_read_unlock(); 258 259 return pp; 260} 261 262static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, 263 int nhoff) 264{ 265 const struct net_offload *ops; 266 u8 proto = fou_from_sock(sk)->protocol; 267 int err = -ENOSYS; 268 const struct net_offload **offloads; 269 270 rcu_read_lock(); 271 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 272 ops = rcu_dereference(offloads[proto]); 273 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 274 goto out_unlock; 275 276 err = ops->callbacks.gro_complete(skb, nhoff); 277 278 skb_set_inner_mac_header(skb, nhoff); 279 280out_unlock: 281 rcu_read_unlock(); 282 283 return err; 284} 285 286static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, 287 struct guehdr *guehdr, void *data, 288 size_t hdrlen, struct gro_remcsum *grc, 289 bool nopartial) 290{ 291 __be16 *pd = data; 292 size_t start = ntohs(pd[0]); 293 size_t offset = ntohs(pd[1]); 294 295 if (skb->remcsum_offload) 296 return guehdr; 297 298 if (!NAPI_GRO_CB(skb)->csum_valid) 299 return NULL; 300 301 guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen, 302 start, offset, grc, nopartial); 303 304 skb->remcsum_offload = 1; 305 306 return guehdr; 307} 308 309static struct sk_buff *gue_gro_receive(struct sock *sk, 310 struct list_head *head, 311 struct sk_buff *skb) 312{ 313 const struct net_offload **offloads; 314 const struct net_offload *ops; 315 struct sk_buff *pp = NULL; 316 struct sk_buff *p; 317 struct guehdr *guehdr; 318 size_t len, optlen, hdrlen, off; 319 void *data; 320 u16 doffset = 0; 321 int flush = 1; 322 struct fou *fou = fou_from_sock(sk); 323 struct gro_remcsum grc; 324 u8 proto; 325 326 skb_gro_remcsum_init(&grc); 327 328 off = skb_gro_offset(skb); 329 len = off + sizeof(*guehdr); 330 331 guehdr = skb_gro_header_fast(skb, off); 332 if (skb_gro_header_hard(skb, len)) { 333 guehdr = skb_gro_header_slow(skb, len, off); 334 if (unlikely(!guehdr)) 335 goto out; 336 } 337 338 switch (guehdr->version) { 339 case 0: 340 break; 341 case 1: 342 switch (((struct iphdr *)guehdr)->version) { 343 case 4: 344 proto = IPPROTO_IPIP; 345 break; 346 case 6: 347 proto = IPPROTO_IPV6; 348 break; 349 default: 350 goto out; 351 } 352 goto next_proto; 353 default: 354 goto out; 355 } 356 357 optlen = guehdr->hlen << 2; 358 len += optlen; 359 360 if (skb_gro_header_hard(skb, len)) { 361 guehdr = skb_gro_header_slow(skb, len, off); 362 if (unlikely(!guehdr)) 363 goto out; 364 } 365 366 if (unlikely(guehdr->control) || guehdr->version != 0 || 367 validate_gue_flags(guehdr, optlen)) 368 goto out; 369 370 hdrlen = sizeof(*guehdr) + optlen; 371 372 /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr, 373 * this is needed if there is a remote checkcsum offload. 374 */ 375 skb_gro_postpull_rcsum(skb, guehdr, hdrlen); 376 377 data = &guehdr[1]; 378 379 if (guehdr->flags & GUE_FLAG_PRIV) { 380 __be32 flags = *(__be32 *)(data + doffset); 381 382 doffset += GUE_LEN_PRIV; 383 384 if (flags & GUE_PFLAG_REMCSUM) { 385 guehdr = gue_gro_remcsum(skb, off, guehdr, 386 data + doffset, hdrlen, &grc, 387 !!(fou->flags & 388 FOU_F_REMCSUM_NOPARTIAL)); 389 390 if (!guehdr) 391 goto out; 392 393 data = &guehdr[1]; 394 395 doffset += GUE_PLEN_REMCSUM; 396 } 397 } 398 399 skb_gro_pull(skb, hdrlen); 400 401 list_for_each_entry(p, head, list) { 402 const struct guehdr *guehdr2; 403 404 if (!NAPI_GRO_CB(p)->same_flow) 405 continue; 406 407 guehdr2 = (struct guehdr *)(p->data + off); 408 409 /* Compare base GUE header to be equal (covers 410 * hlen, version, proto_ctype, and flags. 411 */ 412 if (guehdr->word != guehdr2->word) { 413 NAPI_GRO_CB(p)->same_flow = 0; 414 continue; 415 } 416 417 /* Compare optional fields are the same. */ 418 if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1], 419 guehdr->hlen << 2)) { 420 NAPI_GRO_CB(p)->same_flow = 0; 421 continue; 422 } 423 } 424 425 proto = guehdr->proto_ctype; 426 427next_proto: 428 429 /* We can clear the encap_mark for GUE as we are essentially doing 430 * one of two possible things. We are either adding an L4 tunnel 431 * header to the outer L3 tunnel header, or we are are simply 432 * treating the GRE tunnel header as though it is a UDP protocol 433 * specific header such as VXLAN or GENEVE. 434 */ 435 NAPI_GRO_CB(skb)->encap_mark = 0; 436 437 /* Flag this frame as already having an outer encap header */ 438 NAPI_GRO_CB(skb)->is_fou = 1; 439 440 rcu_read_lock(); 441 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 442 ops = rcu_dereference(offloads[proto]); 443 if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) 444 goto out_unlock; 445 446 pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); 447 flush = 0; 448 449out_unlock: 450 rcu_read_unlock(); 451out: 452 skb_gro_flush_final_remcsum(skb, pp, flush, &grc); 453 454 return pp; 455} 456 457static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) 458{ 459 const struct net_offload **offloads; 460 struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); 461 const struct net_offload *ops; 462 unsigned int guehlen = 0; 463 u8 proto; 464 int err = -ENOENT; 465 466 switch (guehdr->version) { 467 case 0: 468 proto = guehdr->proto_ctype; 469 guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); 470 break; 471 case 1: 472 switch (((struct iphdr *)guehdr)->version) { 473 case 4: 474 proto = IPPROTO_IPIP; 475 break; 476 case 6: 477 proto = IPPROTO_IPV6; 478 break; 479 default: 480 return err; 481 } 482 break; 483 default: 484 return err; 485 } 486 487 rcu_read_lock(); 488 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 489 ops = rcu_dereference(offloads[proto]); 490 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 491 goto out_unlock; 492 493 err = ops->callbacks.gro_complete(skb, nhoff + guehlen); 494 495 skb_set_inner_mac_header(skb, nhoff + guehlen); 496 497out_unlock: 498 rcu_read_unlock(); 499 return err; 500} 501 502static int fou_add_to_port_list(struct net *net, struct fou *fou) 503{ 504 struct fou_net *fn = net_generic(net, fou_net_id); 505 struct fou *fout; 506 507 mutex_lock(&fn->fou_lock); 508 list_for_each_entry(fout, &fn->fou_list, list) { 509 if (fou->port == fout->port && 510 fou->family == fout->family) { 511 mutex_unlock(&fn->fou_lock); 512 return -EALREADY; 513 } 514 } 515 516 list_add(&fou->list, &fn->fou_list); 517 mutex_unlock(&fn->fou_lock); 518 519 return 0; 520} 521 522static void fou_release(struct fou *fou) 523{ 524 struct socket *sock = fou->sock; 525 526 list_del(&fou->list); 527 udp_tunnel_sock_release(sock); 528 529 kfree_rcu(fou, rcu); 530} 531 532static int fou_create(struct net *net, struct fou_cfg *cfg, 533 struct socket **sockp) 534{ 535 struct socket *sock = NULL; 536 struct fou *fou = NULL; 537 struct sock *sk; 538 struct udp_tunnel_sock_cfg tunnel_cfg; 539 int err; 540 541 /* Open UDP socket */ 542 err = udp_sock_create(net, &cfg->udp_config, &sock); 543 if (err < 0) 544 goto error; 545 546 /* Allocate FOU port structure */ 547 fou = kzalloc(sizeof(*fou), GFP_KERNEL); 548 if (!fou) { 549 err = -ENOMEM; 550 goto error; 551 } 552 553 sk = sock->sk; 554 555 fou->port = cfg->udp_config.local_udp_port; 556 fou->family = cfg->udp_config.family; 557 fou->flags = cfg->flags; 558 fou->type = cfg->type; 559 fou->sock = sock; 560 561 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 562 tunnel_cfg.encap_type = 1; 563 tunnel_cfg.sk_user_data = fou; 564 tunnel_cfg.encap_destroy = NULL; 565 566 /* Initial for fou type */ 567 switch (cfg->type) { 568 case FOU_ENCAP_DIRECT: 569 tunnel_cfg.encap_rcv = fou_udp_recv; 570 tunnel_cfg.gro_receive = fou_gro_receive; 571 tunnel_cfg.gro_complete = fou_gro_complete; 572 fou->protocol = cfg->protocol; 573 break; 574 case FOU_ENCAP_GUE: 575 tunnel_cfg.encap_rcv = gue_udp_recv; 576 tunnel_cfg.gro_receive = gue_gro_receive; 577 tunnel_cfg.gro_complete = gue_gro_complete; 578 break; 579 default: 580 err = -EINVAL; 581 goto error; 582 } 583 584 setup_udp_tunnel_sock(net, sock, &tunnel_cfg); 585 586 sk->sk_allocation = GFP_ATOMIC; 587 588 err = fou_add_to_port_list(net, fou); 589 if (err) 590 goto error; 591 592 if (sockp) 593 *sockp = sock; 594 595 return 0; 596 597error: 598 kfree(fou); 599 if (sock) 600 udp_tunnel_sock_release(sock); 601 602 return err; 603} 604 605static int fou_destroy(struct net *net, struct fou_cfg *cfg) 606{ 607 struct fou_net *fn = net_generic(net, fou_net_id); 608 __be16 port = cfg->udp_config.local_udp_port; 609 u8 family = cfg->udp_config.family; 610 int err = -EINVAL; 611 struct fou *fou; 612 613 mutex_lock(&fn->fou_lock); 614 list_for_each_entry(fou, &fn->fou_list, list) { 615 if (fou->port == port && fou->family == family) { 616 fou_release(fou); 617 err = 0; 618 break; 619 } 620 } 621 mutex_unlock(&fn->fou_lock); 622 623 return err; 624} 625 626static struct genl_family fou_nl_family; 627 628static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { 629 [FOU_ATTR_PORT] = { .type = NLA_U16, }, 630 [FOU_ATTR_AF] = { .type = NLA_U8, }, 631 [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, 632 [FOU_ATTR_TYPE] = { .type = NLA_U8, }, 633 [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, 634}; 635 636static int parse_nl_config(struct genl_info *info, 637 struct fou_cfg *cfg) 638{ 639 memset(cfg, 0, sizeof(*cfg)); 640 641 cfg->udp_config.family = AF_INET; 642 643 if (info->attrs[FOU_ATTR_AF]) { 644 u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); 645 646 switch (family) { 647 case AF_INET: 648 break; 649 case AF_INET6: 650 cfg->udp_config.ipv6_v6only = 1; 651 break; 652 default: 653 return -EAFNOSUPPORT; 654 } 655 656 cfg->udp_config.family = family; 657 } 658 659 if (info->attrs[FOU_ATTR_PORT]) { 660 __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); 661 662 cfg->udp_config.local_udp_port = port; 663 } 664 665 if (info->attrs[FOU_ATTR_IPPROTO]) 666 cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]); 667 668 if (info->attrs[FOU_ATTR_TYPE]) 669 cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); 670 671 if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) 672 cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; 673 674 return 0; 675} 676 677static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) 678{ 679 struct net *net = genl_info_net(info); 680 struct fou_cfg cfg; 681 int err; 682 683 err = parse_nl_config(info, &cfg); 684 if (err) 685 return err; 686 687 return fou_create(net, &cfg, NULL); 688} 689 690static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) 691{ 692 struct net *net = genl_info_net(info); 693 struct fou_cfg cfg; 694 int err; 695 696 err = parse_nl_config(info, &cfg); 697 if (err) 698 return err; 699 700 return fou_destroy(net, &cfg); 701} 702 703static int fou_fill_info(struct fou *fou, struct sk_buff *msg) 704{ 705 if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || 706 nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || 707 nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || 708 nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) 709 return -1; 710 711 if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) 712 if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) 713 return -1; 714 return 0; 715} 716 717static int fou_dump_info(struct fou *fou, u32 portid, u32 seq, 718 u32 flags, struct sk_buff *skb, u8 cmd) 719{ 720 void *hdr; 721 722 hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd); 723 if (!hdr) 724 return -ENOMEM; 725 726 if (fou_fill_info(fou, skb) < 0) 727 goto nla_put_failure; 728 729 genlmsg_end(skb, hdr); 730 return 0; 731 732nla_put_failure: 733 genlmsg_cancel(skb, hdr); 734 return -EMSGSIZE; 735} 736 737static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) 738{ 739 struct net *net = genl_info_net(info); 740 struct fou_net *fn = net_generic(net, fou_net_id); 741 struct sk_buff *msg; 742 struct fou_cfg cfg; 743 struct fou *fout; 744 __be16 port; 745 u8 family; 746 int ret; 747 748 ret = parse_nl_config(info, &cfg); 749 if (ret) 750 return ret; 751 port = cfg.udp_config.local_udp_port; 752 if (port == 0) 753 return -EINVAL; 754 755 family = cfg.udp_config.family; 756 if (family != AF_INET && family != AF_INET6) 757 return -EINVAL; 758 759 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 760 if (!msg) 761 return -ENOMEM; 762 763 ret = -ESRCH; 764 mutex_lock(&fn->fou_lock); 765 list_for_each_entry(fout, &fn->fou_list, list) { 766 if (port == fout->port && family == fout->family) { 767 ret = fou_dump_info(fout, info->snd_portid, 768 info->snd_seq, 0, msg, 769 info->genlhdr->cmd); 770 break; 771 } 772 } 773 mutex_unlock(&fn->fou_lock); 774 if (ret < 0) 775 goto out_free; 776 777 return genlmsg_reply(msg, info); 778 779out_free: 780 nlmsg_free(msg); 781 return ret; 782} 783 784static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) 785{ 786 struct net *net = sock_net(skb->sk); 787 struct fou_net *fn = net_generic(net, fou_net_id); 788 struct fou *fout; 789 int idx = 0, ret; 790 791 mutex_lock(&fn->fou_lock); 792 list_for_each_entry(fout, &fn->fou_list, list) { 793 if (idx++ < cb->args[0]) 794 continue; 795 ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid, 796 cb->nlh->nlmsg_seq, NLM_F_MULTI, 797 skb, FOU_CMD_GET); 798 if (ret) 799 break; 800 } 801 mutex_unlock(&fn->fou_lock); 802 803 cb->args[0] = idx; 804 return skb->len; 805} 806 807static const struct genl_ops fou_nl_ops[] = { 808 { 809 .cmd = FOU_CMD_ADD, 810 .doit = fou_nl_cmd_add_port, 811 .policy = fou_nl_policy, 812 .flags = GENL_ADMIN_PERM, 813 }, 814 { 815 .cmd = FOU_CMD_DEL, 816 .doit = fou_nl_cmd_rm_port, 817 .policy = fou_nl_policy, 818 .flags = GENL_ADMIN_PERM, 819 }, 820 { 821 .cmd = FOU_CMD_GET, 822 .doit = fou_nl_cmd_get_port, 823 .dumpit = fou_nl_dump, 824 .policy = fou_nl_policy, 825 }, 826}; 827 828static struct genl_family fou_nl_family __ro_after_init = { 829 .hdrsize = 0, 830 .name = FOU_GENL_NAME, 831 .version = FOU_GENL_VERSION, 832 .maxattr = FOU_ATTR_MAX, 833 .netnsok = true, 834 .module = THIS_MODULE, 835 .ops = fou_nl_ops, 836 .n_ops = ARRAY_SIZE(fou_nl_ops), 837}; 838 839size_t fou_encap_hlen(struct ip_tunnel_encap *e) 840{ 841 return sizeof(struct udphdr); 842} 843EXPORT_SYMBOL(fou_encap_hlen); 844 845size_t gue_encap_hlen(struct ip_tunnel_encap *e) 846{ 847 size_t len; 848 bool need_priv = false; 849 850 len = sizeof(struct udphdr) + sizeof(struct guehdr); 851 852 if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) { 853 len += GUE_PLEN_REMCSUM; 854 need_priv = true; 855 } 856 857 len += need_priv ? GUE_LEN_PRIV : 0; 858 859 return len; 860} 861EXPORT_SYMBOL(gue_encap_hlen); 862 863int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 864 u8 *protocol, __be16 *sport, int type) 865{ 866 int err; 867 868 err = iptunnel_handle_offloads(skb, type); 869 if (err) 870 return err; 871 872 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), 873 skb, 0, 0, false); 874 875 return 0; 876} 877EXPORT_SYMBOL(__fou_build_header); 878 879int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 880 u8 *protocol, __be16 *sport, int type) 881{ 882 struct guehdr *guehdr; 883 size_t hdrlen, optlen = 0; 884 void *data; 885 bool need_priv = false; 886 int err; 887 888 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && 889 skb->ip_summed == CHECKSUM_PARTIAL) { 890 optlen += GUE_PLEN_REMCSUM; 891 type |= SKB_GSO_TUNNEL_REMCSUM; 892 need_priv = true; 893 } 894 895 optlen += need_priv ? GUE_LEN_PRIV : 0; 896 897 err = iptunnel_handle_offloads(skb, type); 898 if (err) 899 return err; 900 901 /* Get source port (based on flow hash) before skb_push */ 902 *sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), 903 skb, 0, 0, false); 904 905 hdrlen = sizeof(struct guehdr) + optlen; 906 907 skb_push(skb, hdrlen); 908 909 guehdr = (struct guehdr *)skb->data; 910 911 guehdr->control = 0; 912 guehdr->version = 0; 913 guehdr->hlen = optlen >> 2; 914 guehdr->flags = 0; 915 guehdr->proto_ctype = *protocol; 916 917 data = &guehdr[1]; 918 919 if (need_priv) { 920 __be32 *flags = data; 921 922 guehdr->flags |= GUE_FLAG_PRIV; 923 *flags = 0; 924 data += GUE_LEN_PRIV; 925 926 if (type & SKB_GSO_TUNNEL_REMCSUM) { 927 u16 csum_start = skb_checksum_start_offset(skb); 928 __be16 *pd = data; 929 930 if (csum_start < hdrlen) 931 return -EINVAL; 932 933 csum_start -= hdrlen; 934 pd[0] = htons(csum_start); 935 pd[1] = htons(csum_start + skb->csum_offset); 936 937 if (!skb_is_gso(skb)) { 938 skb->ip_summed = CHECKSUM_NONE; 939 skb->encapsulation = 0; 940 } 941 942 *flags |= GUE_PFLAG_REMCSUM; 943 data += GUE_PLEN_REMCSUM; 944 } 945 946 } 947 948 return 0; 949} 950EXPORT_SYMBOL(__gue_build_header); 951 952#ifdef CONFIG_NET_FOU_IP_TUNNELS 953 954static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, 955 struct flowi4 *fl4, u8 *protocol, __be16 sport) 956{ 957 struct udphdr *uh; 958 959 skb_push(skb, sizeof(struct udphdr)); 960 skb_reset_transport_header(skb); 961 962 uh = udp_hdr(skb); 963 964 uh->dest = e->dport; 965 uh->source = sport; 966 uh->len = htons(skb->len); 967 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, 968 fl4->saddr, fl4->daddr, skb->len); 969 970 *protocol = IPPROTO_UDP; 971} 972 973static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 974 u8 *protocol, struct flowi4 *fl4) 975{ 976 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : 977 SKB_GSO_UDP_TUNNEL; 978 __be16 sport; 979 int err; 980 981 err = __fou_build_header(skb, e, protocol, &sport, type); 982 if (err) 983 return err; 984 985 fou_build_udp(skb, e, fl4, protocol, sport); 986 987 return 0; 988} 989 990static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 991 u8 *protocol, struct flowi4 *fl4) 992{ 993 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : 994 SKB_GSO_UDP_TUNNEL; 995 __be16 sport; 996 int err; 997 998 err = __gue_build_header(skb, e, protocol, &sport, type); 999 if (err) 1000 return err; 1001 1002 fou_build_udp(skb, e, fl4, protocol, sport); 1003 1004 return 0; 1005} 1006 1007static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info) 1008{ 1009 const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]); 1010 1011 if (ipprot && ipprot->err_handler) { 1012 if (!ipprot->err_handler(skb, info)) 1013 return 0; 1014 } 1015 1016 return -ENOENT; 1017} 1018 1019static int gue_err(struct sk_buff *skb, u32 info) 1020{ 1021 int transport_offset = skb_transport_offset(skb); 1022 struct guehdr *guehdr; 1023 size_t len, optlen; 1024 int ret; 1025 1026 len = sizeof(struct udphdr) + sizeof(struct guehdr); 1027 if (!pskb_may_pull(skb, transport_offset + len)) 1028 return -EINVAL; 1029 1030 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 1031 1032 switch (guehdr->version) { 1033 case 0: /* Full GUE header present */ 1034 break; 1035 case 1: { 1036 /* Direct encasulation of IPv4 or IPv6 */ 1037 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); 1038 1039 switch (((struct iphdr *)guehdr)->version) { 1040 case 4: 1041 ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info); 1042 goto out; 1043#if IS_ENABLED(CONFIG_IPV6) 1044 case 6: 1045 ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info); 1046 goto out; 1047#endif 1048 default: 1049 ret = -EOPNOTSUPP; 1050 goto out; 1051 } 1052 } 1053 default: /* Undefined version */ 1054 return -EOPNOTSUPP; 1055 } 1056 1057 if (guehdr->control) 1058 return -ENOENT; 1059 1060 optlen = guehdr->hlen << 2; 1061 1062 if (!pskb_may_pull(skb, transport_offset + len + optlen)) 1063 return -EINVAL; 1064 1065 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 1066 if (validate_gue_flags(guehdr, optlen)) 1067 return -EINVAL; 1068 1069 /* Handling exceptions for direct UDP encapsulation in GUE would lead to 1070 * recursion. Besides, this kind of encapsulation can't even be 1071 * configured currently. Discard this. 1072 */ 1073 if (guehdr->proto_ctype == IPPROTO_UDP || 1074 guehdr->proto_ctype == IPPROTO_UDPLITE) 1075 return -EOPNOTSUPP; 1076 1077 skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); 1078 ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info); 1079 1080out: 1081 skb_set_transport_header(skb, transport_offset); 1082 return ret; 1083} 1084 1085 1086static const struct ip_tunnel_encap_ops fou_iptun_ops = { 1087 .encap_hlen = fou_encap_hlen, 1088 .build_header = fou_build_header, 1089 .err_handler = gue_err, 1090}; 1091 1092static const struct ip_tunnel_encap_ops gue_iptun_ops = { 1093 .encap_hlen = gue_encap_hlen, 1094 .build_header = gue_build_header, 1095 .err_handler = gue_err, 1096}; 1097 1098static int ip_tunnel_encap_add_fou_ops(void) 1099{ 1100 int ret; 1101 1102 ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 1103 if (ret < 0) { 1104 pr_err("can't add fou ops\n"); 1105 return ret; 1106 } 1107 1108 ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE); 1109 if (ret < 0) { 1110 pr_err("can't add gue ops\n"); 1111 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 1112 return ret; 1113 } 1114 1115 return 0; 1116} 1117 1118static void ip_tunnel_encap_del_fou_ops(void) 1119{ 1120 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 1121 ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE); 1122} 1123 1124#else 1125 1126static int ip_tunnel_encap_add_fou_ops(void) 1127{ 1128 return 0; 1129} 1130 1131static void ip_tunnel_encap_del_fou_ops(void) 1132{ 1133} 1134 1135#endif 1136 1137static __net_init int fou_init_net(struct net *net) 1138{ 1139 struct fou_net *fn = net_generic(net, fou_net_id); 1140 1141 INIT_LIST_HEAD(&fn->fou_list); 1142 mutex_init(&fn->fou_lock); 1143 return 0; 1144} 1145 1146static __net_exit void fou_exit_net(struct net *net) 1147{ 1148 struct fou_net *fn = net_generic(net, fou_net_id); 1149 struct fou *fou, *next; 1150 1151 /* Close all the FOU sockets */ 1152 mutex_lock(&fn->fou_lock); 1153 list_for_each_entry_safe(fou, next, &fn->fou_list, list) 1154 fou_release(fou); 1155 mutex_unlock(&fn->fou_lock); 1156} 1157 1158static struct pernet_operations fou_net_ops = { 1159 .init = fou_init_net, 1160 .exit = fou_exit_net, 1161 .id = &fou_net_id, 1162 .size = sizeof(struct fou_net), 1163}; 1164 1165static int __init fou_init(void) 1166{ 1167 int ret; 1168 1169 ret = register_pernet_device(&fou_net_ops); 1170 if (ret) 1171 goto exit; 1172 1173 ret = genl_register_family(&fou_nl_family); 1174 if (ret < 0) 1175 goto unregister; 1176 1177 ret = ip_tunnel_encap_add_fou_ops(); 1178 if (ret == 0) 1179 return 0; 1180 1181 genl_unregister_family(&fou_nl_family); 1182unregister: 1183 unregister_pernet_device(&fou_net_ops); 1184exit: 1185 return ret; 1186} 1187 1188static void __exit fou_fini(void) 1189{ 1190 ip_tunnel_encap_del_fou_ops(); 1191 genl_unregister_family(&fou_nl_family); 1192 unregister_pernet_device(&fou_net_ops); 1193} 1194 1195module_init(fou_init); 1196module_exit(fou_fini); 1197MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); 1198MODULE_LICENSE("GPL");