Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.6-rc7 1024 lines 22 kB view raw
1#include <linux/module.h> 2#include <linux/errno.h> 3#include <linux/socket.h> 4#include <linux/skbuff.h> 5#include <linux/ip.h> 6#include <linux/udp.h> 7#include <linux/types.h> 8#include <linux/kernel.h> 9#include <net/genetlink.h> 10#include <net/gue.h> 11#include <net/ip.h> 12#include <net/protocol.h> 13#include <net/udp.h> 14#include <net/udp_tunnel.h> 15#include <net/xfrm.h> 16#include <uapi/linux/fou.h> 17#include <uapi/linux/genetlink.h> 18 19struct fou { 20 struct socket *sock; 21 u8 protocol; 22 u8 flags; 23 __be16 port; 24 u16 type; 25 struct udp_offload udp_offloads; 26 struct list_head list; 27 struct rcu_head rcu; 28}; 29 30#define FOU_F_REMCSUM_NOPARTIAL BIT(0) 31 32struct fou_cfg { 33 u16 type; 34 u8 protocol; 35 u8 flags; 36 struct udp_port_cfg udp_config; 37}; 38 39static unsigned int fou_net_id; 40 41struct fou_net { 42 struct list_head fou_list; 43 struct mutex fou_lock; 44}; 45 46static inline struct fou *fou_from_sock(struct sock *sk) 47{ 48 return sk->sk_user_data; 49} 50 51static int fou_recv_pull(struct sk_buff *skb, size_t len) 52{ 53 struct iphdr *iph = ip_hdr(skb); 54 55 /* Remove 'len' bytes from the packet (UDP header and 56 * FOU header if present). 57 */ 58 iph->tot_len = htons(ntohs(iph->tot_len) - len); 59 __skb_pull(skb, len); 60 skb_postpull_rcsum(skb, udp_hdr(skb), len); 61 skb_reset_transport_header(skb); 62 return iptunnel_pull_offloads(skb); 63} 64 65static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) 66{ 67 struct fou *fou = fou_from_sock(sk); 68 69 if (!fou) 70 return 1; 71 72 if (fou_recv_pull(skb, sizeof(struct udphdr))) 73 goto drop; 74 75 return -fou->protocol; 76 77drop: 78 kfree_skb(skb); 79 return 0; 80} 81 82static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, 83 void *data, size_t hdrlen, u8 ipproto, 84 bool nopartial) 85{ 86 __be16 *pd = data; 87 size_t start = ntohs(pd[0]); 88 size_t offset = ntohs(pd[1]); 89 size_t plen = sizeof(struct udphdr) + hdrlen + 90 max_t(size_t, offset + sizeof(u16), start); 91 92 if (skb->remcsum_offload) 93 return guehdr; 94 95 if (!pskb_may_pull(skb, plen)) 96 return NULL; 97 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 98 99 skb_remcsum_process(skb, (void *)guehdr + hdrlen, 100 start, offset, nopartial); 101 102 return guehdr; 103} 104 105static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr) 106{ 107 /* No support yet */ 108 kfree_skb(skb); 109 return 0; 110} 111 112static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) 113{ 114 struct fou *fou = fou_from_sock(sk); 115 size_t len, optlen, hdrlen; 116 struct guehdr *guehdr; 117 void *data; 118 u16 doffset = 0; 119 120 if (!fou) 121 return 1; 122 123 len = sizeof(struct udphdr) + sizeof(struct guehdr); 124 if (!pskb_may_pull(skb, len)) 125 goto drop; 126 127 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 128 129 optlen = guehdr->hlen << 2; 130 len += optlen; 131 132 if (!pskb_may_pull(skb, len)) 133 goto drop; 134 135 /* guehdr may change after pull */ 136 guehdr = (struct guehdr *)&udp_hdr(skb)[1]; 137 138 hdrlen = sizeof(struct guehdr) + optlen; 139 140 if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen)) 141 goto drop; 142 143 hdrlen = sizeof(struct guehdr) + optlen; 144 145 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len); 146 147 /* Pull csum through the guehdr now . This can be used if 148 * there is a remote checksum offload. 149 */ 150 skb_postpull_rcsum(skb, udp_hdr(skb), len); 151 152 data = &guehdr[1]; 153 154 if (guehdr->flags & GUE_FLAG_PRIV) { 155 __be32 flags = *(__be32 *)(data + doffset); 156 157 doffset += GUE_LEN_PRIV; 158 159 if (flags & GUE_PFLAG_REMCSUM) { 160 guehdr = gue_remcsum(skb, guehdr, data + doffset, 161 hdrlen, guehdr->proto_ctype, 162 !!(fou->flags & 163 FOU_F_REMCSUM_NOPARTIAL)); 164 if (!guehdr) 165 goto drop; 166 167 data = &guehdr[1]; 168 169 doffset += GUE_PLEN_REMCSUM; 170 } 171 } 172 173 if (unlikely(guehdr->control)) 174 return gue_control_message(skb, guehdr); 175 176 __skb_pull(skb, sizeof(struct udphdr) + hdrlen); 177 skb_reset_transport_header(skb); 178 179 if (iptunnel_pull_offloads(skb)) 180 goto drop; 181 182 return -guehdr->proto_ctype; 183 184drop: 185 kfree_skb(skb); 186 return 0; 187} 188 189static struct sk_buff **fou_gro_receive(struct sk_buff **head, 190 struct sk_buff *skb, 191 struct udp_offload *uoff) 192{ 193 const struct net_offload *ops; 194 struct sk_buff **pp = NULL; 195 u8 proto = NAPI_GRO_CB(skb)->proto; 196 const struct net_offload **offloads; 197 198 /* We can clear the encap_mark for FOU as we are essentially doing 199 * one of two possible things. We are either adding an L4 tunnel 200 * header to the outer L3 tunnel header, or we are are simply 201 * treating the GRE tunnel header as though it is a UDP protocol 202 * specific header such as VXLAN or GENEVE. 203 */ 204 NAPI_GRO_CB(skb)->encap_mark = 0; 205 206 /* Flag this frame as already having an outer encap header */ 207 NAPI_GRO_CB(skb)->is_fou = 1; 208 209 rcu_read_lock(); 210 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 211 ops = rcu_dereference(offloads[proto]); 212 if (!ops || !ops->callbacks.gro_receive) 213 goto out_unlock; 214 215 pp = ops->callbacks.gro_receive(head, skb); 216 217out_unlock: 218 rcu_read_unlock(); 219 220 return pp; 221} 222 223static int fou_gro_complete(struct sk_buff *skb, int nhoff, 224 struct udp_offload *uoff) 225{ 226 const struct net_offload *ops; 227 u8 proto = NAPI_GRO_CB(skb)->proto; 228 int err = -ENOSYS; 229 const struct net_offload **offloads; 230 231 udp_tunnel_gro_complete(skb, nhoff); 232 233 rcu_read_lock(); 234 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 235 ops = rcu_dereference(offloads[proto]); 236 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 237 goto out_unlock; 238 239 err = ops->callbacks.gro_complete(skb, nhoff); 240 241out_unlock: 242 rcu_read_unlock(); 243 244 return err; 245} 246 247static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, 248 struct guehdr *guehdr, void *data, 249 size_t hdrlen, struct gro_remcsum *grc, 250 bool nopartial) 251{ 252 __be16 *pd = data; 253 size_t start = ntohs(pd[0]); 254 size_t offset = ntohs(pd[1]); 255 256 if (skb->remcsum_offload) 257 return guehdr; 258 259 if (!NAPI_GRO_CB(skb)->csum_valid) 260 return NULL; 261 262 guehdr = skb_gro_remcsum_process(skb, (void *)guehdr, off, hdrlen, 263 start, offset, grc, nopartial); 264 265 skb->remcsum_offload = 1; 266 267 return guehdr; 268} 269 270static struct sk_buff **gue_gro_receive(struct sk_buff **head, 271 struct sk_buff *skb, 272 struct udp_offload *uoff) 273{ 274 const struct net_offload **offloads; 275 const struct net_offload *ops; 276 struct sk_buff **pp = NULL; 277 struct sk_buff *p; 278 struct guehdr *guehdr; 279 size_t len, optlen, hdrlen, off; 280 void *data; 281 u16 doffset = 0; 282 int flush = 1; 283 struct fou *fou = container_of(uoff, struct fou, udp_offloads); 284 struct gro_remcsum grc; 285 286 skb_gro_remcsum_init(&grc); 287 288 off = skb_gro_offset(skb); 289 len = off + sizeof(*guehdr); 290 291 guehdr = skb_gro_header_fast(skb, off); 292 if (skb_gro_header_hard(skb, len)) { 293 guehdr = skb_gro_header_slow(skb, len, off); 294 if (unlikely(!guehdr)) 295 goto out; 296 } 297 298 optlen = guehdr->hlen << 2; 299 len += optlen; 300 301 if (skb_gro_header_hard(skb, len)) { 302 guehdr = skb_gro_header_slow(skb, len, off); 303 if (unlikely(!guehdr)) 304 goto out; 305 } 306 307 if (unlikely(guehdr->control) || guehdr->version != 0 || 308 validate_gue_flags(guehdr, optlen)) 309 goto out; 310 311 hdrlen = sizeof(*guehdr) + optlen; 312 313 /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr, 314 * this is needed if there is a remote checkcsum offload. 315 */ 316 skb_gro_postpull_rcsum(skb, guehdr, hdrlen); 317 318 data = &guehdr[1]; 319 320 if (guehdr->flags & GUE_FLAG_PRIV) { 321 __be32 flags = *(__be32 *)(data + doffset); 322 323 doffset += GUE_LEN_PRIV; 324 325 if (flags & GUE_PFLAG_REMCSUM) { 326 guehdr = gue_gro_remcsum(skb, off, guehdr, 327 data + doffset, hdrlen, &grc, 328 !!(fou->flags & 329 FOU_F_REMCSUM_NOPARTIAL)); 330 331 if (!guehdr) 332 goto out; 333 334 data = &guehdr[1]; 335 336 doffset += GUE_PLEN_REMCSUM; 337 } 338 } 339 340 skb_gro_pull(skb, hdrlen); 341 342 for (p = *head; p; p = p->next) { 343 const struct guehdr *guehdr2; 344 345 if (!NAPI_GRO_CB(p)->same_flow) 346 continue; 347 348 guehdr2 = (struct guehdr *)(p->data + off); 349 350 /* Compare base GUE header to be equal (covers 351 * hlen, version, proto_ctype, and flags. 352 */ 353 if (guehdr->word != guehdr2->word) { 354 NAPI_GRO_CB(p)->same_flow = 0; 355 continue; 356 } 357 358 /* Compare optional fields are the same. */ 359 if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1], 360 guehdr->hlen << 2)) { 361 NAPI_GRO_CB(p)->same_flow = 0; 362 continue; 363 } 364 } 365 366 /* We can clear the encap_mark for GUE as we are essentially doing 367 * one of two possible things. We are either adding an L4 tunnel 368 * header to the outer L3 tunnel header, or we are are simply 369 * treating the GRE tunnel header as though it is a UDP protocol 370 * specific header such as VXLAN or GENEVE. 371 */ 372 NAPI_GRO_CB(skb)->encap_mark = 0; 373 374 /* Flag this frame as already having an outer encap header */ 375 NAPI_GRO_CB(skb)->is_fou = 1; 376 377 rcu_read_lock(); 378 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 379 ops = rcu_dereference(offloads[guehdr->proto_ctype]); 380 if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) 381 goto out_unlock; 382 383 pp = ops->callbacks.gro_receive(head, skb); 384 flush = 0; 385 386out_unlock: 387 rcu_read_unlock(); 388out: 389 NAPI_GRO_CB(skb)->flush |= flush; 390 skb_gro_remcsum_cleanup(skb, &grc); 391 392 return pp; 393} 394 395static int gue_gro_complete(struct sk_buff *skb, int nhoff, 396 struct udp_offload *uoff) 397{ 398 const struct net_offload **offloads; 399 struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); 400 const struct net_offload *ops; 401 unsigned int guehlen; 402 u8 proto; 403 int err = -ENOENT; 404 405 proto = guehdr->proto_ctype; 406 407 guehlen = sizeof(*guehdr) + (guehdr->hlen << 2); 408 409 rcu_read_lock(); 410 offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; 411 ops = rcu_dereference(offloads[proto]); 412 if (WARN_ON(!ops || !ops->callbacks.gro_complete)) 413 goto out_unlock; 414 415 err = ops->callbacks.gro_complete(skb, nhoff + guehlen); 416 417out_unlock: 418 rcu_read_unlock(); 419 return err; 420} 421 422static int fou_add_to_port_list(struct net *net, struct fou *fou) 423{ 424 struct fou_net *fn = net_generic(net, fou_net_id); 425 struct fou *fout; 426 427 mutex_lock(&fn->fou_lock); 428 list_for_each_entry(fout, &fn->fou_list, list) { 429 if (fou->port == fout->port) { 430 mutex_unlock(&fn->fou_lock); 431 return -EALREADY; 432 } 433 } 434 435 list_add(&fou->list, &fn->fou_list); 436 mutex_unlock(&fn->fou_lock); 437 438 return 0; 439} 440 441static void fou_release(struct fou *fou) 442{ 443 struct socket *sock = fou->sock; 444 struct sock *sk = sock->sk; 445 446 if (sk->sk_family == AF_INET) 447 udp_del_offload(&fou->udp_offloads); 448 list_del(&fou->list); 449 udp_tunnel_sock_release(sock); 450 451 kfree_rcu(fou, rcu); 452} 453 454static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) 455{ 456 udp_sk(sk)->encap_rcv = fou_udp_recv; 457 fou->protocol = cfg->protocol; 458 fou->udp_offloads.callbacks.gro_receive = fou_gro_receive; 459 fou->udp_offloads.callbacks.gro_complete = fou_gro_complete; 460 fou->udp_offloads.port = cfg->udp_config.local_udp_port; 461 fou->udp_offloads.ipproto = cfg->protocol; 462 463 return 0; 464} 465 466static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) 467{ 468 udp_sk(sk)->encap_rcv = gue_udp_recv; 469 fou->udp_offloads.callbacks.gro_receive = gue_gro_receive; 470 fou->udp_offloads.callbacks.gro_complete = gue_gro_complete; 471 fou->udp_offloads.port = cfg->udp_config.local_udp_port; 472 473 return 0; 474} 475 476static int fou_create(struct net *net, struct fou_cfg *cfg, 477 struct socket **sockp) 478{ 479 struct socket *sock = NULL; 480 struct fou *fou = NULL; 481 struct sock *sk; 482 int err; 483 484 /* Open UDP socket */ 485 err = udp_sock_create(net, &cfg->udp_config, &sock); 486 if (err < 0) 487 goto error; 488 489 /* Allocate FOU port structure */ 490 fou = kzalloc(sizeof(*fou), GFP_KERNEL); 491 if (!fou) { 492 err = -ENOMEM; 493 goto error; 494 } 495 496 sk = sock->sk; 497 498 fou->flags = cfg->flags; 499 fou->port = cfg->udp_config.local_udp_port; 500 501 /* Initial for fou type */ 502 switch (cfg->type) { 503 case FOU_ENCAP_DIRECT: 504 err = fou_encap_init(sk, fou, cfg); 505 if (err) 506 goto error; 507 break; 508 case FOU_ENCAP_GUE: 509 err = gue_encap_init(sk, fou, cfg); 510 if (err) 511 goto error; 512 break; 513 default: 514 err = -EINVAL; 515 goto error; 516 } 517 518 fou->type = cfg->type; 519 520 udp_sk(sk)->encap_type = 1; 521 udp_encap_enable(); 522 523 sk->sk_user_data = fou; 524 fou->sock = sock; 525 526 inet_inc_convert_csum(sk); 527 528 sk->sk_allocation = GFP_ATOMIC; 529 530 if (cfg->udp_config.family == AF_INET) { 531 err = udp_add_offload(net, &fou->udp_offloads); 532 if (err) 533 goto error; 534 } 535 536 err = fou_add_to_port_list(net, fou); 537 if (err) 538 goto error; 539 540 if (sockp) 541 *sockp = sock; 542 543 return 0; 544 545error: 546 kfree(fou); 547 if (sock) 548 udp_tunnel_sock_release(sock); 549 550 return err; 551} 552 553static int fou_destroy(struct net *net, struct fou_cfg *cfg) 554{ 555 struct fou_net *fn = net_generic(net, fou_net_id); 556 __be16 port = cfg->udp_config.local_udp_port; 557 int err = -EINVAL; 558 struct fou *fou; 559 560 mutex_lock(&fn->fou_lock); 561 list_for_each_entry(fou, &fn->fou_list, list) { 562 if (fou->port == port) { 563 fou_release(fou); 564 err = 0; 565 break; 566 } 567 } 568 mutex_unlock(&fn->fou_lock); 569 570 return err; 571} 572 573static struct genl_family fou_nl_family = { 574 .id = GENL_ID_GENERATE, 575 .hdrsize = 0, 576 .name = FOU_GENL_NAME, 577 .version = FOU_GENL_VERSION, 578 .maxattr = FOU_ATTR_MAX, 579 .netnsok = true, 580}; 581 582static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { 583 [FOU_ATTR_PORT] = { .type = NLA_U16, }, 584 [FOU_ATTR_AF] = { .type = NLA_U8, }, 585 [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, 586 [FOU_ATTR_TYPE] = { .type = NLA_U8, }, 587 [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, 588}; 589 590static int parse_nl_config(struct genl_info *info, 591 struct fou_cfg *cfg) 592{ 593 memset(cfg, 0, sizeof(*cfg)); 594 595 cfg->udp_config.family = AF_INET; 596 597 if (info->attrs[FOU_ATTR_AF]) { 598 u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]); 599 600 if (family != AF_INET) 601 return -EINVAL; 602 603 cfg->udp_config.family = family; 604 } 605 606 if (info->attrs[FOU_ATTR_PORT]) { 607 __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); 608 609 cfg->udp_config.local_udp_port = port; 610 } 611 612 if (info->attrs[FOU_ATTR_IPPROTO]) 613 cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]); 614 615 if (info->attrs[FOU_ATTR_TYPE]) 616 cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]); 617 618 if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) 619 cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; 620 621 return 0; 622} 623 624static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) 625{ 626 struct net *net = genl_info_net(info); 627 struct fou_cfg cfg; 628 int err; 629 630 err = parse_nl_config(info, &cfg); 631 if (err) 632 return err; 633 634 return fou_create(net, &cfg, NULL); 635} 636 637static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) 638{ 639 struct net *net = genl_info_net(info); 640 struct fou_cfg cfg; 641 int err; 642 643 err = parse_nl_config(info, &cfg); 644 if (err) 645 return err; 646 647 return fou_destroy(net, &cfg); 648} 649 650static int fou_fill_info(struct fou *fou, struct sk_buff *msg) 651{ 652 if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || 653 nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || 654 nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || 655 nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) 656 return -1; 657 658 if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) 659 if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) 660 return -1; 661 return 0; 662} 663 664static int fou_dump_info(struct fou *fou, u32 portid, u32 seq, 665 u32 flags, struct sk_buff *skb, u8 cmd) 666{ 667 void *hdr; 668 669 hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd); 670 if (!hdr) 671 return -ENOMEM; 672 673 if (fou_fill_info(fou, skb) < 0) 674 goto nla_put_failure; 675 676 genlmsg_end(skb, hdr); 677 return 0; 678 679nla_put_failure: 680 genlmsg_cancel(skb, hdr); 681 return -EMSGSIZE; 682} 683 684static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) 685{ 686 struct net *net = genl_info_net(info); 687 struct fou_net *fn = net_generic(net, fou_net_id); 688 struct sk_buff *msg; 689 struct fou_cfg cfg; 690 struct fou *fout; 691 __be16 port; 692 int ret; 693 694 ret = parse_nl_config(info, &cfg); 695 if (ret) 696 return ret; 697 port = cfg.udp_config.local_udp_port; 698 if (port == 0) 699 return -EINVAL; 700 701 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 702 if (!msg) 703 return -ENOMEM; 704 705 ret = -ESRCH; 706 mutex_lock(&fn->fou_lock); 707 list_for_each_entry(fout, &fn->fou_list, list) { 708 if (port == fout->port) { 709 ret = fou_dump_info(fout, info->snd_portid, 710 info->snd_seq, 0, msg, 711 info->genlhdr->cmd); 712 break; 713 } 714 } 715 mutex_unlock(&fn->fou_lock); 716 if (ret < 0) 717 goto out_free; 718 719 return genlmsg_reply(msg, info); 720 721out_free: 722 nlmsg_free(msg); 723 return ret; 724} 725 726static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) 727{ 728 struct net *net = sock_net(skb->sk); 729 struct fou_net *fn = net_generic(net, fou_net_id); 730 struct fou *fout; 731 int idx = 0, ret; 732 733 mutex_lock(&fn->fou_lock); 734 list_for_each_entry(fout, &fn->fou_list, list) { 735 if (idx++ < cb->args[0]) 736 continue; 737 ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid, 738 cb->nlh->nlmsg_seq, NLM_F_MULTI, 739 skb, FOU_CMD_GET); 740 if (ret) 741 break; 742 } 743 mutex_unlock(&fn->fou_lock); 744 745 cb->args[0] = idx; 746 return skb->len; 747} 748 749static const struct genl_ops fou_nl_ops[] = { 750 { 751 .cmd = FOU_CMD_ADD, 752 .doit = fou_nl_cmd_add_port, 753 .policy = fou_nl_policy, 754 .flags = GENL_ADMIN_PERM, 755 }, 756 { 757 .cmd = FOU_CMD_DEL, 758 .doit = fou_nl_cmd_rm_port, 759 .policy = fou_nl_policy, 760 .flags = GENL_ADMIN_PERM, 761 }, 762 { 763 .cmd = FOU_CMD_GET, 764 .doit = fou_nl_cmd_get_port, 765 .dumpit = fou_nl_dump, 766 .policy = fou_nl_policy, 767 }, 768}; 769 770size_t fou_encap_hlen(struct ip_tunnel_encap *e) 771{ 772 return sizeof(struct udphdr); 773} 774EXPORT_SYMBOL(fou_encap_hlen); 775 776size_t gue_encap_hlen(struct ip_tunnel_encap *e) 777{ 778 size_t len; 779 bool need_priv = false; 780 781 len = sizeof(struct udphdr) + sizeof(struct guehdr); 782 783 if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) { 784 len += GUE_PLEN_REMCSUM; 785 need_priv = true; 786 } 787 788 len += need_priv ? GUE_LEN_PRIV : 0; 789 790 return len; 791} 792EXPORT_SYMBOL(gue_encap_hlen); 793 794static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e, 795 struct flowi4 *fl4, u8 *protocol, __be16 sport) 796{ 797 struct udphdr *uh; 798 799 skb_push(skb, sizeof(struct udphdr)); 800 skb_reset_transport_header(skb); 801 802 uh = udp_hdr(skb); 803 804 uh->dest = e->dport; 805 uh->source = sport; 806 uh->len = htons(skb->len); 807 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, 808 fl4->saddr, fl4->daddr, skb->len); 809 810 *protocol = IPPROTO_UDP; 811} 812 813int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 814 u8 *protocol, struct flowi4 *fl4) 815{ 816 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : 817 SKB_GSO_UDP_TUNNEL; 818 __be16 sport; 819 820 skb = iptunnel_handle_offloads(skb, type); 821 822 if (IS_ERR(skb)) 823 return PTR_ERR(skb); 824 825 sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), 826 skb, 0, 0, false); 827 fou_build_udp(skb, e, fl4, protocol, sport); 828 829 return 0; 830} 831EXPORT_SYMBOL(fou_build_header); 832 833int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 834 u8 *protocol, struct flowi4 *fl4) 835{ 836 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM : 837 SKB_GSO_UDP_TUNNEL; 838 struct guehdr *guehdr; 839 size_t hdrlen, optlen = 0; 840 __be16 sport; 841 void *data; 842 bool need_priv = false; 843 844 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && 845 skb->ip_summed == CHECKSUM_PARTIAL) { 846 optlen += GUE_PLEN_REMCSUM; 847 type |= SKB_GSO_TUNNEL_REMCSUM; 848 need_priv = true; 849 } 850 851 optlen += need_priv ? GUE_LEN_PRIV : 0; 852 853 skb = iptunnel_handle_offloads(skb, type); 854 855 if (IS_ERR(skb)) 856 return PTR_ERR(skb); 857 858 /* Get source port (based on flow hash) before skb_push */ 859 sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev), 860 skb, 0, 0, false); 861 862 hdrlen = sizeof(struct guehdr) + optlen; 863 864 skb_push(skb, hdrlen); 865 866 guehdr = (struct guehdr *)skb->data; 867 868 guehdr->control = 0; 869 guehdr->version = 0; 870 guehdr->hlen = optlen >> 2; 871 guehdr->flags = 0; 872 guehdr->proto_ctype = *protocol; 873 874 data = &guehdr[1]; 875 876 if (need_priv) { 877 __be32 *flags = data; 878 879 guehdr->flags |= GUE_FLAG_PRIV; 880 *flags = 0; 881 data += GUE_LEN_PRIV; 882 883 if (type & SKB_GSO_TUNNEL_REMCSUM) { 884 u16 csum_start = skb_checksum_start_offset(skb); 885 __be16 *pd = data; 886 887 if (csum_start < hdrlen) 888 return -EINVAL; 889 890 csum_start -= hdrlen; 891 pd[0] = htons(csum_start); 892 pd[1] = htons(csum_start + skb->csum_offset); 893 894 if (!skb_is_gso(skb)) { 895 skb->ip_summed = CHECKSUM_NONE; 896 skb->encapsulation = 0; 897 } 898 899 *flags |= GUE_PFLAG_REMCSUM; 900 data += GUE_PLEN_REMCSUM; 901 } 902 903 } 904 905 fou_build_udp(skb, e, fl4, protocol, sport); 906 907 return 0; 908} 909EXPORT_SYMBOL(gue_build_header); 910 911#ifdef CONFIG_NET_FOU_IP_TUNNELS 912 913static const struct ip_tunnel_encap_ops fou_iptun_ops = { 914 .encap_hlen = fou_encap_hlen, 915 .build_header = fou_build_header, 916}; 917 918static const struct ip_tunnel_encap_ops gue_iptun_ops = { 919 .encap_hlen = gue_encap_hlen, 920 .build_header = gue_build_header, 921}; 922 923static int ip_tunnel_encap_add_fou_ops(void) 924{ 925 int ret; 926 927 ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 928 if (ret < 0) { 929 pr_err("can't add fou ops\n"); 930 return ret; 931 } 932 933 ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE); 934 if (ret < 0) { 935 pr_err("can't add gue ops\n"); 936 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 937 return ret; 938 } 939 940 return 0; 941} 942 943static void ip_tunnel_encap_del_fou_ops(void) 944{ 945 ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU); 946 ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE); 947} 948 949#else 950 951static int ip_tunnel_encap_add_fou_ops(void) 952{ 953 return 0; 954} 955 956static void ip_tunnel_encap_del_fou_ops(void) 957{ 958} 959 960#endif 961 962static __net_init int fou_init_net(struct net *net) 963{ 964 struct fou_net *fn = net_generic(net, fou_net_id); 965 966 INIT_LIST_HEAD(&fn->fou_list); 967 mutex_init(&fn->fou_lock); 968 return 0; 969} 970 971static __net_exit void fou_exit_net(struct net *net) 972{ 973 struct fou_net *fn = net_generic(net, fou_net_id); 974 struct fou *fou, *next; 975 976 /* Close all the FOU sockets */ 977 mutex_lock(&fn->fou_lock); 978 list_for_each_entry_safe(fou, next, &fn->fou_list, list) 979 fou_release(fou); 980 mutex_unlock(&fn->fou_lock); 981} 982 983static struct pernet_operations fou_net_ops = { 984 .init = fou_init_net, 985 .exit = fou_exit_net, 986 .id = &fou_net_id, 987 .size = sizeof(struct fou_net), 988}; 989 990static int __init fou_init(void) 991{ 992 int ret; 993 994 ret = register_pernet_device(&fou_net_ops); 995 if (ret) 996 goto exit; 997 998 ret = genl_register_family_with_ops(&fou_nl_family, 999 fou_nl_ops); 1000 if (ret < 0) 1001 goto unregister; 1002 1003 ret = ip_tunnel_encap_add_fou_ops(); 1004 if (ret == 0) 1005 return 0; 1006 1007 genl_unregister_family(&fou_nl_family); 1008unregister: 1009 unregister_pernet_device(&fou_net_ops); 1010exit: 1011 return ret; 1012} 1013 1014static void __exit fou_fini(void) 1015{ 1016 ip_tunnel_encap_del_fou_ops(); 1017 genl_unregister_family(&fou_nl_family); 1018 unregister_pernet_device(&fou_net_ops); 1019} 1020 1021module_init(fou_init); 1022module_exit(fou_fini); 1023MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); 1024MODULE_LICENSE("GPL");