Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

lwtunnel: add support for multiple geneve opts

The geneve RFC draft (draft-ietf-nvo3-geneve-14) allows a geneve packet to
carry multiple geneve opts, so lwtunnel needs to support adding multiple
geneve opts in one lwtunnel route. vxlan and erspan opts are still limited
to a single option per route.

With this patch, iproute2 can configure the options like this:

# ip r a 1.1.1.0/24 encap ip id 1 geneve_opts 0:0:12121212,1:2:12121212 \
dst 10.1.0.2 dev geneve1

# ip r a 1.1.1.0/24 encap ip id 1 vxlan_opts 456 \
dst 10.1.0.2 dev vxlan1

# ip r a 1.1.1.0/24 encap ip id 1 erspan_opts 1:123:0:0 \
dst 10.1.0.2 dev erspan1

This is pretty much the same as what cls_flower and act_tunnel_key already do.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Xin Long and committed by
David S. Miller
2f1d370b 272630fe

+74 -35
+74 -35
net/ipv4/ip_tunnel_core.c
··· 251 251 }; 252 252 253 253 static int ip_tun_parse_opts_geneve(struct nlattr *attr, 254 - struct ip_tunnel_info *info, 254 + struct ip_tunnel_info *info, int opts_len, 255 255 struct netlink_ext_ack *extack) 256 256 { 257 257 struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1]; ··· 273 273 return -EINVAL; 274 274 275 275 if (info) { 276 - struct geneve_opt *opt = ip_tunnel_info_opts(info); 276 + struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len; 277 277 278 278 memcpy(opt->opt_data, nla_data(attr), data_len); 279 279 opt->length = data_len / 4; ··· 288 288 } 289 289 290 290 static int ip_tun_parse_opts_vxlan(struct nlattr *attr, 291 - struct ip_tunnel_info *info, 291 + struct ip_tunnel_info *info, int opts_len, 292 292 struct netlink_ext_ack *extack) 293 293 { 294 294 struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1]; ··· 303 303 return -EINVAL; 304 304 305 305 if (info) { 306 - struct vxlan_metadata *md = ip_tunnel_info_opts(info); 306 + struct vxlan_metadata *md = 307 + ip_tunnel_info_opts(info) + opts_len; 307 308 308 309 attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; 309 310 md->gbp = nla_get_u32(attr); ··· 315 314 } 316 315 317 316 static int ip_tun_parse_opts_erspan(struct nlattr *attr, 318 - struct ip_tunnel_info *info, 317 + struct ip_tunnel_info *info, int opts_len, 319 318 struct netlink_ext_ack *extack) 320 319 { 321 320 struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1]; ··· 330 329 return -EINVAL; 331 330 332 331 if (info) { 333 - struct erspan_metadata *md = ip_tunnel_info_opts(info); 332 + struct erspan_metadata *md = 333 + ip_tunnel_info_opts(info) + opts_len; 334 334 335 335 attr = tb[LWTUNNEL_IP_OPT_ERSPAN_VER]; 336 336 md->version = nla_get_u8(attr); ··· 358 356 static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info, 359 357 struct netlink_ext_ack *extack) 360 358 { 361 - struct nlattr *tb[LWTUNNEL_IP_OPTS_MAX + 1]; 362 - int err; 359 + int err, rem, opt_len, opts_len = 0, type = 0; 360 + struct nlattr *nla; 363 361 364 
362 if (!attr) 365 363 return 0; 366 364 367 - err = nla_parse_nested(tb, LWTUNNEL_IP_OPTS_MAX, attr, 368 - ip_opts_policy, extack); 365 + err = nla_validate(nla_data(attr), nla_len(attr), LWTUNNEL_IP_OPTS_MAX, 366 + ip_opts_policy, extack); 369 367 if (err) 370 368 return err; 371 369 372 - if (tb[LWTUNNEL_IP_OPTS_GENEVE]) 373 - err = ip_tun_parse_opts_geneve(tb[LWTUNNEL_IP_OPTS_GENEVE], 374 - info, extack); 375 - else if (tb[LWTUNNEL_IP_OPTS_VXLAN]) 376 - err = ip_tun_parse_opts_vxlan(tb[LWTUNNEL_IP_OPTS_VXLAN], 377 - info, extack); 378 - else if (tb[LWTUNNEL_IP_OPTS_ERSPAN]) 379 - err = ip_tun_parse_opts_erspan(tb[LWTUNNEL_IP_OPTS_ERSPAN], 380 - info, extack); 381 - else 382 - err = -EINVAL; 370 + nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) { 371 + switch (nla_type(nla)) { 372 + case LWTUNNEL_IP_OPTS_GENEVE: 373 + if (type && type != TUNNEL_GENEVE_OPT) 374 + return -EINVAL; 375 + opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len, 376 + extack); 377 + if (opt_len < 0) 378 + return opt_len; 379 + opts_len += opt_len; 380 + if (opts_len > IP_TUNNEL_OPTS_MAX) 381 + return -EINVAL; 382 + type = TUNNEL_GENEVE_OPT; 383 + break; 384 + case LWTUNNEL_IP_OPTS_VXLAN: 385 + if (type) 386 + return -EINVAL; 387 + opt_len = ip_tun_parse_opts_vxlan(nla, info, opts_len, 388 + extack); 389 + if (opt_len < 0) 390 + return opt_len; 391 + opts_len += opt_len; 392 + type = TUNNEL_VXLAN_OPT; 393 + break; 394 + case LWTUNNEL_IP_OPTS_ERSPAN: 395 + if (type) 396 + return -EINVAL; 397 + opt_len = ip_tun_parse_opts_erspan(nla, info, opts_len, 398 + extack); 399 + if (opt_len < 0) 400 + return opt_len; 401 + opts_len += opt_len; 402 + type = TUNNEL_ERSPAN_OPT; 403 + break; 404 + default: 405 + return -EINVAL; 406 + } 407 + } 383 408 384 - return err; 409 + return opts_len; 385 410 } 386 411 387 412 static int ip_tun_get_optlen(struct nlattr *attr, ··· 506 477 { 507 478 struct geneve_opt *opt; 508 479 struct nlattr *nest; 480 + int offset = 0; 509 481 510 482 nest = 
nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE); 511 483 if (!nest) 512 484 return -ENOMEM; 513 485 514 - opt = ip_tunnel_info_opts(tun_info); 515 - if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) || 516 - nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) || 517 - nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4, 518 - opt->opt_data)) { 519 - nla_nest_cancel(skb, nest); 520 - return -ENOMEM; 486 + while (tun_info->options_len > offset) { 487 + opt = ip_tunnel_info_opts(tun_info) + offset; 488 + if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, 489 + opt->opt_class) || 490 + nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) || 491 + nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4, 492 + opt->opt_data)) { 493 + nla_nest_cancel(skb, nest); 494 + return -ENOMEM; 495 + } 496 + offset += sizeof(*opt) + opt->length * 4; 521 497 } 522 498 523 499 nla_nest_end(skb, nest); ··· 636 602 637 603 opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */ 638 604 if (info->key.tun_flags & TUNNEL_GENEVE_OPT) { 639 - struct geneve_opt *opt = ip_tunnel_info_opts(info); 605 + struct geneve_opt *opt; 606 + int offset = 0; 640 607 641 - opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_GENEVE */ 642 - + nla_total_size(2) /* OPT_GENEVE_CLASS */ 643 - + nla_total_size(1) /* OPT_GENEVE_TYPE */ 644 - + nla_total_size(opt->length * 4); 645 - /* OPT_GENEVE_DATA */ 608 + opt_len += nla_total_size(0); /* LWTUNNEL_IP_OPTS_GENEVE */ 609 + while (info->options_len > offset) { 610 + opt = ip_tunnel_info_opts(info) + offset; 611 + opt_len += nla_total_size(2) /* OPT_GENEVE_CLASS */ 612 + + nla_total_size(1) /* OPT_GENEVE_TYPE */ 613 + + nla_total_size(opt->length * 4); 614 + /* OPT_GENEVE_DATA */ 615 + offset += sizeof(*opt) + opt->length * 4; 616 + } 646 617 } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) { 647 618 opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */ 648 619 + nla_total_size(4); /* OPT_VXLAN_GBP */