Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: openvswitch: Support masked set actions.

OVS userspace already probes the openvswitch kernel module for
OVS_ACTION_ATTR_SET_MASKED support. This patch adds the kernel module
implementation of masked set actions.

The existing set action sets many fields at once. When only a subset
of the IP header fields, for example, should be modified, all the IP
fields need to be exactly matched so that the other field values can be
copied to the set action. A masked set action allows modification of
an arbitrary subset of the supported header bits without requiring the
rest to be matched.

Masked set action is now supported for all writeable key types, except
for the tunnel key. The set tunnel action is an exception as any
input tunnel info is cleared before action processing starts, so there
is no tunnel info to mask.

The kernel module converts all (non-tunnel) set actions to masked set
actions. This makes action processing more uniform, and results in
less branching and less duplication of the action processing code. When
returning actions to userspace, the fully masked set actions are
converted back to normal set actions. We use a kernel-internal action
code to be able to tell the userspace-provided and converted masked
set actions apart.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jarno Rajahalme and committed by
David S. Miller
83d2b9ba 2150f984

+381 -171
+21 -1
include/uapi/linux/openvswitch.h
··· 599 599 * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The 600 600 * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its 601 601 * value. 602 + * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A 603 + * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value, 604 + * and a mask. For every bit set in the mask, the corresponding bit value 605 + * is copied from the value to the packet header field, rest of the bits are 606 + * left unchanged. The non-masked value bits must be passed in as zeroes. 607 + * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. 602 608 * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the 603 609 * packet. 604 610 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. ··· 623 617 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all 624 618 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment 625 619 * type may not be changed. 620 + * 621 + * @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated 622 + * from the @OVS_ACTION_ATTR_SET. 626 623 */ 627 624 628 625 enum ovs_action_attr { ··· 640 631 OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ 641 632 OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ 642 633 OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ 634 + OVS_ACTION_ATTR_SET_MASKED, /* One nested OVS_KEY_ATTR_* including 635 + * data immediately followed by a mask. 636 + * The data must be zero for the unmasked 637 + * bits. */ 643 638 644 - __OVS_ACTION_ATTR_MAX 639 + __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted 640 + * from userspace. */ 641 + 642 + #ifdef __KERNEL__ 643 + OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked 644 + * set action converted from 645 + * OVS_ACTION_ATTR_SET. 
*/ 646 + #endif 645 647 }; 646 648 647 649 #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+233 -136
net/openvswitch/actions.c
··· 185 185 return 0; 186 186 } 187 187 188 - static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, 189 - const __be32 *mpls_lse) 188 + /* 'KEY' must not have any bits set outside of the 'MASK' */ 189 + #define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) 190 + #define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) 191 + 192 + static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, 193 + const __be32 *mpls_lse, const __be32 *mask) 190 194 { 191 195 __be32 *stack; 196 + __be32 lse; 192 197 int err; 193 198 194 199 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); ··· 201 196 return err; 202 197 203 198 stack = (__be32 *)skb_mpls_header(skb); 199 + lse = MASKED(*stack, *mpls_lse, *mask); 204 200 if (skb->ip_summed == CHECKSUM_COMPLETE) { 205 - __be32 diff[] = { ~(*stack), *mpls_lse }; 201 + __be32 diff[] = { ~(*stack), lse }; 202 + 206 203 skb->csum = ~csum_partial((char *)diff, sizeof(diff), 207 204 ~skb->csum); 208 205 } 209 206 210 - *stack = *mpls_lse; 211 - key->mpls.top_lse = *mpls_lse; 207 + *stack = lse; 208 + flow_key->mpls.top_lse = lse; 212 209 return 0; 213 210 } 214 211 ··· 237 230 ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); 238 231 } 239 232 240 - static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, 241 - const struct ovs_key_ethernet *eth_key) 233 + /* 'src' is already properly masked. 
*/ 234 + static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) 235 + { 236 + u16 *dst = (u16 *)dst_; 237 + const u16 *src = (const u16 *)src_; 238 + const u16 *mask = (const u16 *)mask_; 239 + 240 + SET_MASKED(dst[0], src[0], mask[0]); 241 + SET_MASKED(dst[1], src[1], mask[1]); 242 + SET_MASKED(dst[2], src[2], mask[2]); 243 + } 244 + 245 + static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, 246 + const struct ovs_key_ethernet *key, 247 + const struct ovs_key_ethernet *mask) 242 248 { 243 249 int err; 250 + 244 251 err = skb_ensure_writable(skb, ETH_HLEN); 245 252 if (unlikely(err)) 246 253 return err; 247 254 248 255 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); 249 256 250 - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); 251 - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); 257 + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, 258 + mask->eth_src); 259 + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, 260 + mask->eth_dst); 252 261 253 262 ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); 254 263 255 - ether_addr_copy(key->eth.src, eth_key->eth_src); 256 - ether_addr_copy(key->eth.dst, eth_key->eth_dst); 264 + ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); 265 + ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); 257 266 return 0; 258 267 } 259 268 ··· 327 304 } 328 305 } 329 306 307 + static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], 308 + const __be32 mask[4], __be32 masked[4]) 309 + { 310 + masked[0] = MASKED(old[0], addr[0], mask[0]); 311 + masked[1] = MASKED(old[1], addr[1], mask[1]); 312 + masked[2] = MASKED(old[2], addr[2], mask[2]); 313 + masked[3] = MASKED(old[3], addr[3], mask[3]); 314 + } 315 + 330 316 static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, 331 317 __be32 addr[4], const __be32 new_addr[4], 332 318 bool recalculate_csum) ··· 347 315 memcpy(addr, new_addr, sizeof(__be32[4])); 348 316 } 349 317 
350 - static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) 318 + static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) 351 319 { 352 - nh->priority = tc >> 4; 353 - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); 320 + /* Bits 21-24 are always unmasked, so this retains their values. */ 321 + SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); 322 + SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); 323 + SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); 354 324 } 355 325 356 - static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) 326 + static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, 327 + u8 mask) 357 328 { 358 - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; 359 - nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; 360 - nh->flow_lbl[2] = fl & 0x000000FF; 361 - } 329 + new_ttl = MASKED(nh->ttl, new_ttl, mask); 362 330 363 - static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) 364 - { 365 331 csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); 366 332 nh->ttl = new_ttl; 367 333 } 368 334 369 - static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, 370 - const struct ovs_key_ipv4 *ipv4_key) 335 + static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, 336 + const struct ovs_key_ipv4 *key, 337 + const struct ovs_key_ipv4 *mask) 371 338 { 372 339 struct iphdr *nh; 340 + __be32 new_addr; 373 341 int err; 374 342 375 343 err = skb_ensure_writable(skb, skb_network_offset(skb) + ··· 379 347 380 348 nh = ip_hdr(skb); 381 349 382 - if (ipv4_key->ipv4_src != nh->saddr) { 383 - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); 384 - key->ipv4.addr.src = ipv4_key->ipv4_src; 385 - } 350 + /* Setting an IP addresses is typically only a side effect of 351 + * matching on them in the current userspace implementation, so it 352 + * makes sense to check if the value actually changed. 
353 + */ 354 + if (mask->ipv4_src) { 355 + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); 386 356 387 - if (ipv4_key->ipv4_dst != nh->daddr) { 388 - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); 389 - key->ipv4.addr.dst = ipv4_key->ipv4_dst; 357 + if (unlikely(new_addr != nh->saddr)) { 358 + set_ip_addr(skb, nh, &nh->saddr, new_addr); 359 + flow_key->ipv4.addr.src = new_addr; 360 + } 390 361 } 362 + if (mask->ipv4_dst) { 363 + new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); 391 364 392 - if (ipv4_key->ipv4_tos != nh->tos) { 393 - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); 394 - key->ip.tos = nh->tos; 365 + if (unlikely(new_addr != nh->daddr)) { 366 + set_ip_addr(skb, nh, &nh->daddr, new_addr); 367 + flow_key->ipv4.addr.dst = new_addr; 368 + } 395 369 } 396 - 397 - if (ipv4_key->ipv4_ttl != nh->ttl) { 398 - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); 399 - key->ip.ttl = ipv4_key->ipv4_ttl; 370 + if (mask->ipv4_tos) { 371 + ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos); 372 + flow_key->ip.tos = nh->tos; 373 + } 374 + if (mask->ipv4_ttl) { 375 + set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl); 376 + flow_key->ip.ttl = nh->ttl; 400 377 } 401 378 402 379 return 0; 403 380 } 404 381 405 - static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, 406 - const struct ovs_key_ipv6 *ipv6_key) 382 + static bool is_ipv6_mask_nonzero(const __be32 addr[4]) 383 + { 384 + return !!(addr[0] | addr[1] | addr[2] | addr[3]); 385 + } 386 + 387 + static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, 388 + const struct ovs_key_ipv6 *key, 389 + const struct ovs_key_ipv6 *mask) 407 390 { 408 391 struct ipv6hdr *nh; 409 392 int err; 410 - __be32 *saddr; 411 - __be32 *daddr; 412 393 413 394 err = skb_ensure_writable(skb, skb_network_offset(skb) + 414 395 sizeof(struct ipv6hdr)); ··· 429 384 return err; 430 385 431 386 nh = ipv6_hdr(skb); 432 - saddr = (__be32 *)&nh->saddr; 433 - daddr = (__be32 *)&nh->daddr; 434 387 
435 - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { 436 - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, 437 - ipv6_key->ipv6_src, true); 438 - memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, 439 - sizeof(ipv6_key->ipv6_src)); 388 + /* Setting an IP addresses is typically only a side effect of 389 + * matching on them in the current userspace implementation, so it 390 + * makes sense to check if the value actually changed. 391 + */ 392 + if (is_ipv6_mask_nonzero(mask->ipv6_src)) { 393 + __be32 *saddr = (__be32 *)&nh->saddr; 394 + __be32 masked[4]; 395 + 396 + mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); 397 + 398 + if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { 399 + set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, 400 + true); 401 + memcpy(&flow_key->ipv6.addr.src, masked, 402 + sizeof(flow_key->ipv6.addr.src)); 403 + } 440 404 } 441 - 442 - if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { 405 + if (is_ipv6_mask_nonzero(mask->ipv6_dst)) { 443 406 unsigned int offset = 0; 444 407 int flags = IP6_FH_F_SKIP_RH; 445 408 bool recalc_csum = true; 409 + __be32 *daddr = (__be32 *)&nh->daddr; 410 + __be32 masked[4]; 446 411 447 - if (ipv6_ext_hdr(nh->nexthdr)) 448 - recalc_csum = ipv6_find_hdr(skb, &offset, 449 - NEXTHDR_ROUTING, NULL, 450 - &flags) != NEXTHDR_ROUTING; 412 + mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked); 451 413 452 - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, 453 - ipv6_key->ipv6_dst, recalc_csum); 454 - memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, 455 - sizeof(ipv6_key->ipv6_dst)); 414 + if (unlikely(memcmp(daddr, masked, sizeof(masked)))) { 415 + if (ipv6_ext_hdr(nh->nexthdr)) 416 + recalc_csum = (ipv6_find_hdr(skb, &offset, 417 + NEXTHDR_ROUTING, 418 + NULL, &flags) 419 + != NEXTHDR_ROUTING); 420 + 421 + set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, 422 + recalc_csum); 423 + memcpy(&flow_key->ipv6.addr.dst, masked, 424 + sizeof(flow_key->ipv6.addr.dst)); 
425 + } 456 426 } 457 - 458 - set_ipv6_tc(nh, ipv6_key->ipv6_tclass); 459 - key->ip.tos = ipv6_get_dsfield(nh); 460 - 461 - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); 462 - key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); 463 - 464 - nh->hop_limit = ipv6_key->ipv6_hlimit; 465 - key->ip.ttl = ipv6_key->ipv6_hlimit; 427 + if (mask->ipv6_tclass) { 428 + ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); 429 + flow_key->ip.tos = ipv6_get_dsfield(nh); 430 + } 431 + if (mask->ipv6_label) { 432 + set_ipv6_fl(nh, ntohl(key->ipv6_label), 433 + ntohl(mask->ipv6_label)); 434 + flow_key->ipv6.label = 435 + *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); 436 + } 437 + if (mask->ipv6_hlimit) { 438 + SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); 439 + flow_key->ip.ttl = nh->hop_limit; 440 + } 466 441 return 0; 467 442 } 468 443 469 444 /* Must follow skb_ensure_writable() since that can move the skb data. */ 470 445 static void set_tp_port(struct sk_buff *skb, __be16 *port, 471 - __be16 new_port, __sum16 *check) 446 + __be16 new_port, __sum16 *check) 472 447 { 473 448 inet_proto_csum_replace2(check, skb, *port, new_port, 0); 474 449 *port = new_port; 475 - skb_clear_hash(skb); 476 450 } 477 451 478 - static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) 479 - { 480 - struct udphdr *uh = udp_hdr(skb); 481 - 482 - if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { 483 - set_tp_port(skb, port, new_port, &uh->check); 484 - 485 - if (!uh->check) 486 - uh->check = CSUM_MANGLED_0; 487 - } else { 488 - *port = new_port; 489 - skb_clear_hash(skb); 490 - } 491 - } 492 - 493 - static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, 494 - const struct ovs_key_udp *udp_port_key) 452 + static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, 453 + const struct ovs_key_udp *key, 454 + const struct ovs_key_udp *mask) 495 455 { 496 456 struct udphdr *uh; 457 + __be16 src, dst; 497 458 int err; 498 
459 499 460 err = skb_ensure_writable(skb, skb_transport_offset(skb) + ··· 508 457 return err; 509 458 510 459 uh = udp_hdr(skb); 511 - if (udp_port_key->udp_src != uh->source) { 512 - set_udp_port(skb, &uh->source, udp_port_key->udp_src); 513 - key->tp.src = udp_port_key->udp_src; 460 + /* Either of the masks is non-zero, so do not bother checking them. */ 461 + src = MASKED(uh->source, key->udp_src, mask->udp_src); 462 + dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); 463 + 464 + if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { 465 + if (likely(src != uh->source)) { 466 + set_tp_port(skb, &uh->source, src, &uh->check); 467 + flow_key->tp.src = src; 468 + } 469 + if (likely(dst != uh->dest)) { 470 + set_tp_port(skb, &uh->dest, dst, &uh->check); 471 + flow_key->tp.dst = dst; 472 + } 473 + 474 + if (unlikely(!uh->check)) 475 + uh->check = CSUM_MANGLED_0; 476 + } else { 477 + uh->source = src; 478 + uh->dest = dst; 479 + flow_key->tp.src = src; 480 + flow_key->tp.dst = dst; 514 481 } 515 482 516 - if (udp_port_key->udp_dst != uh->dest) { 517 - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); 518 - key->tp.dst = udp_port_key->udp_dst; 519 - } 483 + skb_clear_hash(skb); 520 484 521 485 return 0; 522 486 } 523 487 524 - static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, 525 - const struct ovs_key_tcp *tcp_port_key) 488 + static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, 489 + const struct ovs_key_tcp *key, 490 + const struct ovs_key_tcp *mask) 526 491 { 527 492 struct tcphdr *th; 493 + __be16 src, dst; 528 494 int err; 529 495 530 496 err = skb_ensure_writable(skb, skb_transport_offset(skb) + ··· 550 482 return err; 551 483 552 484 th = tcp_hdr(skb); 553 - if (tcp_port_key->tcp_src != th->source) { 554 - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); 555 - key->tp.src = tcp_port_key->tcp_src; 485 + src = MASKED(th->source, key->tcp_src, mask->tcp_src); 486 + if (likely(src != th->source)) { 487 + 
set_tp_port(skb, &th->source, src, &th->check); 488 + flow_key->tp.src = src; 556 489 } 557 - 558 - if (tcp_port_key->tcp_dst != th->dest) { 559 - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); 560 - key->tp.dst = tcp_port_key->tcp_dst; 490 + dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); 491 + if (likely(dst != th->dest)) { 492 + set_tp_port(skb, &th->dest, dst, &th->check); 493 + flow_key->tp.dst = dst; 561 494 } 495 + skb_clear_hash(skb); 562 496 563 497 return 0; 564 498 } 565 499 566 - static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, 567 - const struct ovs_key_sctp *sctp_port_key) 500 + static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, 501 + const struct ovs_key_sctp *key, 502 + const struct ovs_key_sctp *mask) 568 503 { 569 - struct sctphdr *sh; 570 - int err; 571 504 unsigned int sctphoff = skb_transport_offset(skb); 505 + struct sctphdr *sh; 506 + __le32 old_correct_csum, new_csum, old_csum; 507 + int err; 572 508 573 509 err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr)); 574 510 if (unlikely(err)) 575 511 return err; 576 512 577 513 sh = sctp_hdr(skb); 578 - if (sctp_port_key->sctp_src != sh->source || 579 - sctp_port_key->sctp_dst != sh->dest) { 580 - __le32 old_correct_csum, new_csum, old_csum; 514 + old_csum = sh->checksum; 515 + old_correct_csum = sctp_compute_cksum(skb, sctphoff); 581 516 582 - old_csum = sh->checksum; 583 - old_correct_csum = sctp_compute_cksum(skb, sctphoff); 517 + sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); 518 + sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); 584 519 585 - sh->source = sctp_port_key->sctp_src; 586 - sh->dest = sctp_port_key->sctp_dst; 520 + new_csum = sctp_compute_cksum(skb, sctphoff); 587 521 588 - new_csum = sctp_compute_cksum(skb, sctphoff); 522 + /* Carry any checksum errors through. */ 523 + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; 589 524 590 - /* Carry any checksum errors through. 
*/ 591 - sh->checksum = old_csum ^ old_correct_csum ^ new_csum; 592 - 593 - skb_clear_hash(skb); 594 - key->tp.src = sctp_port_key->sctp_src; 595 - key->tp.dst = sctp_port_key->sctp_dst; 596 - } 525 + skb_clear_hash(skb); 526 + flow_key->tp.src = sh->source; 527 + flow_key->tp.dst = sh->dest; 597 528 598 529 return 0; 599 530 } ··· 720 653 key->ovs_flow_hash = hash; 721 654 } 722 655 723 - static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, 724 - const struct nlattr *nested_attr) 656 + static int execute_set_action(struct sk_buff *skb, 657 + struct sw_flow_key *flow_key, 658 + const struct nlattr *a) 659 + { 660 + /* Only tunnel set execution is supported without a mask. */ 661 + if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { 662 + OVS_CB(skb)->egress_tun_info = nla_data(a); 663 + return 0; 664 + } 665 + 666 + return -EINVAL; 667 + } 668 + 669 + /* Mask is at the midpoint of the data. */ 670 + #define get_mask(a, type) ((const type)nla_data(a) + 1) 671 + 672 + static int execute_masked_set_action(struct sk_buff *skb, 673 + struct sw_flow_key *flow_key, 674 + const struct nlattr *a) 725 675 { 726 676 int err = 0; 727 677 728 - switch (nla_type(nested_attr)) { 678 + switch (nla_type(a)) { 729 679 case OVS_KEY_ATTR_PRIORITY: 730 - skb->priority = nla_get_u32(nested_attr); 731 - key->phy.priority = skb->priority; 680 + SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); 681 + flow_key->phy.priority = skb->priority; 732 682 break; 733 683 734 684 case OVS_KEY_ATTR_SKB_MARK: 735 - skb->mark = nla_get_u32(nested_attr); 736 - key->phy.skb_mark = skb->mark; 685 + SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); 686 + flow_key->phy.skb_mark = skb->mark; 737 687 break; 738 688 739 689 case OVS_KEY_ATTR_TUNNEL_INFO: 740 - OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); 690 + /* Masked data not supported for tunnel. 
*/ 691 + err = -EINVAL; 741 692 break; 742 693 743 694 case OVS_KEY_ATTR_ETHERNET: 744 - err = set_eth_addr(skb, key, nla_data(nested_attr)); 695 + err = set_eth_addr(skb, flow_key, nla_data(a), 696 + get_mask(a, struct ovs_key_ethernet *)); 745 697 break; 746 698 747 699 case OVS_KEY_ATTR_IPV4: 748 - err = set_ipv4(skb, key, nla_data(nested_attr)); 700 + err = set_ipv4(skb, flow_key, nla_data(a), 701 + get_mask(a, struct ovs_key_ipv4 *)); 749 702 break; 750 703 751 704 case OVS_KEY_ATTR_IPV6: 752 - err = set_ipv6(skb, key, nla_data(nested_attr)); 705 + err = set_ipv6(skb, flow_key, nla_data(a), 706 + get_mask(a, struct ovs_key_ipv6 *)); 753 707 break; 754 708 755 709 case OVS_KEY_ATTR_TCP: 756 - err = set_tcp(skb, key, nla_data(nested_attr)); 710 + err = set_tcp(skb, flow_key, nla_data(a), 711 + get_mask(a, struct ovs_key_tcp *)); 757 712 break; 758 713 759 714 case OVS_KEY_ATTR_UDP: 760 - err = set_udp(skb, key, nla_data(nested_attr)); 715 + err = set_udp(skb, flow_key, nla_data(a), 716 + get_mask(a, struct ovs_key_udp *)); 761 717 break; 762 718 763 719 case OVS_KEY_ATTR_SCTP: 764 - err = set_sctp(skb, key, nla_data(nested_attr)); 720 + err = set_sctp(skb, flow_key, nla_data(a), 721 + get_mask(a, struct ovs_key_sctp *)); 765 722 break; 766 723 767 724 case OVS_KEY_ATTR_MPLS: 768 - err = set_mpls(skb, key, nla_data(nested_attr)); 725 + err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, 726 + __be32 *)); 769 727 break; 770 728 } 771 729 ··· 908 816 909 817 case OVS_ACTION_ATTR_SET: 910 818 err = execute_set_action(skb, key, nla_data(a)); 819 + break; 820 + 821 + case OVS_ACTION_ATTR_SET_MASKED: 822 + case OVS_ACTION_ATTR_SET_TO_MASKED: 823 + err = execute_masked_set_action(skb, key, nla_data(a)); 911 824 break; 912 825 913 826 case OVS_ACTION_ATTR_SAMPLE:
+127 -34
net/openvswitch/flow_netlink.c
··· 1695 1695 return 0; 1696 1696 } 1697 1697 1698 - static int validate_tp_port(const struct sw_flow_key *flow_key, 1699 - __be16 eth_type) 1700 - { 1701 - if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && 1702 - (flow_key->tp.src || flow_key->tp.dst)) 1703 - return 0; 1704 - 1705 - return -EINVAL; 1706 - } 1707 - 1708 1698 void ovs_match_init(struct sw_flow_match *match, 1709 1699 struct sw_flow_key *key, 1710 1700 struct sw_flow_mask *mask) ··· 1795 1805 return err; 1796 1806 } 1797 1807 1808 + /* Return false if there are any non-masked bits set. 1809 + * Mask follows data immediately, before any netlink padding. 1810 + */ 1811 + static bool validate_masked(u8 *data, int len) 1812 + { 1813 + u8 *mask = data + len; 1814 + 1815 + while (len--) 1816 + if (*data++ & ~*mask++) 1817 + return false; 1818 + 1819 + return true; 1820 + } 1821 + 1798 1822 static int validate_set(const struct nlattr *a, 1799 1823 const struct sw_flow_key *flow_key, 1800 1824 struct sw_flow_actions **sfa, 1801 - bool *set_tun, __be16 eth_type, bool log) 1825 + bool *skip_copy, __be16 eth_type, bool masked, bool log) 1802 1826 { 1803 1827 const struct nlattr *ovs_key = nla_data(a); 1804 1828 int key_type = nla_type(ovs_key); 1829 + size_t key_len; 1805 1830 1806 1831 /* There can be only one key in a action */ 1807 1832 if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1808 1833 return -EINVAL; 1809 1834 1835 + key_len = nla_len(ovs_key); 1836 + if (masked) 1837 + key_len /= 2; 1838 + 1810 1839 if (key_type > OVS_KEY_ATTR_MAX || 1811 - (ovs_key_lens[key_type].len != nla_len(ovs_key) && 1840 + (ovs_key_lens[key_type].len != key_len && 1812 1841 ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) 1842 + return -EINVAL; 1843 + 1844 + if (masked && !validate_masked(nla_data(ovs_key), key_len)) 1813 1845 return -EINVAL; 1814 1846 1815 1847 switch (key_type) { ··· 1848 1836 if (eth_p_mpls(eth_type)) 1849 1837 return -EINVAL; 1850 1838 1851 - *set_tun = true; 1839 + if (masked) 
1840 + return -EINVAL; /* Masked tunnel set not supported. */ 1841 + 1842 + *skip_copy = true; 1852 1843 err = validate_and_copy_set_tun(a, sfa, log); 1853 1844 if (err) 1854 1845 return err; ··· 1861 1846 if (eth_type != htons(ETH_P_IP)) 1862 1847 return -EINVAL; 1863 1848 1864 - if (!flow_key->ip.proto) 1865 - return -EINVAL; 1866 - 1867 1849 ipv4_key = nla_data(ovs_key); 1868 - if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1869 - return -EINVAL; 1870 1850 1871 - if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1872 - return -EINVAL; 1851 + if (masked) { 1852 + const struct ovs_key_ipv4 *mask = ipv4_key + 1; 1873 1853 1854 + /* Non-writeable fields. */ 1855 + if (mask->ipv4_proto || mask->ipv4_frag) 1856 + return -EINVAL; 1857 + } else { 1858 + if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1859 + return -EINVAL; 1860 + 1861 + if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1862 + return -EINVAL; 1863 + } 1874 1864 break; 1875 1865 1876 1866 case OVS_KEY_ATTR_IPV6: 1877 1867 if (eth_type != htons(ETH_P_IPV6)) 1878 1868 return -EINVAL; 1879 1869 1880 - if (!flow_key->ip.proto) 1881 - return -EINVAL; 1882 - 1883 1870 ipv6_key = nla_data(ovs_key); 1884 - if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1885 - return -EINVAL; 1886 1871 1887 - if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1888 - return -EINVAL; 1872 + if (masked) { 1873 + const struct ovs_key_ipv6 *mask = ipv6_key + 1; 1889 1874 1875 + /* Non-writeable fields. */ 1876 + if (mask->ipv6_proto || mask->ipv6_frag) 1877 + return -EINVAL; 1878 + 1879 + /* Invalid bits in the flow label mask? 
*/ 1880 + if (ntohl(mask->ipv6_label) & 0xFFF00000) 1881 + return -EINVAL; 1882 + } else { 1883 + if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1884 + return -EINVAL; 1885 + 1886 + if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1887 + return -EINVAL; 1888 + } 1890 1889 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1891 1890 return -EINVAL; 1892 1891 1893 1892 break; 1894 1893 1895 1894 case OVS_KEY_ATTR_TCP: 1896 - if (flow_key->ip.proto != IPPROTO_TCP) 1895 + if ((eth_type != htons(ETH_P_IP) && 1896 + eth_type != htons(ETH_P_IPV6)) || 1897 + flow_key->ip.proto != IPPROTO_TCP) 1897 1898 return -EINVAL; 1898 1899 1899 - return validate_tp_port(flow_key, eth_type); 1900 + break; 1900 1901 1901 1902 case OVS_KEY_ATTR_UDP: 1902 - if (flow_key->ip.proto != IPPROTO_UDP) 1903 + if ((eth_type != htons(ETH_P_IP) && 1904 + eth_type != htons(ETH_P_IPV6)) || 1905 + flow_key->ip.proto != IPPROTO_UDP) 1903 1906 return -EINVAL; 1904 1907 1905 - return validate_tp_port(flow_key, eth_type); 1908 + break; 1906 1909 1907 1910 case OVS_KEY_ATTR_MPLS: 1908 1911 if (!eth_p_mpls(eth_type)) ··· 1928 1895 break; 1929 1896 1930 1897 case OVS_KEY_ATTR_SCTP: 1931 - if (flow_key->ip.proto != IPPROTO_SCTP) 1898 + if ((eth_type != htons(ETH_P_IP) && 1899 + eth_type != htons(ETH_P_IPV6)) || 1900 + flow_key->ip.proto != IPPROTO_SCTP) 1932 1901 return -EINVAL; 1933 1902 1934 - return validate_tp_port(flow_key, eth_type); 1903 + break; 1935 1904 1936 1905 default: 1937 1906 return -EINVAL; 1907 + } 1908 + 1909 + /* Convert non-masked non-tunnel set actions to masked set actions. 
*/ 1910 + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { 1911 + int start, len = key_len * 2; 1912 + struct nlattr *at; 1913 + 1914 + *skip_copy = true; 1915 + 1916 + start = add_nested_action_start(sfa, 1917 + OVS_ACTION_ATTR_SET_TO_MASKED, 1918 + log); 1919 + if (start < 0) 1920 + return start; 1921 + 1922 + at = __add_action(sfa, key_type, NULL, len, log); 1923 + if (IS_ERR(at)) 1924 + return PTR_ERR(at); 1925 + 1926 + memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ 1927 + memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ 1928 + /* Clear non-writeable bits from otherwise writeable fields. */ 1929 + if (key_type == OVS_KEY_ATTR_IPV6) { 1930 + struct ovs_key_ipv6 *mask = nla_data(at) + key_len; 1931 + 1932 + mask->ipv6_label &= htonl(0x000FFFFF); 1933 + } 1934 + add_nested_action_end(*sfa, start); 1938 1935 } 1939 1936 1940 1937 return 0; ··· 2028 1965 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 2029 1966 [OVS_ACTION_ATTR_POP_VLAN] = 0, 2030 1967 [OVS_ACTION_ATTR_SET] = (u32)-1, 1968 + [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, 2031 1969 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, 2032 1970 [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) 2033 1971 }; ··· 2124 2060 2125 2061 case OVS_ACTION_ATTR_SET: 2126 2062 err = validate_set(a, key, sfa, 2127 - &skip_copy, eth_type, log); 2063 + &skip_copy, eth_type, false, log); 2064 + if (err) 2065 + return err; 2066 + break; 2067 + 2068 + case OVS_ACTION_ATTR_SET_MASKED: 2069 + err = validate_set(a, key, sfa, 2070 + &skip_copy, eth_type, true, log); 2128 2071 if (err) 2129 2072 return err; 2130 2073 break; ··· 2161 2090 return 0; 2162 2091 } 2163 2092 2093 + /* 'key' must be the masked key. 
*/ 2164 2094 int ovs_nla_copy_actions(const struct nlattr *attr, 2165 2095 const struct sw_flow_key *key, 2166 2096 struct sw_flow_actions **sfa, bool log) ··· 2249 2177 return 0; 2250 2178 } 2251 2179 2180 + static int masked_set_action_to_set_action_attr(const struct nlattr *a, 2181 + struct sk_buff *skb) 2182 + { 2183 + const struct nlattr *ovs_key = nla_data(a); 2184 + size_t key_len = nla_len(ovs_key) / 2; 2185 + 2186 + /* Revert the conversion we did from a non-masked set action to 2187 + * masked set action. 2188 + */ 2189 + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) 2190 + return -EMSGSIZE; 2191 + 2192 + return 0; 2193 + } 2194 + 2252 2195 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 2253 2196 { 2254 2197 const struct nlattr *a; ··· 2275 2188 switch (type) { 2276 2189 case OVS_ACTION_ATTR_SET: 2277 2190 err = set_action_to_attr(a, skb); 2191 + if (err) 2192 + return err; 2193 + break; 2194 + 2195 + case OVS_ACTION_ATTR_SET_TO_MASKED: 2196 + err = masked_set_action_to_set_action_attr(a, skb); 2278 2197 if (err) 2279 2198 return err; 2280 2199 break;