Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch

Jesse Gross says:

====================
Open vSwitch

A set of updates for net-next/3.13. Major changes are:
* Restructure flow handling code to be more logically organized and
easier to read.
* Rehashing of the flow table is moved from a workqueue to flow
installation time. Before, heavy load could block the workqueue for
excessive periods of time.
* Additional debugging information is provided to help diagnose megaflows.
* It's now possible to match on TCP flags.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+2511 -2291
+15 -3
include/uapi/linux/openvswitch.h
··· 63 63 * not be sent. 64 64 * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the 65 65 * datapath. Always present in notifications. 66 + * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the 67 + * datapath. Always present in notifications. 66 68 * 67 69 * These attributes follow the &struct ovs_header within the Generic Netlink 68 70 * payload for %OVS_DP_* commands. 69 71 */ 70 72 enum ovs_datapath_attr { 71 73 OVS_DP_ATTR_UNSPEC, 72 - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ 73 - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ 74 - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ 74 + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ 75 + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ 76 + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ 77 + OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ 75 78 __OVS_DP_ATTR_MAX 76 79 }; 77 80 ··· 85 82 __u64 n_missed; /* Number of flow table misses. */ 86 83 __u64 n_lost; /* Number of misses not sent to userspace. */ 87 84 __u64 n_flows; /* Number of flows present */ 85 + }; 86 + 87 + struct ovs_dp_megaflow_stats { 88 + __u64 n_mask_hit; /* Number of masks used for flow lookups. */ 89 + __u32 n_masks; /* Number of masks for the datapath. */ 90 + __u32 pad0; /* Pad for future expension. */ 91 + __u64 pad1; /* Pad for future expension. */ 92 + __u64 pad2; /* Pad for future expension. */ 88 93 }; 89 94 90 95 struct ovs_vport_stats { ··· 271 260 OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ 272 261 OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ 273 262 OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ 263 + OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */ 274 264 275 265 #ifdef __KERNEL__ 276 266 OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
+2
net/openvswitch/Makefile
··· 9 9 datapath.o \ 10 10 dp_notify.o \ 11 11 flow.o \ 12 + flow_netlink.o \ 13 + flow_table.o \ 12 14 vport.o \ 13 15 vport-internal_dev.o \ 14 16 vport-netdev.o
+70 -598
net/openvswitch/datapath.c
··· 55 55 56 56 #include "datapath.h" 57 57 #include "flow.h" 58 + #include "flow_netlink.h" 58 59 #include "vport-internal_dev.h" 59 60 #include "vport-netdev.h" 60 - 61 - 62 - #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) 63 - static void rehash_flow_table(struct work_struct *work); 64 - static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); 65 61 66 62 int ovs_net_id __read_mostly; 67 63 ··· 161 165 { 162 166 struct datapath *dp = container_of(rcu, struct datapath, rcu); 163 167 164 - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); 168 + ovs_flow_tbl_destroy(&dp->table); 165 169 free_percpu(dp->stats_percpu); 166 170 release_net(ovs_dp_get_net(dp)); 167 171 kfree(dp->ports); ··· 221 225 struct dp_stats_percpu *stats; 222 226 struct sw_flow_key key; 223 227 u64 *stats_counter; 228 + u32 n_mask_hit; 224 229 int error; 225 230 226 231 stats = this_cpu_ptr(dp->stats_percpu); ··· 234 237 } 235 238 236 239 /* Look up flow. */ 237 - flow = ovs_flow_lookup(rcu_dereference(dp->table), &key); 240 + flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); 238 241 if (unlikely(!flow)) { 239 242 struct dp_upcall_info upcall; 240 243 ··· 259 262 /* Update datapath statistics. */ 260 263 u64_stats_update_begin(&stats->sync); 261 264 (*stats_counter)++; 265 + stats->n_mask_hit += n_mask_hit; 262 266 u64_stats_update_end(&stats->sync); 263 267 } 264 268 ··· 433 435 upcall->dp_ifindex = dp_ifindex; 434 436 435 437 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 436 - ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb); 438 + ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); 437 439 nla_nest_end(user_skb, nla); 438 440 439 441 if (upcall_info->userdata) ··· 451 453 out: 452 454 kfree_skb(nskb); 453 455 return err; 454 - } 455 - 456 - /* Called with ovs_mutex. 
*/ 457 - static int flush_flows(struct datapath *dp) 458 - { 459 - struct flow_table *old_table; 460 - struct flow_table *new_table; 461 - 462 - old_table = ovsl_dereference(dp->table); 463 - new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 464 - if (!new_table) 465 - return -ENOMEM; 466 - 467 - rcu_assign_pointer(dp->table, new_table); 468 - 469 - ovs_flow_tbl_destroy(old_table, true); 470 - return 0; 471 - } 472 - 473 - static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) 474 - { 475 - 476 - struct sw_flow_actions *acts; 477 - int new_acts_size; 478 - int req_size = NLA_ALIGN(attr_len); 479 - int next_offset = offsetof(struct sw_flow_actions, actions) + 480 - (*sfa)->actions_len; 481 - 482 - if (req_size <= (ksize(*sfa) - next_offset)) 483 - goto out; 484 - 485 - new_acts_size = ksize(*sfa) * 2; 486 - 487 - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { 488 - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) 489 - return ERR_PTR(-EMSGSIZE); 490 - new_acts_size = MAX_ACTIONS_BUFSIZE; 491 - } 492 - 493 - acts = ovs_flow_actions_alloc(new_acts_size); 494 - if (IS_ERR(acts)) 495 - return (void *)acts; 496 - 497 - memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); 498 - acts->actions_len = (*sfa)->actions_len; 499 - kfree(*sfa); 500 - *sfa = acts; 501 - 502 - out: 503 - (*sfa)->actions_len += req_size; 504 - return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); 505 - } 506 - 507 - static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) 508 - { 509 - struct nlattr *a; 510 - 511 - a = reserve_sfa_size(sfa, nla_attr_size(len)); 512 - if (IS_ERR(a)) 513 - return PTR_ERR(a); 514 - 515 - a->nla_type = attrtype; 516 - a->nla_len = nla_attr_size(len); 517 - 518 - if (data) 519 - memcpy(nla_data(a), data, len); 520 - memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); 521 - 522 - return 0; 523 - } 524 - 525 - static inline int add_nested_action_start(struct sw_flow_actions **sfa, int 
attrtype) 526 - { 527 - int used = (*sfa)->actions_len; 528 - int err; 529 - 530 - err = add_action(sfa, attrtype, NULL, 0); 531 - if (err) 532 - return err; 533 - 534 - return used; 535 - } 536 - 537 - static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) 538 - { 539 - struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); 540 - 541 - a->nla_len = sfa->actions_len - st_offset; 542 - } 543 - 544 - static int validate_and_copy_actions(const struct nlattr *attr, 545 - const struct sw_flow_key *key, int depth, 546 - struct sw_flow_actions **sfa); 547 - 548 - static int validate_and_copy_sample(const struct nlattr *attr, 549 - const struct sw_flow_key *key, int depth, 550 - struct sw_flow_actions **sfa) 551 - { 552 - const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 553 - const struct nlattr *probability, *actions; 554 - const struct nlattr *a; 555 - int rem, start, err, st_acts; 556 - 557 - memset(attrs, 0, sizeof(attrs)); 558 - nla_for_each_nested(a, attr, rem) { 559 - int type = nla_type(a); 560 - if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) 561 - return -EINVAL; 562 - attrs[type] = a; 563 - } 564 - if (rem) 565 - return -EINVAL; 566 - 567 - probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; 568 - if (!probability || nla_len(probability) != sizeof(u32)) 569 - return -EINVAL; 570 - 571 - actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; 572 - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) 573 - return -EINVAL; 574 - 575 - /* validation done, copy sample action. 
*/ 576 - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); 577 - if (start < 0) 578 - return start; 579 - err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); 580 - if (err) 581 - return err; 582 - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); 583 - if (st_acts < 0) 584 - return st_acts; 585 - 586 - err = validate_and_copy_actions(actions, key, depth + 1, sfa); 587 - if (err) 588 - return err; 589 - 590 - add_nested_action_end(*sfa, st_acts); 591 - add_nested_action_end(*sfa, start); 592 - 593 - return 0; 594 - } 595 - 596 - static int validate_tp_port(const struct sw_flow_key *flow_key) 597 - { 598 - if (flow_key->eth.type == htons(ETH_P_IP)) { 599 - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) 600 - return 0; 601 - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { 602 - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) 603 - return 0; 604 - } 605 - 606 - return -EINVAL; 607 - } 608 - 609 - static int validate_and_copy_set_tun(const struct nlattr *attr, 610 - struct sw_flow_actions **sfa) 611 - { 612 - struct sw_flow_match match; 613 - struct sw_flow_key key; 614 - int err, start; 615 - 616 - ovs_match_init(&match, &key, NULL); 617 - err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false); 618 - if (err) 619 - return err; 620 - 621 - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); 622 - if (start < 0) 623 - return start; 624 - 625 - err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, 626 - sizeof(match.key->tun_key)); 627 - add_nested_action_end(*sfa, start); 628 - 629 - return err; 630 - } 631 - 632 - static int validate_set(const struct nlattr *a, 633 - const struct sw_flow_key *flow_key, 634 - struct sw_flow_actions **sfa, 635 - bool *set_tun) 636 - { 637 - const struct nlattr *ovs_key = nla_data(a); 638 - int key_type = nla_type(ovs_key); 639 - 640 - /* There can be only one key in a action */ 641 - if (nla_total_size(nla_len(ovs_key)) != 
nla_len(a)) 642 - return -EINVAL; 643 - 644 - if (key_type > OVS_KEY_ATTR_MAX || 645 - (ovs_key_lens[key_type] != nla_len(ovs_key) && 646 - ovs_key_lens[key_type] != -1)) 647 - return -EINVAL; 648 - 649 - switch (key_type) { 650 - const struct ovs_key_ipv4 *ipv4_key; 651 - const struct ovs_key_ipv6 *ipv6_key; 652 - int err; 653 - 654 - case OVS_KEY_ATTR_PRIORITY: 655 - case OVS_KEY_ATTR_SKB_MARK: 656 - case OVS_KEY_ATTR_ETHERNET: 657 - break; 658 - 659 - case OVS_KEY_ATTR_TUNNEL: 660 - *set_tun = true; 661 - err = validate_and_copy_set_tun(a, sfa); 662 - if (err) 663 - return err; 664 - break; 665 - 666 - case OVS_KEY_ATTR_IPV4: 667 - if (flow_key->eth.type != htons(ETH_P_IP)) 668 - return -EINVAL; 669 - 670 - if (!flow_key->ip.proto) 671 - return -EINVAL; 672 - 673 - ipv4_key = nla_data(ovs_key); 674 - if (ipv4_key->ipv4_proto != flow_key->ip.proto) 675 - return -EINVAL; 676 - 677 - if (ipv4_key->ipv4_frag != flow_key->ip.frag) 678 - return -EINVAL; 679 - 680 - break; 681 - 682 - case OVS_KEY_ATTR_IPV6: 683 - if (flow_key->eth.type != htons(ETH_P_IPV6)) 684 - return -EINVAL; 685 - 686 - if (!flow_key->ip.proto) 687 - return -EINVAL; 688 - 689 - ipv6_key = nla_data(ovs_key); 690 - if (ipv6_key->ipv6_proto != flow_key->ip.proto) 691 - return -EINVAL; 692 - 693 - if (ipv6_key->ipv6_frag != flow_key->ip.frag) 694 - return -EINVAL; 695 - 696 - if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 697 - return -EINVAL; 698 - 699 - break; 700 - 701 - case OVS_KEY_ATTR_TCP: 702 - if (flow_key->ip.proto != IPPROTO_TCP) 703 - return -EINVAL; 704 - 705 - return validate_tp_port(flow_key); 706 - 707 - case OVS_KEY_ATTR_UDP: 708 - if (flow_key->ip.proto != IPPROTO_UDP) 709 - return -EINVAL; 710 - 711 - return validate_tp_port(flow_key); 712 - 713 - case OVS_KEY_ATTR_SCTP: 714 - if (flow_key->ip.proto != IPPROTO_SCTP) 715 - return -EINVAL; 716 - 717 - return validate_tp_port(flow_key); 718 - 719 - default: 720 - return -EINVAL; 721 - } 722 - 723 - return 0; 724 - } 725 - 726 - static 
int validate_userspace(const struct nlattr *attr) 727 - { 728 - static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 729 - [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 730 - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 731 - }; 732 - struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 733 - int error; 734 - 735 - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 736 - attr, userspace_policy); 737 - if (error) 738 - return error; 739 - 740 - if (!a[OVS_USERSPACE_ATTR_PID] || 741 - !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 742 - return -EINVAL; 743 - 744 - return 0; 745 - } 746 - 747 - static int copy_action(const struct nlattr *from, 748 - struct sw_flow_actions **sfa) 749 - { 750 - int totlen = NLA_ALIGN(from->nla_len); 751 - struct nlattr *to; 752 - 753 - to = reserve_sfa_size(sfa, from->nla_len); 754 - if (IS_ERR(to)) 755 - return PTR_ERR(to); 756 - 757 - memcpy(to, from, totlen); 758 - return 0; 759 - } 760 - 761 - static int validate_and_copy_actions(const struct nlattr *attr, 762 - const struct sw_flow_key *key, 763 - int depth, 764 - struct sw_flow_actions **sfa) 765 - { 766 - const struct nlattr *a; 767 - int rem, err; 768 - 769 - if (depth >= SAMPLE_ACTION_DEPTH) 770 - return -EOVERFLOW; 771 - 772 - nla_for_each_nested(a, attr, rem) { 773 - /* Expected argument lengths, (u32)-1 for variable length. 
*/ 774 - static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 775 - [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 776 - [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 777 - [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 778 - [OVS_ACTION_ATTR_POP_VLAN] = 0, 779 - [OVS_ACTION_ATTR_SET] = (u32)-1, 780 - [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 781 - }; 782 - const struct ovs_action_push_vlan *vlan; 783 - int type = nla_type(a); 784 - bool skip_copy; 785 - 786 - if (type > OVS_ACTION_ATTR_MAX || 787 - (action_lens[type] != nla_len(a) && 788 - action_lens[type] != (u32)-1)) 789 - return -EINVAL; 790 - 791 - skip_copy = false; 792 - switch (type) { 793 - case OVS_ACTION_ATTR_UNSPEC: 794 - return -EINVAL; 795 - 796 - case OVS_ACTION_ATTR_USERSPACE: 797 - err = validate_userspace(a); 798 - if (err) 799 - return err; 800 - break; 801 - 802 - case OVS_ACTION_ATTR_OUTPUT: 803 - if (nla_get_u32(a) >= DP_MAX_PORTS) 804 - return -EINVAL; 805 - break; 806 - 807 - 808 - case OVS_ACTION_ATTR_POP_VLAN: 809 - break; 810 - 811 - case OVS_ACTION_ATTR_PUSH_VLAN: 812 - vlan = nla_data(a); 813 - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 814 - return -EINVAL; 815 - if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 816 - return -EINVAL; 817 - break; 818 - 819 - case OVS_ACTION_ATTR_SET: 820 - err = validate_set(a, key, sfa, &skip_copy); 821 - if (err) 822 - return err; 823 - break; 824 - 825 - case OVS_ACTION_ATTR_SAMPLE: 826 - err = validate_and_copy_sample(a, key, depth, sfa); 827 - if (err) 828 - return err; 829 - skip_copy = true; 830 - break; 831 - 832 - default: 833 - return -EINVAL; 834 - } 835 - if (!skip_copy) { 836 - err = copy_action(a, sfa); 837 - if (err) 838 - return err; 839 - } 840 - } 841 - 842 - if (rem > 0) 843 - return -EINVAL; 844 - 845 - return 0; 846 456 } 847 457 848 458 static void clear_stats(struct sw_flow *flow) ··· 508 902 if (err) 509 903 goto err_flow_free; 510 904 511 - err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); 905 + err = 
ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]); 512 906 if (err) 513 907 goto err_flow_free; 514 - acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); 908 + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); 515 909 err = PTR_ERR(acts); 516 910 if (IS_ERR(acts)) 517 911 goto err_flow_free; 518 912 519 - err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); 913 + err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 914 + &flow->key, 0, &acts); 520 915 rcu_assign_pointer(flow->sf_acts, acts); 521 916 if (err) 522 917 goto err_flow_free; ··· 565 958 } 566 959 }; 567 960 568 - static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 961 + static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, 962 + struct ovs_dp_megaflow_stats *mega_stats) 569 963 { 570 - struct flow_table *table; 571 964 int i; 572 965 573 - table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); 574 - stats->n_flows = ovs_flow_tbl_count(table); 966 + memset(mega_stats, 0, sizeof(*mega_stats)); 967 + 968 + stats->n_flows = ovs_flow_tbl_count(&dp->table); 969 + mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); 575 970 576 971 stats->n_hit = stats->n_missed = stats->n_lost = 0; 972 + 577 973 for_each_possible_cpu(i) { 578 974 const struct dp_stats_percpu *percpu_stats; 579 975 struct dp_stats_percpu local_stats; ··· 592 982 stats->n_hit += local_stats.n_hit; 593 983 stats->n_missed += local_stats.n_missed; 594 984 stats->n_lost += local_stats.n_lost; 985 + mega_stats->n_mask_hit += local_stats.n_mask_hit; 595 986 } 596 987 } 597 988 ··· 615 1004 static struct genl_multicast_group ovs_dp_flow_multicast_group = { 616 1005 .name = OVS_FLOW_MCGROUP 617 1006 }; 618 - 619 - static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); 620 - static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 621 - { 622 - const struct nlattr *a; 
623 - struct nlattr *start; 624 - int err = 0, rem; 625 - 626 - start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 627 - if (!start) 628 - return -EMSGSIZE; 629 - 630 - nla_for_each_nested(a, attr, rem) { 631 - int type = nla_type(a); 632 - struct nlattr *st_sample; 633 - 634 - switch (type) { 635 - case OVS_SAMPLE_ATTR_PROBABILITY: 636 - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) 637 - return -EMSGSIZE; 638 - break; 639 - case OVS_SAMPLE_ATTR_ACTIONS: 640 - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 641 - if (!st_sample) 642 - return -EMSGSIZE; 643 - err = actions_to_attr(nla_data(a), nla_len(a), skb); 644 - if (err) 645 - return err; 646 - nla_nest_end(skb, st_sample); 647 - break; 648 - } 649 - } 650 - 651 - nla_nest_end(skb, start); 652 - return err; 653 - } 654 - 655 - static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 656 - { 657 - const struct nlattr *ovs_key = nla_data(a); 658 - int key_type = nla_type(ovs_key); 659 - struct nlattr *start; 660 - int err; 661 - 662 - switch (key_type) { 663 - case OVS_KEY_ATTR_IPV4_TUNNEL: 664 - start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 665 - if (!start) 666 - return -EMSGSIZE; 667 - 668 - err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key), 669 - nla_data(ovs_key)); 670 - if (err) 671 - return err; 672 - nla_nest_end(skb, start); 673 - break; 674 - default: 675 - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 676 - return -EMSGSIZE; 677 - break; 678 - } 679 - 680 - return 0; 681 - } 682 - 683 - static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) 684 - { 685 - const struct nlattr *a; 686 - int rem, err; 687 - 688 - nla_for_each_attr(a, attr, len, rem) { 689 - int type = nla_type(a); 690 - 691 - switch (type) { 692 - case OVS_ACTION_ATTR_SET: 693 - err = set_action_to_attr(a, skb); 694 - if (err) 695 - return err; 696 - break; 697 - 698 - case OVS_ACTION_ATTR_SAMPLE: 699 - err = sample_action_to_attr(a, 
skb); 700 - if (err) 701 - return err; 702 - break; 703 - default: 704 - if (nla_put(skb, type, nla_len(a), nla_data(a))) 705 - return -EMSGSIZE; 706 - break; 707 - } 708 - } 709 - 710 - return 0; 711 - } 712 1007 713 1008 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) 714 1009 { ··· 652 1135 if (!nla) 653 1136 goto nla_put_failure; 654 1137 655 - err = ovs_flow_to_nlattrs(&flow->unmasked_key, 656 - &flow->unmasked_key, skb); 1138 + err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); 657 1139 if (err) 658 1140 goto error; 659 1141 nla_nest_end(skb, nla); ··· 661 1145 if (!nla) 662 1146 goto nla_put_failure; 663 1147 664 - err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb); 1148 + err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); 665 1149 if (err) 666 1150 goto error; 667 1151 ··· 671 1155 used = flow->used; 672 1156 stats.n_packets = flow->packet_count; 673 1157 stats.n_bytes = flow->byte_count; 674 - tcp_flags = flow->tcp_flags; 1158 + tcp_flags = (u8)ntohs(flow->tcp_flags); 675 1159 spin_unlock_bh(&flow->lock); 676 1160 677 1161 if (used && ··· 704 1188 sf_acts = rcu_dereference_check(flow->sf_acts, 705 1189 lockdep_ovsl_is_held()); 706 1190 707 - err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); 1191 + err = ovs_nla_put_actions(sf_acts->actions, 1192 + sf_acts->actions_len, skb); 708 1193 if (!err) 709 1194 nla_nest_end(skb, start); 710 1195 else { ··· 751 1234 return skb; 752 1235 } 753 1236 1237 + static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, 1238 + const struct sw_flow_key *key) 1239 + { 1240 + u32 __always_unused n_mask_hit; 1241 + 1242 + return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); 1243 + } 1244 + 754 1245 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) 755 1246 { 756 1247 struct nlattr **a = info->attrs; ··· 768 1243 struct sw_flow_mask mask; 769 1244 struct sk_buff *reply; 770 1245 struct datapath *dp; 771 - struct 
flow_table *table; 772 1246 struct sw_flow_actions *acts = NULL; 773 1247 struct sw_flow_match match; 774 1248 int error; ··· 778 1254 goto error; 779 1255 780 1256 ovs_match_init(&match, &key, &mask); 781 - error = ovs_match_from_nlattrs(&match, 782 - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 1257 + error = ovs_nla_get_match(&match, 1258 + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 783 1259 if (error) 784 1260 goto error; 785 1261 786 1262 /* Validate actions. */ 787 1263 if (a[OVS_FLOW_ATTR_ACTIONS]) { 788 - acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); 1264 + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); 789 1265 error = PTR_ERR(acts); 790 1266 if (IS_ERR(acts)) 791 1267 goto error; 792 1268 793 - ovs_flow_key_mask(&masked_key, &key, &mask); 794 - error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], 795 - &masked_key, 0, &acts); 1269 + ovs_flow_mask_key(&masked_key, &key, &mask); 1270 + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], 1271 + &masked_key, 0, &acts); 796 1272 if (error) { 797 1273 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 798 1274 goto err_kfree; ··· 808 1284 if (!dp) 809 1285 goto err_unlock_ovs; 810 1286 811 - table = ovsl_dereference(dp->table); 812 - 813 1287 /* Check if this is a duplicate flow */ 814 - flow = ovs_flow_lookup(table, &key); 1288 + flow = __ovs_flow_tbl_lookup(&dp->table, &key); 815 1289 if (!flow) { 816 - struct sw_flow_mask *mask_p; 817 1290 /* Bail out if we're not allowed to create a new flow. */ 818 1291 error = -ENOENT; 819 1292 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 820 1293 goto err_unlock_ovs; 821 - 822 - /* Expand table, if necessary, to make room. 
*/ 823 - if (ovs_flow_tbl_need_to_expand(table)) { 824 - struct flow_table *new_table; 825 - 826 - new_table = ovs_flow_tbl_expand(table); 827 - if (!IS_ERR(new_table)) { 828 - rcu_assign_pointer(dp->table, new_table); 829 - ovs_flow_tbl_destroy(table, true); 830 - table = ovsl_dereference(dp->table); 831 - } 832 - } 833 1294 834 1295 /* Allocate flow. */ 835 1296 flow = ovs_flow_alloc(); ··· 826 1317 827 1318 flow->key = masked_key; 828 1319 flow->unmasked_key = key; 829 - 830 - /* Make sure mask is unique in the system */ 831 - mask_p = ovs_sw_flow_mask_find(table, &mask); 832 - if (!mask_p) { 833 - /* Allocate a new mask if none exsits. */ 834 - mask_p = ovs_sw_flow_mask_alloc(); 835 - if (!mask_p) 836 - goto err_flow_free; 837 - mask_p->key = mask.key; 838 - mask_p->range = mask.range; 839 - ovs_sw_flow_mask_insert(table, mask_p); 840 - } 841 - 842 - ovs_sw_flow_mask_add_ref(mask_p); 843 - flow->mask = mask_p; 844 1320 rcu_assign_pointer(flow->sf_acts, acts); 845 1321 846 1322 /* Put flow in bucket. */ 847 - ovs_flow_insert(table, flow); 1323 + error = ovs_flow_tbl_insert(&dp->table, flow, &mask); 1324 + if (error) { 1325 + acts = NULL; 1326 + goto err_flow_free; 1327 + } 848 1328 849 1329 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 850 1330 info->snd_seq, OVS_FLOW_CMD_NEW); ··· 854 1356 855 1357 /* The unmasked key has to be the same for flow updates. */ 856 1358 error = -EINVAL; 857 - if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { 1359 + if (!ovs_flow_cmp_unmasked_key(flow, &match)) { 858 1360 OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); 859 1361 goto err_unlock_ovs; 860 1362 } ··· 862 1364 /* Update actions. 
*/ 863 1365 old_acts = ovsl_dereference(flow->sf_acts); 864 1366 rcu_assign_pointer(flow->sf_acts, acts); 865 - ovs_flow_deferred_free_acts(old_acts); 1367 + ovs_nla_free_flow_actions(old_acts); 866 1368 867 1369 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 868 1370 info->snd_seq, OVS_FLOW_CMD_NEW); ··· 901 1403 struct sk_buff *reply; 902 1404 struct sw_flow *flow; 903 1405 struct datapath *dp; 904 - struct flow_table *table; 905 1406 struct sw_flow_match match; 906 1407 int err; 907 1408 ··· 910 1413 } 911 1414 912 1415 ovs_match_init(&match, &key, NULL); 913 - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1416 + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 914 1417 if (err) 915 1418 return err; 916 1419 ··· 921 1424 goto unlock; 922 1425 } 923 1426 924 - table = ovsl_dereference(dp->table); 925 - flow = ovs_flow_lookup_unmasked_key(table, &match); 926 - if (!flow) { 1427 + flow = __ovs_flow_tbl_lookup(&dp->table, &key); 1428 + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { 927 1429 err = -ENOENT; 928 1430 goto unlock; 929 1431 } ··· 949 1453 struct sk_buff *reply; 950 1454 struct sw_flow *flow; 951 1455 struct datapath *dp; 952 - struct flow_table *table; 953 1456 struct sw_flow_match match; 954 1457 int err; 955 1458 ··· 960 1465 } 961 1466 962 1467 if (!a[OVS_FLOW_ATTR_KEY]) { 963 - err = flush_flows(dp); 1468 + err = ovs_flow_tbl_flush(&dp->table); 964 1469 goto unlock; 965 1470 } 966 1471 967 1472 ovs_match_init(&match, &key, NULL); 968 - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1473 + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 969 1474 if (err) 970 1475 goto unlock; 971 1476 972 - table = ovsl_dereference(dp->table); 973 - flow = ovs_flow_lookup_unmasked_key(table, &match); 974 - if (!flow) { 1477 + flow = __ovs_flow_tbl_lookup(&dp->table, &key); 1478 + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { 975 1479 err = -ENOENT; 976 1480 goto unlock; 
977 1481 } ··· 981 1487 goto unlock; 982 1488 } 983 1489 984 - ovs_flow_remove(table, flow); 1490 + ovs_flow_tbl_remove(&dp->table, flow); 985 1491 986 1492 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, 987 1493 info->snd_seq, 0, OVS_FLOW_CMD_DEL); ··· 1000 1506 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1001 1507 { 1002 1508 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1509 + struct table_instance *ti; 1003 1510 struct datapath *dp; 1004 - struct flow_table *table; 1005 1511 1006 1512 rcu_read_lock(); 1007 1513 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); ··· 1010 1516 return -ENODEV; 1011 1517 } 1012 1518 1013 - table = rcu_dereference(dp->table); 1519 + ti = rcu_dereference(dp->table.ti); 1014 1520 for (;;) { 1015 1521 struct sw_flow *flow; 1016 1522 u32 bucket, obj; 1017 1523 1018 1524 bucket = cb->args[0]; 1019 1525 obj = cb->args[1]; 1020 - flow = ovs_flow_dump_next(table, &bucket, &obj); 1526 + flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); 1021 1527 if (!flow) 1022 1528 break; 1023 1529 ··· 1083 1589 1084 1590 msgsize += nla_total_size(IFNAMSIZ); 1085 1591 msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); 1592 + msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); 1086 1593 1087 1594 return msgsize; 1088 1595 } ··· 1093 1598 { 1094 1599 struct ovs_header *ovs_header; 1095 1600 struct ovs_dp_stats dp_stats; 1601 + struct ovs_dp_megaflow_stats dp_megaflow_stats; 1096 1602 int err; 1097 1603 1098 1604 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, ··· 1109 1613 if (err) 1110 1614 goto nla_put_failure; 1111 1615 1112 - get_dp_stats(dp, &dp_stats); 1113 - if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) 1616 + get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); 1617 + if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), 1618 + &dp_stats)) 1619 + goto nla_put_failure; 1620 + 1621 + if (nla_put(skb, 
OVS_DP_ATTR_MEGAFLOW_STATS, 1622 + sizeof(struct ovs_dp_megaflow_stats), 1623 + &dp_megaflow_stats)) 1114 1624 goto nla_put_failure; 1115 1625 1116 1626 return genlmsg_end(skb, ovs_header); ··· 1189 1687 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1190 1688 1191 1689 /* Allocate table. */ 1192 - err = -ENOMEM; 1193 - rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); 1194 - if (!dp->table) 1690 + err = ovs_flow_tbl_init(&dp->table); 1691 + if (err) 1195 1692 goto err_free_dp; 1196 1693 1197 1694 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); ··· 1200 1699 } 1201 1700 1202 1701 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head), 1203 - GFP_KERNEL); 1702 + GFP_KERNEL); 1204 1703 if (!dp->ports) { 1205 1704 err = -ENOMEM; 1206 1705 goto err_destroy_percpu; ··· 1247 1746 err_destroy_percpu: 1248 1747 free_percpu(dp->stats_percpu); 1249 1748 err_destroy_table: 1250 - ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); 1749 + ovs_flow_tbl_destroy(&dp->table); 1251 1750 err_free_dp: 1252 1751 release_net(ovs_dp_get_net(dp)); 1253 1752 kfree(dp); ··· 1837 2336 return err; 1838 2337 } 1839 2338 1840 - static void rehash_flow_table(struct work_struct *work) 1841 - { 1842 - struct datapath *dp; 1843 - struct net *net; 1844 - 1845 - ovs_lock(); 1846 - rtnl_lock(); 1847 - for_each_net(net) { 1848 - struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1849 - 1850 - list_for_each_entry(dp, &ovs_net->dps, list_node) { 1851 - struct flow_table *old_table = ovsl_dereference(dp->table); 1852 - struct flow_table *new_table; 1853 - 1854 - new_table = ovs_flow_tbl_rehash(old_table); 1855 - if (!IS_ERR(new_table)) { 1856 - rcu_assign_pointer(dp->table, new_table); 1857 - ovs_flow_tbl_destroy(old_table, true); 1858 - } 1859 - } 1860 - } 1861 - rtnl_unlock(); 1862 - ovs_unlock(); 1863 - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 1864 - } 1865 - 1866 2339 static int __net_init ovs_init_net(struct net *net) 1867 
2340 { 1868 2341 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); ··· 1894 2419 if (err < 0) 1895 2420 goto error_unreg_notifier; 1896 2421 1897 - schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 1898 - 1899 2422 return 0; 1900 2423 1901 2424 error_unreg_notifier: ··· 1910 2437 1911 2438 static void dp_cleanup(void) 1912 2439 { 1913 - cancel_delayed_work_sync(&rehash_flow_wq); 1914 2440 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 1915 2441 unregister_netdevice_notifier(&ovs_dp_device_notifier); 1916 2442 unregister_pernet_device(&ovs_net_ops);
+7 -2
net/openvswitch/datapath.h
··· 27 27 #include <linux/u64_stats_sync.h> 28 28 29 29 #include "flow.h" 30 + #include "flow_table.h" 30 31 #include "vport.h" 31 32 32 33 #define DP_MAX_PORTS USHRT_MAX ··· 46 45 * @n_lost: Number of received packets that had no matching flow in the flow 47 46 * table that could not be sent to userspace (normally due to an overflow in 48 47 * one of the datapath's queues). 48 + * @n_mask_hit: Number of masks looked up for flow match. 49 + * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked 50 + * up per packet. 49 51 */ 50 52 struct dp_stats_percpu { 51 53 u64 n_hit; 52 54 u64 n_missed; 53 55 u64 n_lost; 56 + u64 n_mask_hit; 54 57 struct u64_stats_sync sync; 55 58 }; 56 59 ··· 62 57 * struct datapath - datapath for flow-based packet switching 63 58 * @rcu: RCU callback head for deferred destruction. 64 59 * @list_node: Element in global 'dps' list. 65 - * @table: Current flow table. Protected by ovs_mutex and RCU. 60 + * @table: flow table. 66 61 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 67 62 * ovs_mutex and RCU. 68 63 * @stats_percpu: Per-CPU datapath statistics. ··· 76 71 struct list_head list_node; 77 72 78 73 /* Flow table. */ 79 - struct flow_table __rcu *table; 74 + struct flow_table table; 80 75 81 76 /* Switch ports. */ 82 77 struct hlist_head *ports;
+24 -1581
net/openvswitch/flow.c
··· 45 45 #include <net/ipv6.h> 46 46 #include <net/ndisc.h> 47 47 48 - static struct kmem_cache *flow_cache; 49 - 50 - static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, 51 - struct sw_flow_key_range *range, u8 val); 52 - 53 - static void update_range__(struct sw_flow_match *match, 54 - size_t offset, size_t size, bool is_mask) 48 + u64 ovs_flow_used_time(unsigned long flow_jiffies) 55 49 { 56 - struct sw_flow_key_range *range = NULL; 57 - size_t start = rounddown(offset, sizeof(long)); 58 - size_t end = roundup(offset + size, sizeof(long)); 50 + struct timespec cur_ts; 51 + u64 cur_ms, idle_ms; 59 52 60 - if (!is_mask) 61 - range = &match->range; 62 - else if (match->mask) 63 - range = &match->mask->range; 53 + ktime_get_ts(&cur_ts); 54 + idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); 55 + cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + 56 + cur_ts.tv_nsec / NSEC_PER_MSEC; 64 57 65 - if (!range) 66 - return; 67 - 68 - if (range->start == range->end) { 69 - range->start = start; 70 - range->end = end; 71 - return; 72 - } 73 - 74 - if (range->start > start) 75 - range->start = start; 76 - 77 - if (range->end < end) 78 - range->end = end; 58 + return cur_ms - idle_ms; 79 59 } 80 60 81 - #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 82 - do { \ 83 - update_range__(match, offsetof(struct sw_flow_key, field), \ 84 - sizeof((match)->key->field), is_mask); \ 85 - if (is_mask) { \ 86 - if ((match)->mask) \ 87 - (match)->mask->key.field = value; \ 88 - } else { \ 89 - (match)->key->field = value; \ 90 - } \ 91 - } while (0) 61 + #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) 92 62 93 - #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 94 - do { \ 95 - update_range__(match, offsetof(struct sw_flow_key, field), \ 96 - len, is_mask); \ 97 - if (is_mask) { \ 98 - if ((match)->mask) \ 99 - memcpy(&(match)->mask->key.field, value_p, len);\ 100 - } else { \ 101 - memcpy(&(match)->key->field, value_p, len); \ 102 - } 
\ 103 - } while (0) 104 - 105 - static u16 range_n_bytes(const struct sw_flow_key_range *range) 63 + void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) 106 64 { 107 - return range->end - range->start; 108 - } 65 + __be16 tcp_flags = 0; 109 66 110 - void ovs_match_init(struct sw_flow_match *match, 111 - struct sw_flow_key *key, 112 - struct sw_flow_mask *mask) 113 - { 114 - memset(match, 0, sizeof(*match)); 115 - match->key = key; 116 - match->mask = mask; 117 - 118 - memset(key, 0, sizeof(*key)); 119 - 120 - if (mask) { 121 - memset(&mask->key, 0, sizeof(mask->key)); 122 - mask->range.start = mask->range.end = 0; 123 - } 124 - } 125 - 126 - static bool ovs_match_validate(const struct sw_flow_match *match, 127 - u64 key_attrs, u64 mask_attrs) 128 - { 129 - u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 130 - u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 131 - 132 - /* The following mask attributes allowed only if they 133 - * pass the validation tests. */ 134 - mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 135 - | (1 << OVS_KEY_ATTR_IPV6) 136 - | (1 << OVS_KEY_ATTR_TCP) 137 - | (1 << OVS_KEY_ATTR_UDP) 138 - | (1 << OVS_KEY_ATTR_SCTP) 139 - | (1 << OVS_KEY_ATTR_ICMP) 140 - | (1 << OVS_KEY_ATTR_ICMPV6) 141 - | (1 << OVS_KEY_ATTR_ARP) 142 - | (1 << OVS_KEY_ATTR_ND)); 143 - 144 - /* Always allowed mask fields. */ 145 - mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 146 - | (1 << OVS_KEY_ATTR_IN_PORT) 147 - | (1 << OVS_KEY_ATTR_ETHERTYPE)); 148 - 149 - /* Check key attributes. 
*/ 150 - if (match->key->eth.type == htons(ETH_P_ARP) 151 - || match->key->eth.type == htons(ETH_P_RARP)) { 152 - key_expected |= 1 << OVS_KEY_ATTR_ARP; 153 - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 154 - mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 67 + if ((flow->key.eth.type == htons(ETH_P_IP) || 68 + flow->key.eth.type == htons(ETH_P_IPV6)) && 69 + flow->key.ip.proto == IPPROTO_TCP && 70 + likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { 71 + tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); 155 72 } 156 73 157 - if (match->key->eth.type == htons(ETH_P_IP)) { 158 - key_expected |= 1 << OVS_KEY_ATTR_IPV4; 159 - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 160 - mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 161 - 162 - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 163 - if (match->key->ip.proto == IPPROTO_UDP) { 164 - key_expected |= 1 << OVS_KEY_ATTR_UDP; 165 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 166 - mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 167 - } 168 - 169 - if (match->key->ip.proto == IPPROTO_SCTP) { 170 - key_expected |= 1 << OVS_KEY_ATTR_SCTP; 171 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 172 - mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 173 - } 174 - 175 - if (match->key->ip.proto == IPPROTO_TCP) { 176 - key_expected |= 1 << OVS_KEY_ATTR_TCP; 177 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 178 - mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 179 - } 180 - 181 - if (match->key->ip.proto == IPPROTO_ICMP) { 182 - key_expected |= 1 << OVS_KEY_ATTR_ICMP; 183 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 184 - mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 185 - } 186 - } 187 - } 188 - 189 - if (match->key->eth.type == htons(ETH_P_IPV6)) { 190 - key_expected |= 1 << OVS_KEY_ATTR_IPV6; 191 - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 192 - mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 193 - 194 - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 195 - 
if (match->key->ip.proto == IPPROTO_UDP) { 196 - key_expected |= 1 << OVS_KEY_ATTR_UDP; 197 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 198 - mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 199 - } 200 - 201 - if (match->key->ip.proto == IPPROTO_SCTP) { 202 - key_expected |= 1 << OVS_KEY_ATTR_SCTP; 203 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 204 - mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 205 - } 206 - 207 - if (match->key->ip.proto == IPPROTO_TCP) { 208 - key_expected |= 1 << OVS_KEY_ATTR_TCP; 209 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 210 - mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 211 - } 212 - 213 - if (match->key->ip.proto == IPPROTO_ICMPV6) { 214 - key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 215 - if (match->mask && (match->mask->key.ip.proto == 0xff)) 216 - mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 217 - 218 - if (match->key->ipv6.tp.src == 219 - htons(NDISC_NEIGHBOUR_SOLICITATION) || 220 - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 221 - key_expected |= 1 << OVS_KEY_ATTR_ND; 222 - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) 223 - mask_allowed |= 1 << OVS_KEY_ATTR_ND; 224 - } 225 - } 226 - } 227 - } 228 - 229 - if ((key_attrs & key_expected) != key_expected) { 230 - /* Key attributes check failed. */ 231 - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", 232 - key_attrs, key_expected); 233 - return false; 234 - } 235 - 236 - if ((mask_attrs & mask_allowed) != mask_attrs) { 237 - /* Mask attributes check failed. 
*/ 238 - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", 239 - mask_attrs, mask_allowed); 240 - return false; 241 - } 242 - 243 - return true; 74 + spin_lock(&flow->lock); 75 + flow->used = jiffies; 76 + flow->packet_count++; 77 + flow->byte_count += skb->len; 78 + flow->tcp_flags |= tcp_flags; 79 + spin_unlock(&flow->lock); 244 80 } 245 81 246 82 static int check_header(struct sk_buff *skb, int len) ··· 147 311 sizeof(struct icmphdr)); 148 312 } 149 313 150 - u64 ovs_flow_used_time(unsigned long flow_jiffies) 151 - { 152 - struct timespec cur_ts; 153 - u64 cur_ms, idle_ms; 154 - 155 - ktime_get_ts(&cur_ts); 156 - idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); 157 - cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + 158 - cur_ts.tv_nsec / NSEC_PER_MSEC; 159 - 160 - return cur_ms - idle_ms; 161 - } 162 - 163 314 static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) 164 315 { 165 316 unsigned int nh_ofs = skb_network_offset(skb); ··· 193 370 { 194 371 return pskb_may_pull(skb, skb_transport_offset(skb) + 195 372 sizeof(struct icmp6hdr)); 196 - } 197 - 198 - void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, 199 - const struct sw_flow_mask *mask) 200 - { 201 - const long *m = (long *)((u8 *)&mask->key + mask->range.start); 202 - const long *s = (long *)((u8 *)src + mask->range.start); 203 - long *d = (long *)((u8 *)dst + mask->range.start); 204 - int i; 205 - 206 - /* The memory outside of the 'mask->range' are not set since 207 - * further operations on 'dst' only uses contents within 208 - * 'mask->range'. 
209 - */ 210 - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) 211 - *d++ = *s++ & *m++; 212 - } 213 - 214 - #define TCP_FLAGS_OFFSET 13 215 - #define TCP_FLAG_MASK 0x3f 216 - 217 - void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) 218 - { 219 - u8 tcp_flags = 0; 220 - 221 - if ((flow->key.eth.type == htons(ETH_P_IP) || 222 - flow->key.eth.type == htons(ETH_P_IPV6)) && 223 - flow->key.ip.proto == IPPROTO_TCP && 224 - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { 225 - u8 *tcp = (u8 *)tcp_hdr(skb); 226 - tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; 227 - } 228 - 229 - spin_lock(&flow->lock); 230 - flow->used = jiffies; 231 - flow->packet_count++; 232 - flow->byte_count += skb->len; 233 - flow->tcp_flags |= tcp_flags; 234 - spin_unlock(&flow->lock); 235 - } 236 - 237 - struct sw_flow_actions *ovs_flow_actions_alloc(int size) 238 - { 239 - struct sw_flow_actions *sfa; 240 - 241 - if (size > MAX_ACTIONS_BUFSIZE) 242 - return ERR_PTR(-EINVAL); 243 - 244 - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 245 - if (!sfa) 246 - return ERR_PTR(-ENOMEM); 247 - 248 - sfa->actions_len = 0; 249 - return sfa; 250 - } 251 - 252 - struct sw_flow *ovs_flow_alloc(void) 253 - { 254 - struct sw_flow *flow; 255 - 256 - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); 257 - if (!flow) 258 - return ERR_PTR(-ENOMEM); 259 - 260 - spin_lock_init(&flow->lock); 261 - flow->sf_acts = NULL; 262 - flow->mask = NULL; 263 - 264 - return flow; 265 - } 266 - 267 - static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) 268 - { 269 - hash = jhash_1word(hash, table->hash_seed); 270 - return flex_array_get(table->buckets, 271 - (hash & (table->n_buckets - 1))); 272 - } 273 - 274 - static struct flex_array *alloc_buckets(unsigned int n_buckets) 275 - { 276 - struct flex_array *buckets; 277 - int i, err; 278 - 279 - buckets = flex_array_alloc(sizeof(struct hlist_head), 280 - n_buckets, GFP_KERNEL); 281 - if (!buckets) 282 - 
return NULL; 283 - 284 - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); 285 - if (err) { 286 - flex_array_free(buckets); 287 - return NULL; 288 - } 289 - 290 - for (i = 0; i < n_buckets; i++) 291 - INIT_HLIST_HEAD((struct hlist_head *) 292 - flex_array_get(buckets, i)); 293 - 294 - return buckets; 295 - } 296 - 297 - static void free_buckets(struct flex_array *buckets) 298 - { 299 - flex_array_free(buckets); 300 - } 301 - 302 - static struct flow_table *__flow_tbl_alloc(int new_size) 303 - { 304 - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); 305 - 306 - if (!table) 307 - return NULL; 308 - 309 - table->buckets = alloc_buckets(new_size); 310 - 311 - if (!table->buckets) { 312 - kfree(table); 313 - return NULL; 314 - } 315 - table->n_buckets = new_size; 316 - table->count = 0; 317 - table->node_ver = 0; 318 - table->keep_flows = false; 319 - get_random_bytes(&table->hash_seed, sizeof(u32)); 320 - table->mask_list = NULL; 321 - 322 - return table; 323 - } 324 - 325 - static void __flow_tbl_destroy(struct flow_table *table) 326 - { 327 - int i; 328 - 329 - if (table->keep_flows) 330 - goto skip_flows; 331 - 332 - for (i = 0; i < table->n_buckets; i++) { 333 - struct sw_flow *flow; 334 - struct hlist_head *head = flex_array_get(table->buckets, i); 335 - struct hlist_node *n; 336 - int ver = table->node_ver; 337 - 338 - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { 339 - hlist_del(&flow->hash_node[ver]); 340 - ovs_flow_free(flow, false); 341 - } 342 - } 343 - 344 - BUG_ON(!list_empty(table->mask_list)); 345 - kfree(table->mask_list); 346 - 347 - skip_flows: 348 - free_buckets(table->buckets); 349 - kfree(table); 350 - } 351 - 352 - struct flow_table *ovs_flow_tbl_alloc(int new_size) 353 - { 354 - struct flow_table *table = __flow_tbl_alloc(new_size); 355 - 356 - if (!table) 357 - return NULL; 358 - 359 - table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); 360 - if (!table->mask_list) { 361 - table->keep_flows = 
true; 362 - __flow_tbl_destroy(table); 363 - return NULL; 364 - } 365 - INIT_LIST_HEAD(table->mask_list); 366 - 367 - return table; 368 - } 369 - 370 - static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) 371 - { 372 - struct flow_table *table = container_of(rcu, struct flow_table, rcu); 373 - 374 - __flow_tbl_destroy(table); 375 - } 376 - 377 - void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) 378 - { 379 - if (!table) 380 - return; 381 - 382 - if (deferred) 383 - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); 384 - else 385 - __flow_tbl_destroy(table); 386 - } 387 - 388 - struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last) 389 - { 390 - struct sw_flow *flow; 391 - struct hlist_head *head; 392 - int ver; 393 - int i; 394 - 395 - ver = table->node_ver; 396 - while (*bucket < table->n_buckets) { 397 - i = 0; 398 - head = flex_array_get(table->buckets, *bucket); 399 - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { 400 - if (i < *last) { 401 - i++; 402 - continue; 403 - } 404 - *last = i + 1; 405 - return flow; 406 - } 407 - (*bucket)++; 408 - *last = 0; 409 - } 410 - 411 - return NULL; 412 - } 413 - 414 - static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) 415 - { 416 - struct hlist_head *head; 417 - 418 - head = find_bucket(table, flow->hash); 419 - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); 420 - 421 - table->count++; 422 - } 423 - 424 - static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) 425 - { 426 - int old_ver; 427 - int i; 428 - 429 - old_ver = old->node_ver; 430 - new->node_ver = !old_ver; 431 - 432 - /* Insert in new table. 
*/ 433 - for (i = 0; i < old->n_buckets; i++) { 434 - struct sw_flow *flow; 435 - struct hlist_head *head; 436 - 437 - head = flex_array_get(old->buckets, i); 438 - 439 - hlist_for_each_entry(flow, head, hash_node[old_ver]) 440 - __tbl_insert(new, flow); 441 - } 442 - 443 - new->mask_list = old->mask_list; 444 - old->keep_flows = true; 445 - } 446 - 447 - static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) 448 - { 449 - struct flow_table *new_table; 450 - 451 - new_table = __flow_tbl_alloc(n_buckets); 452 - if (!new_table) 453 - return ERR_PTR(-ENOMEM); 454 - 455 - flow_table_copy_flows(table, new_table); 456 - 457 - return new_table; 458 - } 459 - 460 - struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) 461 - { 462 - return __flow_tbl_rehash(table, table->n_buckets); 463 - } 464 - 465 - struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) 466 - { 467 - return __flow_tbl_rehash(table, table->n_buckets * 2); 468 - } 469 - 470 - static void __flow_free(struct sw_flow *flow) 471 - { 472 - kfree((struct sf_flow_acts __force *)flow->sf_acts); 473 - kmem_cache_free(flow_cache, flow); 474 - } 475 - 476 - static void rcu_free_flow_callback(struct rcu_head *rcu) 477 - { 478 - struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 479 - 480 - __flow_free(flow); 481 - } 482 - 483 - void ovs_flow_free(struct sw_flow *flow, bool deferred) 484 - { 485 - if (!flow) 486 - return; 487 - 488 - ovs_sw_flow_mask_del_ref(flow->mask, deferred); 489 - 490 - if (deferred) 491 - call_rcu(&flow->rcu, rcu_free_flow_callback); 492 - else 493 - __flow_free(flow); 494 - } 495 - 496 - /* Schedules 'sf_acts' to be freed after the next RCU grace period. 497 - * The caller must hold rcu_read_lock for this to be sensible. 
*/ 498 - void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) 499 - { 500 - kfree_rcu(sf_acts, rcu); 501 373 } 502 374 503 375 static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) ··· 428 910 struct tcphdr *tcp = tcp_hdr(skb); 429 911 key->ipv4.tp.src = tcp->source; 430 912 key->ipv4.tp.dst = tcp->dest; 913 + key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); 431 914 } 432 915 } else if (key->ip.proto == IPPROTO_UDP) { 433 916 if (udphdr_ok(skb)) { ··· 497 978 struct tcphdr *tcp = tcp_hdr(skb); 498 979 key->ipv6.tp.src = tcp->source; 499 980 key->ipv6.tp.dst = tcp->dest; 981 + key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); 500 982 } 501 983 } else if (key->ip.proto == NEXTHDR_UDP) { 502 984 if (udphdr_ok(skb)) { ··· 521 1001 } 522 1002 523 1003 return 0; 524 - } 525 - 526 - static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, 527 - int key_end) 528 - { 529 - u32 *hash_key = (u32 *)((u8 *)key + key_start); 530 - int hash_u32s = (key_end - key_start) >> 2; 531 - 532 - /* Make sure number of hash bytes are multiple of u32. 
*/ 533 - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); 534 - 535 - return jhash2(hash_key, hash_u32s, 0); 536 - } 537 - 538 - static int flow_key_start(const struct sw_flow_key *key) 539 - { 540 - if (key->tun_key.ipv4_dst) 541 - return 0; 542 - else 543 - return rounddown(offsetof(struct sw_flow_key, phy), 544 - sizeof(long)); 545 - } 546 - 547 - static bool __cmp_key(const struct sw_flow_key *key1, 548 - const struct sw_flow_key *key2, int key_start, int key_end) 549 - { 550 - const long *cp1 = (long *)((u8 *)key1 + key_start); 551 - const long *cp2 = (long *)((u8 *)key2 + key_start); 552 - long diffs = 0; 553 - int i; 554 - 555 - for (i = key_start; i < key_end; i += sizeof(long)) 556 - diffs |= *cp1++ ^ *cp2++; 557 - 558 - return diffs == 0; 559 - } 560 - 561 - static bool __flow_cmp_masked_key(const struct sw_flow *flow, 562 - const struct sw_flow_key *key, int key_start, int key_end) 563 - { 564 - return __cmp_key(&flow->key, key, key_start, key_end); 565 - } 566 - 567 - static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, 568 - const struct sw_flow_key *key, int key_start, int key_end) 569 - { 570 - return __cmp_key(&flow->unmasked_key, key, key_start, key_end); 571 - } 572 - 573 - bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 574 - const struct sw_flow_key *key, int key_end) 575 - { 576 - int key_start; 577 - key_start = flow_key_start(key); 578 - 579 - return __flow_cmp_unmasked_key(flow, key, key_start, key_end); 580 - 581 - } 582 - 583 - struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, 584 - struct sw_flow_match *match) 585 - { 586 - struct sw_flow_key *unmasked = match->key; 587 - int key_end = match->range.end; 588 - struct sw_flow *flow; 589 - 590 - flow = ovs_flow_lookup(table, unmasked); 591 - if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) 592 - flow = NULL; 593 - 594 - return flow; 595 - } 596 - 597 - static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, 598 - const 
struct sw_flow_key *unmasked, 599 - struct sw_flow_mask *mask) 600 - { 601 - struct sw_flow *flow; 602 - struct hlist_head *head; 603 - int key_start = mask->range.start; 604 - int key_end = mask->range.end; 605 - u32 hash; 606 - struct sw_flow_key masked_key; 607 - 608 - ovs_flow_key_mask(&masked_key, unmasked, mask); 609 - hash = ovs_flow_hash(&masked_key, key_start, key_end); 610 - head = find_bucket(table, hash); 611 - hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { 612 - if (flow->mask == mask && 613 - __flow_cmp_masked_key(flow, &masked_key, 614 - key_start, key_end)) 615 - return flow; 616 - } 617 - return NULL; 618 - } 619 - 620 - struct sw_flow *ovs_flow_lookup(struct flow_table *tbl, 621 - const struct sw_flow_key *key) 622 - { 623 - struct sw_flow *flow = NULL; 624 - struct sw_flow_mask *mask; 625 - 626 - list_for_each_entry_rcu(mask, tbl->mask_list, list) { 627 - flow = ovs_masked_flow_lookup(tbl, key, mask); 628 - if (flow) /* Found */ 629 - break; 630 - } 631 - 632 - return flow; 633 - } 634 - 635 - 636 - void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow) 637 - { 638 - flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start, 639 - flow->mask->range.end); 640 - __tbl_insert(table, flow); 641 - } 642 - 643 - void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow) 644 - { 645 - BUG_ON(table->count == 0); 646 - hlist_del_rcu(&flow->hash_node[table->node_ver]); 647 - table->count--; 648 - } 649 - 650 - /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ 651 - const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 652 - [OVS_KEY_ATTR_ENCAP] = -1, 653 - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 654 - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 655 - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 656 - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 657 - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 658 - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 659 - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 660 - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 661 - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 662 - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 663 - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 664 - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 665 - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 666 - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 667 - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 668 - [OVS_KEY_ATTR_TUNNEL] = -1, 669 - }; 670 - 671 - static bool is_all_zero(const u8 *fp, size_t size) 672 - { 673 - int i; 674 - 675 - if (!fp) 676 - return false; 677 - 678 - for (i = 0; i < size; i++) 679 - if (fp[i]) 680 - return false; 681 - 682 - return true; 683 - } 684 - 685 - static int __parse_flow_nlattrs(const struct nlattr *attr, 686 - const struct nlattr *a[], 687 - u64 *attrsp, bool nz) 688 - { 689 - const struct nlattr *nla; 690 - u32 attrs; 691 - int rem; 692 - 693 - attrs = *attrsp; 694 - nla_for_each_nested(nla, attr, rem) { 695 - u16 type = nla_type(nla); 696 - int expected_len; 697 - 698 - if (type > OVS_KEY_ATTR_MAX) { 699 - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", 700 - type, OVS_KEY_ATTR_MAX); 701 - return -EINVAL; 702 - } 703 - 704 - if (attrs & (1 << type)) { 705 - OVS_NLERR("Duplicate key attribute (type %d).\n", type); 706 - return -EINVAL; 707 - } 708 - 709 - expected_len = ovs_key_lens[type]; 710 - if (nla_len(nla) != expected_len && expected_len != -1) { 711 - OVS_NLERR("Key attribute has unexpected length (type=%d" 712 - ", length=%d, 
expected=%d).\n", type, 713 - nla_len(nla), expected_len); 714 - return -EINVAL; 715 - } 716 - 717 - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 718 - attrs |= 1 << type; 719 - a[type] = nla; 720 - } 721 - } 722 - if (rem) { 723 - OVS_NLERR("Message has %d unknown bytes.\n", rem); 724 - return -EINVAL; 725 - } 726 - 727 - *attrsp = attrs; 728 - return 0; 729 - } 730 - 731 - static int parse_flow_mask_nlattrs(const struct nlattr *attr, 732 - const struct nlattr *a[], u64 *attrsp) 733 - { 734 - return __parse_flow_nlattrs(attr, a, attrsp, true); 735 - } 736 - 737 - static int parse_flow_nlattrs(const struct nlattr *attr, 738 - const struct nlattr *a[], u64 *attrsp) 739 - { 740 - return __parse_flow_nlattrs(attr, a, attrsp, false); 741 - } 742 - 743 - int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 744 - struct sw_flow_match *match, bool is_mask) 745 - { 746 - struct nlattr *a; 747 - int rem; 748 - bool ttl = false; 749 - __be16 tun_flags = 0; 750 - 751 - nla_for_each_nested(a, attr, rem) { 752 - int type = nla_type(a); 753 - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 754 - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 755 - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 756 - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 757 - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 758 - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 759 - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 760 - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 761 - }; 762 - 763 - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 764 - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", 765 - type, OVS_TUNNEL_KEY_ATTR_MAX); 766 - return -EINVAL; 767 - } 768 - 769 - if (ovs_tunnel_key_lens[type] != nla_len(a)) { 770 - OVS_NLERR("IPv4 tunnel attribute type has unexpected " 771 - " length (type=%d, length=%d, expected=%d).\n", 772 - type, nla_len(a), ovs_tunnel_key_lens[type]); 773 - return -EINVAL; 774 - } 775 - 776 - switch (type) { 777 - case OVS_TUNNEL_KEY_ATTR_ID: 778 - SW_FLOW_KEY_PUT(match, tun_key.tun_id, 
779 - nla_get_be64(a), is_mask); 780 - tun_flags |= TUNNEL_KEY; 781 - break; 782 - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 783 - SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, 784 - nla_get_be32(a), is_mask); 785 - break; 786 - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 787 - SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, 788 - nla_get_be32(a), is_mask); 789 - break; 790 - case OVS_TUNNEL_KEY_ATTR_TOS: 791 - SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, 792 - nla_get_u8(a), is_mask); 793 - break; 794 - case OVS_TUNNEL_KEY_ATTR_TTL: 795 - SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, 796 - nla_get_u8(a), is_mask); 797 - ttl = true; 798 - break; 799 - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 800 - tun_flags |= TUNNEL_DONT_FRAGMENT; 801 - break; 802 - case OVS_TUNNEL_KEY_ATTR_CSUM: 803 - tun_flags |= TUNNEL_CSUM; 804 - break; 805 - default: 806 - return -EINVAL; 807 - } 808 - } 809 - 810 - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 811 - 812 - if (rem > 0) { 813 - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); 814 - return -EINVAL; 815 - } 816 - 817 - if (!is_mask) { 818 - if (!match->key->tun_key.ipv4_dst) { 819 - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); 820 - return -EINVAL; 821 - } 822 - 823 - if (!ttl) { 824 - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); 825 - return -EINVAL; 826 - } 827 - } 828 - 829 - return 0; 830 - } 831 - 832 - int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 833 - const struct ovs_key_ipv4_tunnel *tun_key, 834 - const struct ovs_key_ipv4_tunnel *output) 835 - { 836 - struct nlattr *nla; 837 - 838 - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); 839 - if (!nla) 840 - return -EMSGSIZE; 841 - 842 - if (output->tun_flags & TUNNEL_KEY && 843 - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) 844 - return -EMSGSIZE; 845 - if (output->ipv4_src && 846 - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) 847 - return -EMSGSIZE; 848 - if (output->ipv4_dst && 849 - nla_put_be32(skb, 
OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) 850 - return -EMSGSIZE; 851 - if (output->ipv4_tos && 852 - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) 853 - return -EMSGSIZE; 854 - if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) 855 - return -EMSGSIZE; 856 - if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && 857 - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 858 - return -EMSGSIZE; 859 - if ((output->tun_flags & TUNNEL_CSUM) && 860 - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 861 - return -EMSGSIZE; 862 - 863 - nla_nest_end(skb, nla); 864 - return 0; 865 - } 866 - 867 - static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 868 - const struct nlattr **a, bool is_mask) 869 - { 870 - if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 871 - SW_FLOW_KEY_PUT(match, phy.priority, 872 - nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); 873 - *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 874 - } 875 - 876 - if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 877 - u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 878 - 879 - if (is_mask) 880 - in_port = 0xffffffff; /* Always exact match in_port. 
*/ 881 - else if (in_port >= DP_MAX_PORTS) 882 - return -EINVAL; 883 - 884 - SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 885 - *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 886 - } else if (!is_mask) { 887 - SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); 888 - } 889 - 890 - if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 891 - uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 892 - 893 - SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); 894 - *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 895 - } 896 - if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 897 - if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 898 - is_mask)) 899 - return -EINVAL; 900 - *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 901 - } 902 - return 0; 903 - } 904 - 905 - static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 906 - const struct nlattr **a, bool is_mask) 907 - { 908 - int err; 909 - u64 orig_attrs = attrs; 910 - 911 - err = metadata_from_nlattrs(match, &attrs, a, is_mask); 912 - if (err) 913 - return err; 914 - 915 - if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { 916 - const struct ovs_key_ethernet *eth_key; 917 - 918 - eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 919 - SW_FLOW_KEY_MEMCPY(match, eth.src, 920 - eth_key->eth_src, ETH_ALEN, is_mask); 921 - SW_FLOW_KEY_MEMCPY(match, eth.dst, 922 - eth_key->eth_dst, ETH_ALEN, is_mask); 923 - attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); 924 - } 925 - 926 - if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { 927 - __be16 tci; 928 - 929 - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 930 - if (!(tci & htons(VLAN_TAG_PRESENT))) { 931 - if (is_mask) 932 - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); 933 - else 934 - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); 935 - 936 - return -EINVAL; 937 - } 938 - 939 - SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 940 - attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 941 - } else if (!is_mask) 942 - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); 
943 - 944 - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 945 - __be16 eth_type; 946 - 947 - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 948 - if (is_mask) { 949 - /* Always exact match EtherType. */ 950 - eth_type = htons(0xffff); 951 - } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 952 - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", 953 - ntohs(eth_type), ETH_P_802_3_MIN); 954 - return -EINVAL; 955 - } 956 - 957 - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); 958 - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 959 - } else if (!is_mask) { 960 - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 961 - } 962 - 963 - if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 964 - const struct ovs_key_ipv4 *ipv4_key; 965 - 966 - ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 967 - if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 968 - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", 969 - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 970 - return -EINVAL; 971 - } 972 - SW_FLOW_KEY_PUT(match, ip.proto, 973 - ipv4_key->ipv4_proto, is_mask); 974 - SW_FLOW_KEY_PUT(match, ip.tos, 975 - ipv4_key->ipv4_tos, is_mask); 976 - SW_FLOW_KEY_PUT(match, ip.ttl, 977 - ipv4_key->ipv4_ttl, is_mask); 978 - SW_FLOW_KEY_PUT(match, ip.frag, 979 - ipv4_key->ipv4_frag, is_mask); 980 - SW_FLOW_KEY_PUT(match, ipv4.addr.src, 981 - ipv4_key->ipv4_src, is_mask); 982 - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 983 - ipv4_key->ipv4_dst, is_mask); 984 - attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 985 - } 986 - 987 - if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { 988 - const struct ovs_key_ipv6 *ipv6_key; 989 - 990 - ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 991 - if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 992 - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", 993 - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 994 - return -EINVAL; 995 - } 996 - SW_FLOW_KEY_PUT(match, ipv6.label, 997 - ipv6_key->ipv6_label, is_mask); 998 - SW_FLOW_KEY_PUT(match, ip.proto, 
999 - ipv6_key->ipv6_proto, is_mask); 1000 - SW_FLOW_KEY_PUT(match, ip.tos, 1001 - ipv6_key->ipv6_tclass, is_mask); 1002 - SW_FLOW_KEY_PUT(match, ip.ttl, 1003 - ipv6_key->ipv6_hlimit, is_mask); 1004 - SW_FLOW_KEY_PUT(match, ip.frag, 1005 - ipv6_key->ipv6_frag, is_mask); 1006 - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, 1007 - ipv6_key->ipv6_src, 1008 - sizeof(match->key->ipv6.addr.src), 1009 - is_mask); 1010 - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, 1011 - ipv6_key->ipv6_dst, 1012 - sizeof(match->key->ipv6.addr.dst), 1013 - is_mask); 1014 - 1015 - attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 1016 - } 1017 - 1018 - if (attrs & (1 << OVS_KEY_ATTR_ARP)) { 1019 - const struct ovs_key_arp *arp_key; 1020 - 1021 - arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1022 - if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 1023 - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", 1024 - arp_key->arp_op); 1025 - return -EINVAL; 1026 - } 1027 - 1028 - SW_FLOW_KEY_PUT(match, ipv4.addr.src, 1029 - arp_key->arp_sip, is_mask); 1030 - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 1031 - arp_key->arp_tip, is_mask); 1032 - SW_FLOW_KEY_PUT(match, ip.proto, 1033 - ntohs(arp_key->arp_op), is_mask); 1034 - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, 1035 - arp_key->arp_sha, ETH_ALEN, is_mask); 1036 - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, 1037 - arp_key->arp_tha, ETH_ALEN, is_mask); 1038 - 1039 - attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1040 - } 1041 - 1042 - if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 1043 - const struct ovs_key_tcp *tcp_key; 1044 - 1045 - tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); 1046 - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1047 - SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1048 - tcp_key->tcp_src, is_mask); 1049 - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1050 - tcp_key->tcp_dst, is_mask); 1051 - } else { 1052 - SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1053 - tcp_key->tcp_src, is_mask); 1054 - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1055 - tcp_key->tcp_dst, is_mask); 1056 - } 1057 - attrs &= ~(1 << OVS_KEY_ATTR_TCP); 1058 - } 
1059 - 1060 - if (attrs & (1 << OVS_KEY_ATTR_UDP)) { 1061 - const struct ovs_key_udp *udp_key; 1062 - 1063 - udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); 1064 - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1065 - SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1066 - udp_key->udp_src, is_mask); 1067 - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1068 - udp_key->udp_dst, is_mask); 1069 - } else { 1070 - SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1071 - udp_key->udp_src, is_mask); 1072 - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1073 - udp_key->udp_dst, is_mask); 1074 - } 1075 - attrs &= ~(1 << OVS_KEY_ATTR_UDP); 1076 - } 1077 - 1078 - if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { 1079 - const struct ovs_key_sctp *sctp_key; 1080 - 1081 - sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); 1082 - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 1083 - SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1084 - sctp_key->sctp_src, is_mask); 1085 - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1086 - sctp_key->sctp_dst, is_mask); 1087 - } else { 1088 - SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1089 - sctp_key->sctp_src, is_mask); 1090 - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1091 - sctp_key->sctp_dst, is_mask); 1092 - } 1093 - attrs &= ~(1 << OVS_KEY_ATTR_SCTP); 1094 - } 1095 - 1096 - if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { 1097 - const struct ovs_key_icmp *icmp_key; 1098 - 1099 - icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); 1100 - SW_FLOW_KEY_PUT(match, ipv4.tp.src, 1101 - htons(icmp_key->icmp_type), is_mask); 1102 - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, 1103 - htons(icmp_key->icmp_code), is_mask); 1104 - attrs &= ~(1 << OVS_KEY_ATTR_ICMP); 1105 - } 1106 - 1107 - if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { 1108 - const struct ovs_key_icmpv6 *icmpv6_key; 1109 - 1110 - icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); 1111 - SW_FLOW_KEY_PUT(match, ipv6.tp.src, 1112 - htons(icmpv6_key->icmpv6_type), is_mask); 1113 - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 1114 - htons(icmpv6_key->icmpv6_code), is_mask); 1115 - attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 1116 - } 1117 - 1118 - if 
(attrs & (1 << OVS_KEY_ATTR_ND)) { 1119 - const struct ovs_key_nd *nd_key; 1120 - 1121 - nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 1122 - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 1123 - nd_key->nd_target, 1124 - sizeof(match->key->ipv6.nd.target), 1125 - is_mask); 1126 - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 1127 - nd_key->nd_sll, ETH_ALEN, is_mask); 1128 - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 1129 - nd_key->nd_tll, ETH_ALEN, is_mask); 1130 - attrs &= ~(1 << OVS_KEY_ATTR_ND); 1131 - } 1132 - 1133 - if (attrs != 0) 1134 - return -EINVAL; 1135 - 1136 - return 0; 1137 - } 1138 - 1139 - /** 1140 - * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and 1141 - * mask. In case the 'mask' is NULL, the flow is treated as exact match 1142 - * flow. Otherwise, it is treated as a wildcarded flow, except the mask 1143 - * does not include any don't care bit. 1144 - * @match: receives the extracted flow match information. 1145 - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1146 - * sequence. The fields should be those of the packet that triggered the 1147 - * creation of this flow. 1148 - * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 1149 - * attribute specifies the mask field of the wildcarded flow.
1150 - */ 1151 - int ovs_match_from_nlattrs(struct sw_flow_match *match, 1152 - const struct nlattr *key, 1153 - const struct nlattr *mask) 1154 - { 1155 - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1156 - const struct nlattr *encap; 1157 - u64 key_attrs = 0; 1158 - u64 mask_attrs = 0; 1159 - bool encap_valid = false; 1160 - int err; 1161 - 1162 - err = parse_flow_nlattrs(key, a, &key_attrs); 1163 - if (err) 1164 - return err; 1165 - 1166 - if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && 1167 - (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && 1168 - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { 1169 - __be16 tci; 1170 - 1171 - if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 1172 - (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 1173 - OVS_NLERR("Invalid Vlan frame.\n"); 1174 - return -EINVAL; 1175 - } 1176 - 1177 - key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1178 - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1179 - encap = a[OVS_KEY_ATTR_ENCAP]; 1180 - key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1181 - encap_valid = true; 1182 - 1183 - if (tci & htons(VLAN_TAG_PRESENT)) { 1184 - err = parse_flow_nlattrs(encap, a, &key_attrs); 1185 - if (err) 1186 - return err; 1187 - } else if (!tci) { 1188 - /* Corner case for truncated 802.1Q header. 
*/ 1189 - if (nla_len(encap)) { 1190 - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); 1191 - return -EINVAL; 1192 - } 1193 - } else { 1194 - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); 1195 - return -EINVAL; 1196 - } 1197 - } 1198 - 1199 - err = ovs_key_from_nlattrs(match, key_attrs, a, false); 1200 - if (err) 1201 - return err; 1202 - 1203 - if (mask) { 1204 - err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); 1205 - if (err) 1206 - return err; 1207 - 1208 - if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) { 1209 - __be16 eth_type = 0; 1210 - __be16 tci = 0; 1211 - 1212 - if (!encap_valid) { 1213 - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); 1214 - return -EINVAL; 1215 - } 1216 - 1217 - mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); 1218 - if (a[OVS_KEY_ATTR_ETHERTYPE]) 1219 - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1220 - 1221 - if (eth_type == htons(0xffff)) { 1222 - mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1223 - encap = a[OVS_KEY_ATTR_ENCAP]; 1224 - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); 1225 - } else { 1226 - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", 1227 - ntohs(eth_type)); 1228 - return -EINVAL; 1229 - } 1230 - 1231 - if (a[OVS_KEY_ATTR_VLAN]) 1232 - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1233 - 1234 - if (!(tci & htons(VLAN_TAG_PRESENT))) { 1235 - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); 1236 - return -EINVAL; 1237 - } 1238 - } 1239 - 1240 - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); 1241 - if (err) 1242 - return err; 1243 - } else { 1244 - /* Populate exact match flow's key mask. 
*/ 1245 - if (match->mask) 1246 - ovs_sw_flow_mask_set(match->mask, &match->range, 0xff); 1247 - } 1248 - 1249 - if (!ovs_match_validate(match, key_attrs, mask_attrs)) 1250 - return -EINVAL; 1251 - 1252 - return 0; 1253 - } 1254 - 1255 - /** 1256 - * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1257 - * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 1258 - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1259 - * sequence. 1260 - * 1261 - * This parses a series of Netlink attributes that form a flow key, which must 1262 - * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1263 - * get the metadata, that is, the parts of the flow key that cannot be 1264 - * extracted from the packet itself. 1265 - */ 1266 - 1267 - int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, 1268 - const struct nlattr *attr) 1269 - { 1270 - struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 1271 - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1272 - u64 attrs = 0; 1273 - int err; 1274 - struct sw_flow_match match; 1275 - 1276 - flow->key.phy.in_port = DP_MAX_PORTS; 1277 - flow->key.phy.priority = 0; 1278 - flow->key.phy.skb_mark = 0; 1279 - memset(tun_key, 0, sizeof(flow->key.tun_key)); 1280 - 1281 - err = parse_flow_nlattrs(attr, a, &attrs); 1282 - if (err) 1283 - return -EINVAL; 1284 - 1285 - memset(&match, 0, sizeof(match)); 1286 - match.key = &flow->key; 1287 - 1288 - err = metadata_from_nlattrs(&match, &attrs, a, false); 1289 - if (err) 1290 - return err; 1291 - 1292 - return 0; 1293 - } 1294 - 1295 - int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, 1296 - const struct sw_flow_key *output, struct sk_buff *skb) 1297 - { 1298 - struct ovs_key_ethernet *eth_key; 1299 - struct nlattr *nla, *encap; 1300 - bool is_mask = (swkey != output); 1301 - 1302 - if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) 1303 - goto nla_put_failure; 1304 - 1305 - if 
((swkey->tun_key.ipv4_dst || is_mask) && 1306 - ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) 1307 - goto nla_put_failure; 1308 - 1309 - if (swkey->phy.in_port == DP_MAX_PORTS) { 1310 - if (is_mask && (output->phy.in_port == 0xffff)) 1311 - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) 1312 - goto nla_put_failure; 1313 - } else { 1314 - u16 upper_u16; 1315 - upper_u16 = !is_mask ? 0 : 0xffff; 1316 - 1317 - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 1318 - (upper_u16 << 16) | output->phy.in_port)) 1319 - goto nla_put_failure; 1320 - } 1321 - 1322 - if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 1323 - goto nla_put_failure; 1324 - 1325 - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1326 - if (!nla) 1327 - goto nla_put_failure; 1328 - 1329 - eth_key = nla_data(nla); 1330 - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); 1331 - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); 1332 - 1333 - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1334 - __be16 eth_type; 1335 - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 1336 - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 1337 - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 1338 - goto nla_put_failure; 1339 - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1340 - if (!swkey->eth.tci) 1341 - goto unencap; 1342 - } else 1343 - encap = NULL; 1344 - 1345 - if (swkey->eth.type == htons(ETH_P_802_2)) { 1346 - /* 1347 - * Ethertype 802.2 is represented in the netlink with omitted 1348 - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 1349 - * 0xffff in the mask attribute. Ethertype can also 1350 - * be wildcarded. 
1351 - */ 1352 - if (is_mask && output->eth.type) 1353 - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 1354 - output->eth.type)) 1355 - goto nla_put_failure; 1356 - goto unencap; 1357 - } 1358 - 1359 - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 1360 - goto nla_put_failure; 1361 - 1362 - if (swkey->eth.type == htons(ETH_P_IP)) { 1363 - struct ovs_key_ipv4 *ipv4_key; 1364 - 1365 - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 1366 - if (!nla) 1367 - goto nla_put_failure; 1368 - ipv4_key = nla_data(nla); 1369 - ipv4_key->ipv4_src = output->ipv4.addr.src; 1370 - ipv4_key->ipv4_dst = output->ipv4.addr.dst; 1371 - ipv4_key->ipv4_proto = output->ip.proto; 1372 - ipv4_key->ipv4_tos = output->ip.tos; 1373 - ipv4_key->ipv4_ttl = output->ip.ttl; 1374 - ipv4_key->ipv4_frag = output->ip.frag; 1375 - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1376 - struct ovs_key_ipv6 *ipv6_key; 1377 - 1378 - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 1379 - if (!nla) 1380 - goto nla_put_failure; 1381 - ipv6_key = nla_data(nla); 1382 - memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 1383 - sizeof(ipv6_key->ipv6_src)); 1384 - memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 1385 - sizeof(ipv6_key->ipv6_dst)); 1386 - ipv6_key->ipv6_label = output->ipv6.label; 1387 - ipv6_key->ipv6_proto = output->ip.proto; 1388 - ipv6_key->ipv6_tclass = output->ip.tos; 1389 - ipv6_key->ipv6_hlimit = output->ip.ttl; 1390 - ipv6_key->ipv6_frag = output->ip.frag; 1391 - } else if (swkey->eth.type == htons(ETH_P_ARP) || 1392 - swkey->eth.type == htons(ETH_P_RARP)) { 1393 - struct ovs_key_arp *arp_key; 1394 - 1395 - nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1396 - if (!nla) 1397 - goto nla_put_failure; 1398 - arp_key = nla_data(nla); 1399 - memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1400 - arp_key->arp_sip = output->ipv4.addr.src; 1401 - arp_key->arp_tip = output->ipv4.addr.dst; 1402 - arp_key->arp_op = 
htons(output->ip.proto); 1403 - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); 1404 - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); 1405 - } 1406 - 1407 - if ((swkey->eth.type == htons(ETH_P_IP) || 1408 - swkey->eth.type == htons(ETH_P_IPV6)) && 1409 - swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1410 - 1411 - if (swkey->ip.proto == IPPROTO_TCP) { 1412 - struct ovs_key_tcp *tcp_key; 1413 - 1414 - nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1415 - if (!nla) 1416 - goto nla_put_failure; 1417 - tcp_key = nla_data(nla); 1418 - if (swkey->eth.type == htons(ETH_P_IP)) { 1419 - tcp_key->tcp_src = output->ipv4.tp.src; 1420 - tcp_key->tcp_dst = output->ipv4.tp.dst; 1421 - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1422 - tcp_key->tcp_src = output->ipv6.tp.src; 1423 - tcp_key->tcp_dst = output->ipv6.tp.dst; 1424 - } 1425 - } else if (swkey->ip.proto == IPPROTO_UDP) { 1426 - struct ovs_key_udp *udp_key; 1427 - 1428 - nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1429 - if (!nla) 1430 - goto nla_put_failure; 1431 - udp_key = nla_data(nla); 1432 - if (swkey->eth.type == htons(ETH_P_IP)) { 1433 - udp_key->udp_src = output->ipv4.tp.src; 1434 - udp_key->udp_dst = output->ipv4.tp.dst; 1435 - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1436 - udp_key->udp_src = output->ipv6.tp.src; 1437 - udp_key->udp_dst = output->ipv6.tp.dst; 1438 - } 1439 - } else if (swkey->ip.proto == IPPROTO_SCTP) { 1440 - struct ovs_key_sctp *sctp_key; 1441 - 1442 - nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1443 - if (!nla) 1444 - goto nla_put_failure; 1445 - sctp_key = nla_data(nla); 1446 - if (swkey->eth.type == htons(ETH_P_IP)) { 1447 - sctp_key->sctp_src = swkey->ipv4.tp.src; 1448 - sctp_key->sctp_dst = swkey->ipv4.tp.dst; 1449 - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1450 - sctp_key->sctp_src = swkey->ipv6.tp.src; 1451 - sctp_key->sctp_dst = swkey->ipv6.tp.dst; 1452 - } 1453 - } else if (swkey->eth.type == 
htons(ETH_P_IP) && 1454 - swkey->ip.proto == IPPROTO_ICMP) { 1455 - struct ovs_key_icmp *icmp_key; 1456 - 1457 - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1458 - if (!nla) 1459 - goto nla_put_failure; 1460 - icmp_key = nla_data(nla); 1461 - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); 1462 - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); 1463 - } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1464 - swkey->ip.proto == IPPROTO_ICMPV6) { 1465 - struct ovs_key_icmpv6 *icmpv6_key; 1466 - 1467 - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1468 - sizeof(*icmpv6_key)); 1469 - if (!nla) 1470 - goto nla_put_failure; 1471 - icmpv6_key = nla_data(nla); 1472 - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); 1473 - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); 1474 - 1475 - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1476 - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1477 - struct ovs_key_nd *nd_key; 1478 - 1479 - nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1480 - if (!nla) 1481 - goto nla_put_failure; 1482 - nd_key = nla_data(nla); 1483 - memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1484 - sizeof(nd_key->nd_target)); 1485 - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); 1486 - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); 1487 - } 1488 - } 1489 - } 1490 - 1491 - unencap: 1492 - if (encap) 1493 - nla_nest_end(skb, encap); 1494 - 1495 - return 0; 1496 - 1497 - nla_put_failure: 1498 - return -EMSGSIZE; 1499 - } 1500 - 1501 - /* Initializes the flow module. 1502 - * Returns zero if successful or a negative error code. 
*/ 1503 - int ovs_flow_init(void) 1504 - { 1505 - BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); 1506 - BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); 1507 - 1508 - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 1509 - 0, NULL); 1510 - if (flow_cache == NULL) 1511 - return -ENOMEM; 1512 - 1513 - return 0; 1514 - } 1515 - 1516 - /* Uninitializes the flow module. */ 1517 - void ovs_flow_exit(void) 1518 - { 1519 - kmem_cache_destroy(flow_cache); 1520 - } 1521 - 1522 - struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) 1523 - { 1524 - struct sw_flow_mask *mask; 1525 - 1526 - mask = kmalloc(sizeof(*mask), GFP_KERNEL); 1527 - if (mask) 1528 - mask->ref_count = 0; 1529 - 1530 - return mask; 1531 - } 1532 - 1533 - void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) 1534 - { 1535 - mask->ref_count++; 1536 - } 1537 - 1538 - void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) 1539 - { 1540 - if (!mask) 1541 - return; 1542 - 1543 - BUG_ON(!mask->ref_count); 1544 - mask->ref_count--; 1545 - 1546 - if (!mask->ref_count) { 1547 - list_del_rcu(&mask->list); 1548 - if (deferred) 1549 - kfree_rcu(mask, rcu); 1550 - else 1551 - kfree(mask); 1552 - } 1553 - } 1554 - 1555 - static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, 1556 - const struct sw_flow_mask *b) 1557 - { 1558 - u8 *a_ = (u8 *)&a->key + a->range.start; 1559 - u8 *b_ = (u8 *)&b->key + b->range.start; 1560 - 1561 - return (a->range.end == b->range.end) 1562 - && (a->range.start == b->range.start) 1563 - && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); 1564 - } 1565 - 1566 - struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, 1567 - const struct sw_flow_mask *mask) 1568 - { 1569 - struct list_head *ml; 1570 - 1571 - list_for_each(ml, tbl->mask_list) { 1572 - struct sw_flow_mask *m; 1573 - m = container_of(ml, struct sw_flow_mask, list); 1574 - if (ovs_sw_flow_mask_equal(mask, m)) 1575 - return m; 1576 - } 1577 - 
1578 - return NULL; 1579 - } 1580 - 1581 - /** 1582 - * add a new mask into the mask list. 1583 - * The caller needs to make sure that 'mask' is not the same 1584 - * as any masks that are already on the list. 1585 - */ 1586 - void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) 1587 - { 1588 - list_add_rcu(&mask->list, tbl->mask_list); 1589 - } 1590 - 1591 - /** 1592 - * Set 'range' fields in the mask to the value of 'val'. 1593 - */ 1594 - static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, 1595 - struct sw_flow_key_range *range, u8 val) 1596 - { 1597 - u8 *m = (u8 *)&mask->key + range->start; 1598 - 1599 - mask->range = *range; 1600 - memset(m, val, range_n_bytes(range)); 1601 1004 }
+29 -103
net/openvswitch/flow.h
··· 33 33 #include <net/inet_ecn.h> 34 34 35 35 struct sk_buff; 36 - struct sw_flow_mask; 37 - struct flow_table; 38 - 39 - struct sw_flow_actions { 40 - struct rcu_head rcu; 41 - u32 actions_len; 42 - struct nlattr actions[]; 43 - }; 44 36 45 37 /* Used to memset ovs_key_ipv4_tunnel padding. */ 46 38 #define OVS_TUNNEL_KEY_SIZE \ ··· 93 101 struct { 94 102 __be16 src; /* TCP/UDP/SCTP source port. */ 95 103 __be16 dst; /* TCP/UDP/SCTP destination port. */ 104 + __be16 flags; /* TCP flags. */ 96 105 } tp; 97 106 struct { 98 107 u8 sha[ETH_ALEN]; /* ARP source hardware address. */ ··· 110 117 struct { 111 118 __be16 src; /* TCP/UDP/SCTP source port. */ 112 119 __be16 dst; /* TCP/UDP/SCTP destination port. */ 120 + __be16 flags; /* TCP flags. */ 113 121 } tp; 114 122 struct { 115 123 struct in6_addr target; /* ND target address. */ ··· 120 126 } ipv6; 121 127 }; 122 128 } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ 129 + 130 + struct sw_flow_key_range { 131 + size_t start; 132 + size_t end; 133 + }; 134 + 135 + struct sw_flow_mask { 136 + int ref_count; 137 + struct rcu_head rcu; 138 + struct list_head list; 139 + struct sw_flow_key_range range; 140 + struct sw_flow_key key; 141 + }; 142 + 143 + struct sw_flow_match { 144 + struct sw_flow_key *key; 145 + struct sw_flow_key_range range; 146 + struct sw_flow_mask *mask; 147 + }; 148 + 149 + struct sw_flow_actions { 150 + struct rcu_head rcu; 151 + u32 actions_len; 152 + struct nlattr actions[]; 153 + }; 123 154 124 155 struct sw_flow { 125 156 struct rcu_head rcu; ··· 160 141 unsigned long used; /* Last used time (in jiffies). */ 161 142 u64 packet_count; /* Number of packets matched. */ 162 143 u64 byte_count; /* Number of bytes matched. */ 163 - u8 tcp_flags; /* Union of seen TCP flags. */ 144 + __be16 tcp_flags; /* Union of seen TCP flags. 
*/ 164 145 }; 165 - 166 - struct sw_flow_key_range { 167 - size_t start; 168 - size_t end; 169 - }; 170 - 171 - struct sw_flow_match { 172 - struct sw_flow_key *key; 173 - struct sw_flow_key_range range; 174 - struct sw_flow_mask *mask; 175 - }; 176 - 177 - void ovs_match_init(struct sw_flow_match *match, 178 - struct sw_flow_key *key, struct sw_flow_mask *mask); 179 146 180 147 struct arp_eth_header { 181 148 __be16 ar_hrd; /* format of hardware address */ ··· 177 172 unsigned char ar_tip[4]; /* target IP address */ 178 173 } __packed; 179 174 180 - int ovs_flow_init(void); 181 - void ovs_flow_exit(void); 182 - 183 - struct sw_flow *ovs_flow_alloc(void); 184 - void ovs_flow_deferred_free(struct sw_flow *); 185 - void ovs_flow_free(struct sw_flow *, bool deferred); 186 - 187 - struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); 188 - void ovs_flow_deferred_free_acts(struct sw_flow_actions *); 189 - 190 - int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); 191 175 void ovs_flow_used(struct sw_flow *, struct sk_buff *); 192 176 u64 ovs_flow_used_time(unsigned long flow_jiffies); 193 - int ovs_flow_to_nlattrs(const struct sw_flow_key *, 194 - const struct sw_flow_key *, struct sk_buff *); 195 - int ovs_match_from_nlattrs(struct sw_flow_match *match, 196 - const struct nlattr *, 197 - const struct nlattr *); 198 - int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, 199 - const struct nlattr *attr); 200 177 201 - #define MAX_ACTIONS_BUFSIZE (32 * 1024) 202 - #define TBL_MIN_BUCKETS 1024 178 + int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); 203 179 204 - struct flow_table { 205 - struct flex_array *buckets; 206 - unsigned int count, n_buckets; 207 - struct rcu_head rcu; 208 - struct list_head *mask_list; 209 - int node_ver; 210 - u32 hash_seed; 211 - bool keep_flows; 212 - }; 213 - 214 - static inline int ovs_flow_tbl_count(struct flow_table *table) 215 - { 216 - return table->count; 217 - } 218 - 219 - 
static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) 220 - { 221 - return (table->count > table->n_buckets); 222 - } 223 - 224 - struct sw_flow *ovs_flow_lookup(struct flow_table *, 225 - const struct sw_flow_key *); 226 - struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, 227 - struct sw_flow_match *match); 228 - 229 - void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); 230 - struct flow_table *ovs_flow_tbl_alloc(int new_size); 231 - struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); 232 - struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); 233 - 234 - void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow); 235 - void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow); 236 - 237 - struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx); 238 - extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; 239 - int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 240 - struct sw_flow_match *match, bool is_mask); 241 - int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 242 - const struct ovs_key_ipv4_tunnel *tun_key, 243 - const struct ovs_key_ipv4_tunnel *output); 244 - 245 - bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 246 - const struct sw_flow_key *key, int key_end); 247 - 248 - struct sw_flow_mask { 249 - int ref_count; 250 - struct rcu_head rcu; 251 - struct list_head list; 252 - struct sw_flow_key_range range; 253 - struct sw_flow_key key; 254 - }; 255 - 256 - struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); 257 - void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); 258 - void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); 259 - void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); 260 - struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, 261 - const struct sw_flow_mask *); 262 - void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, 263 
- const struct sw_flow_mask *mask); 264 180 #endif /* flow.h */
+1630
net/openvswitch/flow_netlink.c
··· 1 + /* 2 + * Copyright (c) 2007-2013 Nicira, Inc. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of version 2 of the GNU General Public 6 + * License as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, but 9 + * WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 + * General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program; if not, write to the Free Software 15 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 + * 02110-1301, USA 17 + */ 18 + 19 + #include "flow.h" 20 + #include "datapath.h" 21 + #include <linux/uaccess.h> 22 + #include <linux/netdevice.h> 23 + #include <linux/etherdevice.h> 24 + #include <linux/if_ether.h> 25 + #include <linux/if_vlan.h> 26 + #include <net/llc_pdu.h> 27 + #include <linux/kernel.h> 28 + #include <linux/jhash.h> 29 + #include <linux/jiffies.h> 30 + #include <linux/llc.h> 31 + #include <linux/module.h> 32 + #include <linux/in.h> 33 + #include <linux/rcupdate.h> 34 + #include <linux/if_arp.h> 35 + #include <linux/ip.h> 36 + #include <linux/ipv6.h> 37 + #include <linux/sctp.h> 38 + #include <linux/tcp.h> 39 + #include <linux/udp.h> 40 + #include <linux/icmp.h> 41 + #include <linux/icmpv6.h> 42 + #include <linux/rculist.h> 43 + #include <net/ip.h> 44 + #include <net/ipv6.h> 45 + #include <net/ndisc.h> 46 + 47 + #include "flow_netlink.h" 48 + 49 + static void update_range__(struct sw_flow_match *match, 50 + size_t offset, size_t size, bool is_mask) 51 + { 52 + struct sw_flow_key_range *range = NULL; 53 + size_t start = rounddown(offset, sizeof(long)); 54 + size_t end = roundup(offset + size, sizeof(long)); 55 + 56 + if (!is_mask) 57 + range = &match->range; 58 + else if (match->mask) 59 + range = 
&match->mask->range; 60 + 61 + if (!range) 62 + return; 63 + 64 + if (range->start == range->end) { 65 + range->start = start; 66 + range->end = end; 67 + return; 68 + } 69 + 70 + if (range->start > start) 71 + range->start = start; 72 + 73 + if (range->end < end) 74 + range->end = end; 75 + } 76 + 77 + #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 78 + do { \ 79 + update_range__(match, offsetof(struct sw_flow_key, field), \ 80 + sizeof((match)->key->field), is_mask); \ 81 + if (is_mask) { \ 82 + if ((match)->mask) \ 83 + (match)->mask->key.field = value; \ 84 + } else { \ 85 + (match)->key->field = value; \ 86 + } \ 87 + } while (0) 88 + 89 + #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ 90 + do { \ 91 + update_range__(match, offsetof(struct sw_flow_key, field), \ 92 + len, is_mask); \ 93 + if (is_mask) { \ 94 + if ((match)->mask) \ 95 + memcpy(&(match)->mask->key.field, value_p, len);\ 96 + } else { \ 97 + memcpy(&(match)->key->field, value_p, len); \ 98 + } \ 99 + } while (0) 100 + 101 + static u16 range_n_bytes(const struct sw_flow_key_range *range) 102 + { 103 + return range->end - range->start; 104 + } 105 + 106 + static bool match_validate(const struct sw_flow_match *match, 107 + u64 key_attrs, u64 mask_attrs) 108 + { 109 + u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 110 + u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 111 + 112 + /* The following mask attributes allowed only if they 113 + * pass the validation tests. */ 114 + mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) 115 + | (1 << OVS_KEY_ATTR_IPV6) 116 + | (1 << OVS_KEY_ATTR_TCP) 117 + | (1 << OVS_KEY_ATTR_TCP_FLAGS) 118 + | (1 << OVS_KEY_ATTR_UDP) 119 + | (1 << OVS_KEY_ATTR_SCTP) 120 + | (1 << OVS_KEY_ATTR_ICMP) 121 + | (1 << OVS_KEY_ATTR_ICMPV6) 122 + | (1 << OVS_KEY_ATTR_ARP) 123 + | (1 << OVS_KEY_ATTR_ND)); 124 + 125 + /* Always allowed mask fields. 
*/ 126 + mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 127 + | (1 << OVS_KEY_ATTR_IN_PORT) 128 + | (1 << OVS_KEY_ATTR_ETHERTYPE)); 129 + 130 + /* Check key attributes. */ 131 + if (match->key->eth.type == htons(ETH_P_ARP) 132 + || match->key->eth.type == htons(ETH_P_RARP)) { 133 + key_expected |= 1 << OVS_KEY_ATTR_ARP; 134 + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 135 + mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 136 + } 137 + 138 + if (match->key->eth.type == htons(ETH_P_IP)) { 139 + key_expected |= 1 << OVS_KEY_ATTR_IPV4; 140 + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 141 + mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; 142 + 143 + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { 144 + if (match->key->ip.proto == IPPROTO_UDP) { 145 + key_expected |= 1 << OVS_KEY_ATTR_UDP; 146 + if (match->mask && (match->mask->key.ip.proto == 0xff)) 147 + mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 148 + } 149 + 150 + if (match->key->ip.proto == IPPROTO_SCTP) { 151 + key_expected |= 1 << OVS_KEY_ATTR_SCTP; 152 + if (match->mask && (match->mask->key.ip.proto == 0xff)) 153 + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 154 + } 155 + 156 + if (match->key->ip.proto == IPPROTO_TCP) { 157 + key_expected |= 1 << OVS_KEY_ATTR_TCP; 158 + key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 159 + if (match->mask && (match->mask->key.ip.proto == 0xff)) { 160 + mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 161 + mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 162 + } 163 + } 164 + 165 + if (match->key->ip.proto == IPPROTO_ICMP) { 166 + key_expected |= 1 << OVS_KEY_ATTR_ICMP; 167 + if (match->mask && (match->mask->key.ip.proto == 0xff)) 168 + mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; 169 + } 170 + } 171 + } 172 + 173 + if (match->key->eth.type == htons(ETH_P_IPV6)) { 174 + key_expected |= 1 << OVS_KEY_ATTR_IPV6; 175 + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 176 + mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; 177 + 178 + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) 
{ 179 + if (match->key->ip.proto == IPPROTO_UDP) { 180 + key_expected |= 1 << OVS_KEY_ATTR_UDP; 181 + if (match->mask && (match->mask->key.ip.proto == 0xff)) 182 + mask_allowed |= 1 << OVS_KEY_ATTR_UDP; 183 + } 184 + 185 + if (match->key->ip.proto == IPPROTO_SCTP) { 186 + key_expected |= 1 << OVS_KEY_ATTR_SCTP; 187 + if (match->mask && (match->mask->key.ip.proto == 0xff)) 188 + mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; 189 + } 190 + 191 + if (match->key->ip.proto == IPPROTO_TCP) { 192 + key_expected |= 1 << OVS_KEY_ATTR_TCP; 193 + key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 194 + if (match->mask && (match->mask->key.ip.proto == 0xff)) { 195 + mask_allowed |= 1 << OVS_KEY_ATTR_TCP; 196 + mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; 197 + } 198 + } 199 + 200 + if (match->key->ip.proto == IPPROTO_ICMPV6) { 201 + key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; 202 + if (match->mask && (match->mask->key.ip.proto == 0xff)) 203 + mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; 204 + 205 + if (match->key->ipv6.tp.src == 206 + htons(NDISC_NEIGHBOUR_SOLICITATION) || 207 + match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 208 + key_expected |= 1 << OVS_KEY_ATTR_ND; 209 + if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) 210 + mask_allowed |= 1 << OVS_KEY_ATTR_ND; 211 + } 212 + } 213 + } 214 + } 215 + 216 + if ((key_attrs & key_expected) != key_expected) { 217 + /* Key attributes check failed. */ 218 + OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", 219 + key_attrs, key_expected); 220 + return false; 221 + } 222 + 223 + if ((mask_attrs & mask_allowed) != mask_attrs) { 224 + /* Mask attributes check failed. */ 225 + OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", 226 + mask_attrs, mask_allowed); 227 + return false; 228 + } 229 + 230 + return true; 231 + } 232 + 233 + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ 234 + static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 235 + [OVS_KEY_ATTR_ENCAP] = -1, 236 + [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 237 + [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 238 + [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 239 + [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 240 + [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 241 + [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 242 + [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 243 + [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 244 + [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 245 + [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), 246 + [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 247 + [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 248 + [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 249 + [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 250 + [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 251 + [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 252 + [OVS_KEY_ATTR_TUNNEL] = -1, 253 + }; 254 + 255 + static bool is_all_zero(const u8 *fp, size_t size) 256 + { 257 + int i; 258 + 259 + if (!fp) 260 + return false; 261 + 262 + for (i = 0; i < size; i++) 263 + if (fp[i]) 264 + return false; 265 + 266 + return true; 267 + } 268 + 269 + static int __parse_flow_nlattrs(const struct nlattr *attr, 270 + const struct nlattr *a[], 271 + u64 *attrsp, bool nz) 272 + { 273 + const struct nlattr *nla; 274 + u64 attrs; 275 + int rem; 276 + 277 + attrs = *attrsp; 278 + nla_for_each_nested(nla, attr, rem) { 279 + u16 type = nla_type(nla); 280 + int expected_len; 281 + 282 + if (type > OVS_KEY_ATTR_MAX) { 283 + OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", 284 + type, OVS_KEY_ATTR_MAX); 285 + return -EINVAL; 286 + } 287 + 288 + if (attrs & (1 << type)) { 289 + OVS_NLERR("Duplicate key attribute (type %d).\n", type); 290 + return -EINVAL; 291 + } 292 + 293 + expected_len = ovs_key_lens[type]; 294 + if (nla_len(nla) != expected_len && expected_len != -1) { 295 + OVS_NLERR("Key attribute has 
unexpected length (type=%d" 296 + ", length=%d, expected=%d).\n", type, 297 + nla_len(nla), expected_len); 298 + return -EINVAL; 299 + } 300 + 301 + if (!nz || !is_all_zero(nla_data(nla), expected_len)) { 302 + attrs |= 1 << type; 303 + a[type] = nla; 304 + } 305 + } 306 + if (rem) { 307 + OVS_NLERR("Message has %d unknown bytes.\n", rem); 308 + return -EINVAL; 309 + } 310 + 311 + *attrsp = attrs; 312 + return 0; 313 + } 314 + 315 + static int parse_flow_mask_nlattrs(const struct nlattr *attr, 316 + const struct nlattr *a[], u64 *attrsp) 317 + { 318 + return __parse_flow_nlattrs(attr, a, attrsp, true); 319 + } 320 + 321 + static int parse_flow_nlattrs(const struct nlattr *attr, 322 + const struct nlattr *a[], u64 *attrsp) 323 + { 324 + return __parse_flow_nlattrs(attr, a, attrsp, false); 325 + } 326 + 327 + static int ipv4_tun_from_nlattr(const struct nlattr *attr, 328 + struct sw_flow_match *match, bool is_mask) 329 + { 330 + struct nlattr *a; 331 + int rem; 332 + bool ttl = false; 333 + __be16 tun_flags = 0; 334 + 335 + nla_for_each_nested(a, attr, rem) { 336 + int type = nla_type(a); 337 + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 338 + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 339 + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 340 + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 341 + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 342 + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 343 + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 344 + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 345 + }; 346 + 347 + if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 348 + OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", 349 + type, OVS_TUNNEL_KEY_ATTR_MAX); 350 + return -EINVAL; 351 + } 352 + 353 + if (ovs_tunnel_key_lens[type] != nla_len(a)) { 354 + OVS_NLERR("IPv4 tunnel attribute type has unexpected " 355 + " length (type=%d, length=%d, expected=%d).\n", 356 + type, nla_len(a), ovs_tunnel_key_lens[type]); 357 + return -EINVAL; 358 + } 359 + 360 + switch (type) { 361 + case 
/* Parse nested OVS_TUNNEL_KEY_ATTR_* attributes into the tunnel part of the
 * flow key (or mask, when 'is_mask') in 'match'.
 *
 * For a key (is_mask == false) the tunnel destination address and TTL are
 * mandatory; a mask may leave them unset.  The tunnel flag bits
 * (TUNNEL_KEY, TUNNEL_DONT_FRAGMENT, TUNNEL_CSUM) are accumulated from the
 * attributes that were present and stored as tun_key.tun_flags.
 *
 * Returns 0 on success, -EINVAL on malformed or incomplete input.
 */
static int ipv4_tun_from_nlattr(const struct nlattr *attr,
				struct sw_flow_match *match, bool is_mask)
{
	struct nlattr *a;
	int rem;
	bool ttl = false;		/* set once a TTL attribute is seen */
	__be16 tun_flags = 0;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		/* Expected payload length per attribute type; the two flag
		 * attributes carry no payload (length 0). */
		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
		};

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
				type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (ovs_tunnel_key_lens[type] != nla_len(a)) {
			OVS_NLERR("IPv4 tunnel attribute type has unexpected "
				  " length (type=%d, length=%d, expected=%d).\n",
				  type, nla_len(a), ovs_tunnel_key_lens[type]);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		default:
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);

	if (rem > 0) {
		OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
		return -EINVAL;
	}

	/* Keys (not masks) must fully identify the tunnel endpoint. */
	if (!is_mask) {
		if (!match->key->tun_key.ipv4_dst) {
			OVS_NLERR("IPv4 tunnel destination address is zero.\n");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR("IPv4 tunnel TTL not specified.\n");
			return -EINVAL;
		}
	}

	return 0;
}
/* Emit 'output' (the key or the mask tunnel fields) as a nested
 * OVS_KEY_ATTR_TUNNEL attribute on 'skb'.  'tun_key' is the flow's own
 * tunnel key; only 'output' is serialized here.  Optional fields are
 * omitted when zero; TTL is always emitted.
 *
 * Returns 0 on success or -EMSGSIZE if the skb runs out of room.
 */
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
			      const struct ovs_key_ipv4_tunnel *tun_key,
			      const struct ovs_key_ipv4_tunnel *output)
{
	struct nlattr *nla;

	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
		return -EMSGSIZE;
	if (output->ipv4_src &&
		nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
		return -EMSGSIZE;
	if (output->ipv4_dst &&
		nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
		return -EMSGSIZE;
	if (output->ipv4_tos &&
		nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
		return -EMSGSIZE;
	/* Flag attributes carry no payload; presence alone conveys them. */
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}
/* Extract packet metadata (priority, input port, skb mark, tunnel key) from
 * the attribute table 'a' into 'match'.  Each consumed attribute's bit is
 * cleared from '*attrs' so the caller can detect leftovers.
 *
 * Returns 0 on success, -EINVAL on invalid values (out-of-range in_port or
 * a malformed tunnel attribute).
 */
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
				 const struct nlattr **a, bool is_mask)
{
	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask)
			in_port = 0xffffffff; /* Always exact match in_port. */
		else if (in_port >= DP_MAX_PORTS)
			return -EINVAL;

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		/* No in_port attribute: record "no port" in the key. */
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
					 is_mask))
			return -EINVAL;
		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
	}
	return 0;
}
/* Translate the parsed attribute table 'a' into the flow key (or key mask,
 * when 'is_mask') held by 'match'.  'attrs' is the bitmap of attribute
 * types present; every handled bit is cleared and any leftover bit at the
 * end is rejected with -EINVAL.
 *
 * Writes go through SW_FLOW_KEY_PUT / SW_FLOW_KEY_MEMCPY so the match's key
 * range is extended to cover each touched field.  'orig_attrs' is kept to
 * decide IPv4 vs IPv6 placement for L4 fields even after the L3 bit has
 * been cleared from 'attrs'.
 */
static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
				const struct nlattr **a, bool is_mask)
{
	int err;
	u64 orig_attrs = attrs;

	err = metadata_from_nlattrs(match, &attrs, a, is_mask);
	if (err)
		return err;

	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
	}

	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
		__be16 tci;

		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		/* The TAG_PRESENT bit must always be set (key) or exactly
		 * matched (mask). */
		if (!(tci & htons(VLAN_TAG_PRESENT))) {
			if (is_mask)
				OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
			else
				OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");

			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
	} else if (!is_mask)
		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
		__be16 eth_type;

		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
		if (is_mask) {
			/* Always exact match EtherType. */
			eth_type = htons(0xffff);
		} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
			OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
					ntohs(eth_type), ETH_P_802_3_MIN);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
	} else if (!is_mask) {
		/* Missing EtherType means an 802.2 frame. */
		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
				ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
				ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				ipv6_key->ipv6_src,
				sizeof(match->key->ipv6.addr.src),
				is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				ipv6_key->ipv6_dst,
				sizeof(match->key->ipv6.addr.dst),
				is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		/* Only 8-bit ARP opcodes fit in ip.proto below. */
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
				  arp_key->arp_op);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
			arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					tcp_key->tcp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					tcp_key->tcp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					tcp_key->tcp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					tcp_key->tcp_dst, is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
					nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
					is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
					nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
					is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
	}

	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					udp_key->udp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					udp_key->udp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					udp_key->udp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					udp_key->udp_dst, is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					sctp_key->sctp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					sctp_key->sctp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					sctp_key->sctp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					sctp_key->sctp_dst, is_mask);
		}
		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		/* ICMP type/code are stored in the transport-port slots. */
		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		SW_FLOW_KEY_PUT(match, ipv6.tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
			nd_key->nd_target,
			sizeof(match->key->ipv6.nd.target),
			is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
			nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1 << OVS_KEY_ATTR_ND);
	}

	/* Any attribute we did not consume is unsupported in this context. */
	if (attrs != 0)
		return -EINVAL;

	return 0;
}
nla_data(a[OVS_KEY_ATTR_ICMPV6]); 709 + SW_FLOW_KEY_PUT(match, ipv6.tp.src, 710 + htons(icmpv6_key->icmpv6_type), is_mask); 711 + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, 712 + htons(icmpv6_key->icmpv6_code), is_mask); 713 + attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); 714 + } 715 + 716 + if (attrs & (1 << OVS_KEY_ATTR_ND)) { 717 + const struct ovs_key_nd *nd_key; 718 + 719 + nd_key = nla_data(a[OVS_KEY_ATTR_ND]); 720 + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, 721 + nd_key->nd_target, 722 + sizeof(match->key->ipv6.nd.target), 723 + is_mask); 724 + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, 725 + nd_key->nd_sll, ETH_ALEN, is_mask); 726 + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, 727 + nd_key->nd_tll, ETH_ALEN, is_mask); 728 + attrs &= ~(1 << OVS_KEY_ATTR_ND); 729 + } 730 + 731 + if (attrs != 0) 732 + return -EINVAL; 733 + 734 + return 0; 735 + } 736 + 737 + static void sw_flow_mask_set(struct sw_flow_mask *mask, 738 + struct sw_flow_key_range *range, u8 val) 739 + { 740 + u8 *m = (u8 *)&mask->key + range->start; 741 + 742 + mask->range = *range; 743 + memset(m, val, range_n_bytes(range)); 744 + } 745 + 746 + /** 747 + * ovs_nla_get_match - parses Netlink attributes into a flow key and 748 + * mask. In case the 'mask' is NULL, the flow is treated as exact match 749 + * flow. Otherwise, it is treated as a wildcarded flow, except the mask 750 + * does not include any don't care bit. 751 + * @match: receives the extracted flow match information. 752 + * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 753 + * sequence. The fields should of the packet that triggered the creation 754 + * of this flow. 755 + * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 756 + * attribute specifies the mask field of the wildcarded flow. 
/**
 * ovs_nla_get_match - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 * does not include any don't care bit.
 * @match: receives the extracted flow match information.
 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. The fields should of the packet that triggered the creation
 * of this flow.
 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute specifies the mask field of the wildcarded flow.
 *
 * Returns 0 on success or a negative errno.  802.1Q frames carry their
 * inner key nested under OVS_KEY_ATTR_ENCAP; that nesting is unwrapped
 * here for both the key and, when present, the mask.
 */
int ovs_nla_get_match(struct sw_flow_match *match,
		      const struct nlattr *key,
		      const struct nlattr *mask)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;	/* key had a valid VLAN encap */
	int err;

	err = parse_flow_nlattrs(key, a, &key_attrs);
	if (err)
		return err;

	/* VLAN frame: validate the 802.1Q envelope, then re-parse the
	 * encapsulated (inner) attributes in its place. */
	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR("Invalid Vlan frame.\n");
			return -EINVAL;
		}

		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			err = parse_flow_nlattrs(encap, a, &key_attrs);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
				return -EINVAL;
			}
		} else {
			OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, key_attrs, a, false);
	if (err)
		return err;

	if (mask) {
		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
		if (err)
			return err;

		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			/* An encap mask is only meaningful if the key was a
			 * VLAN frame. */
			if (!encap_valid) {
				OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
				return -EINVAL;
			}

			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
			} else {
				OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
						ntohs(eth_type));
				return -EINVAL;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
				return -EINVAL;
			}
		}

		err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
		if (err)
			return err;
	} else {
		/* Populate exact match flow's key mask. */
		if (match->mask)
			sw_flow_mask_set(match->mask, &match->range, 0xff);
	}

	if (!match_validate(match, key_attrs, mask_attrs))
		return -EINVAL;

	return 0;
}
/**
 * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
 */

int ovs_nla_get_flow_metadata(struct sw_flow *flow,
			      const struct nlattr *attr)
{
	struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	u64 attrs = 0;
	int err;
	struct sw_flow_match match;

	/* Reset metadata to defaults before overlaying attributes. */
	flow->key.phy.in_port = DP_MAX_PORTS;
	flow->key.phy.priority = 0;
	flow->key.phy.skb_mark = 0;
	memset(tun_key, 0, sizeof(flow->key.tun_key));

	err = parse_flow_nlattrs(attr, a, &attrs);
	if (err)
		return -EINVAL;

	memset(&match, 0, sizeof(match));
	match.key = &flow->key;

	err = metadata_from_nlattrs(&match, &attrs, a, false);
	if (err)
		return err;

	return 0;
}
!is_mask ? 0 : 0xffff; 923 + 924 + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 925 + (upper_u16 << 16) | output->phy.in_port)) 926 + goto nla_put_failure; 927 + } 928 + 929 + if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) 930 + goto nla_put_failure; 931 + 932 + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 933 + if (!nla) 934 + goto nla_put_failure; 935 + 936 + eth_key = nla_data(nla); 937 + memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); 938 + memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); 939 + 940 + if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 941 + __be16 eth_type; 942 + eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); 943 + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || 944 + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) 945 + goto nla_put_failure; 946 + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 947 + if (!swkey->eth.tci) 948 + goto unencap; 949 + } else 950 + encap = NULL; 951 + 952 + if (swkey->eth.type == htons(ETH_P_802_2)) { 953 + /* 954 + * Ethertype 802.2 is represented in the netlink with omitted 955 + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and 956 + * 0xffff in the mask attribute. Ethertype can also 957 + * be wildcarded. 
958 + */ 959 + if (is_mask && output->eth.type) 960 + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, 961 + output->eth.type)) 962 + goto nla_put_failure; 963 + goto unencap; 964 + } 965 + 966 + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) 967 + goto nla_put_failure; 968 + 969 + if (swkey->eth.type == htons(ETH_P_IP)) { 970 + struct ovs_key_ipv4 *ipv4_key; 971 + 972 + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); 973 + if (!nla) 974 + goto nla_put_failure; 975 + ipv4_key = nla_data(nla); 976 + ipv4_key->ipv4_src = output->ipv4.addr.src; 977 + ipv4_key->ipv4_dst = output->ipv4.addr.dst; 978 + ipv4_key->ipv4_proto = output->ip.proto; 979 + ipv4_key->ipv4_tos = output->ip.tos; 980 + ipv4_key->ipv4_ttl = output->ip.ttl; 981 + ipv4_key->ipv4_frag = output->ip.frag; 982 + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 983 + struct ovs_key_ipv6 *ipv6_key; 984 + 985 + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); 986 + if (!nla) 987 + goto nla_put_failure; 988 + ipv6_key = nla_data(nla); 989 + memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, 990 + sizeof(ipv6_key->ipv6_src)); 991 + memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, 992 + sizeof(ipv6_key->ipv6_dst)); 993 + ipv6_key->ipv6_label = output->ipv6.label; 994 + ipv6_key->ipv6_proto = output->ip.proto; 995 + ipv6_key->ipv6_tclass = output->ip.tos; 996 + ipv6_key->ipv6_hlimit = output->ip.ttl; 997 + ipv6_key->ipv6_frag = output->ip.frag; 998 + } else if (swkey->eth.type == htons(ETH_P_ARP) || 999 + swkey->eth.type == htons(ETH_P_RARP)) { 1000 + struct ovs_key_arp *arp_key; 1001 + 1002 + nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1003 + if (!nla) 1004 + goto nla_put_failure; 1005 + arp_key = nla_data(nla); 1006 + memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1007 + arp_key->arp_sip = output->ipv4.addr.src; 1008 + arp_key->arp_tip = output->ipv4.addr.dst; 1009 + arp_key->arp_op = htons(output->ip.proto); 1010 + memcpy(arp_key->arp_sha, 
output->ipv4.arp.sha, ETH_ALEN); 1011 + memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); 1012 + } 1013 + 1014 + if ((swkey->eth.type == htons(ETH_P_IP) || 1015 + swkey->eth.type == htons(ETH_P_IPV6)) && 1016 + swkey->ip.frag != OVS_FRAG_TYPE_LATER) { 1017 + 1018 + if (swkey->ip.proto == IPPROTO_TCP) { 1019 + struct ovs_key_tcp *tcp_key; 1020 + 1021 + nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); 1022 + if (!nla) 1023 + goto nla_put_failure; 1024 + tcp_key = nla_data(nla); 1025 + if (swkey->eth.type == htons(ETH_P_IP)) { 1026 + tcp_key->tcp_src = output->ipv4.tp.src; 1027 + tcp_key->tcp_dst = output->ipv4.tp.dst; 1028 + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1029 + output->ipv4.tp.flags)) 1030 + goto nla_put_failure; 1031 + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1032 + tcp_key->tcp_src = output->ipv6.tp.src; 1033 + tcp_key->tcp_dst = output->ipv6.tp.dst; 1034 + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, 1035 + output->ipv6.tp.flags)) 1036 + goto nla_put_failure; 1037 + } 1038 + } else if (swkey->ip.proto == IPPROTO_UDP) { 1039 + struct ovs_key_udp *udp_key; 1040 + 1041 + nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); 1042 + if (!nla) 1043 + goto nla_put_failure; 1044 + udp_key = nla_data(nla); 1045 + if (swkey->eth.type == htons(ETH_P_IP)) { 1046 + udp_key->udp_src = output->ipv4.tp.src; 1047 + udp_key->udp_dst = output->ipv4.tp.dst; 1048 + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1049 + udp_key->udp_src = output->ipv6.tp.src; 1050 + udp_key->udp_dst = output->ipv6.tp.dst; 1051 + } 1052 + } else if (swkey->ip.proto == IPPROTO_SCTP) { 1053 + struct ovs_key_sctp *sctp_key; 1054 + 1055 + nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); 1056 + if (!nla) 1057 + goto nla_put_failure; 1058 + sctp_key = nla_data(nla); 1059 + if (swkey->eth.type == htons(ETH_P_IP)) { 1060 + sctp_key->sctp_src = swkey->ipv4.tp.src; 1061 + sctp_key->sctp_dst = swkey->ipv4.tp.dst; 1062 + } else if (swkey->eth.type 
== htons(ETH_P_IPV6)) { 1063 + sctp_key->sctp_src = swkey->ipv6.tp.src; 1064 + sctp_key->sctp_dst = swkey->ipv6.tp.dst; 1065 + } 1066 + } else if (swkey->eth.type == htons(ETH_P_IP) && 1067 + swkey->ip.proto == IPPROTO_ICMP) { 1068 + struct ovs_key_icmp *icmp_key; 1069 + 1070 + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); 1071 + if (!nla) 1072 + goto nla_put_failure; 1073 + icmp_key = nla_data(nla); 1074 + icmp_key->icmp_type = ntohs(output->ipv4.tp.src); 1075 + icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); 1076 + } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1077 + swkey->ip.proto == IPPROTO_ICMPV6) { 1078 + struct ovs_key_icmpv6 *icmpv6_key; 1079 + 1080 + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, 1081 + sizeof(*icmpv6_key)); 1082 + if (!nla) 1083 + goto nla_put_failure; 1084 + icmpv6_key = nla_data(nla); 1085 + icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); 1086 + icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); 1087 + 1088 + if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1089 + icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1090 + struct ovs_key_nd *nd_key; 1091 + 1092 + nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); 1093 + if (!nla) 1094 + goto nla_put_failure; 1095 + nd_key = nla_data(nla); 1096 + memcpy(nd_key->nd_target, &output->ipv6.nd.target, 1097 + sizeof(nd_key->nd_target)); 1098 + memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); 1099 + memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); 1100 + } 1101 + } 1102 + } 1103 + 1104 + unencap: 1105 + if (encap) 1106 + nla_nest_end(skb, encap); 1107 + 1108 + return 0; 1109 + 1110 + nla_put_failure: 1111 + return -EMSGSIZE; 1112 + } 1113 + 1114 + #define MAX_ACTIONS_BUFSIZE (32 * 1024) 1115 + 1116 + struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) 1117 + { 1118 + struct sw_flow_actions *sfa; 1119 + 1120 + if (size > MAX_ACTIONS_BUFSIZE) 1121 + return ERR_PTR(-EINVAL); 1122 + 1123 + sfa = kmalloc(sizeof(*sfa) + 
/* Ensure '*sfa' has room for an attribute of 'attr_len' bytes and return a
 * pointer to where it should be written, growing the buffer if needed.
 *
 * Growth doubles the current allocation (as reported by ksize(), so slack
 * from the allocator is reused) and is capped at MAX_ACTIONS_BUFSIZE.  On
 * reallocation the old actions are copied over and the old buffer freed;
 * the caller must therefore not hold pointers into the previous buffer.
 *
 * Returns the attribute slot, or ERR_PTR(-EMSGSIZE) when the cap would be
 * exceeded (or the ERR_PTR from allocation failure).
 */
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len)
{

	struct sw_flow_actions *acts;
	int new_acts_size;
	int req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
					(*sfa)->actions_len;

	/* Fast path: the current allocation already has enough slack. */
	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	new_acts_size = ksize(*sfa) * 2;

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
			return ERR_PTR(-EMSGSIZE);
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = ovs_nla_alloc_flow_actions(new_acts_size);
	if (IS_ERR(acts))
		return (void *)acts;

	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}
/* Validate an OVS_ACTION_ATTR_SAMPLE action and copy it into '*sfa'.
 *
 * A sample action must carry exactly one u32 OVS_SAMPLE_ATTR_PROBABILITY
 * and an OVS_SAMPLE_ATTR_ACTIONS list; the nested actions are themselves
 * validated/copied recursively via ovs_nla_copy_actions() with 'depth'
 * incremented (recursion depth is bounded by that callee).
 *
 * Returns 0 on success or a negative errno.
 */
static int validate_and_copy_sample(const struct nlattr *attr,
				    const struct sw_flow_key *key, int depth,
				    struct sw_flow_actions **sfa)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err, st_acts;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		/* Reject unknown or duplicate sample sub-attributes. */
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
	if (start < 0)
		return start;
	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
			 nla_data(probability), sizeof(u32));
	if (err)
		return err;
	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
	if (st_acts < 0)
		return st_acts;

	err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
	if (err)
		return err;

	add_nested_action_end(*sfa, st_acts);
	add_nested_action_end(*sfa, start);

	return 0;
}
*/ 1249 + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); 1250 + if (start < 0) 1251 + return start; 1252 + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1253 + nla_data(probability), sizeof(u32)); 1254 + if (err) 1255 + return err; 1256 + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); 1257 + if (st_acts < 0) 1258 + return st_acts; 1259 + 1260 + err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); 1261 + if (err) 1262 + return err; 1263 + 1264 + add_nested_action_end(*sfa, st_acts); 1265 + add_nested_action_end(*sfa, start); 1266 + 1267 + return 0; 1268 + } 1269 + 1270 + static int validate_tp_port(const struct sw_flow_key *flow_key) 1271 + { 1272 + if (flow_key->eth.type == htons(ETH_P_IP)) { 1273 + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) 1274 + return 0; 1275 + } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { 1276 + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) 1277 + return 0; 1278 + } 1279 + 1280 + return -EINVAL; 1281 + } 1282 + 1283 + void ovs_match_init(struct sw_flow_match *match, 1284 + struct sw_flow_key *key, 1285 + struct sw_flow_mask *mask) 1286 + { 1287 + memset(match, 0, sizeof(*match)); 1288 + match->key = key; 1289 + match->mask = mask; 1290 + 1291 + memset(key, 0, sizeof(*key)); 1292 + 1293 + if (mask) { 1294 + memset(&mask->key, 0, sizeof(mask->key)); 1295 + mask->range.start = mask->range.end = 0; 1296 + } 1297 + } 1298 + 1299 + static int validate_and_copy_set_tun(const struct nlattr *attr, 1300 + struct sw_flow_actions **sfa) 1301 + { 1302 + struct sw_flow_match match; 1303 + struct sw_flow_key key; 1304 + int err, start; 1305 + 1306 + ovs_match_init(&match, &key, NULL); 1307 + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); 1308 + if (err) 1309 + return err; 1310 + 1311 + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); 1312 + if (start < 0) 1313 + return start; 1314 + 1315 + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, 1316 + 
sizeof(match.key->tun_key)); 1317 + add_nested_action_end(*sfa, start); 1318 + 1319 + return err; 1320 + } 1321 + 1322 + static int validate_set(const struct nlattr *a, 1323 + const struct sw_flow_key *flow_key, 1324 + struct sw_flow_actions **sfa, 1325 + bool *set_tun) 1326 + { 1327 + const struct nlattr *ovs_key = nla_data(a); 1328 + int key_type = nla_type(ovs_key); 1329 + 1330 + /* There can be only one key in a action */ 1331 + if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) 1332 + return -EINVAL; 1333 + 1334 + if (key_type > OVS_KEY_ATTR_MAX || 1335 + (ovs_key_lens[key_type] != nla_len(ovs_key) && 1336 + ovs_key_lens[key_type] != -1)) 1337 + return -EINVAL; 1338 + 1339 + switch (key_type) { 1340 + const struct ovs_key_ipv4 *ipv4_key; 1341 + const struct ovs_key_ipv6 *ipv6_key; 1342 + int err; 1343 + 1344 + case OVS_KEY_ATTR_PRIORITY: 1345 + case OVS_KEY_ATTR_SKB_MARK: 1346 + case OVS_KEY_ATTR_ETHERNET: 1347 + break; 1348 + 1349 + case OVS_KEY_ATTR_TUNNEL: 1350 + *set_tun = true; 1351 + err = validate_and_copy_set_tun(a, sfa); 1352 + if (err) 1353 + return err; 1354 + break; 1355 + 1356 + case OVS_KEY_ATTR_IPV4: 1357 + if (flow_key->eth.type != htons(ETH_P_IP)) 1358 + return -EINVAL; 1359 + 1360 + if (!flow_key->ip.proto) 1361 + return -EINVAL; 1362 + 1363 + ipv4_key = nla_data(ovs_key); 1364 + if (ipv4_key->ipv4_proto != flow_key->ip.proto) 1365 + return -EINVAL; 1366 + 1367 + if (ipv4_key->ipv4_frag != flow_key->ip.frag) 1368 + return -EINVAL; 1369 + 1370 + break; 1371 + 1372 + case OVS_KEY_ATTR_IPV6: 1373 + if (flow_key->eth.type != htons(ETH_P_IPV6)) 1374 + return -EINVAL; 1375 + 1376 + if (!flow_key->ip.proto) 1377 + return -EINVAL; 1378 + 1379 + ipv6_key = nla_data(ovs_key); 1380 + if (ipv6_key->ipv6_proto != flow_key->ip.proto) 1381 + return -EINVAL; 1382 + 1383 + if (ipv6_key->ipv6_frag != flow_key->ip.frag) 1384 + return -EINVAL; 1385 + 1386 + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) 1387 + return -EINVAL; 1388 + 1389 + break; 1390 + 1391 + 
case OVS_KEY_ATTR_TCP: 1392 + if (flow_key->ip.proto != IPPROTO_TCP) 1393 + return -EINVAL; 1394 + 1395 + return validate_tp_port(flow_key); 1396 + 1397 + case OVS_KEY_ATTR_UDP: 1398 + if (flow_key->ip.proto != IPPROTO_UDP) 1399 + return -EINVAL; 1400 + 1401 + return validate_tp_port(flow_key); 1402 + 1403 + case OVS_KEY_ATTR_SCTP: 1404 + if (flow_key->ip.proto != IPPROTO_SCTP) 1405 + return -EINVAL; 1406 + 1407 + return validate_tp_port(flow_key); 1408 + 1409 + default: 1410 + return -EINVAL; 1411 + } 1412 + 1413 + return 0; 1414 + } 1415 + 1416 + static int validate_userspace(const struct nlattr *attr) 1417 + { 1418 + static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1419 + [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1420 + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1421 + }; 1422 + struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1423 + int error; 1424 + 1425 + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, 1426 + attr, userspace_policy); 1427 + if (error) 1428 + return error; 1429 + 1430 + if (!a[OVS_USERSPACE_ATTR_PID] || 1431 + !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) 1432 + return -EINVAL; 1433 + 1434 + return 0; 1435 + } 1436 + 1437 + static int copy_action(const struct nlattr *from, 1438 + struct sw_flow_actions **sfa) 1439 + { 1440 + int totlen = NLA_ALIGN(from->nla_len); 1441 + struct nlattr *to; 1442 + 1443 + to = reserve_sfa_size(sfa, from->nla_len); 1444 + if (IS_ERR(to)) 1445 + return PTR_ERR(to); 1446 + 1447 + memcpy(to, from, totlen); 1448 + return 0; 1449 + } 1450 + 1451 + int ovs_nla_copy_actions(const struct nlattr *attr, 1452 + const struct sw_flow_key *key, 1453 + int depth, 1454 + struct sw_flow_actions **sfa) 1455 + { 1456 + const struct nlattr *a; 1457 + int rem, err; 1458 + 1459 + if (depth >= SAMPLE_ACTION_DEPTH) 1460 + return -EOVERFLOW; 1461 + 1462 + nla_for_each_nested(a, attr, rem) { 1463 + /* Expected argument lengths, (u32)-1 for variable length. 
*/ 1464 + static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { 1465 + [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 1466 + [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 1467 + [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 1468 + [OVS_ACTION_ATTR_POP_VLAN] = 0, 1469 + [OVS_ACTION_ATTR_SET] = (u32)-1, 1470 + [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 1471 + }; 1472 + const struct ovs_action_push_vlan *vlan; 1473 + int type = nla_type(a); 1474 + bool skip_copy; 1475 + 1476 + if (type > OVS_ACTION_ATTR_MAX || 1477 + (action_lens[type] != nla_len(a) && 1478 + action_lens[type] != (u32)-1)) 1479 + return -EINVAL; 1480 + 1481 + skip_copy = false; 1482 + switch (type) { 1483 + case OVS_ACTION_ATTR_UNSPEC: 1484 + return -EINVAL; 1485 + 1486 + case OVS_ACTION_ATTR_USERSPACE: 1487 + err = validate_userspace(a); 1488 + if (err) 1489 + return err; 1490 + break; 1491 + 1492 + case OVS_ACTION_ATTR_OUTPUT: 1493 + if (nla_get_u32(a) >= DP_MAX_PORTS) 1494 + return -EINVAL; 1495 + break; 1496 + 1497 + 1498 + case OVS_ACTION_ATTR_POP_VLAN: 1499 + break; 1500 + 1501 + case OVS_ACTION_ATTR_PUSH_VLAN: 1502 + vlan = nla_data(a); 1503 + if (vlan->vlan_tpid != htons(ETH_P_8021Q)) 1504 + return -EINVAL; 1505 + if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 1506 + return -EINVAL; 1507 + break; 1508 + 1509 + case OVS_ACTION_ATTR_SET: 1510 + err = validate_set(a, key, sfa, &skip_copy); 1511 + if (err) 1512 + return err; 1513 + break; 1514 + 1515 + case OVS_ACTION_ATTR_SAMPLE: 1516 + err = validate_and_copy_sample(a, key, depth, sfa); 1517 + if (err) 1518 + return err; 1519 + skip_copy = true; 1520 + break; 1521 + 1522 + default: 1523 + return -EINVAL; 1524 + } 1525 + if (!skip_copy) { 1526 + err = copy_action(a, sfa); 1527 + if (err) 1528 + return err; 1529 + } 1530 + } 1531 + 1532 + if (rem > 0) 1533 + return -EINVAL; 1534 + 1535 + return 0; 1536 + } 1537 + 1538 + static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 1539 + { 1540 + const struct nlattr *a; 
1541 + struct nlattr *start; 1542 + int err = 0, rem; 1543 + 1544 + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); 1545 + if (!start) 1546 + return -EMSGSIZE; 1547 + 1548 + nla_for_each_nested(a, attr, rem) { 1549 + int type = nla_type(a); 1550 + struct nlattr *st_sample; 1551 + 1552 + switch (type) { 1553 + case OVS_SAMPLE_ATTR_PROBABILITY: 1554 + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, 1555 + sizeof(u32), nla_data(a))) 1556 + return -EMSGSIZE; 1557 + break; 1558 + case OVS_SAMPLE_ATTR_ACTIONS: 1559 + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); 1560 + if (!st_sample) 1561 + return -EMSGSIZE; 1562 + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); 1563 + if (err) 1564 + return err; 1565 + nla_nest_end(skb, st_sample); 1566 + break; 1567 + } 1568 + } 1569 + 1570 + nla_nest_end(skb, start); 1571 + return err; 1572 + } 1573 + 1574 + static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) 1575 + { 1576 + const struct nlattr *ovs_key = nla_data(a); 1577 + int key_type = nla_type(ovs_key); 1578 + struct nlattr *start; 1579 + int err; 1580 + 1581 + switch (key_type) { 1582 + case OVS_KEY_ATTR_IPV4_TUNNEL: 1583 + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); 1584 + if (!start) 1585 + return -EMSGSIZE; 1586 + 1587 + err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), 1588 + nla_data(ovs_key)); 1589 + if (err) 1590 + return err; 1591 + nla_nest_end(skb, start); 1592 + break; 1593 + default: 1594 + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) 1595 + return -EMSGSIZE; 1596 + break; 1597 + } 1598 + 1599 + return 0; 1600 + } 1601 + 1602 + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) 1603 + { 1604 + const struct nlattr *a; 1605 + int rem, err; 1606 + 1607 + nla_for_each_attr(a, attr, len, rem) { 1608 + int type = nla_type(a); 1609 + 1610 + switch (type) { 1611 + case OVS_ACTION_ATTR_SET: 1612 + err = set_action_to_attr(a, skb); 1613 + if (err) 1614 + return err; 1615 + break; 
1616 + 1617 + case OVS_ACTION_ATTR_SAMPLE: 1618 + err = sample_action_to_attr(a, skb); 1619 + if (err) 1620 + return err; 1621 + break; 1622 + default: 1623 + if (nla_put(skb, type, nla_len(a), nla_data(a))) 1624 + return -EMSGSIZE; 1625 + break; 1626 + } 1627 + } 1628 + 1629 + return 0; 1630 + }
+60
net/openvswitch/flow_netlink.h
/*
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */


#ifndef FLOW_NETLINK_H
#define FLOW_NETLINK_H 1

#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/flex_array.h>

#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>

#include "flow.h"

/* Translation between struct sw_flow_key / sw_flow_actions and their
 * Netlink attribute representations (see include/uapi/linux/openvswitch.h).
 */

/* Zero-initializes 'key' (and 'mask' when non-NULL) and points 'match'
 * at them. */
void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key, struct sw_flow_mask *mask);

/* Flow key <-> Netlink conversion. */
int ovs_nla_put_flow(const struct sw_flow_key *,
		     const struct sw_flow_key *, struct sk_buff *);
int ovs_nla_get_flow_metadata(struct sw_flow *flow,
			      const struct nlattr *attr);
int ovs_nla_get_match(struct sw_flow_match *match,
		      const struct nlattr *,
		      const struct nlattr *);

/* Action list validation/copy into kernel form, and dump back out. */
int ovs_nla_copy_actions(const struct nlattr *attr,
			 const struct sw_flow_key *key, int depth,
			 struct sw_flow_actions **sfa);
int ovs_nla_put_actions(const struct nlattr *attr,
			int len, struct sk_buff *skb);

/* Allocation / RCU-deferred release of action buffers. */
struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
void ovs_nla_free_flow_actions(struct sw_flow_actions *);

#endif /* flow_netlink.h */
+592
net/openvswitch/flow_table.c
··· 1 + /* 2 + * Copyright (c) 2007-2013 Nicira, Inc. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of version 2 of the GNU General Public 6 + * License as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, but 9 + * WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 + * General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program; if not, write to the Free Software 15 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 + * 02110-1301, USA 17 + */ 18 + 19 + #include "flow.h" 20 + #include "datapath.h" 21 + #include <linux/uaccess.h> 22 + #include <linux/netdevice.h> 23 + #include <linux/etherdevice.h> 24 + #include <linux/if_ether.h> 25 + #include <linux/if_vlan.h> 26 + #include <net/llc_pdu.h> 27 + #include <linux/kernel.h> 28 + #include <linux/jhash.h> 29 + #include <linux/jiffies.h> 30 + #include <linux/llc.h> 31 + #include <linux/module.h> 32 + #include <linux/in.h> 33 + #include <linux/rcupdate.h> 34 + #include <linux/if_arp.h> 35 + #include <linux/ip.h> 36 + #include <linux/ipv6.h> 37 + #include <linux/sctp.h> 38 + #include <linux/tcp.h> 39 + #include <linux/udp.h> 40 + #include <linux/icmp.h> 41 + #include <linux/icmpv6.h> 42 + #include <linux/rculist.h> 43 + #include <net/ip.h> 44 + #include <net/ipv6.h> 45 + #include <net/ndisc.h> 46 + 47 + #include "datapath.h" 48 + 49 + #define TBL_MIN_BUCKETS 1024 50 + #define REHASH_INTERVAL (10 * 60 * HZ) 51 + 52 + static struct kmem_cache *flow_cache; 53 + 54 + static u16 range_n_bytes(const struct sw_flow_key_range *range) 55 + { 56 + return range->end - range->start; 57 + } 58 + 59 + void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, 60 + const struct sw_flow_mask *mask) 61 
+ { 62 + const long *m = (long *)((u8 *)&mask->key + mask->range.start); 63 + const long *s = (long *)((u8 *)src + mask->range.start); 64 + long *d = (long *)((u8 *)dst + mask->range.start); 65 + int i; 66 + 67 + /* The memory outside of the 'mask->range' are not set since 68 + * further operations on 'dst' only uses contents within 69 + * 'mask->range'. 70 + */ 71 + for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) 72 + *d++ = *s++ & *m++; 73 + } 74 + 75 + struct sw_flow *ovs_flow_alloc(void) 76 + { 77 + struct sw_flow *flow; 78 + 79 + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); 80 + if (!flow) 81 + return ERR_PTR(-ENOMEM); 82 + 83 + spin_lock_init(&flow->lock); 84 + flow->sf_acts = NULL; 85 + flow->mask = NULL; 86 + 87 + return flow; 88 + } 89 + 90 + int ovs_flow_tbl_count(struct flow_table *table) 91 + { 92 + return table->count; 93 + } 94 + 95 + static struct flex_array *alloc_buckets(unsigned int n_buckets) 96 + { 97 + struct flex_array *buckets; 98 + int i, err; 99 + 100 + buckets = flex_array_alloc(sizeof(struct hlist_head), 101 + n_buckets, GFP_KERNEL); 102 + if (!buckets) 103 + return NULL; 104 + 105 + err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); 106 + if (err) { 107 + flex_array_free(buckets); 108 + return NULL; 109 + } 110 + 111 + for (i = 0; i < n_buckets; i++) 112 + INIT_HLIST_HEAD((struct hlist_head *) 113 + flex_array_get(buckets, i)); 114 + 115 + return buckets; 116 + } 117 + 118 + static void flow_free(struct sw_flow *flow) 119 + { 120 + kfree((struct sf_flow_acts __force *)flow->sf_acts); 121 + kmem_cache_free(flow_cache, flow); 122 + } 123 + 124 + static void rcu_free_flow_callback(struct rcu_head *rcu) 125 + { 126 + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 127 + 128 + flow_free(flow); 129 + } 130 + 131 + static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) 132 + { 133 + struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); 134 + 135 + kfree(mask); 136 + } 137 + 
138 + static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) 139 + { 140 + if (!mask) 141 + return; 142 + 143 + BUG_ON(!mask->ref_count); 144 + mask->ref_count--; 145 + 146 + if (!mask->ref_count) { 147 + list_del_rcu(&mask->list); 148 + if (deferred) 149 + call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); 150 + else 151 + kfree(mask); 152 + } 153 + } 154 + 155 + void ovs_flow_free(struct sw_flow *flow, bool deferred) 156 + { 157 + if (!flow) 158 + return; 159 + 160 + flow_mask_del_ref(flow->mask, deferred); 161 + 162 + if (deferred) 163 + call_rcu(&flow->rcu, rcu_free_flow_callback); 164 + else 165 + flow_free(flow); 166 + } 167 + 168 + static void free_buckets(struct flex_array *buckets) 169 + { 170 + flex_array_free(buckets); 171 + } 172 + 173 + static void __table_instance_destroy(struct table_instance *ti) 174 + { 175 + int i; 176 + 177 + if (ti->keep_flows) 178 + goto skip_flows; 179 + 180 + for (i = 0; i < ti->n_buckets; i++) { 181 + struct sw_flow *flow; 182 + struct hlist_head *head = flex_array_get(ti->buckets, i); 183 + struct hlist_node *n; 184 + int ver = ti->node_ver; 185 + 186 + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { 187 + hlist_del(&flow->hash_node[ver]); 188 + ovs_flow_free(flow, false); 189 + } 190 + } 191 + 192 + skip_flows: 193 + free_buckets(ti->buckets); 194 + kfree(ti); 195 + } 196 + 197 + static struct table_instance *table_instance_alloc(int new_size) 198 + { 199 + struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); 200 + 201 + if (!ti) 202 + return NULL; 203 + 204 + ti->buckets = alloc_buckets(new_size); 205 + 206 + if (!ti->buckets) { 207 + kfree(ti); 208 + return NULL; 209 + } 210 + ti->n_buckets = new_size; 211 + ti->node_ver = 0; 212 + ti->keep_flows = false; 213 + get_random_bytes(&ti->hash_seed, sizeof(u32)); 214 + 215 + return ti; 216 + } 217 + 218 + int ovs_flow_tbl_init(struct flow_table *table) 219 + { 220 + struct table_instance *ti; 221 + 222 + ti = 
table_instance_alloc(TBL_MIN_BUCKETS); 223 + 224 + if (!ti) 225 + return -ENOMEM; 226 + 227 + rcu_assign_pointer(table->ti, ti); 228 + INIT_LIST_HEAD(&table->mask_list); 229 + table->last_rehash = jiffies; 230 + table->count = 0; 231 + return 0; 232 + } 233 + 234 + static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) 235 + { 236 + struct table_instance *ti = container_of(rcu, struct table_instance, rcu); 237 + 238 + __table_instance_destroy(ti); 239 + } 240 + 241 + static void table_instance_destroy(struct table_instance *ti, bool deferred) 242 + { 243 + if (!ti) 244 + return; 245 + 246 + if (deferred) 247 + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); 248 + else 249 + __table_instance_destroy(ti); 250 + } 251 + 252 + void ovs_flow_tbl_destroy(struct flow_table *table) 253 + { 254 + struct table_instance *ti = ovsl_dereference(table->ti); 255 + 256 + table_instance_destroy(ti, false); 257 + } 258 + 259 + struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, 260 + u32 *bucket, u32 *last) 261 + { 262 + struct sw_flow *flow; 263 + struct hlist_head *head; 264 + int ver; 265 + int i; 266 + 267 + ver = ti->node_ver; 268 + while (*bucket < ti->n_buckets) { 269 + i = 0; 270 + head = flex_array_get(ti->buckets, *bucket); 271 + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { 272 + if (i < *last) { 273 + i++; 274 + continue; 275 + } 276 + *last = i + 1; 277 + return flow; 278 + } 279 + (*bucket)++; 280 + *last = 0; 281 + } 282 + 283 + return NULL; 284 + } 285 + 286 + static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) 287 + { 288 + hash = jhash_1word(hash, ti->hash_seed); 289 + return flex_array_get(ti->buckets, 290 + (hash & (ti->n_buckets - 1))); 291 + } 292 + 293 + static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) 294 + { 295 + struct hlist_head *head; 296 + 297 + head = find_bucket(ti, flow->hash); 298 + hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); 299 + } 300 + 301 + static void 
flow_table_copy_flows(struct table_instance *old, 302 + struct table_instance *new) 303 + { 304 + int old_ver; 305 + int i; 306 + 307 + old_ver = old->node_ver; 308 + new->node_ver = !old_ver; 309 + 310 + /* Insert in new table. */ 311 + for (i = 0; i < old->n_buckets; i++) { 312 + struct sw_flow *flow; 313 + struct hlist_head *head; 314 + 315 + head = flex_array_get(old->buckets, i); 316 + 317 + hlist_for_each_entry(flow, head, hash_node[old_ver]) 318 + table_instance_insert(new, flow); 319 + } 320 + 321 + old->keep_flows = true; 322 + } 323 + 324 + static struct table_instance *table_instance_rehash(struct table_instance *ti, 325 + int n_buckets) 326 + { 327 + struct table_instance *new_ti; 328 + 329 + new_ti = table_instance_alloc(n_buckets); 330 + if (!new_ti) 331 + return NULL; 332 + 333 + flow_table_copy_flows(ti, new_ti); 334 + 335 + return new_ti; 336 + } 337 + 338 + int ovs_flow_tbl_flush(struct flow_table *flow_table) 339 + { 340 + struct table_instance *old_ti; 341 + struct table_instance *new_ti; 342 + 343 + old_ti = ovsl_dereference(flow_table->ti); 344 + new_ti = table_instance_alloc(TBL_MIN_BUCKETS); 345 + if (!new_ti) 346 + return -ENOMEM; 347 + 348 + rcu_assign_pointer(flow_table->ti, new_ti); 349 + flow_table->last_rehash = jiffies; 350 + flow_table->count = 0; 351 + 352 + table_instance_destroy(old_ti, true); 353 + return 0; 354 + } 355 + 356 + static u32 flow_hash(const struct sw_flow_key *key, int key_start, 357 + int key_end) 358 + { 359 + u32 *hash_key = (u32 *)((u8 *)key + key_start); 360 + int hash_u32s = (key_end - key_start) >> 2; 361 + 362 + /* Make sure number of hash bytes are multiple of u32. 
*/ 363 + BUILD_BUG_ON(sizeof(long) % sizeof(u32)); 364 + 365 + return jhash2(hash_key, hash_u32s, 0); 366 + } 367 + 368 + static int flow_key_start(const struct sw_flow_key *key) 369 + { 370 + if (key->tun_key.ipv4_dst) 371 + return 0; 372 + else 373 + return rounddown(offsetof(struct sw_flow_key, phy), 374 + sizeof(long)); 375 + } 376 + 377 + static bool cmp_key(const struct sw_flow_key *key1, 378 + const struct sw_flow_key *key2, 379 + int key_start, int key_end) 380 + { 381 + const long *cp1 = (long *)((u8 *)key1 + key_start); 382 + const long *cp2 = (long *)((u8 *)key2 + key_start); 383 + long diffs = 0; 384 + int i; 385 + 386 + for (i = key_start; i < key_end; i += sizeof(long)) 387 + diffs |= *cp1++ ^ *cp2++; 388 + 389 + return diffs == 0; 390 + } 391 + 392 + static bool flow_cmp_masked_key(const struct sw_flow *flow, 393 + const struct sw_flow_key *key, 394 + int key_start, int key_end) 395 + { 396 + return cmp_key(&flow->key, key, key_start, key_end); 397 + } 398 + 399 + bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 400 + struct sw_flow_match *match) 401 + { 402 + struct sw_flow_key *key = match->key; 403 + int key_start = flow_key_start(key); 404 + int key_end = match->range.end; 405 + 406 + return cmp_key(&flow->unmasked_key, key, key_start, key_end); 407 + } 408 + 409 + static struct sw_flow *masked_flow_lookup(struct table_instance *ti, 410 + const struct sw_flow_key *unmasked, 411 + struct sw_flow_mask *mask) 412 + { 413 + struct sw_flow *flow; 414 + struct hlist_head *head; 415 + int key_start = mask->range.start; 416 + int key_end = mask->range.end; 417 + u32 hash; 418 + struct sw_flow_key masked_key; 419 + 420 + ovs_flow_mask_key(&masked_key, unmasked, mask); 421 + hash = flow_hash(&masked_key, key_start, key_end); 422 + head = find_bucket(ti, hash); 423 + hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { 424 + if (flow->mask == mask && flow->hash == hash && 425 + flow_cmp_masked_key(flow, &masked_key, 426 + key_start, 
key_end)) 427 + return flow; 428 + } 429 + return NULL; 430 + } 431 + 432 + struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, 433 + const struct sw_flow_key *key, 434 + u32 *n_mask_hit) 435 + { 436 + struct table_instance *ti = rcu_dereference(tbl->ti); 437 + struct sw_flow_mask *mask; 438 + struct sw_flow *flow; 439 + 440 + *n_mask_hit = 0; 441 + list_for_each_entry_rcu(mask, &tbl->mask_list, list) { 442 + (*n_mask_hit)++; 443 + flow = masked_flow_lookup(ti, key, mask); 444 + if (flow) /* Found */ 445 + return flow; 446 + } 447 + return NULL; 448 + } 449 + 450 + int ovs_flow_tbl_num_masks(const struct flow_table *table) 451 + { 452 + struct sw_flow_mask *mask; 453 + int num = 0; 454 + 455 + list_for_each_entry(mask, &table->mask_list, list) 456 + num++; 457 + 458 + return num; 459 + } 460 + 461 + static struct table_instance *table_instance_expand(struct table_instance *ti) 462 + { 463 + return table_instance_rehash(ti, ti->n_buckets * 2); 464 + } 465 + 466 + void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) 467 + { 468 + struct table_instance *ti = ovsl_dereference(table->ti); 469 + 470 + BUG_ON(table->count == 0); 471 + hlist_del_rcu(&flow->hash_node[ti->node_ver]); 472 + table->count--; 473 + } 474 + 475 + static struct sw_flow_mask *mask_alloc(void) 476 + { 477 + struct sw_flow_mask *mask; 478 + 479 + mask = kmalloc(sizeof(*mask), GFP_KERNEL); 480 + if (mask) 481 + mask->ref_count = 0; 482 + 483 + return mask; 484 + } 485 + 486 + static void mask_add_ref(struct sw_flow_mask *mask) 487 + { 488 + mask->ref_count++; 489 + } 490 + 491 + static bool mask_equal(const struct sw_flow_mask *a, 492 + const struct sw_flow_mask *b) 493 + { 494 + u8 *a_ = (u8 *)&a->key + a->range.start; 495 + u8 *b_ = (u8 *)&b->key + b->range.start; 496 + 497 + return (a->range.end == b->range.end) 498 + && (a->range.start == b->range.start) 499 + && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); 500 + } 501 + 502 + static struct sw_flow_mask 
*flow_mask_find(const struct flow_table *tbl, 503 + const struct sw_flow_mask *mask) 504 + { 505 + struct list_head *ml; 506 + 507 + list_for_each(ml, &tbl->mask_list) { 508 + struct sw_flow_mask *m; 509 + m = container_of(ml, struct sw_flow_mask, list); 510 + if (mask_equal(mask, m)) 511 + return m; 512 + } 513 + 514 + return NULL; 515 + } 516 + 517 + /** 518 + * add a new mask into the mask list. 519 + * The caller needs to make sure that 'mask' is not the same 520 + * as any masks that are already on the list. 521 + */ 522 + static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, 523 + struct sw_flow_mask *new) 524 + { 525 + struct sw_flow_mask *mask; 526 + mask = flow_mask_find(tbl, new); 527 + if (!mask) { 528 + /* Allocate a new mask if none exsits. */ 529 + mask = mask_alloc(); 530 + if (!mask) 531 + return -ENOMEM; 532 + mask->key = new->key; 533 + mask->range = new->range; 534 + list_add_rcu(&mask->list, &tbl->mask_list); 535 + } 536 + 537 + mask_add_ref(mask); 538 + flow->mask = mask; 539 + return 0; 540 + } 541 + 542 + int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, 543 + struct sw_flow_mask *mask) 544 + { 545 + struct table_instance *new_ti = NULL; 546 + struct table_instance *ti; 547 + int err; 548 + 549 + err = flow_mask_insert(table, flow, mask); 550 + if (err) 551 + return err; 552 + 553 + flow->hash = flow_hash(&flow->key, flow->mask->range.start, 554 + flow->mask->range.end); 555 + ti = ovsl_dereference(table->ti); 556 + table_instance_insert(ti, flow); 557 + table->count++; 558 + 559 + /* Expand table, if necessary, to make room. 
*/ 560 + if (table->count > ti->n_buckets) 561 + new_ti = table_instance_expand(ti); 562 + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) 563 + new_ti = table_instance_rehash(ti, ti->n_buckets); 564 + 565 + if (new_ti) { 566 + rcu_assign_pointer(table->ti, new_ti); 567 + table_instance_destroy(ti, true); 568 + table->last_rehash = jiffies; 569 + } 570 + return 0; 571 + } 572 + 573 + /* Initializes the flow module. 574 + * Returns zero if successful or a negative error code. */ 575 + int ovs_flow_init(void) 576 + { 577 + BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); 578 + BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); 579 + 580 + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 581 + 0, NULL); 582 + if (flow_cache == NULL) 583 + return -ENOMEM; 584 + 585 + return 0; 586 + } 587 + 588 + /* Uninitializes the flow module. */ 589 + void ovs_flow_exit(void) 590 + { 591 + kmem_cache_destroy(flow_cache); 592 + }
+81
net/openvswitch/flow_table.h
/*
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#ifndef FLOW_TABLE_H
#define FLOW_TABLE_H 1

#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/flex_array.h>

#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>

#include "flow.h"

/* One hash-table generation; replaced wholesale on expand/rehash. */
struct table_instance {
	struct flex_array *buckets;	/* n_buckets hlist_heads of flows. */
	unsigned int n_buckets;		/* Power of two. */
	struct rcu_head rcu;		/* Deferred destruction. */
	int node_ver;			/* Which flow->hash_node[] slot this
					 * instance links through. */
	u32 hash_seed;			/* Random per-instance jhash seed. */
	bool keep_flows;		/* Don't free flows on destroy (they
					 * were moved to a new instance). */
};

/* Datapath flow table: current instance plus the megaflow mask list. */
struct flow_table {
	struct table_instance __rcu *ti;
	struct list_head mask_list;	/* RCU list of sw_flow_mask. */
	unsigned long last_rehash;	/* jiffies of last periodic rehash. */
	unsigned int count;		/* Number of installed flows. */
};

int ovs_flow_init(void);
void ovs_flow_exit(void);

struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_free(struct sw_flow *, bool deferred);

int ovs_flow_tbl_init(struct flow_table *);
int ovs_flow_tbl_count(struct flow_table *table);
void ovs_flow_tbl_destroy(struct flow_table *table);
int ovs_flow_tbl_flush(struct flow_table *flow_table);

int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
			struct sw_flow_mask *mask);
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
				       u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
				    const struct sw_flow_key *,
				    u32 *n_mask_hit);

bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
			       struct sw_flow_match *match);

void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       const struct sw_flow_mask *mask);
#endif /* flow_table.h */
-2
net/openvswitch/vport-gre.c
··· 24 24 #include <linux/if_tunnel.h> 25 25 #include <linux/if_vlan.h> 26 26 #include <linux/in.h> 27 - #include <linux/if_vlan.h> 28 - #include <linux/in.h> 29 27 #include <linux/in_route.h> 30 28 #include <linux/inetdevice.h> 31 29 #include <linux/jhash.h>
+1 -1
net/openvswitch/vport-internal_dev.c
··· 134 134 netdev->tx_queue_len = 0; 135 135 136 136 netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | 137 - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; 137 + NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 138 138 139 139 netdev->vlan_features = netdev->features; 140 140 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
-1
net/openvswitch/vport-vxlan.c
··· 29 29 #include <net/ip.h> 30 30 #include <net/udp.h> 31 31 #include <net/ip_tunnels.h> 32 - #include <net/udp.h> 33 32 #include <net/rtnetlink.h> 34 33 #include <net/route.h> 35 34 #include <net/dsfield.h>