Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch

Jesse Gross says:

====================
[GIT net-next] Open vSwitch

Open vSwitch changes for net-next/3.14. Highlights are:
 * Performance improvements in the mechanism to get packets to userspace
   using memory mapped netlink and skb zero copy where appropriate.
 * Per-cpu flow stats in situations where flows are likely to be shared
   across CPUs. Standard flow stats are used in other situations to save
   memory and allocation time.
 * A handful of code cleanups and rationalization.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+483 -213
+3
include/linux/skbuff.h
··· 2445 2445 struct pipe_inode_info *pipe, unsigned int len, 2446 2446 unsigned int flags); 2447 2447 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 2448 + unsigned int skb_zerocopy_headlen(const struct sk_buff *from); 2449 + void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, 2450 + int len, int hlen); 2448 2451 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); 2449 2452 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); 2450 2453 void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+4
include/net/genetlink.h
··· 73 73 * @attrs: netlink attributes 74 74 * @_net: network namespace 75 75 * @user_ptr: user pointers 76 + * @dst_sk: destination socket 76 77 */ 77 78 struct genl_info { 78 79 u32 snd_seq; ··· 86 85 struct net * _net; 87 86 #endif 88 87 void * user_ptr[2]; 88 + struct sock * dst_sk; 89 89 }; 90 90 91 91 static inline struct net *genl_info_net(struct genl_info *info) ··· 179 177 struct sk_buff *skb, struct net *net, u32 portid, 180 178 u32 group, struct nlmsghdr *nlh, gfp_t flags); 181 179 180 + struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, 181 + gfp_t flags); 182 182 void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, 183 183 struct genl_family *family, int flags, u8 cmd); 184 184
+13 -1
include/uapi/linux/openvswitch.h
··· 40 40 41 41 #define OVS_DATAPATH_FAMILY "ovs_datapath" 42 42 #define OVS_DATAPATH_MCGROUP "ovs_datapath" 43 - #define OVS_DATAPATH_VERSION 0x1 43 + 44 + /* V2: 45 + * - API users are expected to provide OVS_DP_ATTR_USER_FEATURES 46 + * when creating the datapath. 47 + */ 48 + #define OVS_DATAPATH_VERSION 2 49 + 50 + /* First OVS datapath version to support features */ 51 + #define OVS_DP_VER_FEATURES 2 44 52 45 53 enum ovs_datapath_cmd { 46 54 OVS_DP_CMD_UNSPEC, ··· 83 75 OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ 84 76 OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ 85 77 OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ 78 + OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ 86 79 __OVS_DP_ATTR_MAX 87 80 }; 88 81 ··· 114 105 __u64 rx_dropped; /* no space in linux buffers */ 115 106 __u64 tx_dropped; /* no space available in linux */ 116 107 }; 108 + 109 + /* Allow last Netlink attribute to be unaligned */ 110 + #define OVS_DP_F_UNALIGNED (1 << 0) 117 111 118 112 /* Fixed logical ports. */ 119 113 #define OVSP_LOCAL ((__u32)0)
+85
net/core/skbuff.c
··· 2121 2121 } 2122 2122 EXPORT_SYMBOL(skb_copy_and_csum_bits); 2123 2123 2124 + /** 2125 + * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() 2126 + * @from: source buffer 2127 + * 2128 + * Calculates the amount of linear headroom needed in the 'to' skb passed 2129 + * into skb_zerocopy(). 2130 + */ 2131 + unsigned int 2132 + skb_zerocopy_headlen(const struct sk_buff *from) 2133 + { 2134 + unsigned int hlen = 0; 2135 + 2136 + if (!from->head_frag || 2137 + skb_headlen(from) < L1_CACHE_BYTES || 2138 + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) 2139 + hlen = skb_headlen(from); 2140 + 2141 + if (skb_has_frag_list(from)) 2142 + hlen = from->len; 2143 + 2144 + return hlen; 2145 + } 2146 + EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); 2147 + 2148 + /** 2149 + * skb_zerocopy - Zero copy skb to skb 2150 + * @to: destination buffer 2151 + * @source: source buffer 2152 + * @len: number of bytes to copy from source buffer 2153 + * @hlen: size of linear headroom in destination buffer 2154 + * 2155 + * Copies up to `len` bytes from `from` to `to` by creating references 2156 + * to the frags in the source buffer. 2157 + * 2158 + * The `hlen` as calculated by skb_zerocopy_headlen() specifies the 2159 + * headroom in the `to` buffer. 
2160 + */ 2161 + void 2162 + skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) 2163 + { 2164 + int i, j = 0; 2165 + int plen = 0; /* length of skb->head fragment */ 2166 + struct page *page; 2167 + unsigned int offset; 2168 + 2169 + BUG_ON(!from->head_frag && !hlen); 2170 + 2171 + /* dont bother with small payloads */ 2172 + if (len <= skb_tailroom(to)) { 2173 + skb_copy_bits(from, 0, skb_put(to, len), len); 2174 + return; 2175 + } 2176 + 2177 + if (hlen) { 2178 + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); 2179 + len -= hlen; 2180 + } else { 2181 + plen = min_t(int, skb_headlen(from), len); 2182 + if (plen) { 2183 + page = virt_to_head_page(from->head); 2184 + offset = from->data - (unsigned char *)page_address(page); 2185 + __skb_fill_page_desc(to, 0, page, offset, plen); 2186 + get_page(page); 2187 + j = 1; 2188 + len -= plen; 2189 + } 2190 + } 2191 + 2192 + to->truesize += len + plen; 2193 + to->len += len + plen; 2194 + to->data_len += len + plen; 2195 + 2196 + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { 2197 + if (!len) 2198 + break; 2199 + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; 2200 + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); 2201 + len -= skb_shinfo(to)->frags[j].size; 2202 + skb_frag_ref(to, j); 2203 + j++; 2204 + } 2205 + skb_shinfo(to)->nr_frags = j; 2206 + } 2207 + EXPORT_SYMBOL_GPL(skb_zerocopy); 2208 + 2124 2209 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 2125 2210 { 2126 2211 __wsum csum;
+4
net/netlink/af_netlink.c
··· 1773 1773 if (ring->pg_vec == NULL) 1774 1774 goto out_put; 1775 1775 1776 + if (ring->frame_size - NL_MMAP_HDRLEN < size) 1777 + goto out_put; 1778 + 1776 1779 skb = alloc_skb_head(gfp_mask); 1777 1780 if (skb == NULL) 1778 1781 goto err1; ··· 1785 1782 if (ring->pg_vec == NULL) 1786 1783 goto out_free; 1787 1784 1785 + /* check again under lock */ 1788 1786 maxlen = ring->frame_size - NL_MMAP_HDRLEN; 1789 1787 if (maxlen < size) 1790 1788 goto out_free;
+21
net/netlink/genetlink.c
··· 461 461 EXPORT_SYMBOL(genl_unregister_family); 462 462 463 463 /** 464 + * genlmsg_new_unicast - Allocate generic netlink message for unicast 465 + * @payload: size of the message payload 466 + * @info: information on destination 467 + * @flags: the type of memory to allocate 468 + * 469 + * Allocates a new sk_buff large enough to cover the specified payload 470 + * plus required Netlink headers. Will check receiving socket for 471 + * memory mapped i/o capability and use it if enabled. Will fall back 472 + * to non-mapped skb if message size exceeds the frame size of the ring. 473 + */ 474 + struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, 475 + gfp_t flags) 476 + { 477 + size_t len = nlmsg_total_size(genlmsg_total_size(payload)); 478 + 479 + return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); 480 + } 481 + EXPORT_SYMBOL_GPL(genlmsg_new_unicast); 482 + 483 + /** 464 484 * genlmsg_put - Add generic netlink header to netlink message 465 485 * @skb: socket buffer holding the message 466 486 * @portid: netlink portid the message is addressed to ··· 620 600 info.genlhdr = nlmsg_data(nlh); 621 601 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; 622 602 info.attrs = attrbuf; 603 + info.dst_sk = skb->sk; 623 604 genl_info_net_set(&info, net); 624 605 memset(&info.user_ptr, 0, sizeof(info.user_ptr)); 625 606
+128 -103
net/openvswitch/datapath.c
··· 108 108 #endif 109 109 110 110 static struct vport *new_vport(const struct vport_parms *); 111 - static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, 111 + static int queue_gso_packets(struct datapath *dp, struct sk_buff *, 112 112 const struct dp_upcall_info *); 113 - static int queue_userspace_packet(struct net *, int dp_ifindex, 114 - struct sk_buff *, 113 + static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, 115 114 const struct dp_upcall_info *); 116 115 117 116 /* Must be called with rcu_read_lock or ovs_mutex. */ ··· 132 133 } 133 134 134 135 /* Must be called with rcu_read_lock or ovs_mutex. */ 135 - const char *ovs_dp_name(const struct datapath *dp) 136 + static const char *ovs_dp_name(const struct datapath *dp) 136 137 { 137 138 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); 138 139 return vport->ops->get_name(vport); ··· 233 234 } 234 235 235 236 /* Look up flow. */ 236 - flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); 237 + flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit); 237 238 if (unlikely(!flow)) { 238 239 struct dp_upcall_info upcall; 239 240 ··· 250 251 OVS_CB(skb)->flow = flow; 251 252 OVS_CB(skb)->pkt_key = &key; 252 253 253 - stats_counter = &stats->n_hit; 254 - ovs_flow_used(OVS_CB(skb)->flow, skb); 254 + ovs_flow_stats_update(OVS_CB(skb)->flow, skb); 255 255 ovs_execute_actions(dp, skb); 256 + stats_counter = &stats->n_hit; 256 257 257 258 out: 258 259 /* Update datapath statistics. 
*/ ··· 276 277 const struct dp_upcall_info *upcall_info) 277 278 { 278 279 struct dp_stats_percpu *stats; 279 - int dp_ifindex; 280 280 int err; 281 281 282 282 if (upcall_info->portid == 0) { ··· 283 285 goto err; 284 286 } 285 287 286 - dp_ifindex = get_dpifindex(dp); 287 - if (!dp_ifindex) { 288 - err = -ENODEV; 289 - goto err; 290 - } 291 - 292 288 if (!skb_is_gso(skb)) 293 - err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); 289 + err = queue_userspace_packet(dp, skb, upcall_info); 294 290 else 295 - err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); 291 + err = queue_gso_packets(dp, skb, upcall_info); 296 292 if (err) 297 293 goto err; 298 294 ··· 302 310 return err; 303 311 } 304 312 305 - static int queue_gso_packets(struct net *net, int dp_ifindex, 306 - struct sk_buff *skb, 313 + static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, 307 314 const struct dp_upcall_info *upcall_info) 308 315 { 309 316 unsigned short gso_type = skb_shinfo(skb)->gso_type; ··· 311 320 struct sk_buff *segs, *nskb; 312 321 int err; 313 322 314 - segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); 323 + segs = __skb_gso_segment(skb, NETIF_F_SG, false); 315 324 if (IS_ERR(segs)) 316 325 return PTR_ERR(segs); 317 326 318 327 /* Queue all of the segments. 
*/ 319 328 skb = segs; 320 329 do { 321 - err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); 330 + err = queue_userspace_packet(dp, skb, upcall_info); 322 331 if (err) 323 332 break; 324 333 ··· 371 380 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 372 381 } 373 382 374 - static size_t upcall_msg_size(const struct sk_buff *skb, 375 - const struct nlattr *userdata) 383 + static size_t upcall_msg_size(const struct nlattr *userdata, 384 + unsigned int hdrlen) 376 385 { 377 386 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) 378 - + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ 387 + + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ 379 388 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ 380 389 381 390 /* OVS_PACKET_ATTR_USERDATA */ ··· 385 394 return size; 386 395 } 387 396 388 - static int queue_userspace_packet(struct net *net, int dp_ifindex, 389 - struct sk_buff *skb, 397 + static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 390 398 const struct dp_upcall_info *upcall_info) 391 399 { 392 400 struct ovs_header *upcall; 393 401 struct sk_buff *nskb = NULL; 394 402 struct sk_buff *user_skb; /* to be queued to userspace */ 395 403 struct nlattr *nla; 396 - int err; 404 + struct genl_info info = { 405 + .dst_sk = ovs_dp_get_net(dp)->genl_sock, 406 + .snd_portid = upcall_info->portid, 407 + }; 408 + size_t len; 409 + unsigned int hlen; 410 + int err, dp_ifindex; 411 + 412 + dp_ifindex = get_dpifindex(dp); 413 + if (!dp_ifindex) 414 + return -ENODEV; 397 415 398 416 if (vlan_tx_tag_present(skb)) { 399 417 nskb = skb_clone(skb, GFP_ATOMIC); ··· 422 422 goto out; 423 423 } 424 424 425 - user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); 425 + /* Complete checksum if needed */ 426 + if (skb->ip_summed == CHECKSUM_PARTIAL && 427 + (err = skb_checksum_help(skb))) 428 + goto out; 429 + 430 + /* Older versions of OVS user space enforce alignment of the last 431 + * Netlink attribute 
to NLA_ALIGNTO which would require extensive 432 + * padding logic. Only perform zerocopy if padding is not required. 433 + */ 434 + if (dp->user_features & OVS_DP_F_UNALIGNED) 435 + hlen = skb_zerocopy_headlen(skb); 436 + else 437 + hlen = skb->len; 438 + 439 + len = upcall_msg_size(upcall_info->userdata, hlen); 440 + user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); 426 441 if (!user_skb) { 427 442 err = -ENOMEM; 428 443 goto out; ··· 456 441 nla_len(upcall_info->userdata), 457 442 nla_data(upcall_info->userdata)); 458 443 459 - nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); 444 + /* Only reserve room for attribute header, packet data is added 445 + * in skb_zerocopy() */ 446 + if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 447 + err = -ENOBUFS; 448 + goto out; 449 + } 450 + nla->nla_len = nla_attr_size(skb->len); 460 451 461 - skb_copy_and_csum_dev(skb, nla_data(nla)); 452 + skb_zerocopy(user_skb, skb, skb->len, hlen); 462 453 463 - genlmsg_end(user_skb, upcall); 464 - err = genlmsg_unicast(net, user_skb, upcall_info->portid); 454 + ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; 465 455 456 + err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); 466 457 out: 467 458 kfree_skb(nskb); 468 459 return err; 469 - } 470 - 471 - static void clear_stats(struct sw_flow *flow) 472 - { 473 - flow->used = 0; 474 - flow->tcp_flags = 0; 475 - flow->packet_count = 0; 476 - flow->byte_count = 0; 477 460 } 478 461 479 462 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ··· 512 499 packet->protocol = htons(ETH_P_802_2); 513 500 514 501 /* Build an sw_flow for sending this packet. 
*/ 515 - flow = ovs_flow_alloc(); 502 + flow = ovs_flow_alloc(false); 516 503 err = PTR_ERR(flow); 517 504 if (IS_ERR(flow)) 518 505 goto err_kfree_skb; ··· 648 635 const int skb_orig_len = skb->len; 649 636 struct nlattr *start; 650 637 struct ovs_flow_stats stats; 638 + __be16 tcp_flags; 639 + unsigned long used; 651 640 struct ovs_header *ovs_header; 652 641 struct nlattr *nla; 653 - unsigned long used; 654 - u8 tcp_flags; 655 642 int err; 656 643 657 644 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); ··· 680 667 681 668 nla_nest_end(skb, nla); 682 669 683 - spin_lock_bh(&flow->lock); 684 - used = flow->used; 685 - stats.n_packets = flow->packet_count; 686 - stats.n_bytes = flow->byte_count; 687 - tcp_flags = (u8)ntohs(flow->tcp_flags); 688 - spin_unlock_bh(&flow->lock); 689 - 670 + ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); 690 671 if (used && 691 672 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) 692 673 goto nla_put_failure; 693 674 694 675 if (stats.n_packets && 695 - nla_put(skb, OVS_FLOW_ATTR_STATS, 696 - sizeof(struct ovs_flow_stats), &stats)) 676 + nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) 697 677 goto nla_put_failure; 698 678 699 - if (tcp_flags && 700 - nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) 679 + if ((u8)ntohs(tcp_flags) && 680 + nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) 701 681 goto nla_put_failure; 702 682 703 683 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if ··· 707 701 if (start) { 708 702 const struct sw_flow_actions *sf_acts; 709 703 710 - sf_acts = rcu_dereference_check(flow->sf_acts, 711 - lockdep_ovsl_is_held()); 704 + sf_acts = rcu_dereference_ovsl(flow->sf_acts); 712 705 713 706 err = ovs_nla_put_actions(sf_acts->actions, 714 707 sf_acts->actions_len, skb); ··· 731 726 return err; 732 727 } 733 728 734 - static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) 729 + static struct 
sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, 730 + struct genl_info *info) 735 731 { 736 - const struct sw_flow_actions *sf_acts; 732 + size_t len; 737 733 738 - sf_acts = ovsl_dereference(flow->sf_acts); 734 + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); 739 735 740 - return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); 736 + return genlmsg_new_unicast(len, info, GFP_KERNEL); 741 737 } 742 738 743 739 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, 744 740 struct datapath *dp, 745 - u32 portid, u32 seq, u8 cmd) 741 + struct genl_info *info, 742 + u8 cmd) 746 743 { 747 744 struct sk_buff *skb; 748 745 int retval; 749 746 750 - skb = ovs_flow_cmd_alloc_info(flow); 747 + skb = ovs_flow_cmd_alloc_info(flow, info); 751 748 if (!skb) 752 749 return ERR_PTR(-ENOMEM); 753 750 754 - retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); 751 + retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, 752 + info->snd_seq, 0, cmd); 755 753 BUG_ON(retval < 0); 756 754 return skb; 757 - } 758 - 759 - static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, 760 - const struct sw_flow_key *key) 761 - { 762 - u32 __always_unused n_mask_hit; 763 - 764 - return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); 765 755 } 766 756 767 757 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ··· 770 770 struct datapath *dp; 771 771 struct sw_flow_actions *acts = NULL; 772 772 struct sw_flow_match match; 773 + bool exact_5tuple; 773 774 int error; 774 775 775 776 /* Extract key. 
*/ ··· 779 778 goto error; 780 779 781 780 ovs_match_init(&match, &key, &mask); 782 - error = ovs_nla_get_match(&match, 781 + error = ovs_nla_get_match(&match, &exact_5tuple, 783 782 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 784 783 if (error) 785 784 goto error; ··· 810 809 goto err_unlock_ovs; 811 810 812 811 /* Check if this is a duplicate flow */ 813 - flow = __ovs_flow_tbl_lookup(&dp->table, &key); 812 + flow = ovs_flow_tbl_lookup(&dp->table, &key); 814 813 if (!flow) { 815 814 /* Bail out if we're not allowed to create a new flow. */ 816 815 error = -ENOENT; ··· 818 817 goto err_unlock_ovs; 819 818 820 819 /* Allocate flow. */ 821 - flow = ovs_flow_alloc(); 820 + flow = ovs_flow_alloc(!exact_5tuple); 822 821 if (IS_ERR(flow)) { 823 822 error = PTR_ERR(flow); 824 823 goto err_unlock_ovs; 825 824 } 826 - clear_stats(flow); 827 825 828 826 flow->key = masked_key; 829 827 flow->unmasked_key = key; ··· 835 835 goto err_flow_free; 836 836 } 837 837 838 - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 839 - info->snd_seq, OVS_FLOW_CMD_NEW); 838 + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); 840 839 } else { 841 840 /* We found a matching flow. */ 842 841 struct sw_flow_actions *old_acts; ··· 863 864 rcu_assign_pointer(flow->sf_acts, acts); 864 865 ovs_nla_free_flow_actions(old_acts); 865 866 866 - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 867 - info->snd_seq, OVS_FLOW_CMD_NEW); 867 + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); 868 868 869 869 /* Clear stats. 
*/ 870 - if (a[OVS_FLOW_ATTR_CLEAR]) { 871 - spin_lock_bh(&flow->lock); 872 - clear_stats(flow); 873 - spin_unlock_bh(&flow->lock); 874 - } 870 + if (a[OVS_FLOW_ATTR_CLEAR]) 871 + ovs_flow_stats_clear(flow); 875 872 } 876 873 ovs_unlock(); 877 874 ··· 905 910 } 906 911 907 912 ovs_match_init(&match, &key, NULL); 908 - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 913 + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); 909 914 if (err) 910 915 return err; 911 916 ··· 916 921 goto unlock; 917 922 } 918 923 919 - flow = __ovs_flow_tbl_lookup(&dp->table, &key); 924 + flow = ovs_flow_tbl_lookup(&dp->table, &key); 920 925 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { 921 926 err = -ENOENT; 922 927 goto unlock; 923 928 } 924 929 925 - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 926 - info->snd_seq, OVS_FLOW_CMD_NEW); 930 + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); 927 931 if (IS_ERR(reply)) { 928 932 err = PTR_ERR(reply); 929 933 goto unlock; ··· 959 965 } 960 966 961 967 ovs_match_init(&match, &key, NULL); 962 - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 968 + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); 963 969 if (err) 964 970 goto unlock; 965 971 966 - flow = __ovs_flow_tbl_lookup(&dp->table, &key); 972 + flow = ovs_flow_tbl_lookup(&dp->table, &key); 967 973 if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { 968 974 err = -ENOENT; 969 975 goto unlock; 970 976 } 971 977 972 - reply = ovs_flow_cmd_alloc_info(flow); 978 + reply = ovs_flow_cmd_alloc_info(flow, info); 973 979 if (!reply) { 974 980 err = -ENOMEM; 975 981 goto unlock; ··· 1055 1061 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { 1056 1062 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, 1057 1063 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, 1064 + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, 1058 1065 }; 1059 1066 1060 1067 
static struct genl_family dp_datapath_genl_family = { ··· 1114 1119 &dp_megaflow_stats)) 1115 1120 goto nla_put_failure; 1116 1121 1122 + if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) 1123 + goto nla_put_failure; 1124 + 1117 1125 return genlmsg_end(skb, ovs_header); 1118 1126 1119 1127 nla_put_failure: ··· 1125 1127 return -EMSGSIZE; 1126 1128 } 1127 1129 1128 - static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, 1129 - u32 seq, u8 cmd) 1130 + static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, 1131 + struct genl_info *info, u8 cmd) 1130 1132 { 1131 1133 struct sk_buff *skb; 1132 1134 int retval; 1133 1135 1134 - skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); 1136 + skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); 1135 1137 if (!skb) 1136 1138 return ERR_PTR(-ENOMEM); 1137 1139 1138 - retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); 1140 + retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); 1139 1141 if (retval < 0) { 1140 1142 kfree_skb(skb); 1141 1143 return ERR_PTR(retval); ··· 1161 1163 rcu_read_unlock(); 1162 1164 } 1163 1165 return dp ? 
dp : ERR_PTR(-ENODEV); 1166 + } 1167 + 1168 + static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) 1169 + { 1170 + struct datapath *dp; 1171 + 1172 + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1173 + if (!dp) 1174 + return; 1175 + 1176 + WARN(dp->user_features, "Dropping previously announced user features\n"); 1177 + dp->user_features = 0; 1178 + } 1179 + 1180 + static void ovs_dp_change(struct datapath *dp, struct nlattr **a) 1181 + { 1182 + if (a[OVS_DP_ATTR_USER_FEATURES]) 1183 + dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); 1164 1184 } 1165 1185 1166 1186 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ··· 1239 1223 parms.port_no = OVSP_LOCAL; 1240 1224 parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); 1241 1225 1226 + ovs_dp_change(dp, a); 1227 + 1242 1228 vport = new_vport(&parms); 1243 1229 if (IS_ERR(vport)) { 1244 1230 err = PTR_ERR(vport); 1245 1231 if (err == -EBUSY) 1246 1232 err = -EEXIST; 1247 1233 1234 + if (err == -EEXIST) { 1235 + /* An outdated user space instance that does not understand 1236 + * the concept of user_features has attempted to create a new 1237 + * datapath and is likely to reuse it. Drop all user features. 
1238 + */ 1239 + if (info->genlhdr->version < OVS_DP_VER_FEATURES) 1240 + ovs_dp_reset_user_features(skb, info); 1241 + } 1242 + 1248 1243 goto err_destroy_ports_array; 1249 1244 } 1250 1245 1251 - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1252 - info->snd_seq, OVS_DP_CMD_NEW); 1246 + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); 1253 1247 err = PTR_ERR(reply); 1254 1248 if (IS_ERR(reply)) 1255 1249 goto err_destroy_local_port; ··· 1325 1299 if (IS_ERR(dp)) 1326 1300 goto unlock; 1327 1301 1328 - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1329 - info->snd_seq, OVS_DP_CMD_DEL); 1302 + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); 1330 1303 err = PTR_ERR(reply); 1331 1304 if (IS_ERR(reply)) 1332 1305 goto unlock; ··· 1353 1328 if (IS_ERR(dp)) 1354 1329 goto unlock; 1355 1330 1356 - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1357 - info->snd_seq, OVS_DP_CMD_NEW); 1331 + ovs_dp_change(dp, info->attrs); 1332 + 1333 + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); 1358 1334 if (IS_ERR(reply)) { 1359 1335 err = PTR_ERR(reply); 1360 1336 genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, ··· 1386 1360 goto unlock; 1387 1361 } 1388 1362 1389 - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1390 - info->snd_seq, OVS_DP_CMD_NEW); 1363 + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); 1391 1364 if (IS_ERR(reply)) { 1392 1365 err = PTR_ERR(reply); 1393 1366 goto unlock; ··· 1466 1441 .parallel_ops = true, 1467 1442 }; 1468 1443 1469 - struct genl_multicast_group ovs_dp_vport_multicast_group = { 1444 + static struct genl_multicast_group ovs_dp_vport_multicast_group = { 1470 1445 .name = OVS_VPORT_MCGROUP 1471 1446 }; 1472 1447
+4 -2
net/openvswitch/datapath.h
··· 88 88 /* Network namespace ref. */ 89 89 struct net *net; 90 90 #endif 91 + 92 + u32 user_features; 91 93 }; 92 94 93 95 /** ··· 147 145 #define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) 148 146 #define ovsl_dereference(p) \ 149 147 rcu_dereference_protected(p, lockdep_ovsl_is_held()) 148 + #define rcu_dereference_ovsl(p) \ 149 + rcu_dereference_check(p, lockdep_ovsl_is_held()) 150 150 151 151 static inline struct net *ovs_dp_get_net(struct datapath *dp) 152 152 { ··· 182 178 183 179 extern struct notifier_block ovs_dp_device_notifier; 184 180 extern struct genl_family dp_vport_genl_family; 185 - extern struct genl_multicast_group ovs_dp_vport_multicast_group; 186 181 187 182 void ovs_dp_process_received_packet(struct vport *, struct sk_buff *); 188 183 void ovs_dp_detach_port(struct vport *); 189 184 int ovs_dp_upcall(struct datapath *, struct sk_buff *, 190 185 const struct dp_upcall_info *); 191 186 192 - const char *ovs_dp_name(const struct datapath *dp); 193 187 struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, 194 188 u8 cmd); 195 189
+89 -7
net/openvswitch/flow.c
··· 35 35 #include <linux/ip.h> 36 36 #include <linux/ipv6.h> 37 37 #include <linux/sctp.h> 38 + #include <linux/smp.h> 38 39 #include <linux/tcp.h> 39 40 #include <linux/udp.h> 40 41 #include <linux/icmp.h> ··· 61 60 62 61 #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) 63 62 64 - void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) 63 + void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) 65 64 { 65 + struct flow_stats *stats; 66 66 __be16 tcp_flags = 0; 67 + 68 + if (!flow->stats.is_percpu) 69 + stats = flow->stats.stat; 70 + else 71 + stats = this_cpu_ptr(flow->stats.cpu_stats); 67 72 68 73 if ((flow->key.eth.type == htons(ETH_P_IP) || 69 74 flow->key.eth.type == htons(ETH_P_IPV6)) && ··· 78 71 tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); 79 72 } 80 73 81 - spin_lock(&flow->lock); 82 - flow->used = jiffies; 83 - flow->packet_count++; 84 - flow->byte_count += skb->len; 85 - flow->tcp_flags |= tcp_flags; 86 - spin_unlock(&flow->lock); 74 + spin_lock(&stats->lock); 75 + stats->used = jiffies; 76 + stats->packet_count++; 77 + stats->byte_count += skb->len; 78 + stats->tcp_flags |= tcp_flags; 79 + spin_unlock(&stats->lock); 80 + } 81 + 82 + static void stats_read(struct flow_stats *stats, 83 + struct ovs_flow_stats *ovs_stats, 84 + unsigned long *used, __be16 *tcp_flags) 85 + { 86 + spin_lock(&stats->lock); 87 + if (time_after(stats->used, *used)) 88 + *used = stats->used; 89 + *tcp_flags |= stats->tcp_flags; 90 + ovs_stats->n_packets += stats->packet_count; 91 + ovs_stats->n_bytes += stats->byte_count; 92 + spin_unlock(&stats->lock); 93 + } 94 + 95 + void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, 96 + unsigned long *used, __be16 *tcp_flags) 97 + { 98 + int cpu, cur_cpu; 99 + 100 + *used = 0; 101 + *tcp_flags = 0; 102 + memset(ovs_stats, 0, sizeof(*ovs_stats)); 103 + 104 + if (!flow->stats.is_percpu) { 105 + stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); 106 + } else { 107 
+ cur_cpu = get_cpu(); 108 + for_each_possible_cpu(cpu) { 109 + struct flow_stats *stats; 110 + 111 + if (cpu == cur_cpu) 112 + local_bh_disable(); 113 + 114 + stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); 115 + stats_read(stats, ovs_stats, used, tcp_flags); 116 + 117 + if (cpu == cur_cpu) 118 + local_bh_enable(); 119 + } 120 + put_cpu(); 121 + } 122 + } 123 + 124 + static void stats_reset(struct flow_stats *stats) 125 + { 126 + spin_lock(&stats->lock); 127 + stats->used = 0; 128 + stats->packet_count = 0; 129 + stats->byte_count = 0; 130 + stats->tcp_flags = 0; 131 + spin_unlock(&stats->lock); 132 + } 133 + 134 + void ovs_flow_stats_clear(struct sw_flow *flow) 135 + { 136 + int cpu, cur_cpu; 137 + 138 + if (!flow->stats.is_percpu) { 139 + stats_reset(flow->stats.stat); 140 + } else { 141 + cur_cpu = get_cpu(); 142 + 143 + for_each_possible_cpu(cpu) { 144 + 145 + if (cpu == cur_cpu) 146 + local_bh_disable(); 147 + 148 + stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); 149 + 150 + if (cpu == cur_cpu) 151 + local_bh_enable(); 152 + } 153 + put_cpu(); 154 + } 87 155 } 88 156 89 157 static int check_header(struct sk_buff *skb, int len)
+24 -9
net/openvswitch/flow.h
··· 19 19 #ifndef FLOW_H 20 20 #define FLOW_H 1 21 21 22 + #include <linux/cache.h> 22 23 #include <linux/kernel.h> 23 24 #include <linux/netlink.h> 24 25 #include <linux/openvswitch.h> ··· 123 122 } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ 124 123 125 124 struct sw_flow_key_range { 126 - size_t start; 127 - size_t end; 125 + unsigned short int start; 126 + unsigned short int end; 128 127 }; 129 128 130 129 struct sw_flow_mask { ··· 147 146 struct nlattr actions[]; 148 147 }; 149 148 149 + struct flow_stats { 150 + u64 packet_count; /* Number of packets matched. */ 151 + u64 byte_count; /* Number of bytes matched. */ 152 + unsigned long used; /* Last used time (in jiffies). */ 153 + spinlock_t lock; /* Lock for atomic stats update. */ 154 + __be16 tcp_flags; /* Union of seen TCP flags. */ 155 + }; 156 + 157 + struct sw_flow_stats { 158 + bool is_percpu; 159 + union { 160 + struct flow_stats *stat; 161 + struct flow_stats __percpu *cpu_stats; 162 + }; 163 + }; 164 + 150 165 struct sw_flow { 151 166 struct rcu_head rcu; 152 167 struct hlist_node hash_node[2]; ··· 172 155 struct sw_flow_key unmasked_key; 173 156 struct sw_flow_mask *mask; 174 157 struct sw_flow_actions __rcu *sf_acts; 175 - 176 - spinlock_t lock; /* Lock for values below. */ 177 - unsigned long used; /* Last used time (in jiffies). */ 178 - u64 packet_count; /* Number of packets matched. */ 179 - u64 byte_count; /* Number of bytes matched. */ 180 - __be16 tcp_flags; /* Union of seen TCP flags. 
*/ 158 + struct sw_flow_stats stats; 181 159 }; 182 160 183 161 struct arp_eth_header { ··· 189 177 unsigned char ar_tip[4]; /* target IP address */ 190 178 } __packed; 191 179 192 - void ovs_flow_used(struct sw_flow *, struct sk_buff *); 180 + void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); 181 + void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, 182 + unsigned long *used, __be16 *tcp_flags); 183 + void ovs_flow_stats_clear(struct sw_flow *flow); 193 184 u64 ovs_flow_used_time(unsigned long flow_jiffies); 194 185 195 186 int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
+53 -13
net/openvswitch/flow_netlink.c
··· 266 266 return true; 267 267 } 268 268 269 + static bool is_all_set(const u8 *fp, size_t size) 270 + { 271 + int i; 272 + 273 + if (!fp) 274 + return false; 275 + 276 + for (i = 0; i < size; i++) 277 + if (fp[i] != 0xff) 278 + return false; 279 + 280 + return true; 281 + } 282 + 269 283 static int __parse_flow_nlattrs(const struct nlattr *attr, 270 284 const struct nlattr *a[], 271 285 u64 *attrsp, bool nz) ··· 501 487 return 0; 502 488 } 503 489 504 - static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 505 - const struct nlattr **a, bool is_mask) 490 + static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, 491 + u64 attrs, const struct nlattr **a, 492 + bool is_mask) 506 493 { 507 494 int err; 508 495 u64 orig_attrs = attrs; ··· 560 545 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); 561 546 } 562 547 548 + if (is_mask && exact_5tuple) { 549 + if (match->mask->key.eth.type != htons(0xffff)) 550 + *exact_5tuple = false; 551 + } 552 + 563 553 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { 564 554 const struct ovs_key_ipv4 *ipv4_key; 565 555 ··· 587 567 SW_FLOW_KEY_PUT(match, ipv4.addr.dst, 588 568 ipv4_key->ipv4_dst, is_mask); 589 569 attrs &= ~(1 << OVS_KEY_ATTR_IPV4); 570 + 571 + if (is_mask && exact_5tuple && *exact_5tuple) { 572 + if (ipv4_key->ipv4_proto != 0xff || 573 + ipv4_key->ipv4_src != htonl(0xffffffff) || 574 + ipv4_key->ipv4_dst != htonl(0xffffffff)) 575 + *exact_5tuple = false; 576 + } 590 577 } 591 578 592 579 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { ··· 625 598 is_mask); 626 599 627 600 attrs &= ~(1 << OVS_KEY_ATTR_IPV6); 601 + 602 + if (is_mask && exact_5tuple && *exact_5tuple) { 603 + if (ipv6_key->ipv6_proto != 0xff || 604 + !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || 605 + !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) 606 + *exact_5tuple = false; 607 + } 628 608 } 629 609 630 610 if (attrs & (1 << OVS_KEY_ATTR_ARP)) { ··· 674 640 
tcp_key->tcp_dst, is_mask); 675 641 } 676 642 attrs &= ~(1 << OVS_KEY_ATTR_TCP); 643 + 644 + if (is_mask && exact_5tuple && *exact_5tuple && 645 + (tcp_key->tcp_src != htons(0xffff) || 646 + tcp_key->tcp_dst != htons(0xffff))) 647 + *exact_5tuple = false; 677 648 } 678 649 679 650 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { ··· 710 671 udp_key->udp_dst, is_mask); 711 672 } 712 673 attrs &= ~(1 << OVS_KEY_ATTR_UDP); 674 + 675 + if (is_mask && exact_5tuple && *exact_5tuple && 676 + (udp_key->udp_src != htons(0xffff) || 677 + udp_key->udp_dst != htons(0xffff))) 678 + *exact_5tuple = false; 713 679 } 714 680 715 681 if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { ··· 800 756 * attribute specifies the mask field of the wildcarded flow. 801 757 */ 802 758 int ovs_nla_get_match(struct sw_flow_match *match, 759 + bool *exact_5tuple, 803 760 const struct nlattr *key, 804 761 const struct nlattr *mask) 805 762 { ··· 848 803 } 849 804 } 850 805 851 - err = ovs_key_from_nlattrs(match, key_attrs, a, false); 806 + err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); 852 807 if (err) 853 808 return err; 809 + 810 + if (exact_5tuple) 811 + *exact_5tuple = true; 854 812 855 813 if (mask) { 856 814 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); ··· 892 844 } 893 845 } 894 846 895 - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); 847 + err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); 896 848 if (err) 897 849 return err; 898 850 } else { ··· 1176 1128 return sfa; 1177 1129 } 1178 1130 1179 - /* RCU callback used by ovs_nla_free_flow_actions. */ 1180 - static void rcu_free_acts_callback(struct rcu_head *rcu) 1181 - { 1182 - struct sw_flow_actions *sf_acts = container_of(rcu, 1183 - struct sw_flow_actions, rcu); 1184 - kfree(sf_acts); 1185 - } 1186 - 1187 1131 /* Schedules 'sf_acts' to be freed after the next RCU grace period. 1188 1132 * The caller must hold rcu_read_lock for this to be sensible. 
*/ 1189 1133 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) 1190 1134 { 1191 - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); 1135 + kfree_rcu(sf_acts, rcu); 1192 1136 } 1193 1137 1194 1138 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+1
net/openvswitch/flow_netlink.h
··· 45 45 int ovs_nla_get_flow_metadata(struct sw_flow *flow, 46 46 const struct nlattr *attr); 47 47 int ovs_nla_get_match(struct sw_flow_match *match, 48 + bool *exact_5tuple, 48 49 const struct nlattr *, 49 50 const struct nlattr *); 50 51
+41 -19
net/openvswitch/flow_table.c
··· 44 44 #include <net/ipv6.h> 45 45 #include <net/ndisc.h> 46 46 47 - #include "datapath.h" 48 - 49 47 #define TBL_MIN_BUCKETS 1024 50 48 #define REHASH_INTERVAL (10 * 60 * HZ) 51 49 ··· 70 72 *d++ = *s++ & *m++; 71 73 } 72 74 73 - struct sw_flow *ovs_flow_alloc(void) 75 + struct sw_flow *ovs_flow_alloc(bool percpu_stats) 74 76 { 75 77 struct sw_flow *flow; 78 + int cpu; 76 79 77 80 flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); 78 81 if (!flow) 79 82 return ERR_PTR(-ENOMEM); 80 83 81 - spin_lock_init(&flow->lock); 82 84 flow->sf_acts = NULL; 83 85 flow->mask = NULL; 84 86 87 + flow->stats.is_percpu = percpu_stats; 88 + 89 + if (!percpu_stats) { 90 + flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); 91 + if (!flow->stats.stat) 92 + goto err; 93 + 94 + spin_lock_init(&flow->stats.stat->lock); 95 + } else { 96 + flow->stats.cpu_stats = alloc_percpu(struct flow_stats); 97 + if (!flow->stats.cpu_stats) 98 + goto err; 99 + 100 + for_each_possible_cpu(cpu) { 101 + struct flow_stats *cpu_stats; 102 + 103 + cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); 104 + spin_lock_init(&cpu_stats->lock); 105 + } 106 + } 85 107 return flow; 108 + err: 109 + kfree(flow); 110 + return ERR_PTR(-ENOMEM); 86 111 } 87 112 88 113 int ovs_flow_tbl_count(struct flow_table *table) ··· 139 118 static void flow_free(struct sw_flow *flow) 140 119 { 141 120 kfree((struct sf_flow_acts __force *)flow->sf_acts); 121 + if (flow->stats.is_percpu) 122 + free_percpu(flow->stats.cpu_stats); 123 + else 124 + kfree(flow->stats.stat); 142 125 kmem_cache_free(flow_cache, flow); 143 126 } 144 127 ··· 151 126 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 152 127 153 128 flow_free(flow); 154 - } 155 - 156 - static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) 157 - { 158 - struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); 159 - 160 - kfree(mask); 161 129 } 162 130 163 131 static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) 
··· 164 146 if (!mask->ref_count) { 165 147 list_del_rcu(&mask->list); 166 148 if (deferred) 167 - call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); 149 + kfree_rcu(mask, rcu); 168 150 else 169 151 kfree(mask); 170 152 } ··· 447 429 return NULL; 448 430 } 449 431 450 - struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, 432 + struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, 451 433 const struct sw_flow_key *key, 452 434 u32 *n_mask_hit) 453 435 { 454 - struct table_instance *ti = rcu_dereference(tbl->ti); 436 + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); 455 437 struct sw_flow_mask *mask; 456 438 struct sw_flow *flow; 457 439 ··· 463 445 return flow; 464 446 } 465 447 return NULL; 448 + } 449 + 450 + struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, 451 + const struct sw_flow_key *key) 452 + { 453 + u32 __always_unused n_mask_hit; 454 + 455 + return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); 466 456 } 467 457 468 458 int ovs_flow_tbl_num_masks(const struct flow_table *table) ··· 540 514 return NULL; 541 515 } 542 516 543 - /** 544 - * add a new mask into the mask list. 545 - * The caller needs to make sure that 'mask' is not the same 546 - * as any masks that are already on the list. 547 - */ 517 + /* Add 'mask' into the mask list, if it is not already there. */ 548 518 static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, 549 519 struct sw_flow_mask *new) 550 520 {
+4 -2
net/openvswitch/flow_table.h
··· 55 55 int ovs_flow_init(void); 56 56 void ovs_flow_exit(void); 57 57 58 - struct sw_flow *ovs_flow_alloc(void); 58 + struct sw_flow *ovs_flow_alloc(bool percpu_stats); 59 59 void ovs_flow_free(struct sw_flow *, bool deferred); 60 60 61 61 int ovs_flow_tbl_init(struct flow_table *); ··· 69 69 int ovs_flow_tbl_num_masks(const struct flow_table *table); 70 70 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, 71 71 u32 *bucket, u32 *idx); 72 - struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, 72 + struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, 73 73 const struct sw_flow_key *, 74 74 u32 *n_mask_hit); 75 + struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, 76 + const struct sw_flow_key *); 75 77 76 78 bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 77 79 struct sw_flow_match *match);
+5 -1
net/openvswitch/vport.c
··· 33 33 #include "vport.h" 34 34 #include "vport-internal_dev.h" 35 35 36 + static void ovs_vport_record_error(struct vport *, 37 + enum vport_err_type err_type); 38 + 36 39 /* List of statically compiled vport implementations. Don't forget to also 37 40 * add yours to the list at the bottom of vport.h. */ 38 41 static const struct vport_ops *vport_ops_list[] = { ··· 399 396 * If using the vport generic stats layer indicate that an error of the given 400 397 * type has occurred. 401 398 */ 402 - void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) 399 + static void ovs_vport_record_error(struct vport *vport, 400 + enum vport_err_type err_type) 403 401 { 404 402 spin_lock(&vport->stats_lock); 405 403
-1
net/openvswitch/vport.h
··· 192 192 193 193 void ovs_vport_receive(struct vport *, struct sk_buff *, 194 194 struct ovs_key_ipv4_tunnel *); 195 - void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); 196 195 197 196 /* List of statically compiled vport implementations. Don't forget to also 198 197 * add yours to the list at the top of vport.c. */