/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};

/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
			    unsigned int group)
{
	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
	       genl_has_listeners(family, genl_info_net(info)->genl_sock,
				  group);
}

static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, genl_info_net(info), info->snd_portid,
		    0, info->nlhdr, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on
 * vports, etc.) and writes to other state (flow table modifications, set
 * miscellaneous datapath parameters, etc.) are protected by ovs_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of above and don't interact with
 * each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, dp_ifindex);
	if (dev) {
		struct vport *vport = ovs_internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
static const char *ovs_dp_name(const struct datapath *dp)
{
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
	return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
	struct vport *local;
	int ifindex;

	rcu_read_lock();

	local = ovs_vport_rcu(dp, OVSP_LOCAL);
	if (local)
		ifindex = netdev_vport_priv(local)->dev->ifindex;
	else
		ifindex = 0;

	rcu_read_unlock();

	return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	free_percpu(dp->stats_percpu);
	release_net(ovs_dp_get_net(dp));
	kfree(dp->ports);
	kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ovs_vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;
		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

		hlist_add_head_rcu(&vport->dp_hash_node, head);
	}
	return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct dp_stats_percpu *stats;
	struct sw_flow_key key;
	u64 *stats_counter;
	u32 n_mask_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Extract flow from 'skb' into 'key'. */
	error = ovs_flow_extract(skb, p->port_no, &key);
	if (unlikely(error)) {
		kfree_skb(skb);
		return;
	}

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.key = &key;
		upcall.userdata = NULL;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		error = ovs_dp_upcall(dp, skb, &upcall);
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);
		stats_counter = &stats->n_missed;
		goto out;
	}

	OVS_CB(skb)->flow = flow;
	OVS_CB(skb)->pkt_key = &key;

	ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
	ovs_execute_actions(dp, skb);
	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, upcall_info);
	else
		err = queue_gso_packets(dp, skb, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct dp_upcall_info *upcall_info)
{
	unsigned short gso_type = skb_shinfo(skb)->gso_type;
	struct dp_upcall_info later_info;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);

	/* Queue all of the segments. */
	skb = segs;
	do {
		err = queue_userspace_packet(dp, skb, upcall_info);
		if (err)
			break;

		if (skb == segs && gso_type & SKB_GSO_UDP) {
			/* The initial flow key extracted by ovs_flow_extract()
			 * in this case is for a first fragment, so we need to
			 * properly mark later fragments.
			 */
			later_key = *upcall_info->key;
			later_key.ip.frag = OVS_FRAG_TYPE_LATER;

			later_info = *upcall_info;
			later_info.key = &later_key;
			upcall_info = &later_info;
		}
	} while ((skb = skb->next));

	/* Free all of the segments. */
	skb = segs;
	do {
		nskb = skb->next;
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	} while ((skb = nskb));
	return err;
}

static size_t key_attr_size(void)
{
	return nla_total_size(4)    /* OVS_KEY_ATTR_PRIORITY */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
		+ nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
		+ nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
		+ nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
		+ nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
		+ nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
		+ nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
		+ nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}

static size_t upcall_msg_size(const struct nlattr *userdata,
			      unsigned int hdrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */

	/* OVS_PACKET_ATTR_USERDATA */
	if (userdata)
		size += NLA_ALIGN(userdata->nla_len);

	return size;
}

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct dp_upcall_info *upcall_info)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	struct genl_info info = {
		.dst_sk = ovs_dp_get_net(dp)->genl_sock,
		.snd_portid = upcall_info->portid,
	};
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (vlan_tx_tag_present(skb)) {
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
		if (!nskb)
			return -ENOMEM;

		nskb->vlan_tci = 0;
		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info->userdata, hlen);
	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	upcall->dp_ifindex = dp_ifindex;

	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
	err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
	BUG_ON(err);
	nla_nest_end(user_skb, nla);

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len);

	err = skb_zerocopy(user_skb, skb, skb->len, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
		size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;

		if (plen > 0)
			memset(skb_put(user_skb, plen), 0, plen);
	}

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	kfree_skb(user_skb);
	kfree_skb(nskb);
	return err;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct datapath *dp;
	struct ethhdr *eth;
	int len;
	int err;

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_extract(packet, -1, &flow->key);
	if (err)
		goto err_flow_free;

	err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
	if (err)
		goto err_flow_free;
	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
	err = PTR_ERR(acts);
	if (IS_ERR(acts))
		goto err_flow_free;

	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, 0, &acts);
	rcu_assign_pointer(flow->sf_acts, acts);
	if (err)
		goto err_flow_free;

	OVS_CB(packet)->flow = flow;
	OVS_CB(packet)->pkt_key = &flow->key;
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	local_bh_disable();
	err = ovs_execute_actions(dp, packet);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static const struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
	}
};

static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_packet_genl_ops,
	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};

static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
	}
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
	return NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd)
{
	const int skb_orig_len = skb->len;
	struct nlattr *start;
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	/* Fill flow key. */
	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
	if (!nla)
		goto nla_put_failure;

	err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
	if (err)
		goto error;
	nla_nest_end(skb, nla);

	nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
	if (!nla)
		goto nla_put_failure;

	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
	if (err)
		goto error;

	nla_nest_end(skb, nla);

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
		goto nla_put_failure;

	if (stats.n_packets &&
	    nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
		goto nla_put_failure;

	if ((u8)ntohs(tcp_flags) &&
	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		goto nla_put_failure;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'. This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them. (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			if (skb_orig_len)
				goto error;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len)
		goto nla_put_failure;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       struct genl_info *info,
					       bool always)
{
	struct sk_buff *skb;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
		return NULL;

	skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
				      always);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd);
	BUG_ON(retval < 0);
	return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow *flow, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	int error;

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY])
		goto error;
	if (!a[OVS_FLOW_ATTR_ACTIONS])
		goto error;

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
	error = ovs_nla_get_match(&match,
				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
	if (error)
		goto err_kfree_flow;

	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);

	/* Validate actions. */
	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
	error = PTR_ERR(acts);
	if (IS_ERR(acts))
		goto err_kfree_flow;

	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
				     0, &acts);
	if (error) {
		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
		goto err_kfree_acts;
	}

	reply = ovs_flow_cmd_alloc_info(acts, info, false);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check if this is a duplicate flow */
	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request. We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The unmasked key has to be the same for flow updates. */
		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		ovs_nla_free_flow_actions(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	kfree(acts);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key, masked_key;
	struct sw_flow *flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	int error;

	/* Extract key. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY])
		goto error;

	ovs_match_init(&match, &key, &mask);
	error = ovs_nla_get_match(&match,
				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error;

		ovs_flow_mask_key(&masked_key, &key, &mask);
		error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
					     &masked_key, 0, &acts);
		if (error) {
			OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
			goto err_kfree_acts;
		}
	}

	/* Can allocate before locking if have acts. */
	if (acts) {
		reply = ovs_flow_cmd_alloc_info(acts, info, false);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_NEW, false);
		if (unlikely(IS_ERR(reply))) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	if (old_acts)
		ovs_nla_free_flow_actions(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	kfree(acts);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	int err;

	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
		return -EINVAL;
	}

	ovs_match_init(&match, &key, NULL);
	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_NEW, true);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	int err;

	if (likely(a[OVS_FLOW_ATTR_KEY])) {
		ovs_match_init(&match, &key, NULL);
		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
					info, false);
	if (likely(reply)) {
		if (likely(!IS_ERR(reply))) {
			rcu_read_lock();	/* To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL);
			rcu_read_unlock();
			BUG_ON(err < 0);

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
		}
	}

	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct table_instance *ti;
	struct datapath *dp;

	rcu_read_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	ti = rcu_dereference(dp->table.ti);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static const struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_set,
	},
};

static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_flow_genl_ops,
	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
};

static size_t ovs_dp_cmd_msg_size(void)
{
	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

	msgsize += nla_total_size(IFNAMSIZ);
	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
	msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */

	return msgsize;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
		    &dp_stats))
		goto nla_put_failure;

	if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
		    sizeof(struct ovs_dp_megaflow_stats),
		    &dp_megaflow_stats))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
	return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
					struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
{
	struct datapath *dp;

	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return;

	WARN(dp->user_features, "Dropping previously announced user features\n");
	dp->user_features = 0;
}

static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
{
	if (a[OVS_DP_ATTR_USER_FEATURES])
		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err, i;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_free_reply;

	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_free_dp;

	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_table;
	}

	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
			    GFP_KERNEL);
	if (!dp->ports) {
		err = -ENOMEM;
		goto err_destroy_percpu;
	}

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];

	ovs_dp_change(dp, a);

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_destroy_ports_array;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);
	return 0;

err_destroy_ports_array:
	ovs_unlock();
	kfree(dp->ports);
err_destroy_percpu:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table, false);
err_free_dp:
	release_net(ovs_dp_get_net(dp));
	kfree(dp);
err_free_reply:
	kfree_skb(reply);
err:
	return err;
}

/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port. We need to make sure that
	 * all ports in datapath are destroyed first before freeing datapath.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* RCU destroy the flow table */
	ovs_flow_tbl_destroy(&dp->table, true);

	call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	ovs_dp_change(dp, info->attrs);

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto err_unlock_free;
	}
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

err_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_NEW) < 0)
			break;
		i++;
	}
	rcu_read_unlock();

	cb->args[0] = i;

	return skb->len;
}

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
};

static const struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
	},
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_datapath_genl_ops,
	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
};

/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   vport->ops->get_name(vport)))
		goto nla_put_failure;

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
		    &vport_stats))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}

/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
					 u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
	BUG_ON(retval < 0);

	return skb;
}

/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
				  struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}

static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
	BUG_ON(err < 0);
	ovs_dp_detach_port(vport);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}

static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_NEW) < 0)
				goto out;

			j++;
		}
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}

static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static const struct genl_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_set,
	},
};

struct genl_family dp_vport_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_vport_genl_ops,
	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
};

static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
};

static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i]);
}

static int dp_register_genl(void)
{
	int err;
	int i;

	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {

		err = genl_register_family(dp_genl_families[i]);
		if (err)
			goto error;
	}

	return 0;

error:
	dp_unregister_genl(i);
	return err;
}

static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	return 0;
}

static void __net_exit ovs_exit_net(struct net *net)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	ovs_lock();
	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);
	ovs_unlock();

	cancel_work_sync(&ovs_net->dp_notify_work);
}

static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};

static int __init dp_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_notifier;

	return 0;

error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error:
	return err;
}

static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");