Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vxlan: vni filtering support on collect metadata device

This patch adds vnifiltering support to the collect metadata device.

Motivation:
You can only use a single vxlan collect metadata device for a given
vxlan udp port in the system today. The vxlan collect metadata device
terminates all received vxlan packets. As shown in the below diagram,
there are use-cases where you need to support multiple such vxlan devices in
independent bridge domains. Each vxlan device must terminate the vni's
it is configured for.
Example use case: In a service provider network, a service provider
typically supports multiple bridge domains with overlapping vlans.
One bridge domain per customer. Vlans in each bridge domain are
mapped to globally unique vxlan ranges assigned to each customer.

vnifiltering support in collect metadata devices terminates only configured
vnis. This is similar to vlan filtering in bridge driver. The vni filtering
capability is provided by a new flag on collect metadata device.

In the diagram below:
- customer1 is mapped to br1 bridge domain
- customer2 is mapped to br2 bridge domain
- customer1 vlan 10-11 is mapped to vni 1001-1002
- customer2 vlan 10-11 is mapped to vni 2001-2002
- br1 and br2 are vlan filtering bridges
- vxlan1 and vxlan2 are collect metadata devices with
vnifiltering enabled

┌──────────────────────────────────────────────────────────────────┐
│ switch │
│ │
│ ┌───────────┐ ┌───────────┐ │
│ │ │ │ │ │
│ │ br1 │ │ br2 │ │
│ └┬─────────┬┘ └──┬───────┬┘ │
│ vlans│ │ vlans │ │ │
│ 10,11│ │ 10,11│ │ │
│ │ vlanvnimap: │ vlanvnimap: │
│ │ 10-1001,11-1002 │ 10-2001,11-2002 │
│ │ │ │ │ │
│ ┌──────┴┐ ┌──┴─────────┐ ┌───┴────┐ │ │
│ │ swp1 │ │vxlan1 │ │ swp2 │ ┌┴─────────────┐ │
│ │ │ │ vnifilter:│ │ │ │vxlan2 │ │
│ └───┬───┘ │ 1001,1002│ └───┬────┘ │ vnifilter: │ │
│ │ └────────────┘ │ │ 2001,2002 │ │
│ │ │ └──────────────┘ │
│ │ │ │
└───────┼──────────────────────────────────┼───────────────────────┘
│ │
│ │
┌─────┴───────┐ │
│ customer1 │ ┌─────┴──────┐
│ host/VM │ │customer2 │
└─────────────┘ │ host/VM │
└────────────┘

With this implementation, a vxlan dst metadata device can
be associated with a range of vnis.
struct vxlan_vni_node is introduced to represent
a configured vni. We start with vni and its
associated remote_ip in this structure. This
structure can be extended to bring in other
per vni attributes if there are usecases for it.
A vni inherits an attribute from the base vxlan device
if there are no per-vni attributes defined.

struct vxlan_dev gets a new rhashtable for
vnis called vxlan_vni_group. vxlan_vnifilter.c
implements the necessary netlink api, notifications
and helper functions to process and manage lifecycle
of vxlan_vni_node.

This patch also adds new helper functions in vxlan_multicast.c
to handle per vni remote_ip multicast groups which are part
of vxlan_vni_group.

Fix build problems:
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Roopa Prabhu <roopa@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Roopa Prabhu and committed by
David S. Miller
f9c4bb0b a498c595

+1147 -32
+1 -1
drivers/net/vxlan/Makefile
··· 4 4 5 5 obj-$(CONFIG_VXLAN) += vxlan.o 6 6 7 - vxlan-objs := vxlan_core.o vxlan_multicast.o 7 + vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o
+75 -21
drivers/net/vxlan/vxlan_core.c
··· 145 145 struct vxlan_dev_node *node; 146 146 147 147 /* For flow based devices, map all packets to VNI 0 */ 148 - if (vs->flags & VXLAN_F_COLLECT_METADATA) 148 + if (vs->flags & VXLAN_F_COLLECT_METADATA && 149 + !(vs->flags & VXLAN_F_VNIFILTER)) 149 150 vni = 0; 150 151 151 152 hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) { 152 - if (node->vxlan->default_dst.remote_vni != vni) 153 + if (!node->vxlan) 153 154 continue; 155 + if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) { 156 + if (!vxlan_vnifilter_lookup(node->vxlan, vni)) 157 + continue; 158 + } else if (node->vxlan->default_dst.remote_vni != vni) { 159 + continue; 160 + } 154 161 155 162 if (IS_ENABLED(CONFIG_IPV6)) { 156 163 const struct vxlan_config *cfg = &node->vxlan->cfg; ··· 1485 1478 RCU_INIT_POINTER(vxlan->vn4_sock, NULL); 1486 1479 synchronize_net(); 1487 1480 1488 - vxlan_vs_del_dev(vxlan); 1481 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) 1482 + vxlan_vs_del_vnigrp(vxlan); 1483 + else 1484 + vxlan_vs_del_dev(vxlan); 1489 1485 1490 1486 if (__vxlan_sock_release_prep(sock4)) { 1491 1487 udp_tunnel_sock_release(sock4->sock); ··· 2867 2857 struct vxlan_dev *vxlan = netdev_priv(dev); 2868 2858 int err; 2869 2859 2860 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) 2861 + vxlan_vnigroup_init(vxlan); 2862 + 2870 2863 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 2871 2864 if (!dev->tstats) 2872 2865 return -ENOMEM; ··· 2899 2886 { 2900 2887 struct vxlan_dev *vxlan = netdev_priv(dev); 2901 2888 2889 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) 2890 + vxlan_vnigroup_uninit(vxlan); 2891 + 2902 2892 gro_cells_destroy(&vxlan->gro_cells); 2903 2893 2904 2894 vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); ··· 2919 2903 if (ret < 0) 2920 2904 return ret; 2921 2905 2922 - if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { 2923 - ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip, 2924 - vxlan->default_dst.remote_ifindex); 2925 - if (ret == -EADDRINUSE) 2926 - ret = 0; 
2927 - if (ret) { 2928 - vxlan_sock_release(vxlan); 2929 - return ret; 2930 - } 2906 + ret = vxlan_multicast_join(vxlan); 2907 + if (ret) { 2908 + vxlan_sock_release(vxlan); 2909 + return ret; 2931 2910 } 2932 2911 2933 2912 if (vxlan->cfg.age_interval) ··· 2959 2948 static int vxlan_stop(struct net_device *dev) 2960 2949 { 2961 2950 struct vxlan_dev *vxlan = netdev_priv(dev); 2962 - struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 2963 2951 int ret = 0; 2964 2952 2965 - if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && 2966 - !vxlan_group_used(vn, vxlan, NULL, 0)) 2967 - ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip, 2968 - vxlan->default_dst.remote_ifindex); 2953 + vxlan_multicast_leave(vxlan); 2969 2954 2970 2955 del_timer_sync(&vxlan->age_timer); 2971 2956 ··· 3191 3184 [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, 3192 3185 [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG }, 3193 3186 [IFLA_VXLAN_DF] = { .type = NLA_U8 }, 3187 + [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, 3194 3188 }; 3195 3189 3196 3190 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], ··· 3377 3369 static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) 3378 3370 { 3379 3371 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 3372 + bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA; 3380 3373 struct vxlan_sock *vs = NULL; 3381 3374 struct vxlan_dev_node *node; 3382 3375 int l3mdev_index = 0; ··· 3413 3404 rcu_assign_pointer(vxlan->vn4_sock, vs); 3414 3405 node = &vxlan->hlist4; 3415 3406 } 3416 - vxlan_vs_add_dev(vs, vxlan, node); 3407 + 3408 + if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER)) 3409 + vxlan_vs_add_vnigrp(vxlan, vs, ipv6); 3410 + else 3411 + vxlan_vs_add_dev(vs, vxlan, node); 3412 + 3417 3413 return 0; 3418 3414 } 3419 3415 ··· 3445 3431 return ret; 3446 3432 } 3447 3433 3448 - static int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan, 3449 - struct vxlan_config 
*conf, __be32 vni) 3434 + int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan, 3435 + struct vxlan_config *conf, __be32 vni) 3450 3436 { 3451 3437 struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); 3452 3438 struct vxlan_dev *tmp; ··· 3454 3440 list_for_each_entry(tmp, &vn->vxlan_list, next) { 3455 3441 if (tmp == vxlan) 3456 3442 continue; 3457 - if (tmp->cfg.vni != vni) 3443 + if (tmp->cfg.flags & VXLAN_F_VNIFILTER) { 3444 + if (!vxlan_vnifilter_lookup(tmp, vni)) 3445 + continue; 3446 + } else if (tmp->cfg.vni != vni) { 3458 3447 continue; 3448 + } 3459 3449 if (tmp->cfg.dst_port != conf->dst_port) 3460 3450 continue; 3461 3451 if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) != ··· 4069 4051 if (data[IFLA_VXLAN_DF]) 4070 4052 conf->df = nla_get_u8(data[IFLA_VXLAN_DF]); 4071 4053 4054 + if (data[IFLA_VXLAN_VNIFILTER]) { 4055 + err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER, 4056 + VXLAN_F_VNIFILTER, changelink, false, 4057 + extack); 4058 + if (err) 4059 + return err; 4060 + 4061 + if ((conf->flags & VXLAN_F_VNIFILTER) && 4062 + !(conf->flags & VXLAN_F_COLLECT_METADATA)) { 4063 + NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER], 4064 + "vxlan vnifilter only valid in collect metadata mode"); 4065 + return -EINVAL; 4066 + } 4067 + } 4068 + 4072 4069 return 0; 4073 4070 } 4074 4071 ··· 4159 4126 dst->remote_ifindex, 4160 4127 true); 4161 4128 spin_unlock_bh(&vxlan->hash_lock[hash_index]); 4129 + 4130 + /* If vni filtering device, also update fdb entries of 4131 + * all vnis that were using default remote ip 4132 + */ 4133 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { 4134 + err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip, 4135 + &conf.remote_ip, extack); 4136 + if (err) { 4137 + netdev_adjacent_change_abort(dst->remote_dev, 4138 + lowerdev, dev); 4139 + return err; 4140 + } 4141 + } 4162 4142 } 4163 4143 4164 4144 if (conf.age_interval != vxlan->cfg.age_interval) ··· 4315 4269 4316 4270 if (vxlan->cfg.flags & 
VXLAN_F_REMCSUM_NOPARTIAL && 4317 4271 nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) 4272 + goto nla_put_failure; 4273 + 4274 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER && 4275 + nla_put_u8(skb, IFLA_VXLAN_VNIFILTER, 4276 + !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))) 4318 4277 goto nla_put_failure; 4319 4278 4320 4279 return 0; ··· 4681 4630 if (rc) 4682 4631 goto out4; 4683 4632 4633 + vxlan_vnifilter_init(); 4634 + 4684 4635 return 0; 4685 4636 out4: 4686 4637 unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); ··· 4697 4644 4698 4645 static void __exit vxlan_cleanup_module(void) 4699 4646 { 4647 + vxlan_vnifilter_uninit(); 4700 4648 rtnl_link_unregister(&vxlan_link_ops); 4701 4649 unregister_switchdev_notifier(&vxlan_switchdev_notifier_block); 4702 4650 unregister_netdevice_notifier(&vxlan_notifier_block);
+144 -6
drivers/net/vxlan/vxlan_multicast.c
··· 82 82 return ret; 83 83 } 84 84 85 + static bool vxlan_group_used_match(union vxlan_addr *ip, int ifindex, 86 + union vxlan_addr *rip, int rifindex) 87 + { 88 + if (!vxlan_addr_multicast(rip)) 89 + return false; 90 + 91 + if (!vxlan_addr_equal(rip, ip)) 92 + return false; 93 + 94 + if (rifindex != ifindex) 95 + return false; 96 + 97 + return true; 98 + } 99 + 100 + static bool vxlan_group_used_by_vnifilter(struct vxlan_dev *vxlan, 101 + union vxlan_addr *ip, int ifindex) 102 + { 103 + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); 104 + struct vxlan_vni_node *v, *tmp; 105 + 106 + if (vxlan_group_used_match(ip, ifindex, 107 + &vxlan->default_dst.remote_ip, 108 + vxlan->default_dst.remote_ifindex)) 109 + return true; 110 + 111 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 112 + if (!vxlan_addr_multicast(&v->remote_ip)) 113 + continue; 114 + 115 + if (vxlan_group_used_match(ip, ifindex, 116 + &v->remote_ip, 117 + vxlan->default_dst.remote_ifindex)) 118 + return true; 119 + } 120 + 121 + return false; 122 + } 123 + 85 124 /* See if multicast group is already in use by other ID */ 86 125 bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev, 87 - union vxlan_addr *rip, int rifindex) 126 + __be32 vni, union vxlan_addr *rip, int rifindex) 88 127 { 89 128 union vxlan_addr *ip = (rip ? : &dev->default_dst.remote_ip); 90 129 int ifindex = (rifindex ? 
: dev->default_dst.remote_ifindex); ··· 160 121 rtnl_dereference(vxlan->vn6_sock) != sock6) 161 122 continue; 162 123 #endif 163 - if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip, ip)) 164 - continue; 165 - 166 - if (vxlan->default_dst.remote_ifindex != ifindex) 167 - continue; 124 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { 125 + if (!vxlan_group_used_by_vnifilter(vxlan, ip, ifindex)) 126 + continue; 127 + } else { 128 + if (!vxlan_group_used_match(ip, ifindex, 129 + &vxlan->default_dst.remote_ip, 130 + vxlan->default_dst.remote_ifindex)) 131 + continue; 132 + } 168 133 169 134 return true; 170 135 } 171 136 172 137 return false; 138 + } 139 + 140 + static int vxlan_multicast_join_vnigrp(struct vxlan_dev *vxlan) 141 + { 142 + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); 143 + struct vxlan_vni_node *v, *tmp, *vgood = NULL; 144 + int ret = 0; 145 + 146 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 147 + if (!vxlan_addr_multicast(&v->remote_ip)) 148 + continue; 149 + /* skip if address is same as default address */ 150 + if (vxlan_addr_equal(&v->remote_ip, 151 + &vxlan->default_dst.remote_ip)) 152 + continue; 153 + ret = vxlan_igmp_join(vxlan, &v->remote_ip, 0); 154 + if (ret == -EADDRINUSE) 155 + ret = 0; 156 + if (ret) 157 + goto out; 158 + vgood = v; 159 + } 160 + out: 161 + if (ret) { 162 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 163 + if (!vxlan_addr_multicast(&v->remote_ip)) 164 + continue; 165 + if (vxlan_addr_equal(&v->remote_ip, 166 + &vxlan->default_dst.remote_ip)) 167 + continue; 168 + vxlan_igmp_leave(vxlan, &v->remote_ip, 0); 169 + if (v == vgood) 170 + break; 171 + } 172 + } 173 + 174 + return ret; 175 + } 176 + 177 + static int vxlan_multicast_leave_vnigrp(struct vxlan_dev *vxlan) 178 + { 179 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 180 + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); 181 + struct vxlan_vni_node *v, *tmp; 182 + int last_err = 0, ret; 183 + 184 + 
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 185 + if (vxlan_addr_multicast(&v->remote_ip) && 186 + !vxlan_group_used(vn, vxlan, v->vni, &v->remote_ip, 187 + 0)) { 188 + ret = vxlan_igmp_leave(vxlan, &v->remote_ip, 0); 189 + if (ret) 190 + last_err = ret; 191 + } 192 + } 193 + 194 + return last_err; 195 + } 196 + 197 + int vxlan_multicast_join(struct vxlan_dev *vxlan) 198 + { 199 + int ret = 0; 200 + 201 + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) { 202 + ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip, 203 + vxlan->default_dst.remote_ifindex); 204 + if (ret == -EADDRINUSE) 205 + ret = 0; 206 + if (ret) 207 + return ret; 208 + } 209 + 210 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) 211 + return vxlan_multicast_join_vnigrp(vxlan); 212 + 213 + return 0; 214 + } 215 + 216 + int vxlan_multicast_leave(struct vxlan_dev *vxlan) 217 + { 218 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 219 + int ret = 0; 220 + 221 + if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) && 222 + !vxlan_group_used(vn, vxlan, 0, NULL, 0)) { 223 + ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip, 224 + vxlan->default_dst.remote_ifindex); 225 + if (ret) 226 + return ret; 227 + } 228 + 229 + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) 230 + return vxlan_multicast_leave_vnigrp(vxlan); 231 + 232 + return 0; 173 233 }
+39 -2
drivers/net/vxlan/vxlan_private.h
··· 7 7 #ifndef _VXLAN_PRIVATE_H 8 8 #define _VXLAN_PRIVATE_H 9 9 10 + #include <linux/rhashtable.h> 11 + 10 12 extern unsigned int vxlan_net_id; 11 13 extern const u8 all_zeros_mac[ETH_ALEN + 2]; 14 + extern const struct rhashtable_params vxlan_vni_rht_params; 12 15 13 16 #define PORT_HASH_BITS 8 14 17 #define PORT_HASH_SIZE (1 << PORT_HASH_BITS) ··· 95 92 96 93 #endif 97 94 95 + static inline struct vxlan_vni_node * 96 + vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni) 97 + { 98 + struct vxlan_vni_group *vg; 99 + 100 + vg = rcu_dereference_rtnl(vxlan->vnigrp); 101 + if (!vg) 102 + return NULL; 103 + 104 + return rhashtable_lookup_fast(&vg->vni_hash, &vni, 105 + vxlan_vni_rht_params); 106 + } 107 + 98 108 /* vxlan_core.c */ 99 109 int vxlan_fdb_create(struct vxlan_dev *vxlan, 100 110 const u8 *mac, union vxlan_addr *ip, ··· 127 111 __be16 port, __be32 src_vni, __be32 vni, 128 112 __u32 ifindex, __u16 ndm_flags, u32 nhid, 129 113 bool swdev_notify, struct netlink_ext_ack *extack); 114 + int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan, 115 + struct vxlan_config *conf, __be32 vni); 116 + 117 + /* vxlan_vnifilter.c */ 118 + int vxlan_vnigroup_init(struct vxlan_dev *vxlan); 119 + void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan); 120 + 121 + void vxlan_vnifilter_init(void); 122 + void vxlan_vnifilter_uninit(void); 123 + 124 + void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, 125 + struct vxlan_sock *vs, 126 + bool ipv6); 127 + void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan); 128 + int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, 129 + union vxlan_addr *old_remote_ip, 130 + union vxlan_addr *new_remote_ip, 131 + struct netlink_ext_ack *extack); 132 + 130 133 131 134 /* vxlan_multicast.c */ 135 + int vxlan_multicast_join(struct vxlan_dev *vxlan); 136 + int vxlan_multicast_leave(struct vxlan_dev *vxlan); 137 + bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev, 138 + __be32 vni, union vxlan_addr *rip, int rifindex); 
132 139 int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip, 133 140 int rifindex); 134 141 int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip, 135 142 int rifindex); 136 - bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev, 137 - union vxlan_addr *rip, int rifindex); 138 143 #endif
+862
drivers/net/vxlan/vxlan_vnifilter.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Vxlan vni filter for collect metadata mode 4 + * 5 + * Authors: Roopa Prabhu <roopa@nvidia.com> 6 + * 7 + */ 8 + 9 + #include <linux/kernel.h> 10 + #include <linux/slab.h> 11 + #include <linux/etherdevice.h> 12 + #include <linux/rhashtable.h> 13 + #include <net/rtnetlink.h> 14 + #include <net/net_namespace.h> 15 + #include <net/sock.h> 16 + #include <net/vxlan.h> 17 + 18 + #include "vxlan_private.h" 19 + 20 + static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg, 21 + const void *ptr) 22 + { 23 + const struct vxlan_vni_node *vnode = ptr; 24 + __be32 vni = *(__be32 *)arg->key; 25 + 26 + return vnode->vni != vni; 27 + } 28 + 29 + const struct rhashtable_params vxlan_vni_rht_params = { 30 + .head_offset = offsetof(struct vxlan_vni_node, vnode), 31 + .key_offset = offsetof(struct vxlan_vni_node, vni), 32 + .key_len = sizeof(__be32), 33 + .nelem_hint = 3, 34 + .max_size = VXLAN_N_VID, 35 + .obj_cmpfn = vxlan_vni_cmp, 36 + .automatic_shrinking = true, 37 + }; 38 + 39 + static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan, 40 + struct vxlan_vni_node *v, 41 + bool del) 42 + { 43 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 44 + struct vxlan_dev_node *node; 45 + struct vxlan_sock *vs; 46 + 47 + spin_lock(&vn->sock_lock); 48 + if (del) { 49 + if (!hlist_unhashed(&v->hlist4.hlist)) 50 + hlist_del_init_rcu(&v->hlist4.hlist); 51 + #if IS_ENABLED(CONFIG_IPV6) 52 + if (!hlist_unhashed(&v->hlist6.hlist)) 53 + hlist_del_init_rcu(&v->hlist6.hlist); 54 + #endif 55 + goto out; 56 + } 57 + 58 + #if IS_ENABLED(CONFIG_IPV6) 59 + vs = rtnl_dereference(vxlan->vn6_sock); 60 + if (vs && v) { 61 + node = &v->hlist6; 62 + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); 63 + } 64 + #endif 65 + vs = rtnl_dereference(vxlan->vn4_sock); 66 + if (vs && v) { 67 + node = &v->hlist4; 68 + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); 69 + } 70 + out: 71 + spin_unlock(&vn->sock_lock); 
72 + } 73 + 74 + void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, 75 + struct vxlan_sock *vs, 76 + bool ipv6) 77 + { 78 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 79 + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); 80 + struct vxlan_vni_node *v, *tmp; 81 + struct vxlan_dev_node *node; 82 + 83 + if (!vg) 84 + return; 85 + 86 + spin_lock(&vn->sock_lock); 87 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 88 + #if IS_ENABLED(CONFIG_IPV6) 89 + if (ipv6) 90 + node = &v->hlist6; 91 + else 92 + #endif 93 + node = &v->hlist4; 94 + node->vxlan = vxlan; 95 + hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); 96 + } 97 + spin_unlock(&vn->sock_lock); 98 + } 99 + 100 + void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan) 101 + { 102 + struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); 103 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 104 + struct vxlan_vni_node *v, *tmp; 105 + 106 + if (!vg) 107 + return; 108 + 109 + spin_lock(&vn->sock_lock); 110 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 111 + hlist_del_init_rcu(&v->hlist4.hlist); 112 + #if IS_ENABLED(CONFIG_IPV6) 113 + hlist_del_init_rcu(&v->hlist6.hlist); 114 + #endif 115 + } 116 + spin_unlock(&vn->sock_lock); 117 + } 118 + 119 + static u32 vnirange(struct vxlan_vni_node *vbegin, 120 + struct vxlan_vni_node *vend) 121 + { 122 + return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni)); 123 + } 124 + 125 + static size_t vxlan_vnifilter_entry_nlmsg_size(void) 126 + { 127 + return NLMSG_ALIGN(sizeof(struct tunnel_msg)) 128 + + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */ 129 + + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */ 130 + + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */ 131 + + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */ 132 + } 133 + 134 + static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb, 135 + struct vxlan_vni_node *vbegin, 136 + struct 
vxlan_vni_node *vend) 137 + { 138 + struct nlattr *ventry; 139 + u32 vs = be32_to_cpu(vbegin->vni); 140 + u32 ve = 0; 141 + 142 + if (vbegin != vend) 143 + ve = be32_to_cpu(vend->vni); 144 + 145 + ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY); 146 + if (!ventry) 147 + return false; 148 + 149 + if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs)) 150 + goto out_err; 151 + 152 + if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve)) 153 + goto out_err; 154 + 155 + if (!vxlan_addr_any(&vbegin->remote_ip)) { 156 + if (vbegin->remote_ip.sa.sa_family == AF_INET) { 157 + if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP, 158 + vbegin->remote_ip.sin.sin_addr.s_addr)) 159 + goto out_err; 160 + #if IS_ENABLED(CONFIG_IPV6) 161 + } else { 162 + if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6, 163 + &vbegin->remote_ip.sin6.sin6_addr)) 164 + goto out_err; 165 + #endif 166 + } 167 + } 168 + 169 + nla_nest_end(skb, ventry); 170 + 171 + return true; 172 + 173 + out_err: 174 + nla_nest_cancel(skb, ventry); 175 + 176 + return false; 177 + } 178 + 179 + static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan, 180 + struct vxlan_vni_node *vninode, int cmd) 181 + { 182 + struct tunnel_msg *tmsg; 183 + struct sk_buff *skb; 184 + struct nlmsghdr *nlh; 185 + struct net *net = dev_net(vxlan->dev); 186 + int err = -ENOBUFS; 187 + 188 + skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL); 189 + if (!skb) 190 + goto out_err; 191 + 192 + err = -EMSGSIZE; 193 + nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0); 194 + if (!nlh) 195 + goto out_err; 196 + tmsg = nlmsg_data(nlh); 197 + memset(tmsg, 0, sizeof(*tmsg)); 198 + tmsg->family = AF_BRIDGE; 199 + tmsg->ifindex = vxlan->dev->ifindex; 200 + 201 + if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode)) 202 + goto out_err; 203 + 204 + nlmsg_end(skb, nlh); 205 + rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL); 206 + 207 + return; 208 + 209 + out_err: 210 + rtnl_set_sk_err(net, RTNLGRP_TUNNEL, 
err); 211 + 212 + kfree_skb(skb); 213 + } 214 + 215 + static int vxlan_vnifilter_dump_dev(const struct net_device *dev, 216 + struct sk_buff *skb, 217 + struct netlink_callback *cb) 218 + { 219 + struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL; 220 + struct vxlan_dev *vxlan = netdev_priv(dev); 221 + struct tunnel_msg *new_tmsg; 222 + int idx = 0, s_idx = cb->args[1]; 223 + struct vxlan_vni_group *vg; 224 + struct nlmsghdr *nlh; 225 + int err = 0; 226 + 227 + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) 228 + return -EINVAL; 229 + 230 + /* RCU needed because of the vni locking rules (rcu || rtnl) */ 231 + vg = rcu_dereference(vxlan->vnigrp); 232 + if (!vg || !vg->num_vnis) 233 + return 0; 234 + 235 + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 236 + RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI); 237 + if (!nlh) 238 + return -EMSGSIZE; 239 + new_tmsg = nlmsg_data(nlh); 240 + memset(new_tmsg, 0, sizeof(*new_tmsg)); 241 + new_tmsg->family = PF_BRIDGE; 242 + new_tmsg->ifindex = dev->ifindex; 243 + 244 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 245 + if (idx < s_idx) { 246 + idx++; 247 + continue; 248 + } 249 + if (!vbegin) { 250 + vbegin = v; 251 + vend = v; 252 + continue; 253 + } 254 + if (vnirange(vend, v) == 1 && 255 + vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) { 256 + goto update_end; 257 + } else { 258 + if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend)) { 259 + err = -EMSGSIZE; 260 + break; 261 + } 262 + idx += vnirange(vbegin, vend) + 1; 263 + vbegin = v; 264 + } 265 + update_end: 266 + vend = v; 267 + } 268 + 269 + if (!err && vbegin) { 270 + if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend)) 271 + err = -EMSGSIZE; 272 + } 273 + 274 + cb->args[1] = err ? 
idx : 0; 275 + 276 + nlmsg_end(skb, nlh); 277 + 278 + return err; 279 + } 280 + 281 + static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb) 282 + { 283 + int idx = 0, err = 0, s_idx = cb->args[0]; 284 + struct net *net = sock_net(skb->sk); 285 + struct tunnel_msg *tmsg; 286 + struct net_device *dev; 287 + 288 + tmsg = nlmsg_data(cb->nlh); 289 + 290 + rcu_read_lock(); 291 + if (tmsg->ifindex) { 292 + dev = dev_get_by_index_rcu(net, tmsg->ifindex); 293 + if (!dev) { 294 + err = -ENODEV; 295 + goto out_err; 296 + } 297 + err = vxlan_vnifilter_dump_dev(dev, skb, cb); 298 + /* if the dump completed without an error we return 0 here */ 299 + if (err != -EMSGSIZE) 300 + goto out_err; 301 + } else { 302 + for_each_netdev_rcu(net, dev) { 303 + if (!netif_is_vxlan(dev)) 304 + continue; 305 + if (idx < s_idx) 306 + goto skip; 307 + err = vxlan_vnifilter_dump_dev(dev, skb, cb); 308 + if (err == -EMSGSIZE) 309 + break; 310 + skip: 311 + idx++; 312 + } 313 + } 314 + cb->args[0] = idx; 315 + rcu_read_unlock(); 316 + 317 + return skb->len; 318 + 319 + out_err: 320 + rcu_read_unlock(); 321 + 322 + return err; 323 + } 324 + 325 + static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = { 326 + [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 }, 327 + [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 }, 328 + [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY, 329 + .len = sizeof_field(struct iphdr, daddr) }, 330 + [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY, 331 + .len = sizeof(struct in6_addr) }, 332 + }; 333 + 334 + static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = { 335 + [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED }, 336 + }; 337 + 338 + static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, 339 + union vxlan_addr *old_remote_ip, 340 + union vxlan_addr *remote_ip, 341 + struct netlink_ext_ack *extack) 342 + { 343 + struct vxlan_rdst *dst = 
&vxlan->default_dst; 344 + u32 hash_index; 345 + int err = 0; 346 + 347 + hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); 348 + spin_lock_bh(&vxlan->hash_lock[hash_index]); 349 + if (remote_ip && !vxlan_addr_any(remote_ip)) { 350 + err = vxlan_fdb_update(vxlan, all_zeros_mac, 351 + remote_ip, 352 + NUD_REACHABLE | NUD_PERMANENT, 353 + NLM_F_APPEND | NLM_F_CREATE, 354 + vxlan->cfg.dst_port, 355 + vni, 356 + vni, 357 + dst->remote_ifindex, 358 + NTF_SELF, 0, true, extack); 359 + if (err) { 360 + spin_unlock_bh(&vxlan->hash_lock[hash_index]); 361 + return err; 362 + } 363 + } 364 + 365 + if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) { 366 + __vxlan_fdb_delete(vxlan, all_zeros_mac, 367 + *old_remote_ip, 368 + vxlan->cfg.dst_port, 369 + vni, vni, 370 + dst->remote_ifindex, 371 + true); 372 + } 373 + spin_unlock_bh(&vxlan->hash_lock[hash_index]); 374 + 375 + return err; 376 + } 377 + 378 + static int vxlan_vni_update_group(struct vxlan_dev *vxlan, 379 + struct vxlan_vni_node *vninode, 380 + union vxlan_addr *group, 381 + bool create, bool *changed, 382 + struct netlink_ext_ack *extack) 383 + { 384 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 385 + struct vxlan_rdst *dst = &vxlan->default_dst; 386 + union vxlan_addr *newrip = NULL, *oldrip = NULL; 387 + union vxlan_addr old_remote_ip; 388 + int ret = 0; 389 + 390 + memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip)); 391 + 392 + /* if per vni remote ip is not present use vxlan dev 393 + * default dst remote ip for fdb entry 394 + */ 395 + if (group && !vxlan_addr_any(group)) { 396 + newrip = group; 397 + } else { 398 + if (!vxlan_addr_any(&dst->remote_ip)) 399 + newrip = &dst->remote_ip; 400 + } 401 + 402 + /* if old rip exists, and no newrip, 403 + * explicitly delete old rip 404 + */ 405 + if (!newrip && !vxlan_addr_any(&old_remote_ip)) 406 + oldrip = &old_remote_ip; 407 + 408 + if (!newrip && !oldrip) 409 + return 0; 410 + 411 + if (!create && oldrip && newrip && 
vxlan_addr_equal(oldrip, newrip)) 412 + return 0; 413 + 414 + ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni, 415 + oldrip, newrip, 416 + extack); 417 + if (ret) 418 + goto out; 419 + 420 + if (group) 421 + memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip)); 422 + 423 + if (vxlan->dev->flags & IFF_UP) { 424 + if (vxlan_addr_multicast(&old_remote_ip) && 425 + !vxlan_group_used(vn, vxlan, vninode->vni, 426 + &old_remote_ip, 427 + vxlan->default_dst.remote_ifindex)) { 428 + ret = vxlan_igmp_leave(vxlan, &old_remote_ip, 429 + 0); 430 + if (ret) 431 + goto out; 432 + } 433 + 434 + if (vxlan_addr_multicast(&vninode->remote_ip)) { 435 + ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0); 436 + if (ret == -EADDRINUSE) 437 + ret = 0; 438 + if (ret) 439 + goto out; 440 + } 441 + } 442 + 443 + *changed = true; 444 + 445 + return 0; 446 + out: 447 + return ret; 448 + } 449 + 450 + int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, 451 + union vxlan_addr *old_remote_ip, 452 + union vxlan_addr *new_remote_ip, 453 + struct netlink_ext_ack *extack) 454 + { 455 + struct list_head *headp, *hpos; 456 + struct vxlan_vni_group *vg; 457 + struct vxlan_vni_node *vent; 458 + int ret; 459 + 460 + vg = rtnl_dereference(vxlan->vnigrp); 461 + 462 + headp = &vg->vni_list; 463 + list_for_each_prev(hpos, headp) { 464 + vent = list_entry(hpos, struct vxlan_vni_node, vlist); 465 + if (vxlan_addr_any(&vent->remote_ip)) { 466 + ret = vxlan_update_default_fdb_entry(vxlan, vent->vni, 467 + old_remote_ip, 468 + new_remote_ip, 469 + extack); 470 + if (ret) 471 + return ret; 472 + } 473 + } 474 + 475 + return 0; 476 + } 477 + 478 + static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, 479 + struct vxlan_vni_node *vninode) 480 + { 481 + struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); 482 + struct vxlan_rdst *dst = &vxlan->default_dst; 483 + 484 + /* if per vni remote_ip not present, delete the 485 + * default dst remote_ip previously added for this vni 486 
+ */ 487 + if (!vxlan_addr_any(&vninode->remote_ip) || 488 + !vxlan_addr_any(&dst->remote_ip)) 489 + __vxlan_fdb_delete(vxlan, all_zeros_mac, 490 + (vxlan_addr_any(&vninode->remote_ip) ? 491 + dst->remote_ip : vninode->remote_ip), 492 + vxlan->cfg.dst_port, 493 + vninode->vni, vninode->vni, 494 + dst->remote_ifindex, 495 + true); 496 + 497 + if (vxlan->dev->flags & IFF_UP) { 498 + if (vxlan_addr_multicast(&vninode->remote_ip) && 499 + !vxlan_group_used(vn, vxlan, vninode->vni, 500 + &vninode->remote_ip, 501 + dst->remote_ifindex)) { 502 + vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0); 503 + } 504 + } 505 + } 506 + 507 + static int vxlan_vni_update(struct vxlan_dev *vxlan, 508 + struct vxlan_vni_group *vg, 509 + __be32 vni, union vxlan_addr *group, 510 + bool *changed, 511 + struct netlink_ext_ack *extack) 512 + { 513 + struct vxlan_vni_node *vninode; 514 + int ret; 515 + 516 + vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni, 517 + vxlan_vni_rht_params); 518 + if (!vninode) 519 + return 0; 520 + 521 + ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed, 522 + extack); 523 + if (ret) 524 + return ret; 525 + 526 + if (changed) 527 + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); 528 + 529 + return 0; 530 + } 531 + 532 + static void __vxlan_vni_add_list(struct vxlan_vni_group *vg, 533 + struct vxlan_vni_node *v) 534 + { 535 + struct list_head *headp, *hpos; 536 + struct vxlan_vni_node *vent; 537 + 538 + headp = &vg->vni_list; 539 + list_for_each_prev(hpos, headp) { 540 + vent = list_entry(hpos, struct vxlan_vni_node, vlist); 541 + if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni)) 542 + continue; 543 + else 544 + break; 545 + } 546 + list_add_rcu(&v->vlist, hpos); 547 + vg->num_vnis++; 548 + } 549 + 550 + static void __vxlan_vni_del_list(struct vxlan_vni_group *vg, 551 + struct vxlan_vni_node *v) 552 + { 553 + list_del_rcu(&v->vlist); 554 + vg->num_vnis--; 555 + } 556 + 557 + static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev 
*vxlan, 558 + __be32 vni) 559 + { 560 + struct vxlan_vni_node *vninode; 561 + 562 + vninode = kzalloc(sizeof(*vninode), GFP_ATOMIC); 563 + if (!vninode) 564 + return NULL; 565 + vninode->vni = vni; 566 + vninode->hlist4.vxlan = vxlan; 567 + #if IS_ENABLED(CONFIG_IPV6) 568 + vninode->hlist6.vxlan = vxlan; 569 + #endif 570 + 571 + return vninode; 572 + } 573 + 574 + static int vxlan_vni_add(struct vxlan_dev *vxlan, 575 + struct vxlan_vni_group *vg, 576 + u32 vni, union vxlan_addr *group, 577 + struct netlink_ext_ack *extack) 578 + { 579 + struct vxlan_vni_node *vninode; 580 + __be32 v = cpu_to_be32(vni); 581 + bool changed = false; 582 + int err = 0; 583 + 584 + if (vxlan_vnifilter_lookup(vxlan, v)) 585 + return vxlan_vni_update(vxlan, vg, v, group, &changed, extack); 586 + 587 + err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v); 588 + if (err) { 589 + NL_SET_ERR_MSG(extack, "VNI in use"); 590 + return err; 591 + } 592 + 593 + vninode = vxlan_vni_alloc(vxlan, v); 594 + if (!vninode) 595 + return -ENOMEM; 596 + 597 + err = rhashtable_lookup_insert_fast(&vg->vni_hash, 598 + &vninode->vnode, 599 + vxlan_vni_rht_params); 600 + if (err) { 601 + kfree(vninode); 602 + return err; 603 + } 604 + 605 + __vxlan_vni_add_list(vg, vninode); 606 + 607 + if (vxlan->dev->flags & IFF_UP) 608 + vxlan_vs_add_del_vninode(vxlan, vninode, false); 609 + 610 + err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, 611 + extack); 612 + 613 + if (changed) 614 + vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); 615 + 616 + return err; 617 + } 618 + 619 + static void vxlan_vni_node_rcu_free(struct rcu_head *rcu) 620 + { 621 + struct vxlan_vni_node *v; 622 + 623 + v = container_of(rcu, struct vxlan_vni_node, rcu); 624 + kfree(v); 625 + } 626 + 627 + static int vxlan_vni_del(struct vxlan_dev *vxlan, 628 + struct vxlan_vni_group *vg, 629 + u32 vni, struct netlink_ext_ack *extack) 630 + { 631 + struct vxlan_vni_node *vninode; 632 + __be32 v = cpu_to_be32(vni); 633 + int err 
= 0; 634 + 635 + vg = rtnl_dereference(vxlan->vnigrp); 636 + 637 + vninode = rhashtable_lookup_fast(&vg->vni_hash, &v, 638 + vxlan_vni_rht_params); 639 + if (!vninode) { 640 + err = -ENOENT; 641 + goto out; 642 + } 643 + 644 + vxlan_vni_delete_group(vxlan, vninode); 645 + 646 + err = rhashtable_remove_fast(&vg->vni_hash, 647 + &vninode->vnode, 648 + vxlan_vni_rht_params); 649 + if (err) 650 + goto out; 651 + 652 + __vxlan_vni_del_list(vg, vninode); 653 + 654 + vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL); 655 + 656 + if (vxlan->dev->flags & IFF_UP) 657 + vxlan_vs_add_del_vninode(vxlan, vninode, true); 658 + 659 + call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free); 660 + 661 + return 0; 662 + out: 663 + return err; 664 + } 665 + 666 + static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni, 667 + __u32 end_vni, union vxlan_addr *group, 668 + int cmd, struct netlink_ext_ack *extack) 669 + { 670 + struct vxlan_vni_group *vg; 671 + int v, err = 0; 672 + 673 + vg = rtnl_dereference(vxlan->vnigrp); 674 + 675 + for (v = start_vni; v <= end_vni; v++) { 676 + switch (cmd) { 677 + case RTM_NEWTUNNEL: 678 + err = vxlan_vni_add(vxlan, vg, v, group, extack); 679 + break; 680 + case RTM_DELTUNNEL: 681 + err = vxlan_vni_del(vxlan, vg, v, extack); 682 + break; 683 + default: 684 + err = -EOPNOTSUPP; 685 + break; 686 + } 687 + if (err) 688 + goto out; 689 + } 690 + 691 + return 0; 692 + out: 693 + return err; 694 + } 695 + 696 + static int vxlan_process_vni_filter(struct vxlan_dev *vxlan, 697 + struct nlattr *nlvnifilter, 698 + int cmd, struct netlink_ext_ack *extack) 699 + { 700 + struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1]; 701 + u32 vni_start = 0, vni_end = 0; 702 + union vxlan_addr group; 703 + int err; 704 + 705 + err = nla_parse_nested(vattrs, 706 + VXLAN_VNIFILTER_ENTRY_MAX, 707 + nlvnifilter, vni_filter_entry_policy, 708 + extack); 709 + if (err) 710 + return err; 711 + 712 + if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) { 713 + vni_start = 
nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]); 714 + vni_end = vni_start; 715 + } 716 + 717 + if (vattrs[VXLAN_VNIFILTER_ENTRY_END]) 718 + vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]); 719 + 720 + if (!vni_start && !vni_end) { 721 + NL_SET_ERR_MSG_ATTR(extack, nlvnifilter, 722 + "vni start nor end found in vni entry"); 723 + return -EINVAL; 724 + } 725 + 726 + if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) { 727 + group.sin.sin_addr.s_addr = 728 + nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]); 729 + group.sa.sa_family = AF_INET; 730 + } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) { 731 + group.sin6.sin6_addr = 732 + nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]); 733 + group.sa.sa_family = AF_INET6; 734 + } else { 735 + memset(&group, 0, sizeof(group)); 736 + } 737 + 738 + if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) { 739 + NL_SET_ERR_MSG(extack, 740 + "Local interface required for multicast remote group"); 741 + 742 + return -EINVAL; 743 + } 744 + 745 + err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd, 746 + extack); 747 + if (err) 748 + return err; 749 + 750 + return 0; 751 + } 752 + 753 + void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan) 754 + { 755 + struct vxlan_vni_node *v, *tmp; 756 + struct vxlan_vni_group *vg; 757 + 758 + vg = rtnl_dereference(vxlan->vnigrp); 759 + list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { 760 + rhashtable_remove_fast(&vg->vni_hash, &v->vnode, 761 + vxlan_vni_rht_params); 762 + hlist_del_init_rcu(&v->hlist4.hlist); 763 + #if IS_ENABLED(CONFIG_IPV6) 764 + hlist_del_init_rcu(&v->hlist6.hlist); 765 + #endif 766 + __vxlan_vni_del_list(vg, v); 767 + vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL); 768 + call_rcu(&v->rcu, vxlan_vni_node_rcu_free); 769 + } 770 + rhashtable_destroy(&vg->vni_hash); 771 + kfree(vg); 772 + } 773 + 774 + int vxlan_vnigroup_init(struct vxlan_dev *vxlan) 775 + { 776 + struct vxlan_vni_group *vg; 777 + int ret; 778 + 779 + vg = 
kzalloc(sizeof(*vg), GFP_KERNEL); 780 + if (!vg) 781 + return -ENOMEM; 782 + ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params); 783 + if (ret) { 784 + kfree(vg); 785 + return ret; 786 + } 787 + INIT_LIST_HEAD(&vg->vni_list); 788 + rcu_assign_pointer(vxlan->vnigrp, vg); 789 + 790 + return 0; 791 + } 792 + 793 + static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh, 794 + struct netlink_ext_ack *extack) 795 + { 796 + struct net *net = sock_net(skb->sk); 797 + struct tunnel_msg *tmsg; 798 + struct vxlan_dev *vxlan; 799 + struct net_device *dev; 800 + struct nlattr *attr; 801 + int err, vnis = 0; 802 + int rem; 803 + 804 + /* this should validate the header and check for remaining bytes */ 805 + err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX, 806 + vni_filter_policy, extack); 807 + if (err < 0) 808 + return err; 809 + 810 + tmsg = nlmsg_data(nlh); 811 + dev = __dev_get_by_index(net, tmsg->ifindex); 812 + if (!dev) 813 + return -ENODEV; 814 + 815 + if (!netif_is_vxlan(dev)) { 816 + NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device"); 817 + return -EINVAL; 818 + } 819 + 820 + vxlan = netdev_priv(dev); 821 + 822 + if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) 823 + return -EOPNOTSUPP; 824 + 825 + nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) { 826 + switch (nla_type(attr)) { 827 + case VXLAN_VNIFILTER_ENTRY: 828 + err = vxlan_process_vni_filter(vxlan, attr, 829 + nlh->nlmsg_type, extack); 830 + break; 831 + default: 832 + continue; 833 + } 834 + vnis++; 835 + if (err) 836 + break; 837 + } 838 + 839 + if (!vnis) { 840 + NL_SET_ERR_MSG_MOD(extack, "No vnis found to process"); 841 + err = -EINVAL; 842 + } 843 + 844 + return err; 845 + } 846 + 847 + void vxlan_vnifilter_init(void) 848 + { 849 + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, 850 + vxlan_vnifilter_dump, 0); 851 + rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, 852 + vxlan_vnifilter_process, NULL, 0); 853 + 
rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, 854 + vxlan_vnifilter_process, NULL, 0); 855 + } 856 + 857 + void vxlan_vnifilter_uninit(void) 858 + { 859 + rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL); 860 + rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL); 861 + rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL); 862 + }
+26 -2
include/net/vxlan.h
··· 232 232 struct vxlan_dev *vxlan; 233 233 }; 234 234 235 + struct vxlan_vni_node { 236 + struct rhash_head vnode; 237 + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ 238 + #if IS_ENABLED(CONFIG_IPV6) 239 + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ 240 + #endif 241 + struct list_head vlist; 242 + __be32 vni; 243 + union vxlan_addr remote_ip; /* default remote ip for this vni */ 244 + 245 + struct rcu_head rcu; 246 + }; 247 + 248 + struct vxlan_vni_group { 249 + struct rhashtable vni_hash; 250 + struct list_head vni_list; 251 + u32 num_vnis; 252 + }; 253 + 235 254 /* Pseudo network device */ 236 255 struct vxlan_dev { 237 256 struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ ··· 273 254 274 255 struct vxlan_config cfg; 275 256 257 + struct vxlan_vni_group __rcu *vnigrp; 258 + 276 259 struct hlist_head fdb_head[FDB_HASH_SIZE]; 277 260 }; 278 261 ··· 295 274 #define VXLAN_F_GPE 0x4000 296 275 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 297 276 #define VXLAN_F_TTL_INHERIT 0x10000 277 + #define VXLAN_F_VNIFILTER 0x20000 298 278 299 279 /* Flags that are used in the receive path. These flags must match in 300 280 * order for a socket to be shareable ··· 305 283 VXLAN_F_UDP_ZERO_CSUM6_RX | \ 306 284 VXLAN_F_REMCSUM_RX | \ 307 285 VXLAN_F_REMCSUM_NOPARTIAL | \ 308 - VXLAN_F_COLLECT_METADATA) 286 + VXLAN_F_COLLECT_METADATA | \ 287 + VXLAN_F_VNIFILTER) 309 288 310 289 /* Flags that can be set together with VXLAN_F_GPE. */ 311 290 #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ ··· 315 292 VXLAN_F_UDP_ZERO_CSUM_TX | \ 316 293 VXLAN_F_UDP_ZERO_CSUM6_TX | \ 317 294 VXLAN_F_UDP_ZERO_CSUM6_RX | \ 318 - VXLAN_F_COLLECT_METADATA) 295 + VXLAN_F_COLLECT_METADATA | \ 296 + VXLAN_F_VNIFILTER) 319 297 320 298 struct net_device *vxlan_dev_create(struct net *net, const char *name, 321 299 u8 name_assign_type, struct vxlan_config *conf);