sfc: neighbour lookup for TC encap action offload

+34

drivers/net/ethernet/sfc/ef100_netdev.c

··· 24 24 #include "rx_common.h" 25 25 #include "ef100_sriov.h" 26 26 #include "tc_bindings.h" 27 + #include "tc_encap_actions.h" 27 28 #include "efx_devlink.h" 28 29 29 30 static void ef100_update_name(struct efx_nic *efx) ··· 301 300 { 302 301 struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier); 303 302 struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); 303 + struct ef100_nic_data *nic_data = efx->nic_data; 304 + int err; 304 305 305 306 if (efx->net_dev == net_dev && 306 307 (event == NETDEV_CHANGENAME || event == NETDEV_REGISTER)) 307 308 ef100_update_name(efx); 308 309 310 + if (!nic_data->grp_mae) 311 + return NOTIFY_DONE; 312 + err = efx_tc_netdev_event(efx, event, net_dev); 313 + if (err & NOTIFY_STOP_MASK) 314 + return err; 315 + 309 316 return NOTIFY_DONE; 310 317 } 318 + 319 + static int ef100_netevent_event(struct notifier_block *this, 320 + unsigned long event, void *ptr) 321 + { 322 + struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier); 323 + struct ef100_nic_data *nic_data = efx->nic_data; 324 + int err; 325 + 326 + if (!nic_data->grp_mae) 327 + return NOTIFY_DONE; 328 + err = efx_tc_netevent_event(efx, event, ptr); 329 + if (err & NOTIFY_STOP_MASK) 330 + return err; 331 + 332 + return NOTIFY_DONE; 333 + }; 311 334 312 335 static int ef100_register_netdev(struct efx_nic *efx) 313 336 { ··· 392 367 rtnl_unlock(); 393 368 394 369 unregister_netdevice_notifier(&efx->netdev_notifier); 370 + unregister_netevent_notifier(&efx->netevent_notifier); 395 371 #if defined(CONFIG_SFC_SRIOV) 396 372 if (!efx->type->is_vf) 397 373 efx_ef100_pci_sriov_disable(efx, true); ··· 510 484 if (rc) { 511 485 netif_err(efx, probe, efx->net_dev, 512 486 "Failed to register netdevice notifier, rc=%d\n", rc); 487 + goto fail; 488 + } 489 + 490 + efx->netevent_notifier.notifier_call = ef100_netevent_event; 491 + rc = register_netevent_notifier(&efx->netevent_notifier); 492 + if (rc) { 493 + netif_err(efx, probe, 
efx->net_dev, 494 + "Failed to register netevent notifier, rc=%d\n", rc); 513 495 goto fail; 514 496 } 515 497

+3

drivers/net/ethernet/sfc/net_driver.h

··· 27 27 #include <linux/mtd/mtd.h> 28 28 #include <net/busy_poll.h> 29 29 #include <net/xdp.h> 30 + #include <net/netevent.h> 30 31 31 32 #include "enum.h" 32 33 #include "bitfield.h" ··· 997 996 * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their 998 997 * xdp_rxq_info structures? 999 998 * @netdev_notifier: Netdevice notifier. 999 + * @netevent_notifier: Netevent notifier (for neighbour updates). 1000 1000 * @tc: state for TC offload (EF100). 1001 1001 * @devlink: reference to devlink structure owned by this device 1002 1002 * @dl_port: devlink port associated with the PF ··· 1185 1183 bool xdp_rxq_info_failed; 1186 1184 1187 1185 struct notifier_block netdev_notifier; 1186 + struct notifier_block netevent_notifier; 1188 1187 struct efx_tc_state *tc; 1189 1188 1190 1189 struct devlink *devlink;

+8 -4

drivers/net/ethernet/sfc/tc.c

··· 34 34 * May return NULL for the PF (us), or an error pointer for a device that 35 35 * isn't supported as a TC offload endpoint 36 36 */ 37 - static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, 38 - struct net_device *dev) 37 + struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, 38 + struct net_device *dev) 39 39 { 40 40 struct efx_rep *efv; 41 41 ··· 71 71 } 72 72 73 73 /* Convert a driver-internal vport ID into an external device (wire or VF) */ 74 - static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) 74 + s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) 75 75 { 76 76 u32 mport; 77 77 ··· 112 112 } 113 113 if (act->count) 114 114 efx_tc_flower_put_counter_index(efx, act->count); 115 - if (act->encap_md) 115 + if (act->encap_md) { 116 + list_del(&act->encap_user); 116 117 efx_tc_flower_release_encap_md(efx, act->encap_md); 118 + } 117 119 kfree(act); 118 120 } 119 121 ··· 1117 1115 goto release; 1118 1116 } 1119 1117 act->encap_md = encap; 1118 + list_add_tail(&act->encap_user, &encap->users); 1120 1119 act->dest_mport = encap->dest_mport; 1121 1120 act->deliver = 1; 1122 1121 rc = efx_mae_alloc_action_set(efx, act); ··· 1126 1123 goto release; 1127 1124 } 1128 1125 list_add_tail(&act->list, &rule->acts.list); 1126 + act->user = &rule->acts; 1129 1127 act = NULL; 1130 1128 if (fa->id == FLOW_ACTION_REDIRECT) 1131 1129 break; /* end of the line */

+7

drivers/net/ethernet/sfc/tc.h

··· 36 36 __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ 37 37 struct efx_tc_counter_index *count; 38 38 struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ 39 + struct list_head encap_user; /* entry on encap_md->users list */ 40 + struct efx_tc_action_set_list *user; /* Only populated if encap_md */ 39 41 u32 dest_mport; 40 42 u32 fw_id; /* index of this entry in firmware actions table */ 41 43 struct list_head list; ··· 153 151 * @encap_ht: Hashtable of TC encap actions 154 152 * @encap_match_ht: Hashtable of TC encap matches 155 153 * @match_action_ht: Hashtable of TC match-action rules 154 + * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder) 156 155 * @reps_mport_id: MAE port allocated for representor RX 157 156 * @reps_filter_uc: VNIC filter for representor unicast RX (promisc) 158 157 * @reps_filter_mc: VNIC filter for representor multicast RX (allmulti) ··· 184 181 struct rhashtable encap_ht; 185 182 struct rhashtable encap_match_ht; 186 183 struct rhashtable match_action_ht; 184 + struct rhashtable neigh_ht; 187 185 u32 reps_mport_id, reps_mport_vport_id; 188 186 s32 reps_filter_uc, reps_filter_mc; 189 187 bool flush_counters; ··· 205 201 struct efx_rep; 206 202 207 203 enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev); 204 + struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, 205 + struct net_device *dev); 206 + s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv); 208 207 int efx_tc_configure_default_rule_rep(struct efx_rep *efv); 209 208 void efx_tc_deconfigure_default_rule(struct efx_nic *efx, 210 209 struct efx_tc_flow_rule *rule);

+13

drivers/net/ethernet/sfc/tc_bindings.c

··· 10 10 11 11 #include "tc_bindings.h" 12 12 #include "tc.h" 13 + #include "tc_encap_actions.h" 13 14 14 15 struct efx_tc_block_binding { 15 16 struct list_head list; ··· 226 225 return efx_tc_setup_block(net_dev, efx, type_data, NULL); 227 226 228 227 return -EOPNOTSUPP; 228 + } 229 + 230 + int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, 231 + struct net_device *net_dev) 232 + { 233 + if (efx->type->is_vf) 234 + return NOTIFY_DONE; 235 + 236 + if (event == NETDEV_UNREGISTER) 237 + efx_tc_unregister_egdev(efx, net_dev); 238 + 239 + return NOTIFY_OK; 229 240 }

+2

drivers/net/ethernet/sfc/tc_bindings.h

··· 26 26 void *cb_priv, enum tc_setup_type type, 27 27 void *type_data, void *data, 28 28 void (*cleanup)(struct flow_block_cb *block_cb)); 29 + int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, 30 + struct net_device *net_dev); 29 31 #endif /* EFX_TC_BINDINGS_H */

+446 -2

drivers/net/ethernet/sfc/tc_encap_actions.c

··· 13 13 #include "mae.h" 14 14 #include <net/vxlan.h> 15 15 #include <net/geneve.h> 16 + #include <net/netevent.h> 17 + #include <net/arp.h> 18 + 19 + static const struct rhashtable_params efx_neigh_ht_params = { 20 + .key_len = offsetof(struct efx_neigh_binder, ha), 21 + .key_offset = 0, 22 + .head_offset = offsetof(struct efx_neigh_binder, linkage), 23 + }; 16 24 17 25 static const struct rhashtable_params efx_tc_encap_ht_params = { 18 26 .key_len = offsetofend(struct efx_tc_encap_action, key), ··· 36 28 kfree(enc); 37 29 } 38 30 31 + static void efx_neigh_free(void *ptr, void *__unused) 32 + { 33 + struct efx_neigh_binder *neigh = ptr; 34 + 35 + WARN_ON(refcount_read(&neigh->ref)); 36 + WARN_ON(!list_empty(&neigh->users)); 37 + put_net_track(neigh->net, &neigh->ns_tracker); 38 + netdev_put(neigh->egdev, &neigh->dev_tracker); 39 + kfree(neigh); 40 + } 41 + 39 42 int efx_tc_init_encap_actions(struct efx_nic *efx) 40 43 { 41 - return rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); 44 + int rc; 45 + 46 + rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params); 47 + if (rc < 0) 48 + goto fail_neigh_ht; 49 + rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); 50 + if (rc < 0) 51 + goto fail_encap_ht; 52 + return 0; 53 + fail_encap_ht: 54 + rhashtable_destroy(&efx->tc->neigh_ht); 55 + fail_neigh_ht: 56 + return rc; 42 57 } 43 58 44 59 /* Only call this in init failure teardown. 
··· 70 39 void efx_tc_destroy_encap_actions(struct efx_nic *efx) 71 40 { 72 41 rhashtable_destroy(&efx->tc->encap_ht); 42 + rhashtable_destroy(&efx->tc->neigh_ht); 73 43 } 74 44 75 45 void efx_tc_fini_encap_actions(struct efx_nic *efx) 76 46 { 77 47 rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL); 48 + rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL); 49 + } 50 + 51 + static void efx_neigh_update(struct work_struct *work); 52 + 53 + static int efx_bind_neigh(struct efx_nic *efx, 54 + struct efx_tc_encap_action *encap, struct net *net, 55 + struct netlink_ext_ack *extack) 56 + { 57 + struct efx_neigh_binder *neigh, *old; 58 + struct flowi6 flow6 = {}; 59 + struct flowi4 flow4 = {}; 60 + int rc; 61 + 62 + /* GCC stupidly thinks that only values explicitly listed in the enum 63 + * definition can _possibly_ be sensible case values, so without this 64 + * cast it complains about the IPv6 versions. 65 + */ 66 + switch ((int)encap->type) { 67 + case EFX_ENCAP_TYPE_VXLAN: 68 + case EFX_ENCAP_TYPE_GENEVE: 69 + flow4.flowi4_proto = IPPROTO_UDP; 70 + flow4.fl4_dport = encap->key.tp_dst; 71 + flow4.flowi4_tos = encap->key.tos; 72 + flow4.daddr = encap->key.u.ipv4.dst; 73 + flow4.saddr = encap->key.u.ipv4.src; 74 + break; 75 + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: 76 + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: 77 + flow6.flowi6_proto = IPPROTO_UDP; 78 + flow6.fl6_dport = encap->key.tp_dst; 79 + flow6.flowlabel = ip6_make_flowinfo(encap->key.tos, 80 + encap->key.label); 81 + flow6.daddr = encap->key.u.ipv6.dst; 82 + flow6.saddr = encap->key.u.ipv6.src; 83 + break; 84 + default: 85 + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d", 86 + (int)encap->type); 87 + return -EOPNOTSUPP; 88 + } 89 + 90 + neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT); 91 + if (!neigh) 92 + return -ENOMEM; 93 + neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT); 94 + neigh->dst_ip = flow4.daddr; 95 + 
neigh->dst_ip6 = flow6.daddr; 96 + 97 + old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht, 98 + &neigh->linkage, 99 + efx_neigh_ht_params); 100 + if (old) { 101 + /* don't need our new entry */ 102 + put_net_track(neigh->net, &neigh->ns_tracker); 103 + kfree(neigh); 104 + if (!refcount_inc_not_zero(&old->ref)) 105 + return -EAGAIN; 106 + /* existing entry found, ref taken */ 107 + neigh = old; 108 + } else { 109 + /* New entry. We need to initiate a lookup */ 110 + struct neighbour *n; 111 + struct rtable *rt; 112 + 113 + if (encap->type & EFX_ENCAP_FLAG_IPV6) { 114 + #if IS_ENABLED(CONFIG_IPV6) 115 + struct dst_entry *dst; 116 + 117 + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6, 118 + NULL); 119 + rc = PTR_ERR_OR_ZERO(dst); 120 + if (rc) { 121 + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap"); 122 + goto out_free; 123 + } 124 + neigh->egdev = dst->dev; 125 + netdev_hold(neigh->egdev, &neigh->dev_tracker, 126 + GFP_KERNEL_ACCOUNT); 127 + neigh->ttl = ip6_dst_hoplimit(dst); 128 + n = dst_neigh_lookup(dst, &flow6.daddr); 129 + dst_release(dst); 130 + #else 131 + /* We shouldn't ever get here, because if IPv6 isn't 132 + * enabled how did someone create an IPv6 tunnel_key? 
133 + */ 134 + rc = -EOPNOTSUPP; 135 + NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)"); 136 + #endif 137 + } else { 138 + rt = ip_route_output_key(net, &flow4); 139 + if (IS_ERR_OR_NULL(rt)) { 140 + rc = PTR_ERR_OR_ZERO(rt); 141 + if (!rc) 142 + rc = -EIO; 143 + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap"); 144 + goto out_free; 145 + } 146 + neigh->egdev = rt->dst.dev; 147 + netdev_hold(neigh->egdev, &neigh->dev_tracker, 148 + GFP_KERNEL_ACCOUNT); 149 + neigh->ttl = ip4_dst_hoplimit(&rt->dst); 150 + n = dst_neigh_lookup(&rt->dst, &flow4.daddr); 151 + ip_rt_put(rt); 152 + } 153 + if (!n) { 154 + rc = -ENETUNREACH; 155 + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap"); 156 + netdev_put(neigh->egdev, &neigh->dev_tracker); 157 + goto out_free; 158 + } 159 + refcount_set(&neigh->ref, 1); 160 + INIT_LIST_HEAD(&neigh->users); 161 + read_lock_bh(&n->lock); 162 + ether_addr_copy(neigh->ha, n->ha); 163 + neigh->n_valid = n->nud_state & NUD_VALID; 164 + read_unlock_bh(&n->lock); 165 + rwlock_init(&neigh->lock); 166 + INIT_WORK(&neigh->work, efx_neigh_update); 167 + neigh->efx = efx; 168 + neigh->used = jiffies; 169 + if (!neigh->n_valid) 170 + /* Prod ARP to find us a neighbour */ 171 + neigh_event_send(n, NULL); 172 + neigh_release(n); 173 + } 174 + /* Add us to this neigh */ 175 + encap->neigh = neigh; 176 + list_add_tail(&encap->list, &neigh->users); 177 + return 0; 178 + 179 + out_free: 180 + /* cleanup common to several error paths */ 181 + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, 182 + efx_neigh_ht_params); 183 + synchronize_rcu(); 184 + put_net_track(net, &neigh->ns_tracker); 185 + kfree(neigh); 186 + return rc; 187 + } 188 + 189 + static void efx_free_neigh(struct efx_neigh_binder *neigh) 190 + { 191 + struct efx_nic *efx = neigh->efx; 192 + 193 + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, 194 + efx_neigh_ht_params); 195 + synchronize_rcu(); 196 + netdev_put(neigh->egdev, 
&neigh->dev_tracker); 197 + put_net_track(neigh->net, &neigh->ns_tracker); 198 + kfree(neigh); 199 + } 200 + 201 + static void efx_release_neigh(struct efx_nic *efx, 202 + struct efx_tc_encap_action *encap) 203 + { 204 + struct efx_neigh_binder *neigh = encap->neigh; 205 + 206 + if (!neigh) 207 + return; 208 + list_del(&encap->list); 209 + encap->neigh = NULL; 210 + if (!refcount_dec_and_test(&neigh->ref)) 211 + return; /* still in use */ 212 + efx_free_neigh(neigh); 213 + } 214 + 215 + static void efx_gen_encap_header(struct efx_tc_encap_action *encap) 216 + { 217 + /* stub for now */ 218 + encap->n_valid = false; 219 + memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); 220 + encap->encap_hdr_len = ETH_HLEN; 221 + } 222 + 223 + static void efx_tc_update_encap(struct efx_nic *efx, 224 + struct efx_tc_encap_action *encap) 225 + { 226 + struct efx_tc_action_set_list *acts, *fallback; 227 + struct efx_tc_flow_rule *rule; 228 + struct efx_tc_action_set *act; 229 + int rc; 230 + 231 + if (encap->n_valid) { 232 + /* Make sure no rules are using this encap while we change it */ 233 + list_for_each_entry(act, &encap->users, encap_user) { 234 + acts = act->user; 235 + if (WARN_ON(!acts)) /* can't happen */ 236 + continue; 237 + rule = container_of(acts, struct efx_tc_flow_rule, acts); 238 + if (rule->fallback) 239 + fallback = rule->fallback; 240 + else /* fallback fallback: deliver to PF */ 241 + fallback = &efx->tc->facts.pf; 242 + rc = efx_mae_update_rule(efx, fallback->fw_id, 243 + rule->fw_id); 244 + if (rc) 245 + netif_err(efx, drv, efx->net_dev, 246 + "Failed to update (f) rule %08x rc %d\n", 247 + rule->fw_id, rc); 248 + else 249 + netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n", 250 + rule->fw_id); 251 + } 252 + } 253 + 254 + if (encap->neigh) { 255 + read_lock_bh(&encap->neigh->lock); 256 + efx_gen_encap_header(encap); 257 + read_unlock_bh(&encap->neigh->lock); 258 + } else { 259 + encap->n_valid = false; 260 + memset(encap->encap_hdr, 0, 
sizeof(encap->encap_hdr)); 261 + encap->encap_hdr_len = ETH_HLEN; 262 + } 263 + 264 + rc = efx_mae_update_encap_md(efx, encap); 265 + if (rc) { 266 + netif_err(efx, drv, efx->net_dev, 267 + "Failed to update encap hdr %08x rc %d\n", 268 + encap->fw_id, rc); 269 + return; 270 + } 271 + netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n", 272 + encap->fw_id); 273 + if (!encap->n_valid) 274 + return; 275 + /* Update rule users: use the action if they are now ready */ 276 + list_for_each_entry(act, &encap->users, encap_user) { 277 + acts = act->user; 278 + if (WARN_ON(!acts)) /* can't happen */ 279 + continue; 280 + rule = container_of(acts, struct efx_tc_flow_rule, acts); 281 + if (!efx_tc_check_ready(efx, rule)) 282 + continue; 283 + rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id); 284 + if (rc) 285 + netif_err(efx, drv, efx->net_dev, 286 + "Failed to update rule %08x rc %d\n", 287 + rule->fw_id, rc); 288 + else 289 + netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n", 290 + rule->fw_id); 291 + } 292 + } 293 + 294 + static void efx_neigh_update(struct work_struct *work) 295 + { 296 + struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work); 297 + struct efx_tc_encap_action *encap; 298 + struct efx_nic *efx = neigh->efx; 299 + 300 + mutex_lock(&efx->tc->mutex); 301 + list_for_each_entry(encap, &neigh->users, list) 302 + efx_tc_update_encap(neigh->efx, encap); 303 + /* release ref taken in efx_neigh_event() */ 304 + if (refcount_dec_and_test(&neigh->ref)) 305 + efx_free_neigh(neigh); 306 + mutex_unlock(&efx->tc->mutex); 307 + } 308 + 309 + static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n) 310 + { 311 + struct efx_neigh_binder keys = {NULL}, *neigh; 312 + bool n_valid, ipv6 = false; 313 + char ha[ETH_ALEN]; 314 + size_t keysize; 315 + 316 + if (WARN_ON(!efx->tc)) 317 + return NOTIFY_DONE; 318 + 319 + if (n->tbl == &arp_tbl) { 320 + keysize = sizeof(keys.dst_ip); 321 + #if IS_ENABLED(CONFIG_IPV6) 322 + } 
else if (n->tbl == ipv6_stub->nd_tbl) { 323 + ipv6 = true; 324 + keysize = sizeof(keys.dst_ip6); 325 + #endif 326 + } else { 327 + return NOTIFY_DONE; 328 + } 329 + if (!n->parms) { 330 + netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n"); 331 + return NOTIFY_DONE; 332 + } 333 + keys.net = read_pnet(&n->parms->net); 334 + if (n->tbl->key_len != keysize) { 335 + netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n", 336 + n->tbl->key_len); 337 + return NOTIFY_DONE; 338 + } 339 + read_lock_bh(&n->lock); /* Get a consistent view */ 340 + memcpy(ha, n->ha, ETH_ALEN); 341 + n_valid = (n->nud_state & NUD_VALID) && !n->dead; 342 + read_unlock_bh(&n->lock); 343 + if (ipv6) 344 + memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len); 345 + else 346 + memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len); 347 + rcu_read_lock(); 348 + neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys, 349 + efx_neigh_ht_params); 350 + if (!neigh || neigh->dying) 351 + /* We're not interested in this neighbour */ 352 + goto done; 353 + write_lock_bh(&neigh->lock); 354 + if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) { 355 + write_unlock_bh(&neigh->lock); 356 + /* Nothing has changed; no work to do */ 357 + goto done; 358 + } 359 + neigh->n_valid = n_valid; 360 + memcpy(neigh->ha, ha, ETH_ALEN); 361 + write_unlock_bh(&neigh->lock); 362 + if (refcount_inc_not_zero(&neigh->ref)) { 363 + rcu_read_unlock(); 364 + if (!schedule_work(&neigh->work)) 365 + /* failed to schedule, release the ref we just took */ 366 + if (refcount_dec_and_test(&neigh->ref)) 367 + efx_free_neigh(neigh); 368 + } else { 369 + done: 370 + rcu_read_unlock(); 371 + } 372 + return NOTIFY_DONE; 78 373 } 79 374 80 375 bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule) ··· 411 54 * neighbour info for the outer Ethernet header. 
412 55 */ 413 56 list_for_each_entry(act, &rule->acts.list, list) 414 - if (act->encap_md) /* neigh bindings not implemented yet */ 57 + if (act->encap_md && !act->encap_md->n_valid) 415 58 return false; 416 59 return true; 417 60 } ··· 422 65 { 423 66 enum efx_encap_type type = efx_tc_indr_netdev_type(egdev); 424 67 struct efx_tc_encap_action *encap, *old; 68 + struct efx_rep *to_efv; 425 69 s64 rc; 426 70 427 71 if (type == EFX_ENCAP_TYPE_NONE) { ··· 456 98 return ERR_PTR(-ENOMEM); 457 99 encap->type = type; 458 100 encap->key = info->key; 101 + INIT_LIST_HEAD(&encap->users); 459 102 old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht, 460 103 &encap->linkage, 461 104 efx_tc_encap_ht_params); ··· 469 110 return old; 470 111 } 471 112 113 + rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack); 114 + if (rc < 0) 115 + goto out_remove; 116 + to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev); 117 + if (IS_ERR(to_efv)) { 118 + /* neigh->egdev isn't ours */ 119 + NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch"); 120 + rc = PTR_ERR(to_efv); 121 + goto out_release; 122 + } 123 + rc = efx_tc_flower_external_mport(efx, to_efv); 124 + if (rc < 0) { 125 + NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port"); 126 + goto out_release; 127 + } 128 + encap->dest_mport = rc; 129 + read_lock_bh(&encap->neigh->lock); 130 + efx_gen_encap_header(encap); 131 + read_unlock_bh(&encap->neigh->lock); 132 + 133 + rc = efx_mae_allocate_encap_md(efx, encap); 134 + if (rc < 0) { 135 + NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw"); 136 + goto out_release; 137 + } 138 + 472 139 /* ref and return */ 473 140 refcount_set(&encap->ref, 1); 474 141 return encap; 142 + out_release: 143 + efx_release_neigh(efx, encap); 144 + out_remove: 145 + rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, 146 + efx_tc_encap_ht_params); 147 + kfree(encap); 148 + return ERR_PTR(rc); 475 149 } 476 150 477 151 void 
efx_tc_flower_release_encap_md(struct efx_nic *efx, ··· 512 120 { 513 121 if (!refcount_dec_and_test(&encap->ref)) 514 122 return; /* still in use */ 123 + efx_release_neigh(efx, encap); 515 124 rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, 516 125 efx_tc_encap_ht_params); 126 + efx_mae_free_encap_md(efx, encap); 517 127 kfree(encap); 128 + } 129 + 130 + static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh) 131 + { 132 + struct efx_tc_encap_action *encap, *next; 133 + 134 + list_for_each_entry_safe(encap, next, &neigh->users, list) { 135 + /* Should cause neigh usage count to fall to zero, freeing it */ 136 + efx_release_neigh(efx, encap); 137 + /* The encap has lost its neigh, so it's now unready */ 138 + efx_tc_update_encap(efx, encap); 139 + } 140 + } 141 + 142 + void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev) 143 + { 144 + struct efx_neigh_binder *neigh; 145 + struct rhashtable_iter walk; 146 + 147 + mutex_lock(&efx->tc->mutex); 148 + rhashtable_walk_enter(&efx->tc->neigh_ht, &walk); 149 + rhashtable_walk_start(&walk); 150 + while ((neigh = rhashtable_walk_next(&walk)) != NULL) { 151 + if (IS_ERR(neigh)) 152 + continue; 153 + if (neigh->egdev != net_dev) 154 + continue; 155 + neigh->dying = true; 156 + rhashtable_walk_stop(&walk); 157 + synchronize_rcu(); /* Make sure any updates see dying flag */ 158 + efx_tc_remove_neigh_users(efx, neigh); /* might sleep */ 159 + rhashtable_walk_start(&walk); 160 + } 161 + rhashtable_walk_stop(&walk); 162 + rhashtable_walk_exit(&walk); 163 + mutex_unlock(&efx->tc->mutex); 164 + } 165 + 166 + int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, 167 + void *ptr) 168 + { 169 + if (efx->type->is_vf) 170 + return NOTIFY_DONE; 171 + 172 + switch (event) { 173 + case NETEVENT_NEIGH_UPDATE: 174 + return efx_neigh_event(efx, ptr); 175 + default: 176 + return NOTIFY_DONE; 177 + } 518 178 }

+56

drivers/net/ethernet/sfc/tc_encap_actions.h

··· 15 15 #include <linux/refcount.h> 16 16 #include <net/tc_act/tc_tunnel_key.h> 17 17 18 + /** 19 + * struct efx_neigh_binder - driver state for a neighbour entry 20 + * @net: the network namespace in which this neigh resides 21 + * @dst_ip: the IPv4 destination address resolved by this neigh 22 + * @dst_ip6: the IPv6 destination address resolved by this neigh 23 + * @ha: the hardware (Ethernet) address of the neighbour 24 + * @n_valid: true if the neighbour is in NUD_VALID state 25 + * @lock: protects @ha and @n_valid 26 + * @ttl: Time To Live associated with the route used 27 + * @dying: set when egdev is going away, to skip further updates 28 + * @egdev: egress device from the route lookup. Holds a reference 29 + * @dev_tracker: reference tracker entry for @egdev 30 + * @ns_tracker: reference tracker entry for @net 31 + * @ref: counts encap actions referencing this entry 32 + * @used: jiffies of last time traffic hit any encap action using this. 33 + * When counter reads update this, a new neighbour event is sent to 34 + * indicate that the neighbour entry is still in use. 35 + * @users: list of &struct efx_tc_encap_action 36 + * @linkage: entry in efx->tc->neigh_ht (keys are @net, @dst_ip, @dst_ip6). 37 + * @work: processes neighbour state changes, updates the encap actions 38 + * @efx: owning NIC instance. 39 + * 40 + * Associates a neighbour entry with the encap actions that are 41 + * interested in it, allowing the latter to be updated when the 42 + * neighbour details change. 43 + * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes; 44 + * this distinguishes IPv4 from IPv6 entries. 
45 + */ 46 + struct efx_neigh_binder { 47 + struct net *net; 48 + __be32 dst_ip; 49 + struct in6_addr dst_ip6; 50 + char ha[ETH_ALEN]; 51 + bool n_valid; 52 + rwlock_t lock; 53 + u8 ttl; 54 + bool dying; 55 + struct net_device *egdev; 56 + netdevice_tracker dev_tracker; 57 + netns_tracker ns_tracker; 58 + refcount_t ref; 59 + unsigned long used; 60 + struct list_head users; 61 + struct rhash_head linkage; 62 + struct work_struct work; 63 + struct efx_nic *efx; 64 + }; 65 + 18 66 /* This limit is arbitrary; current hardware (SN1022) handles encap headers 19 67 * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol. 20 68 */ ··· 72 24 struct ip_tunnel_key key; /* 52 bytes */ 73 25 u32 dest_mport; /* is copied into struct efx_tc_action_set */ 74 26 u8 encap_hdr_len; 27 + bool n_valid; 75 28 u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR]; 29 + struct efx_neigh_binder *neigh; 30 + struct list_head list; /* entry on neigh->users list */ 31 + struct list_head users; /* action sets using this encap_md */ 76 32 struct rhash_head linkage; /* efx->tc_encap_ht */ 77 33 refcount_t ref; 78 34 u32 fw_id; /* index of this entry in firmware encap table */ ··· 95 43 struct net_device *egdev, struct netlink_ext_ack *extack); 96 44 void efx_tc_flower_release_encap_md(struct efx_nic *efx, 97 45 struct efx_tc_encap_action *encap); 46 + 47 + void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev); 48 + int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, 49 + void *ptr); 98 50 99 51 #endif /* EFX_TC_ENCAP_ACTIONS_H */