Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mlxsw-maintain-candidate-rifs'

Petr Machata says:

====================
mlxsw: Maintain candidate RIFs

The mlxsw driver currently makes the assumption that the user applies
configuration in a bottom-up manner. Thus netdevices need to be added to
the bridge before IP addresses are configured on that bridge or SVI added
on top of it. Enslaving a netdevice to another netdevice that already has
uppers is in fact forbidden by mlxsw for this reason. Despite this safety,
it is rather easy to get into situations where the offloaded configuration
is just plain wrong.

As an example, take a front panel port, configure an IP address: it gets a
RIF. Now enslave the port to the bridge, and the RIF is gone. Remove the
port from the bridge again, but the RIF never comes back. There are a number
of similar situations where changing the configuration there and back
utterly breaks the offload.

The situation is going to be made better by implementing a range of replays
and post-hoc offloads.

This patch set lays the ground for replay of next hops. The particular
issue that it deals with is that currently, driver-specific bookkeeping for
next hops is hooked off RIF objects, which come and go across the lifetime
of a netdevice. We would rather keep these objects at an entity that
mirrors the lifetime of the netdevice itself. That way they are at hand and
can be offloaded when a RIF is eventually created.

To that end, with this patchset, mlxsw keeps a hash table of CRIFs:
candidate RIFs, persistent handles for netdevices that mlxsw deems
potentially interesting. The lifetime of a CRIF matches that of the
underlying netdevice, and thus a RIF can always assume a CRIF exists. A
CRIF is where next hops are kept, and when a RIF is created, these next hops
can be easily offloaded. (Previously only the next hops created after the
RIF was created were offloaded.)

- Patches #1 and #2 are minor adjustments.
- In patches #3 and #4, add CRIF bookkeeping.
- In patch #5, link CRIFs to RIFs such that given a netdevice-backed RIF,
the corresponding CRIF is easy to look up.
- Patch #6 is a clean-up allowed by the previous patches.
- Patches #7 and #8 move next hop tracking to CRIFs.

No observable effects are intended as of yet. This will be useful once
there is support for RIF creation for netdevices that become mlxsw uppers,
which will come in following patch sets.
====================

Link: https://lore.kernel.org/r/cover.1687438411.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+333 -86
+331 -85
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
··· 51 51 struct mlxsw_sp_lpm_tree; 52 52 struct mlxsw_sp_rif_ops; 53 53 54 - struct mlxsw_sp_rif { 54 + struct mlxsw_sp_crif_key { 55 + struct net_device *dev; 56 + }; 57 + 58 + struct mlxsw_sp_crif { 59 + struct mlxsw_sp_crif_key key; 60 + struct rhash_head ht_node; 61 + bool can_destroy; 55 62 struct list_head nexthop_list; 63 + struct mlxsw_sp_rif *rif; 64 + }; 65 + 66 + static const struct rhashtable_params mlxsw_sp_crif_ht_params = { 67 + .key_offset = offsetof(struct mlxsw_sp_crif, key), 68 + .key_len = sizeof_field(struct mlxsw_sp_crif, key), 69 + .head_offset = offsetof(struct mlxsw_sp_crif, ht_node), 70 + }; 71 + 72 + struct mlxsw_sp_rif { 73 + struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ 56 74 struct list_head neigh_list; 57 - struct net_device *dev; /* NULL for underlay RIF */ 58 75 struct mlxsw_sp_fid *fid; 59 76 unsigned char addr[ETH_ALEN]; 60 77 int mtu; ··· 90 73 91 74 static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) 92 75 { 93 - return rif->dev; 76 + if (!rif->crif) 77 + return NULL; 78 + return rif->crif->key.dev; 94 79 } 95 80 96 81 struct mlxsw_sp_rif_params { ··· 1079 1060 return tb_id; 1080 1061 } 1081 1062 1063 + static void 1064 + mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) 1065 + { 1066 + crif->key.dev = dev; 1067 + INIT_LIST_HEAD(&crif->nexthop_list); 1068 + } 1069 + 1070 + static struct mlxsw_sp_crif * 1071 + mlxsw_sp_crif_alloc(struct net_device *dev) 1072 + { 1073 + struct mlxsw_sp_crif *crif; 1074 + 1075 + crif = kzalloc(sizeof(*crif), GFP_KERNEL); 1076 + if (!crif) 1077 + return NULL; 1078 + 1079 + mlxsw_sp_crif_init(crif, dev); 1080 + return crif; 1081 + } 1082 + 1083 + static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) 1084 + { 1085 + if (WARN_ON(crif->rif)) 1086 + return; 1087 + 1088 + WARN_ON(!list_empty(&crif->nexthop_list)); 1089 + kfree(crif); 1090 + } 1091 + 1092 + static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router, 1093 + struct mlxsw_sp_crif 
*crif) 1094 + { 1095 + return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node, 1096 + mlxsw_sp_crif_ht_params); 1097 + } 1098 + 1099 + static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router, 1100 + struct mlxsw_sp_crif *crif) 1101 + { 1102 + rhashtable_remove_fast(&router->crif_ht, &crif->ht_node, 1103 + mlxsw_sp_crif_ht_params); 1104 + } 1105 + 1106 + static struct mlxsw_sp_crif * 1107 + mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router, 1108 + const struct net_device *dev) 1109 + { 1110 + struct mlxsw_sp_crif_key key = { 1111 + .dev = (struct net_device *)dev, 1112 + }; 1113 + 1114 + return rhashtable_lookup_fast(&router->crif_ht, &key, 1115 + mlxsw_sp_crif_ht_params); 1116 + } 1117 + 1082 1118 static struct mlxsw_sp_rif * 1083 1119 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, 1084 1120 const struct mlxsw_sp_rif_params *params, ··· 1722 1648 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); 1723 1649 } 1724 1650 1725 - static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, 1726 - struct mlxsw_sp_rif *old_rif, 1727 - struct mlxsw_sp_rif *new_rif); 1651 + static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, 1652 + struct mlxsw_sp_rif *rif); 1653 + 1728 1654 static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, 1729 1655 struct mlxsw_sp_rif *old_rif, 1730 1656 struct mlxsw_sp_rif *new_rif, 1731 1657 bool migrate_nhs) 1732 1658 { 1733 - if (migrate_nhs) 1734 - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, old_rif, new_rif); 1659 + struct mlxsw_sp_crif *crif = old_rif->crif; 1660 + struct mlxsw_sp_crif mock_crif = {}; 1735 1661 1662 + if (migrate_nhs) 1663 + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); 1664 + 1665 + /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload 1666 + * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link. 
1667 + */ 1668 + mlxsw_sp_crif_init(&mock_crif, crif->key.dev); 1669 + old_rif->crif = &mock_crif; 1670 + mock_crif.rif = old_rif; 1736 1671 mlxsw_sp_rif_destroy(old_rif); 1737 1672 } 1738 1673 ··· 1766 1683 &new_lb_rif->common, keep_encap); 1767 1684 return 0; 1768 1685 } 1769 - 1770 - static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, 1771 - struct mlxsw_sp_rif *rif); 1772 1686 1773 1687 /** 1774 1688 * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. ··· 2995 2915 2996 2916 struct mlxsw_sp_nexthop { 2997 2917 struct list_head neigh_list_node; /* member of neigh entry list */ 2998 - struct list_head rif_list_node; 2918 + struct list_head crif_list_node; 2999 2919 struct list_head router_list_node; 3000 2920 struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group 3001 2921 * this nexthop belongs to ··· 3008 2928 int nh_weight; 3009 2929 int norm_nh_weight; 3010 2930 int num_adj_entries; 3011 - struct mlxsw_sp_rif *rif; 2931 + struct mlxsw_sp_crif *crif; 3012 2932 u8 should_offload:1, /* set indicates this nexthop should be written 3013 2933 * to the adjacency table. 
3014 2934 */ ··· 3031 2951 static struct net_device * 3032 2952 mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) 3033 2953 { 3034 - if (nh->rif) 3035 - return mlxsw_sp_rif_dev(nh->rif); 3036 - return NULL; 2954 + if (!nh->crif) 2955 + return NULL; 2956 + return nh->crif->key.dev; 3037 2957 } 3038 2958 3039 2959 enum mlxsw_sp_nexthop_group_type { ··· 3058 2978 static struct mlxsw_sp_rif * 3059 2979 mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) 3060 2980 { 3061 - return nhgi->nexthops[0].rif; 2981 + struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif; 2982 + 2983 + if (!crif) 2984 + return NULL; 2985 + return crif->rif; 3062 2986 } 3063 2987 3064 2988 struct mlxsw_sp_nexthop_group_vr_key { ··· 3186 3102 3187 3103 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) 3188 3104 { 3189 - return nh->rif; 3105 + if (WARN_ON(!nh->crif)) 3106 + return NULL; 3107 + return nh->crif->rif; 3190 3108 } 3191 3109 3192 3110 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) ··· 3573 3487 bool force, char *ratr_pl) 3574 3488 { 3575 3489 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; 3490 + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); 3576 3491 enum mlxsw_reg_ratr_op op; 3577 3492 u16 rif_index; 3578 3493 3579 - rif_index = nh->rif ? nh->rif->rif_index : 3580 - mlxsw_sp->router->lb_rif_index; 3494 + rif_index = rif ? rif->rif_index : 3495 + mlxsw_sp->router->lb_crif->rif->rif_index; 3581 3496 op = force ? 
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : 3582 3497 MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; 3583 3498 mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, ··· 4196 4109 } 4197 4110 } 4198 4111 4199 - static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, 4200 - struct mlxsw_sp_rif *rif) 4112 + static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh, 4113 + struct mlxsw_sp_crif *crif) 4201 4114 { 4202 - if (nh->rif) 4115 + if (nh->crif) 4203 4116 return; 4204 4117 4205 - nh->rif = rif; 4206 - list_add(&nh->rif_list_node, &rif->nexthop_list); 4118 + nh->crif = crif; 4119 + list_add(&nh->crif_list_node, &crif->nexthop_list); 4207 4120 } 4208 4121 4209 - static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) 4122 + static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh) 4210 4123 { 4211 - if (!nh->rif) 4124 + if (!nh->crif) 4212 4125 return; 4213 4126 4214 - list_del(&nh->rif_list_node); 4215 - nh->rif = NULL; 4127 + list_del(&nh->crif_list_node); 4128 + nh->crif = NULL; 4216 4129 } 4217 4130 4218 4131 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, ··· 4223 4136 struct neighbour *n; 4224 4137 u8 nud_state, dead; 4225 4138 int err; 4139 + 4140 + if (WARN_ON(!nh->crif->rif)) 4141 + return 0; 4226 4142 4227 4143 if (!nh->nhgi->gateway || nh->neigh_entry) 4228 4144 return 0; ··· 4317 4227 struct mlxsw_sp_nexthop *nh, 4318 4228 struct mlxsw_sp_ipip_entry *ipip_entry) 4319 4229 { 4230 + struct mlxsw_sp_crif *crif; 4320 4231 bool removing; 4321 4232 4322 4233 if (!nh->nhgi->gateway || nh->ipip_entry) 4323 4234 return; 4324 4235 4236 + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev); 4237 + if (WARN_ON(!crif)) 4238 + return; 4239 + 4325 4240 nh->ipip_entry = ipip_entry; 4326 4241 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); 4327 4242 __mlxsw_sp_nexthop_neigh_update(nh, removing); 4328 - mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); 4243 + 
mlxsw_sp_nexthop_crif_init(nh, crif); 4329 4244 } 4330 4245 4331 4246 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, ··· 4362 4267 { 4363 4268 const struct mlxsw_sp_ipip_ops *ipip_ops; 4364 4269 struct mlxsw_sp_ipip_entry *ipip_entry; 4365 - struct mlxsw_sp_rif *rif; 4270 + struct mlxsw_sp_crif *crif; 4366 4271 int err; 4367 4272 4368 4273 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); ··· 4376 4281 } 4377 4282 4378 4283 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; 4379 - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); 4380 - if (!rif) 4284 + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev); 4285 + if (!crif) 4381 4286 return 0; 4382 4287 4383 - mlxsw_sp_nexthop_rif_init(nh, rif); 4288 + mlxsw_sp_nexthop_crif_init(nh, crif); 4289 + 4290 + if (!crif->rif) 4291 + return 0; 4292 + 4384 4293 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); 4385 4294 if (err) 4386 4295 goto err_neigh_init; ··· 4392 4293 return 0; 4393 4294 4394 4295 err_neigh_init: 4395 - mlxsw_sp_nexthop_rif_fini(nh); 4296 + mlxsw_sp_nexthop_crif_fini(nh); 4396 4297 return err; 4298 + } 4299 + 4300 + static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp, 4301 + struct mlxsw_sp_nexthop *nh) 4302 + { 4303 + switch (nh->type) { 4304 + case MLXSW_SP_NEXTHOP_TYPE_ETH: 4305 + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); 4306 + break; 4307 + case MLXSW_SP_NEXTHOP_TYPE_IPIP: 4308 + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); 4309 + break; 4310 + } 4397 4311 } 4398 4312 4399 4313 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, 4400 4314 struct mlxsw_sp_nexthop *nh) 4401 4315 { 4402 - switch (nh->type) { 4403 - case MLXSW_SP_NEXTHOP_TYPE_ETH: 4404 - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); 4405 - mlxsw_sp_nexthop_rif_fini(nh); 4406 - break; 4407 - case MLXSW_SP_NEXTHOP_TYPE_IPIP: 4408 - mlxsw_sp_nexthop_rif_fini(nh); 4409 - mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); 4410 - break; 4411 - } 4316 + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); 4317 + 
mlxsw_sp_nexthop_crif_fini(nh); 4412 4318 } 4413 4319 4414 4320 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, ··· 4506 4402 struct mlxsw_sp_nexthop *nh; 4507 4403 bool removing; 4508 4404 4509 - list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { 4405 + list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) { 4510 4406 switch (nh->type) { 4511 4407 case MLXSW_SP_NEXTHOP_TYPE_ETH: 4512 4408 removing = false; ··· 4524 4420 } 4525 4421 } 4526 4422 4527 - static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, 4528 - struct mlxsw_sp_rif *old_rif, 4529 - struct mlxsw_sp_rif *new_rif) 4530 - { 4531 - struct mlxsw_sp_nexthop *nh; 4532 - 4533 - list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); 4534 - list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) 4535 - nh->rif = new_rif; 4536 - mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); 4537 - } 4538 - 4539 4423 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 4540 4424 struct mlxsw_sp_rif *rif) 4541 4425 { 4542 4426 struct mlxsw_sp_nexthop *nh, *tmp; 4543 4427 4544 - list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { 4545 - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); 4428 + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, 4429 + crif_list_node) { 4430 + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); 4546 4431 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); 4547 4432 } 4548 4433 } ··· 4551 4458 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, 4552 4459 MLXSW_REG_RATR_TYPE_ETHERNET, 4553 4460 mlxsw_sp->router->adj_trap_index, 4554 - mlxsw_sp->router->lb_rif_index); 4461 + mlxsw_sp->router->lb_crif->rif->rif_index); 4555 4462 mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); 4556 4463 mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); 4557 4464 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); ··· 4867 4774 static void 
mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, 4868 4775 struct mlxsw_sp_nexthop *nh) 4869 4776 { 4870 - u16 lb_rif_index = mlxsw_sp->router->lb_rif_index; 4871 - 4872 4777 nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; 4873 4778 nh->should_offload = 1; 4874 4779 /* While nexthops that discard packets do not forward packets 4875 4780 * via an egress RIF, they still need to be programmed using a 4876 4781 * valid RIF, so use the loopback RIF created during init. 4877 4782 */ 4878 - nh->rif = mlxsw_sp->router->rifs[lb_rif_index]; 4783 + nh->crif = mlxsw_sp->router->lb_crif; 4879 4784 } 4880 4785 4881 4786 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, 4882 4787 struct mlxsw_sp_nexthop *nh) 4883 4788 { 4884 - nh->rif = NULL; 4789 + nh->crif = NULL; 4885 4790 nh->should_offload = 0; 4886 4791 } 4887 4792 ··· 7887 7796 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 7888 7797 struct mlxsw_sp_rif *rif) 7889 7798 { 7799 + /* Signal to nexthop cleanup that the RIF is going away. */ 7800 + rif->crif->rif = NULL; 7801 + 7890 7802 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); 7891 7803 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); 7892 7804 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); ··· 7999 7905 8000 7906 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, 8001 7907 u16 vr_id, 8002 - struct net_device *l3_dev) 7908 + struct mlxsw_sp_crif *crif) 8003 7909 { 7910 + struct net_device *l3_dev = crif ? 
crif->key.dev : NULL; 8004 7911 struct mlxsw_sp_rif *rif; 8005 7912 8006 7913 rif = kzalloc(rif_size, GFP_KERNEL); 8007 7914 if (!rif) 8008 7915 return NULL; 8009 7916 8010 - INIT_LIST_HEAD(&rif->nexthop_list); 8011 7917 INIT_LIST_HEAD(&rif->neigh_list); 8012 7918 if (l3_dev) { 8013 7919 ether_addr_copy(rif->addr, l3_dev->dev_addr); 8014 7920 rif->mtu = l3_dev->mtu; 8015 - rif->dev = l3_dev; 8016 7921 } 8017 7922 rif->vr_id = vr_id; 8018 7923 rif->rif_index = rif_index; 7924 + if (crif) { 7925 + rif->crif = crif; 7926 + crif->rif = rif; 7927 + } 8019 7928 8020 7929 return rif; 8021 7930 } ··· 8026 7929 static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) 8027 7930 { 8028 7931 WARN_ON(!list_empty(&rif->neigh_list)); 8029 - WARN_ON(!list_empty(&rif->nexthop_list)); 7932 + 7933 + if (rif->crif) 7934 + rif->crif->rif = NULL; 8030 7935 kfree(rif); 8031 7936 } 8032 7937 ··· 8262 8163 const struct mlxsw_sp_rif_ops *ops; 8263 8164 struct mlxsw_sp_fid *fid = NULL; 8264 8165 enum mlxsw_sp_rif_type type; 8166 + struct mlxsw_sp_crif *crif; 8265 8167 struct mlxsw_sp_rif *rif; 8266 8168 struct mlxsw_sp_vr *vr; 8267 8169 u16 rif_index; ··· 8282 8182 goto err_rif_index_alloc; 8283 8183 } 8284 8184 8285 - rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); 8185 + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev); 8186 + if (WARN_ON(!crif)) { 8187 + err = -ENOENT; 8188 + goto err_crif_lookup; 8189 + } 8190 + 8191 + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif); 8286 8192 if (!rif) { 8287 8193 err = -ENOMEM; 8288 8194 goto err_rif_alloc; ··· 8347 8241 dev_put(params->dev); 8348 8242 mlxsw_sp_rif_free(rif); 8349 8243 err_rif_alloc: 8244 + err_crif_lookup: 8350 8245 mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); 8351 8246 err_rif_index_alloc: 8352 8247 vr->rif_count--; ··· 8360 8253 struct net_device *dev = mlxsw_sp_rif_dev(rif); 8361 8254 const struct mlxsw_sp_rif_ops *ops = rif->ops; 8362 8255 struct mlxsw_sp *mlxsw_sp = 
rif->mlxsw_sp; 8256 + struct mlxsw_sp_crif *crif = rif->crif; 8363 8257 struct mlxsw_sp_fid *fid = rif->fid; 8364 8258 u8 rif_entries = rif->rif_entries; 8365 8259 u16 rif_index = rif->rif_index; ··· 8391 8283 mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); 8392 8284 vr->rif_count--; 8393 8285 mlxsw_sp_vr_put(mlxsw_sp, vr); 8286 + 8287 + if (crif->can_destroy) 8288 + mlxsw_sp_crif_free(crif); 8394 8289 } 8395 8290 8396 8291 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, ··· 9259 9148 return -ENOBUFS; 9260 9149 } 9261 9150 9151 + static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp, 9152 + struct net_device *dev) 9153 + { 9154 + struct vlan_dev_priv *vlan; 9155 + 9156 + if (netif_is_lag_master(dev) || 9157 + netif_is_bridge_master(dev) || 9158 + mlxsw_sp_port_dev_check(dev) || 9159 + mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) || 9160 + netif_is_l3_master(dev)) 9161 + return true; 9162 + 9163 + if (!is_vlan_dev(dev)) 9164 + return false; 9165 + 9166 + vlan = vlan_dev_priv(dev); 9167 + return netif_is_lag_master(vlan->real_dev) || 9168 + netif_is_bridge_master(vlan->real_dev) || 9169 + mlxsw_sp_port_dev_check(vlan->real_dev); 9170 + } 9171 + 9172 + static struct mlxsw_sp_crif * 9173 + mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev) 9174 + { 9175 + struct mlxsw_sp_crif *crif; 9176 + int err; 9177 + 9178 + if (WARN_ON(mlxsw_sp_crif_lookup(router, dev))) 9179 + return NULL; 9180 + 9181 + crif = mlxsw_sp_crif_alloc(dev); 9182 + if (!crif) 9183 + return ERR_PTR(-ENOMEM); 9184 + 9185 + err = mlxsw_sp_crif_insert(router, crif); 9186 + if (err) 9187 + goto err_netdev_insert; 9188 + 9189 + return crif; 9190 + 9191 + err_netdev_insert: 9192 + mlxsw_sp_crif_free(crif); 9193 + return ERR_PTR(err); 9194 + } 9195 + 9196 + static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, 9197 + struct mlxsw_sp_crif *crif) 9198 + { 9199 + struct mlxsw_sp_nexthop *nh, *tmp; 9200 + 9201 + 
mlxsw_sp_crif_remove(router, crif); 9202 + 9203 + list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node) 9204 + mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh); 9205 + 9206 + if (crif->rif) 9207 + crif->can_destroy = true; 9208 + else 9209 + mlxsw_sp_crif_free(crif); 9210 + } 9211 + 9212 + static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router, 9213 + struct net_device *dev) 9214 + { 9215 + struct mlxsw_sp_crif *crif; 9216 + 9217 + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) 9218 + return 0; 9219 + 9220 + crif = mlxsw_sp_crif_register(router, dev); 9221 + return PTR_ERR_OR_ZERO(crif); 9222 + } 9223 + 9224 + static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router, 9225 + struct net_device *dev) 9226 + { 9227 + struct mlxsw_sp_crif *crif; 9228 + 9229 + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) 9230 + return; 9231 + 9232 + /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts 9233 + * the NETDEV_UNREGISTER message, so we can get here twice. If that's 9234 + * what happened, the netdevice state is NETREG_UNREGISTERED. In that 9235 + * case, we expect to have collected the CRIF already, and warn if it 9236 + * still exists. Otherwise we expect the CRIF to exist. 9237 + */ 9238 + crif = mlxsw_sp_crif_lookup(router, dev); 9239 + if (dev->reg_state == NETREG_UNREGISTERED) { 9240 + if (!WARN_ON(crif)) 9241 + return; 9242 + } 9243 + if (WARN_ON(!crif)) 9244 + return; 9245 + 9246 + mlxsw_sp_crif_unregister(router, crif); 9247 + } 9248 + 9262 9249 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) 9263 9250 { 9264 9251 switch (event) { ··· 9576 9367 9577 9368 mutex_lock(&mlxsw_sp->router->lock); 9578 9369 9370 + if (event == NETDEV_REGISTER) { 9371 + err = mlxsw_sp_netdevice_register(router, dev); 9372 + if (err) 9373 + /* No need to roll this back, UNREGISTER will collect it 9374 + * anyhow. 
9375 + */ 9376 + goto out; 9377 + } 9378 + 9579 9379 if (mlxsw_sp_is_offload_xstats_event(event)) 9580 9380 err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, 9581 9381 event, ptr); ··· 9599 9381 else if (mlxsw_sp_is_vrf_event(event, ptr)) 9600 9382 err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); 9601 9383 9384 + if (event == NETDEV_UNREGISTER) 9385 + mlxsw_sp_netdevice_unregister(router, dev); 9386 + 9387 + out: 9602 9388 mutex_unlock(&mlxsw_sp->router->lock); 9603 9389 9604 9390 return notifier_from_errno(err); ··· 10123 9901 10124 9902 static struct mlxsw_sp_rif * 10125 9903 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, 9904 + struct mlxsw_sp_crif *ul_crif, 10126 9905 struct netlink_ext_ack *extack) 10127 9906 { 10128 9907 struct mlxsw_sp_rif *ul_rif; ··· 10137 9914 return ERR_PTR(err); 10138 9915 } 10139 9916 10140 - ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); 9917 + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, 9918 + ul_crif); 10141 9919 if (!ul_rif) { 10142 9920 err = -ENOMEM; 10143 9921 goto err_rif_alloc; ··· 10177 9953 10178 9954 static struct mlxsw_sp_rif * 10179 9955 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, 9956 + struct mlxsw_sp_crif *ul_crif, 10180 9957 struct netlink_ext_ack *extack) 10181 9958 { 10182 9959 struct mlxsw_sp_vr *vr; ··· 10190 9965 if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) 10191 9966 return vr->ul_rif; 10192 9967 10193 - vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); 9968 + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack); 10194 9969 if (IS_ERR(vr->ul_rif)) { 10195 9970 err = PTR_ERR(vr->ul_rif); 10196 9971 goto err_ul_rif_create; ··· 10228 10003 int err = 0; 10229 10004 10230 10005 mutex_lock(&mlxsw_sp->router->lock); 10231 - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); 10006 + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL); 10232 10007 if (IS_ERR(ul_rif)) { 10233 10008 err = 
PTR_ERR(ul_rif); 10234 10009 goto out; ··· 10264 10039 struct mlxsw_sp_rif *ul_rif; 10265 10040 int err; 10266 10041 10267 - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack); 10042 + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack); 10268 10043 if (IS_ERR(ul_rif)) 10269 10044 return PTR_ERR(ul_rif); 10270 10045 ··· 10786 10561 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); 10787 10562 } 10788 10563 10789 - static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp) 10564 + static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, 10565 + struct netlink_ext_ack *extack) 10790 10566 { 10791 - u16 lb_rif_index; 10567 + struct mlxsw_sp_router *router = mlxsw_sp->router; 10568 + struct mlxsw_sp_rif *lb_rif; 10792 10569 int err; 10570 + 10571 + router->lb_crif = mlxsw_sp_crif_alloc(NULL); 10572 + if (IS_ERR(router->lb_crif)) 10573 + return PTR_ERR(router->lb_crif); 10793 10574 10794 10575 /* Create a generic loopback RIF associated with the main table 10795 10576 * (default VRF). Any table can be used, but the main table exists 10796 - * anyway, so we do not waste resources. 10577 + * anyway, so we do not waste resources. Loopback RIFs are usually 10578 + * created with a NULL CRIF, but this RIF is used as a fallback RIF 10579 + * for blackhole nexthops, and nexthops expect to have a valid CRIF. 
10797 10580 */ 10798 - err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, 10799 - &lb_rif_index); 10800 - if (err) 10801 - return err; 10802 - 10803 - mlxsw_sp->router->lb_rif_index = lb_rif_index; 10581 + lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif, 10582 + extack); 10583 + if (IS_ERR(lb_rif)) { 10584 + err = PTR_ERR(lb_rif); 10585 + goto err_ul_rif_get; 10586 + } 10804 10587 10805 10588 return 0; 10589 + 10590 + err_ul_rif_get: 10591 + mlxsw_sp_crif_free(router->lb_crif); 10592 + return err; 10806 10593 } 10807 10594 10808 10595 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) 10809 10596 { 10810 - mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); 10597 + mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif); 10598 + mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif); 10811 10599 } 10812 10600 10813 10601 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) ··· 10885 10647 if (err) 10886 10648 goto err_ipips_init; 10887 10649 10650 + err = rhashtable_init(&mlxsw_sp->router->crif_ht, 10651 + &mlxsw_sp_crif_ht_params); 10652 + if (err) 10653 + goto err_crif_ht_init; 10654 + 10888 10655 err = mlxsw_sp_rifs_init(mlxsw_sp); 10889 10656 if (err) 10890 10657 goto err_rifs_init; ··· 10917 10674 if (err) 10918 10675 goto err_vrs_init; 10919 10676 10920 - err = mlxsw_sp_lb_rif_init(mlxsw_sp); 10677 + err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack); 10921 10678 if (err) 10922 10679 goto err_lb_rif_init; 10923 10680 ··· 11021 10778 err_nexthop_ht_init: 11022 10779 mlxsw_sp_rifs_fini(mlxsw_sp); 11023 10780 err_rifs_init: 10781 + rhashtable_destroy(&mlxsw_sp->router->crif_ht); 10782 + err_crif_ht_init: 11024 10783 mlxsw_sp_ipips_fini(mlxsw_sp); 11025 10784 err_ipips_init: 11026 10785 __mlxsw_sp_router_fini(mlxsw_sp); ··· 11058 10813 rhashtable_destroy(&router->nexthop_group_ht); 11059 10814 rhashtable_destroy(&router->nexthop_ht); 11060 10815 mlxsw_sp_rifs_fini(mlxsw_sp); 10816 + 
rhashtable_destroy(&mlxsw_sp->router->crif_ht); 11061 10817 mlxsw_sp_ipips_fini(mlxsw_sp); 11062 10818 __mlxsw_sp_router_fini(mlxsw_sp); 11063 10819 cancel_delayed_work_sync(&router->nh_grp_activity_dw);
+2 -1
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
··· 20 20 21 21 struct mlxsw_sp_router { 22 22 struct mlxsw_sp *mlxsw_sp; 23 + struct rhashtable crif_ht; 23 24 struct gen_pool *rifs_table; 24 25 struct mlxsw_sp_rif **rifs; 25 26 struct idr rif_mac_profiles_idr; ··· 60 59 struct mlxsw_sp_router_nve_decap nve_decap_config; 61 60 struct mutex lock; /* Protects shared router resources */ 62 61 struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; 63 - u16 lb_rif_index; 62 + struct mlxsw_sp_crif *lb_crif; 64 63 const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; 65 64 size_t adj_grp_size_ranges_count; 66 65 struct delayed_work nh_grp_activity_dw;