Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mlxsw-Reflect-nexthop-status-changes'

Jiri Pirko says:

====================
mlxsw: Reflect nexthop status changes

Ido says:

When the kernel forwards IPv4 packets via multipath routes, it doesn't
consider nexthops that are dead or linkdown, for example when the nexthop
netdev is administratively down or doesn't have a carrier.

Devices capable of offloading such multipath routes need to be made
aware of changes in the reflected nexthops' status. Otherwise, the
device might forward packets via non-functional nexthops, resulting in
packet loss. This patchset aims to fix that.

The first 11 patches deal with the necessary restructuring in the
mlxsw driver, so that it's able to correctly add and remove nexthops
from the device's adjacency table.

The 12th patch adds the NH_{ADD,DEL} events to the FIB notification
chain. These notifications are sent whenever the kernel decides to add
or remove a nexthop from the forwarding plane.

Finally, the last three patches add support for these events in the
mlxsw driver, which is currently the only driver capable of offloading
multipath routes.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+450 -130
+6
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
··· 3473 3473 if (!r) 3474 3474 return NULL; 3475 3475 3476 + INIT_LIST_HEAD(&r->nexthop_list); 3477 + INIT_LIST_HEAD(&r->neigh_list); 3476 3478 ether_addr_copy(r->addr, l3_dev->dev_addr); 3477 3479 r->mtu = l3_dev->mtu; 3478 3480 r->ref_count = 1; ··· 3542 3540 struct mlxsw_sp_fid *f = r->f; 3543 3541 u16 fid = f->fid; 3544 3542 u16 rif = r->rif; 3543 + 3544 + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); 3545 3545 3546 3546 mlxsw_sp->rifs[rif] = NULL; 3547 3547 f->r = NULL; ··· 3773 3769 struct net_device *l3_dev = r->dev; 3774 3770 struct mlxsw_sp_fid *f = r->f; 3775 3771 u16 rif = r->rif; 3772 + 3773 + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); 3776 3774 3777 3775 mlxsw_sp->rifs[rif] = NULL; 3778 3776 f->r = NULL;
+6 -1
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
··· 108 108 }; 109 109 110 110 struct mlxsw_sp_rif { 111 + struct list_head nexthop_list; 112 + struct list_head neigh_list; 111 113 struct net_device *dev; 112 114 unsigned int ref_count; 113 115 struct mlxsw_sp_fid *f; ··· 256 254 struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; 257 255 struct mlxsw_sp_vr *vrs; 258 256 struct rhashtable neigh_ht; 257 + struct rhashtable nexthop_group_ht; 258 + struct rhashtable nexthop_ht; 259 259 struct { 260 260 struct delayed_work dw; 261 261 unsigned long interval; /* ms */ 262 262 } neighs_update; 263 263 struct delayed_work nexthop_probe_dw; 264 264 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ 265 - struct list_head nexthop_group_list; 266 265 struct list_head nexthop_neighs_list; 267 266 bool aborted; 268 267 }; ··· 604 601 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); 605 602 int mlxsw_sp_router_netevent_event(struct notifier_block *unused, 606 603 unsigned long event, void *ptr); 604 + void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 605 + struct mlxsw_sp_rif *r); 607 606 608 607 int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); 609 608 void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
+398 -129
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
··· 127 127 struct mlxsw_sp_fib_key key; 128 128 enum mlxsw_sp_fib_entry_type type; 129 129 unsigned int ref_count; 130 - u16 rif; /* used for action local */ 131 130 struct mlxsw_sp_vr *vr; 132 - struct fib_info *fi; 133 131 struct list_head nexthop_group_node; 134 132 struct mlxsw_sp_nexthop_group *nh_group; 133 + bool offloaded; 135 134 }; 136 135 137 136 struct mlxsw_sp_fib { ··· 609 610 }; 610 611 611 612 struct mlxsw_sp_neigh_entry { 613 + struct list_head rif_list_node; 612 614 struct rhash_head ht_node; 613 615 struct mlxsw_sp_neigh_key key; 614 616 u16 rif; ··· 686 686 if (err) 687 687 goto err_neigh_entry_insert; 688 688 689 + list_add(&neigh_entry->rif_list_node, &r->neigh_list); 690 + 689 691 return neigh_entry; 690 692 691 693 err_neigh_entry_insert: ··· 699 697 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, 700 698 struct mlxsw_sp_neigh_entry *neigh_entry) 701 699 { 700 + list_del(&neigh_entry->rif_list_node); 702 701 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); 703 702 mlxsw_sp_neigh_entry_free(neigh_entry); 704 703 } ··· 1093 1090 rhashtable_destroy(&mlxsw_sp->router.neigh_ht); 1094 1091 } 1095 1092 1093 + static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, 1094 + const struct mlxsw_sp_rif *r) 1095 + { 1096 + char rauht_pl[MLXSW_REG_RAUHT_LEN]; 1097 + 1098 + mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, 1099 + r->rif, r->addr); 1100 + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); 1101 + } 1102 + 1103 + static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 1104 + struct mlxsw_sp_rif *r) 1105 + { 1106 + struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; 1107 + 1108 + mlxsw_sp_neigh_rif_flush(mlxsw_sp, r); 1109 + list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list, 1110 + rif_list_node) 1111 + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 1112 + } 1113 + 1114 + struct mlxsw_sp_nexthop_key { 1115 + struct fib_nh *fib_nh; 1116 + }; 1117 + 1096 1118 struct 
mlxsw_sp_nexthop { 1097 1119 struct list_head neigh_list_node; /* member of neigh entry list */ 1120 + struct list_head rif_list_node; 1098 1121 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group 1099 1122 * this belongs to 1100 1123 */ 1124 + struct rhash_head ht_node; 1125 + struct mlxsw_sp_nexthop_key key; 1126 + struct mlxsw_sp_rif *r; 1101 1127 u8 should_offload:1, /* set indicates this neigh is connected and 1102 1128 * should be put to KVD linear area of this group. 1103 1129 */ ··· 1139 1107 struct mlxsw_sp_neigh_entry *neigh_entry; 1140 1108 }; 1141 1109 1110 + struct mlxsw_sp_nexthop_group_key { 1111 + struct fib_info *fi; 1112 + }; 1113 + 1142 1114 struct mlxsw_sp_nexthop_group { 1143 - struct list_head list; /* node in mlxsw->router.nexthop_group_list */ 1115 + struct rhash_head ht_node; 1144 1116 struct list_head fib_list; /* list of fib entries that use this group */ 1145 - u8 adj_index_valid:1; 1117 + struct mlxsw_sp_nexthop_group_key key; 1118 + u8 adj_index_valid:1, 1119 + gateway:1; /* routes using the group use a gateway */ 1146 1120 u32 adj_index; 1147 1121 u16 ecmp_size; 1148 1122 u16 count; 1149 1123 struct mlxsw_sp_nexthop nexthops[0]; 1124 + #define nh_rif nexthops[0].r 1150 1125 }; 1126 + 1127 + static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { 1128 + .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key), 1129 + .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node), 1130 + .key_len = sizeof(struct mlxsw_sp_nexthop_group_key), 1131 + }; 1132 + 1133 + static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, 1134 + struct mlxsw_sp_nexthop_group *nh_grp) 1135 + { 1136 + return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht, 1137 + &nh_grp->ht_node, 1138 + mlxsw_sp_nexthop_group_ht_params); 1139 + } 1140 + 1141 + static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, 1142 + struct mlxsw_sp_nexthop_group *nh_grp) 1143 + { 1144 + 
rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht, 1145 + &nh_grp->ht_node, 1146 + mlxsw_sp_nexthop_group_ht_params); 1147 + } 1148 + 1149 + static struct mlxsw_sp_nexthop_group * 1150 + mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp, 1151 + struct mlxsw_sp_nexthop_group_key key) 1152 + { 1153 + return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key, 1154 + mlxsw_sp_nexthop_group_ht_params); 1155 + } 1156 + 1157 + static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = { 1158 + .key_offset = offsetof(struct mlxsw_sp_nexthop, key), 1159 + .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node), 1160 + .key_len = sizeof(struct mlxsw_sp_nexthop_key), 1161 + }; 1162 + 1163 + static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp, 1164 + struct mlxsw_sp_nexthop *nh) 1165 + { 1166 + return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht, 1167 + &nh->ht_node, mlxsw_sp_nexthop_ht_params); 1168 + } 1169 + 1170 + static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp, 1171 + struct mlxsw_sp_nexthop *nh) 1172 + { 1173 + rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node, 1174 + mlxsw_sp_nexthop_ht_params); 1175 + } 1176 + 1177 + static struct mlxsw_sp_nexthop * 1178 + mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, 1179 + struct mlxsw_sp_nexthop_key key) 1180 + { 1181 + return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key, 1182 + mlxsw_sp_nexthop_ht_params); 1183 + } 1151 1184 1152 1185 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, 1153 1186 struct mlxsw_sp_vr *vr, ··· 1328 1231 int i; 1329 1232 int err; 1330 1233 1234 + if (!nh_grp->gateway) { 1235 + mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); 1236 + return; 1237 + } 1238 + 1331 1239 for (i = 0; i < nh_grp->count; i++) { 1332 1240 nh = &nh_grp->nexthops[i]; 1333 1241 ··· 1442 1340 } 1443 1341 } 1444 1342 1445 - static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, 1446 - struct 
mlxsw_sp_nexthop_group *nh_grp, 1447 - struct mlxsw_sp_nexthop *nh, 1448 - struct fib_nh *fib_nh) 1343 + static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, 1344 + struct mlxsw_sp_rif *r) 1345 + { 1346 + if (nh->r) 1347 + return; 1348 + 1349 + nh->r = r; 1350 + list_add(&nh->rif_list_node, &r->nexthop_list); 1351 + } 1352 + 1353 + static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) 1354 + { 1355 + if (!nh->r) 1356 + return; 1357 + 1358 + list_del(&nh->rif_list_node); 1359 + nh->r = NULL; 1360 + } 1361 + 1362 + static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, 1363 + struct mlxsw_sp_nexthop *nh) 1449 1364 { 1450 1365 struct mlxsw_sp_neigh_entry *neigh_entry; 1451 - struct net_device *dev = fib_nh->nh_dev; 1366 + struct fib_nh *fib_nh = nh->key.fib_nh; 1452 1367 struct neighbour *n; 1453 1368 u8 nud_state, dead; 1369 + int err; 1370 + 1371 + if (!nh->nh_grp->gateway || nh->neigh_entry) 1372 + return 0; 1454 1373 1455 1374 /* Take a reference of neigh here ensuring that neigh would 1456 1375 * not be detructed before the nexthop entry is finished. 1457 1376 * The reference is taken either in neigh_lookup() or 1458 1377 * in neigh_create() in case n is not found. 
1459 1378 */ 1460 - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev); 1379 + n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); 1461 1380 if (!n) { 1462 - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev); 1381 + n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); 1463 1382 if (IS_ERR(n)) 1464 1383 return PTR_ERR(n); 1465 1384 neigh_event_send(n, NULL); ··· 1489 1366 if (!neigh_entry) { 1490 1367 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); 1491 1368 if (IS_ERR(neigh_entry)) { 1492 - neigh_release(n); 1493 - return -EINVAL; 1369 + err = -EINVAL; 1370 + goto err_neigh_entry_create; 1494 1371 } 1495 1372 } 1496 1373 ··· 1501 1378 list_add_tail(&neigh_entry->nexthop_neighs_list_node, 1502 1379 &mlxsw_sp->router.nexthop_neighs_list); 1503 1380 1504 - nh->nh_grp = nh_grp; 1505 1381 nh->neigh_entry = neigh_entry; 1506 1382 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); 1507 1383 read_lock_bh(&n->lock); ··· 1510 1388 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead)); 1511 1389 1512 1390 return 0; 1391 + 1392 + err_neigh_entry_create: 1393 + neigh_release(n); 1394 + return err; 1513 1395 } 1514 1396 1515 - static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, 1516 - struct mlxsw_sp_nexthop *nh) 1397 + static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, 1398 + struct mlxsw_sp_nexthop *nh) 1517 1399 { 1518 1400 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; 1519 - struct neighbour *n = neigh_entry->key.n; 1401 + struct neighbour *n; 1402 + 1403 + if (!neigh_entry) 1404 + return; 1405 + n = neigh_entry->key.n; 1520 1406 1521 1407 __mlxsw_sp_nexthop_neigh_update(nh, true); 1522 1408 list_del(&nh->neigh_list_node); 1409 + nh->neigh_entry = NULL; 1523 1410 1524 1411 /* If that is the last nexthop connected to that neigh, remove from 1525 1412 * nexthop_neighs_list 1526 1413 */ 1527 - if (list_empty(&nh->neigh_entry->nexthop_list)) 1528 - 
list_del(&nh->neigh_entry->nexthop_neighs_list_node); 1414 + if (list_empty(&neigh_entry->nexthop_list)) 1415 + list_del(&neigh_entry->nexthop_neighs_list_node); 1529 1416 1530 1417 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) 1531 1418 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); 1532 1419 1533 1420 neigh_release(n); 1421 + } 1422 + 1423 + static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, 1424 + struct mlxsw_sp_nexthop_group *nh_grp, 1425 + struct mlxsw_sp_nexthop *nh, 1426 + struct fib_nh *fib_nh) 1427 + { 1428 + struct net_device *dev = fib_nh->nh_dev; 1429 + struct mlxsw_sp_rif *r; 1430 + int err; 1431 + 1432 + nh->nh_grp = nh_grp; 1433 + nh->key.fib_nh = fib_nh; 1434 + err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); 1435 + if (err) 1436 + return err; 1437 + 1438 + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); 1439 + if (!r) 1440 + return 0; 1441 + mlxsw_sp_nexthop_rif_init(nh, r); 1442 + 1443 + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); 1444 + if (err) 1445 + goto err_nexthop_neigh_init; 1446 + 1447 + return 0; 1448 + 1449 + err_nexthop_neigh_init: 1450 + mlxsw_sp_nexthop_remove(mlxsw_sp, nh); 1451 + return err; 1452 + } 1453 + 1454 + static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, 1455 + struct mlxsw_sp_nexthop *nh) 1456 + { 1457 + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); 1458 + mlxsw_sp_nexthop_rif_fini(nh); 1459 + mlxsw_sp_nexthop_remove(mlxsw_sp, nh); 1460 + } 1461 + 1462 + static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, 1463 + unsigned long event, struct fib_nh *fib_nh) 1464 + { 1465 + struct mlxsw_sp_nexthop_key key; 1466 + struct mlxsw_sp_nexthop *nh; 1467 + struct mlxsw_sp_rif *r; 1468 + 1469 + if (mlxsw_sp->router.aborted) 1470 + return; 1471 + 1472 + key.fib_nh = fib_nh; 1473 + nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); 1474 + if (WARN_ON_ONCE(!nh)) 1475 + return; 1476 + 1477 + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); 1478 + if (!r) 1479 + return; 1480 + 1481 
+ switch (event) { 1482 + case FIB_EVENT_NH_ADD: 1483 + mlxsw_sp_nexthop_rif_init(nh, r); 1484 + mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); 1485 + break; 1486 + case FIB_EVENT_NH_DEL: 1487 + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); 1488 + mlxsw_sp_nexthop_rif_fini(nh); 1489 + break; 1490 + } 1491 + 1492 + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); 1493 + } 1494 + 1495 + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 1496 + struct mlxsw_sp_rif *r) 1497 + { 1498 + struct mlxsw_sp_nexthop *nh, *tmp; 1499 + 1500 + list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) { 1501 + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); 1502 + mlxsw_sp_nexthop_rif_fini(nh); 1503 + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); 1504 + } 1534 1505 } 1535 1506 1536 1507 static struct mlxsw_sp_nexthop_group * ··· 1642 1427 if (!nh_grp) 1643 1428 return ERR_PTR(-ENOMEM); 1644 1429 INIT_LIST_HEAD(&nh_grp->fib_list); 1430 + nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; 1645 1431 nh_grp->count = fi->fib_nhs; 1432 + nh_grp->key.fi = fi; 1646 1433 for (i = 0; i < nh_grp->count; i++) { 1647 1434 nh = &nh_grp->nexthops[i]; 1648 1435 fib_nh = &fi->fib_nh[i]; ··· 1652 1435 if (err) 1653 1436 goto err_nexthop_init; 1654 1437 } 1655 - list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list); 1438 + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); 1439 + if (err) 1440 + goto err_nexthop_group_insert; 1656 1441 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); 1657 1442 return nh_grp; 1658 1443 1444 + err_nexthop_group_insert: 1659 1445 err_nexthop_init: 1660 1446 for (i--; i >= 0; i--) 1661 1447 mlxsw_sp_nexthop_fini(mlxsw_sp, nh); ··· 1673 1453 struct mlxsw_sp_nexthop *nh; 1674 1454 int i; 1675 1455 1676 - list_del(&nh_grp->list); 1456 + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); 1677 1457 for (i = 0; i < nh_grp->count; i++) { 1678 1458 nh = &nh_grp->nexthops[i]; 1679 1459 mlxsw_sp_nexthop_fini(mlxsw_sp, nh); ··· 
1683 1463 kfree(nh_grp); 1684 1464 } 1685 1465 1686 - static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, 1687 - struct fib_info *fi) 1688 - { 1689 - int i; 1690 - 1691 - for (i = 0; i < fi->fib_nhs; i++) { 1692 - struct fib_nh *fib_nh = &fi->fib_nh[i]; 1693 - struct neighbour *n = nh->neigh_entry->key.n; 1694 - 1695 - if (memcmp(n->primary_key, &fib_nh->nh_gw, 1696 - sizeof(fib_nh->nh_gw)) == 0 && 1697 - n->dev == fib_nh->nh_dev) 1698 - return true; 1699 - } 1700 - return false; 1701 - } 1702 - 1703 - static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp, 1704 - struct fib_info *fi) 1705 - { 1706 - int i; 1707 - 1708 - if (nh_grp->count != fi->fib_nhs) 1709 - return false; 1710 - for (i = 0; i < nh_grp->count; i++) { 1711 - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; 1712 - 1713 - if (!mlxsw_sp_nexthop_match(nh, fi)) 1714 - return false; 1715 - } 1716 - return true; 1717 - } 1718 - 1719 - static struct mlxsw_sp_nexthop_group * 1720 - mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) 1721 - { 1722 - struct mlxsw_sp_nexthop_group *nh_grp; 1723 - 1724 - list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list, 1725 - list) { 1726 - if (mlxsw_sp_nexthop_group_match(nh_grp, fi)) 1727 - return nh_grp; 1728 - } 1729 - return NULL; 1730 - } 1731 - 1732 1466 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, 1733 1467 struct mlxsw_sp_fib_entry *fib_entry, 1734 1468 struct fib_info *fi) 1735 1469 { 1470 + struct mlxsw_sp_nexthop_group_key key; 1736 1471 struct mlxsw_sp_nexthop_group *nh_grp; 1737 1472 1738 - nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi); 1473 + key.fi = fi; 1474 + nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); 1739 1475 if (!nh_grp) { 1740 1476 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); 1741 1477 if (IS_ERR(nh_grp)) ··· 1713 1537 mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); 1714 1538 } 1715 1539 1540 + static bool 1541 + 
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) 1542 + { 1543 + struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; 1544 + 1545 + switch (fib_entry->type) { 1546 + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: 1547 + return !!nh_group->adj_index_valid; 1548 + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: 1549 + return !!nh_group->nh_rif; 1550 + default: 1551 + return false; 1552 + } 1553 + } 1554 + 1555 + static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) 1556 + { 1557 + fib_entry->offloaded = true; 1558 + 1559 + switch (fib_entry->vr->proto) { 1560 + case MLXSW_SP_L3_PROTO_IPV4: 1561 + fib_info_offload_inc(fib_entry->nh_group->key.fi); 1562 + break; 1563 + case MLXSW_SP_L3_PROTO_IPV6: 1564 + WARN_ON_ONCE(1); 1565 + } 1566 + } 1567 + 1568 + static void 1569 + mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) 1570 + { 1571 + switch (fib_entry->vr->proto) { 1572 + case MLXSW_SP_L3_PROTO_IPV4: 1573 + fib_info_offload_dec(fib_entry->nh_group->key.fi); 1574 + break; 1575 + case MLXSW_SP_L3_PROTO_IPV6: 1576 + WARN_ON_ONCE(1); 1577 + } 1578 + 1579 + fib_entry->offloaded = false; 1580 + } 1581 + 1582 + static void 1583 + mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, 1584 + enum mlxsw_reg_ralue_op op, int err) 1585 + { 1586 + switch (op) { 1587 + case MLXSW_REG_RALUE_OP_WRITE_DELETE: 1588 + if (!fib_entry->offloaded) 1589 + return; 1590 + return mlxsw_sp_fib_entry_offload_unset(fib_entry); 1591 + case MLXSW_REG_RALUE_OP_WRITE_WRITE: 1592 + if (err) 1593 + return; 1594 + if (mlxsw_sp_fib_entry_should_offload(fib_entry) && 1595 + !fib_entry->offloaded) 1596 + mlxsw_sp_fib_entry_offload_set(fib_entry); 1597 + else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) && 1598 + fib_entry->offloaded) 1599 + mlxsw_sp_fib_entry_offload_unset(fib_entry); 1600 + return; 1601 + default: 1602 + return; 1603 + } 1604 + } 1605 + 1716 1606 static int mlxsw_sp_fib_entry_op4_remote(struct 
mlxsw_sp *mlxsw_sp, 1717 1607 struct mlxsw_sp_fib_entry *fib_entry, 1718 1608 enum mlxsw_reg_ralue_op op) ··· 1795 1553 * with provided ECMP size. Otherwise, setup trap and pass 1796 1554 * traffic to kernel. 1797 1555 */ 1798 - if (fib_entry->nh_group->adj_index_valid) { 1556 + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { 1799 1557 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; 1800 1558 adjacency_index = fib_entry->nh_group->adj_index; 1801 1559 ecmp_size = fib_entry->nh_group->ecmp_size; ··· 1816 1574 struct mlxsw_sp_fib_entry *fib_entry, 1817 1575 enum mlxsw_reg_ralue_op op) 1818 1576 { 1577 + struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif; 1578 + enum mlxsw_reg_ralue_trap_action trap_action; 1819 1579 char ralue_pl[MLXSW_REG_RALUE_LEN]; 1820 1580 u32 *p_dip = (u32 *) fib_entry->key.addr; 1821 1581 struct mlxsw_sp_vr *vr = fib_entry->vr; 1582 + u16 trap_id = 0; 1583 + u16 rif = 0; 1584 + 1585 + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { 1586 + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; 1587 + rif = r->rif; 1588 + } else { 1589 + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; 1590 + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; 1591 + } 1822 1592 1823 1593 mlxsw_reg_ralue_pack4(ralue_pl, 1824 1594 (enum mlxsw_reg_ralxx_protocol) vr->proto, op, 1825 1595 vr->id, fib_entry->key.prefix_len, *p_dip); 1826 - mlxsw_reg_ralue_act_local_pack(ralue_pl, 1827 - MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, 1828 - fib_entry->rif); 1596 + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif); 1829 1597 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); 1830 1598 } 1831 1599 ··· 1873 1621 struct mlxsw_sp_fib_entry *fib_entry, 1874 1622 enum mlxsw_reg_ralue_op op) 1875 1623 { 1624 + int err = -EINVAL; 1625 + 1876 1626 switch (fib_entry->vr->proto) { 1877 1627 case MLXSW_SP_L3_PROTO_IPV4: 1878 - return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); 1628 + err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); 1629 + break; 1879 1630 
case MLXSW_SP_L3_PROTO_IPV6: 1880 - return -EINVAL; 1631 + return err; 1881 1632 } 1882 - return -EINVAL; 1633 + mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); 1634 + return err; 1883 1635 } 1884 1636 1885 1637 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, ··· 1901 1645 } 1902 1646 1903 1647 static int 1904 - mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, 1905 - const struct fib_entry_notifier_info *fen_info, 1906 - struct mlxsw_sp_fib_entry *fib_entry) 1648 + mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, 1649 + const struct fib_entry_notifier_info *fen_info, 1650 + struct mlxsw_sp_fib_entry *fib_entry) 1907 1651 { 1908 1652 struct fib_info *fi = fen_info->fi; 1909 - struct mlxsw_sp_rif *r = NULL; 1910 - int nhsel; 1911 - int err; 1912 1653 1913 1654 if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { 1914 1655 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; ··· 1913 1660 } 1914 1661 if (fen_info->type != RTN_UNICAST) 1915 1662 return -EINVAL; 1916 - 1917 - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1918 - const struct fib_nh *nh = &fi->fib_nh[nhsel]; 1919 - 1920 - if (!nh->nh_dev) 1921 - continue; 1922 - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev); 1923 - if (!r) { 1924 - /* In case router interface is not found for 1925 - * at least one of the nexthops, that means 1926 - * the nexthop points to some device unrelated 1927 - * to us. Set trap and pass the packets for 1928 - * this prefix to kernel. 
1929 - */ 1930 - break; 1931 - } 1932 - } 1933 - 1934 - if (!r) { 1935 - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; 1936 - return 0; 1937 - } 1938 - 1939 - if (fi->fib_scope != RT_SCOPE_UNIVERSE) { 1663 + if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) 1940 1664 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; 1941 - fib_entry->rif = r->rif; 1942 - } else { 1665 + else 1943 1666 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; 1944 - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); 1945 - if (err) 1946 - return err; 1947 - } 1948 - fib_info_offload_inc(fen_info->fi); 1949 1667 return 0; 1950 - } 1951 - 1952 - static void 1953 - mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, 1954 - struct mlxsw_sp_fib_entry *fib_entry) 1955 - { 1956 - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) 1957 - fib_info_offload_dec(fib_entry->fi); 1958 - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) 1959 - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); 1960 1668 } 1961 1669 1962 1670 static struct mlxsw_sp_fib_entry * ··· 1950 1736 goto err_fib_entry_create; 1951 1737 } 1952 1738 fib_entry->vr = vr; 1953 - fib_entry->fi = fi; 1954 1739 fib_entry->ref_count = 1; 1955 1740 1956 - err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry); 1741 + err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); 1957 1742 if (err) 1958 - goto err_fib4_entry_init; 1743 + goto err_fib4_entry_type_set; 1744 + 1745 + err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); 1746 + if (err) 1747 + goto err_nexthop_group_get; 1959 1748 1960 1749 return fib_entry; 1961 1750 1962 - err_fib4_entry_init: 1751 + err_nexthop_group_get: 1752 + err_fib4_entry_type_set: 1963 1753 mlxsw_sp_fib_entry_destroy(fib_entry); 1964 1754 err_fib_entry_create: 1965 1755 mlxsw_sp_vr_put(mlxsw_sp, vr); ··· 1994 1776 struct mlxsw_sp_vr *vr = fib_entry->vr; 1995 1777 1996 1778 if (--fib_entry->ref_count == 0) { 1997 - mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); 
1779 + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); 1998 1780 mlxsw_sp_fib_entry_destroy(fib_entry); 1999 1781 } 2000 1782 mlxsw_sp_vr_put(mlxsw_sp, vr); ··· 2140 1922 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); 2141 1923 } 2142 1924 1925 + static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) 1926 + { 1927 + char ritr_pl[MLXSW_REG_RITR_LEN]; 1928 + int err; 1929 + 1930 + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); 1931 + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); 1932 + if (WARN_ON_ONCE(err)) 1933 + return err; 1934 + 1935 + mlxsw_reg_ritr_enable_set(ritr_pl, false); 1936 + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); 1937 + } 1938 + 1939 + void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, 1940 + struct mlxsw_sp_rif *r) 1941 + { 1942 + mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif); 1943 + mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r); 1944 + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r); 1945 + } 1946 + 2143 1947 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) 2144 1948 { 2145 1949 char rgcr_pl[MLXSW_REG_RGCR_LEN]; ··· 2206 1966 2207 1967 struct mlxsw_sp_fib_event_work { 2208 1968 struct work_struct work; 2209 - struct fib_entry_notifier_info fen_info; 1969 + union { 1970 + struct fib_entry_notifier_info fen_info; 1971 + struct fib_nh_notifier_info fnh_info; 1972 + }; 2210 1973 struct mlxsw_sp *mlxsw_sp; 2211 1974 unsigned long event; 2212 1975 }; ··· 2237 1994 case FIB_EVENT_RULE_ADD: /* fall through */ 2238 1995 case FIB_EVENT_RULE_DEL: 2239 1996 mlxsw_sp_router_fib4_abort(mlxsw_sp); 1997 + break; 1998 + case FIB_EVENT_NH_ADD: /* fall through */ 1999 + case FIB_EVENT_NH_DEL: 2000 + mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, 2001 + fib_work->fnh_info.fib_nh); 2002 + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); 2240 2003 break; 2241 2004 } 2242 2005 rtnl_unlock(); ··· 2277 2028 */ 2278 2029 fib_info_hold(fib_work->fen_info.fi); 2279 2030 break; 
2031 + case FIB_EVENT_NH_ADD: /* fall through */ 2032 + case FIB_EVENT_NH_DEL: 2033 + memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); 2034 + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); 2035 + break; 2280 2036 } 2281 2037 2282 2038 mlxsw_core_schedule_work(&fib_work->work); ··· 2306 2052 int err; 2307 2053 2308 2054 INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); 2309 - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); 2310 2055 err = __mlxsw_sp_router_init(mlxsw_sp); 2311 2056 if (err) 2312 2057 return err; 2058 + 2059 + err = rhashtable_init(&mlxsw_sp->router.nexthop_ht, 2060 + &mlxsw_sp_nexthop_ht_params); 2061 + if (err) 2062 + goto err_nexthop_ht_init; 2063 + 2064 + err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht, 2065 + &mlxsw_sp_nexthop_group_ht_params); 2066 + if (err) 2067 + goto err_nexthop_group_ht_init; 2313 2068 2314 2069 mlxsw_sp_lpm_init(mlxsw_sp); 2315 2070 err = mlxsw_sp_vrs_init(mlxsw_sp); ··· 2342 2079 err_neigh_init: 2343 2080 mlxsw_sp_vrs_fini(mlxsw_sp); 2344 2081 err_vrs_init: 2082 + rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); 2083 + err_nexthop_group_ht_init: 2084 + rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); 2085 + err_nexthop_ht_init: 2345 2086 __mlxsw_sp_router_fini(mlxsw_sp); 2346 2087 return err; 2347 2088 } ··· 2355 2088 unregister_fib_notifier(&mlxsw_sp->fib_nb); 2356 2089 mlxsw_sp_neigh_fini(mlxsw_sp); 2357 2090 mlxsw_sp_vrs_fini(mlxsw_sp); 2091 + rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); 2092 + rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); 2358 2093 __mlxsw_sp_router_fini(mlxsw_sp); 2359 2094 }
+7
include/net/ip_fib.h
··· 214 214 u32 nlflags; 215 215 }; 216 216 217 + struct fib_nh_notifier_info { 218 + struct fib_notifier_info info; /* must be first */ 219 + struct fib_nh *fib_nh; 220 + }; 221 + 217 222 enum fib_event_type { 218 223 FIB_EVENT_ENTRY_ADD, 219 224 FIB_EVENT_ENTRY_DEL, 220 225 FIB_EVENT_RULE_ADD, 221 226 FIB_EVENT_RULE_DEL, 227 + FIB_EVENT_NH_ADD, 228 + FIB_EVENT_NH_DEL, 222 229 }; 223 230 224 231 int register_fib_notifier(struct notifier_block *nb,
+33
net/ipv4/fib_semantics.c
··· 1355 1355 return ret; 1356 1356 } 1357 1357 1358 + static int call_fib_nh_notifiers(struct fib_nh *fib_nh, 1359 + enum fib_event_type event_type) 1360 + { 1361 + struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); 1362 + struct fib_nh_notifier_info info = { 1363 + .fib_nh = fib_nh, 1364 + }; 1365 + 1366 + switch (event_type) { 1367 + case FIB_EVENT_NH_ADD: 1368 + if (fib_nh->nh_flags & RTNH_F_DEAD) 1369 + break; 1370 + if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1371 + fib_nh->nh_flags & RTNH_F_LINKDOWN) 1372 + break; 1373 + return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, 1374 + &info.info); 1375 + case FIB_EVENT_NH_DEL: 1376 + if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && 1377 + fib_nh->nh_flags & RTNH_F_LINKDOWN) || 1378 + (fib_nh->nh_flags & RTNH_F_DEAD)) 1379 + return call_fib_notifiers(dev_net(fib_nh->nh_dev), 1380 + event_type, &info.info); 1381 + default: 1382 + break; 1383 + } 1384 + 1385 + return NOTIFY_DONE; 1386 + } 1387 + 1358 1388 /* Event force Flags Description 1359 1389 * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host 1360 1390 * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host ··· 1426 1396 nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; 1427 1397 break; 1428 1398 } 1399 + call_fib_nh_notifiers(nexthop_nh, 1400 + FIB_EVENT_NH_DEL); 1429 1401 dead++; 1430 1402 } 1431 1403 #ifdef CONFIG_IP_ROUTE_MULTIPATH ··· 1582 1550 continue; 1583 1551 alive++; 1584 1552 nexthop_nh->nh_flags &= ~nh_flags; 1553 + call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); 1585 1554 } endfor_nexthops(fi) 1586 1555 1587 1556 if (alive > 0) {