Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'devlink-Introduce-PCI-PF-VF-ports-and-attributes'

Parav Pandit says:

====================
devlink: Introduce PCI PF, VF ports and attributes

This patchset carries forward the work initiated in [1] and the discussion
further concluded at [2].

To improve the visibility of a representor netdevice and its association
with a PF, VF, or physical port, two new devlink port flavours are added:
PCI PF and PCI VF ports.

A sample eswitch view can be seen below, which will be further extended to
mdev subdevices of a PCI function in the future.

Patch-1 moves the physical port's attributes to a new structure
Patch-2 enhances the netlink response to consider the port flavour
Patch-3,4 extend the devlink port attributes and port flavours
Patch-5 extends the mlx5 driver to register devlink ports for PF, VF and
physical link.

                                +---+      +---+
                              vf|   |      |   | pf
                                +-+-+      +-+-+
physical link <---------+         |          |
                        |         |          |
                        |         |          |
                      +-+-+     +-+-+      +-+-+
                      | 1 |     | 2 |      | 3 |
                   +--+---+-----+---+------+---+--+
                   |  physical   vf         pf    |
                   |  port       port       port  |
                   |                              |
                   |            eswitch           |
                   |                              |
                   +------------------------------+

[1] https://www.spinics.net/lists/netdev/msg555797.html
[2] https://marc.info/?l=linux-netdev&m=155354609408485&w=2

Changelog:
v5->v6:
- Fixed port flavour check order for PCI PF vs other flavours in
netlink response.
- Changed 'physical' to 'phys'.
v4->v5:
- Split first patch to two patches to handle netlink response in
separate patch.
- Corrected typo 'otwerwise' to 'otherwise' in patches 3 and 4.
v3->v4:
- Addressed comments from Jiri.
- Split first patch to two patches.
- Renamed phys_port to physical to be consistent with pci_pf.
- Removed port_number from __devlink_port_attrs_set and moved
assignment to caller function.
- Used capital letter while moving old comment to new structure.
- Removed helper function is_devlink_phy_port_num_supported().
v2->v3:
- Made port_number and split_port_number applicable only to
physical port flavours.
v1->v2:
- Updated new APIs and mlx5 driver to drop port_number for PF, VF
attributes
- Updated port_number comment for its usage
- Limited putting port_number to physical ports
====================

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

+232 -53
+77 -31
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
··· 37 37 #include <net/act_api.h> 38 38 #include <net/netevent.h> 39 39 #include <net/arp.h> 40 + #include <net/devlink.h> 40 41 41 42 #include "eswitch.h" 42 43 #include "en.h" ··· 1120 1119 return ret; 1121 1120 } 1122 1121 1123 - static int mlx5e_rep_get_phys_port_name(struct net_device *dev, 1124 - char *buf, size_t len) 1125 - { 1126 - struct mlx5e_priv *priv = netdev_priv(dev); 1127 - struct mlx5e_rep_priv *rpriv = priv->ppriv; 1128 - struct mlx5_eswitch_rep *rep = rpriv->rep; 1129 - unsigned int fn; 1130 - int ret; 1131 - 1132 - fn = PCI_FUNC(priv->mdev->pdev->devfn); 1133 - if (fn >= MLX5_MAX_PORTS) 1134 - return -EOPNOTSUPP; 1135 - 1136 - if (rep->vport == MLX5_VPORT_UPLINK) 1137 - ret = snprintf(buf, len, "p%d", fn); 1138 - else if (rep->vport == MLX5_VPORT_PF) 1139 - ret = snprintf(buf, len, "pf%d", fn); 1140 - else 1141 - ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); 1142 - 1143 - if (ret >= len) 1144 - return -EOPNOTSUPP; 1145 - 1146 - return 0; 1147 - } 1148 - 1149 1122 static int 1150 1123 mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, 1151 1124 struct tc_cls_flower_offload *cls_flower, int flags) ··· 1273 1298 return 0; 1274 1299 } 1275 1300 1301 + static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) 1302 + { 1303 + struct mlx5e_priv *priv = netdev_priv(dev); 1304 + struct mlx5e_rep_priv *rpriv = priv->ppriv; 1305 + 1306 + return &rpriv->dl_port; 1307 + } 1308 + 1276 1309 static const struct net_device_ops mlx5e_netdev_ops_rep = { 1277 1310 .ndo_open = mlx5e_rep_open, 1278 1311 .ndo_stop = mlx5e_rep_close, 1279 1312 .ndo_start_xmit = mlx5e_xmit, 1280 - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, 1281 1313 .ndo_setup_tc = mlx5e_rep_setup_tc, 1314 + .ndo_get_devlink_port = mlx5e_get_devlink_port, 1282 1315 .ndo_get_stats64 = mlx5e_rep_get_stats, 1283 1316 .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, 1284 1317 .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, 1285 1318 .ndo_change_mtu 
= mlx5e_rep_change_mtu, 1286 - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, 1287 1319 }; 1288 1320 1289 1321 static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { ··· 1298 1316 .ndo_stop = mlx5e_close, 1299 1317 .ndo_start_xmit = mlx5e_xmit, 1300 1318 .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, 1301 - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, 1302 1319 .ndo_setup_tc = mlx5e_rep_setup_tc, 1320 + .ndo_get_devlink_port = mlx5e_get_devlink_port, 1303 1321 .ndo_get_stats64 = mlx5e_get_stats, 1304 1322 .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, 1305 1323 .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, ··· 1312 1330 .ndo_get_vf_config = mlx5e_get_vf_config, 1313 1331 .ndo_get_vf_stats = mlx5e_get_vf_stats, 1314 1332 .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, 1315 - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, 1316 1333 .ndo_set_features = mlx5e_set_features, 1317 1334 }; 1318 1335 ··· 1712 1731 .max_tc = MLX5E_MAX_NUM_TC, 1713 1732 }; 1714 1733 1734 + static bool 1735 + is_devlink_port_supported(const struct mlx5_core_dev *dev, 1736 + const struct mlx5e_rep_priv *rpriv) 1737 + { 1738 + return rpriv->rep->vport == MLX5_VPORT_UPLINK || 1739 + rpriv->rep->vport == MLX5_VPORT_PF || 1740 + mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); 1741 + } 1742 + 1743 + static int register_devlink_port(struct mlx5_core_dev *dev, 1744 + struct mlx5e_rep_priv *rpriv) 1745 + { 1746 + struct devlink *devlink = priv_to_devlink(dev); 1747 + struct mlx5_eswitch_rep *rep = rpriv->rep; 1748 + struct netdev_phys_item_id ppid = {}; 1749 + int ret; 1750 + 1751 + if (!is_devlink_port_supported(dev, rpriv)) 1752 + return 0; 1753 + 1754 + ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); 1755 + if (ret) 1756 + return ret; 1757 + 1758 + if (rep->vport == MLX5_VPORT_UPLINK) 1759 + devlink_port_attrs_set(&rpriv->dl_port, 1760 + DEVLINK_PORT_FLAVOUR_PHYSICAL, 1761 + PCI_FUNC(dev->pdev->devfn), false, 0, 
1762 + &ppid.id[0], ppid.id_len); 1763 + else if (rep->vport == MLX5_VPORT_PF) 1764 + devlink_port_attrs_pci_pf_set(&rpriv->dl_port, 1765 + &ppid.id[0], ppid.id_len, 1766 + dev->pdev->devfn); 1767 + else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) 1768 + devlink_port_attrs_pci_vf_set(&rpriv->dl_port, 1769 + &ppid.id[0], ppid.id_len, 1770 + dev->pdev->devfn, 1771 + rep->vport - 1); 1772 + 1773 + return devlink_port_register(devlink, &rpriv->dl_port, rep->vport); 1774 + } 1775 + 1776 + static void unregister_devlink_port(struct mlx5_core_dev *dev, 1777 + struct mlx5e_rep_priv *rpriv) 1778 + { 1779 + if (is_devlink_port_supported(dev, rpriv)) 1780 + devlink_port_unregister(&rpriv->dl_port); 1781 + } 1782 + 1715 1783 /* e-Switch vport representors */ 1716 1784 static int 1717 1785 mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) ··· 1812 1782 goto err_detach_netdev; 1813 1783 } 1814 1784 1785 + err = register_devlink_port(dev, rpriv); 1786 + if (err) { 1787 + esw_warn(dev, "Failed to register devlink port %d\n", 1788 + rep->vport); 1789 + goto err_neigh_cleanup; 1790 + } 1791 + 1815 1792 err = register_netdev(netdev); 1816 1793 if (err) { 1817 1794 pr_warn("Failed to register representor netdev for vport %d\n", 1818 1795 rep->vport); 1819 - goto err_neigh_cleanup; 1796 + goto err_devlink_cleanup; 1820 1797 } 1821 1798 1799 + if (is_devlink_port_supported(dev, rpriv)) 1800 + devlink_port_type_eth_set(&rpriv->dl_port, netdev); 1822 1801 return 0; 1802 + 1803 + err_devlink_cleanup: 1804 + unregister_devlink_port(dev, rpriv); 1823 1805 1824 1806 err_neigh_cleanup: 1825 1807 mlx5e_rep_neigh_cleanup(rpriv); ··· 1855 1813 struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); 1856 1814 struct net_device *netdev = rpriv->netdev; 1857 1815 struct mlx5e_priv *priv = netdev_priv(netdev); 1816 + struct mlx5_core_dev *dev = priv->mdev; 1858 1817 void *ppriv = priv->ppriv; 1859 1818 1819 + if (is_devlink_port_supported(dev, 
rpriv)) 1820 + devlink_port_type_clear(&rpriv->dl_port); 1860 1821 unregister_netdev(netdev); 1822 + unregister_devlink_port(dev, rpriv); 1861 1823 mlx5e_rep_neigh_cleanup(rpriv); 1862 1824 mlx5e_detach_netdev(priv); 1863 1825 if (rep->vport == MLX5_VPORT_UPLINK)
+1
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
··· 86 86 struct mlx5_flow_handle *vport_rx_rule; 87 87 struct list_head vport_sqs_list; 88 88 struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ 89 + struct devlink_port dl_port; 89 90 }; 90 91 91 92 static inline
+29 -2
include/net/devlink.h
··· 38 38 char priv[0] __aligned(NETDEV_ALIGN); 39 39 }; 40 40 41 + struct devlink_port_phys_attrs { 42 + u32 port_number; /* Same value as "split group". 43 + * A physical port which is visible to the user 44 + * for a given port flavour. 45 + */ 46 + u32 split_subport_number; 47 + }; 48 + 49 + struct devlink_port_pci_pf_attrs { 50 + u16 pf; /* Associated PCI PF for this port. */ 51 + }; 52 + 53 + struct devlink_port_pci_vf_attrs { 54 + u16 pf; /* Associated PCI PF for this port. */ 55 + u16 vf; /* Associated PCI VF for of the PCI PF for this port. */ 56 + }; 57 + 41 58 struct devlink_port_attrs { 42 59 u8 set:1, 43 60 split:1, 44 61 switch_port:1; 45 62 enum devlink_port_flavour flavour; 46 - u32 port_number; /* same value as "split group" */ 47 - u32 split_subport_number; 48 63 struct netdev_phys_item_id switch_id; 64 + union { 65 + struct devlink_port_phys_attrs phys; 66 + struct devlink_port_pci_pf_attrs pci_pf; 67 + struct devlink_port_pci_vf_attrs pci_vf; 68 + }; 49 69 }; 50 70 51 71 struct devlink_port { ··· 610 590 u32 split_subport_number, 611 591 const unsigned char *switch_id, 612 592 unsigned char switch_id_len); 593 + void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, 594 + const unsigned char *switch_id, 595 + unsigned char switch_id_len, u16 pf); 596 + void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, 597 + const unsigned char *switch_id, 598 + unsigned char switch_id_len, 599 + u16 pf, u16 vf); 613 600 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, 614 601 u32 size, u16 ingress_pools_count, 615 602 u16 egress_pools_count, u16 ingress_tc_count,
+11
include/uapi/linux/devlink.h
··· 169 169 DEVLINK_PORT_FLAVOUR_DSA, /* Distributed switch architecture 170 170 * interconnect port. 171 171 */ 172 + DEVLINK_PORT_FLAVOUR_PCI_PF, /* Represents eswitch port for 173 + * the PCI PF. It is an internal 174 + * port that faces the PCI PF. 175 + */ 176 + DEVLINK_PORT_FLAVOUR_PCI_VF, /* Represents eswitch port 177 + * for the PCI VF. It is an internal 178 + * port that faces the PCI VF. 179 + */ 172 180 }; 173 181 174 182 enum devlink_param_cmode { ··· 344 336 DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG, /* string */ 345 337 DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE, /* u64 */ 346 338 DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL, /* u64 */ 339 + 340 + DEVLINK_ATTR_PORT_PCI_PF_NUMBER, /* u16 */ 341 + DEVLINK_ATTR_PORT_PCI_VF_NUMBER, /* u16 */ 347 342 348 343 /* add new attributes above here, update the policy in devlink.c */ 349 344
+114 -20
net/core/devlink.c
··· 515 515 return 0; 516 516 if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) 517 517 return -EMSGSIZE; 518 - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->port_number)) 518 + if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) { 519 + if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, 520 + attrs->pci_pf.pf)) 521 + return -EMSGSIZE; 522 + } else if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_VF) { 523 + if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, 524 + attrs->pci_vf.pf) || 525 + nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, 526 + attrs->pci_vf.vf)) 527 + return -EMSGSIZE; 528 + } 529 + if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PHYSICAL && 530 + devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU && 531 + devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA) 532 + return 0; 533 + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, 534 + attrs->phys.port_number)) 519 535 return -EMSGSIZE; 520 536 if (!attrs->split) 521 537 return 0; 522 - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, attrs->port_number)) 538 + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, 539 + attrs->phys.port_number)) 523 540 return -EMSGSIZE; 524 541 if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, 525 - attrs->split_subport_number)) 542 + attrs->phys.split_subport_number)) 526 543 return -EMSGSIZE; 527 544 return 0; 528 545 } ··· 5755 5738 } 5756 5739 EXPORT_SYMBOL_GPL(devlink_port_type_clear); 5757 5740 5741 + static int __devlink_port_attrs_set(struct devlink_port *devlink_port, 5742 + enum devlink_port_flavour flavour, 5743 + const unsigned char *switch_id, 5744 + unsigned char switch_id_len) 5745 + { 5746 + struct devlink_port_attrs *attrs = &devlink_port->attrs; 5747 + 5748 + if (WARN_ON(devlink_port->registered)) 5749 + return -EEXIST; 5750 + attrs->set = true; 5751 + attrs->flavour = flavour; 5752 + if (switch_id) { 5753 + attrs->switch_port = true; 5754 + if (WARN_ON(switch_id_len > 
MAX_PHYS_ITEM_ID_LEN)) 5755 + switch_id_len = MAX_PHYS_ITEM_ID_LEN; 5756 + memcpy(attrs->switch_id.id, switch_id, switch_id_len); 5757 + attrs->switch_id.id_len = switch_id_len; 5758 + } else { 5759 + attrs->switch_port = false; 5760 + } 5761 + return 0; 5762 + } 5763 + 5758 5764 /** 5759 5765 * devlink_port_attrs_set - Set port attributes 5760 5766 * ··· 5800 5760 unsigned char switch_id_len) 5801 5761 { 5802 5762 struct devlink_port_attrs *attrs = &devlink_port->attrs; 5763 + int ret; 5803 5764 5804 - if (WARN_ON(devlink_port->registered)) 5765 + ret = __devlink_port_attrs_set(devlink_port, flavour, 5766 + switch_id, switch_id_len); 5767 + if (ret) 5805 5768 return; 5806 - attrs->set = true; 5807 - attrs->flavour = flavour; 5808 - attrs->port_number = port_number; 5809 5769 attrs->split = split; 5810 - attrs->split_subport_number = split_subport_number; 5811 - if (switch_id) { 5812 - attrs->switch_port = true; 5813 - if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN)) 5814 - switch_id_len = MAX_PHYS_ITEM_ID_LEN; 5815 - memcpy(attrs->switch_id.id, switch_id, switch_id_len); 5816 - attrs->switch_id.id_len = switch_id_len; 5817 - } else { 5818 - attrs->switch_port = false; 5819 - } 5770 + attrs->phys.port_number = port_number; 5771 + attrs->phys.split_subport_number = split_subport_number; 5820 5772 } 5821 5773 EXPORT_SYMBOL_GPL(devlink_port_attrs_set); 5774 + 5775 + /** 5776 + * devlink_port_attrs_pci_pf_set - Set PCI PF port attributes 5777 + * 5778 + * @devlink_port: devlink port 5779 + * @pf: associated PF for the devlink port instance 5780 + * @switch_id: if the port is part of switch, this is buffer with ID, 5781 + * otherwise this is NULL 5782 + * @switch_id_len: length of the switch_id buffer 5783 + */ 5784 + void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, 5785 + const unsigned char *switch_id, 5786 + unsigned char switch_id_len, u16 pf) 5787 + { 5788 + struct devlink_port_attrs *attrs = &devlink_port->attrs; 5789 + int ret; 5790 + 5791 
+ ret = __devlink_port_attrs_set(devlink_port, 5792 + DEVLINK_PORT_FLAVOUR_PCI_PF, 5793 + switch_id, switch_id_len); 5794 + if (ret) 5795 + return; 5796 + 5797 + attrs->pci_pf.pf = pf; 5798 + } 5799 + EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set); 5800 + 5801 + /** 5802 + * devlink_port_attrs_pci_vf_set - Set PCI VF port attributes 5803 + * 5804 + * @devlink_port: devlink port 5805 + * @pf: associated PF for the devlink port instance 5806 + * @vf: associated VF of a PF for the devlink port instance 5807 + * @switch_id: if the port is part of switch, this is buffer with ID, 5808 + * otherwise this is NULL 5809 + * @switch_id_len: length of the switch_id buffer 5810 + */ 5811 + void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, 5812 + const unsigned char *switch_id, 5813 + unsigned char switch_id_len, 5814 + u16 pf, u16 vf) 5815 + { 5816 + struct devlink_port_attrs *attrs = &devlink_port->attrs; 5817 + int ret; 5818 + 5819 + ret = __devlink_port_attrs_set(devlink_port, 5820 + DEVLINK_PORT_FLAVOUR_PCI_VF, 5821 + switch_id, switch_id_len); 5822 + if (ret) 5823 + return; 5824 + attrs->pci_vf.pf = pf; 5825 + attrs->pci_vf.vf = vf; 5826 + } 5827 + EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set); 5822 5828 5823 5829 static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, 5824 5830 char *name, size_t len) ··· 5878 5792 switch (attrs->flavour) { 5879 5793 case DEVLINK_PORT_FLAVOUR_PHYSICAL: 5880 5794 if (!attrs->split) 5881 - n = snprintf(name, len, "p%u", attrs->port_number); 5795 + n = snprintf(name, len, "p%u", attrs->phys.port_number); 5882 5796 else 5883 - n = snprintf(name, len, "p%us%u", attrs->port_number, 5884 - attrs->split_subport_number); 5797 + n = snprintf(name, len, "p%us%u", 5798 + attrs->phys.port_number, 5799 + attrs->phys.split_subport_number); 5885 5800 break; 5886 5801 case DEVLINK_PORT_FLAVOUR_CPU: 5887 5802 case DEVLINK_PORT_FLAVOUR_DSA: ··· 5891 5804 */ 5892 5805 WARN_ON(1); 5893 5806 return -EINVAL; 5807 
+ case DEVLINK_PORT_FLAVOUR_PCI_PF: 5808 + n = snprintf(name, len, "pf%u", attrs->pci_pf.pf); 5809 + break; 5810 + case DEVLINK_PORT_FLAVOUR_PCI_VF: 5811 + n = snprintf(name, len, "pf%uvf%u", 5812 + attrs->pci_vf.pf, attrs->pci_vf.vf); 5813 + break; 5894 5814 } 5895 5815 5896 5816 if (n >= len)