Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'support-bandwidth-clamping-in-mana-using-net-shapers'

Erni Sri Satya Vennela says:

====================
Support bandwidth clamping in mana using net shapers

This patchset introduces hardware-backed bandwidth rate limiting
for MANA NICs via the net_shaper_ops interface, enabling efficient and
fine-grained traffic shaping directly on the device.

Previously, MANA lacked a mechanism for user-configurable bandwidth
control. With this addition, users can now configure shaping parameters,
allowing better traffic management and performance isolation.

The implementation includes the net_shaper_ops callbacks in the MANA
driver and supports one shaper per vport. Shaping support is added via
mana_set_bw_clamp(), allowing the configuration of bandwidth rates
in 100 Mbps increments (minimum 100 Mbps). The driver validates input
and rejects unsupported values. On failure, it restores the previous
configuration, which is queried using mana_query_link_cfg(), or
retains the current state.

To prevent potential deadlocks introduced by net_shaper_ops, switch to
_locked variants of NAPI APIs when netdevops_lock is held during
VF setup and teardown.

Also, add support for ethtool get_link_ksettings to report the maximum
link speed supported by the SKU in Mbps.

When these APIs are invoked on older hardware, or on hardware that does
not support them, the speed is reported as UNKNOWN and the net-shaper
calls to set the speed fail.
====================

Link: https://patch.msgid.link/1750144656-2021-1-git-send-email-ernis@linux.microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+239 -9
+4
drivers/net/ethernet/microsoft/mana/hw_channel.c
··· 891 891 } 892 892 893 893 if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { 894 + if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) { 895 + err = -EOPNOTSUPP; 896 + goto out; 897 + } 894 898 if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT) 895 899 dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", 896 900 ctx->status_code);
+186 -9
drivers/net/ethernet/microsoft/mana/mana_en.c
··· 719 719 return err; 720 720 } 721 721 722 + static int mana_shaper_set(struct net_shaper_binding *binding, 723 + const struct net_shaper *shaper, 724 + struct netlink_ext_ack *extack) 725 + { 726 + struct mana_port_context *apc = netdev_priv(binding->netdev); 727 + u32 old_speed, rate; 728 + int err; 729 + 730 + if (shaper->handle.scope != NET_SHAPER_SCOPE_NETDEV) { 731 + NL_SET_ERR_MSG_MOD(extack, "net shaper scope should be netdev"); 732 + return -EINVAL; 733 + } 734 + 735 + if (apc->handle.id && shaper->handle.id != apc->handle.id) { 736 + NL_SET_ERR_MSG_MOD(extack, "Cannot create multiple shapers"); 737 + return -EOPNOTSUPP; 738 + } 739 + 740 + if (!shaper->bw_max || (shaper->bw_max % 100000000)) { 741 + NL_SET_ERR_MSG_MOD(extack, "Please use multiples of 100Mbps for bandwidth"); 742 + return -EINVAL; 743 + } 744 + 745 + rate = div_u64(shaper->bw_max, 1000); /* Convert bps to Kbps */ 746 + rate = div_u64(rate, 1000); /* Convert Kbps to Mbps */ 747 + 748 + /* Get current speed */ 749 + err = mana_query_link_cfg(apc); 750 + old_speed = (err) ? SPEED_UNKNOWN : apc->speed; 751 + 752 + if (!err) { 753 + err = mana_set_bw_clamp(apc, rate, TRI_STATE_TRUE); 754 + apc->speed = (err) ? old_speed : rate; 755 + apc->handle = (err) ? 
apc->handle : shaper->handle; 756 + } 757 + 758 + return err; 759 + } 760 + 761 + static int mana_shaper_del(struct net_shaper_binding *binding, 762 + const struct net_shaper_handle *handle, 763 + struct netlink_ext_ack *extack) 764 + { 765 + struct mana_port_context *apc = netdev_priv(binding->netdev); 766 + int err; 767 + 768 + err = mana_set_bw_clamp(apc, 0, TRI_STATE_FALSE); 769 + 770 + if (!err) { 771 + /* Reset mana port context parameters */ 772 + apc->handle.id = 0; 773 + apc->handle.scope = NET_SHAPER_SCOPE_UNSPEC; 774 + apc->speed = 0; 775 + } 776 + 777 + return err; 778 + } 779 + 780 + static void mana_shaper_cap(struct net_shaper_binding *binding, 781 + enum net_shaper_scope scope, 782 + unsigned long *flags) 783 + { 784 + *flags = BIT(NET_SHAPER_A_CAPS_SUPPORT_BW_MAX) | 785 + BIT(NET_SHAPER_A_CAPS_SUPPORT_METRIC_BPS); 786 + } 787 + 788 + static const struct net_shaper_ops mana_shaper_ops = { 789 + .set = mana_shaper_set, 790 + .delete = mana_shaper_del, 791 + .capabilities = mana_shaper_cap, 792 + }; 793 + 722 794 static const struct net_device_ops mana_devops = { 723 795 .ndo_open = mana_open, 724 796 .ndo_stop = mana_close, ··· 801 729 .ndo_bpf = mana_bpf, 802 730 .ndo_xdp_xmit = mana_xdp_xmit, 803 731 .ndo_change_mtu = mana_change_mtu, 732 + .net_shaper_ops = &mana_shaper_ops, 804 733 }; 805 734 806 735 static void mana_cleanup_port_context(struct mana_port_context *apc) ··· 847 774 err = mana_gd_send_request(gc, in_len, in_buf, out_len, 848 775 out_buf); 849 776 if (err || resp->status) { 777 + if (err == -EOPNOTSUPP) 778 + return err; 779 + 850 780 if (req->req.msg_type != MANA_QUERY_PHY_STAT) 851 781 dev_err(dev, "Failed to send mana message: %d, 0x%x\n", 852 782 err, resp->status); ··· 1236 1160 out: 1237 1161 kfree(req); 1238 1162 return err; 1163 + } 1164 + 1165 + int mana_query_link_cfg(struct mana_port_context *apc) 1166 + { 1167 + struct net_device *ndev = apc->ndev; 1168 + struct mana_query_link_config_resp resp = {}; 1169 + struct 
mana_query_link_config_req req = {}; 1170 + int err; 1171 + 1172 + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_LINK_CONFIG, 1173 + sizeof(req), sizeof(resp)); 1174 + 1175 + req.vport = apc->port_handle; 1176 + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; 1177 + 1178 + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1179 + sizeof(resp)); 1180 + 1181 + if (err) { 1182 + if (err == -EOPNOTSUPP) { 1183 + netdev_info_once(ndev, "MANA_QUERY_LINK_CONFIG not supported\n"); 1184 + return err; 1185 + } 1186 + netdev_err(ndev, "Failed to query link config: %d\n", err); 1187 + return err; 1188 + } 1189 + 1190 + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_LINK_CONFIG, 1191 + sizeof(resp)); 1192 + 1193 + if (err || resp.hdr.status) { 1194 + netdev_err(ndev, "Failed to query link config: %d, 0x%x\n", err, 1195 + resp.hdr.status); 1196 + if (!err) 1197 + err = -EOPNOTSUPP; 1198 + return err; 1199 + } 1200 + 1201 + if (resp.qos_unconfigured) { 1202 + err = -EINVAL; 1203 + return err; 1204 + } 1205 + apc->speed = resp.link_speed_mbps; 1206 + apc->max_speed = resp.qos_speed_mbps; 1207 + return 0; 1208 + } 1209 + 1210 + int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, 1211 + int enable_clamping) 1212 + { 1213 + struct mana_set_bw_clamp_resp resp = {}; 1214 + struct mana_set_bw_clamp_req req = {}; 1215 + struct net_device *ndev = apc->ndev; 1216 + int err; 1217 + 1218 + mana_gd_init_req_hdr(&req.hdr, MANA_SET_BW_CLAMP, 1219 + sizeof(req), sizeof(resp)); 1220 + req.vport = apc->port_handle; 1221 + req.link_speed_mbps = speed; 1222 + req.enable_clamping = enable_clamping; 1223 + 1224 + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, 1225 + sizeof(resp)); 1226 + 1227 + if (err) { 1228 + if (err == -EOPNOTSUPP) { 1229 + netdev_info_once(ndev, "MANA_SET_BW_CLAMP not supported\n"); 1230 + return err; 1231 + } 1232 + netdev_err(ndev, "Failed to set bandwidth clamp for speed %u, err = %d", 1233 + speed, err); 1234 + return err; 1235 + } 1236 + 1237 + err 
= mana_verify_resp_hdr(&resp.hdr, MANA_SET_BW_CLAMP, 1238 + sizeof(resp)); 1239 + 1240 + if (err || resp.hdr.status) { 1241 + netdev_err(ndev, "Failed to set bandwidth clamp: %d, 0x%x\n", err, 1242 + resp.hdr.status); 1243 + if (!err) 1244 + err = -EOPNOTSUPP; 1245 + return err; 1246 + } 1247 + 1248 + if (resp.qos_unconfigured) 1249 + netdev_info(ndev, "QoS is unconfigured\n"); 1250 + 1251 + return 0; 1239 1252 } 1240 1253 1241 1254 int mana_create_wq_obj(struct mana_port_context *apc, ··· 2077 1912 napi = &apc->tx_qp[i].tx_cq.napi; 2078 1913 if (apc->tx_qp[i].txq.napi_initialized) { 2079 1914 napi_synchronize(napi); 2080 - napi_disable(napi); 2081 - netif_napi_del(napi); 1915 + netdev_lock_ops_to_full(napi->dev); 1916 + napi_disable_locked(napi); 1917 + netif_napi_del_locked(napi); 1918 + netdev_unlock_full_to_ops(napi->dev); 2082 1919 apc->tx_qp[i].txq.napi_initialized = false; 2083 1920 } 2084 1921 mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); ··· 2232 2065 2233 2066 mana_create_txq_debugfs(apc, i); 2234 2067 2235 - netif_napi_add_tx(net, &cq->napi, mana_poll); 2236 - napi_enable(&cq->napi); 2068 + set_bit(NAPI_STATE_NO_BUSY_POLL, &cq->napi.state); 2069 + netdev_lock_ops_to_full(net); 2070 + netif_napi_add_locked(net, &cq->napi, mana_poll); 2071 + napi_enable_locked(&cq->napi); 2072 + netdev_unlock_full_to_ops(net); 2237 2073 txq->napi_initialized = true; 2238 2074 2239 2075 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); ··· 2272 2102 if (napi_initialized) { 2273 2103 napi_synchronize(napi); 2274 2104 2275 - napi_disable(napi); 2276 - 2277 - netif_napi_del(napi); 2105 + netdev_lock_ops_to_full(napi->dev); 2106 + napi_disable_locked(napi); 2107 + netif_napi_del_locked(napi); 2108 + netdev_unlock_full_to_ops(napi->dev); 2278 2109 } 2279 2110 xdp_rxq_info_unreg(&rxq->xdp_rxq); 2280 2111 ··· 2526 2355 2527 2356 gc->cq_table[cq->gdma_id] = cq->gdma_cq; 2528 2357 2529 - netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1); 2358 + 
netdev_lock_ops_to_full(ndev); 2359 + netif_napi_add_weight_locked(ndev, &cq->napi, mana_poll, 1); 2360 + netdev_unlock_full_to_ops(ndev); 2530 2361 2531 2362 WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, 2532 2363 cq->napi.napi_id)); 2533 2364 WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, 2534 2365 rxq->page_pool)); 2535 2366 2536 - napi_enable(&cq->napi); 2367 + netdev_lock_ops_to_full(ndev); 2368 + napi_enable_locked(&cq->napi); 2369 + netdev_unlock_full_to_ops(ndev); 2537 2370 2538 2371 mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); 2539 2372 out: ··· 3175 3000 netdev_err(ndev, "Unable to register netdev.\n"); 3176 3001 goto free_indir; 3177 3002 } 3003 + 3004 + debugfs_create_u32("current_speed", 0400, apc->mana_port_debugfs, &apc->speed); 3178 3005 3179 3006 return 0; 3180 3007
+6
drivers/net/ethernet/microsoft/mana/mana_ethtool.c
··· 495 495 static int mana_get_link_ksettings(struct net_device *ndev, 496 496 struct ethtool_link_ksettings *cmd) 497 497 { 498 + struct mana_port_context *apc = netdev_priv(ndev); 499 + int err; 500 + 501 + err = mana_query_link_cfg(apc); 502 + cmd->base.speed = (err) ? SPEED_UNKNOWN : apc->max_speed; 503 + 498 504 cmd->base.duplex = DUPLEX_FULL; 499 505 cmd->base.port = PORT_OTHER; 500 506
+1
include/net/mana/gdma.h
··· 10 10 #include "shm_channel.h" 11 11 12 12 #define GDMA_STATUS_MORE_ENTRIES 0x00000105 13 + #define GDMA_STATUS_CMD_UNSUPPORTED 0xffffffff 13 14 14 15 /* Structures labeled with "HW DATA" are exchanged with the hardware. All of 15 16 * them are naturally aligned and hence don't need __packed.
+42
include/net/mana/mana.h
··· 5 5 #define _MANA_H 6 6 7 7 #include <net/xdp.h> 8 + #include <net/net_shaper.h> 8 9 9 10 #include "gdma.h" 10 11 #include "hw_channel.h" ··· 527 526 struct mutex vport_mutex; 528 527 int vport_use_count; 529 528 529 + /* Net shaper handle*/ 530 + struct net_shaper_handle handle; 531 + 530 532 u16 port_idx; 533 + /* Currently configured speed (mbps) */ 534 + u32 speed; 535 + /* Maximum speed supported by the SKU (mbps) */ 536 + u32 max_speed; 531 537 532 538 bool port_is_up; 533 539 bool port_st_save; /* Saved port state */ ··· 570 562 void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); 571 563 int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); 572 564 void mana_query_gf_stats(struct mana_port_context *apc); 565 + int mana_query_link_cfg(struct mana_port_context *apc); 566 + int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed, 567 + int enable_clamping); 573 568 void mana_query_phy_stats(struct mana_port_context *apc); 574 569 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues); 575 570 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc); ··· 600 589 MANA_FENCE_RQ = 0x20006, 601 590 MANA_CONFIG_VPORT_RX = 0x20007, 602 591 MANA_QUERY_VPORT_CONFIG = 0x20008, 592 + MANA_QUERY_LINK_CONFIG = 0x2000A, 593 + MANA_SET_BW_CLAMP = 0x2000B, 603 594 MANA_QUERY_PHY_STAT = 0x2000c, 604 595 605 596 /* Privileged commands for the PF mode */ ··· 610 597 MANA_REGISTER_HW_PORT = 0x28003, 611 598 MANA_DEREGISTER_HW_PORT = 0x28004, 612 599 }; 600 + 601 + /* Query Link Configuration*/ 602 + struct mana_query_link_config_req { 603 + struct gdma_req_hdr hdr; 604 + mana_handle_t vport; 605 + }; /* HW DATA */ 606 + 607 + struct mana_query_link_config_resp { 608 + struct gdma_resp_hdr hdr; 609 + u32 qos_speed_mbps; 610 + u8 qos_unconfigured; 611 + u8 reserved1[3]; 612 + u32 link_speed_mbps; 613 + u8 reserved2[4]; 614 + }; /* HW DATA */ 615 + 616 + /* Set Bandwidth Clamp*/ 617 + struct 
mana_set_bw_clamp_req { 618 + struct gdma_req_hdr hdr; 619 + mana_handle_t vport; 620 + enum TRI_STATE enable_clamping; 621 + u32 link_speed_mbps; 622 + }; /* HW DATA */ 623 + 624 + struct mana_set_bw_clamp_resp { 625 + struct gdma_resp_hdr hdr; 626 + u8 qos_unconfigured; 627 + u8 reserved[7]; 628 + }; /* HW DATA */ 613 629 614 630 /* Query Device Configuration */ 615 631 struct mana_query_device_cfg_req {