Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'add-ethtool-support-for-completion-queue-event-size'

Subbaraya Sundeep says:

====================
Add ethtool support for completion queue event size

After a packet is sent or received, the NIC posts
a completion queue event which consists of transmission status
(like send success or error) and received status (like
pointers to packet fragments). These completion events may
also use a ring similar to rx and tx rings. This patchset
introduces cqe-size ethtool parameter to modify the size
of the completion queue event if NIC hardware has that capability.
A bigger completion queue event can have more receive buffer pointers;
in turn the NIC can transfer a bigger frame from the wire as long as
the hardware (MAC) receive frame size limit is not exceeded.

Patch 1 adds support for setting/getting cqe-size via
ethtool -G and ethtool -g.

Patch 2 includes octeontx2 driver changes to use
completion queue event size set from ethtool -G.
====================

Link: https://lore.kernel.org/r/1645555153-4932-1-git-send-email-sbhatta@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+55 -8
+11
Documentation/networking/ethtool-netlink.rst
··· 861 861 ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring 862 862 ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring 863 863 ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split 864 + ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE 864 865 ==================================== ====== =========================== 865 866 866 867 ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with ··· 886 885 ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring 887 886 ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring 888 887 ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring 888 + ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE 889 889 ==================================== ====== =========================== 890 890 891 891 Kernel checks that requested ring sizes do not exceed limits reported by 892 892 driver. Driver may impose additional constraints and may not suspport all 893 893 attributes. 894 894 895 + 896 + ``ETHTOOL_A_RINGS_CQE_SIZE`` specifies the completion queue event size. 897 + Completion queue events(CQE) are the events posted by NIC to indicate the 898 + completion status of a packet when the packet is sent(like send success or 899 + error) or received(like pointers to packet fragments). The CQE size parameter 900 + enables to modify the CQE size other than default size if NIC supports it. 901 + A bigger CQE can have more receive buffer pointers inturn NIC can transfer 902 + a bigger frame from wire. Based on the NIC hardware, the overall completion 903 + queue size can be adjusted in the driver if CQE size is modified. 895 904 896 905 CHANNELS_GET 897 906 ============
+2 -2
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
··· 1048 1048 struct nix_lf_alloc_rsp *rsp; 1049 1049 int err; 1050 1050 1051 - pfvf->qset.xqe_size = NIX_XQESZ_W16 ? 128 : 512; 1051 + pfvf->qset.xqe_size = pfvf->hw.xqe_size; 1052 1052 1053 1053 /* Get memory to put this msg */ 1054 1054 nixlf = otx2_mbox_alloc_msg_nix_lf_alloc(&pfvf->mbox); ··· 1061 1061 nixlf->cq_cnt = pfvf->qset.cq_cnt; 1062 1062 nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; 1063 1063 nixlf->rss_grps = MAX_RSS_GROUPS; 1064 - nixlf->xqe_sz = NIX_XQESZ_W16; 1064 + nixlf->xqe_sz = pfvf->hw.xqe_size == 128 ? NIX_XQESZ_W16 : NIX_XQESZ_W64; 1065 1065 /* We don't know absolute NPA LF idx attached. 1066 1066 * AF will replace 'RVU_DEFAULT_PF_FUNC' with 1067 1067 * NPA LF attached to this RVU PF/VF.
+1
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
··· 181 181 182 182 #define OTX2_DEFAULT_RBUF_LEN 2048 183 183 u16 rbuf_len; 184 + u32 xqe_size; 184 185 185 186 /* NPA */ 186 187 u32 stack_pg_ptrs; /* No of ptrs per stack page */
+14 -3
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
··· 372 372 ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX); 373 373 ring->tx_pending = qs->sqe_cnt ? qs->sqe_cnt : Q_COUNT(Q_SIZE_4K); 374 374 kernel_ring->rx_buf_len = pfvf->hw.rbuf_len; 375 + kernel_ring->cqe_size = pfvf->hw.xqe_size; 375 376 } 376 377 377 378 static int otx2_set_ringparam(struct net_device *netdev, ··· 383 382 struct otx2_nic *pfvf = netdev_priv(netdev); 384 383 u32 rx_buf_len = kernel_ring->rx_buf_len; 385 384 u32 old_rx_buf_len = pfvf->hw.rbuf_len; 385 + u32 xqe_size = kernel_ring->cqe_size; 386 386 bool if_up = netif_running(netdev); 387 387 struct otx2_qset *qs = &pfvf->qset; 388 388 u32 rx_count, tx_count; ··· 397 395 if (rx_buf_len && (rx_buf_len < 1536 || rx_buf_len > 32768)) { 398 396 netdev_err(netdev, 399 397 "Receive buffer range is 1536 - 32768"); 398 + return -EINVAL; 399 + } 400 + 401 + if (xqe_size != 128 && xqe_size != 512) { 402 + netdev_err(netdev, 403 + "Completion event size must be 128 or 512"); 400 404 return -EINVAL; 401 405 } 402 406 ··· 424 416 tx_count = Q_COUNT(Q_SIZE(tx_count, 3)); 425 417 426 418 if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt && 427 - rx_buf_len == old_rx_buf_len) 419 + rx_buf_len == old_rx_buf_len && xqe_size == pfvf->hw.xqe_size) 428 420 return 0; 429 421 430 422 if (if_up) ··· 435 427 qs->rqe_cnt = rx_count; 436 428 437 429 pfvf->hw.rbuf_len = rx_buf_len; 430 + pfvf->hw.xqe_size = xqe_size; 438 431 439 432 if (if_up) 440 433 return netdev->netdev_ops->ndo_open(netdev); ··· 1231 1222 static const struct ethtool_ops otx2_ethtool_ops = { 1232 1223 .supported_coalesce_params = ETHTOOL_COALESCE_USECS | 1233 1224 ETHTOOL_COALESCE_MAX_FRAMES, 1234 - .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN, 1225 + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | 1226 + ETHTOOL_RING_USE_CQE_SIZE, 1235 1227 .get_link = otx2_get_link, 1236 1228 .get_drvinfo = otx2_get_drvinfo, 1237 1229 .get_strings = otx2_get_strings, ··· 1352 1342 static const struct ethtool_ops otx2vf_ethtool_ops = { 1353 1343 
.supported_coalesce_params = ETHTOOL_COALESCE_USECS | 1354 1344 ETHTOOL_COALESCE_MAX_FRAMES, 1355 - .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN, 1345 + .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | 1346 + ETHTOOL_RING_USE_CQE_SIZE, 1356 1347 .get_link = otx2_get_link, 1357 1348 .get_drvinfo = otx2vf_get_drvinfo, 1358 1349 .get_strings = otx2vf_get_strings,
+2
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
··· 2585 2585 hw->tot_tx_queues = qcount; 2586 2586 hw->max_queues = qcount; 2587 2587 hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; 2588 + /* Use CQE of 128 byte descriptor size by default */ 2589 + hw->xqe_size = 128; 2588 2590 2589 2591 num_vec = pci_msix_vec_count(pdev); 2590 2592 hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
+2
drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
··· 572 572 hw->max_queues = qcount; 573 573 hw->tot_tx_queues = qcount; 574 574 hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; 575 + /* Use CQE of 128 byte descriptor size by default */ 576 + hw->xqe_size = 128; 575 577 576 578 hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, 577 579 GFP_KERNEL);
+4
include/linux/ethtool.h
··· 71 71 * struct kernel_ethtool_ringparam - RX/TX ring configuration 72 72 * @rx_buf_len: Current length of buffers on the rx ring. 73 73 * @tcp_data_split: Scatter packet headers and data to separate buffers 74 + * @cqe_size: Size of TX/RX completion queue event 74 75 */ 75 76 struct kernel_ethtool_ringparam { 76 77 u32 rx_buf_len; 77 78 u8 tcp_data_split; 79 + u32 cqe_size; 78 80 }; 79 81 80 82 /** 81 83 * enum ethtool_supported_ring_param - indicator caps for setting ring params 82 84 * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len 85 + * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size 83 86 */ 84 87 enum ethtool_supported_ring_param { 85 88 ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), 89 + ETHTOOL_RING_USE_CQE_SIZE = BIT(1), 86 90 }; 87 91 88 92 #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit))
+1
include/uapi/linux/ethtool_netlink.h
··· 337 337 ETHTOOL_A_RINGS_TX, /* u32 */ 338 338 ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ 339 339 ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ 340 + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ 340 341 341 342 /* add new constants above here */ 342 343 __ETHTOOL_A_RINGS_CNT,
+1 -1
net/ethtool/netlink.h
··· 363 363 extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; 364 364 extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; 365 365 extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; 366 - extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_BUF_LEN + 1]; 366 + extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1]; 367 367 extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; 368 368 extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; 369 369 extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
+17 -2
net/ethtool/rings.c
··· 54 54 nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ 55 55 nla_total_size(sizeof(u32)) + /* _RINGS_TX */ 56 56 nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ 57 - nla_total_size(sizeof(u8)); /* _RINGS_TCP_DATA_SPLIT */ 57 + nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ 58 + nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */ 58 59 } 59 60 60 61 static int rings_fill_reply(struct sk_buff *skb, ··· 92 91 (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) || 93 92 (kr->tcp_data_split && 94 93 (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, 95 - kr->tcp_data_split)))) 94 + kr->tcp_data_split))) || 95 + (kr->cqe_size && 96 + (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size)))) 96 97 return -EMSGSIZE; 97 98 98 99 return 0; ··· 122 119 [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, 123 120 [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, 124 121 [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), 122 + [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), 125 123 }; 126 124 127 125 int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) ··· 163 159 ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); 164 160 ethnl_update_u32(&kernel_ringparam.rx_buf_len, 165 161 tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); 162 + ethnl_update_u32(&kernel_ringparam.cqe_size, 163 + tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); 166 164 ret = 0; 167 165 if (!mod) 168 166 goto out_ops; ··· 193 187 NL_SET_ERR_MSG_ATTR(info->extack, 194 188 tb[ETHTOOL_A_RINGS_RX_BUF_LEN], 195 189 "setting rx buf len not supported"); 190 + goto out_ops; 191 + } 192 + 193 + if (kernel_ringparam.cqe_size && 194 + !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { 195 + ret = -EOPNOTSUPP; 196 + NL_SET_ERR_MSG_ATTR(info->extack, 197 + tb[ETHTOOL_A_RINGS_CQE_SIZE], 198 + "setting cqe size not supported"); 196 199 goto out_ops; 197 200 } 198 201