Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'blk-dim-v2' into rdma.git for-next

Generic DIM

From: Tal Gilboa and Yamin Fridman

Implement net DIM over a generic DIM library, add RDMA DIM

dim.h lib exposes an implementation of the DIM algorithm for
dynamically-tuned interrupt moderation for networking interfaces.

We want similar functionality for other protocols, which might need to
optimize interrupts differently. The main motivation here is DIM for the
NVMf storage protocol.

The current DIM implementation prioritizes reducing interrupt overhead over
latency. Also, in order to reduce DIM's own overhead, the algorithm might
take some time to identify that it needs to change profiles. While this is
acceptable for networking, it might not work well in other scenarios.

Here we propose a new structure to DIM. The idea is to allow a slightly
modified functionality without the risk of breaking Net DIM behavior for
netdev. We verified there are no degradations in current DIM behavior with
the modified solution.

Suggested solution:
- Common logic is implemented in lib/dim/dim.c
- Net DIM (existing) logic is implemented in lib/dim/net_dim.c, which uses
the common logic in dim.c
- Any new DIM logic will be implemented in "lib/dim/new_dim.c".
This new implementation will expose modified versions of profiles,
dim_step() and dim_decision().
- DIM API is declared in include/linux/dim.h for all implementations.

Pros for this solution are:
- Zero impact on existing net_dim implementation and usage
- Relatively more code reuse (compared to two separate solutions)
- Increased extensibility

Required for dependencies in the next series.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

+728 -489
+2 -1
MAINTAINERS
··· 5600 5600 DYNAMIC INTERRUPT MODERATION 5601 5601 M: Tal Gilboa <talgi@mellanox.com> 5602 5602 S: Maintained 5603 - F: include/linux/net_dim.h 5603 + F: include/linux/dim.h 5604 + F: lib/dim/ 5604 5605 5605 5606 DZ DECSTATION DZ11 SERIAL DRIVER 5606 5607 M: "Maciej W. Rozycki" <macro@linux-mips.org>
+1
drivers/net/ethernet/broadcom/Kconfig
··· 8 8 default y 9 9 depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ 10 10 SIBYTE_SB1xxx_SOC 11 + select DIMLIB 11 12 ---help--- 12 13 If you have a network (Ethernet) chipset belonging to this class, 13 14 say Y.
+10 -10
drivers/net/ethernet/broadcom/bcmsysport.c
··· 609 609 struct ethtool_coalesce *ec) 610 610 { 611 611 struct bcm_sysport_priv *priv = netdev_priv(dev); 612 - struct net_dim_cq_moder moder; 612 + struct dim_cq_moder moder; 613 613 u32 usecs, pkts; 614 614 unsigned int i; 615 615 ··· 992 992 { 993 993 struct bcm_sysport_priv *priv = 994 994 container_of(napi, struct bcm_sysport_priv, napi); 995 - struct net_dim_sample dim_sample; 995 + struct dim_sample dim_sample; 996 996 unsigned int work_done = 0; 997 997 998 998 work_done = bcm_sysport_desc_rx(priv, budget); ··· 1016 1016 } 1017 1017 1018 1018 if (priv->dim.use_dim) { 1019 - net_dim_sample(priv->dim.event_ctr, priv->dim.packets, 1020 - priv->dim.bytes, &dim_sample); 1019 + dim_update_sample(priv->dim.event_ctr, priv->dim.packets, 1020 + priv->dim.bytes, &dim_sample); 1021 1021 net_dim(&priv->dim.dim, dim_sample); 1022 1022 } 1023 1023 ··· 1087 1087 1088 1088 static void bcm_sysport_dim_work(struct work_struct *work) 1089 1089 { 1090 - struct net_dim *dim = container_of(work, struct net_dim, work); 1090 + struct dim *dim = container_of(work, struct dim, work); 1091 1091 struct bcm_sysport_net_dim *ndim = 1092 1092 container_of(dim, struct bcm_sysport_net_dim, dim); 1093 1093 struct bcm_sysport_priv *priv = 1094 1094 container_of(ndim, struct bcm_sysport_priv, dim); 1095 - struct net_dim_cq_moder cur_profile = 1096 - net_dim_get_rx_moderation(dim->mode, dim->profile_ix); 1095 + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, 1096 + dim->profile_ix); 1097 1097 1098 1098 bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); 1099 - dim->state = NET_DIM_START_MEASURE; 1099 + dim->state = DIM_START_MEASURE; 1100 1100 } 1101 1101 1102 1102 /* RX and misc interrupt routine */ ··· 1437 1437 struct bcm_sysport_net_dim *dim = &priv->dim; 1438 1438 1439 1439 INIT_WORK(&dim->dim.work, cb); 1440 - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; 1440 + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 1441 1441 
dim->event_ctr = 0; 1442 1442 dim->packets = 0; 1443 1443 dim->bytes = 0; ··· 1446 1446 static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) 1447 1447 { 1448 1448 struct bcm_sysport_net_dim *dim = &priv->dim; 1449 - struct net_dim_cq_moder moder; 1449 + struct dim_cq_moder moder; 1450 1450 u32 usecs, pkts; 1451 1451 1452 1452 usecs = priv->rx_coalesce_usecs;
+2 -2
drivers/net/ethernet/broadcom/bcmsysport.h
··· 11 11 #include <linux/bitmap.h> 12 12 #include <linux/ethtool.h> 13 13 #include <linux/if_vlan.h> 14 - #include <linux/net_dim.h> 14 + #include <linux/dim.h> 15 15 16 16 /* Receive/transmit descriptor format */ 17 17 #define DESC_ADDR_HI_STATUS_LEN 0x00 ··· 702 702 u16 event_ctr; 703 703 unsigned long packets; 704 704 unsigned long bytes; 705 - struct net_dim dim; 705 + struct dim dim; 706 706 }; 707 707 708 708 /* Software view of the TX ring */
+6 -6
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 2130 2130 } 2131 2131 } 2132 2132 if (bp->flags & BNXT_FLAG_DIM) { 2133 - struct net_dim_sample dim_sample; 2133 + struct dim_sample dim_sample; 2134 2134 2135 - net_dim_sample(cpr->event_ctr, 2136 - cpr->rx_packets, 2137 - cpr->rx_bytes, 2138 - &dim_sample); 2135 + dim_update_sample(cpr->event_ctr, 2136 + cpr->rx_packets, 2137 + cpr->rx_bytes, 2138 + &dim_sample); 2139 2139 net_dim(&cpr->dim, dim_sample); 2140 2140 } 2141 2141 return work_done; ··· 7813 7813 7814 7814 if (bp->bnapi[i]->rx_ring) { 7815 7815 INIT_WORK(&cpr->dim.work, bnxt_dim_work); 7816 - cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; 7816 + cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 7817 7817 } 7818 7818 napi_enable(&bp->bnapi[i]->napi); 7819 7819 }
+2 -2
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 24 24 #include <net/devlink.h> 25 25 #include <net/dst_metadata.h> 26 26 #include <net/xdp.h> 27 - #include <linux/net_dim.h> 27 + #include <linux/dim.h> 28 28 29 29 struct tx_bd { 30 30 __le32 tx_bd_len_flags_type; ··· 810 810 u64 rx_bytes; 811 811 u64 event_ctr; 812 812 813 - struct net_dim dim; 813 + struct dim dim; 814 814 815 815 union { 816 816 struct tx_cmp *cp_desc_ring[MAX_CP_PAGES];
+3 -3
drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c
··· 11 11 #include <linux/module.h> 12 12 #include <linux/pci.h> 13 13 #include "bnxt_hsi.h" 14 - #include <linux/net_dim.h> 14 + #include <linux/dim.h> 15 15 #include "bnxt.h" 16 16 #include "bnxt_debugfs.h" 17 17 ··· 21 21 char __user *buffer, 22 22 size_t count, loff_t *ppos) 23 23 { 24 - struct net_dim *dim = filep->private_data; 24 + struct dim *dim = filep->private_data; 25 25 int len; 26 26 char *buf; 27 27 ··· 61 61 .read = debugfs_dim_read, 62 62 }; 63 63 64 - static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx, 64 + static struct dentry *debugfs_dim_ring_init(struct dim *dim, int ring_idx, 65 65 struct dentry *dd) 66 66 { 67 67 static char qname[16];
+4 -5
drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c
··· 7 7 * the Free Software Foundation. 8 8 */ 9 9 10 - #include <linux/net_dim.h> 10 + #include <linux/dim.h> 11 11 #include "bnxt_hsi.h" 12 12 #include "bnxt.h" 13 13 14 14 void bnxt_dim_work(struct work_struct *work) 15 15 { 16 - struct net_dim *dim = container_of(work, struct net_dim, 17 - work); 16 + struct dim *dim = container_of(work, struct dim, work); 18 17 struct bnxt_cp_ring_info *cpr = container_of(dim, 19 18 struct bnxt_cp_ring_info, 20 19 dim); 21 20 struct bnxt_napi *bnapi = container_of(cpr, 22 21 struct bnxt_napi, 23 22 cp_ring); 24 - struct net_dim_cq_moder cur_moder = 23 + struct dim_cq_moder cur_moder = 25 24 net_dim_get_rx_moderation(dim->mode, dim->profile_ix); 26 25 27 26 cpr->rx_ring_coal.coal_ticks = cur_moder.usec; 28 27 cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; 29 28 30 29 bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); 31 - dim->state = NET_DIM_START_MEASURE; 30 + dim->state = DIM_START_MEASURE; 32 31 }
+9 -9
drivers/net/ethernet/broadcom/genet/bcmgenet.c
··· 640 640 static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, 641 641 struct ethtool_coalesce *ec) 642 642 { 643 - struct net_dim_cq_moder moder; 643 + struct dim_cq_moder moder; 644 644 u32 usecs, pkts; 645 645 646 646 ring->rx_coalesce_usecs = ec->rx_coalesce_usecs; ··· 1895 1895 { 1896 1896 struct bcmgenet_rx_ring *ring = container_of(napi, 1897 1897 struct bcmgenet_rx_ring, napi); 1898 - struct net_dim_sample dim_sample; 1898 + struct dim_sample dim_sample; 1899 1899 unsigned int work_done; 1900 1900 1901 1901 work_done = bcmgenet_desc_rx(ring, budget); ··· 1906 1906 } 1907 1907 1908 1908 if (ring->dim.use_dim) { 1909 - net_dim_sample(ring->dim.event_ctr, ring->dim.packets, 1910 - ring->dim.bytes, &dim_sample); 1909 + dim_update_sample(ring->dim.event_ctr, ring->dim.packets, 1910 + ring->dim.bytes, &dim_sample); 1911 1911 net_dim(&ring->dim.dim, dim_sample); 1912 1912 } 1913 1913 ··· 1916 1916 1917 1917 static void bcmgenet_dim_work(struct work_struct *work) 1918 1918 { 1919 - struct net_dim *dim = container_of(work, struct net_dim, work); 1919 + struct dim *dim = container_of(work, struct dim, work); 1920 1920 struct bcmgenet_net_dim *ndim = 1921 1921 container_of(dim, struct bcmgenet_net_dim, dim); 1922 1922 struct bcmgenet_rx_ring *ring = 1923 1923 container_of(ndim, struct bcmgenet_rx_ring, dim); 1924 - struct net_dim_cq_moder cur_profile = 1924 + struct dim_cq_moder cur_profile = 1925 1925 net_dim_get_rx_moderation(dim->mode, dim->profile_ix); 1926 1926 1927 1927 bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); 1928 - dim->state = NET_DIM_START_MEASURE; 1928 + dim->state = DIM_START_MEASURE; 1929 1929 } 1930 1930 1931 1931 /* Assign skb to RX DMA descriptor. 
*/ ··· 2082 2082 struct bcmgenet_net_dim *dim = &ring->dim; 2083 2083 2084 2084 INIT_WORK(&dim->dim.work, cb); 2085 - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; 2085 + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 2086 2086 dim->event_ctr = 0; 2087 2087 dim->packets = 0; 2088 2088 dim->bytes = 0; ··· 2091 2091 static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) 2092 2092 { 2093 2093 struct bcmgenet_net_dim *dim = &ring->dim; 2094 - struct net_dim_cq_moder moder; 2094 + struct dim_cq_moder moder; 2095 2095 u32 usecs, pkts; 2096 2096 2097 2097 usecs = ring->rx_coalesce_usecs;
+2 -2
drivers/net/ethernet/broadcom/genet/bcmgenet.h
··· 13 13 #include <linux/mii.h> 14 14 #include <linux/if_vlan.h> 15 15 #include <linux/phy.h> 16 - #include <linux/net_dim.h> 16 + #include <linux/dim.h> 17 17 18 18 /* total number of Buffer Descriptors, same for Rx/Tx */ 19 19 #define TOTAL_DESC 256 ··· 578 578 u16 event_ctr; 579 579 unsigned long packets; 580 580 unsigned long bytes; 581 - struct net_dim dim; 581 + struct dim dim; 582 582 }; 583 583 584 584 struct bcmgenet_rx_ring {
+1
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
··· 34 34 depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE 35 35 depends on IPV6=y || IPV6=n || MLX5_CORE=m 36 36 select PAGE_POOL 37 + select DIMLIB 37 38 default n 38 39 ---help--- 39 40 Ethernet support in Mellanox Technologies ConnectX-4 NIC.
+5 -5
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 48 48 #include <linux/rhashtable.h> 49 49 #include <net/switchdev.h> 50 50 #include <net/xdp.h> 51 - #include <linux/net_dim.h> 51 + #include <linux/dim.h> 52 52 #include <linux/bits.h> 53 53 #include "wq.h" 54 54 #include "mlx5_core.h" ··· 238 238 u16 num_channels; 239 239 u8 num_tc; 240 240 bool rx_cqe_compress_def; 241 - struct net_dim_cq_moder rx_cq_moderation; 242 - struct net_dim_cq_moder tx_cq_moderation; 243 241 bool tunneled_offload_en; 242 + struct dim_cq_moder rx_cq_moderation; 243 + struct dim_cq_moder tx_cq_moderation; 244 244 bool lro_en; 245 245 u8 tx_min_inline_mode; 246 246 bool vlan_strip_disable; ··· 356 356 /* dirtied @completion */ 357 357 u16 cc; 358 358 u32 dma_fifo_cc; 359 - struct net_dim dim; /* Adaptive Moderation */ 359 + struct dim dim; /* Adaptive Moderation */ 360 360 361 361 /* dirtied @xmit */ 362 362 u16 pc ____cacheline_aligned_in_smp; ··· 596 596 int ix; 597 597 unsigned int hw_mtu; 598 598 599 - struct net_dim dim; /* Dynamic Interrupt Moderation */ 599 + struct dim dim; /* Dynamic Interrupt Moderation */ 600 600 601 601 /* XDP */ 602 602 struct bpf_prog *xdp_prog;
+7 -7
drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
··· 30 30 * SOFTWARE. 31 31 */ 32 32 33 - #include <linux/net_dim.h> 33 + #include <linux/dim.h> 34 34 #include "en.h" 35 35 36 36 static void 37 - mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, 37 + mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, 38 38 struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) 39 39 { 40 40 mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); 41 - dim->state = NET_DIM_START_MEASURE; 41 + dim->state = DIM_START_MEASURE; 42 42 } 43 43 44 44 void mlx5e_rx_dim_work(struct work_struct *work) 45 45 { 46 - struct net_dim *dim = container_of(work, struct net_dim, work); 46 + struct dim *dim = container_of(work, struct dim, work); 47 47 struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); 48 - struct net_dim_cq_moder cur_moder = 48 + struct dim_cq_moder cur_moder = 49 49 net_dim_get_rx_moderation(dim->mode, dim->profile_ix); 50 50 51 51 mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); ··· 53 53 54 54 void mlx5e_tx_dim_work(struct work_struct *work) 55 55 { 56 - struct net_dim *dim = container_of(work, struct net_dim, work); 56 + struct dim *dim = container_of(work, struct dim, work); 57 57 struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); 58 - struct net_dim_cq_moder cur_moder = 58 + struct dim_cq_moder cur_moder = 59 59 net_dim_get_tx_moderation(dim->mode, dim->profile_ix); 60 60 61 61 mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
··· 466 466 int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, 467 467 struct ethtool_coalesce *coal) 468 468 { 469 - struct net_dim_cq_moder *rx_moder, *tx_moder; 469 + struct dim_cq_moder *rx_moder, *tx_moder; 470 470 471 471 if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) 472 472 return -EOPNOTSUPP; ··· 521 521 int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, 522 522 struct ethtool_coalesce *coal) 523 523 { 524 - struct net_dim_cq_moder *rx_moder, *tx_moder; 524 + struct dim_cq_moder *rx_moder, *tx_moder; 525 525 struct mlx5_core_dev *mdev = priv->mdev; 526 526 struct mlx5e_channels new_channels = {}; 527 527 int err = 0;
+11 -11
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 584 584 585 585 switch (params->rx_cq_moderation.cq_period_mode) { 586 586 case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: 587 - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; 587 + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; 588 588 break; 589 589 case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: 590 590 default: 591 - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; 591 + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 592 592 } 593 593 594 594 rq->page_cache.head = 0; ··· 1571 1571 } 1572 1572 1573 1573 static int mlx5e_open_cq(struct mlx5e_channel *c, 1574 - struct net_dim_cq_moder moder, 1574 + struct dim_cq_moder moder, 1575 1575 struct mlx5e_cq_param *param, 1576 1576 struct mlx5e_cq *cq) 1577 1577 { ··· 1776 1776 struct mlx5e_channel **cp) 1777 1777 { 1778 1778 int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); 1779 - struct net_dim_cq_moder icocq_moder = {0, 0}; 1779 + struct dim_cq_moder icocq_moder = {0, 0}; 1780 1780 struct net_device *netdev = priv->netdev; 1781 1781 struct mlx5e_channel *c; 1782 1782 unsigned int irq; ··· 2153 2153 2154 2154 mlx5e_build_common_cq_param(priv, param); 2155 2155 2156 - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; 2156 + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 2157 2157 } 2158 2158 2159 2159 static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, ··· 4421 4421 link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; 4422 4422 } 4423 4423 4424 - static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) 4424 + static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) 4425 4425 { 4426 - struct net_dim_cq_moder moder; 4426 + struct dim_cq_moder moder; 4427 4427 4428 4428 moder.cq_period_mode = cq_period_mode; 4429 4429 moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; ··· 4434 4434 return moder; 4435 4435 } 4436 4436 4437 - static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) 4437 + static struct 
dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) 4438 4438 { 4439 - struct net_dim_cq_moder moder; 4439 + struct dim_cq_moder moder; 4440 4440 4441 4441 moder.cq_period_mode = cq_period_mode; 4442 4442 moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; ··· 4450 4450 static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) 4451 4451 { 4452 4452 return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? 4453 - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : 4454 - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; 4453 + DIM_CQ_PERIOD_MODE_START_FROM_CQE : 4454 + DIM_CQ_PERIOD_MODE_START_FROM_EQE; 4455 4455 } 4456 4456 4457 4457 void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+4 -6
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
··· 48 48 static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) 49 49 { 50 50 struct mlx5e_sq_stats *stats = sq->stats; 51 - struct net_dim_sample dim_sample; 51 + struct dim_sample dim_sample; 52 52 53 53 if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) 54 54 return; 55 55 56 - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, 57 - &dim_sample); 56 + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); 58 57 net_dim(&sq->dim, dim_sample); 59 58 } 60 59 61 60 static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) 62 61 { 63 62 struct mlx5e_rq_stats *stats = rq->stats; 64 - struct net_dim_sample dim_sample; 63 + struct dim_sample dim_sample; 65 64 66 65 if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) 67 66 return; 68 67 69 - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, 70 - &dim_sample); 68 + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); 71 69 net_dim(&rq->dim, dim_sample); 72 70 } 73 71
+366
include/linux/dim.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* Copyright (c) 2019 Mellanox Technologies. */ 3 + 4 + #ifndef DIM_H 5 + #define DIM_H 6 + 7 + #include <linux/module.h> 8 + 9 + /** 10 + * Number of events between DIM iterations. 11 + * Causes a moderation of the algorithm run. 12 + */ 13 + #define DIM_NEVENTS 64 14 + 15 + /** 16 + * Check whether a difference between values justifies taking an action. 17 + * We consider 10% difference as significant. 18 + */ 19 + #define IS_SIGNIFICANT_DIFF(val, ref) \ 20 + (((100UL * abs((val) - (ref))) / (ref)) > 10) 21 + 22 + /** 23 + * Calculate the gap between two values. 24 + * Take wrap-around and variable size into consideration. 25 + */ 26 + #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \ 27 + & (BIT_ULL(bits) - 1)) 28 + 29 + /** 30 + * Structure for CQ moderation values. 31 + * Used for communications between DIM and its consumer. 32 + * 33 + * @usec: CQ timer suggestion (by DIM) 34 + * @pkts: CQ packet counter suggestion (by DIM) 35 + * @cq_period_mode: CQ period count mode (from CQE/EQE) 36 + */ 37 + struct dim_cq_moder { 38 + u16 usec; 39 + u16 pkts; 40 + u16 comps; 41 + u8 cq_period_mode; 42 + }; 43 + 44 + /** 45 + * Structure for DIM sample data. 46 + * Used for communications between DIM and its consumer. 47 + * 48 + * @time: Sample timestamp 49 + * @pkt_ctr: Number of packets 50 + * @byte_ctr: Number of bytes 51 + * @event_ctr: Number of events 52 + */ 53 + struct dim_sample { 54 + ktime_t time; 55 + u32 pkt_ctr; 56 + u32 byte_ctr; 57 + u16 event_ctr; 58 + u32 comp_ctr; 59 + }; 60 + 61 + /** 62 + * Structure for DIM stats. 63 + * Used for holding current measured rates. 
64 + * 65 + * @ppms: Packets per msec 66 + * @bpms: Bytes per msec 67 + * @epms: Events per msec 68 + */ 69 + struct dim_stats { 70 + int ppms; /* packets per msec */ 71 + int bpms; /* bytes per msec */ 72 + int epms; /* events per msec */ 73 + int cpms; /* completions per msec */ 74 + int cpe_ratio; /* ratio of completions to events */ 75 + }; 76 + 77 + /** 78 + * Main structure for dynamic interrupt moderation (DIM). 79 + * Used for holding all information about a specific DIM instance. 80 + * 81 + * @state: Algorithm state (see below) 82 + * @prev_stats: Measured rates from previous iteration (for comparison) 83 + * @start_sample: Sampled data at start of current iteration 84 + * @work: Work to perform on action required 85 + * @profile_ix: Current moderation profile 86 + * @mode: CQ period count mode 87 + * @tune_state: Algorithm tuning state (see below) 88 + * @steps_right: Number of steps taken towards higher moderation 89 + * @steps_left: Number of steps taken towards lower moderation 90 + * @tired: Parking depth counter 91 + */ 92 + struct dim { 93 + u8 state; 94 + struct dim_stats prev_stats; 95 + struct dim_sample start_sample; 96 + struct dim_sample measuring_sample; 97 + struct work_struct work; 98 + u8 profile_ix; 99 + u8 mode; 100 + u8 tune_state; 101 + u8 steps_right; 102 + u8 steps_left; 103 + u8 tired; 104 + }; 105 + 106 + /** 107 + * enum dim_cq_period_mode 108 + * 109 + * These are the modes for CQ period count. 110 + * 111 + * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE 112 + * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset) 113 + * @DIM_CQ_PERIOD_NUM_MODES: Number of modes 114 + */ 115 + enum { 116 + DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, 117 + DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, 118 + DIM_CQ_PERIOD_NUM_MODES 119 + }; 120 + 121 + /** 122 + * enum dim_state 123 + * 124 + * These are the DIM algorithm states. 
125 + * These will determine if the algorithm is in a valid state to start an iteration. 126 + * 127 + * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile) 128 + * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if 129 + * need to perform an action 130 + * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure 131 + */ 132 + enum { 133 + DIM_START_MEASURE, 134 + DIM_MEASURE_IN_PROGRESS, 135 + DIM_APPLY_NEW_PROFILE, 136 + }; 137 + 138 + /** 139 + * enum dim_tune_state 140 + * 141 + * These are the DIM algorithm tune states. 142 + * These will determine which action the algorithm should perform. 143 + * 144 + * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference 145 + * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0 146 + * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels 147 + * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels 148 + */ 149 + enum { 150 + DIM_PARKING_ON_TOP, 151 + DIM_PARKING_TIRED, 152 + DIM_GOING_RIGHT, 153 + DIM_GOING_LEFT, 154 + }; 155 + 156 + /** 157 + * enum dim_stats_state 158 + * 159 + * These are the DIM algorithm statistics states. 160 + * These will determine the verdict of current iteration. 161 + * 162 + * @DIM_STATS_WORSE: Current iteration shows worse performance than before 163 + * @DIM_STATS_SAME: Current iteration shows same performance as before 164 + * @DIM_STATS_BETTER: Current iteration shows better performance than before 165 + */ 166 + enum { 167 + DIM_STATS_WORSE, 168 + DIM_STATS_SAME, 169 + DIM_STATS_BETTER, 170 + }; 171 + 172 + /** 173 + * enum dim_step_result 174 + * 175 + * These are the DIM algorithm step results. 176 + * These describe the result of a step. 
177 + * 178 + * @DIM_STEPPED: Performed a regular step 179 + * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to 180 + * tired parking 181 + * @DIM_ON_EDGE: Stepped to the most left/right profile 182 + */ 183 + enum { 184 + DIM_STEPPED, 185 + DIM_TOO_TIRED, 186 + DIM_ON_EDGE, 187 + }; 188 + 189 + /** 190 + * dim_on_top - check if current state is a good place to stop (top location) 191 + * @dim: DIM context 192 + * 193 + * Check if current profile is a good place to park at. 194 + * This will result in reducing the DIM checks frequency as we assume we 195 + * probably shouldn't change profiles, unless the traffic pattern has changed. 196 + */ 197 + bool dim_on_top(struct dim *dim); 198 + 199 + /** 200 + * dim_turn - change profile altering direction 201 + * @dim: DIM context 202 + * 203 + * Go left if we were going right and vice-versa. 204 + * Do nothing if currently parking. 205 + */ 206 + void dim_turn(struct dim *dim); 207 + 208 + /** 209 + * dim_park_on_top - enter a parking state on a top location 210 + * @dim: DIM context 211 + * 212 + * Enter parking state. 213 + * Clear all movement history. 214 + */ 215 + void dim_park_on_top(struct dim *dim); 216 + 217 + /** 218 + * dim_park_tired - enter a tired parking state 219 + * @dim: DIM context 220 + * 221 + * Enter parking state. 222 + * Clear all movement history and cause DIM checks frequency to reduce. 223 + */ 224 + void dim_park_tired(struct dim *dim); 225 + 226 + /** 227 + * dim_calc_stats - calculate the difference between two samples 228 + * @start: start sample 229 + * @end: end sample 230 + * @curr_stats: delta between samples 231 + * 232 + * Calculate the delta between two samples (in data rates). 233 + * Takes into consideration counter wrap-around. 
234 + */ 235 + void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, 236 + struct dim_stats *curr_stats); 237 + 238 + /** 239 + * dim_update_sample - set a sample's fields with given values 240 + * @event_ctr: number of events to set 241 + * @packets: number of packets to set 242 + * @bytes: number of bytes to set 243 + * @s: DIM sample 244 + */ 245 + static inline void 246 + dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) 247 + { 248 + s->time = ktime_get(); 249 + s->pkt_ctr = packets; 250 + s->byte_ctr = bytes; 251 + s->event_ctr = event_ctr; 252 + } 253 + 254 + /** 255 + * dim_update_sample_with_comps - set a sample's fields with given 256 + * values including the completion parameter 257 + * @event_ctr: number of events to set 258 + * @packets: number of packets to set 259 + * @bytes: number of bytes to set 260 + * @comps: number of completions to set 261 + * @s: DIM sample 262 + */ 263 + static inline void 264 + dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps, 265 + struct dim_sample *s) 266 + { 267 + dim_update_sample(event_ctr, packets, bytes, s); 268 + s->comp_ctr = comps; 269 + } 270 + 271 + /* Net DIM */ 272 + 273 + /* 274 + * Net DIM profiles: 275 + * There is a different set of profiles for each CQ period mode. 276 + * There is a different set of profiles for RX/TX CQs. 
277 + * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES 278 + */ 279 + #define NET_DIM_PARAMS_NUM_PROFILES 5 280 + #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 281 + #define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 282 + #define NET_DIM_DEF_PROFILE_CQE 1 283 + #define NET_DIM_DEF_PROFILE_EQE 1 284 + 285 + #define NET_DIM_RX_EQE_PROFILES { \ 286 + {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 287 + {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 288 + {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 289 + {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 290 + {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 291 + } 292 + 293 + #define NET_DIM_RX_CQE_PROFILES { \ 294 + {2, 256}, \ 295 + {8, 128}, \ 296 + {16, 64}, \ 297 + {32, 64}, \ 298 + {64, 64} \ 299 + } 300 + 301 + #define NET_DIM_TX_EQE_PROFILES { \ 302 + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 303 + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 304 + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 305 + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 306 + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ 307 + } 308 + 309 + #define NET_DIM_TX_CQE_PROFILES { \ 310 + {5, 128}, \ 311 + {8, 64}, \ 312 + {16, 32}, \ 313 + {32, 32}, \ 314 + {64, 32} \ 315 + } 316 + 317 + static const struct dim_cq_moder 318 + rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { 319 + NET_DIM_RX_EQE_PROFILES, 320 + NET_DIM_RX_CQE_PROFILES, 321 + }; 322 + 323 + static const struct dim_cq_moder 324 + tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { 325 + NET_DIM_TX_EQE_PROFILES, 326 + NET_DIM_TX_CQE_PROFILES, 327 + }; 328 + 329 + /** 330 + * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile 331 + * @cq_period_mode: CQ period mode 332 + * @ix: Profile index 333 + */ 334 + struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix); 335 + 
336 + /** 337 + * net_dim_get_def_rx_moderation - provide the default RX moderation 338 + * @cq_period_mode: CQ period mode 339 + */ 340 + struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode); 341 + 342 + /** 343 + * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile 344 + * @cq_period_mode: CQ period mode 345 + * @ix: Profile index 346 + */ 347 + struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix); 348 + 349 + /** 350 + * net_dim_get_def_tx_moderation - provide the default TX moderation 351 + * @cq_period_mode: CQ period mode 352 + */ 353 + struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); 354 + 355 + /** 356 + * net_dim - main DIM algorithm entry point 357 + * @dim: DIM instance information 358 + * @end_sample: Current data measurement 359 + * 360 + * Called by the consumer. 361 + * This is the main logic of the algorithm, where data is processed in order to decide on next 362 + * required action. 363 + */ 364 + void net_dim(struct dim *dim, struct dim_sample end_sample); 365 + 366 + #endif /* DIM_H */
-418
include/linux/net_dim.h
··· 1 - /* 2 - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 - * Copyright (c) 2017-2018, Broadcom Limited. All rights reserved. 4 - * 5 - * This software is available to you under a choice of one of two 6 - * licenses. You may choose to be licensed under the terms of the GNU 7 - * General Public License (GPL) Version 2, available from the file 8 - * COPYING in the main directory of this source tree, or the 9 - * OpenIB.org BSD license below: 10 - * 11 - * Redistribution and use in source and binary forms, with or 12 - * without modification, are permitted provided that the following 13 - * conditions are met: 14 - * 15 - * - Redistributions of source code must retain the above 16 - * copyright notice, this list of conditions and the following 17 - * disclaimer. 18 - * 19 - * - Redistributions in binary form must reproduce the above 20 - * copyright notice, this list of conditions and the following 21 - * disclaimer in the documentation and/or other materials 22 - * provided with the distribution. 23 - * 24 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 - * SOFTWARE. 
32 - */ 33 - 34 - #ifndef NET_DIM_H 35 - #define NET_DIM_H 36 - 37 - #include <linux/module.h> 38 - 39 - struct net_dim_cq_moder { 40 - u16 usec; 41 - u16 pkts; 42 - u8 cq_period_mode; 43 - }; 44 - 45 - struct net_dim_sample { 46 - ktime_t time; 47 - u32 pkt_ctr; 48 - u32 byte_ctr; 49 - u16 event_ctr; 50 - }; 51 - 52 - struct net_dim_stats { 53 - int ppms; /* packets per msec */ 54 - int bpms; /* bytes per msec */ 55 - int epms; /* events per msec */ 56 - }; 57 - 58 - struct net_dim { /* Adaptive Moderation */ 59 - u8 state; 60 - struct net_dim_stats prev_stats; 61 - struct net_dim_sample start_sample; 62 - struct work_struct work; 63 - u8 profile_ix; 64 - u8 mode; 65 - u8 tune_state; 66 - u8 steps_right; 67 - u8 steps_left; 68 - u8 tired; 69 - }; 70 - 71 - enum { 72 - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, 73 - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, 74 - NET_DIM_CQ_PERIOD_NUM_MODES 75 - }; 76 - 77 - /* Adaptive moderation logic */ 78 - enum { 79 - NET_DIM_START_MEASURE, 80 - NET_DIM_MEASURE_IN_PROGRESS, 81 - NET_DIM_APPLY_NEW_PROFILE, 82 - }; 83 - 84 - enum { 85 - NET_DIM_PARKING_ON_TOP, 86 - NET_DIM_PARKING_TIRED, 87 - NET_DIM_GOING_RIGHT, 88 - NET_DIM_GOING_LEFT, 89 - }; 90 - 91 - enum { 92 - NET_DIM_STATS_WORSE, 93 - NET_DIM_STATS_SAME, 94 - NET_DIM_STATS_BETTER, 95 - }; 96 - 97 - enum { 98 - NET_DIM_STEPPED, 99 - NET_DIM_TOO_TIRED, 100 - NET_DIM_ON_EDGE, 101 - }; 102 - 103 - #define NET_DIM_PARAMS_NUM_PROFILES 5 104 - /* Adaptive moderation profiles */ 105 - #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 106 - #define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 107 - #define NET_DIM_DEF_PROFILE_CQE 1 108 - #define NET_DIM_DEF_PROFILE_EQE 1 109 - 110 - /* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */ 111 - #define NET_DIM_RX_EQE_PROFILES { \ 112 - {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 113 - {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 114 - {64, 
NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 115 - {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 116 - {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ 117 - } 118 - 119 - #define NET_DIM_RX_CQE_PROFILES { \ 120 - {2, 256}, \ 121 - {8, 128}, \ 122 - {16, 64}, \ 123 - {32, 64}, \ 124 - {64, 64} \ 125 - } 126 - 127 - #define NET_DIM_TX_EQE_PROFILES { \ 128 - {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 129 - {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 130 - {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 131 - {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ 132 - {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ 133 - } 134 - 135 - #define NET_DIM_TX_CQE_PROFILES { \ 136 - {5, 128}, \ 137 - {8, 64}, \ 138 - {16, 32}, \ 139 - {32, 32}, \ 140 - {64, 32} \ 141 - } 142 - 143 - static const struct net_dim_cq_moder 144 - rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { 145 - NET_DIM_RX_EQE_PROFILES, 146 - NET_DIM_RX_CQE_PROFILES, 147 - }; 148 - 149 - static const struct net_dim_cq_moder 150 - tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { 151 - NET_DIM_TX_EQE_PROFILES, 152 - NET_DIM_TX_CQE_PROFILES, 153 - }; 154 - 155 - static inline struct net_dim_cq_moder 156 - net_dim_get_rx_moderation(u8 cq_period_mode, int ix) 157 - { 158 - struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; 159 - 160 - cq_moder.cq_period_mode = cq_period_mode; 161 - return cq_moder; 162 - } 163 - 164 - static inline struct net_dim_cq_moder 165 - net_dim_get_def_rx_moderation(u8 cq_period_mode) 166 - { 167 - u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
168 - NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; 169 - 170 - return net_dim_get_rx_moderation(cq_period_mode, profile_ix); 171 - } 172 - 173 - static inline struct net_dim_cq_moder 174 - net_dim_get_tx_moderation(u8 cq_period_mode, int ix) 175 - { 176 - struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; 177 - 178 - cq_moder.cq_period_mode = cq_period_mode; 179 - return cq_moder; 180 - } 181 - 182 - static inline struct net_dim_cq_moder 183 - net_dim_get_def_tx_moderation(u8 cq_period_mode) 184 - { 185 - u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 186 - NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; 187 - 188 - return net_dim_get_tx_moderation(cq_period_mode, profile_ix); 189 - } 190 - 191 - static inline bool net_dim_on_top(struct net_dim *dim) 192 - { 193 - switch (dim->tune_state) { 194 - case NET_DIM_PARKING_ON_TOP: 195 - case NET_DIM_PARKING_TIRED: 196 - return true; 197 - case NET_DIM_GOING_RIGHT: 198 - return (dim->steps_left > 1) && (dim->steps_right == 1); 199 - default: /* NET_DIM_GOING_LEFT */ 200 - return (dim->steps_right > 1) && (dim->steps_left == 1); 201 - } 202 - } 203 - 204 - static inline void net_dim_turn(struct net_dim *dim) 205 - { 206 - switch (dim->tune_state) { 207 - case NET_DIM_PARKING_ON_TOP: 208 - case NET_DIM_PARKING_TIRED: 209 - break; 210 - case NET_DIM_GOING_RIGHT: 211 - dim->tune_state = NET_DIM_GOING_LEFT; 212 - dim->steps_left = 0; 213 - break; 214 - case NET_DIM_GOING_LEFT: 215 - dim->tune_state = NET_DIM_GOING_RIGHT; 216 - dim->steps_right = 0; 217 - break; 218 - } 219 - } 220 - 221 - static inline int net_dim_step(struct net_dim *dim) 222 - { 223 - if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) 224 - return NET_DIM_TOO_TIRED; 225 - 226 - switch (dim->tune_state) { 227 - case NET_DIM_PARKING_ON_TOP: 228 - case NET_DIM_PARKING_TIRED: 229 - break; 230 - case NET_DIM_GOING_RIGHT: 231 - if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) 232 - return 
NET_DIM_ON_EDGE; 233 - dim->profile_ix++; 234 - dim->steps_right++; 235 - break; 236 - case NET_DIM_GOING_LEFT: 237 - if (dim->profile_ix == 0) 238 - return NET_DIM_ON_EDGE; 239 - dim->profile_ix--; 240 - dim->steps_left++; 241 - break; 242 - } 243 - 244 - dim->tired++; 245 - return NET_DIM_STEPPED; 246 - } 247 - 248 - static inline void net_dim_park_on_top(struct net_dim *dim) 249 - { 250 - dim->steps_right = 0; 251 - dim->steps_left = 0; 252 - dim->tired = 0; 253 - dim->tune_state = NET_DIM_PARKING_ON_TOP; 254 - } 255 - 256 - static inline void net_dim_park_tired(struct net_dim *dim) 257 - { 258 - dim->steps_right = 0; 259 - dim->steps_left = 0; 260 - dim->tune_state = NET_DIM_PARKING_TIRED; 261 - } 262 - 263 - static inline void net_dim_exit_parking(struct net_dim *dim) 264 - { 265 - dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT : 266 - NET_DIM_GOING_RIGHT; 267 - net_dim_step(dim); 268 - } 269 - 270 - #define IS_SIGNIFICANT_DIFF(val, ref) \ 271 - (((100UL * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */ 272 - 273 - static inline int net_dim_stats_compare(struct net_dim_stats *curr, 274 - struct net_dim_stats *prev) 275 - { 276 - if (!prev->bpms) 277 - return curr->bpms ? NET_DIM_STATS_BETTER : 278 - NET_DIM_STATS_SAME; 279 - 280 - if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) 281 - return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER : 282 - NET_DIM_STATS_WORSE; 283 - 284 - if (!prev->ppms) 285 - return curr->ppms ? NET_DIM_STATS_BETTER : 286 - NET_DIM_STATS_SAME; 287 - 288 - if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) 289 - return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER : 290 - NET_DIM_STATS_WORSE; 291 - 292 - if (!prev->epms) 293 - return NET_DIM_STATS_SAME; 294 - 295 - if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) 296 - return (curr->epms < prev->epms) ? 
NET_DIM_STATS_BETTER : 297 - NET_DIM_STATS_WORSE; 298 - 299 - return NET_DIM_STATS_SAME; 300 - } 301 - 302 - static inline bool net_dim_decision(struct net_dim_stats *curr_stats, 303 - struct net_dim *dim) 304 - { 305 - int prev_state = dim->tune_state; 306 - int prev_ix = dim->profile_ix; 307 - int stats_res; 308 - int step_res; 309 - 310 - switch (dim->tune_state) { 311 - case NET_DIM_PARKING_ON_TOP: 312 - stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); 313 - if (stats_res != NET_DIM_STATS_SAME) 314 - net_dim_exit_parking(dim); 315 - break; 316 - 317 - case NET_DIM_PARKING_TIRED: 318 - dim->tired--; 319 - if (!dim->tired) 320 - net_dim_exit_parking(dim); 321 - break; 322 - 323 - case NET_DIM_GOING_RIGHT: 324 - case NET_DIM_GOING_LEFT: 325 - stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats); 326 - if (stats_res != NET_DIM_STATS_BETTER) 327 - net_dim_turn(dim); 328 - 329 - if (net_dim_on_top(dim)) { 330 - net_dim_park_on_top(dim); 331 - break; 332 - } 333 - 334 - step_res = net_dim_step(dim); 335 - switch (step_res) { 336 - case NET_DIM_ON_EDGE: 337 - net_dim_park_on_top(dim); 338 - break; 339 - case NET_DIM_TOO_TIRED: 340 - net_dim_park_tired(dim); 341 - break; 342 - } 343 - 344 - break; 345 - } 346 - 347 - if ((prev_state != NET_DIM_PARKING_ON_TOP) || 348 - (dim->tune_state != NET_DIM_PARKING_ON_TOP)) 349 - dim->prev_stats = *curr_stats; 350 - 351 - return dim->profile_ix != prev_ix; 352 - } 353 - 354 - static inline void net_dim_sample(u16 event_ctr, 355 - u64 packets, 356 - u64 bytes, 357 - struct net_dim_sample *s) 358 - { 359 - s->time = ktime_get(); 360 - s->pkt_ctr = packets; 361 - s->byte_ctr = bytes; 362 - s->event_ctr = event_ctr; 363 - } 364 - 365 - #define NET_DIM_NEVENTS 64 366 - #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1)) 367 - 368 - static inline void net_dim_calc_stats(struct net_dim_sample *start, 369 - struct net_dim_sample *end, 370 - struct net_dim_stats 
*curr_stats) 371 - { 372 - /* u32 holds up to 71 minutes, should be enough */ 373 - u32 delta_us = ktime_us_delta(end->time, start->time); 374 - u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); 375 - u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, 376 - start->byte_ctr); 377 - 378 - if (!delta_us) 379 - return; 380 - 381 - curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); 382 - curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); 383 - curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC, 384 - delta_us); 385 - } 386 - 387 - static inline void net_dim(struct net_dim *dim, 388 - struct net_dim_sample end_sample) 389 - { 390 - struct net_dim_stats curr_stats; 391 - u16 nevents; 392 - 393 - switch (dim->state) { 394 - case NET_DIM_MEASURE_IN_PROGRESS: 395 - nevents = BIT_GAP(BITS_PER_TYPE(u16), 396 - end_sample.event_ctr, 397 - dim->start_sample.event_ctr); 398 - if (nevents < NET_DIM_NEVENTS) 399 - break; 400 - net_dim_calc_stats(&dim->start_sample, &end_sample, 401 - &curr_stats); 402 - if (net_dim_decision(&curr_stats, dim)) { 403 - dim->state = NET_DIM_APPLY_NEW_PROFILE; 404 - schedule_work(&dim->work); 405 - break; 406 - } 407 - /* fall through */ 408 - case NET_DIM_START_MEASURE: 409 - net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr, 410 - &dim->start_sample); 411 - dim->state = NET_DIM_MEASURE_IN_PROGRESS; 412 - break; 413 - case NET_DIM_APPLY_NEW_PROFILE: 414 - break; 415 - } 416 - } 417 - 418 - #endif /* NET_DIM_H */
+8
lib/Kconfig
··· 562 562 Digital signature verification. Currently only RSA is supported. 563 563 Implementation is done using GnuPG MPI library 564 564 565 + config DIMLIB 566 + bool "DIM library" 567 + default y 568 + help 569 + Dynamic Interrupt Moderation library. 570 + Implements an algorithm for dynamically changing CQ moderation values 571 + according to run time performance. 572 + 565 573 # 566 574 # libfdt files, only selected if needed. 567 575 #
+1
lib/Makefile
··· 202 202 obj-$(CONFIG_GLOB_SELFTEST) += globtest.o 203 203 204 204 obj-$(CONFIG_MPILIB) += mpi/ 205 + obj-$(CONFIG_DIMLIB) += dim/ 205 206 obj-$(CONFIG_SIGNATURE) += digsig.o 206 207 207 208 lib-$(CONFIG_CLZ_TAB) += clz_tab.o
+9
lib/dim/Makefile
··· 1 + # 2 + # DIM Dynamic Interrupt Moderation library 3 + # 4 + 5 + obj-$(CONFIG_DIMLIB) = net_dim.o 6 + 7 + net_dim-y = \ 8 + dim.o \ 9 + net_dim.o
+83
lib/dim/dim.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #include <linux/dim.h> 7 + 8 + bool dim_on_top(struct dim *dim) 9 + { 10 + switch (dim->tune_state) { 11 + case DIM_PARKING_ON_TOP: 12 + case DIM_PARKING_TIRED: 13 + return true; 14 + case DIM_GOING_RIGHT: 15 + return (dim->steps_left > 1) && (dim->steps_right == 1); 16 + default: /* DIM_GOING_LEFT */ 17 + return (dim->steps_right > 1) && (dim->steps_left == 1); 18 + } 19 + } 20 + EXPORT_SYMBOL(dim_on_top); 21 + 22 + void dim_turn(struct dim *dim) 23 + { 24 + switch (dim->tune_state) { 25 + case DIM_PARKING_ON_TOP: 26 + case DIM_PARKING_TIRED: 27 + break; 28 + case DIM_GOING_RIGHT: 29 + dim->tune_state = DIM_GOING_LEFT; 30 + dim->steps_left = 0; 31 + break; 32 + case DIM_GOING_LEFT: 33 + dim->tune_state = DIM_GOING_RIGHT; 34 + dim->steps_right = 0; 35 + break; 36 + } 37 + } 38 + EXPORT_SYMBOL(dim_turn); 39 + 40 + void dim_park_on_top(struct dim *dim) 41 + { 42 + dim->steps_right = 0; 43 + dim->steps_left = 0; 44 + dim->tired = 0; 45 + dim->tune_state = DIM_PARKING_ON_TOP; 46 + } 47 + EXPORT_SYMBOL(dim_park_on_top); 48 + 49 + void dim_park_tired(struct dim *dim) 50 + { 51 + dim->steps_right = 0; 52 + dim->steps_left = 0; 53 + dim->tune_state = DIM_PARKING_TIRED; 54 + } 55 + EXPORT_SYMBOL(dim_park_tired); 56 + 57 + void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, 58 + struct dim_stats *curr_stats) 59 + { 60 + /* u32 holds up to 71 minutes, should be enough */ 61 + u32 delta_us = ktime_us_delta(end->time, start->time); 62 + u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr); 63 + u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr, 64 + start->byte_ctr); 65 + u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr, 66 + start->comp_ctr); 67 + 68 + if (!delta_us) 69 + return; 70 + 71 + curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); 72 + curr_stats->bpms = 
DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); 73 + curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC, 74 + delta_us); 75 + curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us); 76 + if (curr_stats->epms != 0) 77 + curr_stats->cpe_ratio = 78 + (curr_stats->cpms * 100) / curr_stats->epms; 79 + else 80 + curr_stats->cpe_ratio = 0; 81 + 82 + } 83 + EXPORT_SYMBOL(dim_calc_stats);
+190
lib/dim/net_dim.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. 4 + */ 5 + 6 + #include <linux/dim.h> 7 + 8 + struct dim_cq_moder 9 + net_dim_get_rx_moderation(u8 cq_period_mode, int ix) 10 + { 11 + struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; 12 + 13 + cq_moder.cq_period_mode = cq_period_mode; 14 + return cq_moder; 15 + } 16 + EXPORT_SYMBOL(net_dim_get_rx_moderation); 17 + 18 + struct dim_cq_moder 19 + net_dim_get_def_rx_moderation(u8 cq_period_mode) 20 + { 21 + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 22 + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; 23 + 24 + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); 25 + } 26 + EXPORT_SYMBOL(net_dim_get_def_rx_moderation); 27 + 28 + struct dim_cq_moder 29 + net_dim_get_tx_moderation(u8 cq_period_mode, int ix) 30 + { 31 + struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; 32 + 33 + cq_moder.cq_period_mode = cq_period_mode; 34 + return cq_moder; 35 + } 36 + EXPORT_SYMBOL(net_dim_get_tx_moderation); 37 + 38 + struct dim_cq_moder 39 + net_dim_get_def_tx_moderation(u8 cq_period_mode) 40 + { 41 + u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
42 + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; 43 + 44 + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); 45 + } 46 + EXPORT_SYMBOL(net_dim_get_def_tx_moderation); 47 + 48 + static int net_dim_step(struct dim *dim) 49 + { 50 + if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) 51 + return DIM_TOO_TIRED; 52 + 53 + switch (dim->tune_state) { 54 + case DIM_PARKING_ON_TOP: 55 + case DIM_PARKING_TIRED: 56 + break; 57 + case DIM_GOING_RIGHT: 58 + if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1)) 59 + return DIM_ON_EDGE; 60 + dim->profile_ix++; 61 + dim->steps_right++; 62 + break; 63 + case DIM_GOING_LEFT: 64 + if (dim->profile_ix == 0) 65 + return DIM_ON_EDGE; 66 + dim->profile_ix--; 67 + dim->steps_left++; 68 + break; 69 + } 70 + 71 + dim->tired++; 72 + return DIM_STEPPED; 73 + } 74 + 75 + static void net_dim_exit_parking(struct dim *dim) 76 + { 77 + dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT; 78 + net_dim_step(dim); 79 + } 80 + 81 + static int net_dim_stats_compare(struct dim_stats *curr, 82 + struct dim_stats *prev) 83 + { 84 + if (!prev->bpms) 85 + return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME; 86 + 87 + if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms)) 88 + return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER : 89 + DIM_STATS_WORSE; 90 + 91 + if (!prev->ppms) 92 + return curr->ppms ? DIM_STATS_BETTER : 93 + DIM_STATS_SAME; 94 + 95 + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) 96 + return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER : 97 + DIM_STATS_WORSE; 98 + 99 + if (!prev->epms) 100 + return DIM_STATS_SAME; 101 + 102 + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) 103 + return (curr->epms < prev->epms) ? 
DIM_STATS_BETTER : 104 + DIM_STATS_WORSE; 105 + 106 + return DIM_STATS_SAME; 107 + } 108 + 109 + static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) 110 + { 111 + int prev_state = dim->tune_state; 112 + int prev_ix = dim->profile_ix; 113 + int stats_res; 114 + int step_res; 115 + 116 + switch (dim->tune_state) { 117 + case DIM_PARKING_ON_TOP: 118 + stats_res = net_dim_stats_compare(curr_stats, 119 + &dim->prev_stats); 120 + if (stats_res != DIM_STATS_SAME) 121 + net_dim_exit_parking(dim); 122 + break; 123 + 124 + case DIM_PARKING_TIRED: 125 + dim->tired--; 126 + if (!dim->tired) 127 + net_dim_exit_parking(dim); 128 + break; 129 + 130 + case DIM_GOING_RIGHT: 131 + case DIM_GOING_LEFT: 132 + stats_res = net_dim_stats_compare(curr_stats, 133 + &dim->prev_stats); 134 + if (stats_res != DIM_STATS_BETTER) 135 + dim_turn(dim); 136 + 137 + if (dim_on_top(dim)) { 138 + dim_park_on_top(dim); 139 + break; 140 + } 141 + 142 + step_res = net_dim_step(dim); 143 + switch (step_res) { 144 + case DIM_ON_EDGE: 145 + dim_park_on_top(dim); 146 + break; 147 + case DIM_TOO_TIRED: 148 + dim_park_tired(dim); 149 + break; 150 + } 151 + 152 + break; 153 + } 154 + 155 + if (prev_state != DIM_PARKING_ON_TOP || 156 + dim->tune_state != DIM_PARKING_ON_TOP) 157 + dim->prev_stats = *curr_stats; 158 + 159 + return dim->profile_ix != prev_ix; 160 + } 161 + 162 + void net_dim(struct dim *dim, struct dim_sample end_sample) 163 + { 164 + struct dim_stats curr_stats; 165 + u16 nevents; 166 + 167 + switch (dim->state) { 168 + case DIM_MEASURE_IN_PROGRESS: 169 + nevents = BIT_GAP(BITS_PER_TYPE(u16), 170 + end_sample.event_ctr, 171 + dim->start_sample.event_ctr); 172 + if (nevents < DIM_NEVENTS) 173 + break; 174 + dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); 175 + if (net_dim_decision(&curr_stats, dim)) { 176 + dim->state = DIM_APPLY_NEW_PROFILE; 177 + schedule_work(&dim->work); 178 + break; 179 + } 180 + /* fall through */ 181 + case DIM_START_MEASURE: 182 + 
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, 183 + end_sample.byte_ctr, &dim->start_sample); 184 + dim->state = DIM_MEASURE_IN_PROGRESS; 185 + break; 186 + case DIM_APPLY_NEW_PROFILE: 187 + break; 188 + } 189 + } 190 + EXPORT_SYMBOL(net_dim);