Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/mlx5: Add optional counters for RDMA_TX/RX_packets/bytes

Add the following optional counters:
rdma_tx_packets, rdma_rx_bytes, rdma_rx_packets, rdma_tx_bytes.

These count all RDMA packets and bytes sent and received per link.

Note that the packet and byte counters of each direction share a single
flow counter, so that counter is only reset when both counters of that
direction are removed. From the user's perspective, however, each counter
can still be enabled or disabled separately.

The counters can be enabled using:
sudo rdma stat set link rocep8s0f0/1 optional-counters rdma_tx_packets
And can be seen using:
rdma stat -j show link rocep8s0f0/1

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/9f2753ad636f21704416df64b47395c8991d1123.1741875070.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Patrisious Haddad and committed by
Leon Romanovsky
d375db42 1d6a9e74

+133 -7
+83 -3
drivers/infiniband/hw/mlx5/counters.c
··· 140 140 INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), 141 141 }; 142 142 143 + static const struct mlx5_ib_counter packets_op_cnts[] = { 144 + INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS), 145 + INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES), 146 + INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS), 147 + INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES), 148 + }; 149 + 143 150 static int mlx5_ib_read_counters(struct ib_counters *counters, 144 151 struct ib_counters_read_attr *read_attr, 145 152 struct uverbs_attr_bundle *attrs) ··· 434 427 return num_counters; 435 428 } 436 429 430 + static bool is_rdma_bytes_counter(u32 type) 431 + { 432 + if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES || 433 + type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES) 434 + return true; 435 + 436 + return false; 437 + } 438 + 437 439 static int do_get_op_stat(struct ib_device *ibdev, 438 440 struct rdma_hw_stats *stats, 439 441 u32 port_num, int index) ··· 450 434 struct mlx5_ib_dev *dev = to_mdev(ibdev); 451 435 const struct mlx5_ib_counters *cnts; 452 436 const struct mlx5_ib_op_fc *opfcs; 453 - u64 packets = 0, bytes; 437 + u64 packets, bytes; 454 438 u32 type; 455 439 int ret; 456 440 ··· 469 453 if (ret) 470 454 return ret; 471 455 456 + if (is_rdma_bytes_counter(type)) 457 + stats->value[index] = bytes; 458 + else 459 + stats->value[index] = packets; 472 460 out: 473 - stats->value[index] = packets; 474 461 return index; 475 462 } 476 463 ··· 696 677 descs[j].priv = &rdmatx_cnp_op_cnts[i].type; 697 678 } 698 679 } 680 + 681 + for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) { 682 + descs[j].name = packets_op_cnts[i].name; 683 + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 684 + descs[j].priv = &packets_op_cnts[i].type; 685 + } 699 686 } 700 687 701 688 ··· 752 727 753 728 num_op_counters = ARRAY_SIZE(basic_op_cnts); 754 729 730 + num_op_counters += ARRAY_SIZE(packets_op_cnts); 731 + 755 732 if (MLX5_CAP_FLOWTABLE(dev->mdev, 756 733 ft_field_support_2_nic_receive_rdma.bth_opcode)) 757 
734 num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); ··· 783 756 return -ENOMEM; 784 757 } 785 758 759 + /* 760 + * Checks if the given flow counter type should be sharing the same flow counter 761 + * with another type and if it should, checks if that other type flow counter 762 + * was already created, if both conditions are met return true and the counter 763 + * else return false. 764 + */ 765 + static bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, 766 + u32 type, 767 + struct mlx5_ib_op_fc **opfc) 768 + { 769 + u32 shared_fc_type; 770 + 771 + switch (type) { 772 + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: 773 + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES; 774 + break; 775 + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: 776 + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS; 777 + break; 778 + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: 779 + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; 780 + break; 781 + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: 782 + shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS; 783 + break; 784 + default: 785 + return false; 786 + } 787 + 788 + *opfc = &opfcs[shared_fc_type]; 789 + if (!(*opfc)->fc) 790 + return false; 791 + 792 + return true; 793 + } 794 + 786 795 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 787 796 { 788 797 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 789 798 int num_cnt_ports = dev->num_ports; 799 + struct mlx5_ib_op_fc *in_use_opfc; 790 800 int i, j; 791 801 792 802 if (is_mdev_switchdev_mode(dev->mdev)) ··· 845 781 if (!dev->port[i].cnts.opfcs[j].fc) 846 782 continue; 847 783 784 + if (mlx5r_is_opfc_shared_and_in_use( 785 + dev->port[i].cnts.opfcs, j, &in_use_opfc)) 786 + goto skip; 787 + 848 788 if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) 849 789 mlx5_ib_fs_remove_op_fc(dev, 850 790 &dev->port[i].cnts.opfcs[j], j); 851 791 mlx5_fc_destroy(dev->mdev, 852 792 dev->port[i].cnts.opfcs[j].fc); 793 + skip: 853 794 dev->port[i].cnts.opfcs[j].fc = NULL; 854 795 } 855 796 } ··· 
1048 979 unsigned int index, bool enable) 1049 980 { 1050 981 struct mlx5_ib_dev *dev = to_mdev(device); 982 + struct mlx5_ib_op_fc *opfc, *in_use_opfc; 1051 983 struct mlx5_ib_counters *cnts; 1052 - struct mlx5_ib_op_fc *opfc; 1053 984 u32 num_hw_counters, type; 1054 985 int ret; 1055 986 ··· 1073 1004 if (opfc->fc) 1074 1005 return -EEXIST; 1075 1006 1007 + if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, 1008 + &in_use_opfc)) { 1009 + opfc->fc = in_use_opfc->fc; 1010 + opfc->rule[0] = in_use_opfc->rule[0]; 1011 + return 0; 1012 + } 1013 + 1076 1014 opfc->fc = mlx5_fc_create(dev->mdev, false); 1077 1015 if (IS_ERR(opfc->fc)) 1078 1016 return PTR_ERR(opfc->fc); ··· 1095 1019 if (!opfc->fc) 1096 1020 return -EINVAL; 1097 1021 1022 + if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc)) 1023 + goto out; 1024 + 1098 1025 mlx5_ib_fs_remove_op_fc(dev, opfc, type); 1099 1026 mlx5_fc_destroy(dev->mdev, opfc->fc); 1027 + out: 1100 1028 opfc->fc = NULL; 1101 1029 return 0; 1102 1030 }
+44 -2
drivers/infiniband/hw/mlx5/fs.c
··· 802 802 enum { 803 803 RDMA_RX_ECN_OPCOUNTER_PRIO, 804 804 RDMA_RX_CNP_OPCOUNTER_PRIO, 805 + RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO, 805 806 }; 806 807 807 808 enum { 808 809 RDMA_TX_CNP_OPCOUNTER_PRIO, 810 + RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO, 809 811 }; 810 812 811 813 static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, ··· 871 869 return 0; 872 870 } 873 871 872 + /* Returns the prio we should use for the given optional counter type, 873 + * whereas for bytes type we use the packet type, since they share the same 874 + * resources. 875 + */ 876 + static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev, 877 + u32 type) 878 + { 879 + u32 prio_type; 880 + 881 + switch (type) { 882 + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: 883 + prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS; 884 + break; 885 + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: 886 + prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS; 887 + break; 888 + default: 889 + prio_type = type; 890 + } 891 + 892 + return &dev->flow_db->opfcs[prio_type]; 893 + } 894 + 874 895 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, 875 896 struct mlx5_ib_op_fc *opfc, 876 897 enum mlx5_ib_optional_counter_type type) ··· 948 923 priority = RDMA_TX_CNP_OPCOUNTER_PRIO; 949 924 break; 950 925 926 + case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS: 927 + case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES: 928 + spec_num = 1; 929 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; 930 + priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO; 931 + break; 932 + 933 + case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS: 934 + case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES: 935 + spec_num = 1; 936 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; 937 + priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO; 938 + break; 939 + 951 940 default: 952 941 err = -EOPNOTSUPP; 953 942 goto free; ··· 973 934 goto free; 974 935 } 975 936 976 - prio = &dev->flow_db->opfcs[type]; 937 + prio = get_opfc_prio(dev, type); 977 938 if (!prio->flow_table) { 978 939 prio = _get_prio(dev, 
ns, prio, priority, 979 940 dev->num_ports * MAX_OPFC_RULES, 1, 0, 0); ··· 1015 976 struct mlx5_ib_op_fc *opfc, 1016 977 enum mlx5_ib_optional_counter_type type) 1017 978 { 979 + struct mlx5_ib_flow_prio *prio; 1018 980 int i; 981 + 982 + prio = get_opfc_prio(dev, type); 1019 983 1020 984 for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { 1021 985 mlx5_del_flow_rules(opfc->rule[i]); 1022 - put_flow_table(dev, &dev->flow_db->opfcs[type], true); 986 + put_flow_table(dev, prio, true); 1023 987 } 1024 988 } 1025 989
+4
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 294 294 MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, 295 295 MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, 296 296 MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, 297 + MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS, 298 + MLX5_IB_OPCOUNTER_RDMA_TX_BYTES, 299 + MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS, 300 + MLX5_IB_OPCOUNTER_RDMA_RX_BYTES, 297 301 298 302 MLX5_IB_OPCOUNTER_MAX, 299 303 };
+2 -2
include/linux/mlx5/device.h
··· 1532 1532 return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; 1533 1533 } 1534 1534 1535 - #define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2 1536 - #define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1 1535 + #define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 3 1536 + #define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 2 1537 1537 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16 1538 1538 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16 1539 1539 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1