Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/counter: Add optional counter support

An optional counter is a driver-specific counter that may be dynamically
enabled/disabled. This enhancement allows drivers to expose counters
which are, for example, mutually exclusive and cannot be enabled at the
same time, counters that might degrade performance, optional debug
counters, etc.

Optional counters are marked with the IB_STAT_FLAG_OPTIONAL flag. They are
not exported in sysfs, and must be placed at the end of all stats; otherwise
attr->show() in sysfs would get wrong indexes for hwcounters that come
after optional counters.

Link: https://lore.kernel.org/r/20211008122439.166063-7-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Aharon Landau and committed by
Jason Gunthorpe
5e2ddd1e 0dc89684

+74 -10
+32
drivers/infiniband/core/counters.c
··· 106 106 return ret; 107 107 } 108 108 109 + int rdma_counter_modify(struct ib_device *dev, u32 port, 110 + unsigned int index, bool enable) 111 + { 112 + struct rdma_hw_stats *stats; 113 + int ret = 0; 114 + 115 + if (!dev->ops.modify_hw_stat) 116 + return -EOPNOTSUPP; 117 + 118 + stats = ib_get_hw_stats_port(dev, port); 119 + if (!stats || index >= stats->num_counters || 120 + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) 121 + return -EINVAL; 122 + 123 + mutex_lock(&stats->lock); 124 + 125 + if (enable != test_bit(index, stats->is_disabled)) 126 + goto out; 127 + 128 + ret = dev->ops.modify_hw_stat(dev, port, index, enable); 129 + if (ret) 130 + goto out; 131 + 132 + if (enable) 133 + clear_bit(index, stats->is_disabled); 134 + else 135 + set_bit(index, stats->is_disabled); 136 + out: 137 + mutex_unlock(&stats->lock); 138 + return ret; 139 + } 140 + 109 141 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, 110 142 struct ib_qp *qp, 111 143 enum rdma_nl_counter_mode mode)
+1
drivers/infiniband/core/device.c
··· 2676 2676 SET_DEVICE_OP(dev_ops, modify_cq); 2677 2677 SET_DEVICE_OP(dev_ops, modify_device); 2678 2678 SET_DEVICE_OP(dev_ops, modify_flow_action_esp); 2679 + SET_DEVICE_OP(dev_ops, modify_hw_stat); 2679 2680 SET_DEVICE_OP(dev_ops, modify_port); 2680 2681 SET_DEVICE_OP(dev_ops, modify_qp); 2681 2682 SET_DEVICE_OP(dev_ops, modify_srq);
+26 -10
drivers/infiniband/core/sysfs.c
··· 934 934 { 935 935 struct hw_stats_device_attribute *attr; 936 936 struct hw_stats_device_data *data; 937 - int i, ret; 937 + bool opstat_skipped = false; 938 + int i, ret, pos = 0; 938 939 939 940 data = alloc_hw_stats_device(ibdev); 940 941 if (IS_ERR(data)) { ··· 956 955 data->stats->timestamp = jiffies; 957 956 958 957 for (i = 0; i < data->stats->num_counters; i++) { 959 - attr = &data->attrs[i]; 958 + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { 959 + opstat_skipped = true; 960 + continue; 961 + } 962 + 963 + WARN_ON(opstat_skipped); 964 + attr = &data->attrs[pos]; 960 965 sysfs_attr_init(&attr->attr.attr); 961 966 attr->attr.attr.name = data->stats->descs[i].name; 962 967 attr->attr.attr.mode = 0444; 963 968 attr->attr.show = hw_stat_device_show; 964 969 attr->show = show_hw_stats; 965 - data->group.attrs[i] = &attr->attr.attr; 970 + data->group.attrs[pos] = &attr->attr.attr; 971 + pos++; 966 972 } 967 973 968 - attr = &data->attrs[i]; 974 + attr = &data->attrs[pos]; 969 975 sysfs_attr_init(&attr->attr.attr); 970 976 attr->attr.attr.name = "lifespan"; 971 977 attr->attr.attr.mode = 0644; ··· 980 972 attr->show = show_stats_lifespan; 981 973 attr->attr.store = hw_stat_device_store; 982 974 attr->store = set_stats_lifespan; 983 - data->group.attrs[i] = &attr->attr.attr; 975 + data->group.attrs[pos] = &attr->attr.attr; 984 976 for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) 985 977 if (!ibdev->groups[i]) { 986 978 ibdev->groups[i] = &data->group; ··· 1035 1027 { 1036 1028 struct hw_stats_port_attribute *attr; 1037 1029 struct hw_stats_port_data *data; 1038 - int i, ret; 1030 + bool opstat_skipped = false; 1031 + int i, ret, pos = 0; 1039 1032 1040 1033 data = alloc_hw_stats_port(port, group); 1041 1034 if (IS_ERR(data)) ··· 1054 1045 data->stats->timestamp = jiffies; 1055 1046 1056 1047 for (i = 0; i < data->stats->num_counters; i++) { 1057 - attr = &data->attrs[i]; 1048 + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { 1049 + 
opstat_skipped = true; 1050 + continue; 1051 + } 1052 + 1053 + WARN_ON(opstat_skipped); 1054 + attr = &data->attrs[pos]; 1058 1055 sysfs_attr_init(&attr->attr.attr); 1059 1056 attr->attr.attr.name = data->stats->descs[i].name; 1060 1057 attr->attr.attr.mode = 0444; 1061 1058 attr->attr.show = hw_stat_port_show; 1062 1059 attr->show = show_hw_stats; 1063 - group->attrs[i] = &attr->attr.attr; 1060 + group->attrs[pos] = &attr->attr.attr; 1061 + pos++; 1064 1062 } 1065 1063 1066 - attr = &data->attrs[i]; 1064 + attr = &data->attrs[pos]; 1067 1065 sysfs_attr_init(&attr->attr.attr); 1068 1066 attr->attr.attr.name = "lifespan"; 1069 1067 attr->attr.attr.mode = 0644; ··· 1078 1062 attr->show = show_stats_lifespan; 1079 1063 attr->attr.store = hw_stat_port_store; 1080 1064 attr->store = set_stats_lifespan; 1081 - group->attrs[i] = &attr->attr.attr; 1065 + group->attrs[pos] = &attr->attr.attr; 1082 1066 1083 1067 port->hw_stats_data = data; 1084 1068 return 0;
+13
include/rdma/ib_verbs.h
··· 545 545 IB_SPEED_NDR = 128, 546 546 }; 547 547 548 + enum ib_stat_flag { 549 + IB_STAT_FLAG_OPTIONAL = 1 << 0, 550 + }; 551 + 548 552 /** 549 553 * struct rdma_stat_desc 550 554 * @name - The name of the counter 555 + * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL 551 556 */ 552 557 struct rdma_stat_desc { 553 558 const char *name; 559 + unsigned int flags; 554 560 }; 555 561 556 562 /** ··· 2568 2562 int (*get_hw_stats)(struct ib_device *device, 2569 2563 struct rdma_hw_stats *stats, u32 port, int index); 2570 2564 2565 + /** 2566 + * modify_hw_stat - Modify the counter configuration 2567 + * @enable: true/false when enable/disable a counter 2568 + * Return codes - 0 on success or error code otherwise. 2569 + */ 2570 + int (*modify_hw_stat)(struct ib_device *device, u32 port, 2571 + unsigned int counter_index, bool enable); 2571 2572 /** 2572 2573 * Allows rdma drivers to add their own restrack attributes. 2573 2574 */
+2
include/rdma/rdma_counter.h
··· 63 63 enum rdma_nl_counter_mode *mode, 64 64 enum rdma_nl_counter_mask *mask); 65 65 66 + int rdma_counter_modify(struct ib_device *dev, u32 port, 67 + unsigned int index, bool enable); 66 68 #endif /* _RDMA_COUNTER_H_ */