Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/counter: Add "auto" configuration mode support

In auto mode, all QPs that belong to one category are bound automatically to
a single counter set. Currently only "qp type" is supported.

In this mode a QP is bound to its counter during the RST2INIT modification,
and when a QP is destroyed it is unbound from the counter.

Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

authored by

Mark Zhang and committed by
Jason Gunthorpe
99fa331d 413d3347

+259
+221
drivers/infiniband/core/counters.c
··· 54 54 return ret; 55 55 } 56 56 57 + static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, 58 + enum rdma_nl_counter_mode mode) 59 + { 60 + struct rdma_counter *counter; 61 + 62 + if (!dev->ops.counter_dealloc) 63 + return NULL; 64 + 65 + counter = kzalloc(sizeof(*counter), GFP_KERNEL); 66 + if (!counter) 67 + return NULL; 68 + 69 + counter->device = dev; 70 + counter->port = port; 71 + counter->res.type = RDMA_RESTRACK_COUNTER; 72 + counter->mode.mode = mode; 73 + kref_init(&counter->kref); 74 + mutex_init(&counter->lock); 75 + 76 + return counter; 77 + } 78 + 79 + static void rdma_counter_free(struct rdma_counter *counter) 80 + { 81 + rdma_restrack_del(&counter->res); 82 + kfree(counter); 83 + } 84 + 85 + static void auto_mode_init_counter(struct rdma_counter *counter, 86 + const struct ib_qp *qp, 87 + enum rdma_nl_counter_mask new_mask) 88 + { 89 + struct auto_mode_param *param = &counter->mode.param; 90 + 91 + counter->mode.mode = RDMA_COUNTER_MODE_AUTO; 92 + counter->mode.mask = new_mask; 93 + 94 + if (new_mask & RDMA_COUNTER_MASK_QP_TYPE) 95 + param->qp_type = qp->qp_type; 96 + } 97 + 98 + static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter, 99 + enum rdma_nl_counter_mask auto_mask) 100 + { 101 + struct auto_mode_param *param = &counter->mode.param; 102 + bool match = true; 103 + 104 + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) 105 + return false; 106 + 107 + /* Ensure that counter belong to right PID */ 108 + if (!rdma_is_kernel_res(&counter->res) && 109 + !rdma_is_kernel_res(&qp->res) && 110 + (task_pid_vnr(counter->res.task) != current->pid)) 111 + return false; 112 + 113 + if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE) 114 + match &= (param->qp_type == qp->qp_type); 115 + 116 + return match; 117 + } 118 + 119 + static int __rdma_counter_bind_qp(struct rdma_counter *counter, 120 + struct ib_qp *qp) 121 + { 122 + int ret; 123 + 124 + if (qp->counter) 125 + return -EINVAL; 126 + 127 
+ if (!qp->device->ops.counter_bind_qp) 128 + return -EOPNOTSUPP; 129 + 130 + mutex_lock(&counter->lock); 131 + ret = qp->device->ops.counter_bind_qp(counter, qp); 132 + mutex_unlock(&counter->lock); 133 + 134 + return ret; 135 + } 136 + 137 + static int __rdma_counter_unbind_qp(struct ib_qp *qp) 138 + { 139 + struct rdma_counter *counter = qp->counter; 140 + int ret; 141 + 142 + if (!qp->device->ops.counter_unbind_qp) 143 + return -EOPNOTSUPP; 144 + 145 + mutex_lock(&counter->lock); 146 + ret = qp->device->ops.counter_unbind_qp(qp); 147 + mutex_unlock(&counter->lock); 148 + 149 + return ret; 150 + } 151 + 152 + /** 153 + * rdma_get_counter_auto_mode - Find the counter that @qp should be bound 154 + * with in auto mode 155 + * 156 + * Return: The counter (with ref-count increased) if found 157 + */ 158 + static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp, 159 + u8 port) 160 + { 161 + struct rdma_port_counter *port_counter; 162 + struct rdma_counter *counter = NULL; 163 + struct ib_device *dev = qp->device; 164 + struct rdma_restrack_entry *res; 165 + struct rdma_restrack_root *rt; 166 + unsigned long id = 0; 167 + 168 + port_counter = &dev->port_data[port].port_counter; 169 + rt = &dev->res[RDMA_RESTRACK_COUNTER]; 170 + xa_lock(&rt->xa); 171 + xa_for_each(&rt->xa, id, res) { 172 + if (!rdma_is_visible_in_pid_ns(res)) 173 + continue; 174 + 175 + counter = container_of(res, struct rdma_counter, res); 176 + if ((counter->device != qp->device) || (counter->port != port)) 177 + goto next; 178 + 179 + if (auto_mode_match(qp, counter, port_counter->mode.mask)) 180 + break; 181 + next: 182 + counter = NULL; 183 + } 184 + 185 + if (counter && !kref_get_unless_zero(&counter->kref)) 186 + counter = NULL; 187 + 188 + xa_unlock(&rt->xa); 189 + return counter; 190 + } 191 + 192 + static void rdma_counter_res_add(struct rdma_counter *counter, 193 + struct ib_qp *qp) 194 + { 195 + if (rdma_is_kernel_res(&qp->res)) { 196 + rdma_restrack_set_task(&counter->res, 
qp->res.kern_name); 197 + rdma_restrack_kadd(&counter->res); 198 + } else { 199 + rdma_restrack_attach_task(&counter->res, qp->res.task); 200 + rdma_restrack_uadd(&counter->res); 201 + } 202 + } 203 + 204 + static void counter_release(struct kref *kref) 205 + { 206 + struct rdma_counter *counter; 207 + 208 + counter = container_of(kref, struct rdma_counter, kref); 209 + counter->device->ops.counter_dealloc(counter); 210 + rdma_counter_free(counter); 211 + } 212 + 213 + /** 214 + * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on 215 + * the auto-mode rule 216 + */ 217 + int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) 218 + { 219 + struct rdma_port_counter *port_counter; 220 + struct ib_device *dev = qp->device; 221 + struct rdma_counter *counter; 222 + int ret; 223 + 224 + if (!rdma_is_port_valid(dev, port)) 225 + return -EINVAL; 226 + 227 + port_counter = &dev->port_data[port].port_counter; 228 + if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO) 229 + return 0; 230 + 231 + counter = rdma_get_counter_auto_mode(qp, port); 232 + if (counter) { 233 + ret = __rdma_counter_bind_qp(counter, qp); 234 + if (ret) { 235 + kref_put(&counter->kref, counter_release); 236 + return ret; 237 + } 238 + } else { 239 + counter = rdma_counter_alloc(dev, port, RDMA_COUNTER_MODE_AUTO); 240 + if (!counter) 241 + return -ENOMEM; 242 + 243 + auto_mode_init_counter(counter, qp, port_counter->mode.mask); 244 + 245 + ret = __rdma_counter_bind_qp(counter, qp); 246 + if (ret) { 247 + rdma_counter_free(counter); 248 + return ret; 249 + } 250 + 251 + rdma_counter_res_add(counter, qp); 252 + } 253 + 254 + return 0; 255 + } 256 + 257 + /** 258 + * rdma_counter_unbind_qp - Unbind a qp from a counter 259 + * @force: 260 + * true - Decrease the counter ref-count anyway (e.g., qp destroy) 261 + */ 262 + int rdma_counter_unbind_qp(struct ib_qp *qp, bool force) 263 + { 264 + struct rdma_counter *counter = qp->counter; 265 + int ret; 266 + 267 + if (!counter) 268 + 
return -EINVAL; 269 + 270 + ret = __rdma_counter_unbind_qp(qp); 271 + if (ret && !force) 272 + return ret; 273 + 274 + kref_put(&counter->kref, counter_release); 275 + return 0; 276 + } 277 + 57 278 void rdma_counter_init(struct ib_device *dev) 58 279 { 59 280 struct rdma_port_counter *port_counter;
+3
drivers/infiniband/core/device.c
··· 2471 2471 SET_DEVICE_OP(dev_ops, alloc_xrcd); 2472 2472 SET_DEVICE_OP(dev_ops, attach_mcast); 2473 2473 SET_DEVICE_OP(dev_ops, check_mr_status); 2474 + SET_DEVICE_OP(dev_ops, counter_bind_qp); 2475 + SET_DEVICE_OP(dev_ops, counter_dealloc); 2476 + SET_DEVICE_OP(dev_ops, counter_unbind_qp); 2474 2477 SET_DEVICE_OP(dev_ops, create_ah); 2475 2478 SET_DEVICE_OP(dev_ops, create_counters); 2476 2479 SET_DEVICE_OP(dev_ops, create_cq);
+9
drivers/infiniband/core/verbs.c
··· 1690 1690 } 1691 1691 } 1692 1692 1693 + /* 1694 + * Bind this qp to a counter automatically based on the rdma counter 1695 + * rules. This only set in RST2INIT with port specified 1696 + */ 1697 + if (!qp->counter && (attr_mask & IB_QP_PORT) && 1698 + ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT)) 1699 + rdma_counter_bind_qp_auto(qp, attr->port_num); 1700 + 1693 1701 ret = ib_security_modify_qp(qp, attr, attr_mask, udata); 1694 1702 if (ret) 1695 1703 goto out; ··· 1893 1885 if (!qp->uobject) 1894 1886 rdma_rw_cleanup_mrs(qp); 1895 1887 1888 + rdma_counter_unbind_qp(qp, true); 1896 1889 rdma_restrack_del(&qp->res); 1897 1890 ret = qp->device->ops.destroy_qp(qp, udata); 1898 1891 if (!ret) {
+18
include/rdma/ib_verbs.h
··· 1698 1698 * Implementation details of the RDMA core, don't use in drivers: 1699 1699 */ 1700 1700 struct rdma_restrack_entry res; 1701 + 1702 + /* The counter the qp is bind to */ 1703 + struct rdma_counter *counter; 1701 1704 }; 1702 1705 1703 1706 struct ib_dm { ··· 2488 2485 u8 pdata_len); 2489 2486 int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog); 2490 2487 int (*iw_destroy_listen)(struct iw_cm_id *cm_id); 2488 + /** 2489 + * counter_bind_qp - Bind a QP to a counter. 2490 + * @counter - The counter to be bound. If counter->id is zero then 2491 + * the driver needs to allocate a new counter and set counter->id 2492 + */ 2493 + int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp); 2494 + /** 2495 + * counter_unbind_qp - Unbind the qp from the dynamically-allocated 2496 + * counter and bind it onto the default one 2497 + */ 2498 + int (*counter_unbind_qp)(struct ib_qp *qp); 2499 + /** 2500 + * counter_dealloc -De-allocate the hw counter 2501 + */ 2502 + int (*counter_dealloc)(struct rdma_counter *counter); 2491 2503 2492 2504 DECLARE_RDMA_OBJ_SIZE(ib_ah); 2493 2505 DECLARE_RDMA_OBJ_SIZE(ib_cq);
+8
include/rdma/rdma_counter.h
··· 7 7 #define _RDMA_COUNTER_H_ 8 8 9 9 #include <linux/mutex.h> 10 + #include <linux/pid_namespace.h> 10 11 11 12 #include <rdma/ib_verbs.h> 12 13 #include <rdma/restrack.h> 13 14 #include <rdma/rdma_netlink.h> 15 + 16 + struct ib_qp; 14 17 15 18 struct auto_mode_param { 16 19 int qp_type; ··· 34 31 struct rdma_restrack_entry res; 35 32 struct ib_device *device; 36 33 uint32_t id; 34 + struct kref kref; 35 + struct rdma_counter_mode mode; 36 + struct mutex lock; 37 37 u8 port; 38 38 }; 39 39 ··· 44 38 void rdma_counter_release(struct ib_device *dev); 45 39 int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, 46 40 bool on, enum rdma_nl_counter_mask mask); 41 + int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port); 42 + int rdma_counter_unbind_qp(struct ib_qp *qp, bool force); 47 43 48 44 #endif /* _RDMA_COUNTER_H_ */