Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/mlx4: Add support for resizing CQs

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Vladimir Sokolovsky and committed by Roland Dreier
bbf8eed1 3fdcb97f

+300 -33
+259 -33
drivers/infiniband/hw/mlx4/cq.c
··· 93 93 return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period); 94 94 } 95 95 96 + static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent) 97 + { 98 + int err; 99 + 100 + err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), 101 + PAGE_SIZE * 2, &buf->buf); 102 + 103 + if (err) 104 + goto out; 105 + 106 + err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, 107 + &buf->mtt); 108 + if (err) 109 + goto err_buf; 110 + 111 + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); 112 + if (err) 113 + goto err_mtt; 114 + 115 + return 0; 116 + 117 + err_mtt: 118 + mlx4_mtt_cleanup(dev->dev, &buf->mtt); 119 + 120 + err_buf: 121 + mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), 122 + &buf->buf); 123 + 124 + out: 125 + return err; 126 + } 127 + 128 + static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) 129 + { 130 + mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); 131 + } 132 + 133 + static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, 134 + struct mlx4_ib_cq_buf *buf, struct ib_umem **umem, 135 + u64 buf_addr, int cqe) 136 + { 137 + int err; 138 + 139 + *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), 140 + IB_ACCESS_LOCAL_WRITE); 141 + if (IS_ERR(*umem)) 142 + return PTR_ERR(*umem); 143 + 144 + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem), 145 + ilog2((*umem)->page_size), &buf->mtt); 146 + if (err) 147 + goto err_buf; 148 + 149 + err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem); 150 + if (err) 151 + goto err_mtt; 152 + 153 + return 0; 154 + 155 + err_mtt: 156 + mlx4_mtt_cleanup(dev->dev, &buf->mtt); 157 + 158 + err_buf: 159 + ib_umem_release(*umem); 160 + 161 + return err; 162 + } 163 + 96 164 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, 97 165 struct ib_ucontext *context, 98 166 struct ib_udata *udata) ··· 168 100 struct 
mlx4_ib_dev *dev = to_mdev(ibdev); 169 101 struct mlx4_ib_cq *cq; 170 102 struct mlx4_uar *uar; 171 - int buf_size; 172 103 int err; 173 104 174 105 if (entries < 1 || entries > dev->dev->caps.max_cqes) ··· 179 112 180 113 entries = roundup_pow_of_two(entries + 1); 181 114 cq->ibcq.cqe = entries - 1; 182 - buf_size = entries * sizeof (struct mlx4_cqe); 115 + mutex_init(&cq->resize_mutex); 183 116 spin_lock_init(&cq->lock); 117 + cq->resize_buf = NULL; 118 + cq->resize_umem = NULL; 184 119 185 120 if (context) { 186 121 struct mlx4_ib_create_cq ucmd; ··· 192 123 goto err_cq; 193 124 } 194 125 195 - cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size, 196 - IB_ACCESS_LOCAL_WRITE); 197 - if (IS_ERR(cq->umem)) { 198 - err = PTR_ERR(cq->umem); 126 + err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, 127 + ucmd.buf_addr, entries); 128 + if (err) 199 129 goto err_cq; 200 - } 201 - 202 - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem), 203 - ilog2(cq->umem->page_size), &cq->buf.mtt); 204 - if (err) 205 - goto err_buf; 206 - 207 - err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem); 208 - if (err) 209 - goto err_mtt; 210 130 211 131 err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, 212 132 &cq->db); ··· 213 155 *cq->mcq.set_ci_db = 0; 214 156 *cq->mcq.arm_db = 0; 215 157 216 - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) { 217 - err = -ENOMEM; 158 + err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries); 159 + if (err) 218 160 goto err_db; 219 - } 220 - 221 - err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift, 222 - &cq->buf.mtt); 223 - if (err) 224 - goto err_buf; 225 - 226 - err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf); 227 - if (err) 228 - goto err_mtt; 229 161 230 162 uar = &dev->priv_uar; 231 163 } ··· 243 195 err_mtt: 244 196 mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); 245 197 246 - err_buf: 247 198 if (context) 248 199 ib_umem_release(cq->umem); 249 200 else 250 - 
mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe), 251 - &cq->buf.buf); 201 + mlx4_ib_free_cq_buf(dev, &cq->buf, entries); 252 202 253 203 err_db: 254 204 if (!context) ··· 256 210 kfree(cq); 257 211 258 212 return ERR_PTR(err); 213 + } 214 + 215 + static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, 216 + int entries) 217 + { 218 + int err; 219 + 220 + if (cq->resize_buf) 221 + return -EBUSY; 222 + 223 + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); 224 + if (!cq->resize_buf) 225 + return -ENOMEM; 226 + 227 + err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); 228 + if (err) { 229 + kfree(cq->resize_buf); 230 + cq->resize_buf = NULL; 231 + return err; 232 + } 233 + 234 + cq->resize_buf->cqe = entries - 1; 235 + 236 + return 0; 237 + } 238 + 239 + static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, 240 + int entries, struct ib_udata *udata) 241 + { 242 + struct mlx4_ib_resize_cq ucmd; 243 + int err; 244 + 245 + if (cq->resize_umem) 246 + return -EBUSY; 247 + 248 + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) 249 + return -EFAULT; 250 + 251 + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); 252 + if (!cq->resize_buf) 253 + return -ENOMEM; 254 + 255 + err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf, 256 + &cq->resize_umem, ucmd.buf_addr, entries); 257 + if (err) { 258 + kfree(cq->resize_buf); 259 + cq->resize_buf = NULL; 260 + return err; 261 + } 262 + 263 + cq->resize_buf->cqe = entries - 1; 264 + 265 + return 0; 266 + } 267 + 268 + static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) 269 + { 270 + u32 i; 271 + 272 + i = cq->mcq.cons_index; 273 + while (get_sw_cqe(cq, i & cq->ibcq.cqe)) 274 + ++i; 275 + 276 + return i - cq->mcq.cons_index; 277 + } 278 + 279 + static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) 280 + { 281 + struct mlx4_cqe *cqe; 282 + int i; 283 + 284 + i = cq->mcq.cons_index; 285 + cqe = 
get_cqe(cq, i & cq->ibcq.cqe); 286 + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { 287 + memcpy(get_cqe_from_buf(&cq->resize_buf->buf, 288 + (i + 1) & cq->resize_buf->cqe), 289 + get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); 290 + cqe = get_cqe(cq, ++i & cq->ibcq.cqe); 291 + } 292 + ++cq->mcq.cons_index; 293 + } 294 + 295 + int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) 296 + { 297 + struct mlx4_ib_dev *dev = to_mdev(ibcq->device); 298 + struct mlx4_ib_cq *cq = to_mcq(ibcq); 299 + int outst_cqe; 300 + int err; 301 + 302 + mutex_lock(&cq->resize_mutex); 303 + 304 + if (entries < 1 || entries > dev->dev->caps.max_cqes) { 305 + err = -EINVAL; 306 + goto out; 307 + } 308 + 309 + entries = roundup_pow_of_two(entries + 1); 310 + if (entries == ibcq->cqe + 1) { 311 + err = 0; 312 + goto out; 313 + } 314 + 315 + if (ibcq->uobject) { 316 + err = mlx4_alloc_resize_umem(dev, cq, entries, udata); 317 + if (err) 318 + goto out; 319 + } else { 320 + /* Can't be smaller than the number of outstanding CQEs */ 321 + outst_cqe = mlx4_ib_get_outstanding_cqes(cq); 322 + if (entries < outst_cqe + 1) { 323 + err = 0; 324 + goto out; 325 + } 326 + 327 + err = mlx4_alloc_resize_buf(dev, cq, entries); 328 + if (err) 329 + goto out; 330 + } 331 + 332 + err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt); 333 + if (err) 334 + goto err_buf; 335 + 336 + if (ibcq->uobject) { 337 + cq->buf = cq->resize_buf->buf; 338 + cq->ibcq.cqe = cq->resize_buf->cqe; 339 + ib_umem_release(cq->umem); 340 + cq->umem = cq->resize_umem; 341 + 342 + kfree(cq->resize_buf); 343 + cq->resize_buf = NULL; 344 + cq->resize_umem = NULL; 345 + } else { 346 + spin_lock_irq(&cq->lock); 347 + if (cq->resize_buf) { 348 + mlx4_ib_cq_resize_copy_cqes(cq); 349 + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); 350 + cq->buf = cq->resize_buf->buf; 351 + cq->ibcq.cqe = cq->resize_buf->cqe; 352 + 353 + kfree(cq->resize_buf); 354
+ cq->resize_buf = NULL; 355 + } 356 + spin_unlock_irq(&cq->lock); 357 + } 358 + 359 + goto out; 360 + 361 + err_buf: 362 + if (!ibcq->uobject) 363 + mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf, 364 + cq->resize_buf->cqe); 365 + 366 + kfree(cq->resize_buf); 367 + cq->resize_buf = NULL; 368 + 369 + if (cq->resize_umem) { 370 + ib_umem_release(cq->resize_umem); 371 + cq->resize_umem = NULL; 372 + } 373 + 374 + out: 375 + mutex_unlock(&cq->resize_mutex); 376 + return err; 259 377 } 260 378 261 379 int mlx4_ib_destroy_cq(struct ib_cq *cq) ··· 434 224 mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); 435 225 ib_umem_release(mcq->umem); 436 226 } else { 437 - mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe), 438 - &mcq->buf.buf); 227 + mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1); 439 228 mlx4_ib_db_free(dev, &mcq->db); 440 229 } 441 230 ··· 541 332 u32 g_mlpath_rqpn; 542 333 u16 wqe_ctr; 543 334 335 + repoll: 544 336 cqe = next_cqe_sw(cq); 545 337 if (!cqe) 546 338 return -EAGAIN; ··· 562 352 is_send)) { 563 353 printk(KERN_WARNING "Completion for NOP opcode detected!\n"); 564 354 return -EINVAL; 355 + } 356 + 357 + /* Resize CQ in progress */ 358 + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { 359 + if (cq->resize_buf) { 360 + struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device); 361 + 362 + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); 363 + cq->buf = cq->resize_buf->buf; 364 + cq->ibcq.cqe = cq->resize_buf->cqe; 365 + 366 + kfree(cq->resize_buf); 367 + cq->resize_buf = NULL; 368 + } 369 + 370 + goto repoll; 565 371 } 566 372 567 373 if (!*cur_qp ||
+2
drivers/infiniband/hw/mlx4/main.c
··· 571 571 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 572 572 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 573 573 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 574 + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 574 575 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 575 576 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 576 577 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | ··· 611 610 ibdev->ib_dev.post_recv = mlx4_ib_post_recv; 612 611 ibdev->ib_dev.create_cq = mlx4_ib_create_cq; 613 612 ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; 613 + ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; 614 614 ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; 615 615 ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; 616 616 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
+9
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 78 78 struct mlx4_mtt mtt; 79 79 }; 80 80 81 + struct mlx4_ib_cq_resize { 82 + struct mlx4_ib_cq_buf buf; 83 + int cqe; 84 + }; 85 + 81 86 struct mlx4_ib_cq { 82 87 struct ib_cq ibcq; 83 88 struct mlx4_cq mcq; 84 89 struct mlx4_ib_cq_buf buf; 90 + struct mlx4_ib_cq_resize *resize_buf; 85 91 struct mlx4_ib_db db; 86 92 spinlock_t lock; 93 + struct mutex resize_mutex; 87 94 struct ib_umem *umem; 95 + struct ib_umem *resize_umem; 88 96 }; 89 97 90 98 struct mlx4_ib_mr { ··· 263 255 int mlx4_ib_dereg_mr(struct ib_mr *mr); 264 256 265 257 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); 258 + int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); 266 259 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, 267 260 struct ib_ucontext *context, 268 261 struct ib_udata *udata);
+28
drivers/net/mlx4/cq.c
··· 159 159 } 160 160 EXPORT_SYMBOL_GPL(mlx4_cq_modify); 161 161 162 + int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, 163 + int entries, struct mlx4_mtt *mtt) 164 + { 165 + struct mlx4_cmd_mailbox *mailbox; 166 + struct mlx4_cq_context *cq_context; 167 + u64 mtt_addr; 168 + int err; 169 + 170 + mailbox = mlx4_alloc_cmd_mailbox(dev); 171 + if (IS_ERR(mailbox)) 172 + return PTR_ERR(mailbox); 173 + 174 + cq_context = mailbox->buf; 175 + memset(cq_context, 0, sizeof *cq_context); 176 + 177 + cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24); 178 + cq_context->log_page_size = mtt->page_shift - 12; 179 + mtt_addr = mlx4_mtt_addr(dev, mtt); 180 + cq_context->mtt_base_addr_h = mtt_addr >> 32; 181 + cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); 182 + 183 + err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1); 184 + 185 + mlx4_free_cmd_mailbox(dev, mailbox); 186 + return err; 187 + } 188 + EXPORT_SYMBOL_GPL(mlx4_cq_resize); 189 + 162 190 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, 163 191 struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq) 164 192 {
+2
include/linux/mlx4/cq.h
··· 132 132 133 133 int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq, 134 134 u16 count, u16 period); 135 + int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq, 136 + int entries, struct mlx4_mtt *mtt); 135 137 136 138 #endif /* MLX4_CQ_H */