Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/mlx5: Lower setting the umem's PAS for SRQ

Some of the SRQ types are created using a WQ, and the WQ requires a
different parameter set for mlx5_umem_find_best_quantized_pgoff() because
it has a 5-bit page_offset.

Add the umem to the mlx5_srq_attr and defer computing the PAS data until
the code has figured out what kind of mailbox to use. Compute the PAS
directly from the umem for each of the four unique mailbox types.

This also avoids allocating memory to store the user PAS; instead, it is
written directly to the mailbox, as in most other cases.

Fixes: 01949d0109ee ("net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0")
Link: https://lore.kernel.org/r/20201115114311.136250-8-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

+79 -29
+2 -25
drivers/infiniband/hw/mlx5/srq.c
··· 51 51 udata, struct mlx5_ib_ucontext, ibucontext); 52 52 size_t ucmdlen; 53 53 int err; 54 - unsigned int page_offset_quantized; 55 - unsigned int page_size; 56 54 u32 uidx = MLX5_IB_DEFAULT_UIDX; 57 55 58 56 ucmdlen = min(udata->inlen, sizeof(ucmd)); ··· 82 84 err = PTR_ERR(srq->umem); 83 85 return err; 84 86 } 85 - 86 - page_size = mlx5_umem_find_best_quantized_pgoff( 87 - srq->umem, srqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT, 88 - page_offset, 64, &page_offset_quantized); 89 - if (!page_size) { 90 - mlx5_ib_warn(dev, "bad offset\n"); 91 - goto err_umem; 92 - } 93 - 94 - in->pas = kvcalloc(ib_umem_num_dma_blocks(srq->umem, page_size), 95 - sizeof(*in->pas), GFP_KERNEL); 96 - if (!in->pas) { 97 - err = -ENOMEM; 98 - goto err_umem; 99 - } 100 - 101 - mlx5_ib_populate_pas(srq->umem, page_size, in->pas, 0); 87 + in->umem = srq->umem; 102 88 103 89 err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db); 104 90 if (err) { 105 91 mlx5_ib_dbg(dev, "map doorbell failed\n"); 106 - goto err_in; 92 + goto err_umem; 107 93 } 108 94 109 - in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; 110 - in->page_offset = page_offset_quantized; 111 95 in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; 112 96 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && 113 97 in->type != IB_SRQT_BASIC) 114 98 in->user_index = uidx; 115 99 116 100 return 0; 117 - 118 - err_in: 119 - kvfree(in->pas); 120 101 121 102 err_umem: 122 103 ib_umem_release(srq->umem);
+1
drivers/infiniband/hw/mlx5/srq.h
··· 28 28 u32 user_index; 29 29 u64 db_record; 30 30 __be64 *pas; 31 + struct ib_umem *umem; 31 32 u32 tm_log_list_size; 32 33 u32 tm_next_tag; 33 34 u32 tm_hw_phase_cnt;
+76 -4
drivers/infiniband/hw/mlx5/srq_cmd.c
··· 92 92 return srq; 93 93 } 94 94 95 + static int __set_srq_page_size(struct mlx5_srq_attr *in, 96 + unsigned long page_size) 97 + { 98 + if (!page_size) 99 + return -EINVAL; 100 + in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT; 101 + 102 + if (WARN_ON(get_pas_size(in) != 103 + ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64))) 104 + return -EINVAL; 105 + return 0; 106 + } 107 + 108 + #define set_srq_page_size(in, typ, log_pgsz_fld) \ 109 + __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \ 110 + (in)->umem, typ, log_pgsz_fld, \ 111 + MLX5_ADAPTER_PAGE_SHIFT, page_offset, \ 112 + 64, &(in)->page_offset)) 113 + 95 114 static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, 96 115 struct mlx5_srq_attr *in) 97 116 { ··· 121 102 int pas_size; 122 103 int inlen; 123 104 int err; 105 + 106 + if (in->umem) { 107 + err = set_srq_page_size(in, srqc, log_page_size); 108 + if (err) 109 + return err; 110 + } 124 111 125 112 pas_size = get_pas_size(in); 126 113 inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; ··· 139 114 pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); 140 115 141 116 set_srqc(srqc, in); 142 - memcpy(pas, in->pas, pas_size); 117 + if (in->umem) 118 + mlx5_ib_populate_pas( 119 + in->umem, 120 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 121 + pas, 0); 122 + else 123 + memcpy(pas, in->pas, pas_size); 143 124 144 125 MLX5_SET(create_srq_in, create_in, opcode, 145 126 MLX5_CMD_OP_CREATE_SRQ); ··· 225 194 int inlen; 226 195 int err; 227 196 197 + if (in->umem) { 198 + err = set_srq_page_size(in, xrc_srqc, log_page_size); 199 + if (err) 200 + return err; 201 + } 202 + 228 203 pas_size = get_pas_size(in); 229 204 inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; 230 205 create_in = kvzalloc(inlen, GFP_KERNEL); ··· 244 207 245 208 set_srqc(xrc_srqc, in); 246 209 MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); 247 - memcpy(pas, in->pas, pas_size); 210 + if (in->umem) 211 
+ mlx5_ib_populate_pas( 212 + in->umem, 213 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 214 + pas, 0); 215 + else 216 + memcpy(pas, in->pas, pas_size); 248 217 MLX5_SET(create_xrc_srq_in, create_in, opcode, 249 218 MLX5_CMD_OP_CREATE_XRC_SRQ); 250 219 ··· 332 289 void *create_in = NULL; 333 290 void *rmpc; 334 291 void *wq; 292 + void *pas; 335 293 int pas_size; 336 294 int outlen; 337 295 int inlen; 338 296 int err; 297 + 298 + if (in->umem) { 299 + err = set_srq_page_size(in, wq, log_wq_pg_sz); 300 + if (err) 301 + return err; 302 + } 339 303 340 304 pas_size = get_pas_size(in); 341 305 inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; ··· 359 309 360 310 MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); 361 311 MLX5_SET(create_rmp_in, create_in, uid, in->uid); 312 + pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas); 313 + 362 314 set_wq(wq, in); 363 - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); 315 + if (in->umem) 316 + mlx5_ib_populate_pas( 317 + in->umem, 318 + 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 319 + pas, 0); 320 + else 321 + memcpy(pas, in->pas, pas_size); 364 322 365 323 MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); 366 324 err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); ··· 479 421 void *create_in; 480 422 void *xrqc; 481 423 void *wq; 424 + void *pas; 482 425 int pas_size; 483 426 int inlen; 484 427 int err; 428 + 429 + if (in->umem) { 430 + err = set_srq_page_size(in, wq, log_wq_pg_sz); 431 + if (err) 432 + return err; 433 + } 485 434 486 435 pas_size = get_pas_size(in); 487 436 inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; ··· 498 433 499 434 xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); 500 435 wq = MLX5_ADDR_OF(xrqc, xrqc, wq); 436 + pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas); 501 437 502 438 set_wq(wq, in); 503 - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); 439 + if (in->umem) 440 + mlx5_ib_populate_pas( 441 + in->umem, 442 + 1UL << 
(in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT), 443 + pas, 0); 444 + else 445 + memcpy(pas, in->pas, pas_size); 504 446 505 447 if (in->type == IB_SRQT_TM) { 506 448 MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);