IB/mlx4: Pass send queue sizes from userspace to kernel

Pass the number of WQEs for the send queue and their size from userspace
to the kernel to avoid having to keep the QP size calculations in sync
between the kernel driver and libmlx4. This fixes a bug seen with the
current mlx4_ib driver and current libmlx4, caused by the kernel and the
library calculating different sizes for SQ WQEs. It also gives userspace
more flexibility to experiment with using multiple WQE BBs (basic blocks)
for a single SQ WQE.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Eli Cohen and committed by Roland Dreier (2446304d, 59b0ed12)

2 files changed, 51 insertions(+), 17 deletions(-)

drivers/infiniband/hw/mlx4/qp.c (+47 -16)
···
 	}
 }
 
-static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-		       enum ib_qp_type type, struct mlx4_ib_qp *qp)
+static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+		       struct mlx4_ib_qp *qp)
 {
-	/* Sanity check QP size before proceeding */
+	/* Sanity check RQ size before proceeding */
+	if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
+	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+		return -EINVAL;
+
+	qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
+
+	qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
+						    sizeof (struct mlx4_wqe_data_seg)));
+	qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
+
+	cap->max_recv_wr  = qp->rq.max;
+	cap->max_recv_sge = qp->rq.max_gs;
+
+	return 0;
+}
+
+static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
+{
+	/* Sanity check SQ size before proceeding */
 	if (cap->max_send_wr	 > dev->dev->caps.max_wqes  ||
-	    cap->max_recv_wr	 > dev->dev->caps.max_wqes  ||
 	    cap->max_send_sge	 > dev->dev->caps.max_sq_sg ||
-	    cap->max_recv_sge	 > dev->dev->caps.max_rq_sg ||
 	    cap->max_inline_data + send_wqe_overhead(type) +
 	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
 		return -EINVAL;
···
 	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
 		return -EINVAL;
 
-	qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
-	qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
-
-	qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
-						    sizeof (struct mlx4_wqe_data_seg)));
-	qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
+	qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
 
 	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
 							sizeof (struct mlx4_wqe_data_seg),
···
 		qp->sq.offset = 0;
 	}
 
-	cap->max_send_wr = qp->sq.max;
-	cap->max_recv_wr = qp->rq.max;
-	cap->max_send_sge = qp->sq.max_gs;
-	cap->max_recv_sge = qp->rq.max_gs;
+	cap->max_send_wr  = qp->sq.max;
+	cap->max_send_sge = qp->sq.max_gs;
 	cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
 		sizeof (struct mlx4_wqe_inline_seg);
+
+	return 0;
+}
+
+static int set_user_sq_size(struct mlx4_ib_qp *qp,
+			    struct mlx4_ib_create_qp *ucmd)
+{
+	qp->sq.max	 = 1 << ucmd->log_sq_bb_count;
+	qp->sq.wqe_shift = ucmd->log_sq_stride;
+
+	qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
+		       (qp->sq.max << qp->sq.wqe_shift);
 
 	return 0;
 }
···
 	qp->sq.head = 0;
 	qp->sq.tail = 0;
 
-	err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+	err = set_rq_size(dev, &init_attr->cap, qp);
 	if (err)
 		goto err;
···
 			err = -EFAULT;
 			goto err;
 		}
+
+		err = set_user_sq_size(qp, &ucmd);
+		if (err)
+			goto err;
 
 		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
 				       qp->buf_size, 0);
···
 		if (err)
 			goto err_mtt;
 	} else {
+		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+		if (err)
+			goto err;
+
 		err = mlx4_ib_db_alloc(dev, &qp->db, 0);
 		if (err)
 			goto err;

drivers/infiniband/hw/mlx4/user.h (+4 -1)
···
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	1
+#define MLX4_IB_UVERBS_ABI_VERSION	2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
···
 struct mlx4_ib_create_qp {
 	__u64	buf_addr;
 	__u64	db_addr;
+	__u8	log_sq_bb_count;
+	__u8	log_sq_stride;
+	__u8	reserved[6];
 };
 
 #endif /* MLX4_IB_USER_H */
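
For illustration, a minimal, hedged sketch of how a userspace driver such as libmlx4 might fill the two new fields before issuing the create-QP command. The struct layout and the kernel-side interpretation (sq.max = 1 << log_sq_bb_count, sq.wqe_shift = log_sq_stride) come from the patch above; log2_roundup(), fill_sq_sizes(), and the one-BB-per-WQE sizing policy are illustrative assumptions, not libmlx4 code.

#include <linux/types.h>

/* New userspace ABI struct from user.h above (ABI version 2). */
struct mlx4_ib_create_qp {
	__u64	buf_addr;
	__u64	db_addr;
	__u8	log_sq_bb_count;	/* log2 of the number of SQ basic blocks */
	__u8	log_sq_stride;		/* log2 of the SQ stride in bytes */
	__u8	reserved[6];
};

/* Smallest log such that (1 << log) >= val; val must be non-zero. */
static __u8 log2_roundup(unsigned int val)
{
	__u8 log = 0;

	while ((1U << log) < val)
		++log;
	return log;
}

/*
 * Hypothetical helper (not libmlx4 code): fill the new fields assuming one
 * basic block (BB) per send WQE, so the stride is simply the WQE size the
 * library calculated.  The kernel then sizes the SQ buffer as
 * (1 << log_sq_bb_count) << log_sq_stride bytes (see set_user_sq_size()).
 */
static void fill_sq_sizes(struct mlx4_ib_create_qp *cmd,
			  unsigned int max_send_wr, unsigned int wqe_size)
{
	cmd->log_sq_bb_count = log2_roundup(max_send_wr ? max_send_wr : 1);
	cmd->log_sq_stride   = log2_roundup(wqe_size);
}

Passing log2 values in __u8 fields keeps the command struct small and guarantees the kernel sees power-of-two SQ sizes without having to repeat the library's WQE size calculation.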