Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
IB/mlx4: Fix MTT leakage in resize CQ
IB/ehca: Fix problem with generated flush work completions
IB/ehca: Change misleading error message on memory hotplug
mlx4_core: Save/restore default port IB capability mask

+110 -34
+3 -1
drivers/infiniband/hw/ehca/ehca_classes.h
···
 /* struct for tracking if cqes have been reported to the application */
 struct ehca_qmap_entry {
         u16 app_wr_id;
-        u16 reported;
+        u8 reported;
+        u8 cqe_req;
 };
 
 struct ehca_queue_map {
···
         unsigned int entries;
         unsigned int tail;
         unsigned int left_to_poll;
+        unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
 };
 
 struct ehca_qp {
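Editor's note: the qmap entry shrinks reported from u16 to u8 and adds a cqe_req flag in the freed byte, presumably so the per-WQE bookkeeping gains a field without growing the map. A quick standalone check of that layout, a sketch using local typedefs in place of the kernel's u8/u16:

#include <stdio.h>

typedef unsigned short u16;
typedef unsigned char u8;

/* old layout */
struct qmap_entry_old { u16 app_wr_id; u16 reported; };
/* new layout: one extra flag, same footprint */
struct qmap_entry_new { u16 app_wr_id; u8 reported; u8 cqe_req; };

int main(void)
{
        /* both print as 4 bytes on common ABIs */
        printf("old=%zu new=%zu bytes\n",
               sizeof(struct qmap_entry_old), sizeof(struct qmap_entry_new));
        return 0;
}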
+1 -2
drivers/infiniband/hw/ehca/ehca_main.c
···
                 if (printk_timed_ratelimit(&ehca_dmem_warn_time,
                                            30 * 1000))
                         ehca_gen_err("DMEM operations are not allowed"
-                                     "as long as an ehca adapter is"
-                                     "attached to the LPAR");
+                                     "in conjunction with eHCA");
                 return NOTIFY_BAD;
         }
 }
+20 -6
drivers/infiniband/hw/ehca/ehca_qp.c
···
 {
         int i;
 
-        qmap->tail = 0;
-        for (i = 0; i < qmap->entries; i++)
+        qmap->tail = qmap->entries - 1;
+        qmap->left_to_poll = 0;
+        qmap->next_wqe_idx = 0;
+        for (i = 0; i < qmap->entries; i++) {
                 qmap->map[i].reported = 1;
+                qmap->map[i].cqe_req = 0;
+        }
 }
 
 /*
···
         void *wqe_v;
         u64 q_ofs;
         u32 wqe_idx;
+        unsigned int tail_idx;
 
         /* convert real to abs address */
         wqe_p = wqe_p & (~(1UL << 63));
···
                 return -EFAULT;
         }
 
+        tail_idx = (qmap->tail + 1) % qmap->entries;
         wqe_idx = q_ofs / ipz_queue->qe_size;
-        if (wqe_idx < qmap->tail)
-                qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
-        else
-                qmap->left_to_poll = wqe_idx - qmap->tail;
 
+        /* check all processed wqes, whether a cqe is requested or not */
+        while (tail_idx != wqe_idx) {
+                if (qmap->map[tail_idx].cqe_req)
+                        qmap->left_to_poll++;
+                tail_idx = (tail_idx + 1) % qmap->entries;
+        }
+        /* save index in queue, where we have to start flushing */
+        qmap->next_wqe_idx = wqe_idx;
         return 0;
 }
···
         } else {
                 spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
                 my_qp->sq_map.left_to_poll = 0;
+                my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+                                             my_qp->sq_map.entries;
                 spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
 
                 spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
                 my_qp->rq_map.left_to_poll = 0;
+                my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+                                             my_qp->rq_map.entries;
                 spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
         }
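Editor's note: the reworked accounting walks the ring from the entry after the consumer tail up to the failing WQE and counts only entries that actually requested a CQE, instead of deriving left_to_poll from raw index arithmetic; it also records where flushing must later start. A minimal, self-contained sketch of that ring walk, with simplified stand-ins for the driver's queue_map/qmap_entry structures (not the real ehca types):

#include <stdio.h>

/* simplified stand-ins for ehca_qmap_entry / ehca_queue_map */
struct qmap_entry { unsigned char cqe_req; };

struct queue_map {
        struct qmap_entry map[8];
        unsigned int entries;       /* ring size */
        unsigned int tail;          /* last index a CQE was seen for */
        unsigned int left_to_poll;
        unsigned int next_wqe_idx;
};

/* count CQE-requesting WQEs between tail+1 and the failing WQE index */
static void calc_left_cqes(struct queue_map *qmap, unsigned int wqe_idx)
{
        unsigned int tail_idx = (qmap->tail + 1) % qmap->entries;

        while (tail_idx != wqe_idx) {
                if (qmap->map[tail_idx].cqe_req)
                        qmap->left_to_poll++;
                tail_idx = (tail_idx + 1) % qmap->entries;
        }
        qmap->next_wqe_idx = wqe_idx;   /* flushing starts here */
}

int main(void)
{
        struct queue_map q = { .entries = 8, .tail = 6 };

        /* three posted WQEs after the tail, only two signaled */
        q.map[7].cqe_req = 1;
        q.map[0].cqe_req = 0;
        q.map[1].cqe_req = 1;

        calc_left_cqes(&q, 2);
        printf("left_to_poll=%u next_wqe_idx=%u\n",
               q.left_to_poll, q.next_wqe_idx);        /* prints 2 and 2 */
        return 0;
}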
+33 -24
drivers/infiniband/hw/ehca/ehca_reqs.c
···
 
         qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
         qmap_entry->reported = 0;
+        qmap_entry->cqe_req = 0;
 
         switch (send_wr->opcode) {
         case IB_WR_SEND:
···
 
         if ((send_wr->send_flags & IB_SEND_SIGNALED ||
             qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-            && !hidden)
+            && !hidden) {
                 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+                qmap_entry->cqe_req = 1;
+        }
 
         if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
             send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
···
                 qmap_entry = &my_qp->rq_map.map[rq_map_idx];
                 qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
                 qmap_entry->reported = 0;
+                qmap_entry->cqe_req = 1;
 
                 wqe_cnt++;
         } /* eof for cur_recv_wr */
···
                 goto repoll;
         wc->qp = &my_qp->ib_qp;
 
-        if (is_error) {
-                /*
-                 * set left_to_poll to 0 because in error state, we will not
-                 * get any additional CQEs
-                 */
-                ehca_add_to_err_list(my_qp, 1);
-                my_qp->sq_map.left_to_poll = 0;
-
-                if (HAS_RQ(my_qp))
-                        ehca_add_to_err_list(my_qp, 0);
-                my_qp->rq_map.left_to_poll = 0;
-        }
-
         qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
         if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
                 /* We got a send completion. */
···
         else
                 /* We got a receive completion. */
                 qmap = &my_qp->rq_map;
+
+        /* advance the tail pointer */
+        qmap->tail = qmap_tail_idx;
+
+        if (is_error) {
+                /*
+                 * set left_to_poll to 0 because in error state, we will not
+                 * get any additional CQEs
+                 */
+                my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+                                             my_qp->sq_map.entries;
+                my_qp->sq_map.left_to_poll = 0;
+                ehca_add_to_err_list(my_qp, 1);
+
+                my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+                                             my_qp->rq_map.entries;
+                my_qp->rq_map.left_to_poll = 0;
+                if (HAS_RQ(my_qp))
+                        ehca_add_to_err_list(my_qp, 0);
+        }
 
         qmap_entry = &qmap->map[qmap_tail_idx];
         if (qmap_entry->reported) {
···
 
         wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
         qmap_entry->reported = 1;
-
-        /* this is a proper completion, we need to advance the tail pointer */
-        if (++qmap->tail == qmap->entries)
-                qmap->tail = 0;
 
         /* if left_to_poll is decremented to 0, add the QP to the error list */
         if (qmap->left_to_poll > 0) {
···
         else
                 qmap = &my_qp->rq_map;
 
-        qmap_entry = &qmap->map[qmap->tail];
+        qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
         while ((nr < num_entries) && (qmap_entry->reported == 0)) {
                 /* generate flush CQE */
+
                 memset(wc, 0, sizeof(*wc));
 
-                offset = qmap->tail * ipz_queue->qe_size;
+                offset = qmap->next_wqe_idx * ipz_queue->qe_size;
                 wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
                 if (!wqe) {
                         ehca_err(cq->device, "Invalid wqe offset=%#lx on "
···
 
                 wc->qp = &my_qp->ib_qp;
 
-                /* mark as reported and advance tail pointer */
+                /* mark as reported and advance next_wqe pointer */
                 qmap_entry->reported = 1;
-                if (++qmap->tail == qmap->entries)
-                        qmap->tail = 0;
-                qmap_entry = &qmap->map[qmap->tail];
+                qmap->next_wqe_idx++;
+                if (qmap->next_wqe_idx == qmap->entries)
+                        qmap->next_wqe_idx = 0;
+                qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
                 wc++; nr++;
         }
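Editor's note: on the flush path, completions are now generated starting at next_wqe_idx rather than at the tail, stopping at the first entry that has already been reported. A small standalone sketch of that loop using the same simplified structures as above; generate_flush_wc() is a hypothetical stand-in for building the real flush work completion:

#include <stdio.h>

struct qmap_entry { unsigned char reported; };

struct queue_map {
        struct qmap_entry map[8];
        unsigned int entries;
        unsigned int next_wqe_idx;      /* first WQE to flush */
};

/* stand-in for constructing one flush work completion */
static void generate_flush_wc(unsigned int idx)
{
        printf("flush wc for wqe %u\n", idx);
}

/* emit up to num_entries flush completions, stopping at a reported WQE */
static int generate_flush_cqes(struct queue_map *qmap, int num_entries)
{
        int nr = 0;

        while (nr < num_entries &&
               qmap->map[qmap->next_wqe_idx].reported == 0) {
                generate_flush_wc(qmap->next_wqe_idx);
                qmap->map[qmap->next_wqe_idx].reported = 1;
                qmap->next_wqe_idx = (qmap->next_wqe_idx + 1) % qmap->entries;
                nr++;
        }
        return nr;
}

int main(void)
{
        struct queue_map q = { .entries = 8, .next_wqe_idx = 5 };
        int i;

        /* everything before index 5 was already reported normally */
        for (i = 0; i < 5; i++)
                q.map[i].reported = 1;

        printf("generated %d flush completions\n",
               generate_flush_cqes(&q, 16));    /* flushes WQEs 5, 6, 7 */
        return 0;
}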
+5
drivers/infiniband/hw/mlx4/cq.c
···
 {
         struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
         struct mlx4_ib_cq *cq = to_mcq(ibcq);
+        struct mlx4_mtt mtt;
         int outst_cqe;
         int err;
 
···
                 goto out;
         }
 
+        mtt = cq->buf.mtt;
+
         err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
         if (err)
                 goto err_buf;
 
+        mlx4_mtt_cleanup(dev->dev, &mtt);
         if (ibcq->uobject) {
                 cq->buf = cq->resize_buf->buf;
                 cq->ibcq.cqe = cq->resize_buf->cqe;
···
         goto out;
 
 err_buf:
+        mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
         if (!ibcq->uobject)
                 mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
                                     cq->resize_buf->cqe);
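Editor's note: the MTT leak came from overwriting cq->buf (and with it the old buffer's MTT handle) after a successful resize, and from not releasing the freshly allocated MTT when the resize command failed. The fix copies the old MTT aside, frees it once the firmware command succeeds, and frees the new one on the error path. A generic sketch of that save/commit/rollback ordering, with a hypothetical struct mtt and mtt_cleanup() standing in for the mlx4 resources:

#include <stdio.h>

/* hypothetical stand-in for a hardware translation-table handle */
struct mtt { int id; };

static void mtt_cleanup(struct mtt *m)
{
        printf("freeing mtt %d\n", m->id);
}

/* pretend firmware command; pass fail=1 to exercise the error path */
static int fw_resize(int fail)
{
        return fail ? -1 : 0;
}

static int resize_buf(struct mtt *cur, struct mtt *new_mtt, int fail)
{
        struct mtt old = *cur;          /* save the old handle first */
        int err = fw_resize(fail);

        if (err) {
                mtt_cleanup(new_mtt);   /* roll back the new allocation */
                return err;
        }
        mtt_cleanup(&old);              /* commit: old table is now unused */
        *cur = *new_mtt;
        return 0;
}

int main(void)
{
        struct mtt cur = { .id = 1 }, next = { .id = 2 };

        resize_buf(&cur, &next, 0);     /* success path: frees mtt 1 */
        printf("current mtt is now %d\n", cur.id);
        return 0;
}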
+8
drivers/net/mlx4/main.c
···
         struct mlx4_priv *priv = mlx4_priv(dev);
         int err;
         int port;
+        __be32 ib_port_default_caps;
 
         err = mlx4_init_uar_table(dev);
         if (err) {
···
         }
 
         for (port = 1; port <= dev->caps.num_ports; port++) {
+                ib_port_default_caps = 0;
+                err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
+                if (err)
+                        mlx4_warn(dev, "failed to get port %d default "
+                                  "ib capabilities (%d). Continuing with "
+                                  "caps = 0\n", port, err);
+                dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
                 err = mlx4_SET_PORT(dev, port);
                 if (err) {
                         mlx4_err(dev, "Failed to set port %d, aborting\n",
+1
drivers/net/mlx4/mlx4.h
···
 void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
 
 #endif /* MLX4_H */
+38 -1
drivers/net/mlx4/port.c
···
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
 
+int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
+{
+        struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
+        u8 *inbuf, *outbuf;
+        int err;
+
+        inmailbox = mlx4_alloc_cmd_mailbox(dev);
+        if (IS_ERR(inmailbox))
+                return PTR_ERR(inmailbox);
+
+        outmailbox = mlx4_alloc_cmd_mailbox(dev);
+        if (IS_ERR(outmailbox)) {
+                mlx4_free_cmd_mailbox(dev, inmailbox);
+                return PTR_ERR(outmailbox);
+        }
+
+        inbuf = inmailbox->buf;
+        outbuf = outmailbox->buf;
+        memset(inbuf, 0, 256);
+        memset(outbuf, 0, 256);
+        inbuf[0] = 1;
+        inbuf[1] = 1;
+        inbuf[2] = 1;
+        inbuf[3] = 1;
+        *(__be16 *) (&inbuf[16]) = cpu_to_be16(0x0015);
+        *(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
+
+        err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
+                           MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+        if (!err)
+                *caps = *(__be32 *) (outbuf + 84);
+        mlx4_free_cmd_mailbox(dev, inmailbox);
+        mlx4_free_cmd_mailbox(dev, outmailbox);
+        return err;
+}
+
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 {
         struct mlx4_cmd_mailbox *mailbox;
···
                 ((u8 *) mailbox->buf)[3] = 6;
                 ((__be16 *) mailbox->buf)[4] = cpu_to_be16(1 << 15);
                 ((__be16 *) mailbox->buf)[6] = cpu_to_be16(1 << 15);
-        }
+        } else
+                ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
         err = mlx4_cmd(dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
                        MLX4_CMD_TIME_CLASS_B);
 
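Editor's note: mlx4_get_port_ib_caps() drives a MAD_IFC firmware command carrying a management datagram: the first four bytes appear to select base version, class, class version and the Get method, byte 16 holds the attribute ID 0x0015 (PortInfo) and byte 20 the attribute modifier (the port number); the capability mask is then read from byte 84 of the response, i.e. 20 bytes into the attribute payload that begins at byte 64. These field names are inferred from the constants in the diff rather than taken from a header. A rough userspace sketch of the same request layout:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>  /* htons/htonl as cpu_to_be helpers */

/* assumed offsets, mirroring the constants used in mlx4_get_port_ib_caps() */
#define MAD_ATTR_PORT_INFO      0x0015
#define MAD_DATA_OFFSET         64
#define PORT_INFO_CAP_OFFSET    20

static void build_port_info_query(uint8_t *buf, uint32_t port)
{
        uint16_t attr_id = htons(MAD_ATTR_PORT_INFO);
        uint32_t attr_mod = htonl(port);

        memset(buf, 0, 256);
        buf[0] = 1;                             /* base version */
        buf[1] = 1;                             /* management class */
        buf[2] = 1;                             /* class version */
        buf[3] = 1;                             /* method: Get */
        memcpy(buf + 16, &attr_id, sizeof(attr_id));
        memcpy(buf + 20, &attr_mod, sizeof(attr_mod));  /* attr mod = port */
}

int main(void)
{
        uint8_t inbuf[256];

        build_port_info_query(inbuf, 1);
        printf("cap mask would be read from response byte %d\n",
               MAD_DATA_OFFSET + PORT_INFO_CAP_OFFSET);
        return 0;
}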
+1
include/linux/mlx4/device.h
···
         int num_ports;
         int vl_cap[MLX4_MAX_PORTS + 1];
         int ib_mtu_cap[MLX4_MAX_PORTS + 1];
+        __be32 ib_port_def_cap[MLX4_MAX_PORTS + 1];
         u64 def_mac[MLX4_MAX_PORTS + 1];
         int eth_mtu_cap[MLX4_MAX_PORTS + 1];
         int gid_table_len[MLX4_MAX_PORTS + 1];