Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
MAINTAINERS: Remove Glenn Streiff from NetEffect entry
mlx4_core: Improve error message when not enough UAR pages are available
IB/mlx4: Add support for memory management extensions and local DMA L_Key
IB/mthca: Keep free count for MTT buddy allocator
mlx4_core: Keep free count for MTT buddy allocator
mlx4_core: Add missing FW status return code
IB/mlx4: Rename struct mlx4_lso_seg to mlx4_wqe_lso_seg
mlx4_core: Add module parameter to enable QoS support
RDMA/iwcm: Remove IB_ACCESS_LOCAL_WRITE from remote QP attributes
IPoIB: Include err code in trace message for ib_sa_path_rec_get() failures
IB/sa_query: Check if sm_ah is NULL in ib_sa_remove_one()
IB/ehca: Release mutex in error path of alloc_small_queue_page()
IB/ehca: Use default value for Local CA ACK Delay if FW returns 0
IB/ehca: Filter PATH_MIG events if QP was never armed
IB/iser: Add support for RDMA_CM_EVENT_ADDR_CHANGE event
RDMA/cma: Add RDMA_CM_EVENT_TIMEWAIT_EXIT event
RDMA/cma: Add RDMA_CM_EVENT_ADDR_CHANGE event

+398 -51
-2
MAINTAINERS
··· 2937 2937 M: flatif@neteffect.com 2938 2938 P: Chien Tung 2939 2939 M: ctung@neteffect.com 2940 - P: Glenn Streiff 2941 - M: gstreiff@neteffect.com 2942 2940 L: general@lists.openfabrics.org 2943 2941 W: http://www.neteffect.com 2944 2942 S: Supported
+98 -1
drivers/infiniband/core/cma.c
··· 168 168 struct rdma_cm_event event; 169 169 }; 170 170 171 + struct cma_ndev_work { 172 + struct work_struct work; 173 + struct rdma_id_private *id; 174 + struct rdma_cm_event event; 175 + }; 176 + 171 177 union cma_ip_addr { 172 178 struct in6_addr ip6; 173 179 struct { ··· 920 914 struct rdma_cm_event event; 921 915 int ret = 0; 922 916 923 - if (cma_disable_callback(id_priv, CMA_CONNECT)) 917 + if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 918 + cma_disable_callback(id_priv, CMA_CONNECT)) || 919 + (ib_event->event == IB_CM_TIMEWAIT_EXIT && 920 + cma_disable_callback(id_priv, CMA_DISCONNECT))) 924 921 return 0; 925 922 926 923 memset(&event, 0, sizeof event); ··· 959 950 event.event = RDMA_CM_EVENT_DISCONNECTED; 960 951 break; 961 952 case IB_CM_TIMEWAIT_EXIT: 953 + event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 954 + break; 962 955 case IB_CM_MRA_RECEIVED: 963 956 /* ignore event */ 964 957 goto out; ··· 1601 1590 cma_exch(id_priv, CMA_DESTROYING); 1602 1591 destroy = 1; 1603 1592 } 1593 + out: 1594 + mutex_unlock(&id_priv->handler_mutex); 1595 + cma_deref_id(id_priv); 1596 + if (destroy) 1597 + rdma_destroy_id(&id_priv->id); 1598 + kfree(work); 1599 + } 1600 + 1601 + static void cma_ndev_work_handler(struct work_struct *_work) 1602 + { 1603 + struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); 1604 + struct rdma_id_private *id_priv = work->id; 1605 + int destroy = 0; 1606 + 1607 + mutex_lock(&id_priv->handler_mutex); 1608 + if (id_priv->state == CMA_DESTROYING || 1609 + id_priv->state == CMA_DEVICE_REMOVAL) 1610 + goto out; 1611 + 1612 + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 1613 + cma_exch(id_priv, CMA_DESTROYING); 1614 + destroy = 1; 1615 + } 1616 + 1604 1617 out: 1605 1618 mutex_unlock(&id_priv->handler_mutex); 1606 1619 cma_deref_id(id_priv); ··· 2758 2723 } 2759 2724 EXPORT_SYMBOL(rdma_leave_multicast); 2760 2725 2726 + static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) 2727 + { 2728 + struct rdma_dev_addr *dev_addr; 2729 + struct cma_ndev_work *work; 2730 + 2731 + dev_addr = &id_priv->id.route.addr.dev_addr; 2732 + 2733 + if ((dev_addr->src_dev == ndev) && 2734 + memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { 2735 + printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", 2736 + ndev->name, &id_priv->id); 2737 + work = kzalloc(sizeof *work, GFP_KERNEL); 2738 + if (!work) 2739 + return -ENOMEM; 2740 + 2741 + INIT_WORK(&work->work, cma_ndev_work_handler); 2742 + work->id = id_priv; 2743 + work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; 2744 + atomic_inc(&id_priv->refcount); 2745 + queue_work(cma_wq, &work->work); 2746 + } 2747 + 2748 + return 0; 2749 + } 2750 + 2751 + static int cma_netdev_callback(struct notifier_block *self, unsigned long event, 2752 + void *ctx) 2753 + { 2754 + struct net_device *ndev = (struct net_device *)ctx; 2755 + struct cma_device *cma_dev; 2756 + struct rdma_id_private *id_priv; 2757 + int ret = NOTIFY_DONE; 2758 + 2759 + if (dev_net(ndev) != &init_net) 2760 + return NOTIFY_DONE; 2761 + 2762 + if (event != NETDEV_BONDING_FAILOVER) 2763 + return NOTIFY_DONE; 2764 + 2765 + if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) 2766 + return NOTIFY_DONE; 2767 + 2768 + mutex_lock(&lock); 2769 + list_for_each_entry(cma_dev, &dev_list, list) 2770 + list_for_each_entry(id_priv, &cma_dev->id_list, list) { 2771 + ret = cma_netdev_change(ndev, id_priv); 2772 + if (ret) 2773 + goto out; 2774 + } 2775 + 2776 + out: 2777 + 
mutex_unlock(&lock); 2778 + return ret; 2779 + } 2780 + 2781 + static struct notifier_block cma_nb = { 2782 + .notifier_call = cma_netdev_callback 2783 + }; 2784 + 2761 2785 static void cma_add_one(struct ib_device *device) 2762 2786 { 2763 2787 struct cma_device *cma_dev; ··· 2925 2831 2926 2832 ib_sa_register_client(&sa_client); 2927 2833 rdma_addr_register_client(&addr_client); 2834 + register_netdevice_notifier(&cma_nb); 2928 2835 2929 2836 ret = ib_register_client(&cma_client); 2930 2837 if (ret) ··· 2933 2838 return 0; 2934 2839 2935 2840 err: 2841 + unregister_netdevice_notifier(&cma_nb); 2936 2842 rdma_addr_unregister_client(&addr_client); 2937 2843 ib_sa_unregister_client(&sa_client); 2938 2844 destroy_workqueue(cma_wq); ··· 2943 2847 static void cma_cleanup(void) 2944 2848 { 2945 2849 ib_unregister_client(&cma_client); 2850 + unregister_netdevice_notifier(&cma_nb); 2946 2851 rdma_addr_unregister_client(&addr_client); 2947 2852 ib_sa_unregister_client(&sa_client); 2948 2853 destroy_workqueue(cma_wq);
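
A ULP sitting on top of the RDMA CM picks these two new events up in its rdma_cm event handler. A minimal sketch of what that might look like (the my_ulp_* helpers are hypothetical; the iSER hunk further down takes the same approach of treating an address change like a disconnect):

    /* Sketch only -- not part of this merge. */
    static int my_ulp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
    {
        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_CHANGE:
            /* Bonding failover moved our source address: treat it like a
             * disconnect and let the ULP's reconnect logic take over. */
            my_ulp_schedule_reconnect(id->context);    /* hypothetical helper */
            break;
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
            /* The connection's QP has left the timewait state, so resources
             * tied to it can now be flushed or reused. */
            my_ulp_qp_left_timewait(id->context);      /* hypothetical helper */
            break;
        default:
            break;
        }
        return 0;    /* a non-zero return makes the CM destroy this id */
    }
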
+1 -2
drivers/infiniband/core/iwcm.c
··· 942 942 case IW_CM_STATE_CONN_RECV: 943 943 case IW_CM_STATE_ESTABLISHED: 944 944 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 945 - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | 946 - IB_ACCESS_REMOTE_WRITE| 945 + qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 947 946 IB_ACCESS_REMOTE_READ; 948 947 ret = 0; 949 948 break;
+2 -1
drivers/infiniband/core/sa_query.c
··· 1064 1064 1065 1065 for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { 1066 1066 ib_unregister_mad_agent(sa_dev->port[i].agent); 1067 - kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); 1067 + if (sa_dev->port[i].sm_ah) 1068 + kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); 1068 1069 } 1069 1070 1070 1071 kfree(sa_dev);
+1
drivers/infiniband/hw/ehca/ehca_classes.h
··· 194 194 u32 packet_count; 195 195 atomic_t nr_events; /* events seen */ 196 196 wait_queue_head_t wait_completion; 197 + int mig_armed; 197 198 }; 198 199 199 200 #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+3 -1
drivers/infiniband/hw/ehca/ehca_hca.c
··· 114 114 } 115 115 116 116 props->max_pkeys = 16; 117 - props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); 117 + /* Some FW versions say 0 here; insert sensible value in that case */ 118 + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? 119 + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; 118 120 props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); 119 121 props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); 120 122 props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
+4
drivers/infiniband/hw/ehca/ehca_irq.c
··· 178 178 { 179 179 struct ib_event event; 180 180 181 + /* PATH_MIG without the QP ever having been armed is false alarm */ 182 + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) 183 + return; 184 + 181 185 event.device = &shca->ib_device; 182 186 event.event = event_type; 183 187
+2
drivers/infiniband/hw/ehca/ehca_qp.c
··· 1460 1460 goto modify_qp_exit2; 1461 1461 } 1462 1462 mqpcb->path_migration_state = attr->path_mig_state + 1; 1463 + if (attr->path_mig_state == IB_MIG_REARM) 1464 + my_qp->mig_armed = 1; 1463 1465 update_mask |= 1464 1466 EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); 1465 1467 }
+1
drivers/infiniband/hw/ehca/ipz_pt_fn.c
··· 163 163 164 164 out: 165 165 ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); 166 + mutex_unlock(&pd->lock); 166 167 return 0; 167 168 } 168 169
+12
drivers/infiniband/hw/mlx4/cq.c
··· 637 637 case MLX4_OPCODE_SEND_IMM: 638 638 wc->wc_flags |= IB_WC_WITH_IMM; 639 639 case MLX4_OPCODE_SEND: 640 + case MLX4_OPCODE_SEND_INVAL: 640 641 wc->opcode = IB_WC_SEND; 641 642 break; 642 643 case MLX4_OPCODE_RDMA_READ: ··· 658 657 case MLX4_OPCODE_LSO: 659 658 wc->opcode = IB_WC_LSO; 660 659 break; 660 + case MLX4_OPCODE_FMR: 661 + wc->opcode = IB_WC_FAST_REG_MR; 662 + break; 663 + case MLX4_OPCODE_LOCAL_INVAL: 664 + wc->opcode = IB_WC_LOCAL_INV; 665 + break; 661 666 } 662 667 } else { 663 668 wc->byte_len = be32_to_cpu(cqe->byte_cnt); ··· 673 666 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 674 667 wc->wc_flags = IB_WC_WITH_IMM; 675 668 wc->ex.imm_data = cqe->immed_rss_invalid; 669 + break; 670 + case MLX4_RECV_OPCODE_SEND_INVAL: 671 + wc->opcode = IB_WC_RECV; 672 + wc->wc_flags = IB_WC_WITH_INVALIDATE; 673 + wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); 676 674 break; 677 675 case MLX4_RECV_OPCODE_SEND: 678 676 wc->opcode = IB_WC_RECV;
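
On the polling side, these new completion codes surface through ib_poll_cq(). A rough sketch of how a kernel consumer might handle them (error handling trimmed; the my_ulp_* helpers are made up):

    struct ib_wc wc;

    while (ib_poll_cq(cq, 1, &wc) > 0) {
        if (wc.status != IB_WC_SUCCESS)
            continue;    /* real code would handle flush/error states here */

        switch (wc.opcode) {
        case IB_WC_FAST_REG_MR:    /* an IB_WR_FAST_REG_MR completed */
        case IB_WC_LOCAL_INV:      /* an IB_WR_LOCAL_INV completed */
            my_ulp_mr_state_changed(&wc);
            break;
        case IB_WC_RECV:
            if (wc.wc_flags & IB_WC_WITH_INVALIDATE)
                /* peer used send-with-invalidate; this rkey is no longer valid */
                my_ulp_rkey_invalidated(wc.ex.invalidate_rkey);
            break;
        default:
            break;
        }
    }
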
+11
drivers/infiniband/hw/mlx4/main.c
··· 104 104 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 105 105 if (dev->dev->caps.max_gso_sz) 106 106 props->device_cap_flags |= IB_DEVICE_UD_TSO; 107 + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) 108 + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; 109 + if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && 110 + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && 111 + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) 112 + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 107 113 108 114 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 109 115 0xffffff; ··· 133 127 props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; 134 128 props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; 135 129 props->max_srq_sge = dev->dev->caps.max_srq_sge; 130 + props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); 136 131 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; 137 132 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? 138 133 IB_ATOMIC_HCA : IB_ATOMIC_NONE; ··· 572 565 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); 573 566 ibdev->ib_dev.owner = THIS_MODULE; 574 567 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; 568 + ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; 575 569 ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; 576 570 ibdev->ib_dev.num_comp_vectors = 1; 577 571 ibdev->ib_dev.dma_device = &dev->pdev->dev; ··· 635 627 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; 636 628 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; 637 629 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; 630 + ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; 631 + ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; 632 + ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; 638 633 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; 639 634 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 640 635 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
+15
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 83 83 struct ib_umem *umem; 84 84 }; 85 85 86 + struct mlx4_ib_fast_reg_page_list { 87 + struct ib_fast_reg_page_list ibfrpl; 88 + dma_addr_t map; 89 + }; 90 + 86 91 struct mlx4_ib_fmr { 87 92 struct ib_fmr ibfmr; 88 93 struct mlx4_fmr mfmr; ··· 204 199 return container_of(ibmr, struct mlx4_ib_mr, ibmr); 205 200 } 206 201 202 + static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) 203 + { 204 + return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); 205 + } 206 + 207 207 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) 208 208 { 209 209 return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); ··· 249 239 u64 virt_addr, int access_flags, 250 240 struct ib_udata *udata); 251 241 int mlx4_ib_dereg_mr(struct ib_mr *mr); 242 + struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, 243 + int max_page_list_len); 244 + struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, 245 + int page_list_len); 246 + void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); 252 247 253 248 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); 254 249 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+70
drivers/infiniband/hw/mlx4/mr.c
··· 183 183 return 0; 184 184 } 185 185 186 + struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, 187 + int max_page_list_len) 188 + { 189 + struct mlx4_ib_dev *dev = to_mdev(pd->device); 190 + struct mlx4_ib_mr *mr; 191 + int err; 192 + 193 + mr = kmalloc(sizeof *mr, GFP_KERNEL); 194 + if (!mr) 195 + return ERR_PTR(-ENOMEM); 196 + 197 + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, 198 + max_page_list_len, 0, &mr->mmr); 199 + if (err) 200 + goto err_free; 201 + 202 + err = mlx4_mr_enable(dev->dev, &mr->mmr); 203 + if (err) 204 + goto err_mr; 205 + 206 + return &mr->ibmr; 207 + 208 + err_mr: 209 + mlx4_mr_free(dev->dev, &mr->mmr); 210 + 211 + err_free: 212 + kfree(mr); 213 + return ERR_PTR(err); 214 + } 215 + 216 + struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, 217 + int page_list_len) 218 + { 219 + struct mlx4_ib_dev *dev = to_mdev(ibdev); 220 + struct mlx4_ib_fast_reg_page_list *mfrpl; 221 + int size = page_list_len * sizeof (u64); 222 + 223 + if (size > PAGE_SIZE) 224 + return ERR_PTR(-EINVAL); 225 + 226 + mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); 227 + if (!mfrpl) 228 + return ERR_PTR(-ENOMEM); 229 + 230 + mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev, 231 + size, &mfrpl->map, 232 + GFP_KERNEL); 233 + if (!mfrpl->ibfrpl.page_list) 234 + goto err_free; 235 + 236 + WARN_ON(mfrpl->map & 0x3f); 237 + 238 + return &mfrpl->ibfrpl; 239 + 240 + err_free: 241 + kfree(mfrpl); 242 + return ERR_PTR(-ENOMEM); 243 + } 244 + 245 + void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) 246 + { 247 + struct mlx4_ib_dev *dev = to_mdev(page_list->device); 248 + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); 249 + int size = page_list->max_page_list_len * sizeof (u64); 250 + 251 + dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list, 252 + mfrpl->map); 253 + kfree(mfrpl); 254 + } 255 + 186 256 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, 187 257 struct ib_fmr_attr *fmr_attr) 188 258 {
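
From a kernel ULP's point of view this code is reached through the core verbs added in the companion memory-management-extensions series (ib_alloc_fast_reg_mr() and friends). A hedged sketch of the allocation path, with a hypothetical helper name and minimal error handling:

    static int my_setup_fastreg(struct ib_device *device, struct ib_pd *pd,
                                struct ib_mr **mr, struct ib_fast_reg_page_list **frpl)
    {
        struct ib_device_attr attr;
        int err;

        err = ib_query_device(device, &attr);
        if (err)
            return err;
        if (!(attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
            return -EOPNOTSUPP;    /* HCA or driver lacks the extensions */

        *mr = ib_alloc_fast_reg_mr(pd, 32);    /* up to 32 pages per registration */
        if (IS_ERR(*mr))
            return PTR_ERR(*mr);

        *frpl = ib_alloc_fast_reg_page_list(device, 32);
        if (IS_ERR(*frpl)) {
            ib_dereg_mr(*mr);
            return PTR_ERR(*frpl);
        }
        return 0;
    }
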
+68 -6
drivers/infiniband/hw/mlx4/qp.c
··· 78 78 [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), 79 79 [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), 80 80 [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), 81 + [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL), 82 + [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), 83 + [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR), 81 84 }; 82 85 83 86 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) ··· 979 976 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); 980 977 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); 981 978 979 + /* Set "fast registration enabled" for all kernel QPs */ 980 + if (!qp->ibqp.uobject) 981 + context->params1 |= cpu_to_be32(1 << 11); 982 + 982 983 if (attr_mask & IB_QP_RNR_RETRY) { 983 984 context->params1 |= cpu_to_be32(attr->rnr_retry << 13); 984 985 optpar |= MLX4_QP_OPTPAR_RNR_RETRY; ··· 1329 1322 return cur + nreq >= wq->max_post; 1330 1323 } 1331 1324 1325 + static __be32 convert_access(int acc) 1326 + { 1327 + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | 1328 + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | 1329 + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | 1330 + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | 1331 + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); 1332 + } 1333 + 1334 + static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) 1335 + { 1336 + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); 1337 + 1338 + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); 1339 + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); 1340 + fseg->buf_list = cpu_to_be64(mfrpl->map); 1341 + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); 1342 + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); 1343 + fseg->offset = 0; /* XXX -- is this just for ZBVA? 
*/ 1344 + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); 1345 + fseg->reserved[0] = 0; 1346 + fseg->reserved[1] = 0; 1347 + } 1348 + 1349 + static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) 1350 + { 1351 + iseg->flags = 0; 1352 + iseg->mem_key = cpu_to_be32(rkey); 1353 + iseg->guest_id = 0; 1354 + iseg->pa = 0; 1355 + } 1356 + 1332 1357 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, 1333 1358 u64 remote_addr, u32 rkey) 1334 1359 { ··· 1434 1395 dseg->addr = cpu_to_be64(sg->addr); 1435 1396 } 1436 1397 1437 - static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, 1398 + static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, 1438 1399 struct mlx4_ib_qp *qp, unsigned *lso_seg_len) 1439 1400 { 1440 1401 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); ··· 1460 1421 1461 1422 *lso_seg_len = halign; 1462 1423 return 0; 1424 + } 1425 + 1426 + static __be32 send_ieth(struct ib_send_wr *wr) 1427 + { 1428 + switch (wr->opcode) { 1429 + case IB_WR_SEND_WITH_IMM: 1430 + case IB_WR_RDMA_WRITE_WITH_IMM: 1431 + return wr->ex.imm_data; 1432 + 1433 + case IB_WR_SEND_WITH_INV: 1434 + return cpu_to_be32(wr->ex.invalidate_rkey); 1435 + 1436 + default: 1437 + return 0; 1438 + } 1463 1439 } 1464 1440 1465 1441 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ··· 1523 1469 MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | 1524 1470 qp->sq_signal_bits; 1525 1471 1526 - if (wr->opcode == IB_WR_SEND_WITH_IMM || 1527 - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) 1528 - ctrl->imm = wr->ex.imm_data; 1529 - else 1530 - ctrl->imm = 0; 1472 + ctrl->imm = send_ieth(wr); 1531 1473 1532 1474 wqe += sizeof *ctrl; 1533 1475 size = sizeof *ctrl / 16; ··· 1553 1503 wr->wr.rdma.rkey); 1554 1504 wqe += sizeof (struct mlx4_wqe_raddr_seg); 1555 1505 size += sizeof (struct mlx4_wqe_raddr_seg) / 16; 1506 + break; 1507 + 1508 + case IB_WR_LOCAL_INV: 1509 + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); 1510 + wqe += sizeof (struct mlx4_wqe_local_inval_seg); 1511 + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; 1512 + break; 1513 + 1514 + case IB_WR_FAST_REG_MR: 1515 + set_fmr_seg(wqe, wr); 1516 + wqe += sizeof (struct mlx4_wqe_fmr_seg); 1517 + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; 1556 1518 break; 1557 1519 1558 1520 default:
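
The wr.fast_reg fields that set_fmr_seg() consumes are filled in by the ULP before the work request is posted. A sketch, assuming the IB_WR_FAST_REG_MR interface from the same core series (helper name and access policy are made up; frpl->page_list[] is expected to already hold the DMA addresses of the pages being mapped):

    static int my_post_fastreg(struct ib_qp *qp, struct ib_mr *mr,
                               struct ib_fast_reg_page_list *frpl,
                               int npages, u64 iova, u32 len)
    {
        struct ib_send_wr wr, *bad_wr;

        memset(&wr, 0, sizeof wr);
        wr.opcode = IB_WR_FAST_REG_MR;
        wr.send_flags = IB_SEND_SIGNALED;
        wr.wr.fast_reg.page_list = frpl;
        wr.wr.fast_reg.page_list_len = npages;
        wr.wr.fast_reg.page_shift = PAGE_SHIFT;
        wr.wr.fast_reg.iova_start = iova;
        wr.wr.fast_reg.length = len;
        wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
        wr.wr.fast_reg.rkey = mr->rkey;

        return ib_post_send(qp, &wr, &bad_wr);
    }
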
+1
drivers/infiniband/hw/mthca/mthca_dev.h
··· 202 202 203 203 struct mthca_buddy { 204 204 unsigned long **bits; 205 + int *num_free; 205 206 int max_order; 206 207 spinlock_t lock; 207 208 };
+19 -9
drivers/infiniband/hw/mthca/mthca_mr.c
··· 89 89 90 90 spin_lock(&buddy->lock); 91 91 92 - for (o = order; o <= buddy->max_order; ++o) { 93 - m = 1 << (buddy->max_order - o); 94 - seg = find_first_bit(buddy->bits[o], m); 95 - if (seg < m) 96 - goto found; 97 - } 92 + for (o = order; o <= buddy->max_order; ++o) 93 + if (buddy->num_free[o]) { 94 + m = 1 << (buddy->max_order - o); 95 + seg = find_first_bit(buddy->bits[o], m); 96 + if (seg < m) 97 + goto found; 98 + } 98 99 99 100 spin_unlock(&buddy->lock); 100 101 return -1; 101 102 102 103 found: 103 104 clear_bit(seg, buddy->bits[o]); 105 + --buddy->num_free[o]; 104 106 105 107 while (o > order) { 106 108 --o; 107 109 seg <<= 1; 108 110 set_bit(seg ^ 1, buddy->bits[o]); 111 + ++buddy->num_free[o]; 109 112 } 110 113 111 114 spin_unlock(&buddy->lock); ··· 126 123 127 124 while (test_bit(seg ^ 1, buddy->bits[order])) { 128 125 clear_bit(seg ^ 1, buddy->bits[order]); 126 + --buddy->num_free[order]; 129 127 seg >>= 1; 130 128 ++order; 131 129 } 132 130 133 131 set_bit(seg, buddy->bits[order]); 132 + ++buddy->num_free[order]; 134 133 135 134 spin_unlock(&buddy->lock); 136 135 } ··· 146 141 147 142 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), 148 143 GFP_KERNEL); 149 - if (!buddy->bits) 144 + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), 145 + GFP_KERNEL); 146 + if (!buddy->bits || !buddy->num_free) 150 147 goto err_out; 151 148 152 149 for (i = 0; i <= buddy->max_order; ++i) { ··· 161 154 } 162 155 163 156 set_bit(0, buddy->bits[buddy->max_order]); 157 + buddy->num_free[buddy->max_order] = 1; 164 158 165 159 return 0; 166 160 ··· 169 161 for (i = 0; i <= buddy->max_order; ++i) 170 162 kfree(buddy->bits[i]); 171 163 172 - kfree(buddy->bits); 173 - 174 164 err_out: 165 + kfree(buddy->bits); 166 + kfree(buddy->num_free); 167 + 175 168 return -ENOMEM; 176 169 } 177 170 ··· 184 175 kfree(buddy->bits[i]); 185 176 186 177 kfree(buddy->bits); 178 + kfree(buddy->num_free); 187 179 } 188 180 189 181 static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
+1 -1
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 548 548 path_rec_completion, 549 549 path, &path->query); 550 550 if (path->query_id < 0) { 551 - ipoib_warn(priv, "ib_sa_path_rec_get failed\n"); 551 + ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id); 552 552 path->query = NULL; 553 553 return path->query_id; 554 554 }
+1
drivers/infiniband/ulp/iser/iser_verbs.c
··· 483 483 break; 484 484 case RDMA_CM_EVENT_DISCONNECTED: 485 485 case RDMA_CM_EVENT_DEVICE_REMOVAL: 486 + case RDMA_CM_EVENT_ADDR_CHANGE: 486 487 iser_disconnected_handler(cma_id); 487 488 break; 488 489 default:
+3
drivers/net/mlx4/cmd.c
··· 67 67 CMD_STAT_BAD_INDEX = 0x0a, 68 68 /* FW image corrupted: */ 69 69 CMD_STAT_BAD_NVMEM = 0x0b, 70 + /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */ 71 + CMD_STAT_ICM_ERROR = 0x0c, 70 72 /* Attempt to modify a QP/EE which is not in the presumed state: */ 71 73 CMD_STAT_BAD_QP_STATE = 0x10, 72 74 /* Bad segment parameters (Address/Size): */ ··· 121 119 [CMD_STAT_BAD_RES_STATE] = -EBADF, 122 120 [CMD_STAT_BAD_INDEX] = -EBADF, 123 121 [CMD_STAT_BAD_NVMEM] = -EFAULT, 122 + [CMD_STAT_ICM_ERROR] = -ENFILE, 124 123 [CMD_STAT_BAD_QP_STATE] = -EINVAL, 125 124 [CMD_STAT_BAD_SEG_PARAM] = -EFAULT, 126 125 [CMD_STAT_REG_BOUND] = -EBUSY,
+11 -7
drivers/net/mlx4/fw.c
··· 46 46 extern void __buggy_use_of_MLX4_GET(void); 47 47 extern void __buggy_use_of_MLX4_PUT(void); 48 48 49 + static int enable_qos; 50 + module_param(enable_qos, bool, 0444); 51 + MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)"); 52 + 49 53 #define MLX4_GET(dest, source, offset) \ 50 54 do { \ 51 55 void *__p = (char *) (source) + (offset); \ ··· 202 198 #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e 203 199 #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 204 200 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 205 - #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x97 201 + #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 206 202 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 207 203 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 208 204 ··· 377 373 } 378 374 } 379 375 380 - if (dev_cap->bmme_flags & 1) 381 - mlx4_dbg(dev, "Base MM extensions: yes " 382 - "(flags %d, rsvd L_Key %08x)\n", 383 - dev_cap->bmme_flags, dev_cap->reserved_lkey); 384 - else 385 - mlx4_dbg(dev, "Base MM extensions: no\n"); 376 + mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", 377 + dev_cap->bmme_flags, dev_cap->reserved_lkey); 386 378 387 379 /* 388 380 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then ··· 736 736 /* Enable IPoIB checksumming if we can: */ 737 737 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) 738 738 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); 739 + 740 + /* Enable QoS support if module parameter set */ 741 + if (enable_qos) 742 + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); 739 743 740 744 /* QPC/EEC/CQC/EQC/RDMARC attributes */ 741 745
+1 -1
drivers/net/mlx4/fw.h
··· 98 98 int cmpt_entry_sz; 99 99 int mtt_entry_sz; 100 100 int resize_srq; 101 - u8 bmme_flags; 101 + u32 bmme_flags; 102 102 u32 reserved_lkey; 103 103 u64 max_icm_sz; 104 104 int max_gso_sz;
+2
drivers/net/mlx4/main.c
··· 158 158 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 159 159 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 160 160 dev->caps.flags = dev_cap->flags; 161 + dev->caps.bmme_flags = dev_cap->bmme_flags; 162 + dev->caps.reserved_lkey = dev_cap->reserved_lkey; 161 163 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 162 164 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 163 165
+1
drivers/net/mlx4/mlx4.h
··· 118 118 119 119 struct mlx4_buddy { 120 120 unsigned long **bits; 121 + unsigned int *num_free; 121 122 int max_order; 122 123 spinlock_t lock; 123 124 };
+37 -14
drivers/net/mlx4/mr.c
··· 47 47 __be32 flags; 48 48 __be32 qpn; 49 49 __be32 key; 50 - __be32 pd; 50 + __be32 pd_flags; 51 51 __be64 start; 52 52 __be64 length; 53 53 __be32 lkey; ··· 61 61 } __attribute__((packed)); 62 62 63 63 #define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) 64 + #define MLX4_MPT_FLAG_FREE (0x3UL << 28) 64 65 #define MLX4_MPT_FLAG_MIO (1 << 17) 65 66 #define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) 66 67 #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) 67 68 #define MLX4_MPT_FLAG_REGION (1 << 8) 69 + 70 + #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 26) 71 + #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) 68 72 69 73 #define MLX4_MTT_FLAG_PRESENT 1 70 74 ··· 83 79 84 80 spin_lock(&buddy->lock); 85 81 86 - for (o = order; o <= buddy->max_order; ++o) { 87 - m = 1 << (buddy->max_order - o); 88 - seg = find_first_bit(buddy->bits[o], m); 89 - if (seg < m) 90 - goto found; 91 - } 82 + for (o = order; o <= buddy->max_order; ++o) 83 + if (buddy->num_free[o]) { 84 + m = 1 << (buddy->max_order - o); 85 + seg = find_first_bit(buddy->bits[o], m); 86 + if (seg < m) 87 + goto found; 88 + } 92 89 93 90 spin_unlock(&buddy->lock); 94 91 return -1; 95 92 96 93 found: 97 94 clear_bit(seg, buddy->bits[o]); 95 + --buddy->num_free[o]; 98 96 99 97 while (o > order) { 100 98 --o; 101 99 seg <<= 1; 102 100 set_bit(seg ^ 1, buddy->bits[o]); 101 + ++buddy->num_free[o]; 103 102 } 104 103 105 104 spin_unlock(&buddy->lock); ··· 120 113 121 114 while (test_bit(seg ^ 1, buddy->bits[order])) { 122 115 clear_bit(seg ^ 1, buddy->bits[order]); 116 + --buddy->num_free[order]; 123 117 seg >>= 1; 124 118 ++order; 125 119 } 126 120 127 121 set_bit(seg, buddy->bits[order]); 122 + ++buddy->num_free[order]; 128 123 129 124 spin_unlock(&buddy->lock); 130 125 } ··· 140 131 141 132 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), 142 133 GFP_KERNEL); 143 - if (!buddy->bits) 134 + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), 135 + GFP_KERNEL); 136 + if (!buddy->bits || !buddy->num_free) 144 137 goto err_out; 145 138 146 139 for (i = 0; i <= buddy->max_order; ++i) { ··· 154 143 } 155 144 156 145 set_bit(0, buddy->bits[buddy->max_order]); 146 + buddy->num_free[buddy->max_order] = 1; 157 147 158 148 return 0; 159 149 ··· 162 150 for (i = 0; i <= buddy->max_order; ++i) 163 151 kfree(buddy->bits[i]); 164 152 165 - kfree(buddy->bits); 166 - 167 153 err_out: 154 + kfree(buddy->bits); 155 + kfree(buddy->num_free); 156 + 168 157 return -ENOMEM; 169 158 } 170 159 ··· 177 164 kfree(buddy->bits[i]); 178 165 179 166 kfree(buddy->bits); 167 + kfree(buddy->num_free); 180 168 } 181 169 182 170 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order) ··· 328 314 329 315 memset(mpt_entry, 0, sizeof *mpt_entry); 330 316 331 - mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS | 332 - MLX4_MPT_FLAG_MIO | 317 + mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO | 333 318 MLX4_MPT_FLAG_REGION | 334 319 mr->access); 335 320 336 321 mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); 337 - mpt_entry->pd = cpu_to_be32(mr->pd); 322 + mpt_entry->pd_flags = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV); 338 323 mpt_entry->start = cpu_to_be64(mr->iova); 339 324 mpt_entry->length = cpu_to_be64(mr->size); 340 325 mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); 326 + 341 327 if (mr->mtt.order < 0) { 342 328 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); 343 329 mpt_entry->mtt_seg = 0; 344 - } else 330 + } else { 345 331 mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); 332 + } 333 + 334 + if (mr->mtt.order >= 0 && 
mr->mtt.page_shift == 0) { 335 + /* fast register MR in free state */ 336 + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); 337 + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG); 338 + } else { 339 + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); 340 + } 346 341 347 342 err = mlx4_SW2HW_MPT(dev, mailbox, 348 343 key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
+7
drivers/net/mlx4/pd.c
··· 91 91 92 92 int mlx4_init_uar_table(struct mlx4_dev *dev) 93 93 { 94 + if (dev->caps.num_uars <= 128) { 95 + mlx4_err(dev, "Only %d UAR pages (need more than 128)\n", 96 + dev->caps.num_uars); 97 + mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n"); 98 + return -ENODEV; 99 + } 100 + 94 101 return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap, 95 102 dev->caps.num_uars, dev->caps.num_uars - 1, 96 103 max(128, dev->caps.reserved_uars));
+10
include/linux/mlx4/device.h
··· 68 68 MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21 69 69 }; 70 70 71 + enum { 72 + MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, 73 + MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, 74 + MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, 75 + MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, 76 + MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, 77 + }; 78 + 71 79 enum mlx4_event { 72 80 MLX4_EVENT_TYPE_COMP = 0x00, 73 81 MLX4_EVENT_TYPE_PATH_MIG = 0x01, ··· 192 184 u32 max_msg_sz; 193 185 u32 page_size_cap; 194 186 u32 flags; 187 + u32 bmme_flags; 188 + u32 reserved_lkey; 195 189 u16 stat_rate_support; 196 190 u8 port_width_cap[MLX4_MAX_PORTS + 1]; 197 191 int max_gso_sz;
+13 -5
include/linux/mlx4/qp.h
··· 219 219 __be32 reservd[2]; 220 220 }; 221 221 222 - struct mlx4_lso_seg { 222 + struct mlx4_wqe_lso_seg { 223 223 __be32 mss_hdr_size; 224 224 __be32 header[0]; 225 225 }; ··· 231 231 __be32 lkey; 232 232 __be64 addr; 233 233 __be64 length; 234 + }; 235 + 236 + enum { 237 + MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27, 238 + MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, 239 + MLX4_WQE_FMR_PERM_REMOTE_READ = 1 << 29, 240 + MLX4_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, 241 + MLX4_WQE_FMR_PERM_ATOMIC = 1 << 31 234 242 }; 235 243 236 244 struct mlx4_wqe_fmr_seg { ··· 263 255 }; 264 256 265 257 struct mlx4_wqe_local_inval_seg { 266 - u8 flags; 267 - u8 reserved1[3]; 258 + __be32 flags; 259 + u32 reserved1; 268 260 __be32 mem_key; 269 - u8 reserved2[3]; 270 - u8 guest_id; 261 + u32 reserved2[2]; 262 + __be32 guest_id; 271 263 __be64 pa; 272 264 }; 273 265
+3 -1
include/rdma/rdma_cm.h
··· 57 57 RDMA_CM_EVENT_DISCONNECTED, 58 58 RDMA_CM_EVENT_DEVICE_REMOVAL, 59 59 RDMA_CM_EVENT_MULTICAST_JOIN, 60 - RDMA_CM_EVENT_MULTICAST_ERROR 60 + RDMA_CM_EVENT_MULTICAST_ERROR, 61 + RDMA_CM_EVENT_ADDR_CHANGE, 62 + RDMA_CM_EVENT_TIMEWAIT_EXIT 61 63 }; 62 64 63 65 enum rdma_port_space {