Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband changes from Roland Dreier:
"Second batch of changes for the 3.7 merge window:
- Late-breaking fix for IPoIB on mlx4 SR-IOV VFs.
- Fix for IPoIB build breakage with CONFIG_INFINIBAND_IPOIB_CM=n (new
netlink config changes are to blame).
- Make sure retry count values are in range in RDMA CM.
- A few nes hardware driver fixes and cleanups.
- Have iSER initiator use >1 interrupt vectors if available."

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
RDMA/cma: Check that retry count values are in range
IB/iser: Add more RX CQs to scale out processing of SCSI responses
RDMA/nes: Bump the version number of nes driver
RDMA/nes: Remove unused module parameter "send_first"
RDMA/nes: Remove unnecessary if-else statement
RDMA/nes: Add missing break to switch.
mlx4_core: Adjust flow steering attach wrapper so that IB works on SR-IOV VFs
IPoIB: Fix build with CONFIG_INFINIBAND_IPOIB_CM=n

+146 -99
+3 -3
drivers/infiniband/core/cma.c
··· 2648 2648 req.responder_resources = conn_param->responder_resources; 2649 2649 req.initiator_depth = conn_param->initiator_depth; 2650 2650 req.flow_control = conn_param->flow_control; 2651 - req.retry_count = conn_param->retry_count; 2652 - req.rnr_retry_count = conn_param->rnr_retry_count; 2651 + req.retry_count = min_t(u8, 7, conn_param->retry_count); 2652 + req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 2653 2653 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 2654 2654 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 2655 2655 req.max_cm_retries = CMA_MAX_CM_RETRIES; ··· 2770 2770 rep.initiator_depth = conn_param->initiator_depth; 2771 2771 rep.failover_accepted = 0; 2772 2772 rep.flow_control = conn_param->flow_control; 2773 - rep.rnr_retry_count = conn_param->rnr_retry_count; 2773 + rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 2774 2774 rep.srq = id_priv->srq ? 1 : 0; 2775 2775 2776 2776 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
-5
drivers/infiniband/hw/nes/nes.c
··· 79 79 module_param(disable_mpa_crc, int, 0644); 80 80 MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); 81 81 82 - unsigned int send_first = 0; 83 - module_param(send_first, int, 0644); 84 - MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); 85 - 86 - 87 82 unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU; 88 83 module_param(nes_drv_opt, int, 0644); 89 84 MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
+1 -2
drivers/infiniband/hw/nes/nes.h
··· 57 57 #define QUEUE_DISCONNECTS 58 58 59 59 #define DRV_NAME "iw_nes" 60 - #define DRV_VERSION "1.5.0.0" 60 + #define DRV_VERSION "1.5.0.1" 61 61 #define PFX DRV_NAME ": " 62 62 63 63 /* ··· 172 172 extern int nes_if_count; 173 173 extern int mpa_version; 174 174 extern int disable_mpa_crc; 175 - extern unsigned int send_first; 176 175 extern unsigned int nes_drv_opt; 177 176 extern unsigned int nes_debug_level; 178 177 extern unsigned int wqm_quanta;
+4 -12
drivers/infiniband/hw/nes/nes_verbs.c
··· 3006 3006 switch (nesqp->hw_iwarp_state) { 3007 3007 case NES_AEQE_IWARP_STATE_CLOSING: 3008 3008 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; 3009 + break; 3009 3010 case NES_AEQE_IWARP_STATE_TERMINATE: 3010 3011 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; 3011 3012 break; ··· 3069 3068 } 3070 3069 3071 3070 nesqp->ibqp_state = attr->qp_state; 3072 - if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == 3073 - (u32)NES_CQP_QP_IWARP_STATE_RTS) && 3074 - ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) > 3075 - (u32)NES_CQP_QP_IWARP_STATE_RTS)) { 3076 - nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; 3077 - nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", 3078 - nesqp->iwarp_state); 3079 - } else { 3080 - nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; 3081 - nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", 3082 - nesqp->iwarp_state); 3083 - } 3071 + nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; 3072 + nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", 3073 + nesqp->iwarp_state); 3084 3074 } 3085 3075 3086 3076 if (attr_mask & IB_QP_ACCESS_FLAGS) {
+2 -2
drivers/infiniband/ulp/ipoib/ipoib.h
··· 535 535 void ipoib_set_ethtool_ops(struct net_device *dev); 536 536 int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca); 537 537 538 - #ifdef CONFIG_INFINIBAND_IPOIB_CM 539 - 540 538 #define IPOIB_FLAGS_RC 0x80 541 539 #define IPOIB_FLAGS_UC 0x40 542 540 543 541 /* We don't support UC connections at the moment */ 544 542 #define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) 543 + 544 + #ifdef CONFIG_INFINIBAND_IPOIB_CM 545 545 546 546 extern int ipoib_max_conn_qp; 547 547
-31
drivers/infiniband/ulp/ipoib/ipoib_cm.c
··· 1448 1448 return sprintf(buf, "datagram\n"); 1449 1449 } 1450 1450 1451 - int ipoib_set_mode(struct net_device *dev, const char *buf) 1452 - { 1453 - struct ipoib_dev_priv *priv = netdev_priv(dev); 1454 - 1455 - /* flush paths if we switch modes so that connections are restarted */ 1456 - if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { 1457 - set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 1458 - ipoib_warn(priv, "enabling connected mode " 1459 - "will cause multicast packet drops\n"); 1460 - netdev_update_features(dev); 1461 - rtnl_unlock(); 1462 - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 1463 - 1464 - ipoib_flush_paths(dev); 1465 - rtnl_lock(); 1466 - return 0; 1467 - } 1468 - 1469 - if (!strcmp(buf, "datagram\n")) { 1470 - clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 1471 - netdev_update_features(dev); 1472 - dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); 1473 - rtnl_unlock(); 1474 - ipoib_flush_paths(dev); 1475 - rtnl_lock(); 1476 - return 0; 1477 - } 1478 - 1479 - return -EINVAL; 1480 - } 1481 - 1482 1451 static ssize_t set_mode(struct device *d, struct device_attribute *attr, 1483 1452 const char *buf, size_t count) 1484 1453 {
+31
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 215 215 return 0; 216 216 } 217 217 218 + int ipoib_set_mode(struct net_device *dev, const char *buf) 219 + { 220 + struct ipoib_dev_priv *priv = netdev_priv(dev); 221 + 222 + /* flush paths if we switch modes so that connections are restarted */ 223 + if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { 224 + set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 225 + ipoib_warn(priv, "enabling connected mode " 226 + "will cause multicast packet drops\n"); 227 + netdev_update_features(dev); 228 + rtnl_unlock(); 229 + priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 230 + 231 + ipoib_flush_paths(dev); 232 + rtnl_lock(); 233 + return 0; 234 + } 235 + 236 + if (!strcmp(buf, "datagram\n")) { 237 + clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 238 + netdev_update_features(dev); 239 + dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); 240 + rtnl_unlock(); 241 + ipoib_flush_paths(dev); 242 + rtnl_lock(); 243 + return 0; 244 + } 245 + 246 + return -EINVAL; 247 + } 248 + 218 249 static struct ipoib_path *__path_find(struct net_device *dev, void *gid) 219 250 { 220 251 struct ipoib_dev_priv *priv = netdev_priv(dev);
+14 -3
drivers/infiniband/ulp/iser/iscsi_iser.h
··· 177 177 178 178 /* fwd declarations */ 179 179 struct iser_device; 180 + struct iser_cq_desc; 180 181 struct iscsi_iser_conn; 181 182 struct iscsi_iser_task; 182 183 struct iscsi_endpoint; ··· 227 226 char pad[ISER_RX_PAD_SIZE]; 228 227 } __attribute__((packed)); 229 228 229 + #define ISER_MAX_CQ 4 230 + 230 231 struct iser_device { 231 232 struct ib_device *ib_device; 232 233 struct ib_pd *pd; 233 - struct ib_cq *rx_cq; 234 - struct ib_cq *tx_cq; 234 + struct ib_cq *rx_cq[ISER_MAX_CQ]; 235 + struct ib_cq *tx_cq[ISER_MAX_CQ]; 235 236 struct ib_mr *mr; 236 - struct tasklet_struct cq_tasklet; 237 + struct tasklet_struct cq_tasklet[ISER_MAX_CQ]; 237 238 struct ib_event_handler event_handler; 238 239 struct list_head ig_list; /* entry in ig devices list */ 239 240 int refcount; 241 + int cq_active_qps[ISER_MAX_CQ]; 242 + int cqs_used; 243 + struct iser_cq_desc *cq_desc; 240 244 }; 241 245 242 246 struct iser_conn { ··· 291 285 int length; 292 286 int offset; 293 287 int data_size; 288 + }; 289 + 290 + struct iser_cq_desc { 291 + struct iser_device *device; 292 + int cq_index; 294 293 }; 295 294 296 295 struct iser_global {
+89 -41
drivers/infiniband/ulp/iser/iser_verbs.c
··· 70 70 */ 71 71 static int iser_create_device_ib_res(struct iser_device *device) 72 72 { 73 + int i, j; 74 + struct iser_cq_desc *cq_desc; 75 + 76 + device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); 77 + iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used, 78 + device->ib_device->name, device->ib_device->num_comp_vectors); 79 + 80 + device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, 81 + GFP_KERNEL); 82 + if (device->cq_desc == NULL) 83 + goto cq_desc_err; 84 + cq_desc = device->cq_desc; 85 + 73 86 device->pd = ib_alloc_pd(device->ib_device); 74 87 if (IS_ERR(device->pd)) 75 88 goto pd_err; 76 89 77 - device->rx_cq = ib_create_cq(device->ib_device, 78 - iser_cq_callback, 79 - iser_cq_event_callback, 80 - (void *)device, 81 - ISER_MAX_RX_CQ_LEN, 0); 82 - if (IS_ERR(device->rx_cq)) 83 - goto rx_cq_err; 90 + for (i = 0; i < device->cqs_used; i++) { 91 + cq_desc[i].device = device; 92 + cq_desc[i].cq_index = i; 84 93 85 - device->tx_cq = ib_create_cq(device->ib_device, 86 - NULL, iser_cq_event_callback, 87 - (void *)device, 88 - ISER_MAX_TX_CQ_LEN, 0); 94 + device->rx_cq[i] = ib_create_cq(device->ib_device, 95 + iser_cq_callback, 96 + iser_cq_event_callback, 97 + (void *)&cq_desc[i], 98 + ISER_MAX_RX_CQ_LEN, i); 99 + if (IS_ERR(device->rx_cq[i])) 100 + goto cq_err; 89 101 90 - if (IS_ERR(device->tx_cq)) 91 - goto tx_cq_err; 102 + device->tx_cq[i] = ib_create_cq(device->ib_device, 103 + NULL, iser_cq_event_callback, 104 + (void *)&cq_desc[i], 105 + ISER_MAX_TX_CQ_LEN, i); 92 106 93 - if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP)) 94 - goto cq_arm_err; 107 + if (IS_ERR(device->tx_cq[i])) 108 + goto cq_err; 95 109 96 - tasklet_init(&device->cq_tasklet, 97 - iser_cq_tasklet_fn, 98 - (unsigned long)device); 110 + if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) 111 + goto cq_err; 112 + 113 + tasklet_init(&device->cq_tasklet[i], 114 + iser_cq_tasklet_fn, 115 + (unsigned long)&cq_desc[i]); 
116 + } 99 117 100 118 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 101 119 IB_ACCESS_REMOTE_WRITE | ··· 131 113 handler_err: 132 114 ib_dereg_mr(device->mr); 133 115 dma_mr_err: 134 - tasklet_kill(&device->cq_tasklet); 135 - cq_arm_err: 136 - ib_destroy_cq(device->tx_cq); 137 - tx_cq_err: 138 - ib_destroy_cq(device->rx_cq); 139 - rx_cq_err: 116 + for (j = 0; j < device->cqs_used; j++) 117 + tasklet_kill(&device->cq_tasklet[j]); 118 + cq_err: 119 + for (j = 0; j < i; j++) { 120 + if (device->tx_cq[j]) 121 + ib_destroy_cq(device->tx_cq[j]); 122 + if (device->rx_cq[j]) 123 + ib_destroy_cq(device->rx_cq[j]); 124 + } 140 125 ib_dealloc_pd(device->pd); 141 126 pd_err: 127 + kfree(device->cq_desc); 128 + cq_desc_err: 142 129 iser_err("failed to allocate an IB resource\n"); 143 130 return -1; 144 131 } ··· 154 131 */ 155 132 static void iser_free_device_ib_res(struct iser_device *device) 156 133 { 134 + int i; 157 135 BUG_ON(device->mr == NULL); 158 136 159 - tasklet_kill(&device->cq_tasklet); 137 + for (i = 0; i < device->cqs_used; i++) { 138 + tasklet_kill(&device->cq_tasklet[i]); 139 + (void)ib_destroy_cq(device->tx_cq[i]); 140 + (void)ib_destroy_cq(device->rx_cq[i]); 141 + device->tx_cq[i] = NULL; 142 + device->rx_cq[i] = NULL; 143 + } 144 + 160 145 (void)ib_unregister_event_handler(&device->event_handler); 161 146 (void)ib_dereg_mr(device->mr); 162 - (void)ib_destroy_cq(device->tx_cq); 163 - (void)ib_destroy_cq(device->rx_cq); 164 147 (void)ib_dealloc_pd(device->pd); 165 148 149 + kfree(device->cq_desc); 150 + 166 151 device->mr = NULL; 167 - device->tx_cq = NULL; 168 - device->rx_cq = NULL; 169 152 device->pd = NULL; 170 153 } 171 154 ··· 186 157 struct ib_qp_init_attr init_attr; 187 158 int req_err, resp_err, ret = -ENOMEM; 188 159 struct ib_fmr_pool_param params; 160 + int index, min_index = 0; 189 161 190 162 BUG_ON(ib_conn->device == NULL); 191 163 ··· 250 220 251 221 memset(&init_attr, 0, sizeof init_attr); 252 222 223 + 
mutex_lock(&ig.connlist_mutex); 224 + /* select the CQ with the minimal number of usages */ 225 + for (index = 0; index < device->cqs_used; index++) 226 + if (device->cq_active_qps[index] < 227 + device->cq_active_qps[min_index]) 228 + min_index = index; 229 + device->cq_active_qps[min_index]++; 230 + mutex_unlock(&ig.connlist_mutex); 231 + iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn); 232 + 253 233 init_attr.event_handler = iser_qp_event_callback; 254 234 init_attr.qp_context = (void *)ib_conn; 255 - init_attr.send_cq = device->tx_cq; 256 - init_attr.recv_cq = device->rx_cq; 235 + init_attr.send_cq = device->tx_cq[min_index]; 236 + init_attr.recv_cq = device->rx_cq[min_index]; 257 237 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 258 238 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 259 239 init_attr.cap.max_send_sge = 2; ··· 292 252 */ 293 253 static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) 294 254 { 255 + int cq_index; 295 256 BUG_ON(ib_conn == NULL); 296 257 297 258 iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", ··· 303 262 if (ib_conn->fmr_pool != NULL) 304 263 ib_destroy_fmr_pool(ib_conn->fmr_pool); 305 264 306 - if (ib_conn->qp != NULL) 307 - rdma_destroy_qp(ib_conn->cma_id); 265 + if (ib_conn->qp != NULL) { 266 + cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; 267 + ib_conn->device->cq_active_qps[cq_index]--; 308 268 269 + rdma_destroy_qp(ib_conn->cma_id); 270 + } 309 271 /* if cma handler context, the caller acts s.t the cma destroy the id */ 310 272 if (ib_conn->cma_id != NULL && can_destroy_id) 311 273 rdma_destroy_id(ib_conn->cma_id); ··· 835 791 } 836 792 } 837 793 838 - static int iser_drain_tx_cq(struct iser_device *device) 794 + static int iser_drain_tx_cq(struct iser_device *device, int cq_index) 839 795 { 840 - struct ib_cq *cq = device->tx_cq; 796 + struct ib_cq *cq = device->tx_cq[cq_index]; 841 797 struct ib_wc wc; 842 798 struct 
iser_tx_desc *tx_desc; 843 799 struct iser_conn *ib_conn; ··· 866 822 867 823 static void iser_cq_tasklet_fn(unsigned long data) 868 824 { 869 - struct iser_device *device = (struct iser_device *)data; 870 - struct ib_cq *cq = device->rx_cq; 825 + struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; 826 + struct iser_device *device = cq_desc->device; 827 + int cq_index = cq_desc->cq_index; 828 + struct ib_cq *cq = device->rx_cq[cq_index]; 871 829 struct ib_wc wc; 872 830 struct iser_rx_desc *desc; 873 831 unsigned long xfer_len; ··· 897 851 } 898 852 completed_rx++; 899 853 if (!(completed_rx & 63)) 900 - completed_tx += iser_drain_tx_cq(device); 854 + completed_tx += iser_drain_tx_cq(device, cq_index); 901 855 } 902 856 /* #warning "it is assumed here that arming CQ only once its empty" * 903 857 * " would not cause interrupts to be missed" */ 904 858 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 905 859 906 - completed_tx += iser_drain_tx_cq(device); 860 + completed_tx += iser_drain_tx_cq(device, cq_index); 907 861 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 908 862 } 909 863 910 864 static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 911 865 { 912 - struct iser_device *device = (struct iser_device *)cq_context; 866 + struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; 867 + struct iser_device *device = cq_desc->device; 868 + int cq_index = cq_desc->cq_index; 913 869 914 - tasklet_schedule(&device->cq_tasklet); 870 + tasklet_schedule(&device->cq_tasklet[cq_index]); 915 871 }
+2
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
··· 3094 3094 if (validate_eth_header_mac(slave, rule_header, rlist)) 3095 3095 return -EINVAL; 3096 3096 break; 3097 + case MLX4_NET_TRANS_RULE_ID_IB: 3098 + break; 3097 3099 case MLX4_NET_TRANS_RULE_ID_IPV4: 3098 3100 case MLX4_NET_TRANS_RULE_ID_TCP: 3099 3101 case MLX4_NET_TRANS_RULE_ID_UDP: