Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
IB/cm: Improve local id allocation
IPoIB/cm: Fix SRQ WR leak
IB/ipoib: Fix typos in error messages
IB/mlx4: Check if SRQ is full when posting receive
IB/mlx4: Pass send queue sizes from userspace to kernel
IB/mlx4: Fix check of opcode in mlx4_ib_post_send()
mlx4_core: Fix array overrun in dump_dev_cap_flags()
IB/mlx4: Fix RESET to RESET and RESET to ERROR transitions
IB/mthca: Fix RESET to ERROR transition
IB/mlx4: Set GRH:HopLimit when sending globally routed MADs
IB/mthca: Set GRH:HopLimit when building MLX headers
IB/mlx4: Fix check of max_qp_dest_rdma in modify QP
IB/mthca: Fix use-after-free on device restart
IB/ehca: Return proper error code if register_mr fails
IPoIB: Handle P_Key table reordering
IB/core: Use start_port() and end_port()
IB/core: Add helpers for uncached GID and P_Key searches
IB/ipath: Fix potential deadlock with multicast spinlocks
IB/core: Free umem when mm is already gone

+697 -222
+3 -1
drivers/infiniband/core/cm.c
···
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
 		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
-					next_id++, &id);
+					next_id, &id);
+		if (!ret)
+			next_id = ((unsigned) id + 1) & MAX_ID_MASK;
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
 
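
Editor's note: a minimal, self-contained sketch (not part of the patch) of the idr allocation pattern this change relies on, using the idr API of that kernel era: idr_pre_get() preallocates memory outside the lock, idr_get_new_above() allocates under it, and the saved hint makes allocation round-robin instead of immediately reusing the lowest free id. The example_* names are hypothetical.

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_lock);
static u32 example_next_id;

static int example_alloc_id(void *ptr)
{
	unsigned long flags;
	int id, ret;

	do {
		spin_lock_irqsave(&example_lock, flags);
		ret = idr_get_new_above(&example_idr, ptr,
					example_next_id, &id);
		if (!ret)
			/* advance the hint so the next caller starts above us */
			example_next_id = ((unsigned) id + 1) & MAX_ID_MASK;
		spin_unlock_irqrestore(&example_lock, flags);
	} while (ret == -EAGAIN && idr_pre_get(&example_idr, GFP_KERNEL));

	return ret ? ret : id;
}

Returning the id directly (or a negative errno) keeps the caller's retry logic out of the allocation path, which is the same shape as the cm.c loop above.
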
+127 -8
drivers/infiniband/core/device.c
···
 	return 0;
 }
 
+static int start_port(struct ib_device *device)
+{
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
+}
+
+
+static int end_port(struct ib_device *device)
+{
+	return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+		0 : device->phys_port_cnt;
+}
+
 /**
  * ib_alloc_device - allocate an IB device struct
  * @size:size of structure to allocate
···
 	return 0;
 }
 
+static int read_port_table_lengths(struct ib_device *device)
+{
+	struct ib_port_attr *tprops = NULL;
+	int num_ports, ret = -ENOMEM;
+	u8 port_index;
+
+	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+	if (!tprops)
+		goto out;
+
+	num_ports = end_port(device) - start_port(device) + 1;
+
+	device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
+				       GFP_KERNEL);
+	device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
+				      GFP_KERNEL);
+	if (!device->pkey_tbl_len || !device->gid_tbl_len)
+		goto err;
+
+	for (port_index = 0; port_index < num_ports; ++port_index) {
+		ret = ib_query_port(device, port_index + start_port(device),
+				    tprops);
+		if (ret)
+			goto err;
+		device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
+		device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
+	}
+
+	ret = 0;
+	goto out;
+
+err:
+	kfree(device->gid_tbl_len);
+	kfree(device->pkey_tbl_len);
+out:
+	kfree(tprops);
+	return ret;
+}
+
 /**
  * ib_register_device - Register an IB device with IB core
  * @device:Device to register
···
 	spin_lock_init(&device->event_handler_lock);
 	spin_lock_init(&device->client_data_lock);
 
+	ret = read_port_table_lengths(device);
+	if (ret) {
+		printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
+		       device->name);
+		goto out;
+	}
+
 	ret = ib_device_register_sysfs(device);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
 		       device->name);
+		kfree(device->gid_tbl_len);
+		kfree(device->pkey_tbl_len);
 		goto out;
 	}
 
···
 			client->remove(device);
 
 	list_del(&device->core_list);
+
+	kfree(device->gid_tbl_len);
+	kfree(device->pkey_tbl_len);
 
 	mutex_unlock(&device_mutex);
 
···
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
-	if (device->node_type == RDMA_NODE_IB_SWITCH) {
-		if (port_num)
-			return -EINVAL;
-	} else if (port_num < 1 || port_num > device->phys_port_cnt)
+	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
 
 	return device->query_port(device, port_num, port_attr);
···
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
-	if (device->node_type == RDMA_NODE_IB_SWITCH) {
-		if (port_num)
-			return -EINVAL;
-	} else if (port_num < 1 || port_num > device->phys_port_cnt)
+	if (port_num < start_port(device) || port_num > end_port(device))
 		return -EINVAL;
 
 	return device->modify_port(device, port_num, port_modify_mask,
 				   port_modify);
 }
 EXPORT_SYMBOL(ib_modify_port);
+
+/**
+ * ib_find_gid - Returns the port number and GID table index where
+ *   a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the GID table where the GID was found.  This
+ *   parameter may be NULL.
+ */
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+		u8 *port_num, u16 *index)
+{
+	union ib_gid tmp_gid;
+	int ret, port, i;
+
+	for (port = start_port(device); port <= end_port(device); ++port) {
+		for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
+			ret = ib_query_gid(device, port, i, &tmp_gid);
+			if (ret)
+				return ret;
+			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
+				*port_num = port;
+				if (index)
+					*index = i;
+				return 0;
+			}
+		}
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_gid);
+
+/**
+ * ib_find_pkey - Returns the PKey table index where a specified
+ *   PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the PKey table where the PKey was found.
+ */
+int ib_find_pkey(struct ib_device *device,
+		 u8 port_num, u16 pkey, u16 *index)
+{
+	int ret, i;
+	u16 tmp_pkey;
+
+	for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
+		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
+		if (ret)
+			return ret;
+
+		if (pkey == tmp_pkey) {
+			*index = i;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL(ib_find_pkey);
 
 static int __init ib_core_init(void)
 {
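
Editor's note: a hedged usage sketch, not part of the diff, of how a kernel consumer might call the new uncached helpers added above. The example_lookup() wrapper and its arguments are hypothetical; ib_find_pkey() and ib_find_gid() are the functions introduced by this patch, and IPoIB uses ib_find_pkey() the same way further down in this merge.

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

static int example_lookup(struct ib_device *ca, u8 port, u16 pkey,
			  union ib_gid *gid)
{
	u16 pkey_index, gid_index;
	u8 gid_port;
	int ret;

	/* Scan the port's P_Key table directly (no cache), so this sees
	 * the table as the SM has programmed it right now. */
	ret = ib_find_pkey(ca, port, pkey, &pkey_index);
	if (ret)
		return ret;	/* -ENOENT if the P_Key is not in the table */

	/* Find which port and GID table index hold the given GID. */
	ret = ib_find_gid(ca, gid, &gid_port, &gid_index);
	if (ret)
		return ret;

	printk(KERN_INFO "pkey at index %u, gid at port %u index %u\n",
	       pkey_index, gid_port, gid_index);
	return 0;
}
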
+3 -1
drivers/infiniband/core/umem.c
···
 	__ib_umem_release(umem->context->device, umem, 1);
 
 	mm = get_task_mm(current);
-	if (!mm)
+	if (!mm) {
+		kfree(umem);
 		return;
+	}
 
 	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
 
+2 -5
drivers/infiniband/hw/ehca/ehca_mrmw.c
···
 	switch (hipz_rc) {
 	case H_SUCCESS: /* successful completion */
 		return 0;
-	case H_ADAPTER_PARM: /* invalid adapter handle */
-	case H_RT_PARM: /* invalid resource type */
 	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
-	case H_MLENGTH_PARM: /* invalid memory length */
-	case H_MEM_ACCESS_PARM: /* invalid access controls */
 	case H_CONSTRAINED: /* resource constraint */
-		return -EINVAL;
+	case H_NO_MEM:
+		return -ENOMEM;
 	case H_BUSY: /* long busy */
 		return -EBUSY;
 	default:
+7 -9
drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
···
 {
 	struct rb_node **n = &mcast_tree.rb_node;
 	struct rb_node *pn = NULL;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&mcast_lock, flags);
+	spin_lock_irq(&mcast_lock);
 
 	while (*n) {
 		struct ipath_mcast *tmcast;
···
 	ret = 0;
 
 bail:
-	spin_unlock_irqrestore(&mcast_lock, flags);
+	spin_unlock_irq(&mcast_lock);
 
 	return ret;
 }
···
 	struct ipath_mcast *mcast = NULL;
 	struct ipath_mcast_qp *p, *tmp;
 	struct rb_node *n;
-	unsigned long flags;
 	int last = 0;
 	int ret;
 
-	spin_lock_irqsave(&mcast_lock, flags);
+	spin_lock_irq(&mcast_lock);
 
 	/* Find the GID in the mcast table. */
 	n = mcast_tree.rb_node;
 	while (1) {
 		if (n == NULL) {
-			spin_unlock_irqrestore(&mcast_lock, flags);
+			spin_unlock_irq(&mcast_lock);
 			ret = -EINVAL;
 			goto bail;
 		}
···
 		break;
 	}
 
-	spin_unlock_irqrestore(&mcast_lock, flags);
+	spin_unlock_irq(&mcast_lock);
 
 	if (p) {
 		/*
···
 		atomic_dec(&mcast->refcount);
 		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
 		ipath_mcast_free(mcast);
-		spin_lock(&dev->n_mcast_grps_lock);
+		spin_lock_irq(&dev->n_mcast_grps_lock);
 		dev->n_mcast_grps_allocated--;
-		spin_unlock(&dev->n_mcast_grps_lock);
+		spin_unlock_irq(&dev->n_mcast_grps_lock);
 	}
 
 	ret = 0;
+129 -52
drivers/infiniband/hw/mlx4/qp.c
···
 	}
 }
 
-static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-		       enum ib_qp_type type, struct mlx4_ib_qp *qp)
+static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+		       struct mlx4_ib_qp *qp)
 {
-	/* Sanity check QP size before proceeding */
+	/* Sanity check RQ size before proceeding */
+	if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
+	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+		return -EINVAL;
+
+	qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
+
+	qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
+						    sizeof (struct mlx4_wqe_data_seg)));
+	qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
+
+	cap->max_recv_wr = qp->rq.max;
+	cap->max_recv_sge = qp->rq.max_gs;
+
+	return 0;
+}
+
+static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
+{
+	/* Sanity check SQ size before proceeding */
 	if (cap->max_send_wr > dev->dev->caps.max_wqes ||
-	    cap->max_recv_wr > dev->dev->caps.max_wqes ||
 	    cap->max_send_sge > dev->dev->caps.max_sq_sg ||
-	    cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
 	    cap->max_inline_data + send_wqe_overhead(type) +
 	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
 		return -EINVAL;
···
 	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
 		return -EINVAL;
 
-	qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
-	qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
-
-	qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
-						    sizeof (struct mlx4_wqe_data_seg)));
-	qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
+	qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
 
 	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
 							sizeof (struct mlx4_wqe_data_seg),
···
 		qp->sq.offset = 0;
 	}
 
-	cap->max_send_wr = qp->sq.max;
-	cap->max_recv_wr = qp->rq.max;
-	cap->max_send_sge = qp->sq.max_gs;
-	cap->max_recv_sge = qp->rq.max_gs;
+	cap->max_send_wr = qp->sq.max;
+	cap->max_send_sge = qp->sq.max_gs;
 	cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
 		sizeof (struct mlx4_wqe_inline_seg);
+
+	return 0;
+}
+
+static int set_user_sq_size(struct mlx4_ib_qp *qp,
+			    struct mlx4_ib_create_qp *ucmd)
+{
+	qp->sq.max = 1 << ucmd->log_sq_bb_count;
+	qp->sq.wqe_shift = ucmd->log_sq_stride;
+
+	qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
+		(qp->sq.max << qp->sq.wqe_shift);
 
 	return 0;
 }
···
 	qp->sq.head = 0;
 	qp->sq.tail = 0;
 
-	err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+	err = set_rq_size(dev, &init_attr->cap, qp);
 	if (err)
 		goto err;
 
···
 			err = -EFAULT;
 			goto err;
 		}
+
+		err = set_user_sq_size(qp, &ucmd);
+		if (err)
+			goto err;
 
 		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
 				       qp->buf_size, 0);
···
 		if (err)
 			goto err_mtt;
 	} else {
+		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+		if (err)
+			goto err;
+
 		err = mlx4_ib_db_alloc(dev, &qp->db, 0);
 		if (err)
 			goto err;
···
 	}
 }
 
-static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
+static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
 				   int attr_mask)
 {
 	u8 dest_rd_atomic;
···
 	return cpu_to_be32(hw_access_flags);
 }
 
-static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
+static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
 			    int attr_mask)
 {
 	if (attr_mask & IB_QP_PKEY_INDEX)
···
 	path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
 }
 
-static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 			 struct mlx4_qp_path *path, u8 port)
 {
 	path->grh_mylmc = ah->src_path_bits & 0x7f;
···
 	return 0;
 }
 
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		      int attr_mask, struct ib_udata *udata)
+static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+			       const struct ib_qp_attr *attr, int attr_mask,
+			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	struct mlx4_qp_context *context;
 	enum mlx4_qp_optpar optpar = 0;
-	enum ib_qp_state cur_state, new_state;
 	int sqd_event;
 	int err = -EINVAL;
 
 	context = kzalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
 		return -ENOMEM;
-
-	mutex_lock(&qp->mutex);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
-		goto out;
-
-	if ((attr_mask & IB_QP_PKEY_INDEX) &&
-	    attr->pkey_index >= dev->dev->caps.pkey_table_len) {
-		goto out;
-	}
-
-	if ((attr_mask & IB_QP_PORT) &&
-	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-	    attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-	    attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
-		goto out;
-	}
 
 	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
 				     (to_mlx4_st(ibqp->qp_type) << 16));
···
 	}
 
 out:
-	mutex_unlock(&qp->mutex);
 	kfree(context);
+	return err;
+}
+
+static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
+static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
+	[IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
+			IB_QP_PORT |
+			IB_QP_QKEY),
+	[IB_QPT_UC]  = (IB_QP_PKEY_INDEX |
+			IB_QP_PORT |
+			IB_QP_ACCESS_FLAGS),
+	[IB_QPT_RC]  = (IB_QP_PKEY_INDEX |
+			IB_QP_PORT |
+			IB_QP_ACCESS_FLAGS),
+	[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+			IB_QP_QKEY),
+	[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+			IB_QP_QKEY),
+};
+
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx4_ib_qp *qp = to_mqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	int err = -EINVAL;
+
+	mutex_lock(&qp->mutex);
+
+	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+		goto out;
+
+	if ((attr_mask & IB_QP_PKEY_INDEX) &&
+	    attr->pkey_index >= dev->dev->caps.pkey_table_len) {
+		goto out;
+	}
+
+	if ((attr_mask & IB_QP_PORT) &&
+	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+	    attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+	    attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
+		goto out;
+	}
+
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		err = 0;
+		goto out;
+	}
+
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+		err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
+					  mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
+					  IB_QPS_RESET, IB_QPS_INIT);
+		if (err)
+			goto out;
+		cur_state = IB_QPS_INIT;
+	}
+
+	err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+	mutex_unlock(&qp->mutex);
 	return err;
 }
 
···
 			(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
 		sqp->ud_header.grh.flow_label =
 			ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+		sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
 		ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
 				  ah->av.gid_index, &sqp->ud_header.grh.source_gid);
 		memcpy(sqp->ud_header.grh.destination_gid.raw,
···
 		 */
 		wmb();
 
-		if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) {
+		if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
 			err = -EINVAL;
 			goto out;
 		}
+6
drivers/infiniband/hw/mlx4/srq.c
···
 			break;
 		}
 
+		if (unlikely(srq->head == srq->tail)) {
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
 		srq->wrid[srq->head] = wr->wr_id;
 
 		next = get_wqe(srq, srq->head);
+4 -1
drivers/infiniband/hw/mlx4/user.h
···
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	1
+#define MLX4_IB_UVERBS_ABI_VERSION	2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
···
 struct mlx4_ib_create_qp {
 	__u64	buf_addr;
 	__u64	db_addr;
+	__u8	log_sq_bb_count;
+	__u8	log_sq_stride;
+	__u8	reserved[6];
 };
 
 #endif /* MLX4_IB_USER_H */
+1
drivers/infiniband/hw/mthca/mthca_av.c
···
 		(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
 	header->grh.flow_label =
 		ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+	header->grh.hop_limit = ah->av->hop_limit;
 	ib_get_cached_gid(&dev->ib_dev,
 			  be32_to_cpu(ah->av->port_pd) >> 24,
 			  ah->av->gid_index % dev->limits.gid_table_len,
+3 -1
drivers/infiniband/hw/mthca/mthca_main.c
···
 int __mthca_restart_one(struct pci_dev *pdev)
 {
 	struct mthca_dev *mdev;
+	int hca_type;
 
 	mdev = pci_get_drvdata(pdev);
 	if (!mdev)
 		return -ENODEV;
+	hca_type = mdev->hca_type;
 	__mthca_remove_one(pdev);
-	return __mthca_init_one(pdev, mdev->hca_type);
+	return __mthca_init_one(pdev, hca_type);
 }
 
 static int __devinit mthca_init_one(struct pci_dev *pdev,
+98 -60
drivers/infiniband/hw/mthca/mthca_qp.c
···
 	}
 }
 
-static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
+static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
 			int attr_mask)
 {
 	if (attr_mask & IB_QP_PKEY_INDEX)
···
 		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
 }
 
-static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
+static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
 				  int attr_mask)
 {
 	u8 dest_rd_atomic;
···
 	return err;
 }
 
-static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
+static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
 			  struct mthca_qp_path *path, u8 port)
 {
 	path->g_mylmc = ah->src_path_bits & 0x7f;
···
 	return 0;
 }
 
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
-		    struct ib_udata *udata)
+static int __mthca_modify_qp(struct ib_qp *ibqp,
+			     const struct ib_qp_attr *attr, int attr_mask,
+			     enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
 	struct mthca_mailbox *mailbox;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *qp_context;
 	u32 sqd_event = 0;
 	u8 status;
 	int err = -EINVAL;
-
-	mutex_lock(&qp->mutex);
-
-	if (attr_mask & IB_QP_CUR_STATE) {
-		cur_state = attr->cur_qp_state;
-	} else {
-		spin_lock_irq(&qp->sq.lock);
-		spin_lock(&qp->rq.lock);
-		cur_state = qp->state;
-		spin_unlock(&qp->rq.lock);
-		spin_unlock_irq(&qp->sq.lock);
-	}
-
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
-		mthca_dbg(dev, "Bad QP transition (transport %d) "
-			  "%d->%d with attr 0x%08x\n",
-			  qp->transport, cur_state, new_state,
-			  attr_mask);
-		goto out;
-	}
-
-	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
-		err = 0;
-		goto out;
-	}
-
-	if ((attr_mask & IB_QP_PKEY_INDEX) &&
-	     attr->pkey_index >= dev->limits.pkey_table_len) {
-		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
-			  attr->pkey_index, dev->limits.pkey_table_len-1);
-		goto out;
-	}
-
-	if ((attr_mask & IB_QP_PORT) &&
-	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
-		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
-		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
-			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
-		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
-			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
-		goto out;
-	}
 
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 	if (IS_ERR(mailbox)) {
···
 
 out_mailbox:
 	mthca_free_mailbox(dev, mailbox);
+out:
+	return err;
+}
+
+static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
+static const int dummy_init_attr_mask[] = {
+	[IB_QPT_UD]  = (IB_QP_PKEY_INDEX |
+			IB_QP_PORT |
+			IB_QP_QKEY),
+	[IB_QPT_UC]  = (IB_QP_PKEY_INDEX |
+			IB_QP_PORT |
+			IB_QP_ACCESS_FLAGS),
+	[IB_QPT_RC]  = (IB_QP_PKEY_INDEX |
+			IB_QP_PORT |
+			IB_QP_ACCESS_FLAGS),
+	[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+			IB_QP_QKEY),
+	[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+			IB_QP_QKEY),
+};
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+		    struct ib_udata *udata)
+{
+	struct mthca_dev *dev = to_mdev(ibqp->device);
+	struct mthca_qp *qp = to_mqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	int err = -EINVAL;
+
+	mutex_lock(&qp->mutex);
+	if (attr_mask & IB_QP_CUR_STATE) {
+		cur_state = attr->cur_qp_state;
+	} else {
+		spin_lock_irq(&qp->sq.lock);
+		spin_lock(&qp->rq.lock);
+		cur_state = qp->state;
+		spin_unlock(&qp->rq.lock);
+		spin_unlock_irq(&qp->sq.lock);
+	}
+
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+		mthca_dbg(dev, "Bad QP transition (transport %d) "
+			  "%d->%d with attr 0x%08x\n",
+			  qp->transport, cur_state, new_state,
+			  attr_mask);
+		goto out;
+	}
+
+	if ((attr_mask & IB_QP_PKEY_INDEX) &&
+	     attr->pkey_index >= dev->limits.pkey_table_len) {
+		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+			  attr->pkey_index, dev->limits.pkey_table_len-1);
+		goto out;
+	}
+
+	if ((attr_mask & IB_QP_PORT) &&
+	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+		goto out;
+	}
+
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		err = 0;
+		goto out;
+	}
+
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+		err = __mthca_modify_qp(ibqp, &dummy_init_attr,
+					dummy_init_attr_mask[ibqp->qp_type],
+					IB_QPS_RESET, IB_QPS_INIT);
+		if (err)
+			goto out;
+		cur_state = IB_QPS_INIT;
+	}
+
+	err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
 
 out:
 	mutex_unlock(&qp->mutex);
+46 -3
drivers/infiniband/ulp/ipoib/ipoib.h
···
 	__be32 mtu;
 };
 
+/*
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State.  The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ *       to be empty or the number of Poll CQ operations has exceeded
+ *       CQ capacity size;
+ * - or
+ *       post another WR that completes on the same CQ and wait for this
+ *       WR to return as a WC;
+ * - and then invoke a Destroy QP or Reset QP.
+ *
+ * We use the second option and wait for a completion on the
+ * rx_drain_qp before destroying QPs attached to our SRQ.
+ */
+
+enum ipoib_cm_state {
+	IPOIB_CM_RX_LIVE,
+	IPOIB_CM_RX_ERROR, /* Ignored by stale task */
+	IPOIB_CM_RX_FLUSH  /* Last WQE Reached event observed */
+};
+
 struct ipoib_cm_rx {
 	struct ib_cm_id *id;
 	struct ib_qp *qp;
 	struct list_head list;
 	struct net_device *dev;
 	unsigned long jiffies;
+	enum ipoib_cm_state state;
 };
 
 struct ipoib_cm_tx {
···
 	struct ib_srq *srq;
 	struct ipoib_cm_rx_buf *srq_ring;
 	struct ib_cm_id *id;
-	struct list_head passive_ids;
+	struct ib_qp *rx_drain_qp;   /* generates WR described in 10.3.1 */
+	struct list_head passive_ids;   /* state: LIVE */
+	struct list_head rx_error_list; /* state: ERROR */
+	struct list_head rx_flush_list; /* state: FLUSH, drain not started */
+	struct list_head rx_drain_list; /* state: FLUSH, drain started */
+	struct list_head rx_reap_list;  /* state: FLUSH, drain done */
 	struct work_struct start_task;
 	struct work_struct reap_task;
 	struct work_struct skb_task;
+	struct work_struct rx_reap_task;
 	struct delayed_work stale_task;
 	struct sk_buff_head skb_queue;
 	struct list_head start_list;
···
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct delayed_work pkey_task;
+	struct delayed_work pkey_poll_task;
 	struct delayed_work mcast_task;
 	struct work_struct flush_task;
 	struct work_struct restart_task;
 	struct delayed_work ah_reap_task;
+	struct work_struct pkey_event_task;
 
 	struct ib_device *ca;
 	u8 port;
 	u16 pkey;
+	u16 pkey_index;
 	struct ib_pd *pd;
 	struct ib_mr *mr;
 	struct ib_cq *cq;
···
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
 int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
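
Editor's note: the quoted spec text above describes the drain sequence in prose, and the ipoib_cm.c diff below implements it. As a rough, hedged sketch of the "post another WR that completes on the same CQ" option (hypothetical example_* helpers, not taken from the diff): the connection QP is moved to the error state, and a marker WR is posted on a drain QP that is permanently in the error state and shares the CQ/SRQ. The idea, as the driver applies it, is that the marker is only posted after the Last WQE Reached event, so by the time its flush completion is seen on the shared CQ it is safe to destroy the dying QPs.

#include <rdma/ib_verbs.h>

#define EXAMPLE_DRAIN_WRID 0x7fffffff	/* magic wr_id recognized in the CQ handler */

static struct ib_qp_attr example_err_attr = { .qp_state = IB_QPS_ERR };
static struct ib_recv_wr example_drain_wr = { .wr_id = EXAMPLE_DRAIN_WRID };

static int example_start_drain(struct ib_qp *conn_qp, struct ib_qp *drain_qp)
{
	struct ib_recv_wr *bad_wr;
	int ret;

	/* 1. Error the connection QP; a Last WQE Reached event will follow. */
	ret = ib_modify_qp(conn_qp, &example_err_attr, IB_QP_STATE);
	if (ret)
		return ret;

	/* 2. Post the marker on the always-in-error drain QP; its "flush
	 *    error" completion on the shared CQ ends the drain. */
	return ib_post_recv(drain_qp, &example_drain_wr, &bad_wr);
}
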
+169 -34
drivers/infiniband/ulp/ipoib/ipoib_cm.c
···
 #include <net/dst.h>
 #include <net/icmp.h>
 #include <linux/icmpv6.h>
+#include <linux/delay.h>
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
 static int data_debug_level;
···
 	int flags;
 	u32 remote_qpn;
 	u32 remote_mtu;
+};
+
+static struct ib_qp_attr ipoib_cm_err_attr = {
+	.qp_state = IB_QPS_ERR
+};
+
+#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
+
+static struct ib_recv_wr ipoib_cm_rx_drain_wr = {
+	.wr_id = IPOIB_CM_RX_DRAIN_WRID
 };
 
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
···
 	return NULL;
 }
 
+static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
+{
+	struct ib_recv_wr *bad_wr;
+
+	/* rx_drain_qp send queue depth is 1, so
+	 * make sure we have at most 1 outstanding WR. */
+	if (list_empty(&priv->cm.rx_flush_list) ||
+	    !list_empty(&priv->cm.rx_drain_list))
+		return;
+
+	if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr))
+		ipoib_warn(priv, "failed to post rx_drain wr\n");
+
+	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
+}
+
+static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
+{
+	struct ipoib_cm_rx *p = ctx;
+	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+	unsigned long flags;
+
+	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
+		return;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	list_move(&p->list, &priv->cm.rx_flush_list);
+	p->state = IPOIB_CM_RX_FLUSH;
+	ipoib_cm_start_rx_drain(priv);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
 static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 					   struct ipoib_cm_rx *p)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr attr = {
+		.event_handler = ipoib_cm_rx_event_handler,
 		.send_cq = priv->cq, /* does not matter, we never send anything */
 		.recv_cq = priv->cq,
 		.srq = priv->cm.srq,
···
 
 	cm_id->context = p;
 	p->jiffies = jiffies;
+	p->state = IPOIB_CM_RX_LIVE;
 	spin_lock_irq(&priv->lock);
 	if (list_empty(&priv->cm.passive_ids))
 		queue_delayed_work(ipoib_workqueue,
···
 {
 	struct ipoib_cm_rx *p;
 	struct ipoib_dev_priv *priv;
-	int ret;
 
 	switch (event->event) {
 	case IB_CM_REQ_RECEIVED:
···
 	case IB_CM_REJ_RECEIVED:
 		p = cm_id->context;
 		priv = netdev_priv(p->dev);
-		spin_lock_irq(&priv->lock);
-		if (list_empty(&p->list))
-			ret = 0; /* Connection is going away already. */
-		else {
-			list_del_init(&p->list);
-			ret = -ECONNRESET;
-		}
-		spin_unlock_irq(&priv->lock);
-		if (ret) {
-			ib_destroy_qp(p->qp);
-			kfree(p);
-			return ret;
-		}
-		return 0;
+		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
+			ipoib_warn(priv, "unable to move qp to error state\n");
+		/* Fall through */
 	default:
 		return 0;
 	}
···
 		       wr_id, wc->status);
 
 	if (unlikely(wr_id >= ipoib_recvq_size)) {
-		ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
-			   wr_id, ipoib_recvq_size);
+		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
+			spin_lock_irqsave(&priv->lock, flags);
+			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
+			ipoib_cm_start_rx_drain(priv);
+			queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+			spin_unlock_irqrestore(&priv->lock, flags);
+		} else
+			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
+				   wr_id, ipoib_recvq_size);
 		return;
 	}
···
 	if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		p->jiffies = jiffies;
-		/* Move this entry to list head, but do
-		 * not re-add it if it has been removed. */
-		if (!list_empty(&p->list))
+		/* Move this entry to list head, but do not re-add it
+		 * if it has been moved out of list. */
+		if (p->state == IPOIB_CM_RX_LIVE)
 			list_move(&p->list, &priv->cm.passive_ids);
 		spin_unlock_irqrestore(&priv->lock, flags);
 	}
···
 int ipoib_cm_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ib_qp_init_attr qp_init_attr = {
+		.send_cq = priv->cq, /* does not matter, we never send anything */
+		.recv_cq = priv->cq,
+		.cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+		.cap.max_recv_wr = 1,
+		.cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
+		.sq_sig_type = IB_SIGNAL_ALL_WR,
+		.qp_type = IB_QPT_UC,
+	};
 	int ret;
 
 	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
 		return 0;
 
+	priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
+	if (IS_ERR(priv->cm.rx_drain_qp)) {
+		printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
+		ret = PTR_ERR(priv->cm.rx_drain_qp);
+		return ret;
+	}
+
+	/*
+	 * We put the QP in error state directly. This way, a "flush
+	 * error" WC will be immediately generated for each WR we post.
+	 */
+	ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
+	if (ret) {
+		ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
+		goto err_qp;
+	}
+
 	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
 	if (IS_ERR(priv->cm.id)) {
 		printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
 		ret = PTR_ERR(priv->cm.id);
-		priv->cm.id = NULL;
-		return ret;
+		goto err_cm;
 	}
 
 	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
···
 	if (ret) {
 		printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
 		       IPOIB_CM_IETF_ID | priv->qp->qp_num);
-		ib_destroy_cm_id(priv->cm.id);
-		priv->cm.id = NULL;
-		return ret;
+		goto err_listen;
 	}
+
 	return 0;
+
+err_listen:
+	ib_destroy_cm_id(priv->cm.id);
+err_cm:
+	priv->cm.id = NULL;
+err_qp:
+	ib_destroy_qp(priv->cm.rx_drain_qp);
+	return ret;
 }
 
 void ipoib_cm_dev_stop(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_cm_rx *p;
+	struct ipoib_cm_rx *p, *n;
+	unsigned long begin;
+	LIST_HEAD(list);
+	int ret;
 
 	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
 		return;
 
 	ib_destroy_cm_id(priv->cm.id);
 	priv->cm.id = NULL;
+
 	spin_lock_irq(&priv->lock);
 	while (!list_empty(&priv->cm.passive_ids)) {
 		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
-		list_del_init(&p->list);
+		list_move(&p->list, &priv->cm.rx_error_list);
+		p->state = IPOIB_CM_RX_ERROR;
 		spin_unlock_irq(&priv->lock);
+		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
+		if (ret)
+			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
+		spin_lock_irq(&priv->lock);
+	}
+
+	/* Wait for all RX to be drained */
+	begin = jiffies;
+
+	while (!list_empty(&priv->cm.rx_error_list) ||
+	       !list_empty(&priv->cm.rx_flush_list) ||
+	       !list_empty(&priv->cm.rx_drain_list)) {
+		if (!time_after(jiffies, begin + 5 * HZ)) {
+			ipoib_warn(priv, "RX drain timing out\n");
+
+			/*
+			 * assume the HW is wedged and just free up everything.
+			 */
+			list_splice_init(&priv->cm.rx_flush_list, &list);
+			list_splice_init(&priv->cm.rx_error_list, &list);
+			list_splice_init(&priv->cm.rx_drain_list, &list);
+			break;
+		}
+		spin_unlock_irq(&priv->lock);
+		msleep(1);
+		spin_lock_irq(&priv->lock);
+	}
+
+	list_splice_init(&priv->cm.rx_reap_list, &list);
+
+	spin_unlock_irq(&priv->lock);
+
+	list_for_each_entry_safe(p, n, &list, list) {
 		ib_destroy_cm_id(p->id);
 		ib_destroy_qp(p->qp);
 		kfree(p);
-		spin_lock_irq(&priv->lock);
 	}
-	spin_unlock_irq(&priv->lock);
 
+	ib_destroy_qp(priv->cm.rx_drain_qp);
 	cancel_delayed_work(&priv->cm.stale_task);
 }
···
 	queue_work(ipoib_workqueue, &priv->cm.skb_task);
 }
 
+static void ipoib_cm_rx_reap(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+						   cm.rx_reap_task);
+	struct ipoib_cm_rx *p, *n;
+	LIST_HEAD(list);
+
+	spin_lock_irq(&priv->lock);
+	list_splice_init(&priv->cm.rx_reap_list, &list);
+	spin_unlock_irq(&priv->lock);
+
+	list_for_each_entry_safe(p, n, &list, list) {
+		ib_destroy_cm_id(p->id);
+		ib_destroy_qp(p->qp);
+		kfree(p);
+	}
+}
+
 static void ipoib_cm_stale_task(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 						   cm.stale_task.work);
 	struct ipoib_cm_rx *p;
+	int ret;
 
 	spin_lock_irq(&priv->lock);
 	while (!list_empty(&priv->cm.passive_ids)) {
-		/* List if sorted by LRU, start from tail,
+		/* List is sorted by LRU, start from tail,
 		 * stop when we see a recently used entry */
 		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
 		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
 			break;
-		list_del_init(&p->list);
+		list_move(&p->list, &priv->cm.rx_error_list);
+		p->state = IPOIB_CM_RX_ERROR;
 		spin_unlock_irq(&priv->lock);
-		ib_destroy_cm_id(p->id);
-		ib_destroy_qp(p->qp);
-		kfree(p);
+		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
+		if (ret)
+			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
 		spin_lock_irq(&priv->lock);
 	}
···
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
 	INIT_LIST_HEAD(&priv->cm.reap_list);
 	INIT_LIST_HEAD(&priv->cm.start_list);
+	INIT_LIST_HEAD(&priv->cm.rx_error_list);
+	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
+	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
+	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
 	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
 	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
 	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
+	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
 	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
 
 	skb_queue_head_init(&priv->cm.skb_queue);
+69 -18
drivers/infiniband/ulp/ipoib/ipoib_ib.c
···
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
 
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+		ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
+		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+		return -1;
+	}
+	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
 	ret = ipoib_init_qp(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
···
 	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
 	ret = ipoib_cm_dev_open(dev);
 	if (ret) {
-		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-		ipoib_ib_dev_stop(dev);
+		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
+		ipoib_ib_dev_stop(dev, 1);
 		return -1;
 	}
 
···
 	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
 		mutex_lock(&pkey_mutex);
 		set_bit(IPOIB_PKEY_STOP, &priv->flags);
-		cancel_delayed_work(&priv->pkey_task);
+		cancel_delayed_work(&priv->pkey_poll_task);
 		mutex_unlock(&pkey_mutex);
 		if (flush)
 			flush_workqueue(ipoib_workqueue);
···
 	return pending;
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
···
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	begin = jiffies;
 
···
 	return 0;
 }
 
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
 {
-	struct ipoib_dev_priv *cpriv, *priv =
-		container_of(work, struct ipoib_dev_priv, flush_task);
+	struct ipoib_dev_priv *cpriv;
 	struct net_device *dev = priv->dev;
+	u16 new_index;
 
-	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+	mutex_lock(&priv->vlan_mutex);
+
+	/*
+	 * Flush any child interfaces too -- they might be up even if
+	 * the parent is down.
+	 */
+	list_for_each_entry(cpriv, &priv->child_intfs, list)
+		__ipoib_ib_dev_flush(cpriv, pkey_event);
+
+	mutex_unlock(&priv->vlan_mutex);
+
+	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 		return;
 	}
···
 		return;
 	}
 
+	if (pkey_event) {
+		if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+			ipoib_ib_dev_down(dev, 0);
+			ipoib_pkey_dev_delay_open(dev);
+			return;
+		}
+		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
+		/* restart QP only if P_Key index is changed */
+		if (new_index == priv->pkey_index) {
+			ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
+			return;
+		}
+		priv->pkey_index = new_index;
+	}
+
 	ipoib_dbg(priv, "flushing\n");
 
 	ipoib_ib_dev_down(dev, 0);
+
+	if (pkey_event) {
+		ipoib_ib_dev_stop(dev, 0);
+		ipoib_ib_dev_open(dev);
+	}
 
 	/*
 	 * The device could have been brought down between the start and when
···
 		ipoib_ib_dev_up(dev);
 		ipoib_mcast_restart_task(&priv->restart_task);
 	}
+}
 
-	mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, flush_task);
 
-	/* Flush any child interfaces too */
-	list_for_each_entry(cpriv, &priv->child_intfs, list)
-		ipoib_ib_dev_flush(&cpriv->flush_task);
+	ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 0);
+}
 
-	mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+	ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
+	__ipoib_ib_dev_flush(priv, 1);
 }
···
 void ipoib_pkey_poll(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv =
-		container_of(work, struct ipoib_dev_priv, pkey_task.work);
+		container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
 	struct net_device *dev = priv->dev;
 
 	ipoib_pkey_dev_check_presence(dev);
···
 		mutex_lock(&pkey_mutex);
 		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
-					   &priv->pkey_task,
+					   &priv->pkey_poll_task,
 					   HZ);
 		mutex_unlock(&pkey_mutex);
 	}
···
 		mutex_lock(&pkey_mutex);
 		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
 		queue_delayed_work(ipoib_workqueue,
-				   &priv->pkey_task,
+				   &priv->pkey_poll_task,
 				   HZ);
 		mutex_unlock(&pkey_mutex);
 		return 1;
+4 -3
drivers/infiniband/ulp/ipoib/ipoib_main.c
···
 		return -EINVAL;
 
 	if (ipoib_ib_dev_up(dev)) {
-		ipoib_ib_dev_stop(dev);
+		ipoib_ib_dev_stop(dev, 1);
 		return -EINVAL;
 	}
 
···
 	flush_workqueue(ipoib_workqueue);
 
 	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev);
+	ipoib_ib_dev_stop(dev, 1);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
···
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
-	INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
+	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+	INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
 	INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
 	INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
+1 -1
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
···
 		return;
 
 	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
-		ipoib_warn(priv, "ib_gid_entry_get() failed\n");
+		ipoib_warn(priv, "ib_query_gid() failed\n");
 	else
 		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
+16 -24
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
···
  * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <rdma/ib_cache.h>
-
 #include "ipoib.h"
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
···
 	if (!qp_attr)
 		goto out;
 
-	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 		ret = -ENXIO;
 		goto out;
···
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
-	u16 pkey_index;
 	struct ib_qp_attr qp_attr;
 	int attr_mask;
 
-	/*
-	 * Search through the port P_Key table for the requested pkey value.
-	 * The port has to be assigned to the respective IB partition in
-	 * advance.
-	 */
-	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
-	if (ret) {
-		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-		return ret;
-	}
-	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+		return -1;
 
 	qp_attr.qp_state = IB_QPS_INIT;
 	qp_attr.qkey = 0;
 	qp_attr.port_num = priv->port;
-	qp_attr.pkey_index = pkey_index;
+	qp_attr.pkey_index = priv->pkey_index;
 	attr_mask =
 		IB_QP_QKEY |
 		IB_QP_PORT |
···
 	size = ipoib_sendq_size + ipoib_recvq_size + 1;
 	ret = ipoib_cm_dev_init(dev);
 	if (!ret)
-		size += ipoib_recvq_size;
+		size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
 
 	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
 	if (IS_ERR(priv->cq)) {
···
 	struct ipoib_dev_priv *priv =
 		container_of(handler, struct ipoib_dev_priv, event_handler);
 
-	if ((record->event == IB_EVENT_PORT_ERR ||
-	     record->event == IB_EVENT_PKEY_CHANGE ||
-	     record->event == IB_EVENT_PORT_ACTIVE ||
-	     record->event == IB_EVENT_LID_CHANGE ||
-	     record->event == IB_EVENT_SM_CHANGE ||
-	     record->event == IB_EVENT_CLIENT_REREGISTER) &&
-	    record->element.port_num == priv->port) {
+	if (record->element.port_num != priv->port)
+		return;
+
+	if (record->event == IB_EVENT_PORT_ERR ||
+	    record->event == IB_EVENT_PORT_ACTIVE ||
+	    record->event == IB_EVENT_LID_CHANGE ||
+	    record->event == IB_EVENT_SM_CHANGE ||
+	    record->event == IB_EVENT_CLIENT_REREGISTER) {
 		ipoib_dbg(priv, "Port state change event\n");
 		queue_work(ipoib_workqueue, &priv->flush_task);
+	} else if (record->event == IB_EVENT_PKEY_CHANGE) {
+		ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
+		queue_work(ipoib_workqueue, &priv->pkey_event_task);
 	}
 }
+1 -1
drivers/net/mlx4/fw.c
···
 	int i;
 
 	mlx4_dbg(dev, "DEV_CAP flags:\n");
-	for (i = 0; i < 32; ++i)
+	for (i = 0; i < ARRAY_SIZE(fname); ++i)
 		if (fname[i] && (flags & (1 << i)))
 			mlx4_dbg(dev, " %s\n", fname[i]);
 }
+8
include/rdma/ib_verbs.h
···
 	spinlock_t client_data_lock;
 
 	struct ib_cache cache;
+	int *pkey_tbl_len;
+	int *gid_tbl_len;
 
 	u32 flags;
 
···
 int ib_modify_port(struct ib_device *device,
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify);
+
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+		u8 *port_num, u16 *index);
+
+int ib_find_pkey(struct ib_device *device,
+		 u8 port_num, u16 pkey, u16 *index);
 
 /**
  * ib_alloc_pd - Allocates an unused protection domain.