Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branches 'mlx4', 'mlx5' and 'ocrdma' into k.o/for-4.6

+2455 -359
+8
drivers/infiniband/core/cma.c
··· 1206 1206 req->has_gid = true; 1207 1207 req->service_id = req_param->primary_path->service_id; 1208 1208 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1209 + if (req->pkey != req_param->bth_pkey) 1210 + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1211 + "RDMA CMA: in the future this may cause the request to be dropped\n", 1212 + req_param->bth_pkey, req->pkey); 1209 1213 break; 1210 1214 case IB_CM_SIDR_REQ_RECEIVED: 1211 1215 req->device = sidr_param->listen_id->device; ··· 1217 1213 req->has_gid = false; 1218 1214 req->service_id = sidr_param->service_id; 1219 1215 req->pkey = sidr_param->pkey; 1216 + if (req->pkey != sidr_param->bth_pkey) 1217 + pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1218 + "RDMA CMA: in the future this may cause the request to be dropped\n", 1219 + sidr_param->bth_pkey, req->pkey); 1220 1220 break; 1221 1221 default: 1222 1222 return -EINVAL;
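The two new checks only warn when the P_Key carried in the BTH differs from the P_Key found in the primary path (or in the SIDR request payload); the request is still accepted, and the rate-limited message itself announces that a future kernel may start dropping such requests. A minimal, hedged sketch of the same pattern as a standalone helper (the helper name and parameters are illustrative, not part of the patch):

#include <linux/printk.h>
#include <linux/types.h>

/* Illustrative helper: warn (rate-limited) when the BTH P_Key does not
 * match the P_Key we expected for this request. */
static void warn_pkey_mismatch(u16 bth_pkey, u16 expected_pkey)
{
    if (bth_pkey != expected_pkey)
        pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and expected P_Key (0x%x)\n",
                            bth_pkey, expected_pkey);
}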
+15 -1
drivers/infiniband/core/uverbs_cmd.c
··· 1174 1174 struct ib_uobject *uobj; 1175 1175 struct ib_pd *pd; 1176 1176 struct ib_mw *mw; 1177 + struct ib_udata udata; 1177 1178 int ret; 1178 1179 1179 1180 if (out_len < sizeof(resp)) ··· 1196 1195 goto err_free; 1197 1196 } 1198 1197 1199 - mw = pd->device->alloc_mw(pd, cmd.mw_type); 1198 + INIT_UDATA(&udata, buf + sizeof(cmd), 1199 + (unsigned long)cmd.response + sizeof(resp), 1200 + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 1201 + out_len - sizeof(resp)); 1202 + 1203 + mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); 1200 1204 if (IS_ERR(mw)) { 1201 1205 ret = PTR_ERR(mw); 1202 1206 goto err_put; ··· 3091 3085 if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && 3092 3086 !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) 3093 3087 return -EPERM; 3088 + 3089 + if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) 3090 + return -EINVAL; 3091 + 3092 + if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && 3093 + ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) || 3094 + (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT))) 3095 + return -EINVAL; 3094 3096 3095 3097 if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) 3096 3098 return -EINVAL;
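Two independent changes here: the memory-window allocation path now builds an ib_udata so the driver-private tail of the user's ALLOC_MW command and response buffers reaches the provider's alloc_mw() hook, and CREATE_FLOW now rejects unknown flow_attr flags as well as IB_FLOW_ATTR_FLAGS_DONT_TRAP on default (ALL_DEFAULT/MC_DEFAULT) rules. A hedged sketch of how a provider might consume the new udata; struct my_alloc_mw_req and the helper are hypothetical:

#include <linux/kernel.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Hypothetical vendor-private input blob, not defined by this patch. */
struct my_alloc_mw_req {
    __u32 comp_mask;
    __u32 reserved;
};

static int read_alloc_mw_req(struct ib_udata *udata, struct my_alloc_mw_req *req)
{
    memset(req, 0, sizeof(*req));
    if (udata->inlen &&
        ib_copy_from_udata(req, udata, min(sizeof(*req), udata->inlen)))
        return -EFAULT;
    return 0;
}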
+2
drivers/infiniband/core/verbs.c
··· 1567 1567 * - The last sg element is allowed to have length less than page_size. 1568 1568 * - If sg_nents total byte length exceeds the mr max_num_sge * page_size 1569 1569 * then only max_num_sg entries will be mapped. 1570 + * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these 1571 + * constraints holds and the page_size argument is ignored. 1570 1572 * 1571 1573 * Returns the number of sg elements that were mapped to the memory region. 1572 1574 *
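The added lines document the new "gappy" registration mode: an MR created with IB_MR_TYPE_SG_GAPS_REG may map a scatterlist whose elements are neither page aligned nor page sized. A hedged example of allocating and mapping such an MR (error handling trimmed; the PD and SG list are assumed to exist, and the ib_map_mr_sg() signature is the one used at this point in the tree):

#include <rdma/ib_verbs.h>

static struct ib_mr *reg_gappy_sg(struct ib_pd *pd, struct scatterlist *sg,
                                  int sg_nents)
{
    struct ib_mr *mr;
    int n;

    mr = ib_alloc_mr(pd, IB_MR_TYPE_SG_GAPS_REG, sg_nents);
    if (IS_ERR(mr))
        return mr;

    /* page_size is ignored for IB_MR_TYPE_SG_GAPS_REG, per the comment above */
    n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
    if (n < sg_nents) {
        ib_dereg_mr(mr);
        return ERR_PTR(-EINVAL);
    }

    return mr;
}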
+2 -1
drivers/infiniband/hw/cxgb3/iwch_provider.c
··· 657 657 return ERR_PTR(err); 658 658 } 659 659 660 - static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 660 + static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 661 + struct ib_udata *udata) 661 662 { 662 663 struct iwch_dev *rhp; 663 664 struct iwch_pd *php;
+2 -1
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
··· 963 963 struct scatterlist *sg, 964 964 int sg_nents); 965 965 int c4iw_dealloc_mw(struct ib_mw *mw); 966 - struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); 966 + struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 967 + struct ib_udata *udata); 967 968 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, 968 969 u64 length, u64 virt, int acc, 969 970 struct ib_udata *udata);
+3 -1
drivers/infiniband/hw/cxgb4/mem.c
··· 34 34 #include <linux/moduleparam.h> 35 35 #include <rdma/ib_umem.h> 36 36 #include <linux/atomic.h> 37 + #include <rdma/ib_user_verbs.h> 37 38 38 39 #include "iw_cxgb4.h" 39 40 ··· 553 552 return ERR_PTR(err); 554 553 } 555 554 556 - struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 555 + struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 556 + struct ib_udata *udata) 557 557 { 558 558 struct c4iw_dev *rhp; 559 559 struct c4iw_pd *php;
+3 -3
drivers/infiniband/hw/mlx4/alias_GUID.c
··· 310 310 if (status) { 311 311 pr_debug("(port: %d) failed: status = %d\n", 312 312 cb_ctx->port, status); 313 - rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC; 313 + rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC; 314 314 goto out; 315 315 } 316 316 ··· 416 416 be64_to_cpu((__force __be64)rec->guid_indexes), 417 417 be64_to_cpu((__force __be64)applied_guid_indexes), 418 418 be64_to_cpu((__force __be64)declined_guid_indexes)); 419 - rec->time_to_run = ktime_get_real_ns() + 419 + rec->time_to_run = ktime_get_boot_ns() + 420 420 resched_delay_sec * NSEC_PER_SEC; 421 421 } else { 422 422 rec->status = MLX4_GUID_INFO_STATUS_SET; ··· 708 708 } 709 709 } 710 710 if (resched_delay_sec) { 711 - u64 curr_time = ktime_get_real_ns(); 711 + u64 curr_time = ktime_get_boot_ns(); 712 712 713 713 *resched_delay_sec = (low_record_time < curr_time) ? 0 : 714 714 div_u64((low_record_time - curr_time), NSEC_PER_SEC);
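These hunks move the GUID-retry deadline from wall-clock time to the boot-time clock: ktime_get_real_ns() jumps whenever the wall clock is stepped (settimeofday(), NTP), which can push the stored time_to_run far into the past or future, whereas ktime_get_boot_ns() is monotonic and also advances across suspend. A small hedged illustration of the resulting pattern (the deadline variable and helper names are made up):

#include <linux/ktime.h>

static u64 next_try;    /* hypothetical retry deadline, in boot-time ns */

static void arm_retry(unsigned int delay_sec)
{
    next_try = ktime_get_boot_ns() + (u64)delay_sec * NSEC_PER_SEC;
}

static bool retry_due(void)
{
    return ktime_get_boot_ns() >= next_try;
}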
+69 -3
drivers/infiniband/hw/mlx4/main.c
··· 1643 1643 return err; 1644 1644 } 1645 1645 1646 + static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, 1647 + struct ib_flow_attr *flow_attr, 1648 + enum mlx4_net_trans_promisc_mode *type) 1649 + { 1650 + int err = 0; 1651 + 1652 + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) || 1653 + (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) || 1654 + (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) { 1655 + return -EOPNOTSUPP; 1656 + } 1657 + 1658 + if (flow_attr->num_of_specs == 0) { 1659 + type[0] = MLX4_FS_MC_SNIFFER; 1660 + type[1] = MLX4_FS_UC_SNIFFER; 1661 + } else { 1662 + union ib_flow_spec *ib_spec; 1663 + 1664 + ib_spec = (union ib_flow_spec *)(flow_attr + 1); 1665 + if (ib_spec->type != IB_FLOW_SPEC_ETH) 1666 + return -EINVAL; 1667 + 1668 + /* if all is zero than MC and UC */ 1669 + if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) { 1670 + type[0] = MLX4_FS_MC_SNIFFER; 1671 + type[1] = MLX4_FS_UC_SNIFFER; 1672 + } else { 1673 + u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01, 1674 + ib_spec->eth.mask.dst_mac[1], 1675 + ib_spec->eth.mask.dst_mac[2], 1676 + ib_spec->eth.mask.dst_mac[3], 1677 + ib_spec->eth.mask.dst_mac[4], 1678 + ib_spec->eth.mask.dst_mac[5]}; 1679 + 1680 + /* Above xor was only on MC bit, non empty mask is valid 1681 + * only if this bit is set and rest are zero. 1682 + */ 1683 + if (!is_zero_ether_addr(&mac[0])) 1684 + return -EINVAL; 1685 + 1686 + if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac)) 1687 + type[0] = MLX4_FS_MC_SNIFFER; 1688 + else 1689 + type[0] = MLX4_FS_UC_SNIFFER; 1690 + } 1691 + } 1692 + 1693 + return err; 1694 + } 1695 + 1646 1696 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, 1647 1697 struct ib_flow_attr *flow_attr, 1648 1698 int domain) ··· 1702 1652 enum mlx4_net_trans_promisc_mode type[2]; 1703 1653 struct mlx4_dev *dev = (to_mdev(qp->device))->dev; 1704 1654 int is_bonded = mlx4_is_bonded(dev); 1655 + 1656 + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && 1657 + (flow_attr->type != IB_FLOW_ATTR_NORMAL)) 1658 + return ERR_PTR(-EOPNOTSUPP); 1705 1659 1706 1660 memset(type, 0, sizeof(type)); 1707 1661 ··· 1717 1663 1718 1664 switch (flow_attr->type) { 1719 1665 case IB_FLOW_ATTR_NORMAL: 1720 - type[0] = MLX4_FS_REGULAR; 1666 + /* If dont trap flag (continue match) is set, under specific 1667 + * condition traffic be replicated to given qp, 1668 + * without stealing it 1669 + */ 1670 + if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) { 1671 + err = mlx4_ib_add_dont_trap_rule(dev, 1672 + flow_attr, 1673 + type); 1674 + if (err) 1675 + goto err_free; 1676 + } else { 1677 + type[0] = MLX4_FS_REGULAR; 1678 + } 1721 1679 break; 1722 1680 1723 1681 case IB_FLOW_ATTR_ALL_DEFAULT: ··· 1741 1675 break; 1742 1676 1743 1677 case IB_FLOW_ATTR_SNIFFER: 1744 - type[0] = MLX4_FS_UC_SNIFFER; 1745 - type[1] = MLX4_FS_MC_SNIFFER; 1678 + type[0] = MLX4_FS_MIRROR_RX_PORT; 1679 + type[1] = MLX4_FS_MIRROR_SX_PORT; 1746 1680 break; 1747 1681 1748 1682 default:
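mlx4_ib_add_dont_trap_rule() accepts a "don't trap" sniffer rule only when the hardware supports DMFS UC/MC sniffing, and its single optional L2 spec must carry a destination-MAC mask that is either all zero (sniff both unicast and multicast) or sets nothing but the Ethernet multicast bit; the XOR trick above clears that bit and then checks that nothing else remains. A hedged standalone version of that mask check (the helper name is illustrative; the all-zero case is handled separately before this point):

#include <linux/etherdevice.h>
#include <linux/string.h>

static bool dmac_mask_is_mc_bit_only(const u8 *mask)
{
    u8 m[ETH_ALEN];

    memcpy(m, mask, ETH_ALEN);
    m[0] ^= 0x01;                   /* toggle (i.e. clear) the multicast bit */
    return is_zero_ether_addr(m);   /* anything left makes the mask invalid */
}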
+2 -1
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 711 711 u64 virt_addr, int access_flags, 712 712 struct ib_udata *udata); 713 713 int mlx4_ib_dereg_mr(struct ib_mr *mr); 714 - struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); 714 + struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 715 + struct ib_udata *udata); 715 716 int mlx4_ib_dealloc_mw(struct ib_mw *mw); 716 717 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, 717 718 enum ib_mr_type mr_type,
+3 -1
drivers/infiniband/hw/mlx4/mr.c
··· 32 32 */ 33 33 34 34 #include <linux/slab.h> 35 + #include <rdma/ib_user_verbs.h> 35 36 36 37 #include "mlx4_ib.h" 37 38 ··· 335 334 return 0; 336 335 } 337 336 338 - struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 337 + struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 338 + struct ib_udata *udata) 339 339 { 340 340 struct mlx4_ib_dev *dev = to_mdev(pd->device); 341 341 struct mlx4_ib_mw *mw;
+1 -1
drivers/infiniband/hw/mlx5/Makefile
··· 1 1 obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o 2 2 3 - mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o 3 + mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o 4 4 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+90 -14
drivers/infiniband/hw/mlx5/cq.c
··· 207 207 break; 208 208 case MLX5_CQE_RESP_SEND: 209 209 wc->opcode = IB_WC_RECV; 210 - wc->wc_flags = 0; 210 + wc->wc_flags = IB_WC_IP_CSUM_OK; 211 + if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) && 212 + (cqe->hds_ip_ext & CQE_L4_OK)))) 213 + wc->wc_flags = 0; 211 214 break; 212 215 case MLX5_CQE_RESP_SEND_IMM: 213 216 wc->opcode = IB_WC_RECV; ··· 434 431 struct mlx5_core_qp *mqp; 435 432 struct mlx5_ib_wq *wq; 436 433 struct mlx5_sig_err_cqe *sig_err_cqe; 437 - struct mlx5_core_mr *mmr; 434 + struct mlx5_core_mkey *mmkey; 438 435 struct mlx5_ib_mr *mr; 439 436 uint8_t opcode; 440 437 uint32_t qpn; ··· 539 536 case MLX5_CQE_SIG_ERR: 540 537 sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; 541 538 542 - read_lock(&dev->mdev->priv.mr_table.lock); 543 - mmr = __mlx5_mr_lookup(dev->mdev, 544 - mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); 545 - if (unlikely(!mmr)) { 546 - read_unlock(&dev->mdev->priv.mr_table.lock); 539 + read_lock(&dev->mdev->priv.mkey_table.lock); 540 + mmkey = __mlx5_mr_lookup(dev->mdev, 541 + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); 542 + if (unlikely(!mmkey)) { 543 + read_unlock(&dev->mdev->priv.mkey_table.lock); 547 544 mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", 548 545 cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); 549 546 return -EINVAL; 550 547 } 551 548 552 - mr = to_mibmr(mmr); 549 + mr = to_mibmr(mmkey); 553 550 get_sig_err_item(sig_err_cqe, &mr->sig->err_item); 554 551 mr->sig->sig_err_exists = true; 555 552 mr->sig->sigerr_count++; ··· 561 558 mr->sig->err_item.expected, 562 559 mr->sig->err_item.actual); 563 560 564 - read_unlock(&dev->mdev->priv.mr_table.lock); 561 + read_unlock(&dev->mdev->priv.mkey_table.lock); 565 562 goto repoll; 566 563 } 567 564 568 565 return 0; 566 + } 567 + 568 + static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, 569 + struct ib_wc *wc) 570 + { 571 + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); 572 + struct mlx5_ib_wc *soft_wc, *next; 573 + int npolled = 0; 574 + 575 + list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) { 576 + if (npolled >= num_entries) 577 + break; 578 + 579 + mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", 580 + cq->mcq.cqn); 581 + 582 + wc[npolled++] = soft_wc->wc; 583 + list_del(&soft_wc->list); 584 + kfree(soft_wc); 585 + } 586 + 587 + return npolled; 569 588 } 570 589 571 590 int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) ··· 595 570 struct mlx5_ib_cq *cq = to_mcq(ibcq); 596 571 struct mlx5_ib_qp *cur_qp = NULL; 597 572 unsigned long flags; 573 + int soft_polled = 0; 598 574 int npolled; 599 575 int err = 0; 600 576 601 577 spin_lock_irqsave(&cq->lock, flags); 602 578 603 - for (npolled = 0; npolled < num_entries; npolled++) { 604 - err = mlx5_poll_one(cq, &cur_qp, wc + npolled); 579 + if (unlikely(!list_empty(&cq->wc_list))) 580 + soft_polled = poll_soft_wc(cq, num_entries, wc); 581 + 582 + for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { 583 + err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); 605 584 if (err) 606 585 break; 607 586 } ··· 616 587 spin_unlock_irqrestore(&cq->lock, flags); 617 588 618 589 if (err == 0 || err == -EAGAIN) 619 - return npolled; 590 + return soft_polled + npolled; 620 591 else 621 592 return err; 622 593 } ··· 624 595 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 625 596 { 626 597 struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; 598 + struct mlx5_ib_cq *cq = to_mcq(ibcq); 627 599 void __iomem *uar_page = 
mdev->priv.uuari.uars[0].map; 600 + unsigned long irq_flags; 601 + int ret = 0; 628 602 629 - mlx5_cq_arm(&to_mcq(ibcq)->mcq, 603 + spin_lock_irqsave(&cq->lock, irq_flags); 604 + if (cq->notify_flags != IB_CQ_NEXT_COMP) 605 + cq->notify_flags = flags & IB_CQ_SOLICITED_MASK; 606 + 607 + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list)) 608 + ret = 1; 609 + spin_unlock_irqrestore(&cq->lock, irq_flags); 610 + 611 + mlx5_cq_arm(&cq->mcq, 630 612 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 631 613 MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, 632 614 uar_page, 633 615 MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), 634 616 to_mcq(ibcq)->mcq.cons_index); 635 617 636 - return 0; 618 + return ret; 637 619 } 638 620 639 621 static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, ··· 797 757 mlx5_db_free(dev->mdev, &cq->db); 798 758 } 799 759 760 + static void notify_soft_wc_handler(struct work_struct *work) 761 + { 762 + struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq, 763 + notify_work); 764 + 765 + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 766 + } 767 + 800 768 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, 801 769 const struct ib_cq_init_attr *attr, 802 770 struct ib_ucontext *context, ··· 855 807 &index, &inlen); 856 808 if (err) 857 809 goto err_create; 810 + 811 + INIT_WORK(&cq->notify_work, notify_soft_wc_handler); 858 812 } 859 813 860 814 cq->cqe_size = cqe_size; ··· 881 831 cq->mcq.irqn = irqn; 882 832 cq->mcq.comp = mlx5_ib_cq_comp; 883 833 cq->mcq.event = mlx5_ib_cq_event; 834 + 835 + INIT_LIST_HEAD(&cq->wc_list); 884 836 885 837 if (context) 886 838 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { ··· 1270 1218 1271 1219 cq = to_mcq(ibcq); 1272 1220 return cq->cqe_size; 1221 + } 1222 + 1223 + /* Called from atomic context */ 1224 + int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc) 1225 + { 1226 + struct mlx5_ib_wc *soft_wc; 1227 + struct mlx5_ib_cq *cq = to_mcq(ibcq); 1228 + unsigned long flags; 1229 + 1230 + soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC); 1231 + if (!soft_wc) 1232 + return -ENOMEM; 1233 + 1234 + soft_wc->wc = *wc; 1235 + spin_lock_irqsave(&cq->lock, flags); 1236 + list_add_tail(&soft_wc->list, &cq->wc_list); 1237 + if (cq->notify_flags == IB_CQ_NEXT_COMP || 1238 + wc->status != IB_WC_SUCCESS) { 1239 + cq->notify_flags = 0; 1240 + schedule_work(&cq->notify_work); 1241 + } 1242 + spin_unlock_irqrestore(&cq->lock, flags); 1243 + 1244 + return 0; 1273 1245 }
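Two additions in cq.c. The receive path now sets IB_WC_IP_CSUM_OK only when the CQE reports both the L3 and L4 checksums as valid, and the CQ grows a list of software-generated completions: mlx5_ib_generate_wc() queues a work completion and kicks notify_work, mlx5_ib_poll_cq() drains that list ahead of hardware CQEs, and mlx5_ib_arm_cq() returns 1 when IB_CQ_REPORT_MISSED_EVENTS is requested and soft completions are still pending (the GSI code below is the first user). A hedged consumer-side sketch of the checksum flag, in the style a ULP such as IPoIB would use (the function is illustrative):

#include <linux/skbuff.h>
#include <rdma/ib_verbs.h>

static void set_skb_csum_from_wc(const struct ib_wc *wc, struct sk_buff *skb)
{
    if (wc->wc_flags & IB_WC_IP_CSUM_OK)
        skb->ip_summed = CHECKSUM_UNNECESSARY;  /* HW validated L3 and L4 */
    else
        skb->ip_summed = CHECKSUM_NONE;         /* let the stack verify it */
}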
+548
drivers/infiniband/hw/mlx5/gsi.c
··· 1 + /* 2 + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 + * 4 + * This software is available to you under a choice of one of two 5 + * licenses. You may choose to be licensed under the terms of the GNU 6 + * General Public License (GPL) Version 2, available from the file 7 + * COPYING in the main directory of this source tree, or the 8 + * OpenIB.org BSD license below: 9 + * 10 + * Redistribution and use in source and binary forms, with or 11 + * without modification, are permitted provided that the following 12 + * conditions are met: 13 + * 14 + * - Redistributions of source code must retain the above 15 + * copyright notice, this list of conditions and the following 16 + * disclaimer. 17 + * 18 + * - Redistributions in binary form must reproduce the above 19 + * copyright notice, this list of conditions and the following 20 + * disclaimer in the documentation and/or other materials 21 + * provided with the distribution. 22 + * 23 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 + * SOFTWARE. 31 + */ 32 + 33 + #include "mlx5_ib.h" 34 + 35 + struct mlx5_ib_gsi_wr { 36 + struct ib_cqe cqe; 37 + struct ib_wc wc; 38 + int send_flags; 39 + bool completed:1; 40 + }; 41 + 42 + struct mlx5_ib_gsi_qp { 43 + struct ib_qp ibqp; 44 + struct ib_qp *rx_qp; 45 + u8 port_num; 46 + struct ib_qp_cap cap; 47 + enum ib_sig_type sq_sig_type; 48 + /* Serialize qp state modifications */ 49 + struct mutex mutex; 50 + struct ib_cq *cq; 51 + struct mlx5_ib_gsi_wr *outstanding_wrs; 52 + u32 outstanding_pi, outstanding_ci; 53 + int num_qps; 54 + /* Protects access to the tx_qps. Post send operations synchronize 55 + * with tx_qp creation in setup_qp(). Also protects the 56 + * outstanding_wrs array and indices. 
57 + */ 58 + spinlock_t lock; 59 + struct ib_qp **tx_qps; 60 + }; 61 + 62 + static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) 63 + { 64 + return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); 65 + } 66 + 67 + static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) 68 + { 69 + return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); 70 + } 71 + 72 + static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index) 73 + { 74 + return ++index % gsi->cap.max_send_wr; 75 + } 76 + 77 + #define for_each_outstanding_wr(gsi, index) \ 78 + for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \ 79 + index = next_outstanding(gsi, index)) 80 + 81 + /* Call with gsi->lock locked */ 82 + static void generate_completions(struct mlx5_ib_gsi_qp *gsi) 83 + { 84 + struct ib_cq *gsi_cq = gsi->ibqp.send_cq; 85 + struct mlx5_ib_gsi_wr *wr; 86 + u32 index; 87 + 88 + for_each_outstanding_wr(gsi, index) { 89 + wr = &gsi->outstanding_wrs[index]; 90 + 91 + if (!wr->completed) 92 + break; 93 + 94 + if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || 95 + wr->send_flags & IB_SEND_SIGNALED) 96 + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); 97 + 98 + wr->completed = false; 99 + } 100 + 101 + gsi->outstanding_ci = index; 102 + } 103 + 104 + static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) 105 + { 106 + struct mlx5_ib_gsi_qp *gsi = cq->cq_context; 107 + struct mlx5_ib_gsi_wr *wr = 108 + container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); 109 + u64 wr_id; 110 + unsigned long flags; 111 + 112 + spin_lock_irqsave(&gsi->lock, flags); 113 + wr->completed = true; 114 + wr_id = wr->wc.wr_id; 115 + wr->wc = *wc; 116 + wr->wc.wr_id = wr_id; 117 + wr->wc.qp = &gsi->ibqp; 118 + 119 + generate_completions(gsi); 120 + spin_unlock_irqrestore(&gsi->lock, flags); 121 + } 122 + 123 + struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, 124 + struct ib_qp_init_attr *init_attr) 125 + { 126 + struct mlx5_ib_dev *dev = to_mdev(pd->device); 127 + struct mlx5_ib_gsi_qp *gsi; 128 + struct ib_qp_init_attr hw_init_attr = *init_attr; 129 + const u8 port_num = init_attr->port_num; 130 + const int num_pkeys = pd->device->attrs.max_pkeys; 131 + const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? 
num_pkeys : 0; 132 + int ret; 133 + 134 + mlx5_ib_dbg(dev, "creating GSI QP\n"); 135 + 136 + if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { 137 + mlx5_ib_warn(dev, 138 + "invalid port number %d during GSI QP creation\n", 139 + port_num); 140 + return ERR_PTR(-EINVAL); 141 + } 142 + 143 + gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); 144 + if (!gsi) 145 + return ERR_PTR(-ENOMEM); 146 + 147 + gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); 148 + if (!gsi->tx_qps) { 149 + ret = -ENOMEM; 150 + goto err_free; 151 + } 152 + 153 + gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, 154 + sizeof(*gsi->outstanding_wrs), 155 + GFP_KERNEL); 156 + if (!gsi->outstanding_wrs) { 157 + ret = -ENOMEM; 158 + goto err_free_tx; 159 + } 160 + 161 + mutex_init(&gsi->mutex); 162 + 163 + mutex_lock(&dev->devr.mutex); 164 + 165 + if (dev->devr.ports[port_num - 1].gsi) { 166 + mlx5_ib_warn(dev, "GSI QP already exists on port %d\n", 167 + port_num); 168 + ret = -EBUSY; 169 + goto err_free_wrs; 170 + } 171 + gsi->num_qps = num_qps; 172 + spin_lock_init(&gsi->lock); 173 + 174 + gsi->cap = init_attr->cap; 175 + gsi->sq_sig_type = init_attr->sq_sig_type; 176 + gsi->ibqp.qp_num = 1; 177 + gsi->port_num = port_num; 178 + 179 + gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, 180 + IB_POLL_SOFTIRQ); 181 + if (IS_ERR(gsi->cq)) { 182 + mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n", 183 + PTR_ERR(gsi->cq)); 184 + ret = PTR_ERR(gsi->cq); 185 + goto err_free_wrs; 186 + } 187 + 188 + hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI; 189 + hw_init_attr.send_cq = gsi->cq; 190 + if (num_qps) { 191 + hw_init_attr.cap.max_send_wr = 0; 192 + hw_init_attr.cap.max_send_sge = 0; 193 + hw_init_attr.cap.max_inline_data = 0; 194 + } 195 + gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); 196 + if (IS_ERR(gsi->rx_qp)) { 197 + mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", 198 + PTR_ERR(gsi->rx_qp)); 199 + ret = PTR_ERR(gsi->rx_qp); 200 + goto err_destroy_cq; 201 + } 202 + 203 + dev->devr.ports[init_attr->port_num - 1].gsi = gsi; 204 + 205 + mutex_unlock(&dev->devr.mutex); 206 + 207 + return &gsi->ibqp; 208 + 209 + err_destroy_cq: 210 + ib_free_cq(gsi->cq); 211 + err_free_wrs: 212 + mutex_unlock(&dev->devr.mutex); 213 + kfree(gsi->outstanding_wrs); 214 + err_free_tx: 215 + kfree(gsi->tx_qps); 216 + err_free: 217 + kfree(gsi); 218 + return ERR_PTR(ret); 219 + } 220 + 221 + int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) 222 + { 223 + struct mlx5_ib_dev *dev = to_mdev(qp->device); 224 + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 225 + const int port_num = gsi->port_num; 226 + int qp_index; 227 + int ret; 228 + 229 + mlx5_ib_dbg(dev, "destroying GSI QP\n"); 230 + 231 + mutex_lock(&dev->devr.mutex); 232 + ret = ib_destroy_qp(gsi->rx_qp); 233 + if (ret) { 234 + mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. 
error %d\n", 235 + ret); 236 + mutex_unlock(&dev->devr.mutex); 237 + return ret; 238 + } 239 + dev->devr.ports[port_num - 1].gsi = NULL; 240 + mutex_unlock(&dev->devr.mutex); 241 + gsi->rx_qp = NULL; 242 + 243 + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) { 244 + if (!gsi->tx_qps[qp_index]) 245 + continue; 246 + WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index])); 247 + gsi->tx_qps[qp_index] = NULL; 248 + } 249 + 250 + ib_free_cq(gsi->cq); 251 + 252 + kfree(gsi->outstanding_wrs); 253 + kfree(gsi->tx_qps); 254 + kfree(gsi); 255 + 256 + return 0; 257 + } 258 + 259 + static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) 260 + { 261 + struct ib_pd *pd = gsi->rx_qp->pd; 262 + struct ib_qp_init_attr init_attr = { 263 + .event_handler = gsi->rx_qp->event_handler, 264 + .qp_context = gsi->rx_qp->qp_context, 265 + .send_cq = gsi->cq, 266 + .recv_cq = gsi->rx_qp->recv_cq, 267 + .cap = { 268 + .max_send_wr = gsi->cap.max_send_wr, 269 + .max_send_sge = gsi->cap.max_send_sge, 270 + .max_inline_data = gsi->cap.max_inline_data, 271 + }, 272 + .sq_sig_type = gsi->sq_sig_type, 273 + .qp_type = IB_QPT_UD, 274 + .create_flags = mlx5_ib_create_qp_sqpn_qp1(), 275 + }; 276 + 277 + return ib_create_qp(pd, &init_attr); 278 + } 279 + 280 + static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, 281 + u16 qp_index) 282 + { 283 + struct mlx5_ib_dev *dev = to_mdev(qp->device); 284 + struct ib_qp_attr attr; 285 + int mask; 286 + int ret; 287 + 288 + mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; 289 + attr.qp_state = IB_QPS_INIT; 290 + attr.pkey_index = qp_index; 291 + attr.qkey = IB_QP1_QKEY; 292 + attr.port_num = gsi->port_num; 293 + ret = ib_modify_qp(qp, &attr, mask); 294 + if (ret) { 295 + mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n", 296 + qp->qp_num, ret); 297 + return ret; 298 + } 299 + 300 + attr.qp_state = IB_QPS_RTR; 301 + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 302 + if (ret) { 303 + mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n", 304 + qp->qp_num, ret); 305 + return ret; 306 + } 307 + 308 + attr.qp_state = IB_QPS_RTS; 309 + attr.sq_psn = 0; 310 + ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); 311 + if (ret) { 312 + mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n", 313 + qp->qp_num, ret); 314 + return ret; 315 + } 316 + 317 + return 0; 318 + } 319 + 320 + static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) 321 + { 322 + struct ib_device *device = gsi->rx_qp->device; 323 + struct mlx5_ib_dev *dev = to_mdev(device); 324 + struct ib_qp *qp; 325 + unsigned long flags; 326 + u16 pkey; 327 + int ret; 328 + 329 + ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); 330 + if (ret) { 331 + mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", 332 + gsi->port_num, qp_index); 333 + return; 334 + } 335 + 336 + if (!pkey) { 337 + mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n", 338 + gsi->port_num, qp_index); 339 + return; 340 + } 341 + 342 + spin_lock_irqsave(&gsi->lock, flags); 343 + qp = gsi->tx_qps[qp_index]; 344 + spin_unlock_irqrestore(&gsi->lock, flags); 345 + if (qp) { 346 + mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. 
Skipping\n", 347 + gsi->port_num, qp_index); 348 + return; 349 + } 350 + 351 + qp = create_gsi_ud_qp(gsi); 352 + if (IS_ERR(qp)) { 353 + mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n", 354 + PTR_ERR(qp)); 355 + return; 356 + } 357 + 358 + ret = modify_to_rts(gsi, qp, qp_index); 359 + if (ret) 360 + goto err_destroy_qp; 361 + 362 + spin_lock_irqsave(&gsi->lock, flags); 363 + WARN_ON_ONCE(gsi->tx_qps[qp_index]); 364 + gsi->tx_qps[qp_index] = qp; 365 + spin_unlock_irqrestore(&gsi->lock, flags); 366 + 367 + return; 368 + 369 + err_destroy_qp: 370 + WARN_ON_ONCE(qp); 371 + } 372 + 373 + static void setup_qps(struct mlx5_ib_gsi_qp *gsi) 374 + { 375 + u16 qp_index; 376 + 377 + for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) 378 + setup_qp(gsi, qp_index); 379 + } 380 + 381 + int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, 382 + int attr_mask) 383 + { 384 + struct mlx5_ib_dev *dev = to_mdev(qp->device); 385 + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 386 + int ret; 387 + 388 + mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); 389 + 390 + mutex_lock(&gsi->mutex); 391 + ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); 392 + if (ret) { 393 + mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); 394 + goto unlock; 395 + } 396 + 397 + if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) 398 + setup_qps(gsi); 399 + 400 + unlock: 401 + mutex_unlock(&gsi->mutex); 402 + 403 + return ret; 404 + } 405 + 406 + int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 407 + int qp_attr_mask, 408 + struct ib_qp_init_attr *qp_init_attr) 409 + { 410 + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 411 + int ret; 412 + 413 + mutex_lock(&gsi->mutex); 414 + ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); 415 + qp_init_attr->cap = gsi->cap; 416 + mutex_unlock(&gsi->mutex); 417 + 418 + return ret; 419 + } 420 + 421 + /* Call with gsi->lock locked */ 422 + static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, 423 + struct ib_ud_wr *wr, struct ib_wc *wc) 424 + { 425 + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); 426 + struct mlx5_ib_gsi_wr *gsi_wr; 427 + 428 + if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) { 429 + mlx5_ib_warn(dev, "no available GSI work request.\n"); 430 + return -ENOMEM; 431 + } 432 + 433 + gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi]; 434 + gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi); 435 + 436 + if (!wc) { 437 + memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc)); 438 + gsi_wr->wc.pkey_index = wr->pkey_index; 439 + gsi_wr->wc.wr_id = wr->wr.wr_id; 440 + } else { 441 + gsi_wr->wc = *wc; 442 + gsi_wr->completed = true; 443 + } 444 + 445 + gsi_wr->cqe.done = &handle_single_completion; 446 + wr->wr.wr_cqe = &gsi_wr->cqe; 447 + 448 + return 0; 449 + } 450 + 451 + /* Call with gsi->lock locked */ 452 + static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, 453 + struct ib_ud_wr *wr) 454 + { 455 + struct ib_wc wc = { 456 + { .wr_id = wr->wr.wr_id }, 457 + .status = IB_WC_SUCCESS, 458 + .opcode = IB_WC_SEND, 459 + .qp = &gsi->ibqp, 460 + }; 461 + int ret; 462 + 463 + ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); 464 + if (ret) 465 + return ret; 466 + 467 + generate_completions(gsi); 468 + 469 + return 0; 470 + } 471 + 472 + /* Call with gsi->lock locked */ 473 + static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) 474 + { 475 + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); 476 + int qp_index = wr->pkey_index; 
477 + 478 + if (!mlx5_ib_deth_sqpn_cap(dev)) 479 + return gsi->rx_qp; 480 + 481 + if (qp_index >= gsi->num_qps) 482 + return NULL; 483 + 484 + return gsi->tx_qps[qp_index]; 485 + } 486 + 487 + int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, 488 + struct ib_send_wr **bad_wr) 489 + { 490 + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 491 + struct ib_qp *tx_qp; 492 + unsigned long flags; 493 + int ret; 494 + 495 + for (; wr; wr = wr->next) { 496 + struct ib_ud_wr cur_wr = *ud_wr(wr); 497 + 498 + cur_wr.wr.next = NULL; 499 + 500 + spin_lock_irqsave(&gsi->lock, flags); 501 + tx_qp = get_tx_qp(gsi, &cur_wr); 502 + if (!tx_qp) { 503 + ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); 504 + if (ret) 505 + goto err; 506 + spin_unlock_irqrestore(&gsi->lock, flags); 507 + continue; 508 + } 509 + 510 + ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); 511 + if (ret) 512 + goto err; 513 + 514 + ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); 515 + if (ret) { 516 + /* Undo the effect of adding the outstanding wr */ 517 + gsi->outstanding_pi = (gsi->outstanding_pi - 1) % 518 + gsi->cap.max_send_wr; 519 + goto err; 520 + } 521 + spin_unlock_irqrestore(&gsi->lock, flags); 522 + } 523 + 524 + return 0; 525 + 526 + err: 527 + spin_unlock_irqrestore(&gsi->lock, flags); 528 + *bad_wr = wr; 529 + return ret; 530 + } 531 + 532 + int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, 533 + struct ib_recv_wr **bad_wr) 534 + { 535 + struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 536 + 537 + return ib_post_recv(gsi->rx_qp, wr, bad_wr); 538 + } 539 + 540 + void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) 541 + { 542 + if (!gsi) 543 + return; 544 + 545 + mutex_lock(&gsi->mutex); 546 + setup_qps(gsi); 547 + mutex_unlock(&gsi->mutex); 548 + }
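The new file implements a software GSI (QP1) wrapper: one hardware receive QP plus, when the device can override the DETH source QP number (set_deth_sqpn), one UD transmit QP per P_Key index, with the outstanding_wrs ring replaying send completions to the user's CQ in posting order through mlx5_ib_generate_wc(). A hedged sketch of how the driver's QP creation path would hand IB_QPT_GSI off to this wrapper (the dispatch function is illustrative; the real wiring lives outside this hunk):

#include "mlx5_ib.h"

static struct ib_qp *create_qp_dispatch(struct ib_pd *pd,
                                        struct ib_qp_init_attr *attr)
{
    if (attr->qp_type == IB_QPT_GSI)
        return mlx5_ib_gsi_create_qp(pd, attr);

    /* ... all other QP types keep using the existing creation path ... */
    return ERR_PTR(-ENOSYS);
}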
+155 -11
drivers/infiniband/hw/mlx5/mad.c
··· 31 31 */ 32 32 33 33 #include <linux/mlx5/cmd.h> 34 + #include <linux/mlx5/vport.h> 34 35 #include <rdma/ib_mad.h> 35 36 #include <rdma/ib_smi.h> 37 + #include <rdma/ib_pma.h> 36 38 #include "mlx5_ib.h" 37 39 38 40 enum { ··· 59 57 return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); 60 58 } 61 59 62 - int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 63 - const struct ib_wc *in_wc, const struct ib_grh *in_grh, 64 - const struct ib_mad_hdr *in, size_t in_mad_size, 65 - struct ib_mad_hdr *out, size_t *out_mad_size, 66 - u16 *out_mad_pkey_index) 60 + static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 61 + const struct ib_wc *in_wc, const struct ib_grh *in_grh, 62 + const struct ib_mad *in_mad, struct ib_mad *out_mad) 67 63 { 68 64 u16 slid; 69 65 int err; 70 - const struct ib_mad *in_mad = (const struct ib_mad *)in; 71 - struct ib_mad *out_mad = (struct ib_mad *)out; 72 - 73 - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || 74 - *out_mad_size != sizeof(*out_mad))) 75 - return IB_MAD_RESULT_FAILURE; 76 66 77 67 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); 78 68 ··· 109 115 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; 110 116 111 117 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; 118 + } 119 + 120 + static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, 121 + void *out) 122 + { 123 + #define MLX5_SUM_CNT(p, cntr1, cntr2) \ 124 + (MLX5_GET64(query_vport_counter_out, p, cntr1) + \ 125 + MLX5_GET64(query_vport_counter_out, p, cntr2)) 126 + 127 + pma_cnt_ext->port_xmit_data = 128 + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, 129 + transmitted_ib_multicast.octets) >> 2); 130 + pma_cnt_ext->port_xmit_data = 131 + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, 132 + received_ib_multicast.octets) >> 2); 133 + pma_cnt_ext->port_xmit_packets = 134 + cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets, 135 + transmitted_ib_multicast.packets)); 136 + pma_cnt_ext->port_rcv_packets = 137 + cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets, 138 + received_ib_multicast.packets)); 139 + pma_cnt_ext->port_unicast_xmit_packets = 140 + MLX5_GET64_BE(query_vport_counter_out, 141 + out, transmitted_ib_unicast.packets); 142 + pma_cnt_ext->port_unicast_rcv_packets = 143 + MLX5_GET64_BE(query_vport_counter_out, 144 + out, received_ib_unicast.packets); 145 + pma_cnt_ext->port_multicast_xmit_packets = 146 + MLX5_GET64_BE(query_vport_counter_out, 147 + out, transmitted_ib_multicast.packets); 148 + pma_cnt_ext->port_multicast_rcv_packets = 149 + MLX5_GET64_BE(query_vport_counter_out, 150 + out, received_ib_multicast.packets); 151 + } 152 + 153 + static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, 154 + void *out) 155 + { 156 + /* Traffic counters will be reported in 157 + * their 64bit form via ib_pma_portcounters_ext by default. 
158 + */ 159 + void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out, 160 + counter_set); 161 + 162 + #define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \ 163 + counter_var = MLX5_GET_BE(typeof(counter_var), \ 164 + ib_port_cntrs_grp_data_layout, \ 165 + out_pma, counter_name); \ 166 + } 167 + 168 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter, 169 + symbol_error_counter); 170 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter, 171 + link_error_recovery_counter); 172 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter, 173 + link_downed_counter); 174 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors, 175 + port_rcv_errors); 176 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors, 177 + port_rcv_remote_physical_errors); 178 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors, 179 + port_rcv_switch_relay_errors); 180 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards, 181 + port_xmit_discards); 182 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors, 183 + port_xmit_constraint_errors); 184 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors, 185 + port_rcv_constraint_errors); 186 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors, 187 + link_overrun_errors); 188 + MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped, 189 + vl_15_dropped); 190 + } 191 + 192 + static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, 193 + const struct ib_mad *in_mad, struct ib_mad *out_mad) 194 + { 195 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 196 + int err; 197 + void *out_cnt; 198 + 199 + /* Decalring support of extended counters */ 200 + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { 201 + struct ib_class_port_info cpi = {}; 202 + 203 + cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; 204 + memcpy((out_mad->data + 40), &cpi, sizeof(cpi)); 205 + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; 206 + } 207 + 208 + if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) { 209 + struct ib_pma_portcounters_ext *pma_cnt_ext = 210 + (struct ib_pma_portcounters_ext *)(out_mad->data + 40); 211 + int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out); 212 + 213 + out_cnt = mlx5_vzalloc(sz); 214 + if (!out_cnt) 215 + return IB_MAD_RESULT_FAILURE; 216 + 217 + err = mlx5_core_query_vport_counter(dev->mdev, 0, 218 + port_num, out_cnt, sz); 219 + if (!err) 220 + pma_cnt_ext_assign(pma_cnt_ext, out_cnt); 221 + } else { 222 + struct ib_pma_portcounters *pma_cnt = 223 + (struct ib_pma_portcounters *)(out_mad->data + 40); 224 + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); 225 + 226 + out_cnt = mlx5_vzalloc(sz); 227 + if (!out_cnt) 228 + return IB_MAD_RESULT_FAILURE; 229 + 230 + err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num, 231 + out_cnt, sz); 232 + if (!err) 233 + pma_cnt_assign(pma_cnt, out_cnt); 234 + } 235 + 236 + kvfree(out_cnt); 237 + if (err) 238 + return IB_MAD_RESULT_FAILURE; 239 + 240 + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; 241 + } 242 + 243 + int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 244 + const struct ib_wc *in_wc, const struct ib_grh *in_grh, 245 + const struct ib_mad_hdr *in, size_t in_mad_size, 246 + struct ib_mad_hdr *out, size_t *out_mad_size, 247 + u16 *out_mad_pkey_index) 248 + { 249 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 250 + struct mlx5_core_dev *mdev = dev->mdev; 251 + const struct ib_mad *in_mad = (const struct ib_mad *)in; 252 + struct ib_mad *out_mad = (struct ib_mad *)out; 253 + 254 + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || 255 + *out_mad_size != sizeof(*out_mad))) 256 + return 
IB_MAD_RESULT_FAILURE; 257 + 258 + memset(out_mad->data, 0, sizeof(out_mad->data)); 259 + 260 + if (MLX5_CAP_GEN(mdev, vport_counters) && 261 + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && 262 + in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { 263 + return process_pma_cmd(ibdev, port_num, in_mad, out_mad); 264 + } else { 265 + return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, 266 + in_mad, out_mad); 267 + } 112 268 } 113 269 114 270 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
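mlx5 now answers PerfMgmt GET MADs itself when the device exposes vport counters: process_pma_cmd() fills the extended (64-bit) counters from a vport-counter query and the classic PortCounters from a PPCNT register query. The extended data counters are sums of the unicast and multicast octet counters converted into the PMA's unit of 32-bit words, which is what the >> 2 does. A small hedged arithmetic fragment of that conversion (numbers made up):

    /* 1,000,000 unicast + 500,000 multicast octets transmitted ... */
    u64 octets = 1000000 + 500000;

    /* ... are reported to the PMA as 375,000 32-bit words. */
    __be64 port_xmit_data = cpu_to_be64(octets >> 2);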
+105 -14
drivers/infiniband/hw/mlx5/main.c
··· 487 487 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 488 488 if (MLX5_CAP_GEN(mdev, xrc)) 489 489 props->device_cap_flags |= IB_DEVICE_XRC; 490 + if (MLX5_CAP_GEN(mdev, imaicl)) { 491 + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | 492 + IB_DEVICE_MEM_WINDOW_TYPE_2B; 493 + props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 494 + /* We support 'Gappy' memory registration too */ 495 + props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; 496 + } 490 497 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 491 498 if (MLX5_CAP_GEN(mdev, sho)) { 492 499 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; ··· 510 503 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 511 504 (MLX5_CAP_ETH(dev->mdev, csum_cap))) 512 505 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 506 + 507 + if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 508 + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 509 + props->device_cap_flags |= IB_DEVICE_UD_TSO; 510 + } 513 511 514 512 props->vendor_part_id = mdev->pdev->device; 515 513 props->hw_ver = mdev->pdev->revision; ··· 541 529 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); 542 530 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 543 531 props->max_srq_sge = max_rq_sg - 1; 544 - props->max_fast_reg_page_list_len = (unsigned int)-1; 532 + props->max_fast_reg_page_list_len = 533 + 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); 545 534 get_atomic_caps(dev, props); 546 535 props->masked_atomic_cap = IB_ATOMIC_NONE; 547 536 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); ··· 1382 1369 return 0; 1383 1370 } 1384 1371 1372 + static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) 1373 + { 1374 + priority *= 2; 1375 + if (!dont_trap) 1376 + priority++; 1377 + return priority; 1378 + } 1379 + 1385 1380 #define MLX5_FS_MAX_TYPES 10 1386 1381 #define MLX5_FS_MAX_ENTRIES 32000UL 1387 1382 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 1388 1383 struct ib_flow_attr *flow_attr) 1389 1384 { 1385 + bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; 1390 1386 struct mlx5_flow_namespace *ns = NULL; 1391 1387 struct mlx5_ib_flow_prio *prio; 1392 1388 struct mlx5_flow_table *ft; ··· 1405 1383 int err = 0; 1406 1384 1407 1385 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1408 - if (flow_is_multicast_only(flow_attr)) 1386 + if (flow_is_multicast_only(flow_attr) && 1387 + !dont_trap) 1409 1388 priority = MLX5_IB_FLOW_MCAST_PRIO; 1410 1389 else 1411 - priority = flow_attr->priority; 1390 + priority = ib_prio_to_core_prio(flow_attr->priority, 1391 + dont_trap); 1412 1392 ns = mlx5_get_flow_namespace(dev->mdev, 1413 1393 MLX5_FLOW_NAMESPACE_BYPASS); 1414 1394 num_entries = MLX5_FS_MAX_ENTRIES; ··· 1458 1434 unsigned int spec_index; 1459 1435 u32 *match_c; 1460 1436 u32 *match_v; 1437 + u32 action; 1461 1438 int err = 0; 1462 1439 1463 1440 if (!is_valid_attr(flow_attr)) ··· 1484 1459 1485 1460 /* Outer header support only */ 1486 1461 match_criteria_enable = (!outer_header_zero(match_c)) << 0; 1462 + action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : 1463 + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 1487 1464 handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, 1488 1465 match_c, match_v, 1489 - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 1466 + action, 1490 1467 MLX5_FS_DEFAULT_FLOW_TAG, 1491 1468 dst); 1492 1469 ··· 1508 1481 return err ? 
ERR_PTR(err) : handler; 1509 1482 } 1510 1483 1484 + static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, 1485 + struct mlx5_ib_flow_prio *ft_prio, 1486 + struct ib_flow_attr *flow_attr, 1487 + struct mlx5_flow_destination *dst) 1488 + { 1489 + struct mlx5_ib_flow_handler *handler_dst = NULL; 1490 + struct mlx5_ib_flow_handler *handler = NULL; 1491 + 1492 + handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); 1493 + if (!IS_ERR(handler)) { 1494 + handler_dst = create_flow_rule(dev, ft_prio, 1495 + flow_attr, dst); 1496 + if (IS_ERR(handler_dst)) { 1497 + mlx5_del_flow_rule(handler->rule); 1498 + kfree(handler); 1499 + handler = handler_dst; 1500 + } else { 1501 + list_add(&handler_dst->list, &handler->list); 1502 + } 1503 + } 1504 + 1505 + return handler; 1506 + } 1511 1507 enum { 1512 1508 LEFTOVERS_MC, 1513 1509 LEFTOVERS_UC, ··· 1608 1558 1609 1559 if (domain != IB_FLOW_DOMAIN_USER || 1610 1560 flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || 1611 - flow_attr->flags) 1561 + (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) 1612 1562 return ERR_PTR(-EINVAL); 1613 1563 1614 1564 dst = kzalloc(sizeof(*dst), GFP_KERNEL); ··· 1627 1577 dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; 1628 1578 1629 1579 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1630 - handler = create_flow_rule(dev, ft_prio, flow_attr, 1631 - dst); 1580 + if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { 1581 + handler = create_dont_trap_rule(dev, ft_prio, 1582 + flow_attr, dst); 1583 + } else { 1584 + handler = create_flow_rule(dev, ft_prio, flow_attr, 1585 + dst); 1586 + } 1632 1587 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 1633 1588 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 1634 1589 handler = create_leftovers_rule(dev, ft_prio, flow_attr, ··· 1771 1716 &dev_attr_reg_pages, 1772 1717 }; 1773 1718 1719 + static void pkey_change_handler(struct work_struct *work) 1720 + { 1721 + struct mlx5_ib_port_resources *ports = 1722 + container_of(work, struct mlx5_ib_port_resources, 1723 + pkey_change_work); 1724 + 1725 + mutex_lock(&ports->devr->mutex); 1726 + mlx5_ib_gsi_pkey_change(ports->gsi); 1727 + mutex_unlock(&ports->devr->mutex); 1728 + } 1729 + 1774 1730 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 1775 1731 enum mlx5_dev_event event, unsigned long param) 1776 1732 { ··· 1818 1752 case MLX5_DEV_EVENT_PKEY_CHANGE: 1819 1753 ibev.event = IB_EVENT_PKEY_CHANGE; 1820 1754 port = (u8)param; 1755 + 1756 + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); 1821 1757 break; 1822 1758 1823 1759 case MLX5_DEV_EVENT_GUID_CHANGE: ··· 1906 1838 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 1907 1839 1908 1840 mlx5_ib_destroy_qp(dev->umrc.qp); 1909 - ib_destroy_cq(dev->umrc.cq); 1841 + ib_free_cq(dev->umrc.cq); 1910 1842 ib_dealloc_pd(dev->umrc.pd); 1911 1843 } 1912 1844 ··· 1921 1853 struct ib_pd *pd; 1922 1854 struct ib_cq *cq; 1923 1855 struct ib_qp *qp; 1924 - struct ib_cq_init_attr cq_attr = {}; 1925 1856 int ret; 1926 1857 1927 1858 attr = kzalloc(sizeof(*attr), GFP_KERNEL); ··· 1937 1870 goto error_0; 1938 1871 } 1939 1872 1940 - cq_attr.cqe = 128; 1941 - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 1942 - &cq_attr); 1873 + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 1943 1874 if (IS_ERR(cq)) { 1944 1875 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 1945 1876 ret = PTR_ERR(cq); 1946 1877 goto error_2; 1947 1878 } 1948 - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1949 1879 1950 
1880 init_attr->send_cq = cq; 1951 1881 init_attr->recv_cq = cq; ··· 2009 1945 mlx5_ib_destroy_qp(qp); 2010 1946 2011 1947 error_3: 2012 - ib_destroy_cq(cq); 1948 + ib_free_cq(cq); 2013 1949 2014 1950 error_2: 2015 1951 ib_dealloc_pd(pd); ··· 2025 1961 struct ib_srq_init_attr attr; 2026 1962 struct mlx5_ib_dev *dev; 2027 1963 struct ib_cq_init_attr cq_attr = {.cqe = 1}; 1964 + int port; 2028 1965 int ret = 0; 2029 1966 2030 1967 dev = container_of(devr, struct mlx5_ib_dev, devr); 1968 + 1969 + mutex_init(&devr->mutex); 2031 1970 2032 1971 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); 2033 1972 if (IS_ERR(devr->p0)) { ··· 2119 2052 atomic_inc(&devr->p0->usecnt); 2120 2053 atomic_set(&devr->s0->usecnt, 0); 2121 2054 2055 + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { 2056 + INIT_WORK(&devr->ports[port].pkey_change_work, 2057 + pkey_change_handler); 2058 + devr->ports[port].devr = devr; 2059 + } 2060 + 2122 2061 return 0; 2123 2062 2124 2063 error5: ··· 2143 2070 2144 2071 static void destroy_dev_resources(struct mlx5_ib_resources *devr) 2145 2072 { 2073 + struct mlx5_ib_dev *dev = 2074 + container_of(devr, struct mlx5_ib_dev, devr); 2075 + int port; 2076 + 2146 2077 mlx5_ib_destroy_srq(devr->s1); 2147 2078 mlx5_ib_destroy_srq(devr->s0); 2148 2079 mlx5_ib_dealloc_xrcd(devr->x0); 2149 2080 mlx5_ib_dealloc_xrcd(devr->x1); 2150 2081 mlx5_ib_destroy_cq(devr->c0); 2151 2082 mlx5_ib_dealloc_pd(devr->p0); 2083 + 2084 + /* Make sure no change P_Key work items are still executing */ 2085 + for (port = 0; port < dev->num_ports; ++port) 2086 + cancel_work_sync(&devr->ports[port].pkey_change_work); 2152 2087 } 2153 2088 2154 2089 static u32 get_core_cap_flags(struct ib_device *ibdev) ··· 2279 2198 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2280 2199 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2281 2200 (1ull << IB_USER_VERBS_CMD_REG_MR) | 2201 + (1ull << IB_USER_VERBS_CMD_REREG_MR) | 2282 2202 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2283 2203 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2284 2204 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | ··· 2340 2258 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; 2341 2259 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; 2342 2260 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; 2261 + dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; 2343 2262 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; 2344 2263 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; 2345 2264 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; ··· 2351 2268 dev->ib_dev.get_port_immutable = mlx5_port_immutable; 2352 2269 2353 2270 mlx5_ib_internal_fill_odp_caps(dev); 2271 + 2272 + if (MLX5_CAP_GEN(mdev, imaicl)) { 2273 + dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; 2274 + dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; 2275 + dev->ib_dev.uverbs_cmd_mask |= 2276 + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 2277 + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 2278 + } 2354 2279 2355 2280 if (MLX5_CAP_GEN(mdev, xrc)) { 2356 2281 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
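Besides wiring up memory windows, user MR re-registration and the don't-trap validation, the flow-steering changes map each user priority onto a pair of core steering priorities through ib_prio_to_core_prio(), so that a "don't trap" rule lands in a flow table that is matched before normal rules of the same user priority; create_dont_trap_rule() then installs two hardware rules for it, one forwarding to the QP's TIR and one forwarding to the next priority (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) so the traffic keeps flowing. A short worked example of the priority mapping:

    ib priority 0, dont_trap  ->  core priority 0
    ib priority 0, normal     ->  core priority 1
    ib priority 3, dont_trap  ->  core priority 6
    ib priority 3, normal     ->  core priority 7

The UMR completion path also moves from a hand-rolled ib_create_cq() plus mlx5_umr_cq_handler() pair to the core's ib_alloc_cq(..., IB_POLL_SOFTIRQ) API, which is what lets mr.c below drop its polling loop.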
+91 -17
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 43 43 #include <linux/mlx5/srq.h> 44 44 #include <linux/types.h> 45 45 #include <linux/mlx5/transobj.h> 46 + #include <rdma/ib_user_verbs.h> 46 47 47 48 #define mlx5_ib_dbg(dev, format, arg...) \ 48 49 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ ··· 127 126 }; 128 127 129 128 #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) 130 - #define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) 129 + #define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) 131 130 #if (MLX5_IB_FLOW_LAST_PRIO <= 0) 132 131 #error "Invalid number of bypass priorities" 133 132 #endif ··· 163 162 #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START 164 163 #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) 165 164 #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) 165 + 166 + #define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3) 167 + #define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4) 168 + #define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END 169 + 166 170 #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 171 + /* 172 + * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI 173 + * creates the actual hardware QP. 174 + */ 175 + #define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 167 176 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 177 + 178 + /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. 179 + * 180 + * These flags are intended for internal use by the mlx5_ib driver, and they 181 + * rely on the range reserved for that use in the ib_qp_create_flags enum. 182 + */ 183 + 184 + /* Create a UD QP whose source QP number is 1 */ 185 + static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) 186 + { 187 + return IB_QP_CREATE_RESERVED_START; 188 + } 168 189 169 190 struct wr_list { 170 191 u16 opcode; ··· 348 325 }; 349 326 350 327 enum mlx5_ib_qp_flags { 351 - MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, 352 - MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, 353 - MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, 354 - MLX5_IB_QP_MANAGED_SEND = 1 << 3, 355 - MLX5_IB_QP_MANAGED_RECV = 1 << 4, 328 + MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, 329 + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, 330 + MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, 331 + MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, 332 + MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, 333 + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, 334 + /* QP uses 1 as its source QP number */ 335 + MLX5_IB_QP_SQPN_QP1 = 1 << 6, 356 336 }; 357 337 358 338 struct mlx5_umr_wr { ··· 399 373 struct ib_umem *resize_umem; 400 374 int cqe_size; 401 375 u32 create_flags; 376 + struct list_head wc_list; 377 + enum ib_cq_notify_flags notify_flags; 378 + struct work_struct notify_work; 379 + }; 380 + 381 + struct mlx5_ib_wc { 382 + struct ib_wc wc; 383 + struct list_head list; 402 384 }; 403 385 404 386 struct mlx5_ib_srq { ··· 447 413 int ndescs; 448 414 int max_descs; 449 415 int desc_size; 450 - struct mlx5_core_mr mmr; 416 + int access_mode; 417 + struct mlx5_core_mkey mmkey; 451 418 struct ib_umem *umem; 452 419 struct mlx5_shared_mr_info *smr_info; 453 420 struct list_head list; ··· 460 425 struct mlx5_core_sig_ctx *sig; 461 426 int live; 462 427 void *descs_alloc; 428 + int access_flags; /* Needed for rereg MR */ 429 + }; 430 + 431 + struct mlx5_ib_mw { 432 + struct ib_mw ibmw; 433 + struct mlx5_core_mkey mmkey; 463 434 }; 464 435 465 436 struct mlx5_ib_umr_context { 437 + struct 
ib_cqe cqe; 466 438 enum ib_wc_status status; 467 439 struct completion done; 468 440 }; 469 - 470 - static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) 471 - { 472 - context->status = -1; 473 - init_completion(&context->done); 474 - } 475 441 476 442 struct umr_common { 477 443 struct ib_pd *pd; ··· 523 487 unsigned long last_add; 524 488 }; 525 489 490 + struct mlx5_ib_gsi_qp; 491 + 492 + struct mlx5_ib_port_resources { 493 + struct mlx5_ib_resources *devr; 494 + struct mlx5_ib_gsi_qp *gsi; 495 + struct work_struct pkey_change_work; 496 + }; 497 + 526 498 struct mlx5_ib_resources { 527 499 struct ib_cq *c0; 528 500 struct ib_xrcd *x0; ··· 538 494 struct ib_pd *p0; 539 495 struct ib_srq *s0; 540 496 struct ib_srq *s1; 497 + struct mlx5_ib_port_resources ports[2]; 498 + /* Protects changes to the port resources */ 499 + struct mutex mutex; 541 500 }; 542 501 543 502 struct mlx5_roce { ··· 605 558 return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; 606 559 } 607 560 608 - static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) 561 + static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) 609 562 { 610 - return container_of(mmr, struct mlx5_ib_mr, mmr); 563 + return container_of(mmkey, struct mlx5_ib_mr, mmkey); 611 564 } 612 565 613 566 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) ··· 633 586 static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) 634 587 { 635 588 return container_of(ibmr, struct mlx5_ib_mr, ibmr); 589 + } 590 + 591 + static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) 592 + { 593 + return container_of(ibmw, struct mlx5_ib_mw, ibmw); 636 594 } 637 595 638 596 struct mlx5_ib_ah { ··· 700 648 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 701 649 u64 virt_addr, int access_flags, 702 650 struct ib_udata *udata); 651 + struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 652 + struct ib_udata *udata); 653 + int mlx5_ib_dealloc_mw(struct ib_mw *mw); 703 654 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, 704 655 int npages, int zap); 656 + int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 657 + u64 length, u64 virt_addr, int access_flags, 658 + struct ib_pd *pd, struct ib_udata *udata); 705 659 int mlx5_ib_dereg_mr(struct ib_mr *ibmr); 706 660 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, 707 661 enum ib_mr_type mr_type, ··· 758 700 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); 759 701 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); 760 702 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); 761 - void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); 762 703 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 763 704 struct ib_mr_status *mr_status); 764 705 ··· 796 739 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 797 740 int index); 798 741 742 + /* GSI QP helper functions */ 743 + struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, 744 + struct ib_qp_init_attr *init_attr); 745 + int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); 746 + int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, 747 + int attr_mask); 748 + int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 749 + int qp_attr_mask, 750 + struct ib_qp_init_attr *qp_init_attr); 751 + int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, 752 + struct ib_send_wr **bad_wr); 753 + int mlx5_ib_gsi_post_recv(struct 
ib_qp *qp, struct ib_recv_wr *wr, 754 + struct ib_recv_wr **bad_wr); 755 + void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); 756 + 757 + int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); 758 + 799 759 static inline void init_query_mad(struct ib_smp *mad) 800 760 { 801 761 mad->base_version = 1; ··· 832 758 833 759 static inline int is_qp1(enum ib_qp_type qp_type) 834 760 { 835 - return qp_type == IB_QPT_GSI; 761 + return qp_type == MLX5_IB_QPT_HW_GSI; 836 762 } 837 763 838 764 #define MLX5_MAX_UMR_SHIFT 16
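struct mlx5_ib_umr_context now embeds an ib_cqe, so UMR completions are delivered through a .done callback that recovers its context with container_of() on wc->wr_cqe rather than through wr_id (the old mlx5_ib_init_umr_context() inline moves into mr.c next to the new handler). A hedged usage sketch of posting a UMR work request with the reworked context; the wrapper function itself is illustrative:

#include <linux/completion.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

static int post_umr_and_wait(struct ib_qp *qp, struct ib_send_wr *wr)
{
    struct mlx5_ib_umr_context umr_context;
    struct ib_send_wr *bad;
    int err;

    mlx5_ib_init_umr_context(&umr_context); /* sets cqe.done and the completion */
    wr->wr_cqe = &umr_context.cqe;

    err = ib_post_send(qp, wr, &bad);
    if (err)
        return err;

    wait_for_completion(&umr_context.done);
    return umr_context.status == IB_WC_SUCCESS ? 0 : -EFAULT;
}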
+474 -133
drivers/infiniband/hw/mlx5/mr.c
··· 40 40 #include <rdma/ib_umem_odp.h> 41 41 #include <rdma/ib_verbs.h> 42 42 #include "mlx5_ib.h" 43 + #include "user.h" 43 44 44 45 enum { 45 46 MAX_PENDING_REG_MR = 8, ··· 58 57 59 58 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 60 59 { 61 - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); 60 + int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); 62 61 63 62 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 64 63 /* Wait until all page fault handlers using the mr complete. */ ··· 78 77 return order - cache->ent[0].order; 79 78 } 80 79 80 + static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) 81 + { 82 + return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= 83 + length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); 84 + } 85 + 86 + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 87 + static void update_odp_mr(struct mlx5_ib_mr *mr) 88 + { 89 + if (mr->umem->odp_data) { 90 + /* 91 + * This barrier prevents the compiler from moving the 92 + * setting of umem->odp_data->private to point to our 93 + * MR, before reg_umr finished, to ensure that the MR 94 + * initialization have finished before starting to 95 + * handle invalidations. 96 + */ 97 + smp_wmb(); 98 + mr->umem->odp_data->private = mr; 99 + /* 100 + * Make sure we will see the new 101 + * umem->odp_data->private value in the invalidation 102 + * routines, before we can get page faults on the 103 + * MR. Page faults can happen once we put the MR in 104 + * the tree, below this line. Without the barrier, 105 + * there can be a fault handling and an invalidation 106 + * before umem->odp_data->private == mr is visible to 107 + * the invalidation handler. 108 + */ 109 + smp_wmb(); 110 + } 111 + } 112 + #endif 113 + 81 114 static void reg_mr_callback(int status, void *context) 82 115 { 83 116 struct mlx5_ib_mr *mr = context; ··· 121 86 struct mlx5_cache_ent *ent = &cache->ent[c]; 122 87 u8 key; 123 88 unsigned long flags; 124 - struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; 89 + struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table; 125 90 int err; 126 91 127 92 spin_lock_irqsave(&ent->lock, flags); ··· 148 113 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); 149 114 key = dev->mdev->priv.mkey_key++; 150 115 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); 151 - mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; 116 + mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; 152 117 153 118 cache->last_add = jiffies; 154 119 ··· 159 124 spin_unlock_irqrestore(&ent->lock, flags); 160 125 161 126 write_lock_irqsave(&table->lock, flags); 162 - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), 163 - &mr->mmr); 127 + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key), 128 + &mr->mmkey); 164 129 if (err) 165 - pr_err("Error inserting to mr tree. 0x%x\n", -err); 130 + pr_err("Error inserting to mkey tree. 
0x%x\n", -err); 166 131 write_unlock_irqrestore(&table->lock, flags); 167 132 } 168 133 ··· 203 168 spin_lock_irq(&ent->lock); 204 169 ent->pending++; 205 170 spin_unlock_irq(&ent->lock); 206 - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, 171 + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, 207 172 sizeof(*in), reg_mr_callback, 208 173 mr, &mr->out); 209 174 if (err) { ··· 692 657 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 693 658 seg->start_addr = 0; 694 659 695 - err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, 660 + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, 696 661 NULL); 697 662 if (err) 698 663 goto err_in; 699 664 700 665 kfree(in); 701 - mr->ibmr.lkey = mr->mmr.key; 702 - mr->ibmr.rkey = mr->mmr.key; 666 + mr->ibmr.lkey = mr->mmkey.key; 667 + mr->ibmr.rkey = mr->mmkey.key; 703 668 mr->umem = NULL; 704 669 705 670 return &mr->ibmr; ··· 728 693 return order <= MLX5_MAX_UMR_SHIFT; 729 694 } 730 695 731 - static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, 732 - struct ib_sge *sg, u64 dma, int n, u32 key, 733 - int page_shift, u64 virt_addr, u64 len, 734 - int access_flags) 696 + static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 697 + int npages, int page_shift, int *size, 698 + __be64 **mr_pas, dma_addr_t *dma) 699 + { 700 + __be64 *pas; 701 + struct device *ddev = dev->ib_dev.dma_device; 702 + 703 + /* 704 + * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. 705 + * To avoid copying garbage after the pas array, we allocate 706 + * a little more. 707 + */ 708 + *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); 709 + *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); 710 + if (!(*mr_pas)) 711 + return -ENOMEM; 712 + 713 + pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN); 714 + mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); 715 + /* Clear padding after the actual pages. 
*/ 716 + memset(pas + npages, 0, *size - npages * sizeof(u64)); 717 + 718 + *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE); 719 + if (dma_mapping_error(ddev, *dma)) { 720 + kfree(*mr_pas); 721 + return -ENOMEM; 722 + } 723 + 724 + return 0; 725 + } 726 + 727 + static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr, 728 + struct ib_sge *sg, u64 dma, int n, u32 key, 729 + int page_shift) 735 730 { 736 731 struct mlx5_ib_dev *dev = to_mdev(pd->device); 737 732 struct mlx5_umr_wr *umrwr = umr_wr(wr); ··· 771 706 sg->lkey = dev->umrc.pd->local_dma_lkey; 772 707 773 708 wr->next = NULL; 774 - wr->send_flags = 0; 775 709 wr->sg_list = sg; 776 710 if (n) 777 711 wr->num_sge = 1; ··· 782 718 umrwr->npages = n; 783 719 umrwr->page_shift = page_shift; 784 720 umrwr->mkey = key; 721 + } 722 + 723 + static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, 724 + struct ib_sge *sg, u64 dma, int n, u32 key, 725 + int page_shift, u64 virt_addr, u64 len, 726 + int access_flags) 727 + { 728 + struct mlx5_umr_wr *umrwr = umr_wr(wr); 729 + 730 + prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift); 731 + 732 + wr->send_flags = 0; 733 + 785 734 umrwr->target.virt_addr = virt_addr; 786 735 umrwr->length = len; 787 736 umrwr->access_flags = access_flags; ··· 811 734 umrwr->mkey = key; 812 735 } 813 736 814 - void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) 737 + static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, 738 + int access_flags, int *npages, 739 + int *page_shift, int *ncont, int *order) 815 740 { 816 - struct mlx5_ib_umr_context *context; 817 - struct ib_wc wc; 818 - int err; 819 - 820 - while (1) { 821 - err = ib_poll_cq(cq, 1, &wc); 822 - if (err < 0) { 823 - pr_warn("poll cq error %d\n", err); 824 - return; 825 - } 826 - if (err == 0) 827 - break; 828 - 829 - context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; 830 - context->status = wc.status; 831 - complete(&context->done); 741 + struct mlx5_ib_dev *dev = to_mdev(pd->device); 742 + struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, 743 + access_flags, 0); 744 + if (IS_ERR(umem)) { 745 + mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); 746 + return (void *)umem; 832 747 } 833 - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 748 + 749 + mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); 750 + if (!*npages) { 751 + mlx5_ib_warn(dev, "avoid zero region\n"); 752 + ib_umem_release(umem); 753 + return ERR_PTR(-EINVAL); 754 + } 755 + 756 + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", 757 + *npages, *ncont, *order, *page_shift); 758 + 759 + return umem; 760 + } 761 + 762 + static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) 763 + { 764 + struct mlx5_ib_umr_context *context = 765 + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); 766 + 767 + context->status = wc->status; 768 + complete(&context->done); 769 + } 770 + 771 + static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) 772 + { 773 + context->cqe.done = mlx5_ib_umr_done; 774 + context->status = -1; 775 + init_completion(&context->done); 834 776 } 835 777 836 778 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, ··· 860 764 struct device *ddev = dev->ib_dev.dma_device; 861 765 struct umr_common *umrc = &dev->umrc; 862 766 struct mlx5_ib_umr_context umr_context; 863 - struct mlx5_umr_wr umrwr; 767 + struct mlx5_umr_wr umrwr = {}; 864 768 struct ib_send_wr *bad; 865 769 struct 
mlx5_ib_mr *mr; 866 770 struct ib_sge sg; 867 771 int size; 868 772 __be64 *mr_pas; 869 - __be64 *pas; 870 773 dma_addr_t dma; 871 774 int err = 0; 872 775 int i; ··· 885 790 if (!mr) 886 791 return ERR_PTR(-EAGAIN); 887 792 888 - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. 889 - * To avoid copying garbage after the pas array, we allocate 890 - * a little more. */ 891 - size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); 892 - mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); 893 - if (!mr_pas) { 894 - err = -ENOMEM; 793 + err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas, 794 + &dma); 795 + if (err) 895 796 goto free_mr; 896 - } 897 - 898 - pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN); 899 - mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); 900 - /* Clear padding after the actual pages. */ 901 - memset(pas + npages, 0, size - npages * sizeof(u64)); 902 - 903 - dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); 904 - if (dma_mapping_error(ddev, dma)) { 905 - err = -ENOMEM; 906 - goto free_pas; 907 - } 908 - 909 - memset(&umrwr, 0, sizeof(umrwr)); 910 - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; 911 - prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, 912 - page_shift, virt_addr, len, access_flags); 913 797 914 798 mlx5_ib_init_umr_context(&umr_context); 799 + 800 + umrwr.wr.wr_cqe = &umr_context.cqe; 801 + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, 802 + page_shift, virt_addr, len, access_flags); 803 + 915 804 down(&umrc->sem); 916 805 err = ib_post_send(umrc->qp, &umrwr.wr, &bad); 917 806 if (err) { ··· 909 830 } 910 831 } 911 832 912 - mr->mmr.iova = virt_addr; 913 - mr->mmr.size = len; 914 - mr->mmr.pd = to_mpd(pd)->pdn; 833 + mr->mmkey.iova = virt_addr; 834 + mr->mmkey.size = len; 835 + mr->mmkey.pd = to_mpd(pd)->pdn; 915 836 916 837 mr->live = 1; 917 838 ··· 919 840 up(&umrc->sem); 920 841 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); 921 842 922 - free_pas: 923 843 kfree(mr_pas); 924 844 925 845 free_mr: ··· 1007 929 1008 930 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); 1009 931 932 + mlx5_ib_init_umr_context(&umr_context); 933 + 1010 934 memset(&wr, 0, sizeof(wr)); 1011 - wr.wr.wr_id = (u64)(unsigned long)&umr_context; 935 + wr.wr.wr_cqe = &umr_context.cqe; 1012 936 1013 937 sg.addr = dma; 1014 938 sg.length = ALIGN(npages * sizeof(u64), ··· 1024 944 wr.wr.opcode = MLX5_IB_WR_UMR; 1025 945 wr.npages = sg.length / sizeof(u64); 1026 946 wr.page_shift = PAGE_SHIFT; 1027 - wr.mkey = mr->mmr.key; 947 + wr.mkey = mr->mmkey.key; 1028 948 wr.target.offset = start_page_index; 1029 949 1030 - mlx5_ib_init_umr_context(&umr_context); 1031 950 down(&umrc->sem); 1032 951 err = ib_post_send(umrc->qp, &wr.wr, &bad); 1033 952 if (err) { ··· 1053 974 } 1054 975 #endif 1055 976 1056 - static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, 1057 - u64 length, struct ib_umem *umem, 1058 - int npages, int page_shift, 1059 - int access_flags) 977 + /* 978 + * If ibmr is NULL it will be allocated by reg_create. 979 + * Else, the given ibmr will be used. 
980 + */ 981 + static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, 982 + u64 virt_addr, u64 length, 983 + struct ib_umem *umem, int npages, 984 + int page_shift, int access_flags) 1060 985 { 1061 986 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1062 987 struct mlx5_create_mkey_mbox_in *in; ··· 1069 986 int err; 1070 987 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); 1071 988 1072 - mr = kzalloc(sizeof(*mr), GFP_KERNEL); 989 + mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); 1073 990 if (!mr) 1074 991 return ERR_PTR(-ENOMEM); 1075 992 ··· 1096 1013 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1097 1014 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1098 1015 1 << page_shift)); 1099 - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, 1016 + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, 1100 1017 NULL, NULL); 1101 1018 if (err) { 1102 1019 mlx5_ib_warn(dev, "create mkey failed\n"); ··· 1107 1024 mr->live = 1; 1108 1025 kvfree(in); 1109 1026 1110 - mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); 1027 + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1111 1028 1112 1029 return mr; 1113 1030 ··· 1115 1032 kvfree(in); 1116 1033 1117 1034 err_1: 1118 - kfree(mr); 1035 + if (!ibmr) 1036 + kfree(mr); 1119 1037 1120 1038 return ERR_PTR(err); 1039 + } 1040 + 1041 + static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1042 + int npages, u64 length, int access_flags) 1043 + { 1044 + mr->npages = npages; 1045 + atomic_add(npages, &dev->mdev->priv.reg_pages); 1046 + mr->ibmr.lkey = mr->mmkey.key; 1047 + mr->ibmr.rkey = mr->mmkey.key; 1048 + mr->ibmr.length = length; 1049 + mr->access_flags = access_flags; 1121 1050 } 1122 1051 1123 1052 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, ··· 1147 1052 1148 1053 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1149 1054 start, virt_addr, length, access_flags); 1150 - umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 1151 - 0); 1152 - if (IS_ERR(umem)) { 1153 - mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); 1055 + umem = mr_umem_get(pd, start, length, access_flags, &npages, 1056 + &page_shift, &ncont, &order); 1057 + 1058 + if (IS_ERR(umem)) 1154 1059 return (void *)umem; 1155 - } 1156 - 1157 - mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); 1158 - if (!npages) { 1159 - mlx5_ib_warn(dev, "avoid zero region\n"); 1160 - err = -EINVAL; 1161 - goto error; 1162 - } 1163 - 1164 - mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", 1165 - npages, ncont, order, page_shift); 1166 1060 1167 1061 if (use_umr(order)) { 1168 1062 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, ··· 1167 1083 } 1168 1084 1169 1085 if (!mr) 1170 - mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, 1171 - access_flags); 1086 + mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, 1087 + page_shift, access_flags); 1172 1088 1173 1089 if (IS_ERR(mr)) { 1174 1090 err = PTR_ERR(mr); 1175 1091 goto error; 1176 1092 } 1177 1093 1178 - mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); 1094 + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1179 1095 1180 1096 mr->umem = umem; 1181 - mr->npages = npages; 1182 - atomic_add(npages, &dev->mdev->priv.reg_pages); 1183 - mr->ibmr.lkey = mr->mmr.key; 1184 - mr->ibmr.rkey = mr->mmr.key; 1097 + set_mr_fileds(dev, mr, npages, length, access_flags); 1185 1098 1186 1099 #ifdef 
CONFIG_INFINIBAND_ON_DEMAND_PAGING 1187 - if (umem->odp_data) { 1188 - /* 1189 - * This barrier prevents the compiler from moving the 1190 - * setting of umem->odp_data->private to point to our 1191 - * MR, before reg_umr finished, to ensure that the MR 1192 - * initialization have finished before starting to 1193 - * handle invalidations. 1194 - */ 1195 - smp_wmb(); 1196 - mr->umem->odp_data->private = mr; 1197 - /* 1198 - * Make sure we will see the new 1199 - * umem->odp_data->private value in the invalidation 1200 - * routines, before we can get page faults on the 1201 - * MR. Page faults can happen once we put the MR in 1202 - * the tree, below this line. Without the barrier, 1203 - * there can be a fault handling and an invalidation 1204 - * before umem->odp_data->private == mr is visible to 1205 - * the invalidation handler. 1206 - */ 1207 - smp_wmb(); 1208 - } 1100 + update_odp_mr(mr); 1209 1101 #endif 1210 1102 1211 1103 return &mr->ibmr; ··· 1195 1135 { 1196 1136 struct umr_common *umrc = &dev->umrc; 1197 1137 struct mlx5_ib_umr_context umr_context; 1198 - struct mlx5_umr_wr umrwr; 1138 + struct mlx5_umr_wr umrwr = {}; 1199 1139 struct ib_send_wr *bad; 1200 1140 int err; 1201 1141 1202 - memset(&umrwr.wr, 0, sizeof(umrwr)); 1203 - umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; 1204 - prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); 1205 - 1206 1142 mlx5_ib_init_umr_context(&umr_context); 1143 + 1144 + umrwr.wr.wr_cqe = &umr_context.cqe; 1145 + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); 1146 + 1207 1147 down(&umrc->sem); 1208 1148 err = ib_post_send(umrc->qp, &umrwr.wr, &bad); 1209 1149 if (err) { ··· 1223 1163 1224 1164 error: 1225 1165 return err; 1166 + } 1167 + 1168 + static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, 1169 + u64 length, int npages, int page_shift, int order, 1170 + int access_flags, int flags) 1171 + { 1172 + struct mlx5_ib_dev *dev = to_mdev(pd->device); 1173 + struct device *ddev = dev->ib_dev.dma_device; 1174 + struct mlx5_ib_umr_context umr_context; 1175 + struct ib_send_wr *bad; 1176 + struct mlx5_umr_wr umrwr = {}; 1177 + struct ib_sge sg; 1178 + struct umr_common *umrc = &dev->umrc; 1179 + dma_addr_t dma = 0; 1180 + __be64 *mr_pas = NULL; 1181 + int size; 1182 + int err; 1183 + 1184 + mlx5_ib_init_umr_context(&umr_context); 1185 + 1186 + umrwr.wr.wr_cqe = &umr_context.cqe; 1187 + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1188 + 1189 + if (flags & IB_MR_REREG_TRANS) { 1190 + err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size, 1191 + &mr_pas, &dma); 1192 + if (err) 1193 + return err; 1194 + 1195 + umrwr.target.virt_addr = virt_addr; 1196 + umrwr.length = length; 1197 + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1198 + } 1199 + 1200 + prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, 1201 + page_shift); 1202 + 1203 + if (flags & IB_MR_REREG_PD) { 1204 + umrwr.pd = pd; 1205 + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD; 1206 + } 1207 + 1208 + if (flags & IB_MR_REREG_ACCESS) { 1209 + umrwr.access_flags = access_flags; 1210 + umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; 1211 + } 1212 + 1213 + /* post send request to UMR QP */ 1214 + down(&umrc->sem); 1215 + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); 1216 + 1217 + if (err) { 1218 + mlx5_ib_warn(dev, "post send failed, err %d\n", err); 1219 + } else { 1220 + wait_for_completion(&umr_context.done); 1221 + if (umr_context.status != IB_WC_SUCCESS) { 1222 + mlx5_ib_warn(dev, "reg umr failed (%u)\n", 
1223 + umr_context.status); 1224 + err = -EFAULT; 1225 + } 1226 + } 1227 + 1228 + up(&umrc->sem); 1229 + if (flags & IB_MR_REREG_TRANS) { 1230 + dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); 1231 + kfree(mr_pas); 1232 + } 1233 + return err; 1234 + } 1235 + 1236 + int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1237 + u64 length, u64 virt_addr, int new_access_flags, 1238 + struct ib_pd *new_pd, struct ib_udata *udata) 1239 + { 1240 + struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1241 + struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1242 + struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; 1243 + int access_flags = flags & IB_MR_REREG_ACCESS ? 1244 + new_access_flags : 1245 + mr->access_flags; 1246 + u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; 1247 + u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; 1248 + int page_shift = 0; 1249 + int npages = 0; 1250 + int ncont = 0; 1251 + int order = 0; 1252 + int err; 1253 + 1254 + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1255 + start, virt_addr, length, access_flags); 1256 + 1257 + if (flags != IB_MR_REREG_PD) { 1258 + /* 1259 + * Replace umem. This needs to be done whether or not UMR is 1260 + * used. 1261 + */ 1262 + flags |= IB_MR_REREG_TRANS; 1263 + ib_umem_release(mr->umem); 1264 + mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, 1265 + &page_shift, &ncont, &order); 1266 + if (IS_ERR(mr->umem)) { 1267 + err = PTR_ERR(mr->umem); 1268 + mr->umem = NULL; 1269 + return err; 1270 + } 1271 + } 1272 + 1273 + if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { 1274 + /* 1275 + * UMR can't be used - MKey needs to be replaced. 1276 + */ 1277 + if (mr->umred) { 1278 + err = unreg_umr(dev, mr); 1279 + if (err) 1280 + mlx5_ib_warn(dev, "Failed to unregister MR\n"); 1281 + } else { 1282 + err = destroy_mkey(dev, mr); 1283 + if (err) 1284 + mlx5_ib_warn(dev, "Failed to destroy MKey\n"); 1285 + } 1286 + if (err) 1287 + return err; 1288 + 1289 + mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 1290 + page_shift, access_flags); 1291 + 1292 + if (IS_ERR(mr)) 1293 + return PTR_ERR(mr); 1294 + 1295 + mr->umred = 0; 1296 + } else { 1297 + /* 1298 + * Send a UMR WQE 1299 + */ 1300 + err = rereg_umr(pd, mr, addr, len, npages, page_shift, 1301 + order, access_flags, flags); 1302 + if (err) { 1303 + mlx5_ib_warn(dev, "Failed to rereg UMR\n"); 1304 + return err; 1305 + } 1306 + } 1307 + 1308 + if (flags & IB_MR_REREG_PD) { 1309 + ib_mr->pd = pd; 1310 + mr->mmkey.pd = to_mpd(pd)->pdn; 1311 + } 1312 + 1313 + if (flags & IB_MR_REREG_ACCESS) 1314 + mr->access_flags = access_flags; 1315 + 1316 + if (flags & IB_MR_REREG_TRANS) { 1317 + atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); 1318 + set_mr_fileds(dev, mr, npages, len, access_flags); 1319 + mr->mmkey.iova = addr; 1320 + mr->mmkey.size = len; 1321 + } 1322 + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1323 + update_odp_mr(mr); 1324 + #endif 1325 + 1326 + return 0; 1226 1327 } 1227 1328 1228 1329 static int ··· 1457 1236 err = destroy_mkey(dev, mr); 1458 1237 if (err) { 1459 1238 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", 1460 - mr->mmr.key, err); 1239 + mr->mmkey.key, err); 1461 1240 return err; 1462 1241 } 1463 1242 } else { ··· 1521 1300 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1522 1301 struct mlx5_create_mkey_mbox_in *in; 1523 1302 struct mlx5_ib_mr *mr; 1524 - int access_mode, err; 1525 - int ndescs = roundup(max_num_sg, 4); 1303 + int 
ndescs = ALIGN(max_num_sg, 4); 1304 + int err; 1526 1305 1527 1306 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1528 1307 if (!mr) ··· 1540 1319 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 1541 1320 1542 1321 if (mr_type == IB_MR_TYPE_MEM_REG) { 1543 - access_mode = MLX5_ACCESS_MODE_MTT; 1322 + mr->access_mode = MLX5_ACCESS_MODE_MTT; 1544 1323 in->seg.log2_page_size = PAGE_SHIFT; 1545 1324 1546 1325 err = mlx5_alloc_priv_descs(pd->device, mr, ··· 1549 1328 goto err_free_in; 1550 1329 1551 1330 mr->desc_size = sizeof(u64); 1331 + mr->max_descs = ndescs; 1332 + } else if (mr_type == IB_MR_TYPE_SG_GAPS) { 1333 + mr->access_mode = MLX5_ACCESS_MODE_KLM; 1334 + 1335 + err = mlx5_alloc_priv_descs(pd->device, mr, 1336 + ndescs, sizeof(struct mlx5_klm)); 1337 + if (err) 1338 + goto err_free_in; 1339 + mr->desc_size = sizeof(struct mlx5_klm); 1552 1340 mr->max_descs = ndescs; 1553 1341 } else if (mr_type == IB_MR_TYPE_SIGNATURE) { 1554 1342 u32 psv_index[2]; ··· 1577 1347 if (err) 1578 1348 goto err_free_sig; 1579 1349 1580 - access_mode = MLX5_ACCESS_MODE_KLM; 1350 + mr->access_mode = MLX5_ACCESS_MODE_KLM; 1581 1351 mr->sig->psv_memory.psv_idx = psv_index[0]; 1582 1352 mr->sig->psv_wire.psv_idx = psv_index[1]; 1583 1353 ··· 1591 1361 goto err_free_in; 1592 1362 } 1593 1363 1594 - in->seg.flags = MLX5_PERM_UMR_EN | access_mode; 1595 - err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), 1364 + in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; 1365 + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), 1596 1366 NULL, NULL, NULL); 1597 1367 if (err) 1598 1368 goto err_destroy_psv; 1599 1369 1600 - mr->ibmr.lkey = mr->mmr.key; 1601 - mr->ibmr.rkey = mr->mmr.key; 1370 + mr->ibmr.lkey = mr->mmkey.key; 1371 + mr->ibmr.rkey = mr->mmkey.key; 1602 1372 mr->umem = NULL; 1603 1373 kfree(in); 1604 1374 ··· 1623 1393 err_free: 1624 1394 kfree(mr); 1625 1395 return ERR_PTR(err); 1396 + } 1397 + 1398 + struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 1399 + struct ib_udata *udata) 1400 + { 1401 + struct mlx5_ib_dev *dev = to_mdev(pd->device); 1402 + struct mlx5_create_mkey_mbox_in *in = NULL; 1403 + struct mlx5_ib_mw *mw = NULL; 1404 + int ndescs; 1405 + int err; 1406 + struct mlx5_ib_alloc_mw req = {}; 1407 + struct { 1408 + __u32 comp_mask; 1409 + __u32 response_length; 1410 + } resp = {}; 1411 + 1412 + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 1413 + if (err) 1414 + return ERR_PTR(err); 1415 + 1416 + if (req.comp_mask || req.reserved1 || req.reserved2) 1417 + return ERR_PTR(-EOPNOTSUPP); 1418 + 1419 + if (udata->inlen > sizeof(req) && 1420 + !ib_is_udata_cleared(udata, sizeof(req), 1421 + udata->inlen - sizeof(req))) 1422 + return ERR_PTR(-EOPNOTSUPP); 1423 + 1424 + ndescs = req.num_klms ? 
roundup(req.num_klms, 4) : roundup(1, 4); 1425 + 1426 + mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1427 + in = kzalloc(sizeof(*in), GFP_KERNEL); 1428 + if (!mw || !in) { 1429 + err = -ENOMEM; 1430 + goto free; 1431 + } 1432 + 1433 + in->seg.status = MLX5_MKEY_STATUS_FREE; 1434 + in->seg.xlt_oct_size = cpu_to_be32(ndescs); 1435 + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 1436 + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | 1437 + MLX5_PERM_LOCAL_READ; 1438 + if (type == IB_MW_TYPE_2) 1439 + in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); 1440 + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1441 + 1442 + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), 1443 + NULL, NULL, NULL); 1444 + if (err) 1445 + goto free; 1446 + 1447 + mw->ibmw.rkey = mw->mmkey.key; 1448 + 1449 + resp.response_length = min(offsetof(typeof(resp), response_length) + 1450 + sizeof(resp.response_length), udata->outlen); 1451 + if (resp.response_length) { 1452 + err = ib_copy_to_udata(udata, &resp, resp.response_length); 1453 + if (err) { 1454 + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); 1455 + goto free; 1456 + } 1457 + } 1458 + 1459 + kfree(in); 1460 + return &mw->ibmw; 1461 + 1462 + free: 1463 + kfree(mw); 1464 + kfree(in); 1465 + return ERR_PTR(err); 1466 + } 1467 + 1468 + int mlx5_ib_dealloc_mw(struct ib_mw *mw) 1469 + { 1470 + struct mlx5_ib_mw *mmw = to_mmw(mw); 1471 + int err; 1472 + 1473 + err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, 1474 + &mmw->mmkey); 1475 + if (!err) 1476 + kfree(mmw); 1477 + return err; 1626 1478 } 1627 1479 1628 1480 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, ··· 1748 1436 return ret; 1749 1437 } 1750 1438 1439 + static int 1440 + mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 1441 + struct scatterlist *sgl, 1442 + unsigned short sg_nents) 1443 + { 1444 + struct scatterlist *sg = sgl; 1445 + struct mlx5_klm *klms = mr->descs; 1446 + u32 lkey = mr->ibmr.pd->local_dma_lkey; 1447 + int i; 1448 + 1449 + mr->ibmr.iova = sg_dma_address(sg); 1450 + mr->ibmr.length = 0; 1451 + mr->ndescs = sg_nents; 1452 + 1453 + for_each_sg(sgl, sg, sg_nents, i) { 1454 + if (unlikely(i > mr->max_descs)) 1455 + break; 1456 + klms[i].va = cpu_to_be64(sg_dma_address(sg)); 1457 + klms[i].bcount = cpu_to_be32(sg_dma_len(sg)); 1458 + klms[i].key = cpu_to_be32(lkey); 1459 + mr->ibmr.length += sg_dma_len(sg); 1460 + } 1461 + 1462 + return i; 1463 + } 1464 + 1751 1465 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) 1752 1466 { 1753 1467 struct mlx5_ib_mr *mr = to_mmr(ibmr); ··· 1801 1463 mr->desc_size * mr->max_descs, 1802 1464 DMA_TO_DEVICE); 1803 1465 1804 - n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); 1466 + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) 1467 + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents); 1468 + else 1469 + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); 1805 1470 1806 1471 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 1807 1472 mr->desc_size * mr->max_descs,
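The mr.c rework above converts every UMR path (reg_umr(), unreg_umr(), rereg_umr()) to the same CQE-based completion flow: a stack-allocated mlx5_ib_umr_context carries an ib_cqe whose done callback, mlx5_ib_umr_done(), records the WC status and completes the context, so the poster just posts and sleeps. Condensed from those three functions, the shared post-and-wait shape looks roughly like the sketch below; the helper name is made up, and WQE preparation, DMA mapping and the full error paths are omitted:

    /* Condensed sketch of the post-and-wait flow shared by the UMR paths
     * above; not a literal driver helper. */
    static int umr_post_and_wait(struct mlx5_ib_dev *dev, struct mlx5_umr_wr *umrwr)
    {
            struct mlx5_ib_umr_context umr_context;
            struct ib_send_wr *bad;
            int err;

            mlx5_ib_init_umr_context(&umr_context); /* sets cqe.done = mlx5_ib_umr_done */
            umrwr->wr.wr_cqe = &umr_context.cqe;    /* CQ handler finds us via wr_cqe */

            down(&dev->umrc.sem);
            err = ib_post_send(dev->umrc.qp, &umrwr->wr, &bad);
            if (!err) {
                    wait_for_completion(&umr_context.done); /* completed in mlx5_ib_umr_done() */
                    if (umr_context.status != IB_WC_SUCCESS)
                            err = -EFAULT;
            }
            up(&dev->umrc.sem);
            return err;
    }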
+5 -5
drivers/infiniband/hw/mlx5/odp.c
··· 142 142 u32 key) 143 143 { 144 144 u32 base_key = mlx5_base_mkey(key); 145 - struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); 146 - struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); 145 + struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key); 146 + struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); 147 147 148 - if (!mmr || mmr->key != key || !mr->live) 148 + if (!mmkey || mmkey->key != key || !mr->live) 149 149 return NULL; 150 150 151 - return container_of(mmr, struct mlx5_ib_mr, mmr); 151 + return container_of(mmkey, struct mlx5_ib_mr, mmkey); 152 152 } 153 153 154 154 static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, ··· 232 232 io_virt += pfault->mpfault.bytes_committed; 233 233 bcnt -= pfault->mpfault.bytes_committed; 234 234 235 - start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; 235 + start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; 236 236 237 237 if (mr->umem->writable) 238 238 access_mask |= ODP_WRITE_ALLOWED_BIT;
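In the ODP fault path above, the faulting virtual address is turned into a page index relative to the MR's starting IOVA before the pages are brought in. A standalone check of that start_idx arithmetic, assuming 4 KiB pages and made-up addresses:

    /* Standalone check of the start_idx computation used above
     * (4 KiB pages assumed, addresses are hypothetical). */
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_MASK  (~((uint64_t)(1 << PAGE_SHIFT) - 1))

    int main(void)
    {
            uint64_t iova    = 0x7f0000001000ULL;   /* hypothetical mr->mmkey.iova */
            uint64_t io_virt = 0x7f0000005234ULL;   /* hypothetical faulting address */
            uint64_t start_idx = (io_virt - (iova & PAGE_MASK)) >> PAGE_SHIFT;

            printf("start_idx = %llu\n", (unsigned long long)start_idx); /* prints 4 */
            return 0;
    }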
+242 -29
drivers/infiniband/hw/mlx5/qp.c
··· 58 58 59 59 static const u32 mlx5_ib_opcode[] = { 60 60 [IB_WR_SEND] = MLX5_OPCODE_SEND, 61 + [IB_WR_LSO] = MLX5_OPCODE_LSO, 61 62 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, 62 63 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, 63 64 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, ··· 73 72 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, 74 73 }; 75 74 75 + struct mlx5_wqe_eth_pad { 76 + u8 rsvd0[16]; 77 + }; 76 78 77 79 static int is_qp0(enum ib_qp_type qp_type) 78 80 { ··· 264 260 return 0; 265 261 } 266 262 267 - static int sq_overhead(enum ib_qp_type qp_type) 263 + static int sq_overhead(struct ib_qp_init_attr *attr) 268 264 { 269 265 int size = 0; 270 266 271 - switch (qp_type) { 267 + switch (attr->qp_type) { 272 268 case IB_QPT_XRC_INI: 273 269 size += sizeof(struct mlx5_wqe_xrc_seg); 274 270 /* fall through */ ··· 291 287 break; 292 288 293 289 case IB_QPT_UD: 290 + if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 291 + size += sizeof(struct mlx5_wqe_eth_pad) + 292 + sizeof(struct mlx5_wqe_eth_seg); 293 + /* fall through */ 294 294 case IB_QPT_SMI: 295 - case IB_QPT_GSI: 295 + case MLX5_IB_QPT_HW_GSI: 296 296 size += sizeof(struct mlx5_wqe_ctrl_seg) + 297 297 sizeof(struct mlx5_wqe_datagram_seg); 298 298 break; ··· 319 311 int inl_size = 0; 320 312 int size; 321 313 322 - size = sq_overhead(attr->qp_type); 314 + size = sq_overhead(attr); 323 315 if (size < 0) 324 316 return size; 325 317 ··· 356 348 return -EINVAL; 357 349 } 358 350 359 - qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - 360 - sizeof(struct mlx5_wqe_inline_seg); 351 + qp->max_inline_data = wqe_size - sq_overhead(attr) - 352 + sizeof(struct mlx5_wqe_inline_seg); 361 353 attr->cap.max_inline_data = qp->max_inline_data; 362 354 363 355 if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) ··· 598 590 case IB_QPT_XRC_INI: 599 591 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; 600 592 case IB_QPT_SMI: return MLX5_QP_ST_QP0; 601 - case IB_QPT_GSI: return MLX5_QP_ST_QP1; 593 + case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; 602 594 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; 603 595 case IB_QPT_RAW_PACKET: 604 596 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; ··· 791 783 int err; 792 784 793 785 uuari = &dev->mdev->priv.uuari; 794 - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) 786 + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | 787 + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | 788 + IB_QP_CREATE_IPOIB_UD_LSO | 789 + mlx5_ib_create_qp_sqpn_qp1())) 795 790 return -EINVAL; 796 791 797 792 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) ··· 838 827 /* Set "fast registration enabled" for all kernel QPs */ 839 828 (*in)->ctx.params1 |= cpu_to_be32(1 << 11); 840 829 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); 830 + 831 + if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { 832 + (*in)->ctx.deth_sqpn = cpu_to_be32(1); 833 + qp->flags |= MLX5_IB_QP_SQPN_QP1; 834 + } 841 835 842 836 mlx5_fill_page_array(&qp->buf, (*in)->pas); 843 837 ··· 1244 1228 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) 1245 1229 qp->flags |= MLX5_IB_QP_MANAGED_RECV; 1246 1230 } 1231 + 1232 + if (init_attr->qp_type == IB_QPT_UD && 1233 + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) 1234 + if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 1235 + mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); 1236 + return -EOPNOTSUPP; 1237 + } 1238 + 1247 1239 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 1248 1240 qp->sq_signal_bits = 
MLX5_WQE_CTRL_CQ_UPDATE; 1249 1241 ··· 1293 1269 if (ucmd.sq_wqe_count > max_wqes) { 1294 1270 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", 1295 1271 ucmd.sq_wqe_count, max_wqes); 1272 + return -EINVAL; 1273 + } 1274 + if (init_attr->create_flags & 1275 + mlx5_ib_create_qp_sqpn_qp1()) { 1276 + mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); 1296 1277 return -EINVAL; 1297 1278 } 1298 1279 err = create_user_qp(dev, pd, qp, udata, init_attr, &in, ··· 1414 1385 /* 0xffffff means we ask to work with cqe version 0 */ 1415 1386 MLX5_SET(qpc, qpc, user_index, uidx); 1416 1387 } 1388 + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ 1389 + if (init_attr->qp_type == IB_QPT_UD && 1390 + (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { 1391 + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 1392 + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); 1393 + qp->flags |= MLX5_IB_QP_LSO; 1394 + } 1417 1395 1418 1396 if (init_attr->qp_type == IB_QPT_RAW_PACKET) { 1419 1397 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; ··· 1530 1494 break; 1531 1495 1532 1496 case IB_QPT_SMI: 1533 - case IB_QPT_GSI: 1497 + case MLX5_IB_QPT_HW_GSI: 1534 1498 case IB_QPT_RC: 1535 1499 case IB_QPT_UC: 1536 1500 case IB_QPT_UD: ··· 1693 1657 case IB_QPT_UC: 1694 1658 case IB_QPT_UD: 1695 1659 case IB_QPT_SMI: 1696 - case IB_QPT_GSI: 1660 + case MLX5_IB_QPT_HW_GSI: 1697 1661 case MLX5_IB_QPT_REG_UMR: 1698 1662 qp = kzalloc(sizeof(*qp), GFP_KERNEL); 1699 1663 if (!qp) ··· 1722 1686 1723 1687 break; 1724 1688 1689 + case IB_QPT_GSI: 1690 + return mlx5_ib_gsi_create_qp(pd, init_attr); 1691 + 1725 1692 case IB_QPT_RAW_IPV6: 1726 1693 case IB_QPT_RAW_ETHERTYPE: 1727 1694 case IB_QPT_MAX: ··· 1742 1703 { 1743 1704 struct mlx5_ib_dev *dev = to_mdev(qp->device); 1744 1705 struct mlx5_ib_qp *mqp = to_mqp(qp); 1706 + 1707 + if (unlikely(qp->qp_type == IB_QPT_GSI)) 1708 + return mlx5_ib_gsi_destroy_qp(qp); 1745 1709 1746 1710 destroy_qp_common(dev, mqp); 1747 1711 ··· 2203 2161 2204 2162 context = &in->ctx; 2205 2163 err = to_mlx5_st(ibqp->qp_type); 2206 - if (err < 0) 2164 + if (err < 0) { 2165 + mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); 2207 2166 goto out; 2167 + } 2208 2168 2209 2169 context->flags = cpu_to_be32(err << 16); 2210 2170 ··· 2226 2182 } 2227 2183 } 2228 2184 2229 - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { 2185 + if (is_sqp(ibqp->qp_type)) { 2230 2186 context->mtu_msgmax = (IB_MTU_256 << 5) | 8; 2231 2187 } else if (ibqp->qp_type == IB_QPT_UD || 2232 2188 ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { ··· 2328 2284 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 2329 2285 context->sq_crq_size |= cpu_to_be16(1 << 4); 2330 2286 2287 + if (qp->flags & MLX5_IB_QP_SQPN_QP1) 2288 + context->deth_sqpn = cpu_to_be32(1); 2331 2289 2332 2290 mlx5_cur = to_mlx5_state(cur_state); 2333 2291 mlx5_new = to_mlx5_state(new_state); ··· 2409 2363 { 2410 2364 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2411 2365 struct mlx5_ib_qp *qp = to_mqp(ibqp); 2366 + enum ib_qp_type qp_type; 2412 2367 enum ib_qp_state cur_state, new_state; 2413 2368 int err = -EINVAL; 2414 2369 int port; 2415 2370 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; 2371 + 2372 + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 2373 + return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); 2374 + 2375 + qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? 
2376 + IB_QPT_GSI : ibqp->qp_type; 2416 2377 2417 2378 mutex_lock(&qp->mutex); 2418 2379 ··· 2431 2378 ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); 2432 2379 } 2433 2380 2434 - if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && 2435 - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 2436 - ll)) 2381 + if (qp_type != MLX5_IB_QPT_REG_UMR && 2382 + !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { 2383 + mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", 2384 + cur_state, new_state, ibqp->qp_type, attr_mask); 2437 2385 goto out; 2386 + } 2438 2387 2439 2388 if ((attr_mask & IB_QP_PORT) && 2440 2389 (attr->port_num == 0 || 2441 - attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) 2390 + attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) { 2391 + mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", 2392 + attr->port_num, dev->num_ports); 2442 2393 goto out; 2394 + } 2443 2395 2444 2396 if (attr_mask & IB_QP_PKEY_INDEX) { 2445 2397 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 2446 2398 if (attr->pkey_index >= 2447 - dev->mdev->port_caps[port - 1].pkey_table_len) 2399 + dev->mdev->port_caps[port - 1].pkey_table_len) { 2400 + mlx5_ib_dbg(dev, "invalid pkey index %d\n", 2401 + attr->pkey_index); 2448 2402 goto out; 2403 + } 2449 2404 } 2450 2405 2451 2406 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && 2452 2407 attr->max_rd_atomic > 2453 - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) 2408 + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { 2409 + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", 2410 + attr->max_rd_atomic); 2454 2411 goto out; 2412 + } 2455 2413 2456 2414 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && 2457 2415 attr->max_dest_rd_atomic > 2458 - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) 2416 + (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { 2417 + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", 2418 + attr->max_dest_rd_atomic); 2459 2419 goto out; 2420 + } 2460 2421 2461 2422 if (cur_state == new_state && cur_state == IB_QPS_RESET) { 2462 2423 err = 0; ··· 2507 2440 rseg->raddr = cpu_to_be64(remote_addr); 2508 2441 rseg->rkey = cpu_to_be32(rkey); 2509 2442 rseg->reserved = 0; 2443 + } 2444 + 2445 + static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, 2446 + struct ib_send_wr *wr, void *qend, 2447 + struct mlx5_ib_qp *qp, int *size) 2448 + { 2449 + void *seg = eseg; 2450 + 2451 + memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); 2452 + 2453 + if (wr->send_flags & IB_SEND_IP_CSUM) 2454 + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | 2455 + MLX5_ETH_WQE_L4_CSUM; 2456 + 2457 + seg += sizeof(struct mlx5_wqe_eth_seg); 2458 + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; 2459 + 2460 + if (wr->opcode == IB_WR_LSO) { 2461 + struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); 2462 + int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start); 2463 + u64 left, leftlen, copysz; 2464 + void *pdata = ud_wr->header; 2465 + 2466 + left = ud_wr->hlen; 2467 + eseg->mss = cpu_to_be16(ud_wr->mss); 2468 + eseg->inline_hdr_sz = cpu_to_be16(left); 2469 + 2470 + /* 2471 + * check if there is space till the end of queue, if yes, 2472 + * copy all in one shot, otherwise copy till the end of queue, 2473 + * rollback and than the copy the left 2474 + */ 2475 + leftlen = qend - (void *)eseg->inline_hdr_start; 2476 + copysz = min_t(u64, leftlen, left); 2477 + 2478 + memcpy(seg - size_of_inl_hdr_start, pdata, copysz); 2479 + 2480 + if (likely(copysz > 
size_of_inl_hdr_start)) { 2481 + seg += ALIGN(copysz - size_of_inl_hdr_start, 16); 2482 + *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; 2483 + } 2484 + 2485 + if (unlikely(copysz < left)) { /* the last wqe in the queue */ 2486 + seg = mlx5_get_send_wqe(qp, 0); 2487 + left -= copysz; 2488 + pdata += copysz; 2489 + memcpy(seg, pdata, left); 2490 + seg += ALIGN(left, 16); 2491 + *size += ALIGN(left, 16) / 16; 2492 + } 2493 + } 2494 + 2495 + return seg; 2510 2496 } 2511 2497 2512 2498 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, ··· 2629 2509 int ndescs = mr->ndescs; 2630 2510 2631 2511 memset(umr, 0, sizeof(*umr)); 2512 + 2513 + if (mr->access_mode == MLX5_ACCESS_MODE_KLM) 2514 + /* KLMs take twice the size of MTTs */ 2515 + ndescs *= 2; 2516 + 2632 2517 umr->flags = MLX5_UMR_CHECK_NOT_FREE; 2633 2518 umr->klm_octowords = get_klm_octo(ndescs); 2634 2519 umr->mkey_mask = frwr_mkey_mask(); ··· 2683 2558 return cpu_to_be64(result); 2684 2559 } 2685 2560 2561 + static __be64 get_umr_update_translation_mask(void) 2562 + { 2563 + u64 result; 2564 + 2565 + result = MLX5_MKEY_MASK_LEN | 2566 + MLX5_MKEY_MASK_PAGE_SIZE | 2567 + MLX5_MKEY_MASK_START_ADDR | 2568 + MLX5_MKEY_MASK_KEY | 2569 + MLX5_MKEY_MASK_FREE; 2570 + 2571 + return cpu_to_be64(result); 2572 + } 2573 + 2574 + static __be64 get_umr_update_access_mask(void) 2575 + { 2576 + u64 result; 2577 + 2578 + result = MLX5_MKEY_MASK_LW | 2579 + MLX5_MKEY_MASK_RR | 2580 + MLX5_MKEY_MASK_RW | 2581 + MLX5_MKEY_MASK_A | 2582 + MLX5_MKEY_MASK_KEY | 2583 + MLX5_MKEY_MASK_FREE; 2584 + 2585 + return cpu_to_be64(result); 2586 + } 2587 + 2588 + static __be64 get_umr_update_pd_mask(void) 2589 + { 2590 + u64 result; 2591 + 2592 + result = MLX5_MKEY_MASK_PD | 2593 + MLX5_MKEY_MASK_KEY | 2594 + MLX5_MKEY_MASK_FREE; 2595 + 2596 + return cpu_to_be64(result); 2597 + } 2598 + 2686 2599 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 2687 2600 struct ib_send_wr *wr) 2688 2601 { ··· 2739 2576 umr->mkey_mask = get_umr_update_mtt_mask(); 2740 2577 umr->bsf_octowords = get_klm_octo(umrwr->target.offset); 2741 2578 umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; 2742 - } else { 2743 - umr->mkey_mask = get_umr_reg_mr_mask(); 2744 2579 } 2580 + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) 2581 + umr->mkey_mask |= get_umr_update_translation_mask(); 2582 + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS) 2583 + umr->mkey_mask |= get_umr_update_access_mask(); 2584 + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) 2585 + umr->mkey_mask |= get_umr_update_pd_mask(); 2586 + if (!umr->mkey_mask) 2587 + umr->mkey_mask = get_umr_reg_mr_mask(); 2745 2588 } else { 2746 2589 umr->mkey_mask = get_umr_unreg_mr_mask(); 2747 2590 } ··· 2772 2603 int ndescs = ALIGN(mr->ndescs, 8) >> 1; 2773 2604 2774 2605 memset(seg, 0, sizeof(*seg)); 2775 - seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; 2606 + 2607 + if (mr->access_mode == MLX5_ACCESS_MODE_MTT) 2608 + seg->log2_page_size = ilog2(mr->ibmr.page_size); 2609 + else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) 2610 + /* KLMs take twice the size of MTTs */ 2611 + ndescs *= 2; 2612 + 2613 + seg->flags = get_umr_flags(access) | mr->access_mode; 2776 2614 seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); 2777 2615 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); 2778 2616 seg->start_addr = cpu_to_be64(mr->ibmr.iova); 2779 2617 seg->len = cpu_to_be64(mr->ibmr.length); 2780 2618 seg->xlt_oct_size = cpu_to_be32(ndescs); 2781 - seg->log2_page_size = 
ilog2(mr->ibmr.page_size); 2782 2619 } 2783 2620 2784 2621 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) ··· 2805 2630 2806 2631 seg->flags = convert_access(umrwr->access_flags); 2807 2632 if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { 2808 - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); 2633 + if (umrwr->pd) 2634 + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); 2809 2635 seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); 2810 2636 } 2811 2637 seg->len = cpu_to_be64(umrwr->length); ··· 3372 3196 { 3373 3197 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ 3374 3198 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 3375 - struct mlx5_ib_qp *qp = to_mqp(ibqp); 3199 + struct mlx5_ib_qp *qp; 3376 3200 struct mlx5_ib_mr *mr; 3377 3201 struct mlx5_wqe_data_seg *dpseg; 3378 3202 struct mlx5_wqe_xrc_seg *xrc; 3379 - struct mlx5_bf *bf = qp->bf; 3203 + struct mlx5_bf *bf; 3380 3204 int uninitialized_var(size); 3381 - void *qend = qp->sq.qend; 3205 + void *qend; 3382 3206 unsigned long flags; 3383 3207 unsigned idx; 3384 3208 int err = 0; ··· 3389 3213 int i; 3390 3214 u8 next_fence = 0; 3391 3215 u8 fence; 3216 + 3217 + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 3218 + return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); 3219 + 3220 + qp = to_mqp(ibqp); 3221 + bf = qp->bf; 3222 + qend = qp->sq.qend; 3392 3223 3393 3224 spin_lock_irqsave(&qp->sq.lock, flags); 3394 3225 ··· 3556 3373 } 3557 3374 break; 3558 3375 3559 - case IB_QPT_UD: 3560 3376 case IB_QPT_SMI: 3561 - case IB_QPT_GSI: 3377 + case MLX5_IB_QPT_HW_GSI: 3562 3378 set_datagram_seg(seg, wr); 3563 3379 seg += sizeof(struct mlx5_wqe_datagram_seg); 3564 3380 size += sizeof(struct mlx5_wqe_datagram_seg) / 16; 3565 3381 if (unlikely((seg == qend))) 3566 3382 seg = mlx5_get_send_wqe(qp, 0); 3567 3383 break; 3384 + case IB_QPT_UD: 3385 + set_datagram_seg(seg, wr); 3386 + seg += sizeof(struct mlx5_wqe_datagram_seg); 3387 + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; 3568 3388 3389 + if (unlikely((seg == qend))) 3390 + seg = mlx5_get_send_wqe(qp, 0); 3391 + 3392 + /* handle qp that supports ud offload */ 3393 + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { 3394 + struct mlx5_wqe_eth_pad *pad; 3395 + 3396 + pad = seg; 3397 + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); 3398 + seg += sizeof(struct mlx5_wqe_eth_pad); 3399 + size += sizeof(struct mlx5_wqe_eth_pad) / 16; 3400 + 3401 + seg = set_eth_seg(seg, wr, qend, qp, &size); 3402 + 3403 + if (unlikely((seg == qend))) 3404 + seg = mlx5_get_send_wqe(qp, 0); 3405 + } 3406 + break; 3569 3407 case MLX5_IB_QPT_REG_UMR: 3570 3408 if (wr->opcode != MLX5_IB_WR_UMR) { 3571 3409 err = -EINVAL; ··· 3705 3501 int nreq; 3706 3502 int ind; 3707 3503 int i; 3504 + 3505 + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 3506 + return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); 3708 3507 3709 3508 spin_lock_irqsave(&qp->rq.lock, flags); 3710 3509 ··· 4029 3822 int err = 0; 4030 3823 u8 raw_packet_qp_state; 4031 3824 3825 + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 3826 + return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, 3827 + qp_init_attr); 3828 + 4032 3829 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 4033 3830 /* 4034 3831 * Wait for any outstanding page faults, in case the user frees memory ··· 4085 3874 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; 4086 3875 if (qp->flags & MLX5_IB_QP_MANAGED_RECV) 4087 3876 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; 3877 + if (qp->flags & MLX5_IB_QP_SQPN_QP1) 3878 + qp_init_attr->create_flags |= 
mlx5_ib_create_qp_sqpn_qp1(); 4088 3879 4089 3880 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? 4090 3881 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
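Among the qp.c changes, set_eth_seg() copies the LSO inline header into the send queue and may have to wrap around the end of the SQ buffer: it computes the room left up to qend, copies that much, then continues at WQE 0. A standalone sketch of that wrap-around copy, using a plain byte array in place of the SQ buffer:

    /* Standalone sketch of the wrap-around copy set_eth_seg() performs
     * when the LSO inline header crosses the end of the SQ buffer. */
    #include <string.h>
    #include <stdio.h>

    #define QUEUE_SIZE 64

    static char sq[QUEUE_SIZE];             /* stand-in for the send queue */

    static void copy_with_wrap(size_t offset, const char *hdr, size_t hlen)
    {
            size_t leftlen = QUEUE_SIZE - offset;           /* room until qend */
            size_t copysz = hlen < leftlen ? hlen : leftlen;

            memcpy(sq + offset, hdr, copysz);
            if (copysz < hlen)                              /* wrapped: continue at WQE 0 */
                    memcpy(sq, hdr + copysz, hlen - copysz);
    }

    int main(void)
    {
            char hdr[20];

            memset(hdr, 'h', sizeof(hdr));
            copy_with_wrap(QUEUE_SIZE - 8, hdr, sizeof(hdr)); /* 8 bytes at end, 12 wrapped */
            printf("tail=%c head=%c\n", sq[QUEUE_SIZE - 1], sq[11]); /* both 'h' */
            return 0;
    }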
+7
drivers/infiniband/hw/mlx5/user.h
··· 152 152 __u32 uuar_index; 153 153 }; 154 154 155 + struct mlx5_ib_alloc_mw { 156 + __u32 comp_mask; 157 + __u8 num_klms; 158 + __u8 reserved1; 159 + __u16 reserved2; 160 + }; 161 + 155 162 static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, 156 163 struct mlx5_ib_create_qp *ucmd, 157 164 int inlen,
+2 -1
drivers/infiniband/hw/nes/nes_verbs.c
··· 56 56 /** 57 57 * nes_alloc_mw 58 58 */ 59 - static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type) 59 + static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type, 60 + struct ib_udata *udata) 60 61 { 61 62 struct nes_pd *nespd = to_nespd(ibpd); 62 63 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+8
drivers/infiniband/hw/ocrdma/ocrdma.h
··· 114 114 u8 local_ca_ack_delay; 115 115 u8 ird; 116 116 u8 num_ird_pages; 117 + u8 udp_encap; 117 118 }; 118 119 119 120 struct ocrdma_dma_mem { ··· 357 356 struct ocrdma_av *av; 358 357 u16 sgid_index; 359 358 u32 id; 359 + u8 hdr_type; 360 360 }; 361 361 362 362 struct ocrdma_qp_hwq_info { ··· 598 596 static inline u8 ocrdma_get_ae_link_state(u32 ae_state) 599 597 { 600 598 return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); 599 + } 600 + 601 + static inline bool ocrdma_is_udp_encap_supported(struct ocrdma_dev *dev) 602 + { 603 + return (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV4) || 604 + (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV6); 601 605 } 602 606 603 607 #endif
+64 -13
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
··· 55 55 56 56 #define OCRDMA_VID_PCP_SHIFT 0xD 57 57 58 + static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) 59 + { 60 + switch (hdr_type) { 61 + case OCRDMA_L3_TYPE_IB_GRH: 62 + return (u16)0x8915; 63 + case OCRDMA_L3_TYPE_IPV4: 64 + return (u16)0x0800; 65 + case OCRDMA_L3_TYPE_IPV6: 66 + return (u16)0x86dd; 67 + default: 68 + pr_err("ocrdma%d: Invalid network header\n", devid); 69 + return 0; 70 + } 71 + } 72 + 58 73 static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, 59 74 struct ib_ah_attr *attr, union ib_gid *sgid, 60 75 int pdid, bool *isvlan, u16 vlan_tag) 61 76 { 62 - int status = 0; 77 + int status; 63 78 struct ocrdma_eth_vlan eth; 64 79 struct ocrdma_grh grh; 65 80 int eth_sz; 81 + u16 proto_num = 0; 82 + u8 nxthdr = 0x11; 83 + struct iphdr ipv4; 84 + union { 85 + struct sockaddr _sockaddr; 86 + struct sockaddr_in _sockaddr_in; 87 + struct sockaddr_in6 _sockaddr_in6; 88 + } sgid_addr, dgid_addr; 66 89 67 90 memset(&eth, 0, sizeof(eth)); 68 91 memset(&grh, 0, sizeof(grh)); 69 92 93 + /* Protocol Number */ 94 + proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type); 95 + if (!proto_num) 96 + return -EINVAL; 97 + nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11; 70 98 /* VLAN */ 71 99 if (!vlan_tag || (vlan_tag > 0xFFF)) 72 100 vlan_tag = dev->pvid; ··· 106 78 dev->id); 107 79 } 108 80 eth.eth_type = cpu_to_be16(0x8100); 109 - eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); 81 + eth.roce_eth_type = cpu_to_be16(proto_num); 110 82 vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; 111 83 eth.vlan_tag = cpu_to_be16(vlan_tag); 112 84 eth_sz = sizeof(struct ocrdma_eth_vlan); 113 85 *isvlan = true; 114 86 } else { 115 - eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); 87 + eth.eth_type = cpu_to_be16(proto_num); 116 88 eth_sz = sizeof(struct ocrdma_eth_basic); 117 89 } 118 90 /* MAC */ ··· 121 93 if (status) 122 94 return status; 123 95 ah->sgid_index = attr->grh.sgid_index; 124 - memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); 125 - memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw)); 126 - 127 - grh.tclass_flow = cpu_to_be32((6 << 28) | 128 - (attr->grh.traffic_class << 24) | 129 - attr->grh.flow_label); 130 - /* 0x1b is next header value in GRH */ 131 - grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | 132 - (0x1b << 8) | attr->grh.hop_limit); 133 96 /* Eth HDR */ 134 97 memcpy(&ah->av->eth_hdr, &eth, eth_sz); 135 - memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); 98 + if (ah->hdr_type == RDMA_NETWORK_IPV4) { 99 + *((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) | 100 + attr->grh.traffic_class); 101 + ipv4.id = cpu_to_be16(pdid); 102 + ipv4.frag_off = htons(IP_DF); 103 + ipv4.tot_len = htons(0); 104 + ipv4.ttl = attr->grh.hop_limit; 105 + ipv4.protocol = nxthdr; 106 + rdma_gid2ip(&sgid_addr._sockaddr, sgid); 107 + ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr; 108 + rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid); 109 + ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr; 110 + memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr)); 111 + } else { 112 + memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid)); 113 + grh.tclass_flow = cpu_to_be32((6 << 28) | 114 + (attr->grh.traffic_class << 24) | 115 + attr->grh.flow_label); 116 + memcpy(&grh.dgid[0], attr->grh.dgid.raw, 117 + sizeof(attr->grh.dgid.raw)); 118 + grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | 119 + (nxthdr << 8) | 120 + attr->grh.hop_limit); 121 + memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); 122 + } 136 
123 if (*isvlan) 137 124 ah->av->valid |= OCRDMA_AV_VLAN_VALID; 138 125 ah->av->valid = cpu_to_le32(ah->av->valid); ··· 171 128 172 129 if (atomic_cmpxchg(&dev->update_sl, 1, 0)) 173 130 ocrdma_init_service_level(dev); 131 + 174 132 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 175 133 if (!ah) 176 134 return ERR_PTR(-ENOMEM); ··· 192 148 vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); 193 149 dev_put(sgid_attr.ndev); 194 150 } 151 + /* Get network header type for this GID */ 152 + ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); 195 153 196 154 if ((pd->uctx) && 197 155 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && ··· 218 172 ahid_addr = pd->uctx->ah_tbl.va + attr->dlid; 219 173 *ahid_addr = 0; 220 174 *ahid_addr |= ah->id & OCRDMA_AH_ID_MASK; 175 + if (ocrdma_is_udp_encap_supported(dev)) { 176 + *ahid_addr |= ((u32)ah->hdr_type & 177 + OCRDMA_AH_L3_TYPE_MASK) << 178 + OCRDMA_AH_L3_TYPE_SHIFT; 179 + } 221 180 if (isvlan) 222 181 *ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK << 223 182 OCRDMA_AH_VLAN_VALID_SHIFT);
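For RoCEv2, set_av_attr() above builds an IPv4 header instead of a GRH and pulls the source and destination addresses out of the GIDs with rdma_gid2ip(). IPv4-based GIDs use the IPv4-mapped form ::ffff:a.b.c.d, so the dotted quad sits in the last four GID bytes; a standalone illustration with a made-up GID:

    /* Standalone illustration: the IPv4 address recovered from an
     * IPv4-mapped GID (::ffff:192.168.1.5), which is effectively what
     * rdma_gid2ip() hands to set_av_attr() above. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t gid[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0xff, 0xff, 192, 168, 1, 5 };

            printf("saddr = %u.%u.%u.%u\n", gid[12], gid[13], gid[14], gid[15]);
            return 0;
    }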
+3 -2
drivers/infiniband/hw/ocrdma/ocrdma_ah.h
··· 46 46 enum { 47 47 OCRDMA_AH_ID_MASK = 0x3FF, 48 48 OCRDMA_AH_VLAN_VALID_MASK = 0x01, 49 - OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F 49 + OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F, 50 + OCRDMA_AH_L3_TYPE_MASK = 0x03, 51 + OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ 50 52 }; 51 - 52 53 struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); 53 54 int ocrdma_destroy_ah(struct ib_ah *); 54 55 int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
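With the new OCRDMA_AH_L3_TYPE_MASK/SHIFT, the user-visible AH id word packs the AH id in bits 0-9, the L3 header type in bits 29-30 and the VLAN-valid flag in bit 31, matching the ocrdma_create_ah() code in the previous file. A standalone check of that packing with made-up values:

    /* Standalone check of the ahid_addr packing done in ocrdma_create_ah(),
     * using the masks/shifts from this header; the id/type values are made up. */
    #include <stdio.h>
    #include <stdint.h>

    #define OCRDMA_AH_ID_MASK               0x3FF
    #define OCRDMA_AH_VLAN_VALID_MASK       0x01
    #define OCRDMA_AH_VLAN_VALID_SHIFT      0x1F
    #define OCRDMA_AH_L3_TYPE_MASK          0x03
    #define OCRDMA_AH_L3_TYPE_SHIFT         0x1D

    int main(void)
    {
            uint32_t ahid = 0;
            uint32_t id = 0x12a, hdr_type = 0x01 /* IPv4 */, isvlan = 1;

            ahid |= id & OCRDMA_AH_ID_MASK;
            ahid |= (hdr_type & OCRDMA_AH_L3_TYPE_MASK) << OCRDMA_AH_L3_TYPE_SHIFT;
            if (isvlan)
                    ahid |= (uint32_t)OCRDMA_AH_VLAN_VALID_MASK <<
                            OCRDMA_AH_VLAN_VALID_SHIFT;

            printf("ahid_addr word = 0x%08x\n", ahid);      /* 0xa000012a */
            return 0;
    }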
+27 -6
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
··· 1113 1113 static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, 1114 1114 void *payload_va) 1115 1115 { 1116 - int status = 0; 1116 + int status; 1117 1117 struct ocrdma_mbx_rsp *rsp = payload_va; 1118 1118 1119 1119 if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> ··· 1144 1144 attr->max_pd = 1145 1145 (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >> 1146 1146 OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT; 1147 + attr->udp_encap = (rsp->max_pd_ca_ack_delay & 1148 + OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >> 1149 + OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT; 1147 1150 attr->max_dpp_pds = 1148 1151 (rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >> 1149 1152 OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET; ··· 2141 2138 enum ib_qp_state *old_ib_state) 2142 2139 { 2143 2140 unsigned long flags; 2144 - int status = 0; 2145 2141 enum ocrdma_qp_state new_state; 2146 2142 new_state = get_ocrdma_qp_state(new_ib_state); 2147 2143 ··· 2165 2163 qp->state = new_state; 2166 2164 2167 2165 spin_unlock_irqrestore(&qp->q_lock, flags); 2168 - return status; 2166 + return 0; 2169 2167 } 2170 2168 2171 2169 static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp) ··· 2503 2501 union ib_gid sgid, zgid; 2504 2502 struct ib_gid_attr sgid_attr; 2505 2503 u32 vlan_id = 0xFFFF; 2506 - u8 mac_addr[6]; 2504 + u8 mac_addr[6], hdr_type; 2505 + union { 2506 + struct sockaddr _sockaddr; 2507 + struct sockaddr_in _sockaddr_in; 2508 + struct sockaddr_in6 _sockaddr_in6; 2509 + } sgid_addr, dgid_addr; 2507 2510 struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); 2508 2511 2509 2512 if ((ah_attr->ah_flags & IB_AH_GRH) == 0) ··· 2523 2516 cmd->params.hop_lmt_rq_psn |= 2524 2517 (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT); 2525 2518 cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; 2519 + 2520 + /* GIDs */ 2526 2521 memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], 2527 2522 sizeof(cmd->params.dgid)); 2528 2523 ··· 2547 2538 return status; 2548 2539 cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | 2549 2540 (mac_addr[2] << 16) | (mac_addr[3] << 24); 2541 + 2542 + hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); 2543 + if (hdr_type == RDMA_NETWORK_IPV4) { 2544 + rdma_gid2ip(&sgid_addr._sockaddr, &sgid); 2545 + rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid); 2546 + memcpy(&cmd->params.dgid[0], 2547 + &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); 2548 + memcpy(&cmd->params.sgid[0], 2549 + &sgid_addr._sockaddr_in.sin_addr.s_addr, 4); 2550 + } 2550 2551 /* convert them to LE format. */ 2551 2552 ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); 2552 2553 ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); ··· 2577 2558 cmd->params.rnt_rc_sl_fl |= 2578 2559 (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; 2579 2560 } 2580 - 2561 + cmd->params.max_sge_recv_flags |= ((hdr_type << 2562 + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT) & 2563 + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK); 2581 2564 return 0; 2582 2565 } 2583 2566 ··· 2892 2871 static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, 2893 2872 struct ocrdma_dcbx_cfg *dcbxcfg) 2894 2873 { 2895 - int status = 0; 2874 + int status; 2896 2875 dma_addr_t pa; 2897 2876 struct ocrdma_mqe cmd; 2898 2877
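ocrdma_mbx_query_config() above pulls the new udp_encap capability out of the max_pd_ca_ack_delay response word using the L3_TYPE mask and shift defined in ocrdma_sli.h. A standalone check of that extraction with a made-up response word:

    /* Standalone check of the udp_encap extraction above; the response
     * word is hypothetical, the mask/shift come from ocrdma_sli.h. */
    #include <stdio.h>
    #include <stdint.h>

    #define OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT      3
    #define OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK       0x18

    int main(void)
    {
            uint32_t max_pd_ca_ack_delay = 0x00400018;      /* both L3-type bits set */
            uint32_t udp_encap = (max_pd_ca_ack_delay &
                                  OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >>
                                  OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT;

            printf("udp_encap = 0x%x\n", udp_encap);        /* 0x3: IPv4 and IPv6 capable */
            return 0;
    }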
+4
drivers/infiniband/hw/ocrdma/ocrdma_main.c
··· 89 89 struct ib_port_immutable *immutable) 90 90 { 91 91 struct ib_port_attr attr; 92 + struct ocrdma_dev *dev; 92 93 int err; 93 94 95 + dev = get_ocrdma_dev(ibdev); 94 96 err = ocrdma_query_port(ibdev, port_num, &attr); 95 97 if (err) 96 98 return err; ··· 100 98 immutable->pkey_tbl_len = attr.pkey_tbl_len; 101 99 immutable->gid_tbl_len = attr.gid_tbl_len; 102 100 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; 101 + if (ocrdma_is_udp_encap_supported(dev)) 102 + immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; 103 103 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 104 104 105 105 return 0;
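Setting RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP in the port's core_cap_flags is how the core layer learns that the port can run RoCEv2. A minimal sketch of testing that flag from the immutable port data; the helper name below is illustrative only, not an existing core API:

    #include <rdma/ib_verbs.h>

    /* Illustrative helper only: the flag is the one set in the diff above,
     * the function name is made up for this sketch. */
    static bool port_supports_roce_v2(const struct ib_port_immutable *immutable)
    {
            return !!(immutable->core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
    }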
+13 -3
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
··· 140 140 OCRDMA_DB_RQ_SHIFT = 24 141 141 }; 142 142 143 - #define OCRDMA_ROUDP_FLAGS_SHIFT 0x03 143 + enum { 144 + OCRDMA_L3_TYPE_IB_GRH = 0x00, 145 + OCRDMA_L3_TYPE_IPV4 = 0x01, 146 + OCRDMA_L3_TYPE_IPV6 = 0x02 147 + }; 144 148 145 149 #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ 146 150 #define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ ··· 550 546 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8, 551 547 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF << 552 548 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT, 553 - 549 + OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT = 3, 550 + OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, 554 551 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, 555 552 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, 556 553 OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, ··· 1112 1107 OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), 1113 1108 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), 1114 1109 OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9), 1110 + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT = 11, 1111 + OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK = BIT(11) | BIT(12) | BIT(13), 1115 1112 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, 1116 1113 OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << 1117 1114 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, ··· 1742 1735 1743 1736 /* w1 */ 1744 1737 OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16, 1738 + OCRDMA_CQE_UD_XFER_LEN_MASK = 0x1FFF, 1745 1739 OCRDMA_CQE_PKEY_SHIFT = 0, 1746 1740 OCRDMA_CQE_PKEY_MASK = 0xFFFF, 1741 + OCRDMA_CQE_UD_L3TYPE_SHIFT = 29, 1742 + OCRDMA_CQE_UD_L3TYPE_MASK = 0x07, 1747 1743 1748 1744 /* w2 */ 1749 1745 OCRDMA_CQE_QPN_SHIFT = 0, ··· 1871 1861 u32 rsvd_dest_qpn; 1872 1862 u32 qkey; 1873 1863 u32 rsvd_ahid; 1874 - u32 rsvd; 1864 + u32 hdr_type; 1875 1865 }; 1876 1866 1877 1867 /* extended wqe followed by hdr_wqe for Fast Memory register */
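The new CQE fields above let the UD receive path recover both the transfer length (bits 16-28) and the L3 header type (bits 29-31) from the same rxlen_pkey word, as ocrdma_update_ud_rcqe() in ocrdma_verbs.c does further down. A standalone decode of a made-up CQE word with these masks:

    /* Standalone decode of a UD CQE rxlen_pkey word with the masks above;
     * the example value is made up. */
    #include <stdio.h>
    #include <stdint.h>

    #define OCRDMA_CQE_UD_XFER_LEN_SHIFT    16
    #define OCRDMA_CQE_UD_XFER_LEN_MASK     0x1FFF
    #define OCRDMA_CQE_PKEY_SHIFT           0
    #define OCRDMA_CQE_PKEY_MASK            0xFFFF
    #define OCRDMA_CQE_UD_L3TYPE_SHIFT      29
    #define OCRDMA_CQE_UD_L3TYPE_MASK       0x07

    int main(void)
    {
            uint32_t rxlen_pkey = 0x2040ffff;       /* hypothetical CPU-endian word */
            uint32_t len  = (rxlen_pkey >> OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
                            OCRDMA_CQE_UD_XFER_LEN_MASK;
            uint32_t l3   = (rxlen_pkey >> OCRDMA_CQE_UD_L3TYPE_SHIFT) &
                            OCRDMA_CQE_UD_L3TYPE_MASK;
            uint32_t pkey = (rxlen_pkey >> OCRDMA_CQE_PKEY_SHIFT) & OCRDMA_CQE_PKEY_MASK;

            /* prints: len=64 l3_type=1 pkey=0xffff */
            printf("len=%u l3_type=%u pkey=0x%x\n", len, l3, pkey);
            return 0;
    }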
+2 -2
drivers/infiniband/hw/ocrdma/ocrdma_stats.c
··· 610 610 static void ocrdma_update_stats(struct ocrdma_dev *dev) 611 611 { 612 612 ulong now = jiffies, secs; 613 - int status = 0; 613 + int status; 614 614 struct ocrdma_rdma_stats_resp *rdma_stats = 615 615 (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; 616 616 struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; ··· 641 641 { 642 642 char tmp_str[32]; 643 643 long reset; 644 - int status = 0; 644 + int status; 645 645 struct ocrdma_stats *pstats = filp->private_data; 646 646 struct ocrdma_dev *dev = pstats->dev; 647 647
+26 -12
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
··· 419 419 struct ib_udata *udata) 420 420 { 421 421 struct ocrdma_pd *pd = NULL; 422 - int status = 0; 422 + int status; 423 423 424 424 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 425 425 if (!pd) ··· 468 468 static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, 469 469 struct ocrdma_pd *pd) 470 470 { 471 - int status = 0; 471 + int status; 472 472 473 473 if (dev->pd_mgr->pd_prealloc_valid) 474 474 status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); ··· 596 596 597 597 int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) 598 598 { 599 - int status = 0; 599 + int status; 600 600 struct ocrdma_mm *mm, *tmp; 601 601 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); 602 602 struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); ··· 623 623 unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; 624 624 u64 unmapped_db = (u64) dev->nic_info.unmapped_db; 625 625 unsigned long len = (vma->vm_end - vma->vm_start); 626 - int status = 0; 626 + int status; 627 627 bool found; 628 628 629 629 if (vma->vm_start & (PAGE_SIZE - 1)) ··· 1285 1285 struct ib_udata *udata, int dpp_offset, 1286 1286 int dpp_credit_lmt, int srq) 1287 1287 { 1288 - int status = 0; 1288 + int status; 1289 1289 u64 usr_db; 1290 1290 struct ocrdma_create_qp_uresp uresp; 1291 1291 struct ocrdma_pd *pd = qp->pd; ··· 1494 1494 */ 1495 1495 if (status < 0) 1496 1496 return status; 1497 - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); 1498 - 1499 - return status; 1497 + return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); 1500 1498 } 1501 1499 1502 1500 int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ··· 1947 1949 enum ib_srq_attr_mask srq_attr_mask, 1948 1950 struct ib_udata *udata) 1949 1951 { 1950 - int status = 0; 1952 + int status; 1951 1953 struct ocrdma_srq *srq; 1952 1954 1953 1955 srq = get_ocrdma_srq(ibsrq); ··· 2003 2005 else 2004 2006 ud_hdr->qkey = ud_wr(wr)->remote_qkey; 2005 2007 ud_hdr->rsvd_ahid = ah->id; 2008 + ud_hdr->hdr_type = ah->hdr_type; 2006 2009 if (ah->av->valid & OCRDMA_AV_VLAN_VALID) 2007 2010 hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); 2008 2011 } ··· 2716 2717 return expand; 2717 2718 } 2718 2719 2719 - static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) 2720 + static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc, 2721 + struct ocrdma_cqe *cqe) 2720 2722 { 2721 2723 int status; 2724 + u16 hdr_type = 0; 2722 2725 2723 2726 status = (le32_to_cpu(cqe->flags_status_srcqpn) & 2724 2727 OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; ··· 2729 2728 ibwc->pkey_index = 0; 2730 2729 ibwc->wc_flags = IB_WC_GRH; 2731 2730 ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >> 2732 - OCRDMA_CQE_UD_XFER_LEN_SHIFT); 2731 + OCRDMA_CQE_UD_XFER_LEN_SHIFT) & 2732 + OCRDMA_CQE_UD_XFER_LEN_MASK; 2733 + 2734 + if (ocrdma_is_udp_encap_supported(dev)) { 2735 + hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >> 2736 + OCRDMA_CQE_UD_L3TYPE_SHIFT) & 2737 + OCRDMA_CQE_UD_L3TYPE_MASK; 2738 + ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; 2739 + ibwc->network_hdr_type = hdr_type; 2740 + } 2741 + 2733 2742 return status; 2734 2743 } 2735 2744 ··· 2802 2791 static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, 2803 2792 struct ocrdma_cqe *cqe, struct ib_wc *ibwc) 2804 2793 { 2794 + struct ocrdma_dev *dev; 2795 + 2796 + dev = get_ocrdma_dev(qp->ibqp.device); 2805 2797 ibwc->opcode = IB_WC_RECV; 2806 2798 ibwc->qp = &qp->ibqp; 2807 2799 ibwc->status = IB_WC_SUCCESS; 2808 2800 2809 2801 if (qp->qp_type == IB_QPT_UD || 
qp->qp_type == IB_QPT_GSI) 2810 - ocrdma_update_ud_rcqe(ibwc, cqe); 2802 + ocrdma_update_ud_rcqe(dev, ibwc, cqe); 2811 2803 else 2812 2804 ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen); 2813 2805
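On the receive side, the hunk above exports the same L3 type to consumers through the work completion. A small, hypothetical polling snippet showing how a ULP might consume the IB_WC_WITH_NETWORK_HDR_TYPE flag; report_rx_network_type() is an invented name:

#include <rdma/ib_verbs.h>

/* Hypothetical consumer: report how a UD packet arrived on the wire. */
static void report_rx_network_type(struct ib_cq *cq)
{
	struct ib_wc wc;

	if (ib_poll_cq(cq, 1, &wc) != 1 || wc.status != IB_WC_SUCCESS)
		return;

	if (wc.wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE) {
		switch (wc.network_hdr_type) {
		case RDMA_NETWORK_IPV4:
			pr_debug("RoCEv2 packet over IPv4\n");
			break;
		case RDMA_NETWORK_IPV6:
			pr_debug("RoCEv2 packet over IPv6\n");
			break;
		default:
			pr_debug("GRH (IB/RoCEv1) header\n");
			break;
		}
	}
}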
+10 -1
drivers/infiniband/ulp/iser/iscsi_iser.c
··· 969 969 970 970 static int iscsi_iser_slave_alloc(struct scsi_device *sdev) 971 971 { 972 - blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); 972 + struct iscsi_session *session; 973 + struct iser_conn *iser_conn; 974 + struct ib_device *ib_dev; 975 + 976 + session = starget_to_session(scsi_target(sdev))->dd_data; 977 + iser_conn = session->leadconn->dd_data; 978 + ib_dev = iser_conn->ib_conn.device->ib_device; 979 + 980 + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 981 + blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); 973 982 974 983 return 0; 975 984 }
+15 -8
drivers/infiniband/ulp/iser/iser_verbs.c
··· 252 252 } 253 253 254 254 static int 255 - iser_alloc_reg_res(struct ib_device *ib_device, 255 + iser_alloc_reg_res(struct iser_device *device, 256 256 struct ib_pd *pd, 257 257 struct iser_reg_resources *res, 258 258 unsigned int size) 259 259 { 260 + struct ib_device *ib_dev = device->ib_device; 261 + enum ib_mr_type mr_type; 260 262 int ret; 261 263 262 - res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); 264 + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 265 + mr_type = IB_MR_TYPE_SG_GAPS; 266 + else 267 + mr_type = IB_MR_TYPE_MEM_REG; 268 + 269 + res->mr = ib_alloc_mr(pd, mr_type, size); 263 270 if (IS_ERR(res->mr)) { 264 271 ret = PTR_ERR(res->mr); 265 272 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); ··· 284 277 } 285 278 286 279 static int 287 - iser_alloc_pi_ctx(struct ib_device *ib_device, 280 + iser_alloc_pi_ctx(struct iser_device *device, 288 281 struct ib_pd *pd, 289 282 struct iser_fr_desc *desc, 290 283 unsigned int size) ··· 298 291 299 292 pi_ctx = desc->pi_ctx; 300 293 301 - ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size); 294 + ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size); 302 295 if (ret) { 303 296 iser_err("failed to allocate reg_resources\n"); 304 297 goto alloc_reg_res_err; ··· 331 324 } 332 325 333 326 static struct iser_fr_desc * 334 - iser_create_fastreg_desc(struct ib_device *ib_device, 327 + iser_create_fastreg_desc(struct iser_device *device, 335 328 struct ib_pd *pd, 336 329 bool pi_enable, 337 330 unsigned int size) ··· 343 336 if (!desc) 344 337 return ERR_PTR(-ENOMEM); 345 338 346 - ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size); 339 + ret = iser_alloc_reg_res(device, pd, &desc->rsc, size); 347 340 if (ret) 348 341 goto reg_res_alloc_failure; 349 342 350 343 if (pi_enable) { 351 - ret = iser_alloc_pi_ctx(ib_device, pd, desc, size); 344 + ret = iser_alloc_pi_ctx(device, pd, desc, size); 352 345 if (ret) 353 346 goto pi_ctx_alloc_failure; 354 347 } ··· 381 374 spin_lock_init(&fr_pool->lock); 382 375 fr_pool->size = 0; 383 376 for (i = 0; i < cmds_max; i++) { 384 - desc = iser_create_fastreg_desc(device->ib_device, device->pd, 377 + desc = iser_create_fastreg_desc(device, device->pd, 385 378 ib_conn->pi_support, size); 386 379 if (IS_ERR(desc)) { 387 380 ret = PTR_ERR(desc);
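Together with the iscsi_iser.c hunk above, the pattern is: when the device advertises IB_DEVICE_SG_GAPS_REG, allocate IB_MR_TYPE_SG_GAPS MRs and stop enforcing the block-layer virt boundary. A condensed, hypothetical version of that allocation and mapping flow; alloc_rdma_mr() and map_rdma_mr() are invented names:

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Hypothetical registration path: prefer a gaps-capable MR when offered. */
static struct ib_mr *alloc_rdma_mr(struct ib_device *ibdev, struct ib_pd *pd,
				   unsigned int max_sge)
{
	enum ib_mr_type type = IB_MR_TYPE_MEM_REG;

	if (ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		type = IB_MR_TYPE_SG_GAPS;	/* arbitrary SG lists allowed */

	return ib_alloc_mr(pd, type, max_sge);
}

/* Map an SG list; page_size is ignored for IB_MR_TYPE_SG_GAPS MRs. */
static int map_rdma_mr(struct ib_mr *mr, struct scatterlist *sg, int nents)
{
	int mapped = ib_map_mr_sg(mr, sg, nents, PAGE_SIZE);

	if (mapped < 0)
		return mapped;
	return mapped == nents ? 0 : -EINVAL;
}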
+4 -1
drivers/net/ethernet/mellanox/mlx4/fw.c
··· 157 157 [29] = "802.1ad offload support", 158 158 [31] = "Modifying loopback source checks using UPDATE_QP support", 159 159 [32] = "Loopback source checks support", 160 - [33] = "RoCEv2 support" 160 + [33] = "RoCEv2 support", 161 + [34] = "DMFS Sniffer support (UC & MC)" 161 162 }; 162 163 int i; 163 164 ··· 811 810 if (field & 0x80) 812 811 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; 813 812 dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; 813 + if (field & 0x20) 814 + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; 814 815 MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); 815 816 if (field & 0x80) 816 817 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON;
+4 -2
drivers/net/ethernet/mellanox/mlx4/mcg.c
··· 752 752 [MLX4_FS_REGULAR] = 0x0, 753 753 [MLX4_FS_ALL_DEFAULT] = 0x1, 754 754 [MLX4_FS_MC_DEFAULT] = 0x3, 755 - [MLX4_FS_UC_SNIFFER] = 0x4, 756 - [MLX4_FS_MC_SNIFFER] = 0x5, 755 + [MLX4_FS_MIRROR_RX_PORT] = 0x4, 756 + [MLX4_FS_MIRROR_SX_PORT] = 0x5, 757 + [MLX4_FS_UC_SNIFFER] = 0x6, 758 + [MLX4_FS_MC_SNIFFER] = 0x7, 757 759 }; 758 760 759 761 int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 515 515 struct mlx5_uar cq_uar; 516 516 u32 pdn; 517 517 u32 tdn; 518 - struct mlx5_core_mr mr; 518 + struct mlx5_core_mkey mkey; 519 519 struct mlx5e_rq drop_rq; 520 520 521 521 struct mlx5e_channel **channel;
+6 -6
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 982 982 c->cpu = cpu; 983 983 c->pdev = &priv->mdev->pdev->dev; 984 984 c->netdev = priv->netdev; 985 - c->mkey_be = cpu_to_be32(priv->mr.key); 985 + c->mkey_be = cpu_to_be32(priv->mkey.key); 986 986 c->num_tc = priv->params.num_tc; 987 987 988 988 mlx5e_build_channeltc_to_txq_map(priv, ix); ··· 2194 2194 } 2195 2195 2196 2196 static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, 2197 - struct mlx5_core_mr *mr) 2197 + struct mlx5_core_mkey *mkey) 2198 2198 { 2199 2199 struct mlx5_core_dev *mdev = priv->mdev; 2200 2200 struct mlx5_create_mkey_mbox_in *in; ··· 2210 2210 in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); 2211 2211 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 2212 2212 2213 - err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, 2213 + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, 2214 2214 NULL); 2215 2215 2216 2216 kvfree(in); ··· 2259 2259 goto err_dealloc_pd; 2260 2260 } 2261 2261 2262 - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); 2262 + err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); 2263 2263 if (err) { 2264 2264 mlx5_core_err(mdev, "create mkey failed, %d\n", err); 2265 2265 goto err_dealloc_transport_domain; ··· 2333 2333 mlx5e_destroy_tises(priv); 2334 2334 2335 2335 err_destroy_mkey: 2336 - mlx5_core_destroy_mkey(mdev, &priv->mr); 2336 + mlx5_core_destroy_mkey(mdev, &priv->mkey); 2337 2337 2338 2338 err_dealloc_transport_domain: 2339 2339 mlx5_core_dealloc_transport_domain(mdev, priv->tdn); ··· 2367 2367 mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); 2368 2368 mlx5e_close_drop_rq(priv); 2369 2369 mlx5e_destroy_tises(priv); 2370 - mlx5_core_destroy_mkey(priv->mdev, &priv->mr); 2370 + mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); 2371 2371 mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); 2372 2372 mlx5_core_dealloc_pd(priv->mdev, priv->pdn); 2373 2373 mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
+212 -13
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
··· 77 77 #define KERNEL_NUM_PRIOS 1 78 78 #define KENREL_MIN_LEVEL 2 79 79 80 + #define ANCHOR_MAX_FT 1 81 + #define ANCHOR_NUM_PRIOS 1 82 + #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) 80 83 struct node_caps { 81 84 size_t arr_sz; 82 85 long *caps; ··· 95 92 int max_ft; 96 93 } root_fs = { 97 94 .type = FS_TYPE_NAMESPACE, 98 - .ar_size = 3, 95 + .ar_size = 4, 99 96 .children = (struct init_tree_node[]) { 100 97 ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, 101 98 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), ··· 111 108 FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), 112 109 FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), 113 110 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), 111 + ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, 112 + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), 114 113 } 115 114 }; 116 115 ··· 201 196 202 197 static int tree_remove_node(struct fs_node *node) 203 198 { 204 - if (atomic_read(&node->refcount) > 1) 205 - return -EPERM; 199 + if (atomic_read(&node->refcount) > 1) { 200 + atomic_dec(&node->refcount); 201 + return -EEXIST; 202 + } 206 203 tree_put_node(node); 207 204 return 0; 208 205 } ··· 367 360 memcpy(match_value, fte->val, sizeof(fte->val)); 368 361 fs_get_obj(ft, fg->node.parent); 369 362 list_del(&rule->node.list); 363 + if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { 364 + mutex_lock(&rule->dest_attr.ft->lock); 365 + list_del(&rule->next_ft); 366 + mutex_unlock(&rule->dest_attr.ft->lock); 367 + } 370 368 fte->dests_size--; 371 369 if (fte->dests_size) { 372 370 err = mlx5_cmd_update_fte(dev, ft, ··· 477 465 ft->node.type = FS_TYPE_FLOW_TABLE; 478 466 ft->type = table_type; 479 467 ft->max_fte = max_fte; 468 + INIT_LIST_HEAD(&ft->fwd_rules); 469 + mutex_init(&ft->lock); 480 470 481 471 return ft; 482 472 } ··· 615 601 return err; 616 602 } 617 603 604 + static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, 605 + struct mlx5_flow_destination *dest) 606 + { 607 + struct mlx5_flow_table *ft; 608 + struct mlx5_flow_group *fg; 609 + struct fs_fte *fte; 610 + int err = 0; 611 + 612 + fs_get_obj(fte, rule->node.parent); 613 + if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) 614 + return -EINVAL; 615 + lock_ref_node(&fte->node); 616 + fs_get_obj(fg, fte->node.parent); 617 + fs_get_obj(ft, fg->node.parent); 618 + 619 + memcpy(&rule->dest_attr, dest, sizeof(*dest)); 620 + err = mlx5_cmd_update_fte(get_dev(&ft->node), 621 + ft, fg->id, fte); 622 + unlock_ref_node(&fte->node); 623 + 624 + return err; 625 + } 626 + 627 + /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ 628 + static int connect_fwd_rules(struct mlx5_core_dev *dev, 629 + struct mlx5_flow_table *new_next_ft, 630 + struct mlx5_flow_table *old_next_ft) 631 + { 632 + struct mlx5_flow_destination dest; 633 + struct mlx5_flow_rule *iter; 634 + int err = 0; 635 + 636 + /* new_next_ft and old_next_ft could be NULL only 637 + * when we create/destroy the anchor flow table. 
638 + */ 639 + if (!new_next_ft || !old_next_ft) 640 + return 0; 641 + 642 + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 643 + dest.ft = new_next_ft; 644 + 645 + mutex_lock(&old_next_ft->lock); 646 + list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); 647 + mutex_unlock(&old_next_ft->lock); 648 + list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { 649 + err = mlx5_modify_rule_destination(iter, &dest); 650 + if (err) 651 + pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", 652 + new_next_ft->id); 653 + } 654 + return 0; 655 + } 656 + 618 657 static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, 619 658 struct fs_prio *prio) 620 659 { 660 + struct mlx5_flow_table *next_ft; 621 661 int err = 0; 622 662 623 663 /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ 624 664 625 665 if (list_empty(&prio->node.children)) { 626 666 err = connect_prev_fts(dev, ft, prio); 667 + if (err) 668 + return err; 669 + 670 + next_ft = find_next_chained_ft(prio); 671 + err = connect_fwd_rules(dev, ft, next_ft); 627 672 if (err) 628 673 return err; 629 674 } ··· 835 762 if (!rule) 836 763 return NULL; 837 764 765 + INIT_LIST_HEAD(&rule->next_ft); 838 766 rule->node.type = FS_TYPE_FLOW_DEST; 839 767 memcpy(&rule->dest_attr, dest, sizeof(*dest)); 840 768 ··· 856 782 return ERR_PTR(-ENOMEM); 857 783 858 784 fs_get_obj(ft, fg->node.parent); 859 - /* Add dest to dests list- added as first element after the head */ 785 + /* Add dest to dests list- we need flow tables to be in the 786 + * end of the list for forward to next prio rules. 787 + */ 860 788 tree_init_node(&rule->node, 1, del_rule); 861 - list_add_tail(&rule->node.list, &fte->node.children); 789 + if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) 790 + list_add(&rule->node.list, &fte->node.children); 791 + else 792 + list_add_tail(&rule->node.list, &fte->node.children); 862 793 fte->dests_size++; 863 794 if (fte->dests_size == 1) 864 795 err = mlx5_cmd_create_fte(get_dev(&ft->node), ··· 982 903 return fg; 983 904 } 984 905 906 + static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, 907 + struct mlx5_flow_destination *dest) 908 + { 909 + struct mlx5_flow_rule *rule; 910 + 911 + list_for_each_entry(rule, &fte->node.children, node.list) { 912 + if (rule->dest_attr.type == dest->type) { 913 + if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && 914 + dest->vport_num == rule->dest_attr.vport_num) || 915 + (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && 916 + dest->ft == rule->dest_attr.ft) || 917 + (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR && 918 + dest->tir_num == rule->dest_attr.tir_num)) 919 + return rule; 920 + } 921 + } 922 + return NULL; 923 + } 924 + 985 925 static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, 986 926 u32 *match_value, 987 927 u8 action, ··· 1017 919 nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); 1018 920 if (compare_match_value(&fg->mask, match_value, &fte->val) && 1019 921 action == fte->action && flow_tag == fte->flow_tag) { 922 + rule = find_flow_rule(fte, dest); 923 + if (rule) { 924 + atomic_inc(&rule->node.refcount); 925 + unlock_ref_node(&fte->node); 926 + unlock_ref_node(&fg->node); 927 + return rule; 928 + } 1020 929 rule = add_rule_fte(fte, fg, dest); 1021 930 unlock_ref_node(&fte->node); 1022 931 if (IS_ERR(rule)) ··· 1089 984 return rule; 1090 985 } 1091 986 1092 - struct mlx5_flow_rule * 1093 - mlx5_add_flow_rule(struct mlx5_flow_table *ft, 1094 - u8 
match_criteria_enable, 1095 - u32 *match_criteria, 1096 - u32 *match_value, 1097 - u32 action, 1098 - u32 flow_tag, 1099 - struct mlx5_flow_destination *dest) 987 + static struct mlx5_flow_rule * 988 + _mlx5_add_flow_rule(struct mlx5_flow_table *ft, 989 + u8 match_criteria_enable, 990 + u32 *match_criteria, 991 + u32 *match_value, 992 + u32 action, 993 + u32 flow_tag, 994 + struct mlx5_flow_destination *dest) 1100 995 { 1101 996 struct mlx5_flow_group *g; 1102 997 struct mlx5_flow_rule *rule; ··· 1117 1012 match_value, action, flow_tag, dest); 1118 1013 unlock: 1119 1014 unlock_ref_node(&ft->node); 1015 + return rule; 1016 + } 1017 + 1018 + static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) 1019 + { 1020 + return ((ft->type == FS_FT_NIC_RX) && 1021 + (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); 1022 + } 1023 + 1024 + struct mlx5_flow_rule * 1025 + mlx5_add_flow_rule(struct mlx5_flow_table *ft, 1026 + u8 match_criteria_enable, 1027 + u32 *match_criteria, 1028 + u32 *match_value, 1029 + u32 action, 1030 + u32 flow_tag, 1031 + struct mlx5_flow_destination *dest) 1032 + { 1033 + struct mlx5_flow_root_namespace *root = find_root(&ft->node); 1034 + struct mlx5_flow_destination gen_dest; 1035 + struct mlx5_flow_table *next_ft = NULL; 1036 + struct mlx5_flow_rule *rule = NULL; 1037 + u32 sw_action = action; 1038 + struct fs_prio *prio; 1039 + 1040 + fs_get_obj(prio, ft->node.parent); 1041 + if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { 1042 + if (!fwd_next_prio_supported(ft)) 1043 + return ERR_PTR(-EOPNOTSUPP); 1044 + if (dest) 1045 + return ERR_PTR(-EINVAL); 1046 + mutex_lock(&root->chain_lock); 1047 + next_ft = find_next_chained_ft(prio); 1048 + if (next_ft) { 1049 + gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; 1050 + gen_dest.ft = next_ft; 1051 + dest = &gen_dest; 1052 + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1053 + } else { 1054 + mutex_unlock(&root->chain_lock); 1055 + return ERR_PTR(-EOPNOTSUPP); 1056 + } 1057 + } 1058 + 1059 + rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, 1060 + match_value, action, flow_tag, dest); 1061 + 1062 + if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { 1063 + if (!IS_ERR_OR_NULL(rule) && 1064 + (list_empty(&rule->next_ft))) { 1065 + mutex_lock(&next_ft->lock); 1066 + list_add(&rule->next_ft, &next_ft->fwd_rules); 1067 + mutex_unlock(&next_ft->lock); 1068 + rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 1069 + } 1070 + mutex_unlock(&root->chain_lock); 1071 + } 1120 1072 return rule; 1121 1073 } 1122 1074 EXPORT_SYMBOL(mlx5_add_flow_rule); ··· 1239 1077 return 0; 1240 1078 1241 1079 next_ft = find_next_chained_ft(prio); 1080 + err = connect_fwd_rules(dev, next_ft, ft); 1081 + if (err) 1082 + return err; 1083 + 1242 1084 err = connect_prev_fts(dev, next_ft, prio); 1243 1085 if (err) 1244 1086 mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", ··· 1292 1126 case MLX5_FLOW_NAMESPACE_BYPASS: 1293 1127 case MLX5_FLOW_NAMESPACE_KERNEL: 1294 1128 case MLX5_FLOW_NAMESPACE_LEFTOVERS: 1129 + case MLX5_FLOW_NAMESPACE_ANCHOR: 1295 1130 prio = type; 1296 1131 break; 1297 1132 case MLX5_FLOW_NAMESPACE_FDB: ··· 1518 1351 } 1519 1352 } 1520 1353 1354 + #define ANCHOR_PRIO 0 1355 + #define ANCHOR_SIZE 1 1356 + static int create_anchor_flow_table(struct mlx5_core_dev 1357 + *dev) 1358 + { 1359 + struct mlx5_flow_namespace *ns = NULL; 1360 + struct mlx5_flow_table *ft; 1361 + 1362 + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); 1363 + if (!ns) 1364 + return 
-EINVAL; 1365 + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); 1366 + if (IS_ERR(ft)) { 1367 + mlx5_core_err(dev, "Failed to create last anchor flow table"); 1368 + return PTR_ERR(ft); 1369 + } 1370 + return 0; 1371 + } 1372 + 1521 1373 static int init_root_ns(struct mlx5_core_dev *dev) 1522 1374 { 1523 1375 ··· 1548 1362 goto cleanup; 1549 1363 1550 1364 set_prio_attrs(dev->priv.root_ns); 1365 + 1366 + if (create_anchor_flow_table(dev)) 1367 + goto cleanup; 1551 1368 1552 1369 return 0; 1553 1370 ··· 1581 1392 root_ns = NULL; 1582 1393 } 1583 1394 1395 + static void destroy_flow_tables(struct fs_prio *prio) 1396 + { 1397 + struct mlx5_flow_table *iter; 1398 + struct mlx5_flow_table *tmp; 1399 + 1400 + fs_for_each_ft_safe(iter, tmp, prio) 1401 + mlx5_destroy_flow_table(iter); 1402 + } 1403 + 1584 1404 static void cleanup_root_ns(struct mlx5_core_dev *dev) 1585 1405 { 1586 1406 struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; ··· 1618 1420 list); 1619 1421 1620 1422 fs_get_obj(obj_iter_prio2, iter_prio2); 1423 + destroy_flow_tables(obj_iter_prio2); 1621 1424 if (tree_remove_node(iter_prio2)) { 1622 1425 mlx5_core_warn(dev, 1623 1426 "Priority %d wasn't destroyed, refcount > 1\n",
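The fs_core.c additions above implement "forward to next priority": a rule created with MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO must not name a destination, the core resolves the next chained flow table itself (the new anchor table guarantees one always exists), and connect_fwd_rules() re-points such rules when tables come and go. A hypothetical caller-side sketch, assuming the era's mlx5_vzalloc() helper; add_goto_next_prio_rule() is an invented name:

#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>

/* Hypothetical catch-all rule that forwards to the next priority's table.
 * Per the core code above, dest must be NULL for this action, and the call
 * fails with -EOPNOTSUPP when nic_rx_multi_path_tirs is not supported.
 */
static struct mlx5_flow_rule *add_goto_next_prio_rule(struct mlx5_flow_table *ft)
{
	int sz = MLX5_ST_SZ_BYTES(fte_match_param);
	struct mlx5_flow_rule *rule;
	u32 *match_c, *match_v;

	match_c = mlx5_vzalloc(sz);		/* zero mask ... */
	match_v = mlx5_vzalloc(sz);		/* ... matches every packet */
	if (!match_c || !match_v) {
		rule = ERR_PTR(-ENOMEM);
		goto out;
	}

	rule = mlx5_add_flow_rule(ft, 0 /* match_criteria_enable */,
				  match_c, match_v,
				  MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO,
				  MLX5_FS_DEFAULT_FLOW_TAG, NULL /* dest */);
out:
	kvfree(match_c);
	kvfree(match_v);
	return rule;
}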
+15
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
··· 68 68 struct mlx5_flow_rule { 69 69 struct fs_node node; 70 70 struct mlx5_flow_destination dest_attr; 71 + /* next_ft should be accessed under chain_lock and only if 72 + * the rule's sw_action is FWD_NEXT_PRIO. 73 + */ 74 + struct list_head next_ft; 75 + u32 sw_action; 71 76 }; 72 77 73 78 /* Type of children is mlx5_flow_group */ ··· 87 82 unsigned int required_groups; 88 83 unsigned int num_groups; 89 84 } autogroup; 85 + /* Protect fwd_rules */ 86 + struct mutex lock; 87 + /* FWD rules that point at this flow table */ 88 + struct list_head fwd_rules; 90 89 }; 91 90 92 91 /* Type of children is mlx5_flow_rule */ ··· 151 142 #define fs_list_for_each_entry(pos, root) \ 152 143 list_for_each_entry(pos, root, node.list) 153 144 145 + #define fs_list_for_each_entry_safe(pos, tmp, root) \ 146 + list_for_each_entry_safe(pos, tmp, root, node.list) 147 + 154 148 #define fs_for_each_ns_or_ft_reverse(pos, prio) \ 155 149 list_for_each_entry_reverse(pos, &(prio)->node.children, list) ··· 168 156 169 157 #define fs_for_each_ft(pos, prio) \ 170 158 fs_list_for_each_entry(pos, &(prio)->node.children) 159 + 160 + #define fs_for_each_ft_safe(pos, tmp, prio) \ 161 + fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children) 171 162 172 163 #define fs_for_each_fg(pos, ft) \ 173 164 fs_list_for_each_entry(pos, &(ft)->node.children)
+3 -3
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 1117 1117 mlx5_init_cq_table(dev); 1118 1118 mlx5_init_qp_table(dev); 1119 1119 mlx5_init_srq_table(dev); 1120 - mlx5_init_mr_table(dev); 1120 + mlx5_init_mkey_table(dev); 1121 1121 1122 1122 err = mlx5_init_fs(dev); 1123 1123 if (err) { ··· 1164 1164 err_reg_dev: 1165 1165 mlx5_cleanup_fs(dev); 1166 1166 err_fs: 1167 - mlx5_cleanup_mr_table(dev); 1167 + mlx5_cleanup_mkey_table(dev); 1168 1168 mlx5_cleanup_srq_table(dev); 1169 1169 mlx5_cleanup_qp_table(dev); 1170 1170 mlx5_cleanup_cq_table(dev); ··· 1237 1237 #endif 1238 1238 1239 1239 mlx5_cleanup_fs(dev); 1240 - mlx5_cleanup_mr_table(dev); 1240 + mlx5_cleanup_mkey_table(dev); 1241 1241 mlx5_cleanup_srq_table(dev); 1242 1242 mlx5_cleanup_qp_table(dev); 1243 1243 mlx5_cleanup_cq_table(dev);
+28 -26
drivers/net/ethernet/mellanox/mlx5/core/mr.c
··· 36 36 #include <linux/mlx5/cmd.h> 37 37 #include "mlx5_core.h" 38 38 39 - void mlx5_init_mr_table(struct mlx5_core_dev *dev) 39 + void mlx5_init_mkey_table(struct mlx5_core_dev *dev) 40 40 { 41 - struct mlx5_mr_table *table = &dev->priv.mr_table; 41 + struct mlx5_mkey_table *table = &dev->priv.mkey_table; 42 42 43 43 memset(table, 0, sizeof(*table)); 44 44 rwlock_init(&table->lock); 45 45 INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); 46 46 } 47 47 48 - void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) 48 + void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) 49 49 { 50 50 } 51 51 52 - int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 52 + int mlx5_core_create_mkey(struct mlx5_core_dev *dev, 53 + struct mlx5_core_mkey *mkey, 53 54 struct mlx5_create_mkey_mbox_in *in, int inlen, 54 55 mlx5_cmd_cbk_t callback, void *context, 55 56 struct mlx5_create_mkey_mbox_out *out) 56 57 { 57 - struct mlx5_mr_table *table = &dev->priv.mr_table; 58 + struct mlx5_mkey_table *table = &dev->priv.mkey_table; 58 59 struct mlx5_create_mkey_mbox_out lout; 59 60 int err; 60 61 u8 key; ··· 84 83 return mlx5_cmd_status_to_err(&lout.hdr); 85 84 } 86 85 87 - mr->iova = be64_to_cpu(in->seg.start_addr); 88 - mr->size = be64_to_cpu(in->seg.len); 89 - mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; 90 - mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; 86 + mkey->iova = be64_to_cpu(in->seg.start_addr); 87 + mkey->size = be64_to_cpu(in->seg.len); 88 + mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; 89 + mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; 91 90 92 91 mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", 93 - be32_to_cpu(lout.mkey), key, mr->key); 92 + be32_to_cpu(lout.mkey), key, mkey->key); 94 93 95 - /* connect to MR tree */ 94 + /* connect to mkey tree */ 96 95 write_lock_irq(&table->lock); 97 - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); 96 + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); 98 97 write_unlock_irq(&table->lock); 99 98 if (err) { 100 - mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", 101 - mlx5_base_mkey(mr->key), err); 102 - mlx5_core_destroy_mkey(dev, mr); 99 + mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", 100 + mlx5_base_mkey(mkey->key), err); 101 + mlx5_core_destroy_mkey(dev, mkey); 103 102 } 104 103 105 104 return err; 106 105 } 107 106 EXPORT_SYMBOL(mlx5_core_create_mkey); 108 107 109 - int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) 108 + int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, 109 + struct mlx5_core_mkey *mkey) 110 110 { 111 - struct mlx5_mr_table *table = &dev->priv.mr_table; 111 + struct mlx5_mkey_table *table = &dev->priv.mkey_table; 112 112 struct mlx5_destroy_mkey_mbox_in in; 113 113 struct mlx5_destroy_mkey_mbox_out out; 114 - struct mlx5_core_mr *deleted_mr; 114 + struct mlx5_core_mkey *deleted_mkey; 115 115 unsigned long flags; 116 116 int err; 117 117 ··· 120 118 memset(&out, 0, sizeof(out)); 121 119 122 120 write_lock_irqsave(&table->lock, flags); 123 - deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); 121 + deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); 124 122 write_unlock_irqrestore(&table->lock, flags); 125 - if (!deleted_mr) { 126 - mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", 127 - mlx5_base_mkey(mr->key)); 123 + if (!deleted_mkey) { 124 + mlx5_core_warn(dev, "failed radix tree delete of mkey 
0x%x\n", 125 + mlx5_base_mkey(mkey->key)); 128 126 return -ENOENT; 129 127 } 130 128 131 129 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); 132 - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); 130 + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); 133 131 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); 134 132 if (err) 135 133 return err; ··· 141 139 } 142 140 EXPORT_SYMBOL(mlx5_core_destroy_mkey); 143 141 144 - int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 142 + int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, 145 143 struct mlx5_query_mkey_mbox_out *out, int outlen) 146 144 { 147 145 struct mlx5_query_mkey_mbox_in in; ··· 151 149 memset(out, 0, outlen); 152 150 153 151 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); 154 - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); 152 + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); 155 153 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); 156 154 if (err) 157 155 return err; ··· 163 161 } 164 162 EXPORT_SYMBOL(mlx5_core_query_mkey); 165 163 166 - int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 164 + int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, 167 165 u32 *mkey) 168 166 { 169 167 struct mlx5_query_special_ctxs_mbox_in in;
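With the rename above, the radix tree indexes generic mkeys by their base key, i.e. with the low 8 variant bits stripped by mlx5_base_mkey(). A hypothetical lookup under the table lock, similar to what a page-fault or invalidation path would need to do; find_mkey() is an invented name:

#include <linux/mlx5/driver.h>	/* mlx5_mkey_table, mlx5_base_mkey() */

/* Hypothetical lookup: resolve the mkey object a WQE or CQE referenced. */
static struct mlx5_core_mkey *find_mkey(struct mlx5_core_dev *dev, u32 key)
{
	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
	struct mlx5_core_mkey *mkey;

	read_lock(&table->lock);
	mkey = radix_tree_lookup(&table->tree, mlx5_base_mkey(key));
	read_unlock(&table->lock);

	return mkey;
}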
+23
drivers/net/ethernet/mellanox/mlx5/core/port.c
··· 324 324 } 325 325 EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); 326 326 327 + int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, 328 + u8 port_num, void *out, size_t sz) 329 + { 330 + u32 *in; 331 + int err; 332 + 333 + in = mlx5_vzalloc(sz); 334 + if (!in) { 335 + err = -ENOMEM; 336 + return err; 337 + } 338 + 339 + MLX5_SET(ppcnt_reg, in, local_port, port_num); 340 + 341 + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); 342 + err = mlx5_core_access_reg(dev, in, sz, out, 343 + sz, MLX5_REG_PPCNT, 0, 0); 344 + 345 + kvfree(in); 346 + return err; 347 + } 348 + EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); 349 + 327 350 int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) 328 351 { 329 352 u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
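mlx5_core_query_ib_ppcnt() returns a raw PPCNT register dump for the new MLX5_INFINIBAND_PORT_COUNTERS_GROUP. A hypothetical caller sketch that sizes the buffer and extracts one counter with the MLX5_GET_BE helper and ib_port_cntrs_grp_data_layout added further down in this series; read_symbol_errors() is an invented name and the counter_set placement is assumed from the ppcnt_reg layout:

#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>

/* Hypothetical caller: read the IB symbol error counter for port 1. */
static int read_symbol_errors(struct mlx5_core_dev *dev, __be16 *sym_err)
{
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	void *out, *cnt_set;
	int err;

	out = mlx5_vzalloc(sz);
	if (!out)
		return -ENOMEM;

	err = mlx5_core_query_ib_ppcnt(dev, 1, out, sz);
	if (!err) {
		cnt_set = MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
		/* MLX5_GET_BE keeps the big-endian form MAD responses expect */
		*sym_err = MLX5_GET_BE(__be16, ib_port_cntrs_grp_data_layout,
				       cnt_set, symbol_error_counter);
	}

	kvfree(out);
	return err;
}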
+40
drivers/net/ethernet/mellanox/mlx5/core/vport.c
··· 850 850 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); 851 851 } 852 852 EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); 853 + 854 + int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, 855 + u8 port_num, void *out, size_t out_sz) 856 + { 857 + int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in); 858 + int is_group_manager; 859 + void *in; 860 + int err; 861 + 862 + is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); 863 + in = mlx5_vzalloc(in_sz); 864 + if (!in) { 865 + err = -ENOMEM; 866 + return err; 867 + } 868 + 869 + MLX5_SET(query_vport_counter_in, in, opcode, 870 + MLX5_CMD_OP_QUERY_VPORT_COUNTER); 871 + if (other_vport) { 872 + if (is_group_manager) { 873 + MLX5_SET(query_vport_counter_in, in, other_vport, 1); 874 + MLX5_SET(query_vport_counter_in, in, vport_number, 0); 875 + } else { 876 + err = -EPERM; 877 + goto free; 878 + } 879 + } 880 + if (MLX5_CAP_GEN(dev, num_ports) == 2) 881 + MLX5_SET(query_vport_counter_in, in, port_num, port_num); 882 + 883 + err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); 884 + if (err) 885 + goto free; 886 + err = mlx5_cmd_status_to_err_v2(out); 887 + 888 + free: 889 + kvfree(in); 890 + return err; 891 + } 892 + EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
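mlx5_core_query_vport_counter() wraps the QUERY_VPORT_COUNTER command; other_vport may only be set by the vport group manager, and port_num is honoured on dual-port HCAs. A hypothetical caller reading one 64-bit octet counter from the response; read_rx_unicast_octets() is an invented name and the received_ib_unicast field is assumed from the query_vport_counter_out layout:

#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>

/* Hypothetical caller: unicast octets received on the local vport. */
static int read_rx_unicast_octets(struct mlx5_core_dev *dev, u8 port, u64 *octets)
{
	int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
	void *out;
	int err;

	out = mlx5_vzalloc(sz);
	if (!out)
		return -ENOMEM;

	/* other_vport = 0: query this function's own vport */
	err = mlx5_core_query_vport_counter(dev, 0, port, out, sz);
	if (!err)
		*octets = MLX5_GET64(query_vport_counter_out, out,
				     received_ib_unicast.octets);

	kvfree(out);
	return err;
}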
+3
include/linux/mlx4/device.h
··· 219 219 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, 220 220 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, 221 221 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, 222 + MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, 222 223 }; 223 224 224 225 enum { ··· 1161 1160 MLX4_FS_REGULAR = 1, 1162 1161 MLX4_FS_ALL_DEFAULT, 1163 1162 MLX4_FS_MC_DEFAULT, 1163 + MLX4_FS_MIRROR_RX_PORT, 1164 + MLX4_FS_MIRROR_SX_PORT, 1164 1165 MLX4_FS_UC_SNIFFER, 1165 1166 MLX4_FS_MC_SNIFFER, 1166 1167 MLX4_FS_MODE_NUM, /* should be last */
+31 -2
include/linux/mlx5/device.h
··· 105 105 ___t; \ 106 106 }) 107 107 108 + /* Big endian getters */ 109 + #define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ 110 + __mlx5_64_off(typ, fld))) 111 + 112 + #define MLX5_GET_BE(type_t, typ, p, fld) ({ \ 113 + type_t tmp; \ 114 + switch (sizeof(tmp)) { \ 115 + case sizeof(u8): \ 116 + tmp = (__force type_t)MLX5_GET(typ, p, fld); \ 117 + break; \ 118 + case sizeof(u16): \ 119 + tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ 120 + break; \ 121 + case sizeof(u32): \ 122 + tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ 123 + break; \ 124 + case sizeof(u64): \ 125 + tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ 126 + break; \ 127 + } \ 128 + tmp; \ 129 + }) 130 + 108 131 enum { 109 132 MLX5_MAX_COMMANDS = 32, 110 133 MLX5_CMD_DATA_BLOCK_SIZE = 512, ··· 1307 1284 MLX5_RFC_3635_COUNTERS_GROUP = 0x3, 1308 1285 MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, 1309 1286 MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, 1310 - MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11 1287 + MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, 1288 + MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, 1311 1289 }; 1312 1290 1313 1291 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) ··· 1318 1294 return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; 1319 1295 } 1320 1296 1321 - #define MLX5_BY_PASS_NUM_PRIOS 9 1297 + #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 1298 + #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 1299 + #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 1300 + #define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ 1301 + MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ 1302 + MLX5_BY_PASS_NUM_MULTICAST_PRIOS) 1322 1303 1323 1304 #endif /* MLX5_DEVICE_H */
+15 -11
include/linux/mlx5/driver.h
··· 338 338 u32 sigerr_count; 339 339 }; 340 340 341 - struct mlx5_core_mr { 341 + struct mlx5_core_mkey { 342 342 u64 iova; 343 343 u64 size; 344 344 u32 key; ··· 426 426 struct radix_tree_root tree; 427 427 }; 428 428 429 - struct mlx5_mr_table { 429 + struct mlx5_mkey_table { 430 430 /* protect radix tree 431 431 */ 432 432 rwlock_t lock; ··· 484 484 struct mlx5_cq_table cq_table; 485 485 /* end: cq staff */ 486 486 487 - /* start: mr staff */ 488 - struct mlx5_mr_table mr_table; 489 - /* end: mr staff */ 487 + /* start: mkey staff */ 488 + struct mlx5_mkey_table mkey_table; 489 + /* end: mkey staff */ 490 490 491 491 /* start: alloc staff */ 492 492 /* protect buffer alocation according to numa node */ ··· 739 739 struct mlx5_query_srq_mbox_out *out); 740 740 int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 741 741 u16 lwm, int is_srq); 742 - void mlx5_init_mr_table(struct mlx5_core_dev *dev); 743 - void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); 744 - int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 742 + void mlx5_init_mkey_table(struct mlx5_core_dev *dev); 743 + void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); 744 + int mlx5_core_create_mkey(struct mlx5_core_dev *dev, 745 + struct mlx5_core_mkey *mkey, 745 746 struct mlx5_create_mkey_mbox_in *in, int inlen, 746 747 mlx5_cmd_cbk_t callback, void *context, 747 748 struct mlx5_create_mkey_mbox_out *out); 748 - int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); 749 - int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 749 + int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, 750 + struct mlx5_core_mkey *mkey); 751 + int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, 750 752 struct mlx5_query_mkey_mbox_out *out, int outlen); 751 - int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 753 + int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, 752 754 u32 *mkey); 753 755 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); 754 756 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); ··· 849 847 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); 850 848 int mlx5_query_odp_caps(struct mlx5_core_dev *dev, 851 849 struct mlx5_odp_caps *odp_caps); 850 + int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, 851 + u8 port_num, void *out, size_t sz); 852 852 853 853 static inline int fw_initializing(struct mlx5_core_dev *dev) 854 854 {
+5
include/linux/mlx5/fs.h
··· 38 38 39 39 #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 40 40 41 + enum { 42 + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, 43 + }; 44 + 41 45 #define LEFTOVERS_RULE_NUM 2 42 46 static inline void build_leftovers_ft_param(int *priority, 43 47 int *n_ent, ··· 56 52 MLX5_FLOW_NAMESPACE_BYPASS, 57 53 MLX5_FLOW_NAMESPACE_KERNEL, 58 54 MLX5_FLOW_NAMESPACE_LEFTOVERS, 55 + MLX5_FLOW_NAMESPACE_ANCHOR, 59 56 MLX5_FLOW_NAMESPACE_FDB, 60 57 }; 61 58
+45 -6
include/linux/mlx5/mlx5_ifc.h
··· 458 458 }; 459 459 460 460 struct mlx5_ifc_flow_table_nic_cap_bits { 461 - u8 reserved_at_0[0x200]; 461 + u8 nic_rx_multi_path_tirs[0x1]; 462 + u8 reserved_at_1[0x1ff]; 462 463 463 464 struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; 464 465 ··· 737 736 u8 cqe_version[0x4]; 738 737 739 738 u8 compact_address_vector[0x1]; 740 - u8 reserved_at_200[0xe]; 739 + u8 reserved_at_200[0x3]; 740 + u8 ipoib_basic_offloads[0x1]; 741 + u8 reserved_at_204[0xa]; 741 742 u8 drain_sigerr[0x1]; 742 743 u8 cmdif_checksum[0x2]; 743 744 u8 sigerr_cqe[0x1]; ··· 770 767 u8 cd[0x1]; 771 768 u8 reserved_at_22c[0x1]; 772 769 u8 apm[0x1]; 773 - u8 reserved_at_22e[0x7]; 770 + u8 reserved_at_22e[0x2]; 771 + u8 imaicl[0x1]; 772 + u8 reserved_at_231[0x4]; 774 773 u8 qkv[0x1]; 775 774 u8 pkv[0x1]; 776 - u8 reserved_at_237[0x4]; 775 + u8 set_deth_sqpn[0x1]; 776 + u8 reserved_at_239[0x3]; 777 777 u8 xrc[0x1]; 778 778 u8 ud[0x1]; 779 779 u8 uc[0x1]; ··· 1212 1206 u8 successful_recovery_events[0x20]; 1213 1207 1214 1208 u8 reserved_at_640[0x180]; 1209 + }; 1210 + 1211 + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { 1212 + u8 symbol_error_counter[0x10]; 1213 + 1214 + u8 link_error_recovery_counter[0x8]; 1215 + 1216 + u8 link_downed_counter[0x8]; 1217 + 1218 + u8 port_rcv_errors[0x10]; 1219 + 1220 + u8 port_rcv_remote_physical_errors[0x10]; 1221 + 1222 + u8 port_rcv_switch_relay_errors[0x10]; 1223 + 1224 + u8 port_xmit_discards[0x10]; 1225 + 1226 + u8 port_xmit_constraint_errors[0x8]; 1227 + 1228 + u8 port_rcv_constraint_errors[0x8]; 1229 + 1230 + u8 reserved_at_70[0x8]; 1231 + 1232 + u8 link_overrun_errors[0x8]; 1233 + 1234 + u8 reserved_at_80[0x10]; 1235 + 1236 + u8 vl_15_dropped[0x10]; 1237 + 1238 + u8 reserved_at_a0[0xa0]; 1215 1239 }; 1216 1240 1217 1241 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { ··· 1816 1780 u8 log_sq_size[0x4]; 1817 1781 u8 reserved_at_55[0x6]; 1818 1782 u8 rlky[0x1]; 1819 - u8 reserved_at_5c[0x4]; 1783 + u8 ulp_stateless_offload_mode[0x4]; 1820 1784 1821 1785 u8 counter_set_id[0x8]; 1822 1786 u8 uar_page[0x18]; ··· 2654 2618 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; 2655 2619 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; 2656 2620 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; 2621 + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; 2657 2622 struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; 2658 2623 u8 reserved_at_0[0x7c0]; 2659 2624 }; ··· 3163 3126 u8 op_mod[0x10]; 3164 3127 3165 3128 u8 other_vport[0x1]; 3166 - u8 reserved_at_41[0xf]; 3129 + u8 reserved_at_41[0xb]; 3130 + u8 port_num[0x4]; 3167 3131 u8 vport_number[0x10]; 3168 3132 3169 3133 u8 reserved_at_60[0x60]; ··· 6992 6954 struct mlx5_ifc_peir_reg_bits peir_reg; 6993 6955 struct mlx5_ifc_pelc_reg_bits pelc_reg; 6994 6956 struct mlx5_ifc_pfcc_reg_bits pfcc_reg; 6957 + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; 6995 6958 struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; 6996 6959 struct mlx5_ifc_pifr_reg_bits pifr_reg; 6997 6960 struct mlx5_ifc_pipg_reg_bits pipg_reg;
+4 -3
include/linux/mlx5/qp.h
··· 499 499 u8 reserved2[4]; 500 500 __be32 next_send_psn; 501 501 __be32 cqn_send; 502 - u8 reserved3[8]; 502 + __be32 deth_sqpn; 503 + u8 reserved3[4]; 503 504 __be32 last_acked_psn; 504 505 __be32 ssn; 505 506 __be32 params2; ··· 622 621 return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); 623 622 } 624 623 625 - static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) 624 + static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) 626 625 { 627 - return radix_tree_lookup(&dev->priv.mr_table.tree, key); 626 + return radix_tree_lookup(&dev->priv.mkey_table.tree, key); 628 627 } 629 628 630 629 struct mlx5_page_fault_resume_mbox_in {
+2
include/linux/mlx5/vport.h
··· 92 92 93 93 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); 94 94 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); 95 + int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, 96 + u8 port_num, void *out, size_t out_sz); 95 97 96 98 #endif /* __MLX5_VPORT_H__ */
+13 -1
include/rdma/ib_verbs.h
··· 212 212 IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), 213 213 IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), 214 214 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), 215 + IB_DEVICE_SG_GAPS_REG = (1ULL << 32), 215 216 }; 216 217 217 218 enum ib_signature_prot_cap { ··· 663 662 * @IB_MR_TYPE_SIGNATURE: memory region that is used for 664 663 * signature operations (data-integrity 665 664 * capable regions) 665 + * @IB_MR_TYPE_SG_GAPS: memory region that is capable to 666 + * register any arbitrary sg lists (without 667 + * the normal mr constraints - see 668 + * ib_map_mr_sg) 666 669 */ 667 670 enum ib_mr_type { 668 671 IB_MR_TYPE_MEM_REG, 669 672 IB_MR_TYPE_SIGNATURE, 673 + IB_MR_TYPE_SG_GAPS, 670 674 }; 671 675 672 676 /** ··· 1493 1487 IB_FLOW_DOMAIN_NUM /* Must be last */ 1494 1488 }; 1495 1489 1490 + enum ib_flow_flags { 1491 + IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ 1492 + IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ 1493 + }; 1494 + 1496 1495 struct ib_flow_eth_filter { 1497 1496 u8 dst_mac[6]; 1498 1497 u8 src_mac[6]; ··· 1819 1808 struct scatterlist *sg, 1820 1809 int sg_nents); 1821 1810 struct ib_mw * (*alloc_mw)(struct ib_pd *pd, 1822 - enum ib_mw_type type); 1811 + enum ib_mw_type type, 1812 + struct ib_udata *udata); 1823 1813 int (*dealloc_mw)(struct ib_mw *mw); 1824 1814 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, 1825 1815 int mr_access_flags,
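The new IB_FLOW_ATTR_FLAGS_DONT_TRAP flag marks a steering rule that keeps matching and counting without stealing the packet, so later rules still see it. A hypothetical kernel-side sketch of attaching such a rule with ib_create_flow(); watch_dmac() is an invented name, the attr-plus-spec struct layout is an assumption, and real consumers of this flag would normally come through uverbs:

#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>

/* Hypothetical consumer: observe one destination MAC on port 1 without
 * diverting the traffic away from later steering rules.
 */
static struct ib_flow *watch_dmac(struct ib_qp *qp, const u8 *dmac)
{
	struct {
		struct ib_flow_attr	attr;
		struct ib_flow_spec_eth	eth;
	} flow = {
		.attr = {
			.type		= IB_FLOW_ATTR_NORMAL,
			.size		= sizeof(flow),
			.priority	= 0,
			.flags		= IB_FLOW_ATTR_FLAGS_DONT_TRAP,
			.num_of_specs	= 1,
			.port		= 1,
		},
		.eth = {
			.type	= IB_FLOW_SPEC_ETH,
			.size	= sizeof(struct ib_flow_spec_eth),
		},
	};

	memcpy(flow.eth.val.dst_mac, dmac, ETH_ALEN);
	memset(flow.eth.mask.dst_mac, 0xff, ETH_ALEN);

	return ib_create_flow(qp, &flow.attr, IB_FLOW_DOMAIN_USER);
}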