
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
"A typical collection of patches this cycle, mostly fixing with a few
new features:

- Fixes from static tools: clang warnings, dead code, unused
variables, coccinelle sweeps, etc

- Driver bug fixes and minor improvements in rxe, bnxt_re, hfi1,
mlx5, irdma, qedr

- rtrs ULP bug fixes and improvements

- Additional counters for bnxt_re

- Support verbs CQ notifications in EFA

- Continued reworking and fixing of rxe

- netlink control to enable/disable optional device counters

- rxe now can use AH objects for its UD path, fixing various bugs in
the process

- Add DMABUF support to EFA"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (103 commits)
RDMA/core: Require the driver to set the IOVA correctly during rereg_mr
RDMA/bnxt_re: Remove unsupported bnxt_re_modify_ah callback
RDMA/irdma: optimize rx path by removing unnecessary copy
RDMA/qed: Use helper function to set GUIDs
RDMA/hns: Use the core code to manage the fixed mmap entries
IB/opa_vnic: Rebranding of OPA VNIC driver to Cornelis Networks
IB/qib: Rebranding of qib driver to Cornelis Networks
IB/hfi1: Rebranding of hfi1 driver to Cornelis Networks
RDMA/bnxt_re: Use helper function to set GUIDs
RDMA/bnxt_re: Fix kernel panic when trying to access bnxt_re_stat_descs
RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
RDMA/hns: Modify the value of MAX_LP_MSG_LEN to meet hardware compatibility
RDMA/hns: Fix initial arm_st of CQ
RDMA/rxe: Make rxe_type_info static const
RDMA/rxe: Use 'bitmap_zalloc()' when applicable
RDMA/rxe: Save a few bytes from struct rxe_pool
RDMA/irdma: Remove the unused variable local_qp
RDMA/core: Fix missed initialization of rdma_hw_stats::lock
RDMA/efa: Add support for dmabuf memory regions
RDMA/umem: Allow pinned dmabuf umem usage
...

+3655 -1946
+18 -16
drivers/infiniband/core/cma.c
··· 453 453 id_priv->id.device = cma_dev->device; 454 454 id_priv->id.route.addr.dev_addr.transport = 455 455 rdma_node_get_transport(cma_dev->device->node_type); 456 - list_add_tail(&id_priv->list, &cma_dev->id_list); 456 + list_add_tail(&id_priv->device_item, &cma_dev->id_list); 457 457 458 458 trace_cm_id_attach(id_priv, cma_dev->device); 459 459 } ··· 470 470 static void cma_release_dev(struct rdma_id_private *id_priv) 471 471 { 472 472 mutex_lock(&lock); 473 - list_del(&id_priv->list); 473 + list_del_init(&id_priv->device_item); 474 474 cma_dev_put(id_priv->cma_dev); 475 475 id_priv->cma_dev = NULL; 476 476 id_priv->id.device = NULL; ··· 854 854 init_completion(&id_priv->comp); 855 855 refcount_set(&id_priv->refcount, 1); 856 856 mutex_init(&id_priv->handler_mutex); 857 + INIT_LIST_HEAD(&id_priv->device_item); 857 858 INIT_LIST_HEAD(&id_priv->listen_list); 858 859 INIT_LIST_HEAD(&id_priv->mc_list); 859 860 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); ··· 1648 1647 return id_priv; 1649 1648 list_for_each_entry(id_priv_dev, 1650 1649 &id_priv->listen_list, 1651 - listen_list) { 1650 + listen_item) { 1652 1651 if (id_priv_dev->id.device == cm_id->device && 1653 1652 cma_match_net_dev(&id_priv_dev->id, 1654 1653 net_dev, req)) ··· 1757 1756 * Remove from listen_any_list to prevent added devices from spawning 1758 1757 * additional listen requests. 1759 1758 */ 1760 - list_del(&id_priv->list); 1759 + list_del_init(&id_priv->listen_any_item); 1761 1760 1762 1761 while (!list_empty(&id_priv->listen_list)) { 1763 - dev_id_priv = list_entry(id_priv->listen_list.next, 1764 - struct rdma_id_private, listen_list); 1762 + dev_id_priv = 1763 + list_first_entry(&id_priv->listen_list, 1764 + struct rdma_id_private, listen_item); 1765 1765 /* sync with device removal to avoid duplicate destruction */ 1766 - list_del_init(&dev_id_priv->list); 1767 - list_del(&dev_id_priv->listen_list); 1766 + list_del_init(&dev_id_priv->device_item); 1767 + list_del_init(&dev_id_priv->listen_item); 1768 1768 mutex_unlock(&lock); 1769 1769 1770 1770 rdma_destroy_id(&dev_id_priv->id); ··· 2566 2564 ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); 2567 2565 if (ret) 2568 2566 goto err_listen; 2569 - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2567 + list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list); 2570 2568 return 0; 2571 2569 err_listen: 2572 2570 /* Caller must destroy this after releasing lock */ ··· 2582 2580 int ret; 2583 2581 2584 2582 mutex_lock(&lock); 2585 - list_add_tail(&id_priv->list, &listen_any_list); 2583 + list_add_tail(&id_priv->listen_any_item, &listen_any_list); 2586 2584 list_for_each_entry(cma_dev, &dev_list, list) { 2587 2585 ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); 2588 2586 if (ret) { 2589 2587 /* Prevent racing with cma_process_remove() */ 2590 2588 if (to_destroy) 2591 - list_del_init(&to_destroy->list); 2589 + list_del_init(&to_destroy->device_item); 2592 2590 goto err_listen; 2593 2591 } 2594 2592 } ··· 4897 4895 4898 4896 mutex_lock(&lock); 4899 4897 list_for_each_entry(cma_dev, &dev_list, list) 4900 - list_for_each_entry(id_priv, &cma_dev->id_list, list) { 4898 + list_for_each_entry(id_priv, &cma_dev->id_list, device_item) { 4901 4899 ret = cma_netdev_change(ndev, id_priv); 4902 4900 if (ret) 4903 4901 goto out; ··· 4957 4955 mutex_lock(&lock); 4958 4956 while (!list_empty(&cma_dev->id_list)) { 4959 4957 struct rdma_id_private *id_priv = list_first_entry( 4960 - &cma_dev->id_list, struct rdma_id_private, list); 4958 + 
&cma_dev->id_list, struct rdma_id_private, device_item); 4961 4959 4962 - list_del(&id_priv->listen_list); 4963 - list_del_init(&id_priv->list); 4960 + list_del_init(&id_priv->listen_item); 4961 + list_del_init(&id_priv->device_item); 4964 4962 cma_id_get(id_priv); 4965 4963 mutex_unlock(&lock); 4966 4964 ··· 5037 5035 5038 5036 mutex_lock(&lock); 5039 5037 list_add_tail(&cma_dev->list, &dev_list); 5040 - list_for_each_entry(id_priv, &listen_any_list, list) { 5038 + list_for_each_entry(id_priv, &listen_any_list, listen_any_item) { 5041 5039 ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); 5042 5040 if (ret) 5043 5041 goto free_listen;
+9 -2
drivers/infiniband/core/cma_priv.h
··· 55 55 56 56 struct rdma_bind_list *bind_list; 57 57 struct hlist_node node; 58 - struct list_head list; /* listen_any_list or cma_device.list */ 59 - struct list_head listen_list; /* per device listens */ 58 + union { 59 + struct list_head device_item; /* On cma_device->id_list */ 60 + struct list_head listen_any_item; /* On listen_any_list */ 61 + }; 62 + union { 63 + /* On rdma_id_private->listen_list */ 64 + struct list_head listen_item; 65 + struct list_head listen_list; 66 + }; 60 67 struct cma_device *cma_dev; 61 68 struct list_head mc_list; 62 69
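The two anonymous unions above rely on an rdma_id_private needing only one membership from each pair at any given time, and the cma.c hunks pair the renames with list_del_init() so whichever member is in use can be re-checked or re-deleted safely. A minimal, generic sketch of that list_del_init() idiom follows; struct foo and foo_detach are illustrative names, not part of the patch:

#include <linux/list.h>

struct foo {
	union {					/* only one membership active at a time */
		struct list_head device_item;	/* entry on a per-device list */
		struct list_head any_item;	/* entry on a global "any" list */
	};
};

static void foo_detach(struct foo *f)
{
	/*
	 * Unlike list_del(), list_del_init() leaves the entry pointing at
	 * itself, so a later list_empty() or list_del_init() on whichever
	 * union member is in use sees a valid, empty node instead of the
	 * LIST_POISON pointers.
	 */
	list_del_init(&f->device_item);
}
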
+36 -4
drivers/infiniband/core/counters.c
··· 106 106 return ret; 107 107 } 108 108 109 + int rdma_counter_modify(struct ib_device *dev, u32 port, 110 + unsigned int index, bool enable) 111 + { 112 + struct rdma_hw_stats *stats; 113 + int ret = 0; 114 + 115 + if (!dev->ops.modify_hw_stat) 116 + return -EOPNOTSUPP; 117 + 118 + stats = ib_get_hw_stats_port(dev, port); 119 + if (!stats || index >= stats->num_counters || 120 + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) 121 + return -EINVAL; 122 + 123 + mutex_lock(&stats->lock); 124 + 125 + if (enable != test_bit(index, stats->is_disabled)) 126 + goto out; 127 + 128 + ret = dev->ops.modify_hw_stat(dev, port, index, enable); 129 + if (ret) 130 + goto out; 131 + 132 + if (enable) 133 + clear_bit(index, stats->is_disabled); 134 + else 135 + set_bit(index, stats->is_disabled); 136 + out: 137 + mutex_unlock(&stats->lock); 138 + return ret; 139 + } 140 + 109 141 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, 110 142 struct ib_qp *qp, 111 143 enum rdma_nl_counter_mode mode) ··· 197 165 return counter; 198 166 199 167 err_mode: 200 - kfree(counter->stats); 168 + rdma_free_hw_stats_struct(counter->stats); 201 169 err_stats: 202 170 rdma_restrack_put(&counter->res); 203 171 kfree(counter); ··· 218 186 mutex_unlock(&port_counter->lock); 219 187 220 188 rdma_restrack_del(&counter->res); 221 - kfree(counter->stats); 189 + rdma_free_hw_stats_struct(counter->stats); 222 190 kfree(counter); 223 191 } 224 192 ··· 650 618 fail: 651 619 for (i = port; i >= rdma_start_port(dev); i--) { 652 620 port_counter = &dev->port_data[port].port_counter; 653 - kfree(port_counter->hstats); 621 + rdma_free_hw_stats_struct(port_counter->hstats); 654 622 port_counter->hstats = NULL; 655 623 mutex_destroy(&port_counter->lock); 656 624 } ··· 663 631 664 632 rdma_for_each_port(dev, port) { 665 633 port_counter = &dev->port_data[port].port_counter; 666 - kfree(port_counter->hstats); 634 + rdma_free_hw_stats_struct(port_counter->hstats); 667 635 mutex_destroy(&port_counter->lock); 668 636 } 669 637 }
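rdma_counter_modify() only forwards the enable/disable request to the driver, so optional counters ultimately depend on the new modify_hw_stat device op (wired up in device.c below). A rough sketch of what a driver-side callback might look like, with the signature inferred from the call site above; the foo_ names and the firmware helper are placeholders, not any real driver's code:

static int foo_modify_hw_stat(struct ib_device *ibdev, u32 port,
			      unsigned int counter_index, bool enable)
{
	/*
	 * rdma_counter_modify() only gets here for descriptors flagged
	 * IB_STAT_FLAG_OPTIONAL and already holds stats->lock, so the
	 * driver simply turns the counter on or off.  to_foo_dev() and
	 * foo_fw_toggle_counter() are hypothetical stand-ins for the real
	 * private-data conversion and firmware command / register write.
	 */
	return foo_fw_toggle_counter(to_foo_dev(ibdev), port, counter_index,
				     enable);
}
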
+1
drivers/infiniband/core/device.c
··· 2676 2676 SET_DEVICE_OP(dev_ops, modify_cq); 2677 2677 SET_DEVICE_OP(dev_ops, modify_device); 2678 2678 SET_DEVICE_OP(dev_ops, modify_flow_action_esp); 2679 + SET_DEVICE_OP(dev_ops, modify_hw_stat); 2679 2680 SET_DEVICE_OP(dev_ops, modify_port); 2680 2681 SET_DEVICE_OP(dev_ops, modify_qp); 2681 2682 SET_DEVICE_OP(dev_ops, modify_srq);
+1 -1
drivers/infiniband/core/iwpm_util.c
··· 762 762 { 763 763 struct sk_buff *skb = NULL; 764 764 struct nlmsghdr *nlh; 765 - const char *err_str = ""; 765 + const char *err_str; 766 766 int ret = -EINVAL; 767 767 768 768 skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
+232 -46
drivers/infiniband/core/nldev.c
··· 154 154 [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, 155 155 [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, 156 156 [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, 157 + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, 158 + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, 157 159 }; 158 160 159 161 static int put_driver_name_print_type(struct sk_buff *msg, const char *name, ··· 970 968 if (!table_attr) 971 969 return -EMSGSIZE; 972 970 973 - for (i = 0; i < st->num_counters; i++) 974 - if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) 971 + mutex_lock(&st->lock); 972 + for (i = 0; i < st->num_counters; i++) { 973 + if (test_bit(i, st->is_disabled)) 974 + continue; 975 + if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, 976 + st->value[i])) 975 977 goto err; 978 + } 979 + mutex_unlock(&st->lock); 976 980 977 981 nla_nest_end(msg, table_attr); 978 982 return 0; 979 983 980 984 err: 985 + mutex_unlock(&st->lock); 981 986 nla_nest_cancel(msg, table_attr); 982 987 return -EMSGSIZE; 983 988 } ··· 1897 1888 return err; 1898 1889 } 1899 1890 1891 + static int nldev_stat_set_mode_doit(struct sk_buff *msg, 1892 + struct netlink_ext_ack *extack, 1893 + struct nlattr *tb[], 1894 + struct ib_device *device, u32 port) 1895 + { 1896 + u32 mode, mask = 0, qpn, cntn = 0; 1897 + int ret; 1898 + 1899 + /* Currently only counter for QP is supported */ 1900 + if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 1901 + return -EINVAL; 1902 + 1903 + mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); 1904 + if (mode == RDMA_COUNTER_MODE_AUTO) { 1905 + if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) 1906 + mask = nla_get_u32( 1907 + tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); 1908 + return rdma_counter_set_auto_mode(device, port, mask, extack); 1909 + } 1910 + 1911 + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) 1912 + return -EINVAL; 1913 + 1914 + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 1915 + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { 1916 + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 1917 + ret = rdma_counter_bind_qpn(device, port, qpn, cntn); 1918 + if (ret) 1919 + return ret; 1920 + } else { 1921 + ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); 1922 + if (ret) 1923 + return ret; 1924 + } 1925 + 1926 + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 1927 + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 1928 + ret = -EMSGSIZE; 1929 + goto err_fill; 1930 + } 1931 + 1932 + return 0; 1933 + 1934 + err_fill: 1935 + rdma_counter_unbind_qpn(device, port, qpn, cntn); 1936 + return ret; 1937 + } 1938 + 1939 + static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], 1940 + struct ib_device *device, 1941 + u32 port) 1942 + { 1943 + struct rdma_hw_stats *stats; 1944 + int rem, i, index, ret = 0; 1945 + struct nlattr *entry_attr; 1946 + unsigned long *target; 1947 + 1948 + stats = ib_get_hw_stats_port(device, port); 1949 + if (!stats) 1950 + return -EINVAL; 1951 + 1952 + target = kcalloc(BITS_TO_LONGS(stats->num_counters), 1953 + sizeof(*stats->is_disabled), GFP_KERNEL); 1954 + if (!target) 1955 + return -ENOMEM; 1956 + 1957 + nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], 1958 + rem) { 1959 + index = nla_get_u32(entry_attr); 1960 + if ((index >= stats->num_counters) || 1961 + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { 1962 + ret = -EINVAL; 1963 + goto out; 1964 + } 1965 + 1966 + set_bit(index, target); 1967 + } 1968 + 1969 + for (i = 0; i < 
stats->num_counters; i++) { 1970 + if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) 1971 + continue; 1972 + 1973 + ret = rdma_counter_modify(device, port, i, test_bit(i, target)); 1974 + if (ret) 1975 + goto out; 1976 + } 1977 + 1978 + out: 1979 + kfree(target); 1980 + return ret; 1981 + } 1982 + 1900 1983 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 1901 1984 struct netlink_ext_ack *extack) 1902 1985 { 1903 - u32 index, port, mode, mask = 0, qpn, cntn = 0; 1904 1986 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 1905 1987 struct ib_device *device; 1906 1988 struct sk_buff *msg; 1989 + u32 index, port; 1907 1990 int ret; 1908 1991 1909 - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 1910 - nldev_policy, extack); 1911 - /* Currently only counter for QP is supported */ 1912 - if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || 1913 - !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 1914 - !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE]) 1915 - return -EINVAL; 1916 - 1917 - if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) 1992 + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, 1993 + extack); 1994 + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 1995 + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 1918 1996 return -EINVAL; 1919 1997 1920 1998 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ··· 2012 1916 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2013 1917 if (!rdma_is_port_valid(device, port)) { 2014 1918 ret = -EINVAL; 2015 - goto err; 1919 + goto err_put_device; 1920 + } 1921 + 1922 + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && 1923 + !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 1924 + ret = -EINVAL; 1925 + goto err_put_device; 2016 1926 } 2017 1927 2018 1928 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2019 1929 if (!msg) { 2020 1930 ret = -ENOMEM; 2021 - goto err; 1931 + goto err_put_device; 2022 1932 } 2023 1933 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2024 1934 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, 2025 1935 RDMA_NLDEV_CMD_STAT_SET), 2026 1936 0, 0); 1937 + if (fill_nldev_handle(msg, device) || 1938 + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { 1939 + ret = -EMSGSIZE; 1940 + goto err_free_msg; 1941 + } 2027 1942 2028 - mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); 2029 - if (mode == RDMA_COUNTER_MODE_AUTO) { 2030 - if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) 2031 - mask = nla_get_u32( 2032 - tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); 2033 - ret = rdma_counter_set_auto_mode(device, port, mask, extack); 1943 + if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { 1944 + ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); 2034 1945 if (ret) 2035 - goto err_msg; 2036 - } else { 2037 - if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) 2038 - goto err_msg; 2039 - qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); 2040 - if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { 2041 - cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); 2042 - ret = rdma_counter_bind_qpn(device, port, qpn, cntn); 2043 - } else { 2044 - ret = rdma_counter_bind_qpn_alloc(device, port, 2045 - qpn, &cntn); 2046 - } 2047 - if (ret) 2048 - goto err_msg; 1946 + goto err_free_msg; 1947 + } 2049 1948 2050 - if (fill_nldev_handle(msg, device) || 2051 - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || 2052 - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || 2053 - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { 2054 - ret = -EMSGSIZE; 2055 - goto err_fill; 2056 - } 1949 + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { 1950 + ret = 
nldev_stat_set_counter_dynamic_doit(tb, device, port); 1951 + if (ret) 1952 + goto err_free_msg; 2057 1953 } 2058 1954 2059 1955 nlmsg_end(msg, nlh); 2060 1956 ib_device_put(device); 2061 1957 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2062 1958 2063 - err_fill: 2064 - rdma_counter_unbind_qpn(device, port, qpn, cntn); 2065 - err_msg: 1959 + err_free_msg: 2066 1960 nlmsg_free(msg); 2067 - err: 1961 + err_put_device: 2068 1962 ib_device_put(device); 2069 1963 return ret; 2070 1964 } ··· 2189 2103 goto err_stats; 2190 2104 } 2191 2105 for (i = 0; i < num_cnts; i++) { 2106 + if (test_bit(i, stats->is_disabled)) 2107 + continue; 2108 + 2192 2109 v = stats->value[i] + 2193 2110 rdma_counter_get_hwstat_value(device, port, i); 2194 - if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { 2111 + if (rdma_nl_stat_hwcounter_entry(msg, 2112 + stats->descs[i].name, v)) { 2195 2113 ret = -EMSGSIZE; 2196 2114 goto err_table; 2197 2115 } ··· 2343 2253 return ret; 2344 2254 } 2345 2255 2256 + static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 2257 + struct nlmsghdr *nlh, 2258 + struct netlink_ext_ack *extack) 2259 + { 2260 + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; 2261 + struct rdma_hw_stats *stats; 2262 + struct ib_device *device; 2263 + struct sk_buff *msg; 2264 + u32 devid, port; 2265 + int ret, i; 2266 + 2267 + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, 2268 + nldev_policy, extack); 2269 + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || 2270 + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) 2271 + return -EINVAL; 2272 + 2273 + devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); 2274 + device = ib_device_get_by_index(sock_net(skb->sk), devid); 2275 + if (!device) 2276 + return -EINVAL; 2277 + 2278 + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); 2279 + if (!rdma_is_port_valid(device, port)) { 2280 + ret = -EINVAL; 2281 + goto err; 2282 + } 2283 + 2284 + stats = ib_get_hw_stats_port(device, port); 2285 + if (!stats) { 2286 + ret = -EINVAL; 2287 + goto err; 2288 + } 2289 + 2290 + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2291 + if (!msg) { 2292 + ret = -ENOMEM; 2293 + goto err; 2294 + } 2295 + 2296 + nlh = nlmsg_put( 2297 + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 2298 + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), 2299 + 0, 0); 2300 + 2301 + ret = -EMSGSIZE; 2302 + if (fill_nldev_handle(msg, device) || 2303 + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 2304 + goto err_msg; 2305 + 2306 + table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); 2307 + if (!table) 2308 + goto err_msg; 2309 + 2310 + mutex_lock(&stats->lock); 2311 + for (i = 0; i < stats->num_counters; i++) { 2312 + entry = nla_nest_start(msg, 2313 + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); 2314 + if (!entry) 2315 + goto err_msg_table; 2316 + 2317 + if (nla_put_string(msg, 2318 + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, 2319 + stats->descs[i].name) || 2320 + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) 2321 + goto err_msg_entry; 2322 + 2323 + if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && 2324 + (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, 2325 + !test_bit(i, stats->is_disabled)))) 2326 + goto err_msg_entry; 2327 + 2328 + nla_nest_end(msg, entry); 2329 + } 2330 + mutex_unlock(&stats->lock); 2331 + 2332 + nla_nest_end(msg, table); 2333 + nlmsg_end(msg, nlh); 2334 + ib_device_put(device); 2335 + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); 2336 + 2337 + err_msg_entry: 2338 + 
nla_nest_cancel(msg, entry); 2339 + err_msg_table: 2340 + mutex_unlock(&stats->lock); 2341 + nla_nest_cancel(msg, table); 2342 + err_msg: 2343 + nlmsg_free(msg); 2344 + err: 2345 + ib_device_put(device); 2346 + return ret; 2347 + } 2348 + 2346 2349 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 2347 2350 [RDMA_NLDEV_CMD_GET] = { 2348 2351 .doit = nldev_get_doit, ··· 2524 2341 .doit = nldev_res_get_mr_raw_doit, 2525 2342 .dump = nldev_res_get_mr_raw_dumpit, 2526 2343 .flags = RDMA_NL_ADMIN_PERM, 2344 + }, 2345 + [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { 2346 + .doit = nldev_stat_get_counter_status_doit, 2527 2347 }, 2528 2348 }; 2529 2349
+41 -25
drivers/infiniband/core/rw.c
··· 282 282 ib_dma_unmap_sg(dev, sg, sg_cnt, dir); 283 283 } 284 284 285 - static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, 286 - u32 sg_cnt, enum dma_data_direction dir) 285 + static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, 286 + enum dma_data_direction dir) 287 287 { 288 - if (is_pci_p2pdma_page(sg_page(sg))) { 288 + int nents; 289 + 290 + if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { 289 291 if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) 290 292 return 0; 291 - return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); 293 + nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, 294 + sgt->orig_nents, dir); 295 + if (!nents) 296 + return -EIO; 297 + sgt->nents = nents; 298 + return 0; 292 299 } 293 - return ib_dma_map_sg(dev, sg, sg_cnt, dir); 300 + return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); 294 301 } 295 302 296 303 /** ··· 320 313 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 321 314 { 322 315 struct ib_device *dev = qp->pd->device; 316 + struct sg_table sgt = { 317 + .sgl = sg, 318 + .orig_nents = sg_cnt, 319 + }; 323 320 int ret; 324 321 325 - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); 326 - if (!ret) 327 - return -ENOMEM; 328 - sg_cnt = ret; 322 + ret = rdma_rw_map_sgtable(dev, &sgt, dir); 323 + if (ret) 324 + return ret; 325 + sg_cnt = sgt.nents; 329 326 330 327 /* 331 328 * Skip to the S/G entry that sg_offset falls into: ··· 365 354 return ret; 366 355 367 356 out_unmap_sg: 368 - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); 357 + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); 369 358 return ret; 370 359 } 371 360 EXPORT_SYMBOL(rdma_rw_ctx_init); ··· 396 385 struct ib_device *dev = qp->pd->device; 397 386 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, 398 387 qp->integrity_en); 388 + struct sg_table sgt = { 389 + .sgl = sg, 390 + .orig_nents = sg_cnt, 391 + }; 392 + struct sg_table prot_sgt = { 393 + .sgl = prot_sg, 394 + .orig_nents = prot_sg_cnt, 395 + }; 399 396 struct ib_rdma_wr *rdma_wr; 400 397 int count = 0, ret; 401 398 ··· 413 394 return -EINVAL; 414 395 } 415 396 416 - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); 417 - if (!ret) 418 - return -ENOMEM; 419 - sg_cnt = ret; 397 + ret = rdma_rw_map_sgtable(dev, &sgt, dir); 398 + if (ret) 399 + return ret; 420 400 421 401 if (prot_sg_cnt) { 422 - ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir); 423 - if (!ret) { 424 - ret = -ENOMEM; 402 + ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); 403 + if (ret) 425 404 goto out_unmap_sg; 426 - } 427 - prot_sg_cnt = ret; 428 405 } 429 406 430 407 ctx->type = RDMA_RW_SIG_MR; ··· 441 426 442 427 memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); 443 428 444 - ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, 445 - prot_sg_cnt, NULL, SZ_4K); 429 + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg, 430 + prot_sgt.nents, NULL, SZ_4K); 446 431 if (unlikely(ret)) { 447 - pr_err("failed to map PI sg (%u)\n", sg_cnt + prot_sg_cnt); 432 + pr_err("failed to map PI sg (%u)\n", 433 + sgt.nents + prot_sgt.nents); 448 434 goto out_destroy_sig_mr; 449 435 } 450 436 ··· 484 468 out_free_ctx: 485 469 kfree(ctx->reg); 486 470 out_unmap_prot_sg: 487 - if (prot_sg_cnt) 488 - rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); 471 + if (prot_sgt.nents) 472 + rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); 489 473 out_unmap_sg: 490 - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); 474 + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); 491 475 return ret; 492 476 } 493 477 
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
-1
drivers/infiniband/core/sa_query.c
··· 2262 2262 void ib_sa_cleanup(void) 2263 2263 { 2264 2264 cancel_delayed_work(&ib_nl_timed_work); 2265 - flush_workqueue(ib_nl_wq); 2266 2265 destroy_workqueue(ib_nl_wq); 2267 2266 mcast_cleanup(); 2268 2267 ib_unregister_client(&sa_client);
+36 -22
drivers/infiniband/core/sysfs.c
··· 755 755 for (i = 0; i != ARRAY_SIZE(port->groups); i++) 756 756 kfree(port->groups[i].attrs); 757 757 if (port->hw_stats_data) 758 - kfree(port->hw_stats_data->stats); 758 + rdma_free_hw_stats_struct(port->hw_stats_data->stats); 759 759 kfree(port->hw_stats_data); 760 - kfree(port); 760 + kvfree(port); 761 761 } 762 762 763 763 static void ib_port_gid_attr_release(struct kobject *kobj) ··· 895 895 stats = ibdev->ops.alloc_hw_device_stats(ibdev); 896 896 if (!stats) 897 897 return ERR_PTR(-ENOMEM); 898 - if (!stats->names || stats->num_counters <= 0) 898 + if (!stats->descs || stats->num_counters <= 0) 899 899 goto err_free_stats; 900 900 901 901 /* ··· 911 911 if (!data->group.attrs) 912 912 goto err_free_data; 913 913 914 - mutex_init(&stats->lock); 915 914 data->group.name = "hw_counters"; 916 915 data->stats = stats; 917 916 return data; ··· 918 919 err_free_data: 919 920 kfree(data); 920 921 err_free_stats: 921 - kfree(stats); 922 + rdma_free_hw_stats_struct(stats); 922 923 return ERR_PTR(-ENOMEM); 923 924 } 924 925 925 926 void ib_device_release_hw_stats(struct hw_stats_device_data *data) 926 927 { 927 928 kfree(data->group.attrs); 928 - kfree(data->stats); 929 + rdma_free_hw_stats_struct(data->stats); 929 930 kfree(data); 930 931 } 931 932 ··· 933 934 { 934 935 struct hw_stats_device_attribute *attr; 935 936 struct hw_stats_device_data *data; 936 - int i, ret; 937 + bool opstat_skipped = false; 938 + int i, ret, pos = 0; 937 939 938 940 data = alloc_hw_stats_device(ibdev); 939 941 if (IS_ERR(data)) { ··· 955 955 data->stats->timestamp = jiffies; 956 956 957 957 for (i = 0; i < data->stats->num_counters; i++) { 958 - attr = &data->attrs[i]; 958 + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { 959 + opstat_skipped = true; 960 + continue; 961 + } 962 + 963 + WARN_ON(opstat_skipped); 964 + attr = &data->attrs[pos]; 959 965 sysfs_attr_init(&attr->attr.attr); 960 - attr->attr.attr.name = data->stats->names[i]; 966 + attr->attr.attr.name = data->stats->descs[i].name; 961 967 attr->attr.attr.mode = 0444; 962 968 attr->attr.show = hw_stat_device_show; 963 969 attr->show = show_hw_stats; 964 - data->group.attrs[i] = &attr->attr.attr; 970 + data->group.attrs[pos] = &attr->attr.attr; 971 + pos++; 965 972 } 966 973 967 - attr = &data->attrs[i]; 974 + attr = &data->attrs[pos]; 968 975 sysfs_attr_init(&attr->attr.attr); 969 976 attr->attr.attr.name = "lifespan"; 970 977 attr->attr.attr.mode = 0644; ··· 979 972 attr->show = show_stats_lifespan; 980 973 attr->attr.store = hw_stat_device_store; 981 974 attr->store = set_stats_lifespan; 982 - data->group.attrs[i] = &attr->attr.attr; 975 + data->group.attrs[pos] = &attr->attr.attr; 983 976 for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) 984 977 if (!ibdev->groups[i]) { 985 978 ibdev->groups[i] = &data->group; ··· 1001 994 stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num); 1002 995 if (!stats) 1003 996 return ERR_PTR(-ENOMEM); 1004 - if (!stats->names || stats->num_counters <= 0) 997 + if (!stats->descs || stats->num_counters <= 0) 1005 998 goto err_free_stats; 1006 999 1007 1000 /* ··· 1017 1010 if (!group->attrs) 1018 1011 goto err_free_data; 1019 1012 1020 - mutex_init(&stats->lock); 1021 1013 group->name = "hw_counters"; 1022 1014 data->stats = stats; 1023 1015 return data; ··· 1024 1018 err_free_data: 1025 1019 kfree(data); 1026 1020 err_free_stats: 1027 - kfree(stats); 1021 + rdma_free_hw_stats_struct(stats); 1028 1022 return ERR_PTR(-ENOMEM); 1029 1023 } 1030 1024 ··· 1033 1027 { 1034 1028 struct 
hw_stats_port_attribute *attr; 1035 1029 struct hw_stats_port_data *data; 1036 - int i, ret; 1030 + bool opstat_skipped = false; 1031 + int i, ret, pos = 0; 1037 1032 1038 1033 data = alloc_hw_stats_port(port, group); 1039 1034 if (IS_ERR(data)) ··· 1052 1045 data->stats->timestamp = jiffies; 1053 1046 1054 1047 for (i = 0; i < data->stats->num_counters; i++) { 1055 - attr = &data->attrs[i]; 1048 + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { 1049 + opstat_skipped = true; 1050 + continue; 1051 + } 1052 + 1053 + WARN_ON(opstat_skipped); 1054 + attr = &data->attrs[pos]; 1056 1055 sysfs_attr_init(&attr->attr.attr); 1057 - attr->attr.attr.name = data->stats->names[i]; 1056 + attr->attr.attr.name = data->stats->descs[i].name; 1058 1057 attr->attr.attr.mode = 0444; 1059 1058 attr->attr.show = hw_stat_port_show; 1060 1059 attr->show = show_hw_stats; 1061 - group->attrs[i] = &attr->attr.attr; 1060 + group->attrs[pos] = &attr->attr.attr; 1061 + pos++; 1062 1062 } 1063 1063 1064 - attr = &data->attrs[i]; 1064 + attr = &data->attrs[pos]; 1065 1065 sysfs_attr_init(&attr->attr.attr); 1066 1066 attr->attr.attr.name = "lifespan"; 1067 1067 attr->attr.attr.mode = 0644; ··· 1076 1062 attr->show = show_stats_lifespan; 1077 1063 attr->attr.store = hw_stat_port_store; 1078 1064 attr->store = set_stats_lifespan; 1079 - group->attrs[i] = &attr->attr.attr; 1065 + group->attrs[pos] = &attr->attr.attr; 1080 1066 1081 1067 port->hw_stats_data = data; 1082 1068 return 0; ··· 1203 1189 struct ib_port *p; 1204 1190 int ret; 1205 1191 1206 - p = kzalloc(struct_size(p, attrs_list, 1192 + p = kvzalloc(struct_size(p, attrs_list, 1207 1193 attr->gid_tbl_len + attr->pkey_tbl_len), 1208 1194 GFP_KERNEL); 1209 1195 if (!p)
+51
drivers/infiniband/core/umem_dmabuf.c
··· 163 163 } 164 164 EXPORT_SYMBOL(ib_umem_dmabuf_get); 165 165 166 + static void 167 + ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach) 168 + { 169 + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; 170 + 171 + ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev, 172 + "Invalidate callback should not be called when memory is pinned\n"); 173 + } 174 + 175 + static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = { 176 + .allow_peer2peer = true, 177 + .move_notify = ib_umem_dmabuf_unsupported_move_notify, 178 + }; 179 + 180 + struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, 181 + unsigned long offset, 182 + size_t size, int fd, 183 + int access) 184 + { 185 + struct ib_umem_dmabuf *umem_dmabuf; 186 + int err; 187 + 188 + umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access, 189 + &ib_umem_dmabuf_attach_pinned_ops); 190 + if (IS_ERR(umem_dmabuf)) 191 + return umem_dmabuf; 192 + 193 + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); 194 + err = dma_buf_pin(umem_dmabuf->attach); 195 + if (err) 196 + goto err_release; 197 + umem_dmabuf->pinned = 1; 198 + 199 + err = ib_umem_dmabuf_map_pages(umem_dmabuf); 200 + if (err) 201 + goto err_unpin; 202 + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); 203 + 204 + return umem_dmabuf; 205 + 206 + err_unpin: 207 + dma_buf_unpin(umem_dmabuf->attach); 208 + err_release: 209 + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); 210 + ib_umem_release(&umem_dmabuf->umem); 211 + return ERR_PTR(err); 212 + } 213 + EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned); 214 + 166 215 void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) 167 216 { 168 217 struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; 169 218 170 219 dma_resv_lock(dmabuf->resv, NULL); 171 220 ib_umem_dmabuf_unmap_pages(umem_dmabuf); 221 + if (umem_dmabuf->pinned) 222 + dma_buf_unpin(umem_dmabuf->attach); 172 223 dma_resv_unlock(dmabuf->resv); 173 224 174 225 dma_buf_detach(dmabuf, umem_dmabuf->attach);
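ib_umem_dmabuf_get_pinned() pins and maps the buffer at registration time, so a driver that uses it (as EFA does in this series) never has to handle move_notify invalidations. A sketch of roughly how a reg_user_mr_dmabuf path could consume it; foo_create_mr and the foo_ prefix are placeholders, not the actual EFA code:

static struct ib_mr *foo_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
					    u64 length, u64 virt_addr,
					    int fd, int access_flags,
					    struct ib_udata *udata)
{
	struct ib_umem_dmabuf *umem_dmabuf;

	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, offset, length,
						fd, access_flags);
	if (IS_ERR(umem_dmabuf))
		return ERR_CAST(umem_dmabuf);

	/*
	 * umem_dmabuf->umem is already DMA mapped here; foo_create_mr()
	 * stands in for the device-specific MR setup that walks it with the
	 * usual umem block iterators.  On teardown the driver calls
	 * ib_umem_release(&umem_dmabuf->umem), which unpins the dma-buf.
	 */
	return foo_create_mr(pd, &umem_dmabuf->umem, virt_addr, access_flags);
}
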
-3
drivers/infiniband/core/uverbs_cmd.c
··· 837 837 new_mr->device = new_pd->device; 838 838 new_mr->pd = new_pd; 839 839 new_mr->type = IB_MR_TYPE_USER; 840 - new_mr->dm = NULL; 841 - new_mr->sig_attrs = NULL; 842 840 new_mr->uobject = uobj; 843 841 atomic_inc(&new_pd->usecnt); 844 - new_mr->iova = cmd.hca_va; 845 842 new_uobj->object = new_mr; 846 843 847 844 rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+49
drivers/infiniband/core/verbs.c
··· 2976 2976 return true; 2977 2977 } 2978 2978 EXPORT_SYMBOL(__rdma_block_iter_next); 2979 + 2980 + /** 2981 + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct 2982 + * for the drivers. 2983 + * @descs: array of static descriptors 2984 + * @num_counters: number of elements in array 2985 + * @lifespan: milliseconds between updates 2986 + */ 2987 + struct rdma_hw_stats *rdma_alloc_hw_stats_struct( 2988 + const struct rdma_stat_desc *descs, int num_counters, 2989 + unsigned long lifespan) 2990 + { 2991 + struct rdma_hw_stats *stats; 2992 + 2993 + stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); 2994 + if (!stats) 2995 + return NULL; 2996 + 2997 + stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), 2998 + sizeof(*stats->is_disabled), GFP_KERNEL); 2999 + if (!stats->is_disabled) 3000 + goto err; 3001 + 3002 + stats->descs = descs; 3003 + stats->num_counters = num_counters; 3004 + stats->lifespan = msecs_to_jiffies(lifespan); 3005 + mutex_init(&stats->lock); 3006 + 3007 + return stats; 3008 + 3009 + err: 3010 + kfree(stats); 3011 + return NULL; 3012 + } 3013 + EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); 3014 + 3015 + /** 3016 + * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats 3017 + * @stats: statistics to release 3018 + */ 3019 + void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) 3020 + { 3021 + if (!stats) 3022 + return; 3023 + 3024 + kfree(stats->is_disabled); 3025 + kfree(stats); 3026 + } 3027 + EXPORT_SYMBOL(rdma_free_hw_stats_struct);
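With these helpers, a driver describes its counters once in a static rdma_stat_desc table (marking on-demand ones with IB_STAT_FLAG_OPTIONAL) and lets the core own the is_disabled bitmap and lock; the bnxt_re conversion further down follows the same shape. A condensed sketch, with the foo_ names purely illustrative:

enum {
	FOO_RX_PKTS,
	FOO_TX_PKTS,
	FOO_RX_CNP_PKTS,		/* optional: off until enabled via netlink */
	FOO_NUM_COUNTERS,
};

static const struct rdma_stat_desc foo_port_stat_descs[] = {
	[FOO_RX_PKTS].name	= "rx_pkts",
	[FOO_TX_PKTS].name	= "tx_pkts",
	[FOO_RX_CNP_PKTS].name	= "rx_cnp_pkts",
	[FOO_RX_CNP_PKTS].flags	= IB_STAT_FLAG_OPTIONAL,
};

static struct rdma_hw_stats *foo_alloc_hw_port_stats(struct ib_device *ibdev,
						      u32 port_num)
{
	/* The core allocates value[] and is_disabled and initializes the lock. */
	return rdma_alloc_hw_stats_struct(foo_port_stat_descs,
					  FOO_NUM_COUNTERS,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
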
+6 -13
drivers/infiniband/hw/bnxt_re/bnxt_re.h
··· 39 39 40 40 #ifndef __BNXT_RE_H__ 41 41 #define __BNXT_RE_H__ 42 + #include "hw_counters.h" 42 43 #define ROCE_DRV_MODULE_NAME "bnxt_re" 43 44 44 45 #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" 45 - #define BNXT_RE_PAGE_SHIFT_4K (12) 46 - #define BNXT_RE_PAGE_SHIFT_8K (13) 47 - #define BNXT_RE_PAGE_SHIFT_64K (16) 48 - #define BNXT_RE_PAGE_SHIFT_2M (21) 49 - #define BNXT_RE_PAGE_SHIFT_8M (23) 50 - #define BNXT_RE_PAGE_SHIFT_1G (30) 51 46 52 - #define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K) 53 - #define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K) 54 - #define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K) 55 - #define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M) 56 - #define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M) 57 - #define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G) 47 + #define BNXT_RE_PAGE_SHIFT_1G (30) 48 + #define BNXT_RE_PAGE_SIZE_SUPPORTED 0x7FFFF000 /* 4kb - 1G */ 58 49 59 50 #define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G) 60 51 #define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39) ··· 168 177 atomic_t srq_count; 169 178 atomic_t mr_count; 170 179 atomic_t mw_count; 180 + atomic_t ah_count; 181 + atomic_t pd_count; 171 182 /* Max of 2 lossless traffic class supported per port */ 172 183 u16 cosq[2]; 173 184 174 185 /* QP for for handling QP1 packets */ 175 186 struct bnxt_re_gsi_context gsi_ctx; 187 + struct bnxt_re_stats stats; 176 188 atomic_t nq_alloc_cnt; 177 189 u32 is_virtfn; 178 190 u32 num_vfs; 179 - struct bnxt_qplib_roce_stats stats; 180 191 }; 181 192 182 193 #define to_bnxt_re_dev(ptr, member) \
+231 -149
drivers/infiniband/hw/bnxt_re/hw_counters.c
··· 57 57 #include "bnxt_re.h" 58 58 #include "hw_counters.h" 59 59 60 - static const char * const bnxt_re_stat_name[] = { 61 - [BNXT_RE_ACTIVE_QP] = "active_qps", 62 - [BNXT_RE_ACTIVE_SRQ] = "active_srqs", 63 - [BNXT_RE_ACTIVE_CQ] = "active_cqs", 64 - [BNXT_RE_ACTIVE_MR] = "active_mrs", 65 - [BNXT_RE_ACTIVE_MW] = "active_mws", 66 - [BNXT_RE_RX_PKTS] = "rx_pkts", 67 - [BNXT_RE_RX_BYTES] = "rx_bytes", 68 - [BNXT_RE_TX_PKTS] = "tx_pkts", 69 - [BNXT_RE_TX_BYTES] = "tx_bytes", 70 - [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", 71 - [BNXT_RE_RX_DROPS] = "rx_roce_drops", 72 - [BNXT_RE_RX_DISCARDS] = "rx_roce_discards", 73 - [BNXT_RE_TO_RETRANSMITS] = "to_retransmits", 74 - [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", 75 - [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", 76 - [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd", 77 - [BNXT_RE_MISSING_RESP] = "missing_resp", 78 - [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err", 79 - [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err", 80 - [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err", 81 - [BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err", 82 - [BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err", 83 - [BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err", 84 - [BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err", 85 - [BNXT_RE_REMOTE_OP_ERR] = "remote_op_err", 86 - [BNXT_RE_DUP_REQ] = "dup_req", 87 - [BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max", 88 - [BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch", 89 - [BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe", 90 - [BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err", 91 - [BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey", 92 - [BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err", 93 - [BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm", 94 - [BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err", 95 - [BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey", 96 - [BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err", 97 - [BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm", 98 - [BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err", 99 - [BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow", 100 - [BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode", 101 - [BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic", 102 - [BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err", 103 - [BNXT_RE_RES_MEM_ERROR] = "res_mem_err", 104 - [BNXT_RE_RES_SRQ_ERR] = "res_srq_err", 105 - [BNXT_RE_RES_CMP_ERR] = "res_cmp_err", 106 - [BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey", 107 - [BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err", 108 - [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err", 109 - [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", 110 - [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", 111 - [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err", 112 - [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count" 60 + static const struct rdma_stat_desc bnxt_re_stat_descs[] = { 61 + [BNXT_RE_ACTIVE_PD].name = "active_pds", 62 + [BNXT_RE_ACTIVE_AH].name = "active_ahs", 63 + [BNXT_RE_ACTIVE_QP].name = "active_qps", 64 + [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", 65 + [BNXT_RE_ACTIVE_CQ].name = "active_cqs", 66 + [BNXT_RE_ACTIVE_MR].name = "active_mrs", 67 + [BNXT_RE_ACTIVE_MW].name = "active_mws", 68 + [BNXT_RE_RX_PKTS].name = "rx_pkts", 69 + [BNXT_RE_RX_BYTES].name = "rx_bytes", 70 + [BNXT_RE_TX_PKTS].name = "tx_pkts", 71 + [BNXT_RE_TX_BYTES].name = "tx_bytes", 72 + [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors", 73 + [BNXT_RE_RX_ERRORS].name = "rx_roce_errors", 74 + [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards", 75 + [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits", 76 + 
[BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "seq_err_naks_rcvd", 77 + [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded", 78 + [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_naks_rcvd", 79 + [BNXT_RE_MISSING_RESP].name = "missing_resp", 80 + [BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err", 81 + [BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err", 82 + [BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err", 83 + [BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err", 84 + [BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err", 85 + [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "remote_invalid_req_err", 86 + [BNXT_RE_REMOTE_ACCESS_ERR].name = "remote_access_err", 87 + [BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err", 88 + [BNXT_RE_DUP_REQ].name = "dup_req", 89 + [BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max", 90 + [BNXT_RE_RES_LENGTH_MISMATCH].name = "res_length_mismatch", 91 + [BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe", 92 + [BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err", 93 + [BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey", 94 + [BNXT_RE_RES_RX_DOMAIN_ERR].name = "res_rx_domain_err", 95 + [BNXT_RE_RES_RX_NO_PERM].name = "res_rx_no_perm", 96 + [BNXT_RE_RES_RX_RANGE_ERR].name = "res_rx_range_err", 97 + [BNXT_RE_RES_TX_INVALID_RKEY].name = "res_tx_invalid_rkey", 98 + [BNXT_RE_RES_TX_DOMAIN_ERR].name = "res_tx_domain_err", 99 + [BNXT_RE_RES_TX_NO_PERM].name = "res_tx_no_perm", 100 + [BNXT_RE_RES_TX_RANGE_ERR].name = "res_tx_range_err", 101 + [BNXT_RE_RES_IRRQ_OFLOW].name = "res_irrq_oflow", 102 + [BNXT_RE_RES_UNSUP_OPCODE].name = "res_unsup_opcode", 103 + [BNXT_RE_RES_UNALIGNED_ATOMIC].name = "res_unaligned_atomic", 104 + [BNXT_RE_RES_REM_INV_ERR].name = "res_rem_inv_err", 105 + [BNXT_RE_RES_MEM_ERROR].name = "res_mem_err", 106 + [BNXT_RE_RES_SRQ_ERR].name = "res_srq_err", 107 + [BNXT_RE_RES_CMP_ERR].name = "res_cmp_err", 108 + [BNXT_RE_RES_INVALID_DUP_RKEY].name = "res_invalid_dup_rkey", 109 + [BNXT_RE_RES_WQE_FORMAT_ERR].name = "res_wqe_format_err", 110 + [BNXT_RE_RES_CQ_LOAD_ERR].name = "res_cq_load_err", 111 + [BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err", 112 + [BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err", 113 + [BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err", 114 + [BNXT_RE_OUT_OF_SEQ_ERR].name = "oos_drop_count", 115 + [BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req", 116 + [BNXT_RE_TX_READ_REQ].name = "tx_read_req", 117 + [BNXT_RE_TX_READ_RES].name = "tx_read_resp", 118 + [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req", 119 + [BNXT_RE_TX_SEND_REQ].name = "tx_send_req", 120 + [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req", 121 + [BNXT_RE_RX_READ_REQ].name = "rx_read_req", 122 + [BNXT_RE_RX_READ_RESP].name = "rx_read_resp", 123 + [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req", 124 + [BNXT_RE_RX_SEND_REQ].name = "rx_send_req", 125 + [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts", 126 + [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes", 127 + [BNXT_RE_OOB].name = "rx_out_of_buffer" 113 128 }; 129 + 130 + static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, 131 + struct rdma_hw_stats *stats, 132 + struct bnxt_qplib_ext_stat *s) 133 + { 134 + stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req; 135 + stats->value[BNXT_RE_TX_READ_REQ] = s->tx_read_req; 136 + stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res; 137 + stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req; 138 + stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req; 139 + stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req; 140 + stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req; 141 + 
stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res; 142 + stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req; 143 + stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req; 144 + stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts; 145 + stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes; 146 + stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer; 147 + } 148 + 149 + static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev, 150 + struct rdma_hw_stats *stats) 151 + { 152 + struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat; 153 + u32 fid; 154 + int rc; 155 + 156 + fid = PCI_FUNC(rdev->en_dev->pdev->devfn); 157 + rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat); 158 + if (rc) 159 + goto done; 160 + bnxt_re_copy_ext_stats(rdev, stats, estat); 161 + 162 + done: 163 + return rc; 164 + } 165 + 166 + static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, 167 + struct rdma_hw_stats *stats, 168 + struct bnxt_qplib_roce_stats *err_s) 169 + { 170 + stats->value[BNXT_RE_TO_RETRANSMITS] = 171 + err_s->to_retransmits; 172 + stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] = 173 + err_s->seq_err_naks_rcvd; 174 + stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] = 175 + err_s->max_retry_exceeded; 176 + stats->value[BNXT_RE_RNR_NAKS_RCVD] = 177 + err_s->rnr_naks_rcvd; 178 + stats->value[BNXT_RE_MISSING_RESP] = 179 + err_s->missing_resp; 180 + stats->value[BNXT_RE_UNRECOVERABLE_ERR] = 181 + err_s->unrecoverable_err; 182 + stats->value[BNXT_RE_BAD_RESP_ERR] = 183 + err_s->bad_resp_err; 184 + stats->value[BNXT_RE_LOCAL_QP_OP_ERR] = 185 + err_s->local_qp_op_err; 186 + stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] = 187 + err_s->local_protection_err; 188 + stats->value[BNXT_RE_MEM_MGMT_OP_ERR] = 189 + err_s->mem_mgmt_op_err; 190 + stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] = 191 + err_s->remote_invalid_req_err; 192 + stats->value[BNXT_RE_REMOTE_ACCESS_ERR] = 193 + err_s->remote_access_err; 194 + stats->value[BNXT_RE_REMOTE_OP_ERR] = 195 + err_s->remote_op_err; 196 + stats->value[BNXT_RE_DUP_REQ] = 197 + err_s->dup_req; 198 + stats->value[BNXT_RE_RES_EXCEED_MAX] = 199 + err_s->res_exceed_max; 200 + stats->value[BNXT_RE_RES_LENGTH_MISMATCH] = 201 + err_s->res_length_mismatch; 202 + stats->value[BNXT_RE_RES_EXCEEDS_WQE] = 203 + err_s->res_exceeds_wqe; 204 + stats->value[BNXT_RE_RES_OPCODE_ERR] = 205 + err_s->res_opcode_err; 206 + stats->value[BNXT_RE_RES_RX_INVALID_RKEY] = 207 + err_s->res_rx_invalid_rkey; 208 + stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] = 209 + err_s->res_rx_domain_err; 210 + stats->value[BNXT_RE_RES_RX_NO_PERM] = 211 + err_s->res_rx_no_perm; 212 + stats->value[BNXT_RE_RES_RX_RANGE_ERR] = 213 + err_s->res_rx_range_err; 214 + stats->value[BNXT_RE_RES_TX_INVALID_RKEY] = 215 + err_s->res_tx_invalid_rkey; 216 + stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] = 217 + err_s->res_tx_domain_err; 218 + stats->value[BNXT_RE_RES_TX_NO_PERM] = 219 + err_s->res_tx_no_perm; 220 + stats->value[BNXT_RE_RES_TX_RANGE_ERR] = 221 + err_s->res_tx_range_err; 222 + stats->value[BNXT_RE_RES_IRRQ_OFLOW] = 223 + err_s->res_irrq_oflow; 224 + stats->value[BNXT_RE_RES_UNSUP_OPCODE] = 225 + err_s->res_unsup_opcode; 226 + stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] = 227 + err_s->res_unaligned_atomic; 228 + stats->value[BNXT_RE_RES_REM_INV_ERR] = 229 + err_s->res_rem_inv_err; 230 + stats->value[BNXT_RE_RES_MEM_ERROR] = 231 + err_s->res_mem_error; 232 + stats->value[BNXT_RE_RES_SRQ_ERR] = 233 + err_s->res_srq_err; 234 + stats->value[BNXT_RE_RES_CMP_ERR] = 235 + err_s->res_cmp_err; 236 + 
stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] = 237 + err_s->res_invalid_dup_rkey; 238 + stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] = 239 + err_s->res_wqe_format_err; 240 + stats->value[BNXT_RE_RES_CQ_LOAD_ERR] = 241 + err_s->res_cq_load_err; 242 + stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] = 243 + err_s->res_srq_load_err; 244 + stats->value[BNXT_RE_RES_TX_PCI_ERR] = 245 + err_s->res_tx_pci_err; 246 + stats->value[BNXT_RE_RES_RX_PCI_ERR] = 247 + err_s->res_rx_pci_err; 248 + stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = 249 + err_s->res_oos_drop_count; 250 + } 114 251 115 252 int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, 116 253 struct rdma_hw_stats *stats, 117 254 u32 port, int index) 118 255 { 119 256 struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); 120 - struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma; 257 + struct ctx_hw_stats *hw_stats = NULL; 258 + struct bnxt_qplib_roce_stats *err_s = NULL; 121 259 int rc = 0; 122 260 261 + hw_stats = rdev->qplib_ctx.stats.dma; 123 262 if (!port || !stats) 124 263 return -EINVAL; 125 264 ··· 267 128 stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); 268 129 stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); 269 130 stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); 270 - if (bnxt_re_stats) { 131 + stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count); 132 + stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count); 133 + 134 + if (hw_stats) { 271 135 stats->value[BNXT_RE_RECOVERABLE_ERRORS] = 272 - le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); 273 - stats->value[BNXT_RE_RX_DROPS] = 274 - le64_to_cpu(bnxt_re_stats->rx_error_pkts); 136 + le64_to_cpu(hw_stats->tx_bcast_pkts); 137 + stats->value[BNXT_RE_RX_ERRORS] = 138 + le64_to_cpu(hw_stats->rx_error_pkts); 275 139 stats->value[BNXT_RE_RX_DISCARDS] = 276 - le64_to_cpu(bnxt_re_stats->rx_discard_pkts); 140 + le64_to_cpu(hw_stats->rx_discard_pkts); 277 141 stats->value[BNXT_RE_RX_PKTS] = 278 - le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); 142 + le64_to_cpu(hw_stats->rx_ucast_pkts); 279 143 stats->value[BNXT_RE_RX_BYTES] = 280 - le64_to_cpu(bnxt_re_stats->rx_ucast_bytes); 144 + le64_to_cpu(hw_stats->rx_ucast_bytes); 281 145 stats->value[BNXT_RE_TX_PKTS] = 282 - le64_to_cpu(bnxt_re_stats->tx_ucast_pkts); 146 + le64_to_cpu(hw_stats->tx_ucast_pkts); 283 147 stats->value[BNXT_RE_TX_BYTES] = 284 - le64_to_cpu(bnxt_re_stats->tx_ucast_bytes); 148 + le64_to_cpu(hw_stats->tx_ucast_bytes); 285 149 } 150 + err_s = &rdev->stats.rstat.errs; 286 151 if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) { 287 - rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats); 288 - if (rc) 152 + rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s); 153 + if (rc) { 289 154 clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, 290 155 &rdev->flags); 291 - stats->value[BNXT_RE_TO_RETRANSMITS] = 292 - rdev->stats.to_retransmits; 293 - stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] = 294 - rdev->stats.seq_err_naks_rcvd; 295 - stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] = 296 - rdev->stats.max_retry_exceeded; 297 - stats->value[BNXT_RE_RNR_NAKS_RCVD] = 298 - rdev->stats.rnr_naks_rcvd; 299 - stats->value[BNXT_RE_MISSING_RESP] = 300 - rdev->stats.missing_resp; 301 - stats->value[BNXT_RE_UNRECOVERABLE_ERR] = 302 - rdev->stats.unrecoverable_err; 303 - stats->value[BNXT_RE_BAD_RESP_ERR] = 304 - rdev->stats.bad_resp_err; 305 - stats->value[BNXT_RE_LOCAL_QP_OP_ERR] = 306 - rdev->stats.local_qp_op_err; 307 - stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] = 308 - rdev->stats.local_protection_err; 309 - 
stats->value[BNXT_RE_MEM_MGMT_OP_ERR] = 310 - rdev->stats.mem_mgmt_op_err; 311 - stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] = 312 - rdev->stats.remote_invalid_req_err; 313 - stats->value[BNXT_RE_REMOTE_ACCESS_ERR] = 314 - rdev->stats.remote_access_err; 315 - stats->value[BNXT_RE_REMOTE_OP_ERR] = 316 - rdev->stats.remote_op_err; 317 - stats->value[BNXT_RE_DUP_REQ] = 318 - rdev->stats.dup_req; 319 - stats->value[BNXT_RE_RES_EXCEED_MAX] = 320 - rdev->stats.res_exceed_max; 321 - stats->value[BNXT_RE_RES_LENGTH_MISMATCH] = 322 - rdev->stats.res_length_mismatch; 323 - stats->value[BNXT_RE_RES_EXCEEDS_WQE] = 324 - rdev->stats.res_exceeds_wqe; 325 - stats->value[BNXT_RE_RES_OPCODE_ERR] = 326 - rdev->stats.res_opcode_err; 327 - stats->value[BNXT_RE_RES_RX_INVALID_RKEY] = 328 - rdev->stats.res_rx_invalid_rkey; 329 - stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] = 330 - rdev->stats.res_rx_domain_err; 331 - stats->value[BNXT_RE_RES_RX_NO_PERM] = 332 - rdev->stats.res_rx_no_perm; 333 - stats->value[BNXT_RE_RES_RX_RANGE_ERR] = 334 - rdev->stats.res_rx_range_err; 335 - stats->value[BNXT_RE_RES_TX_INVALID_RKEY] = 336 - rdev->stats.res_tx_invalid_rkey; 337 - stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] = 338 - rdev->stats.res_tx_domain_err; 339 - stats->value[BNXT_RE_RES_TX_NO_PERM] = 340 - rdev->stats.res_tx_no_perm; 341 - stats->value[BNXT_RE_RES_TX_RANGE_ERR] = 342 - rdev->stats.res_tx_range_err; 343 - stats->value[BNXT_RE_RES_IRRQ_OFLOW] = 344 - rdev->stats.res_irrq_oflow; 345 - stats->value[BNXT_RE_RES_UNSUP_OPCODE] = 346 - rdev->stats.res_unsup_opcode; 347 - stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] = 348 - rdev->stats.res_unaligned_atomic; 349 - stats->value[BNXT_RE_RES_REM_INV_ERR] = 350 - rdev->stats.res_rem_inv_err; 351 - stats->value[BNXT_RE_RES_MEM_ERROR] = 352 - rdev->stats.res_mem_error; 353 - stats->value[BNXT_RE_RES_SRQ_ERR] = 354 - rdev->stats.res_srq_err; 355 - stats->value[BNXT_RE_RES_CMP_ERR] = 356 - rdev->stats.res_cmp_err; 357 - stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] = 358 - rdev->stats.res_invalid_dup_rkey; 359 - stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] = 360 - rdev->stats.res_wqe_format_err; 361 - stats->value[BNXT_RE_RES_CQ_LOAD_ERR] = 362 - rdev->stats.res_cq_load_err; 363 - stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] = 364 - rdev->stats.res_srq_load_err; 365 - stats->value[BNXT_RE_RES_TX_PCI_ERR] = 366 - rdev->stats.res_tx_pci_err; 367 - stats->value[BNXT_RE_RES_RX_PCI_ERR] = 368 - rdev->stats.res_rx_pci_err; 369 - stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = 370 - rdev->stats.res_oos_drop_count; 156 + goto done; 157 + } 158 + if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && 159 + !rdev->is_virtfn) { 160 + rc = bnxt_re_get_ext_stat(rdev, stats); 161 + if (rc) { 162 + clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, 163 + &rdev->flags); 164 + goto done; 165 + } 166 + } 167 + bnxt_re_copy_err_stats(rdev, stats, err_s); 371 168 } 372 169 373 - return ARRAY_SIZE(bnxt_re_stat_name); 170 + done: 171 + return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? 
172 + BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS; 374 173 } 375 174 376 175 struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, 377 176 u32 port_num) 378 177 { 379 - BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS); 178 + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); 179 + int num_counters = 0; 380 180 381 - return rdma_alloc_hw_stats_struct(bnxt_re_stat_name, 382 - ARRAY_SIZE(bnxt_re_stat_name), 181 + if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) 182 + num_counters = BNXT_RE_NUM_EXT_COUNTERS; 183 + else 184 + num_counters = BNXT_RE_NUM_STD_COUNTERS; 185 + 186 + return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters, 383 187 RDMA_HW_STATS_DEFAULT_LIFESPAN); 384 188 }
+28 -2
drivers/infiniband/hw/bnxt_re/hw_counters.h
··· 41 41 #define __BNXT_RE_HW_STATS_H__ 42 42 43 43 enum bnxt_re_hw_stats { 44 + BNXT_RE_ACTIVE_PD, 45 + BNXT_RE_ACTIVE_AH, 44 46 BNXT_RE_ACTIVE_QP, 45 47 BNXT_RE_ACTIVE_SRQ, 46 48 BNXT_RE_ACTIVE_CQ, ··· 53 51 BNXT_RE_TX_PKTS, 54 52 BNXT_RE_TX_BYTES, 55 53 BNXT_RE_RECOVERABLE_ERRORS, 56 - BNXT_RE_RX_DROPS, 54 + BNXT_RE_RX_ERRORS, 57 55 BNXT_RE_RX_DISCARDS, 58 56 BNXT_RE_TO_RETRANSMITS, 59 57 BNXT_RE_SEQ_ERR_NAKS_RCVD, ··· 95 93 BNXT_RE_RES_TX_PCI_ERR, 96 94 BNXT_RE_RES_RX_PCI_ERR, 97 95 BNXT_RE_OUT_OF_SEQ_ERR, 98 - BNXT_RE_NUM_COUNTERS 96 + BNXT_RE_TX_ATOMIC_REQ, 97 + BNXT_RE_TX_READ_REQ, 98 + BNXT_RE_TX_READ_RES, 99 + BNXT_RE_TX_WRITE_REQ, 100 + BNXT_RE_TX_SEND_REQ, 101 + BNXT_RE_RX_ATOMIC_REQ, 102 + BNXT_RE_RX_READ_REQ, 103 + BNXT_RE_RX_READ_RESP, 104 + BNXT_RE_RX_WRITE_REQ, 105 + BNXT_RE_RX_SEND_REQ, 106 + BNXT_RE_RX_ROCE_GOOD_PKTS, 107 + BNXT_RE_RX_ROCE_GOOD_BYTES, 108 + BNXT_RE_OOB, 109 + BNXT_RE_NUM_EXT_COUNTERS 110 + }; 111 + 112 + #define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) 113 + 114 + struct bnxt_re_rstat { 115 + struct bnxt_qplib_roce_stats errs; 116 + struct bnxt_qplib_ext_stat ext_stat; 117 + }; 118 + 119 + struct bnxt_re_stats { 120 + struct bnxt_re_rstat rstat; 99 121 }; 100 122 101 123 struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+29 -16
drivers/infiniband/hw/bnxt_re/ib_verbs.c
··· 41 41 #include <linux/pci.h> 42 42 #include <linux/netdevice.h> 43 43 #include <linux/if_ether.h> 44 + #include <net/addrconf.h> 44 45 45 46 #include <rdma/ib_verbs.h> 46 47 #include <rdma/ib_user_verbs.h> ··· 131 130 memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, 132 131 min(sizeof(dev_attr->fw_ver), 133 132 sizeof(ib_attr->fw_ver))); 134 - bnxt_qplib_get_guid(rdev->netdev->dev_addr, 135 - (u8 *)&ib_attr->sys_image_guid); 133 + addrconf_addr_eui48((u8 *)&ib_attr->sys_image_guid, 134 + rdev->netdev->dev_addr); 136 135 ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; 137 - ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M; 136 + ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED; 138 137 139 138 ib_attr->vendor_id = rdev->en_dev->pdev->vendor; 140 139 ib_attr->vendor_part_id = rdev->en_dev->pdev->device; ··· 542 541 543 542 bnxt_re_destroy_fence_mr(pd); 544 543 545 - if (pd->qplib_pd.id) 546 - bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, 547 - &pd->qplib_pd); 544 + if (pd->qplib_pd.id) { 545 + if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res, 546 + &rdev->qplib_res.pd_tbl, 547 + &pd->qplib_pd)) 548 + atomic_dec(&rdev->pd_count); 549 + } 548 550 return 0; 549 551 } 550 552 ··· 599 595 if (bnxt_re_create_fence_mr(pd)) 600 596 ibdev_warn(&rdev->ibdev, 601 597 "Failed to create Fence-MR\n"); 598 + atomic_inc(&rdev->pd_count); 599 + 602 600 return 0; 603 601 dbfail: 604 602 bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, ··· 617 611 618 612 bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, 619 613 !(flags & RDMA_DESTROY_AH_SLEEPABLE)); 614 + atomic_dec(&rdev->ah_count); 615 + 620 616 return 0; 621 617 } 622 618 ··· 703 695 wmb(); /* make sure cache is updated. */ 704 696 spin_unlock_irqrestore(&uctx->sh_lock, flag); 705 697 } 698 + atomic_inc(&rdev->ah_count); 706 699 707 - return 0; 708 - } 709 - 710 - int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) 711 - { 712 700 return 0; 713 701 } 714 702 ··· 764 760 bnxt_qplib_destroy_ah(&rdev->qplib_res, 765 761 &gsi_sah->qplib_ah, 766 762 true); 763 + atomic_dec(&rdev->ah_count); 767 764 bnxt_qplib_clean_qp(&qp->qplib_qp); 768 765 769 766 ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); ··· 1011 1006 "Failed to allocate HW AH for Shadow QP"); 1012 1007 goto fail; 1013 1008 } 1009 + atomic_inc(&rdev->ah_count); 1014 1010 1015 1011 return ah; 1016 1012 ··· 2484 2478 2485 2479 wqe->frmr.l_key = wr->key; 2486 2480 wqe->frmr.length = wr->mr->length; 2487 - wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1; 2481 + wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K); 2482 + wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K); 2488 2483 wqe->frmr.va = wr->mr->iova; 2489 2484 return 0; 2490 2485 } ··· 3361 3354 struct ib_wc *wc, 3362 3355 struct bnxt_qplib_cqe *cqe) 3363 3356 { 3357 + struct bnxt_re_dev *rdev; 3358 + u16 vlan_id = 0; 3364 3359 u8 nw_type; 3365 3360 3361 + rdev = qp->rdev; 3366 3362 wc->opcode = IB_WC_RECV; 3367 3363 wc->status = __rc_to_ib_wc_status(cqe->status); 3368 3364 ··· 3377 3367 memcpy(wc->smac, cqe->smac, ETH_ALEN); 3378 3368 wc->wc_flags |= IB_WC_WITH_SMAC; 3379 3369 if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { 3380 - wc->vlan_id = (cqe->cfa_meta & 0xFFF); 3381 - if (wc->vlan_id < 0x1000) 3382 - wc->wc_flags |= IB_WC_WITH_VLAN; 3370 + vlan_id = (cqe->cfa_meta & 0xFFF); 3371 + } 3372 + /* Mark only if vlan_id is non zero */ 3373 + if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { 3374 + wc->vlan_id = 
vlan_id; 3375 + wc->wc_flags |= IB_WC_WITH_VLAN; 3383 3376 } 3384 3377 nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >> 3385 3378 CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT; ··· 3811 3798 3812 3799 mr->qplib_mr.va = virt_addr; 3813 3800 page_size = ib_umem_find_best_pgsz( 3814 - umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr); 3801 + umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); 3815 3802 if (!page_size) { 3816 3803 ibdev_err(&rdev->ibdev, "umem page size unsupported!"); 3817 3804 rc = -EFAULT;
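The frmr fix in the hunk above is easiest to see with numbers: the WQE expects both page-size fields as log2 of the size in 4 KiB units, while the old expression stored the unit count minus one and only filled pbl_pg_sz_log. As a rough worked example (not from the commit itself, assuming PAGE_SHIFT_4K is 12): a 4 KiB MR page size should encode as ilog2(4K >> 12) = 0, 64 KiB as 4, and 2 MiB as ilog2(2M >> 12) = 9, whereas the removed "(page_size >> PAGE_SHIFT_4K) - 1" would have produced 0, 15 and 511 for the same sizes. The new code also keeps the PBL page size (derived from the kernel PAGE_SIZE) separate from the MR page size reported in pg_sz_log.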
-1
drivers/infiniband/hw/bnxt_re/ib_verbs.h
··· 166 166 int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); 167 167 int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, 168 168 struct ib_udata *udata); 169 - int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 170 169 int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 171 170 int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); 172 171 int bnxt_re_create_srq(struct ib_srq *srq,
+10 -6
drivers/infiniband/hw/bnxt_re/main.c
··· 127 127 128 128 rdev->qplib_res.cctx = rdev->chip_ctx; 129 129 rdev->rcfw.res = &rdev->qplib_res; 130 + rdev->qplib_res.dattr = &rdev->dev_attr; 131 + rdev->qplib_res.is_vf = BNXT_VF(bp); 130 132 131 133 bnxt_re_set_drv_mode(rdev, wqe_mode); 132 134 if (bnxt_qplib_determine_atomics(en_dev->pdev)) ··· 525 523 u32 fw_stats_ctx_id) 526 524 { 527 525 struct bnxt_en_dev *en_dev = rdev->en_dev; 528 - struct hwrm_stat_ctx_free_input req = {0}; 526 + struct hwrm_stat_ctx_free_input req = {}; 527 + struct hwrm_stat_ctx_free_output resp = {}; 529 528 struct bnxt_fw_msg fw_msg; 530 529 int rc = -EINVAL; 531 530 ··· 540 537 541 538 bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1); 542 539 req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); 543 - bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req, 544 - sizeof(req), DFLT_HWRM_CMD_TIMEOUT); 540 + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, 541 + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); 545 542 rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); 546 543 if (rc) 547 544 ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", ··· 696 693 .get_port_immutable = bnxt_re_get_port_immutable, 697 694 .map_mr_sg = bnxt_re_map_mr_sg, 698 695 .mmap = bnxt_re_mmap, 699 - .modify_ah = bnxt_re_modify_ah, 700 696 .modify_qp = bnxt_re_modify_qp, 701 697 .modify_srq = bnxt_re_modify_srq, 702 698 .poll_cq = bnxt_re_poll_cq, ··· 729 727 strlen(BNXT_RE_DESC) + 5); 730 728 ibdev->phys_port_cnt = 1; 731 729 732 - bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid); 730 + addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); 733 731 734 732 ibdev->num_comp_vectors = rdev->num_msix - 1; 735 733 ibdev->dev.parent = &rdev->en_dev->pdev->dev; ··· 779 777 atomic_set(&rdev->srq_count, 0); 780 778 atomic_set(&rdev->mr_count, 0); 781 779 atomic_set(&rdev->mw_count, 0); 780 + atomic_set(&rdev->ah_count, 0); 781 + atomic_set(&rdev->pd_count, 0); 782 782 rdev->cosq[0] = 0xFFFF; 783 783 rdev->cosq[1] = 0xFFFF; 784 784 ··· 1729 1725 } 1730 1726 if (sch_work) { 1731 1727 /* Allocate for the deferred task */ 1732 - re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC); 1728 + re_work = kzalloc(sizeof(*re_work), GFP_KERNEL); 1733 1729 if (re_work) { 1734 1730 get_device(&rdev->ibdev.dev); 1735 1731 re_work->rdev = rdev;
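One detail worth spelling out in the main.c hunk: bnxt_re_net_stats_ctx_free() previously passed the request structure as both the request and the completion buffer, so the firmware reply landed on top of the live hwrm_stat_ctx_free_input; the dedicated hwrm_stat_ctx_free_output above gives the reply its own storage. The GFP_ATOMIC to GFP_KERNEL switch further down is presumably safe because that allocation happens in the netdev notifier path, which runs in process context where sleeping is allowed.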
+11 -4
drivers/infiniband/hw/bnxt_re/qplib_fp.c
··· 707 707 int rc = 0; 708 708 709 709 RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags); 710 - req.srq_cid = cpu_to_le32(srq->id); 711 710 712 711 /* Configure the request */ 713 712 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); 714 713 if (!sbuf) 715 714 return -ENOMEM; 715 + req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; 716 + req.srq_cid = cpu_to_le32(srq->id); 716 717 sb = sbuf->sb; 717 718 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, 718 719 (void *)sbuf, 0); ··· 1050 1049 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; 1051 1050 if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) 1052 1051 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; 1052 + if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf) 1053 + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED; 1054 + 1053 1055 req.qp_flags = cpu_to_le32(qp_flags); 1054 1056 1055 1057 /* ORRQ and IRRQ */ ··· 2855 2851 struct cq_base *hw_cqe; 2856 2852 u32 sw_cons, raw_cons; 2857 2853 int budget, rc = 0; 2854 + u8 type; 2858 2855 2859 2856 raw_cons = cq->hwq.cons; 2860 2857 budget = num_cqes; ··· 2874 2869 */ 2875 2870 dma_rmb(); 2876 2871 /* From the device's respective CQE format to qplib_wc*/ 2877 - switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { 2872 + type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; 2873 + switch (type) { 2878 2874 case CQ_BASE_CQE_TYPE_REQ: 2879 2875 rc = bnxt_qplib_cq_process_req(cq, 2880 2876 (struct cq_req *)hw_cqe, ··· 2922 2916 /* Error while processing the CQE, just skip to the 2923 2917 * next one 2924 2918 */ 2925 - dev_err(&cq->hwq.pdev->dev, 2926 - "process_cqe error rc = 0x%x\n", rc); 2919 + if (type != CQ_BASE_CQE_TYPE_TERMINAL) 2920 + dev_err(&cq->hwq.pdev->dev, 2921 + "process_cqe error rc = 0x%x\n", rc); 2927 2922 } 2928 2923 raw_cons++; 2929 2924 }
+3 -3
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
··· 78 78 if (!test_bit(cbit, cmdq->cmdq_bitmap)) 79 79 goto done; 80 80 do { 81 - mdelay(1); /* 1m sec */ 81 + udelay(1); 82 82 bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); 83 83 } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); 84 84 done: ··· 848 848 { 849 849 struct bnxt_qplib_rcfw_sbuf *sbuf; 850 850 851 - sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC); 851 + sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL); 852 852 if (!sbuf) 853 853 return NULL; 854 854 855 855 sbuf->size = size; 856 856 sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size, 857 - &sbuf->dma_addr, GFP_ATOMIC); 857 + &sbuf->dma_addr, GFP_KERNEL); 858 858 if (!sbuf->sb) 859 859 goto bail; 860 860
+1 -1
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
··· 96 96 97 97 #define RCFW_MAX_COOKIE_VALUE 0x7FFF 98 98 #define RCFW_CMD_IS_BLOCKING 0x8000 99 - #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 99 + #define RCFW_BLOCKED_CMD_WAIT_COUNT 20000000UL /* 20 sec */ 100 100 101 101 #define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL 102 102
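The two RCFW changes above go together and the overall timeout is unchanged: the old wait count 0x4E20 is 20,000 iterations at mdelay(1), i.e. roughly 20 s of blocking wait, and the new 20,000,000 iterations at udelay(1) in qplib_rcfw.c give the same ~20 s budget, just polled every microsecond instead of every millisecond so completed commands are noticed much sooner.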
+3 -19
drivers/infiniband/hw/bnxt_re/qplib_res.c
··· 228 228 npages++; 229 229 } 230 230 231 - if (npages == MAX_PBL_LVL_0_PGS) { 231 + if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) { 232 232 /* This request is Level 0, map PTE */ 233 233 rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo); 234 234 if (rc) 235 235 goto fail; 236 236 hwq->level = PBL_LVL_0; 237 + goto done; 237 238 } 238 239 239 - if (npages > MAX_PBL_LVL_0_PGS) { 240 + if (npages >= MAX_PBL_LVL_0_PGS) { 240 241 if (npages > MAX_PBL_LVL_1_PGS) { 241 242 u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ? 242 243 0 : PTU_PTE_VALID; ··· 570 569 fail: 571 570 bnxt_qplib_free_ctx(res, ctx); 572 571 return rc; 573 - } 574 - 575 - /* GUID */ 576 - void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid) 577 - { 578 - u8 mac[ETH_ALEN]; 579 - 580 - /* MAC-48 to EUI-64 mapping */ 581 - memcpy(mac, dev_addr, ETH_ALEN); 582 - guid[0] = mac[0] ^ 2; 583 - guid[1] = mac[1]; 584 - guid[2] = mac[2]; 585 - guid[3] = 0xff; 586 - guid[4] = 0xfe; 587 - guid[5] = mac[3]; 588 - guid[6] = mac[4]; 589 - guid[7] = mac[5]; 590 572 } 591 573 592 574 static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
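The helper removed above is the standard MAC-48 to modified-EUI-64 expansion, which is why the callers in ib_verbs.c and main.c could switch to addrconf_addr_eui48() from <net/addrconf.h>; the byte layout is the same one the deleted code built by hand. Illustrative restatement of that mapping (equivalent to the dropped helper):

    /* equivalent of the deleted helper; addrconf_addr_eui48() produces this layout */
    static void mac_to_guid(const u8 *mac, u8 *guid)
    {
            guid[0] = mac[0] ^ 2;   /* flip the universal/local bit */
            guid[1] = mac[1];
            guid[2] = mac[2];
            guid[3] = 0xff;
            guid[4] = 0xfe;
            guid[5] = mac[3];
            guid[6] = mac[4];
            guid[7] = mac[5];
    }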
+8 -2
drivers/infiniband/hw/bnxt_re/qplib_res.h
··· 253 253 struct bnxt_qplib_res { 254 254 struct pci_dev *pdev; 255 255 struct bnxt_qplib_chip_ctx *cctx; 256 + struct bnxt_qplib_dev_attr *dattr; 256 257 struct net_device *netdev; 257 - 258 258 struct bnxt_qplib_rcfw *rcfw; 259 259 struct bnxt_qplib_pd_tbl pd_tbl; 260 260 struct bnxt_qplib_sgid_tbl sgid_tbl; 261 261 struct bnxt_qplib_pkey_tbl pkey_tbl; 262 262 struct bnxt_qplib_dpi_tbl dpi_tbl; 263 263 bool prio; 264 + bool is_vf; 264 265 }; 265 266 266 267 static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) ··· 346 345 struct bnxt_qplib_hwq *hwq); 347 346 int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, 348 347 struct bnxt_qplib_hwq_attr *hwq_attr); 349 - void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid); 350 348 int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, 351 349 struct bnxt_qplib_pd *pd); 352 350 int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, ··· 449 449 bnxt_qplib_ring_db(info, type); 450 450 else 451 451 bnxt_qplib_ring_db32(info, arm); 452 + } 453 + 454 + static inline bool _is_ext_stats_supported(u16 dev_cap_flags) 455 + { 456 + return dev_cap_flags & 457 + CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; 452 458 } 453 459 #endif /* __BNXT_QPLIB_RES_H__ */
+54 -3
drivers/infiniband/hw/bnxt_re/qplib_sp.c
··· 161 161 attr->l2_db_size = (sb->l2_db_space_size + 1) * 162 162 (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); 163 163 attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; 164 + attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); 164 165 165 166 bnxt_qplib_query_version(rcfw, attr->fw_ver); 166 167 ··· 287 286 } 288 287 289 288 int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, 290 - struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id, 291 - bool update, u32 *index) 289 + struct bnxt_qplib_gid *gid, const u8 *smac, 290 + u16 vlan_id, bool update, u32 *index) 292 291 { 293 292 struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, 294 293 struct bnxt_qplib_res, ··· 379 378 380 379 int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, 381 380 struct bnxt_qplib_gid *gid, u16 gid_idx, 382 - u8 *smac) 381 + const u8 *smac) 383 382 { 384 383 struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, 385 384 struct bnxt_qplib_res, ··· 865 864 rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK; 866 865 rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count); 867 866 } 867 + 868 + bail: 869 + bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); 870 + return rc; 871 + } 872 + 873 + int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, 874 + struct bnxt_qplib_ext_stat *estat) 875 + { 876 + struct creq_query_roce_stats_ext_resp resp = {}; 877 + struct creq_query_roce_stats_ext_resp_sb *sb; 878 + struct cmdq_query_roce_stats_ext req = {}; 879 + struct bnxt_qplib_rcfw_sbuf *sbuf; 880 + u16 cmd_flags = 0; 881 + int rc; 882 + 883 + sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); 884 + if (!sbuf) { 885 + dev_err(&rcfw->pdev->dev, 886 + "SP: QUERY_ROCE_STATS_EXT alloc sb failed"); 887 + return -ENOMEM; 888 + } 889 + 890 + RCFW_CMD_PREP(req, QUERY_ROCE_STATS_EXT, cmd_flags); 891 + 892 + req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); 893 + req.resp_addr = cpu_to_le64(sbuf->dma_addr); 894 + req.function_id = cpu_to_le32(fid); 895 + req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID); 896 + 897 + rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, 898 + (void *)&resp, (void *)sbuf, 0); 899 + if (rc) 900 + goto bail; 901 + 902 + sb = sbuf->sb; 903 + estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts); 904 + estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts); 905 + estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts); 906 + estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts); 907 + estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts); 908 + estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts); 909 + estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts); 910 + estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts); 911 + estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts); 912 + estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts); 913 + estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts); 914 + estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes); 915 + estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts); 916 + estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts); 868 917 869 918 bail: 870 919 bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
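A minimal caller sketch for the new extended-counter query (hypothetical: the function name, rdev and fid are placeholders for the driver's device context and firmware function id; the real consumer lives in the bnxt_re counter code):

    /* hypothetical consumer of bnxt_qplib_qext_stat() */
    static void bnxt_re_dump_ext_stats(struct bnxt_re_dev *rdev, u32 fid)
    {
            struct bnxt_qplib_ext_stat estat = {};

            if (bnxt_qplib_qext_stat(&rdev->rcfw, fid, &estat))
                    return;

            ibdev_dbg(&rdev->ibdev,
                      "rx_out_of_buffer=%llu rx_out_of_sequence=%llu\n",
                      estat.rx_out_of_buffer, estat.rx_out_of_sequence);
    }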
+31 -2
drivers/infiniband/hw/bnxt_re/qplib_sp.h
··· 71 71 u32 l2_db_size; 72 72 u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ]; 73 73 bool is_atomic; 74 + u16 dev_cap_flags; 74 75 }; 75 76 76 77 struct bnxt_qplib_pd { ··· 220 219 /* port 3 active qps */ 221 220 }; 222 221 222 + struct bnxt_qplib_ext_stat { 223 + u64 tx_atomic_req; 224 + u64 tx_read_req; 225 + u64 tx_read_res; 226 + u64 tx_write_req; 227 + u64 tx_send_req; 228 + u64 tx_roce_pkts; 229 + u64 tx_roce_bytes; 230 + u64 rx_atomic_req; 231 + u64 rx_read_req; 232 + u64 rx_read_res; 233 + u64 rx_write_req; 234 + u64 rx_send_req; 235 + u64 rx_roce_pkts; 236 + u64 rx_roce_bytes; 237 + u64 rx_roce_good_pkts; 238 + u64 rx_roce_good_bytes; 239 + u64 rx_out_of_buffer; 240 + u64 rx_out_of_sequence; 241 + u64 tx_cnp; 242 + u64 rx_cnp; 243 + u64 rx_ecn_marked; 244 + }; 245 + 223 246 int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, 224 247 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, 225 248 struct bnxt_qplib_gid *gid); 226 249 int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, 227 250 struct bnxt_qplib_gid *gid, u16 vlan_id, bool update); 228 251 int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, 229 - struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id, 252 + struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id, 230 253 bool update, u32 *index); 231 254 int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, 232 - struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac); 255 + struct bnxt_qplib_gid *gid, u16 gid_idx, 256 + const u8 *smac); 233 257 int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, 234 258 struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, 235 259 u16 *pkey); ··· 289 263 int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids); 290 264 int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, 291 265 struct bnxt_qplib_roce_stats *stats); 266 + int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, 267 + struct bnxt_qplib_ext_stat *estat); 268 + 292 269 #endif /* __BNXT_QPLIB_SP_H__*/
+85
drivers/infiniband/hw/bnxt_re/roce_hsi.h
··· 1102 1102 #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL 1103 1103 #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL 1104 1104 #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL 1105 + #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL 1105 1106 u8 cmd_size; 1106 1107 __le16 flags; 1107 1108 __le16 cookie; ··· 1128 1127 #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL 1129 1128 #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL 1130 1129 #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL 1130 + #define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL 1131 + #define CMDQ_CREATE_QP_QP_FLAGS_LAST \ 1132 + CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 1133 + 1131 1134 u8 type; 1132 1135 #define CMDQ_CREATE_QP_TYPE_RC 0x2UL 1133 1136 #define CMDQ_CREATE_QP_TYPE_UD 0x4UL ··· 2853 2848 __le16 max_qp_wr; 2854 2849 __le16 dev_cap_flags; 2855 2850 #define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL 2851 + #define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS 0x10UL 2856 2852 __le32 max_cq; 2857 2853 __le32 max_cqe; 2858 2854 __le32 max_pd; ··· 3091 3085 __le64 active_qp_count_p1; 3092 3086 __le64 active_qp_count_p2; 3093 3087 __le64 active_qp_count_p3; 3088 + }; 3089 + 3090 + /* cmdq_query_roce_stats_ext (size:192b/24B) */ 3091 + struct cmdq_query_roce_stats_ext { 3092 + u8 opcode; 3093 + #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL 3094 + #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST \ 3095 + CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 3096 + u8 cmd_size; 3097 + __le16 flags; 3098 + #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID 0x1UL 3099 + #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID 0x2UL 3100 + __le16 cookie; 3101 + u8 resp_size; 3102 + u8 collection_id; 3103 + __le64 resp_addr; 3104 + __le32 function_id; 3105 + #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK 0xffUL 3106 + #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT 0 3107 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK 0xffff00UL 3108 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT 8 3109 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID 0x1000000UL 3110 + __le32 reserved32; 3111 + }; 3112 + 3113 + /* creq_query_roce_stats_ext_resp (size:128b/16B) */ 3114 + struct creq_query_roce_stats_ext_resp { 3115 + u8 type; 3116 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK 0x3fUL 3117 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT 0 3118 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 0x38UL 3119 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST \ 3120 + CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 3121 + u8 status; 3122 + __le16 cookie; 3123 + __le32 size; 3124 + u8 v; 3125 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_V 0x1UL 3126 + u8 event; 3127 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL 3128 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \ 3129 + CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 3130 + u8 reserved48[6]; 3131 + }; 3132 + 3133 + /* creq_query_roce_stats_ext_resp_sb (size:1536b/192B) */ 3134 + struct creq_query_roce_stats_ext_resp_sb { 3135 + u8 opcode; 3136 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL 3137 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \ 3138 + CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 3139 + u8 status; 3140 + __le16 cookie; 3141 + __le16 flags; 3142 + u8 resp_size; 3143 + u8 rsvd; 3144 + __le64 tx_atomic_req_pkts; 3145 + __le64 tx_read_req_pkts; 3146 + __le64 tx_read_res_pkts; 3147 + __le64 tx_write_req_pkts; 3148 + __le64 tx_send_req_pkts; 3149 + __le64 
tx_roce_pkts; 3150 + __le64 tx_roce_bytes; 3151 + __le64 rx_atomic_req_pkts; 3152 + __le64 rx_read_req_pkts; 3153 + __le64 rx_read_res_pkts; 3154 + __le64 rx_write_req_pkts; 3155 + __le64 rx_send_req_pkts; 3156 + __le64 rx_roce_pkts; 3157 + __le64 rx_roce_bytes; 3158 + __le64 rx_roce_good_pkts; 3159 + __le64 rx_roce_good_bytes; 3160 + __le64 rx_out_of_buffer_pkts; 3161 + __le64 rx_out_of_sequence_pkts; 3162 + __le64 tx_cnp_pkts; 3163 + __le64 rx_cnp_pkts; 3164 + __le64 rx_ecn_marked_pkts; 3165 + __le64 tx_cnp_bytes; 3166 + __le64 rx_cnp_bytes; 3094 3167 }; 3095 3168 3096 3169 /* QP error notification event (16 bytes) */
-1
drivers/infiniband/hw/cxgb4/cm.c
··· 4464 4464 void c4iw_cm_term(void) 4465 4465 { 4466 4466 WARN_ON(!list_empty(&timeout_list)); 4467 - flush_workqueue(workq); 4468 4467 destroy_workqueue(workq); 4469 4468 }
-1
drivers/infiniband/hw/cxgb4/device.c
··· 1562 1562 kfree(ctx); 1563 1563 } 1564 1564 mutex_unlock(&dev_mutex); 1565 - flush_workqueue(reg_workq); 1566 1565 destroy_workqueue(reg_workq); 1567 1566 cxgb4_unregister_uld(CXGB4_ULD_RDMA); 1568 1567 c4iw_cm_term();
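The flush_workqueue() calls dropped here and in cm.c above were redundant rather than load-bearing: destroy_workqueue() already drains all pending and in-flight work before tearing the workqueue down, so removing the explicit flush does not change the shutdown ordering.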
+11 -11
drivers/infiniband/hw/cxgb4/provider.c
··· 366 366 NR_COUNTERS 367 367 }; 368 368 369 - static const char * const names[] = { 370 - [IP4INSEGS] = "ip4InSegs", 371 - [IP4OUTSEGS] = "ip4OutSegs", 372 - [IP4RETRANSSEGS] = "ip4RetransSegs", 373 - [IP4OUTRSTS] = "ip4OutRsts", 374 - [IP6INSEGS] = "ip6InSegs", 375 - [IP6OUTSEGS] = "ip6OutSegs", 376 - [IP6RETRANSSEGS] = "ip6RetransSegs", 377 - [IP6OUTRSTS] = "ip6OutRsts" 369 + static const struct rdma_stat_desc cxgb4_descs[] = { 370 + [IP4INSEGS].name = "ip4InSegs", 371 + [IP4OUTSEGS].name = "ip4OutSegs", 372 + [IP4RETRANSSEGS].name = "ip4RetransSegs", 373 + [IP4OUTRSTS].name = "ip4OutRsts", 374 + [IP6INSEGS].name = "ip6InSegs", 375 + [IP6OUTSEGS].name = "ip6OutSegs", 376 + [IP6RETRANSSEGS].name = "ip6RetransSegs", 377 + [IP6OUTRSTS].name = "ip6OutRsts" 378 378 }; 379 379 380 380 static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev) 381 381 { 382 - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); 382 + BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS); 383 383 384 384 /* FIXME: these look like port stats */ 385 - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, 385 + return rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS, 386 386 RDMA_HW_STATS_DEFAULT_LIFESPAN); 387 387 } 388 388
+21 -2
drivers/infiniband/hw/efa/efa.h
··· 20 20 21 21 #define EFA_IRQNAME_SIZE 40 22 22 23 - /* 1 for AENQ + ADMIN */ 24 - #define EFA_NUM_MSIX_VEC 1 25 23 #define EFA_MGMNT_MSIX_VEC_IDX 0 24 + #define EFA_COMP_EQS_VEC_BASE 1 26 25 27 26 struct efa_irq { 28 27 irq_handler_t handler; 29 28 void *data; 30 29 u32 irqn; 30 + u32 vector; 31 31 cpumask_t affinity_hint_mask; 32 32 char name[EFA_IRQNAME_SIZE]; 33 33 }; ··· 61 61 struct efa_irq admin_irq; 62 62 63 63 struct efa_stats stats; 64 + 65 + /* Array of completion EQs */ 66 + struct efa_eq *eqs; 67 + unsigned int neqs; 68 + 69 + /* Only stores CQs with interrupts enabled */ 70 + struct xarray cqs_xa; 64 71 }; 65 72 66 73 struct efa_ucontext { ··· 91 84 dma_addr_t dma_addr; 92 85 void *cpu_addr; 93 86 struct rdma_user_mmap_entry *mmap_entry; 87 + struct rdma_user_mmap_entry *db_mmap_entry; 94 88 size_t size; 95 89 u16 cq_idx; 90 + /* NULL when no interrupts requested */ 91 + struct efa_eq *eq; 96 92 }; 97 93 98 94 struct efa_qp { ··· 126 116 u8 id[EFA_GID_SIZE]; 127 117 }; 128 118 119 + struct efa_eq { 120 + struct efa_com_eq eeq; 121 + struct efa_irq irq; 122 + }; 123 + 129 124 int efa_query_device(struct ib_device *ibdev, 130 125 struct ib_device_attr *props, 131 126 struct ib_udata *udata); ··· 154 139 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, 155 140 u64 virt_addr, int access_flags, 156 141 struct ib_udata *udata); 142 + struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, 143 + u64 length, u64 virt_addr, 144 + int fd, int access_flags, 145 + struct ib_udata *udata); 157 146 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); 158 147 int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, 159 148 struct ib_port_immutable *immutable);
+95 -5
drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
··· 28 28 EFA_ADMIN_DEALLOC_PD = 15, 29 29 EFA_ADMIN_ALLOC_UAR = 16, 30 30 EFA_ADMIN_DEALLOC_UAR = 17, 31 - EFA_ADMIN_MAX_OPCODE = 17, 31 + EFA_ADMIN_CREATE_EQ = 18, 32 + EFA_ADMIN_DESTROY_EQ = 19, 33 + EFA_ADMIN_MAX_OPCODE = 19, 32 34 }; 33 35 34 36 enum efa_admin_aq_feature_id { ··· 40 38 EFA_ADMIN_QUEUE_ATTR = 4, 41 39 EFA_ADMIN_HW_HINTS = 5, 42 40 EFA_ADMIN_HOST_INFO = 6, 41 + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, 43 42 }; 44 43 45 44 /* QP transport type */ ··· 433 430 /* 434 431 * 4:0 : reserved5 - MBZ 435 432 * 5 : interrupt_mode_enabled - if set, cq operates 436 - * in interrupt mode (i.e. CQ events and MSI-X are 437 - * generated), otherwise - polling 433 + * in interrupt mode (i.e. CQ events and EQ elements 434 + * are generated), otherwise - polling 438 435 * 6 : virt - If set, ring base address is virtual 439 436 * (IOVA returned by MR registration) 440 437 * 7 : reserved6 - MBZ ··· 451 448 /* completion queue depth in # of entries. must be power of 2 */ 452 449 u16 cq_depth; 453 450 454 - /* msix vector assigned to this cq */ 455 - u32 msix_vector_idx; 451 + /* EQ number assigned to this cq */ 452 + u16 eqn; 453 + 454 + /* MBZ */ 455 + u16 reserved; 456 456 457 457 /* 458 458 * CQ ring base address, virtual or physical depending on 'virt' ··· 486 480 487 481 /* actual cq depth in number of entries */ 488 482 u16 cq_actual_depth; 483 + 484 + /* CQ doorbell address, as offset to PCIe DB BAR */ 485 + u32 db_offset; 486 + 487 + /* 488 + * 0 : db_valid - If set, doorbell offset is valid. 489 + * Always set when interrupts are requested. 490 + */ 491 + u32 flags; 489 492 }; 490 493 491 494 struct efa_admin_destroy_cq_cmd { ··· 684 669 u16 max_tx_batch; 685 670 }; 686 671 672 + struct efa_admin_event_queue_attr_desc { 673 + /* The maximum number of event queues supported */ 674 + u32 max_eq; 675 + 676 + /* Maximum number of EQEs per Event Queue */ 677 + u32 max_eq_depth; 678 + 679 + /* Supported events bitmask */ 680 + u32 event_bitmask; 681 + }; 682 + 687 683 struct efa_admin_feature_aenq_desc { 688 684 /* bitmask for AENQ groups the device can report */ 689 685 u32 supported_groups; ··· 752 726 struct efa_admin_feature_network_attr_desc network_attr; 753 727 754 728 struct efa_admin_feature_queue_attr_desc queue_attr; 729 + 730 + struct efa_admin_event_queue_attr_desc event_queue_attr; 755 731 756 732 struct efa_admin_hw_hints hw_hints; 757 733 } u; ··· 835 807 }; 836 808 837 809 struct efa_admin_dealloc_uar_resp { 810 + struct efa_admin_acq_common_desc acq_common_desc; 811 + }; 812 + 813 + struct efa_admin_create_eq_cmd { 814 + struct efa_admin_aq_common_desc aq_common_descriptor; 815 + 816 + /* Size of the EQ in entries, must be power of 2 */ 817 + u16 depth; 818 + 819 + /* MSI-X table entry index */ 820 + u8 msix_vec; 821 + 822 + /* 823 + * 4:0 : entry_size_words - size of EQ entry in 824 + * 32-bit words 825 + * 7:5 : reserved - MBZ 826 + */ 827 + u8 caps; 828 + 829 + /* EQ ring base address */ 830 + struct efa_common_mem_addr ba; 831 + 832 + /* 833 + * Enabled events on this EQ 834 + * 0 : completion_events - Enable completion events 835 + * 31:1 : reserved - MBZ 836 + */ 837 + u32 event_bitmask; 838 + 839 + /* MBZ */ 840 + u32 reserved; 841 + }; 842 + 843 + struct efa_admin_create_eq_resp { 844 + struct efa_admin_acq_common_desc acq_common_desc; 845 + 846 + /* EQ number */ 847 + u16 eqn; 848 + 849 + /* MBZ */ 850 + u16 reserved; 851 + }; 852 + 853 + struct efa_admin_destroy_eq_cmd { 854 + struct efa_admin_aq_common_desc aq_common_descriptor; 855 + 856 + /* EQ number */ 857 + u16 
eqn; 858 + 859 + /* MBZ */ 860 + u16 reserved; 861 + }; 862 + 863 + struct efa_admin_destroy_eq_resp { 838 864 struct efa_admin_acq_common_desc acq_common_desc; 839 865 }; 840 866 ··· 981 899 #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) 982 900 #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) 983 901 902 + /* create_cq_resp */ 903 + #define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) 904 + 984 905 /* feature_device_attr_desc */ 985 906 #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) 986 907 #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) 908 + 909 + /* create_eq_cmd */ 910 + #define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) 911 + #define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) 912 + #define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) 987 913 988 914 /* host_info */ 989 915 #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0)
+41
drivers/infiniband/hw/efa/efa_admin_defs.h
··· 118 118 u32 inline_data_w4[12]; 119 119 }; 120 120 121 + enum efa_admin_eqe_event_type { 122 + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, 123 + }; 124 + 125 + /* Completion event */ 126 + struct efa_admin_comp_event { 127 + /* CQ number */ 128 + u16 cqn; 129 + 130 + /* MBZ */ 131 + u16 reserved; 132 + 133 + /* MBZ */ 134 + u32 reserved2; 135 + }; 136 + 137 + /* Event Queue Element */ 138 + struct efa_admin_eqe { 139 + /* 140 + * 0 : phase 141 + * 8:1 : event_type - Event type 142 + * 31:9 : reserved - MBZ 143 + */ 144 + u32 common; 145 + 146 + /* MBZ */ 147 + u32 reserved; 148 + 149 + union { 150 + /* Event data */ 151 + u32 event_data[2]; 152 + 153 + /* Completion Event */ 154 + struct efa_admin_comp_event comp_event; 155 + } u; 156 + }; 157 + 121 158 /* aq_common_desc */ 122 159 #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) 123 160 #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) ··· 167 130 168 131 /* aenq_common_desc */ 169 132 #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) 133 + 134 + /* eqe */ 135 + #define EFA_ADMIN_EQE_PHASE_MASK BIT(0) 136 + #define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) 170 137 171 138 #endif /* _EFA_ADMIN_H_ */
+164
drivers/infiniband/hw/efa/efa_com.c
··· 56 56 EFA_CMD_STR_CASE(DEALLOC_PD); 57 57 EFA_CMD_STR_CASE(ALLOC_UAR); 58 58 EFA_CMD_STR_CASE(DEALLOC_UAR); 59 + EFA_CMD_STR_CASE(CREATE_EQ); 60 + EFA_CMD_STR_CASE(DESTROY_EQ); 59 61 default: return "unknown command opcode"; 60 62 } 61 63 #undef EFA_CMD_STR_CASE 64 + } 65 + 66 + void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) 67 + { 68 + *addr_low = lower_32_bits(addr); 69 + *addr_high = upper_32_bits(addr); 62 70 } 63 71 64 72 static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) ··· 1088 1080 edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US; 1089 1081 1090 1082 return 0; 1083 + } 1084 + 1085 + static int efa_com_create_eq(struct efa_com_dev *edev, 1086 + struct efa_com_create_eq_params *params, 1087 + struct efa_com_create_eq_result *result) 1088 + { 1089 + struct efa_com_admin_queue *aq = &edev->aq; 1090 + struct efa_admin_create_eq_resp resp = {}; 1091 + struct efa_admin_create_eq_cmd cmd = {}; 1092 + int err; 1093 + 1094 + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; 1095 + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, 1096 + params->entry_size_in_bytes / 4); 1097 + cmd.depth = params->depth; 1098 + cmd.event_bitmask = params->event_bitmask; 1099 + cmd.msix_vec = params->msix_vec; 1100 + 1101 + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, 1102 + &cmd.ba.mem_addr_low); 1103 + 1104 + err = efa_com_cmd_exec(aq, 1105 + (struct efa_admin_aq_entry *)&cmd, 1106 + sizeof(cmd), 1107 + (struct efa_admin_acq_entry *)&resp, 1108 + sizeof(resp)); 1109 + if (err) { 1110 + ibdev_err_ratelimited(edev->efa_dev, 1111 + "Failed to create eq[%d]\n", err); 1112 + return err; 1113 + } 1114 + 1115 + result->eqn = resp.eqn; 1116 + 1117 + return 0; 1118 + } 1119 + 1120 + static void efa_com_destroy_eq(struct efa_com_dev *edev, 1121 + struct efa_com_destroy_eq_params *params) 1122 + { 1123 + struct efa_com_admin_queue *aq = &edev->aq; 1124 + struct efa_admin_destroy_eq_resp resp = {}; 1125 + struct efa_admin_destroy_eq_cmd cmd = {}; 1126 + int err; 1127 + 1128 + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ; 1129 + cmd.eqn = params->eqn; 1130 + 1131 + err = efa_com_cmd_exec(aq, 1132 + (struct efa_admin_aq_entry *)&cmd, 1133 + sizeof(cmd), 1134 + (struct efa_admin_acq_entry *)&resp, 1135 + sizeof(resp)); 1136 + if (err) 1137 + ibdev_err_ratelimited(edev->efa_dev, 1138 + "Failed to destroy EQ-%u [%d]\n", cmd.eqn, 1139 + err); 1140 + } 1141 + 1142 + static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq) 1143 + { 1144 + u32 val = 0; 1145 + 1146 + EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn); 1147 + EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1); 1148 + 1149 + writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF); 1150 + } 1151 + 1152 + void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, 1153 + struct efa_com_eq *eeq) 1154 + { 1155 + struct efa_admin_eqe *eqe; 1156 + u32 processed = 0; 1157 + u8 phase; 1158 + u32 ci; 1159 + 1160 + ci = eeq->cc & (eeq->depth - 1); 1161 + phase = eeq->phase; 1162 + eqe = &eeq->eqes[ci]; 1163 + 1164 + /* Go over all the events */ 1165 + while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) { 1166 + /* 1167 + * Do not read the rest of the completion entry before the 1168 + * phase bit was validated 1169 + */ 1170 + dma_rmb(); 1171 + 1172 + eeq->cb(eeq, eqe); 1173 + 1174 + /* Get next event entry */ 1175 + ci++; 1176 + processed++; 1177 + 1178 + if (ci == eeq->depth) { 1179 + ci = 0; 1180 + phase = !phase; 1181 + } 1182 + 1183 + eqe = &eeq->eqes[ci]; 1184 + } 1185 
+ 1186 + eeq->cc += processed; 1187 + eeq->phase = phase; 1188 + efa_com_arm_eq(eeq->edev, eeq); 1189 + } 1190 + 1191 + void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq) 1192 + { 1193 + struct efa_com_destroy_eq_params params = { 1194 + .eqn = eeq->eqn, 1195 + }; 1196 + 1197 + efa_com_destroy_eq(edev, &params); 1198 + dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes), 1199 + eeq->eqes, eeq->dma_addr); 1200 + } 1201 + 1202 + int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, 1203 + efa_eqe_handler cb, u16 depth, u8 msix_vec) 1204 + { 1205 + struct efa_com_create_eq_params params = {}; 1206 + struct efa_com_create_eq_result result = {}; 1207 + int err; 1208 + 1209 + params.depth = depth; 1210 + params.entry_size_in_bytes = sizeof(*eeq->eqes); 1211 + EFA_SET(&params.event_bitmask, 1212 + EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1); 1213 + params.msix_vec = msix_vec; 1214 + 1215 + eeq->eqes = dma_alloc_coherent(edev->dmadev, 1216 + params.depth * sizeof(*eeq->eqes), 1217 + &params.dma_addr, GFP_KERNEL); 1218 + if (!eeq->eqes) 1219 + return -ENOMEM; 1220 + 1221 + err = efa_com_create_eq(edev, &params, &result); 1222 + if (err) 1223 + goto err_free_coherent; 1224 + 1225 + eeq->eqn = result.eqn; 1226 + eeq->edev = edev; 1227 + eeq->dma_addr = params.dma_addr; 1228 + eeq->phase = 1; 1229 + eeq->depth = params.depth; 1230 + eeq->cb = cb; 1231 + efa_com_arm_eq(edev, eeq); 1232 + 1233 + return 0; 1234 + 1235 + err_free_coherent: 1236 + dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes), 1237 + eeq->eqes, params.dma_addr); 1238 + return err; 1091 1239 }
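The consumer loop above uses a phase (toggle) bit rather than a producer index: the device writes each EQE with the phase value of the current lap, and the driver flips its expected phase every time the consumer index wraps, so entries left over from the previous lap fail the comparison. A minimal restatement of that validity test (illustrative only, not a helper the driver defines):

    static bool efa_eqe_ready(struct efa_admin_eqe *eqe, u8 expected_phase)
    {
            /* phase is bit 0 of 'common'; the rest of the entry may only be
             * read after dma_rmb(), as in the loop above
             */
            return (READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) ==
                   expected_phase;
    }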
+37 -1
drivers/infiniband/hw/efa/efa_com.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 2 /* 3 - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef _EFA_COM_H_ ··· 80 80 }; 81 81 82 82 struct efa_aenq_handlers; 83 + struct efa_com_eq; 84 + typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq, 85 + struct efa_admin_eqe *eqe); 83 86 84 87 struct efa_com_aenq { 85 88 struct efa_admin_aenq_entry *entries; ··· 115 112 struct efa_com_mmio_read mmio_read; 116 113 }; 117 114 115 + struct efa_com_eq { 116 + struct efa_com_dev *edev; 117 + struct efa_admin_eqe *eqes; 118 + dma_addr_t dma_addr; 119 + u32 cc; /* Consumer counter */ 120 + u16 eqn; 121 + u16 depth; 122 + u8 phase; 123 + efa_eqe_handler cb; 124 + }; 125 + 126 + struct efa_com_create_eq_params { 127 + dma_addr_t dma_addr; 128 + u32 event_bitmask; 129 + u16 depth; 130 + u8 entry_size_in_bytes; 131 + u8 msix_vec; 132 + }; 133 + 134 + struct efa_com_create_eq_result { 135 + u16 eqn; 136 + }; 137 + 138 + struct efa_com_destroy_eq_params { 139 + u16 eqn; 140 + }; 141 + 118 142 typedef void (*efa_aenq_handler)(void *data, 119 143 struct efa_admin_aenq_entry *aenq_e); 120 144 ··· 151 121 efa_aenq_handler unimplemented_handler; 152 122 }; 153 123 124 + void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); 154 125 int efa_com_admin_init(struct efa_com_dev *edev, 155 126 struct efa_aenq_handlers *aenq_handlers); 156 127 void efa_com_admin_destroy(struct efa_com_dev *edev); 128 + int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, 129 + efa_eqe_handler cb, u16 depth, u8 msix_vec); 130 + void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq); 157 131 int efa_com_dev_reset(struct efa_com_dev *edev, 158 132 enum efa_regs_reset_reason_types reset_reason); 159 133 void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); ··· 174 140 struct efa_admin_acq_entry *comp, 175 141 size_t comp_size); 176 142 void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); 143 + void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, 144 + struct efa_com_eq *eeq); 177 145 178 146 #endif /* _EFA_COM_H_ */
+27 -8
drivers/infiniband/hw/efa/efa_com_cmd.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause 2 2 /* 3 - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #include "efa_com.h" 7 7 #include "efa_com_cmd.h" 8 - 9 - void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) 10 - { 11 - *addr_low = lower_32_bits(addr); 12 - *addr_high = upper_32_bits(addr); 13 - } 14 8 15 9 int efa_com_create_qp(struct efa_com_dev *edev, 16 10 struct efa_com_create_qp_params *params, ··· 151 157 struct efa_com_create_cq_params *params, 152 158 struct efa_com_create_cq_result *result) 153 159 { 154 - struct efa_admin_create_cq_resp cmd_completion; 160 + struct efa_admin_create_cq_resp cmd_completion = {}; 155 161 struct efa_admin_create_cq_cmd create_cmd = {}; 156 162 struct efa_com_admin_queue *aq = &edev->aq; 157 163 int err; ··· 163 169 create_cmd.cq_depth = params->cq_depth; 164 170 create_cmd.num_sub_cqs = params->num_sub_cqs; 165 171 create_cmd.uar = params->uarn; 172 + if (params->interrupt_mode_enabled) { 173 + EFA_SET(&create_cmd.cq_caps_1, 174 + EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); 175 + create_cmd.eqn = params->eqn; 176 + } 166 177 167 178 efa_com_set_dma_addr(params->dma_addr, 168 179 &create_cmd.cq_ba.mem_addr_high, ··· 186 187 187 188 result->cq_idx = cmd_completion.cq_idx; 188 189 result->actual_depth = params->cq_depth; 190 + result->db_off = cmd_completion.db_offset; 191 + result->db_valid = EFA_GET(&cmd_completion.flags, 192 + EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); 189 193 190 194 return 0; 191 195 } ··· 498 496 memcpy(result->addr, resp.u.network_attr.addr, 499 497 sizeof(resp.u.network_attr.addr)); 500 498 result->mtu = resp.u.network_attr.mtu; 499 + 500 + if (efa_com_check_supported_feature_id(edev, 501 + EFA_ADMIN_EVENT_QUEUE_ATTR)) { 502 + err = efa_com_get_feature(edev, &resp, 503 + EFA_ADMIN_EVENT_QUEUE_ATTR); 504 + if (err) { 505 + ibdev_err_ratelimited( 506 + edev->efa_dev, 507 + "Failed to get event queue attributes %d\n", 508 + err); 509 + return err; 510 + } 511 + 512 + result->max_eq = resp.u.event_queue_attr.max_eq; 513 + result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth; 514 + result->event_bitmask = resp.u.event_queue_attr.event_bitmask; 515 + } 501 516 502 517 return 0; 503 518 }
+8 -2
drivers/infiniband/hw/efa/efa_com_cmd.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 2 /* 3 - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef _EFA_COM_CMD_H_ ··· 73 73 u16 cq_depth; 74 74 u16 num_sub_cqs; 75 75 u16 uarn; 76 + u16 eqn; 76 77 u8 entry_size_in_bytes; 78 + bool interrupt_mode_enabled; 77 79 }; 78 80 79 81 struct efa_com_create_cq_result { ··· 83 81 u16 cq_idx; 84 82 /* actual cq depth in # of entries */ 85 83 u16 actual_depth; 84 + u32 db_off; 85 + bool db_valid; 86 86 }; 87 87 88 88 struct efa_com_destroy_cq_params { ··· 129 125 u32 max_llq_size; 130 126 u32 max_rdma_size; 131 127 u32 device_caps; 128 + u32 max_eq; 129 + u32 max_eq_depth; 130 + u32 event_bitmask; /* EQ events bitmask */ 132 131 u16 sub_cqs_per_cq; 133 132 u16 max_sq_sge; 134 133 u16 max_rq_sge; ··· 267 260 struct efa_com_rdma_read_stats rdma_read_stats; 268 261 }; 269 262 270 - void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); 271 263 int efa_com_create_qp(struct efa_com_dev *edev, 272 264 struct efa_com_create_qp_params *params, 273 265 struct efa_com_create_qp_result *res);
+156 -26
drivers/infiniband/hw/efa/efa_main.c
··· 67 67 pci_release_selected_regions(pdev, release_bars); 68 68 } 69 69 70 + static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) 71 + { 72 + u16 cqn = eqe->u.comp_event.cqn; 73 + struct efa_cq *cq; 74 + 75 + /* Safe to load as we're in irq and removal calls synchronize_irq() */ 76 + cq = xa_load(&dev->cqs_xa, cqn); 77 + if (unlikely(!cq)) { 78 + ibdev_err_ratelimited(&dev->ibdev, 79 + "Completion event on non-existent CQ[%u]", 80 + cqn); 81 + return; 82 + } 83 + 84 + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 85 + } 86 + 87 + static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) 88 + { 89 + struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev); 90 + 91 + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == 92 + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) 93 + efa_process_comp_eqe(dev, eqe); 94 + else 95 + ibdev_err_ratelimited(&dev->ibdev, 96 + "Unknown event type received %lu", 97 + EFA_GET(&eqe->common, 98 + EFA_ADMIN_EQE_EVENT_TYPE)); 99 + } 100 + 101 + static irqreturn_t efa_intr_msix_comp(int irq, void *data) 102 + { 103 + struct efa_eq *eq = data; 104 + struct efa_com_dev *edev = eq->eeq.edev; 105 + 106 + efa_com_eq_comp_intr_handler(edev, &eq->eeq); 107 + 108 + return IRQ_HANDLED; 109 + } 110 + 70 111 static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) 71 112 { 72 113 struct efa_dev *dev = data; ··· 118 77 return IRQ_HANDLED; 119 78 } 120 79 121 - static int efa_request_mgmnt_irq(struct efa_dev *dev) 80 + static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) 122 81 { 123 - struct efa_irq *irq; 124 82 int err; 125 83 126 - irq = &dev->admin_irq; 127 84 err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); 128 85 if (err) { 129 - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", 130 - err); 86 + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", 87 + irq->name, err); 131 88 return err; 132 89 } 133 90 134 - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", 135 - nr_cpumask_bits, &irq->affinity_hint_mask, irq->irqn); 136 91 irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); 137 92 138 93 return 0; 94 + } 95 + 96 + static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, 97 + int vector) 98 + { 99 + u32 cpu; 100 + 101 + cpu = vector - EFA_COMP_EQS_VEC_BASE; 102 + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, 103 + pci_name(dev->pdev)); 104 + eq->irq.handler = efa_intr_msix_comp; 105 + eq->irq.data = eq; 106 + eq->irq.vector = vector; 107 + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); 108 + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); 109 + } 110 + 111 + static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq) 112 + { 113 + irq_set_affinity_hint(irq->irqn, NULL); 114 + free_irq(irq->irqn, irq->data); 139 115 } 140 116 141 117 static void efa_setup_mgmnt_irq(struct efa_dev *dev) ··· 163 105 "efa-mgmnt@pci:%s", pci_name(dev->pdev)); 164 106 dev->admin_irq.handler = efa_intr_msix_mgmnt; 165 107 dev->admin_irq.data = dev; 166 - dev->admin_irq.irqn = 167 - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); 108 + dev->admin_irq.vector = dev->admin_msix_vector_idx; 109 + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, 110 + dev->admin_msix_vector_idx); 168 111 cpu = cpumask_first(cpu_online_mask); 169 112 cpumask_set_cpu(cpu, 170 113 &dev->admin_irq.affinity_hint_mask); ··· 174 115 dev->admin_irq.name); 175 116 } 176 117 177 - static void 
efa_free_mgmnt_irq(struct efa_dev *dev) 178 - { 179 - struct efa_irq *irq; 180 - 181 - irq = &dev->admin_irq; 182 - irq_set_affinity_hint(irq->irqn, NULL); 183 - free_irq(irq->irqn, irq->data); 184 - } 185 - 186 118 static int efa_set_mgmnt_irq(struct efa_dev *dev) 187 119 { 188 120 efa_setup_mgmnt_irq(dev); 189 121 190 - return efa_request_mgmnt_irq(dev); 122 + return efa_request_irq(dev, &dev->admin_irq); 191 123 } 192 124 193 125 static int efa_request_doorbell_bar(struct efa_dev *dev) ··· 284 234 dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); 285 235 } 286 236 237 + static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) 238 + { 239 + efa_com_eq_destroy(&dev->edev, &eq->eeq); 240 + efa_free_irq(dev, &eq->irq); 241 + } 242 + 243 + static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) 244 + { 245 + int err; 246 + 247 + efa_setup_comp_irq(dev, eq, msix_vec); 248 + err = efa_request_irq(dev, &eq->irq); 249 + if (err) 250 + return err; 251 + 252 + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, 253 + dev->dev_attr.max_eq_depth, msix_vec); 254 + if (err) 255 + goto err_free_comp_irq; 256 + 257 + return 0; 258 + 259 + err_free_comp_irq: 260 + efa_free_irq(dev, &eq->irq); 261 + return err; 262 + } 263 + 264 + static int efa_create_eqs(struct efa_dev *dev) 265 + { 266 + unsigned int neqs = dev->dev_attr.max_eq; 267 + int err; 268 + int i; 269 + 270 + neqs = min_t(unsigned int, neqs, num_online_cpus()); 271 + dev->neqs = neqs; 272 + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); 273 + if (!dev->eqs) 274 + return -ENOMEM; 275 + 276 + for (i = 0; i < neqs; i++) { 277 + err = efa_create_eq(dev, &dev->eqs[i], 278 + i + EFA_COMP_EQS_VEC_BASE); 279 + if (err) 280 + goto err_destroy_eqs; 281 + } 282 + 283 + return 0; 284 + 285 + err_destroy_eqs: 286 + for (i--; i >= 0; i--) 287 + efa_destroy_eq(dev, &dev->eqs[i]); 288 + kfree(dev->eqs); 289 + 290 + return err; 291 + } 292 + 293 + static void efa_destroy_eqs(struct efa_dev *dev) 294 + { 295 + int i; 296 + 297 + for (i = 0; i < dev->neqs; i++) 298 + efa_destroy_eq(dev, &dev->eqs[i]); 299 + 300 + kfree(dev->eqs); 301 + } 302 + 287 303 static const struct ib_device_ops efa_dev_ops = { 288 304 .owner = THIS_MODULE, 289 305 .driver_id = RDMA_DRIVER_EFA, ··· 380 264 .query_port = efa_query_port, 381 265 .query_qp = efa_query_qp, 382 266 .reg_user_mr = efa_reg_mr, 267 + .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf, 383 268 384 269 INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), 385 270 INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), ··· 417 300 if (err) 418 301 goto err_release_doorbell_bar; 419 302 303 + err = efa_create_eqs(dev); 304 + if (err) 305 + goto err_release_doorbell_bar; 306 + 420 307 efa_set_host_info(dev); 421 308 422 309 dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; 423 310 dev->ibdev.phys_port_cnt = 1; 424 - dev->ibdev.num_comp_vectors = 1; 311 + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; 425 312 dev->ibdev.dev.parent = &pdev->dev; 426 313 427 314 ib_set_device_ops(&dev->ibdev, &efa_dev_ops); 428 315 429 316 err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); 430 317 if (err) 431 - goto err_release_doorbell_bar; 318 + goto err_destroy_eqs; 432 319 433 320 ibdev_info(&dev->ibdev, "IB device registered\n"); 434 321 435 322 return 0; 436 323 324 + err_destroy_eqs: 325 + efa_destroy_eqs(dev); 437 326 err_release_doorbell_bar: 438 327 efa_release_doorbell_bar(dev); 439 328 return err; ··· 447 324 448 325 static void efa_ib_device_remove(struct efa_dev *dev) 449 326 { 450 - 
efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); 451 327 ibdev_info(&dev->ibdev, "Unregister ib device\n"); 452 328 ib_unregister_device(&dev->ibdev); 329 + efa_destroy_eqs(dev); 330 + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); 453 331 efa_release_doorbell_bar(dev); 454 332 } 455 333 ··· 463 339 { 464 340 int msix_vecs, irq_num; 465 341 466 - /* Reserve the max msix vectors we might need */ 467 - msix_vecs = EFA_NUM_MSIX_VEC; 342 + /* 343 + * Reserve the max msix vectors we might need, one vector is reserved 344 + * for admin. 345 + */ 346 + msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev), 347 + num_online_cpus() + 1); 468 348 dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", 469 349 msix_vecs); 470 350 ··· 549 421 edev->efa_dev = dev; 550 422 edev->dmadev = &pdev->dev; 551 423 dev->pdev = pdev; 424 + xa_init(&dev->cqs_xa); 552 425 553 426 bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; 554 427 err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); ··· 605 476 return dev; 606 477 607 478 err_free_mgmnt_irq: 608 - efa_free_mgmnt_irq(dev); 479 + efa_free_irq(dev, &dev->admin_irq); 609 480 err_disable_msix: 610 481 efa_disable_msix(dev); 611 482 err_reg_read_destroy: ··· 628 499 629 500 edev = &dev->edev; 630 501 efa_com_admin_destroy(edev); 631 - efa_free_mgmnt_irq(dev); 502 + efa_free_irq(dev, &dev->admin_irq); 632 503 efa_disable_msix(dev); 633 504 efa_com_mmio_reg_read_destroy(edev); 634 505 devm_iounmap(&pdev->dev, edev->reg_bar); 635 506 efa_release_bars(dev, EFA_BASE_BAR_MASK); 507 + xa_destroy(&dev->cqs_xa); 636 508 ib_dealloc_device(&dev->ibdev); 637 509 pci_disable_device(pdev); 638 510 }
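Vector and EQ counts now scale with the CPU count instead of being fixed at one: the MSI-X setup asks for min(pci_msix_vec_count(), num_online_cpus() + 1) vectors, vector 0 stays with the admin queue, and efa_create_eqs() builds min(max_eq, num_online_cpus()) completion EQs on vectors starting at EFA_COMP_EQS_VEC_BASE. As a worked example (assuming the device advertises enough MSI-X vectors and a sufficiently large max_eq): on a host with 8 online CPUs the driver requests 9 vectors, creates 8 EQs bound to vectors 1-8 with affinity hints on CPUs 0-7, and reports num_comp_vectors = 8; when no EQs are available it falls back to num_comp_vectors = 1 as before.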
+6 -1
drivers/infiniband/hw/efa/efa_regs_defs.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 2 /* 3 - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef _EFA_REGS_H_ ··· 42 42 #define EFA_REGS_MMIO_REG_READ_OFF 0x5c 43 43 #define EFA_REGS_MMIO_RESP_LO_OFF 0x60 44 44 #define EFA_REGS_MMIO_RESP_HI_OFF 0x64 45 + #define EFA_REGS_EQ_DB_OFF 0x68 45 46 46 47 /* version register */ 47 48 #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff ··· 93 92 /* mmio_reg_read register */ 94 93 #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff 95 94 #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 95 + 96 + /* eq_db register */ 97 + #define EFA_REGS_EQ_DB_EQN_MASK 0xffff 98 + #define EFA_REGS_EQ_DB_ARM_MASK 0x80000000 96 99 97 100 #endif /* _EFA_REGS_H_ */
+166 -47
drivers/infiniband/hw/efa/efa_verbs.c
··· 3 3 * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 + #include <linux/dma-buf.h> 7 + #include <linux/dma-resv.h> 6 8 #include <linux/vmalloc.h> 7 9 #include <linux/log2.h> 8 10 ··· 62 60 op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ 63 61 64 62 #define EFA_STATS_ENUM(ename, name) ename, 65 - #define EFA_STATS_STR(ename, name) [ename] = name, 63 + #define EFA_STATS_STR(ename, nam) \ 64 + [ename].name = nam, 66 65 67 66 enum efa_hw_device_stats { 68 67 EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM) 69 68 }; 70 69 71 - static const char *const efa_device_stats_names[] = { 70 + static const struct rdma_stat_desc efa_device_stats_descs[] = { 72 71 EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR) 73 72 }; 74 73 ··· 77 74 EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM) 78 75 }; 79 76 80 - static const char *const efa_port_stats_names[] = { 77 + static const struct rdma_stat_desc efa_port_stats_descs[] = { 81 78 EFA_DEFINE_PORT_STATS(EFA_STATS_STR) 82 79 }; 83 80 ··· 247 244 248 245 if (EFA_DEV_CAP(dev, RNR_RETRY)) 249 246 resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; 247 + 248 + if (dev->neqs) 249 + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; 250 250 251 251 err = ib_copy_to_udata(udata, &resp, 252 252 min(sizeof(resp), udata->outlen)); ··· 990 984 return efa_com_destroy_cq(&dev->edev, &params); 991 985 } 992 986 987 + static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) 988 + { 989 + rdma_user_mmap_entry_remove(cq->db_mmap_entry); 990 + rdma_user_mmap_entry_remove(cq->mmap_entry); 991 + } 992 + 993 993 int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) 994 994 { 995 995 struct efa_dev *dev = to_edev(ibcq->device); ··· 1005 993 "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", 1006 994 cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); 1007 995 1008 - rdma_user_mmap_entry_remove(cq->mmap_entry); 996 + efa_cq_user_mmap_entries_remove(cq); 1009 997 efa_destroy_cq_idx(dev, cq->cq_idx); 998 + if (cq->eq) { 999 + xa_erase(&dev->cqs_xa, cq->cq_idx); 1000 + synchronize_irq(cq->eq->irq.irqn); 1001 + } 1010 1002 efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, 1011 1003 DMA_FROM_DEVICE); 1012 1004 return 0; 1013 1005 } 1014 1006 1007 + static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) 1008 + { 1009 + return &dev->eqs[vec]; 1010 + } 1011 + 1015 1012 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, 1016 - struct efa_ibv_create_cq_resp *resp) 1013 + struct efa_ibv_create_cq_resp *resp, 1014 + bool db_valid) 1017 1015 { 1018 1016 resp->q_mmap_size = cq->size; 1019 1017 cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, ··· 1033 1011 if (!cq->mmap_entry) 1034 1012 return -ENOMEM; 1035 1013 1014 + if (db_valid) { 1015 + cq->db_mmap_entry = 1016 + efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, 1017 + dev->db_bar_addr + resp->db_off, 1018 + PAGE_SIZE, EFA_MMAP_IO_NC, 1019 + &resp->db_mmap_key); 1020 + if (!cq->db_mmap_entry) { 1021 + rdma_user_mmap_entry_remove(cq->mmap_entry); 1022 + return -ENOMEM; 1023 + } 1024 + 1025 + resp->db_off &= ~PAGE_MASK; 1026 + resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; 1027 + } 1028 + 1036 1029 return 0; 1037 1030 } 1038 1031 ··· 1056 1019 { 1057 1020 struct efa_ucontext *ucontext = rdma_udata_to_drv_context( 1058 1021 udata, struct efa_ucontext, ibucontext); 1022 + struct efa_com_create_cq_params params = {}; 1059 1023 struct efa_ibv_create_cq_resp resp = {}; 1060 - struct efa_com_create_cq_params params; 1061 1024 struct 
efa_com_create_cq_result result; 1062 1025 struct ib_device *ibdev = ibcq->device; 1063 1026 struct efa_dev *dev = to_edev(ibdev); ··· 1102 1065 goto err_out; 1103 1066 } 1104 1067 1105 - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { 1068 + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { 1106 1069 ibdev_dbg(ibdev, 1107 1070 "Incompatible ABI params, unknown fields in udata\n"); 1108 1071 err = -EINVAL; ··· 1138 1101 params.dma_addr = cq->dma_addr; 1139 1102 params.entry_size_in_bytes = cmd.cq_entry_size; 1140 1103 params.num_sub_cqs = cmd.num_sub_cqs; 1104 + if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { 1105 + cq->eq = efa_vec2eq(dev, attr->comp_vector); 1106 + params.eqn = cq->eq->eeq.eqn; 1107 + params.interrupt_mode_enabled = true; 1108 + } 1109 + 1141 1110 err = efa_com_create_cq(&dev->edev, &params, &result); 1142 1111 if (err) 1143 1112 goto err_free_mapped; 1144 1113 1114 + resp.db_off = result.db_off; 1145 1115 resp.cq_idx = result.cq_idx; 1146 1116 cq->cq_idx = result.cq_idx; 1147 1117 cq->ibcq.cqe = result.actual_depth; 1148 1118 WARN_ON_ONCE(entries != result.actual_depth); 1149 1119 1150 - err = cq_mmap_entries_setup(dev, cq, &resp); 1120 + err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); 1151 1121 if (err) { 1152 1122 ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", 1153 1123 cq->cq_idx); 1154 1124 goto err_destroy_cq; 1125 + } 1126 + 1127 + if (cq->eq) { 1128 + err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); 1129 + if (err) { 1130 + ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", 1131 + cq->cq_idx); 1132 + goto err_remove_mmap; 1133 + } 1155 1134 } 1156 1135 1157 1136 if (udata->outlen) { ··· 1176 1123 if (err) { 1177 1124 ibdev_dbg(ibdev, 1178 1125 "Failed to copy udata for create_cq\n"); 1179 - goto err_remove_mmap; 1126 + goto err_xa_erase; 1180 1127 } 1181 1128 } 1182 1129 ··· 1185 1132 1186 1133 return 0; 1187 1134 1135 + err_xa_erase: 1136 + if (cq->eq) 1137 + xa_erase(&dev->cqs_xa, cq->cq_idx); 1188 1138 err_remove_mmap: 1189 - rdma_user_mmap_entry_remove(cq->mmap_entry); 1139 + efa_cq_user_mmap_entries_remove(cq); 1190 1140 err_destroy_cq: 1191 1141 efa_destroy_cq_idx(dev, cq->cq_idx); 1192 1142 err_free_mapped: ··· 1546 1490 return 0; 1547 1491 } 1548 1492 1549 - struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, 1550 - u64 virt_addr, int access_flags, 1551 - struct ib_udata *udata) 1493 + static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, 1494 + struct ib_udata *udata) 1552 1495 { 1553 1496 struct efa_dev *dev = to_edev(ibpd->device); 1554 - struct efa_com_reg_mr_params params = {}; 1555 - struct efa_com_reg_mr_result result = {}; 1556 - struct pbl_context pbl; 1557 1497 int supp_access_flags; 1558 - unsigned int pg_sz; 1559 1498 struct efa_mr *mr; 1560 - int inline_size; 1561 - int err; 1562 1499 1563 1500 if (udata && udata->inlen && 1564 1501 !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { 1565 1502 ibdev_dbg(&dev->ibdev, 1566 1503 "Incompatible ABI params, udata not cleared\n"); 1567 - err = -EINVAL; 1568 - goto err_out; 1504 + return ERR_PTR(-EINVAL); 1569 1505 } 1570 1506 1571 1507 supp_access_flags = ··· 1569 1521 ibdev_dbg(&dev->ibdev, 1570 1522 "Unsupported access flags[%#x], supported[%#x]\n", 1571 1523 access_flags, supp_access_flags); 1572 - err = -EOPNOTSUPP; 1573 - goto err_out; 1524 + return ERR_PTR(-EOPNOTSUPP); 1574 1525 } 1575 1526 1576 1527 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1577 - if (!mr) { 1578 - err = 
-ENOMEM; 1579 - goto err_out; 1580 - } 1528 + if (!mr) 1529 + return ERR_PTR(-ENOMEM); 1581 1530 1582 - mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); 1583 - if (IS_ERR(mr->umem)) { 1584 - err = PTR_ERR(mr->umem); 1585 - ibdev_dbg(&dev->ibdev, 1586 - "Failed to pin and map user space memory[%d]\n", err); 1587 - goto err_free; 1588 - } 1531 + return mr; 1532 + } 1533 + 1534 + static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, 1535 + u64 length, u64 virt_addr, int access_flags) 1536 + { 1537 + struct efa_dev *dev = to_edev(ibpd->device); 1538 + struct efa_com_reg_mr_params params = {}; 1539 + struct efa_com_reg_mr_result result = {}; 1540 + struct pbl_context pbl; 1541 + unsigned int pg_sz; 1542 + int inline_size; 1543 + int err; 1589 1544 1590 1545 params.pd = to_epd(ibpd)->pdn; 1591 1546 params.iova = virt_addr; ··· 1599 1548 dev->dev_attr.page_size_cap, 1600 1549 virt_addr); 1601 1550 if (!pg_sz) { 1602 - err = -EOPNOTSUPP; 1603 1551 ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", 1604 1552 dev->dev_attr.page_size_cap); 1605 - goto err_unmap; 1553 + return -EOPNOTSUPP; 1606 1554 } 1607 1555 1608 1556 params.page_shift = order_base_2(pg_sz); ··· 1615 1565 if (params.page_num <= inline_size) { 1616 1566 err = efa_create_inline_pbl(dev, mr, &params); 1617 1567 if (err) 1618 - goto err_unmap; 1568 + return err; 1619 1569 1620 1570 err = efa_com_register_mr(&dev->edev, &params, &result); 1621 1571 if (err) 1622 - goto err_unmap; 1572 + return err; 1623 1573 } else { 1624 1574 err = efa_create_pbl(dev, &pbl, mr, &params); 1625 1575 if (err) 1626 - goto err_unmap; 1576 + return err; 1627 1577 1628 1578 err = efa_com_register_mr(&dev->edev, &params, &result); 1629 1579 pbl_destroy(dev, &pbl); 1630 1580 1631 1581 if (err) 1632 - goto err_unmap; 1582 + return err; 1633 1583 } 1634 1584 1635 1585 mr->ibmr.lkey = result.l_key; ··· 1637 1587 mr->ibmr.length = length; 1638 1588 ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); 1639 1589 1590 + return 0; 1591 + } 1592 + 1593 + struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, 1594 + u64 length, u64 virt_addr, 1595 + int fd, int access_flags, 1596 + struct ib_udata *udata) 1597 + { 1598 + struct efa_dev *dev = to_edev(ibpd->device); 1599 + struct ib_umem_dmabuf *umem_dmabuf; 1600 + struct efa_mr *mr; 1601 + int err; 1602 + 1603 + mr = efa_alloc_mr(ibpd, access_flags, udata); 1604 + if (IS_ERR(mr)) { 1605 + err = PTR_ERR(mr); 1606 + goto err_out; 1607 + } 1608 + 1609 + umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd, 1610 + access_flags); 1611 + if (IS_ERR(umem_dmabuf)) { 1612 + err = PTR_ERR(umem_dmabuf); 1613 + ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); 1614 + goto err_free; 1615 + } 1616 + 1617 + mr->umem = &umem_dmabuf->umem; 1618 + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); 1619 + if (err) 1620 + goto err_release; 1621 + 1640 1622 return &mr->ibmr; 1641 1623 1642 - err_unmap: 1624 + err_release: 1625 + ib_umem_release(mr->umem); 1626 + err_free: 1627 + kfree(mr); 1628 + err_out: 1629 + atomic64_inc(&dev->stats.reg_mr_err); 1630 + return ERR_PTR(err); 1631 + } 1632 + 1633 + struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, 1634 + u64 virt_addr, int access_flags, 1635 + struct ib_udata *udata) 1636 + { 1637 + struct efa_dev *dev = to_edev(ibpd->device); 1638 + struct efa_mr *mr; 1639 + int err; 1640 + 1641 + mr = efa_alloc_mr(ibpd, access_flags, 
udata); 1642 + if (IS_ERR(mr)) { 1643 + err = PTR_ERR(mr); 1644 + goto err_out; 1645 + } 1646 + 1647 + mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); 1648 + if (IS_ERR(mr->umem)) { 1649 + err = PTR_ERR(mr->umem); 1650 + ibdev_dbg(&dev->ibdev, 1651 + "Failed to pin and map user space memory[%d]\n", err); 1652 + goto err_free; 1653 + } 1654 + 1655 + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); 1656 + if (err) 1657 + goto err_release; 1658 + 1659 + return &mr->ibmr; 1660 + 1661 + err_release: 1643 1662 ib_umem_release(mr->umem); 1644 1663 err_free: 1645 1664 kfree(mr); ··· 2025 1906 struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, 2026 1907 u32 port_num) 2027 1908 { 2028 - return rdma_alloc_hw_stats_struct(efa_port_stats_names, 2029 - ARRAY_SIZE(efa_port_stats_names), 1909 + return rdma_alloc_hw_stats_struct(efa_port_stats_descs, 1910 + ARRAY_SIZE(efa_port_stats_descs), 2030 1911 RDMA_HW_STATS_DEFAULT_LIFESPAN); 2031 1912 } 2032 1913 2033 1914 struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev) 2034 1915 { 2035 - return rdma_alloc_hw_stats_struct(efa_device_stats_names, 2036 - ARRAY_SIZE(efa_device_stats_names), 1916 + return rdma_alloc_hw_stats_struct(efa_device_stats_descs, 1917 + ARRAY_SIZE(efa_device_stats_descs), 2037 1918 RDMA_HW_STATS_DEFAULT_LIFESPAN); 2038 1919 } 2039 1920 ··· 2058 1939 stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); 2059 1940 stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); 2060 1941 2061 - return ARRAY_SIZE(efa_device_stats_names); 1942 + return ARRAY_SIZE(efa_device_stats_descs); 2062 1943 } 2063 1944 2064 1945 static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, ··· 2107 1988 stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; 2108 1989 stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; 2109 1990 2110 - return ARRAY_SIZE(efa_port_stats_names); 1991 + return ARRAY_SIZE(efa_port_stats_descs); 2111 1992 } 2112 1993 2113 1994 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
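The EFA hunk above splits memory-region setup into efa_alloc_mr() (ABI and access-flag checks plus allocation) and efa_register_mr() (page-size selection, PBL build, device command), so the new dmabuf path can reuse the whole registration flow and only swap ib_umem_get() for ib_umem_dmabuf_get_pinned(). From user space this is reached through the generic verbs dmabuf registration call; the sketch below is illustrative only, assuming rdma-core's ibv_reg_dmabuf_mr() and a dmabuf file descriptor obtained from some external exporter, none of which is part of this diff.

    /* Illustrative only: register a dmabuf-backed MR on an EFA PD.
     * Assumes rdma-core exposing ibv_reg_dmabuf_mr() and a dmabuf fd
     * (dmabuf_fd) covering at least 'len' bytes. */
    #include <infiniband/verbs.h>

    static struct ibv_mr *register_dmabuf_region(struct ibv_pd *pd,
                                                 int dmabuf_fd, size_t len)
    {
            /* offset 0 inside the dmabuf; iova 0 keeps the example simple */
            return ibv_reg_dmabuf_mr(pd, 0, len, 0, dmabuf_fd,
                                     IBV_ACCESS_LOCAL_WRITE |
                                     IBV_ACCESS_REMOTE_READ);
    }

The same hunk also wires CQs to the new completion EQs (EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) and stores them in dev->cqs_xa so a completion event can be resolved back to its ibcq.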
+2 -2
drivers/infiniband/hw/hfi1/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 config INFINIBAND_HFI1 3 - tristate "Intel OPA Gen1 support" 3 + tristate "Cornelis OPX Gen1 support" 4 4 depends on X86_64 && INFINIBAND_RDMAVT && I2C 5 5 select MMU_NOTIFIER 6 6 select CRC32 7 7 select I2C_ALGOBIT 8 8 help 9 - This is a low-level driver for Intel OPA Gen1 adapter. 9 + This is a low-level driver for Cornelis OPX Gen1 adapter. 10 10 config HFI1_DEBUG_SDMA_ORDER 11 11 bool "HFI1 SDMA Order debug" 12 12 depends on INFINIBAND_HFI1
+2 -1
drivers/infiniband/hw/hfi1/chip.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2 2 /* 3 3 * Copyright(c) 2015 - 2020 Intel Corporation. 4 + * Copyright(c) 2021 Cornelis Networks. 4 5 */ 5 6 6 7 /* ··· 14919 14918 { 14920 14919 /* generic board description */ 14921 14920 const char generic[] = 14922 - "Intel Omni-Path Host Fabric Interface Adapter 100 Series"; 14921 + "Cornelis Omni-Path Host Fabric Interface Adapter 100 Series"; 14923 14922 unsigned long size; 14924 14923 int ret; 14925 14924
+2 -1
drivers/infiniband/hw/hfi1/driver.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2 2 /* 3 3 * Copyright(c) 2015-2020 Intel Corporation. 4 + * Copyright(c) 2021 Cornelis Networks. 4 5 */ 5 6 6 7 #include <linux/spinlock.h> ··· 57 56 MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features"); 58 57 59 58 MODULE_LICENSE("Dual BSD/GPL"); 60 - MODULE_DESCRIPTION("Intel Omni-Path Architecture driver"); 59 + MODULE_DESCRIPTION("Cornelis Omni-Path Express driver"); 61 60 62 61 /* 63 62 * MAX_PKT_RCV is the max # if packets processed per receive interrupt.
+4 -6
drivers/infiniband/hw/hfi1/efivar.c
··· 3 3 * Copyright(c) 2015, 2016 Intel Corporation. 4 4 */ 5 5 6 - #include <linux/ctype.h> 6 + #include <linux/string.h> 7 + #include <linux/string_helpers.h> 8 + 7 9 #include "efivar.h" 8 10 9 11 /* GUID for HFI1 variables in EFI */ ··· 114 112 char prefix_name[64]; 115 113 char name[64]; 116 114 int result; 117 - int i; 118 115 119 116 /* create a common prefix */ 120 117 snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x", ··· 129 128 * variable. 130 129 */ 131 130 if (result) { 132 - /* Converting to uppercase */ 133 - for (i = 0; prefix_name[i]; i++) 134 - if (isalpha(prefix_name[i])) 135 - prefix_name[i] = toupper(prefix_name[i]); 131 + string_upper(prefix_name, prefix_name); 136 132 snprintf(name, sizeof(name), "%s-%s", prefix_name, kind); 137 133 result = read_efi_var(name, size, return_data); 138 134 }
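The efivar.c hunk replaces the open-coded isalpha()/toupper() loop with string_upper() from <linux/string_helpers.h>, which upcases while copying and may be used in place since source and destination are the same buffer here. A minimal sketch of the helper, on a hypothetical PCI slot-name buffer:

    #include <linux/string_helpers.h>

    /* string_upper(dst, src) stores toupper() of every byte of src,
     * including the trailing NUL, into dst; dst may alias src. */
    static void upcase_slot_name(char *name)
    {
            string_upper(name, name);   /* "0000:3d:00.0" -> "0000:3D:00.0" */
    }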
+2 -1
drivers/infiniband/hw/hfi1/init.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 2 2 /* 3 3 * Copyright(c) 2015 - 2020 Intel Corporation. 4 + * Copyright(c) 2021 Cornelis Networks. 4 5 */ 5 6 6 7 #include <linux/pci.h> ··· 1343 1342 static int init_one(struct pci_dev *, const struct pci_device_id *); 1344 1343 static void shutdown_one(struct pci_dev *); 1345 1344 1346 - #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " 1345 + #define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: " 1347 1346 #define PFX DRIVER_NAME ": " 1348 1347 1349 1348 const struct pci_device_id hfi1_pci_tbl[] = {
+50 -32
drivers/infiniband/hw/hfi1/ipoib.h
··· 44 44 }; 45 45 46 46 /** 47 - * struct hfi1_ipoib_circ_buf - List of items to be processed 48 - * @items: ring of items 49 - * @head: ring head 50 - * @tail: ring tail 51 - * @max_items: max items + 1 that the ring can contain 52 - * @producer_lock: producer sync lock 53 - * @consumer_lock: consumer sync lock 47 + * struct ipoib_txreq - IPOIB transmit descriptor 48 + * @txreq: sdma transmit request 49 + * @sdma_hdr: 9b ib headers 50 + * @sdma_status: status returned by sdma engine 51 + * @complete: non-zero implies complete 52 + * @priv: ipoib netdev private data 53 + * @txq: txq on which skb was output 54 + * @skb: skb to send 54 55 */ 55 - struct ipoib_txreq; 56 + struct ipoib_txreq { 57 + struct sdma_txreq txreq; 58 + struct hfi1_sdma_header sdma_hdr; 59 + int sdma_status; 60 + int complete; 61 + struct hfi1_ipoib_dev_priv *priv; 62 + struct hfi1_ipoib_txq *txq; 63 + struct sk_buff *skb; 64 + }; 65 + 66 + /** 67 + * struct hfi1_ipoib_circ_buf - List of items to be processed 68 + * @items: ring of items each a power of two size 69 + * @max_items: max items + 1 that the ring can contain 70 + * @shift: log2 of size for getting txreq 71 + * @sent_txreqs: count of txreqs posted to sdma 72 + * @tail: ring tail 73 + * @stops: count of stops of queue 74 + * @ring_full: ring has been filled 75 + * @no_desc: descriptor shortage seen 76 + * @complete_txreqs: count of txreqs completed by sdma 77 + * @head: ring head 78 + */ 56 79 struct hfi1_ipoib_circ_buf { 57 - struct ipoib_txreq **items; 58 - unsigned long head; 59 - unsigned long tail; 60 - unsigned long max_items; 61 - spinlock_t producer_lock; /* head sync lock */ 62 - spinlock_t consumer_lock; /* tail sync lock */ 80 + void *items; 81 + u32 max_items; 82 + u32 shift; 83 + /* consumer cache line */ 84 + u64 ____cacheline_aligned_in_smp sent_txreqs; 85 + u32 avail; 86 + u32 tail; 87 + atomic_t stops; 88 + atomic_t ring_full; 89 + atomic_t no_desc; 90 + /* producer cache line */ 91 + u64 ____cacheline_aligned_in_smp complete_txreqs; 92 + u32 head; 63 93 }; 64 94 65 95 /** ··· 98 68 * @sde: sdma engine 99 69 * @tx_list: tx request list 100 70 * @sent_txreqs: count of txreqs posted to sdma 101 - * @stops: count of stops of queue 102 - * @ring_full: ring has been filled 103 - * @no_desc: descriptor shortage seen 104 71 * @flow: tracks when list needs to be flushed for a flow change 105 72 * @q_idx: ipoib Tx queue index 106 73 * @pkts_sent: indicator packets have been sent from this queue 107 74 * @wait: iowait structure 108 - * @complete_txreqs: count of txreqs completed by sdma 109 75 * @napi: pointer to tx napi interface 110 76 * @tx_ring: ring of ipoib txreqs to be reaped by napi callback 111 77 */ 112 78 struct hfi1_ipoib_txq { 79 + struct napi_struct napi; 113 80 struct hfi1_ipoib_dev_priv *priv; 114 81 struct sdma_engine *sde; 115 82 struct list_head tx_list; 116 - u64 sent_txreqs; 117 - atomic_t stops; 118 - atomic_t ring_full; 119 - atomic_t no_desc; 120 83 union hfi1_ipoib_flow flow; 121 84 u8 q_idx; 122 85 bool pkts_sent; 123 86 struct iowait wait; 124 87 125 - atomic64_t ____cacheline_aligned_in_smp complete_txreqs; 126 - struct napi_struct *napi; 127 - struct hfi1_ipoib_circ_buf tx_ring; 88 + struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring; 128 89 }; 129 90 130 91 struct hfi1_ipoib_dev_priv { ··· 123 102 struct net_device *netdev; 124 103 struct ib_device *device; 125 104 struct hfi1_ipoib_txq *txqs; 126 - struct kmem_cache *txreq_cache; 127 - struct napi_struct *tx_napis; 128 - u16 pkey; 129 - u16 pkey_index; 130 - 
u32 qkey; 131 - u8 port_num; 132 - 133 105 const struct net_device_ops *netdev_ops; 134 106 struct rvt_qp *qp; 107 + u32 qkey; 108 + u16 pkey; 109 + u16 pkey_index; 110 + u8 port_num; 135 111 }; 136 112 137 113 /* hfi1 ipoib rdma netdev's private data structure */
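The reworked hfi1_ipoib_circ_buf embeds the tx descriptors in the ring itself instead of keeping a pointer ring plus a kmem_cache: items is one flat allocation of equal-sized slots, and shift lets the hot path turn an index into a slot address with a shift rather than a multiply (for the offsets to land on slot boundaries, the slot size is rounded up to a power of two). Producer-owned and consumer-owned fields are also placed on separate cache lines via ____cacheline_aligned_in_smp so the xmit path and the napi completion path stop bouncing the same line. A reduced sketch of the addressing scheme, using a hypothetical slot type:

    #include <linux/types.h>

    /* Sketch only: 'demo_slot' stands in for struct ipoib_txreq. */
    struct demo_slot {
            u64 payload[8];
    };

    struct demo_ring {
            void *items;     /* max_items slots, each (1 << shift) bytes */
            u32 max_items;   /* power of two                             */
            u32 shift;       /* log2 of the rounded-up slot size         */
    };

    static inline struct demo_slot *demo_slot_at(struct demo_ring *r, u32 idx)
    {
            /* byte offset of slot 'idx'; void * arithmetic as in kernel C */
            return (struct demo_slot *)(r->items + (idx << r->shift));
    }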
+1 -1
drivers/infiniband/hw/hfi1/ipoib_main.c
··· 11 11 #include "ipoib.h" 12 12 #include "hfi.h" 13 13 14 - static u32 qpn_from_mac(u8 *mac_arr) 14 + static u32 qpn_from_mac(const u8 *mac_arr) 15 15 { 16 16 return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3]; 17 17 }
+134 -182
drivers/infiniband/hw/hfi1/ipoib_tx.c
··· 22 22 #define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) 23 23 #define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) 24 24 25 - /** 26 - * struct ipoib_txreq - IPOIB transmit descriptor 27 - * @txreq: sdma transmit request 28 - * @sdma_hdr: 9b ib headers 29 - * @sdma_status: status returned by sdma engine 30 - * @priv: ipoib netdev private data 31 - * @txq: txq on which skb was output 32 - * @skb: skb to send 33 - */ 34 - struct ipoib_txreq { 35 - struct sdma_txreq txreq; 36 - struct hfi1_sdma_header sdma_hdr; 37 - int sdma_status; 38 - struct hfi1_ipoib_dev_priv *priv; 39 - struct hfi1_ipoib_txq *txq; 40 - struct sk_buff *skb; 41 - }; 42 - 43 25 struct ipoib_txparms { 44 26 struct hfi1_devdata *dd; 45 27 struct rdma_ah_attr *ah_attr; ··· 33 51 u8 entropy; 34 52 }; 35 53 36 - static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) 54 + static struct ipoib_txreq * 55 + hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx) 56 + { 57 + return (struct ipoib_txreq *)(r->items + (idx << r->shift)); 58 + } 59 + 60 + static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) 37 61 { 38 62 return sent - completed; 39 63 } 40 64 41 65 static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq) 42 66 { 43 - return hfi1_ipoib_txreqs(txq->sent_txreqs, 44 - atomic64_read(&txq->complete_txreqs)); 67 + return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, 68 + txq->tx_ring.complete_txreqs); 45 69 } 46 70 47 71 static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq) 48 72 { 49 73 trace_hfi1_txq_stop(txq); 50 - if (atomic_inc_return(&txq->stops) == 1) 74 + if (atomic_inc_return(&txq->tx_ring.stops) == 1) 51 75 netif_stop_subqueue(txq->priv->netdev, txq->q_idx); 52 76 } 53 77 54 78 static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq) 55 79 { 56 80 trace_hfi1_txq_wake(txq); 57 - if (atomic_dec_and_test(&txq->stops)) 81 + if (atomic_dec_and_test(&txq->tx_ring.stops)) 58 82 netif_wake_subqueue(txq->priv->netdev, txq->q_idx); 59 83 } 60 84 ··· 78 90 79 91 static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) 80 92 { 81 - ++txq->sent_txreqs; 93 + ++txq->tx_ring.sent_txreqs; 82 94 if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) && 83 - !atomic_xchg(&txq->ring_full, 1)) { 95 + !atomic_xchg(&txq->tx_ring.ring_full, 1)) { 84 96 trace_hfi1_txq_full(txq); 85 97 hfi1_ipoib_stop_txq(txq); 86 98 } ··· 105 117 * to protect against ring overflow. 
106 118 */ 107 119 if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) && 108 - atomic_xchg(&txq->ring_full, 0)) { 120 + atomic_xchg(&txq->tx_ring.ring_full, 0)) { 109 121 trace_hfi1_txq_xmit_unstopped(txq); 110 122 hfi1_ipoib_wake_txq(txq); 111 123 } ··· 113 125 114 126 static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) 115 127 { 116 - struct hfi1_ipoib_dev_priv *priv = tx->priv; 128 + struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; 117 129 118 130 if (likely(!tx->sdma_status)) { 119 131 dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len); ··· 127 139 } 128 140 129 141 napi_consume_skb(tx->skb, budget); 142 + tx->skb = NULL; 130 143 sdma_txclean(priv->dd, &tx->txreq); 131 - kmem_cache_free(priv->txreq_cache, tx); 132 144 } 133 145 134 - static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget) 146 + static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq) 135 147 { 136 148 struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; 137 - unsigned long head; 138 - unsigned long tail; 139 - unsigned int max_tx; 140 - int work_done; 141 - int tx_count; 149 + int i; 150 + struct ipoib_txreq *tx; 142 151 143 - spin_lock_bh(&tx_ring->consumer_lock); 144 - 145 - /* Read index before reading contents at that index. */ 146 - head = smp_load_acquire(&tx_ring->head); 147 - tail = tx_ring->tail; 148 - max_tx = tx_ring->max_items; 149 - 150 - work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget); 151 - 152 - for (tx_count = work_done; tx_count; tx_count--) { 153 - hfi1_ipoib_free_tx(tx_ring->items[tail], budget); 154 - tail = CIRC_NEXT(tail, max_tx); 152 + for (i = 0; i < tx_ring->max_items; i++) { 153 + tx = hfi1_txreq_from_idx(tx_ring, i); 154 + tx->complete = 0; 155 + dev_kfree_skb_any(tx->skb); 156 + tx->skb = NULL; 157 + sdma_txclean(txq->priv->dd, &tx->txreq); 155 158 } 156 - 157 - atomic64_add(work_done, &txq->complete_txreqs); 158 - 159 - /* Finished freeing tx items so store the tail value. */ 160 - smp_store_release(&tx_ring->tail, tail); 161 - 162 - spin_unlock_bh(&tx_ring->consumer_lock); 163 - 164 - hfi1_ipoib_check_queue_stopped(txq); 165 - 166 - return work_done; 159 + tx_ring->head = 0; 160 + tx_ring->tail = 0; 161 + tx_ring->complete_txreqs = 0; 162 + tx_ring->sent_txreqs = 0; 163 + tx_ring->avail = hfi1_ipoib_ring_hwat(txq); 167 164 } 168 165 169 - static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) 166 + static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) 170 167 { 171 - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); 172 - struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; 168 + struct hfi1_ipoib_txq *txq = 169 + container_of(napi, struct hfi1_ipoib_txq, napi); 170 + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; 171 + u32 head = tx_ring->head; 172 + u32 max_tx = tx_ring->max_items; 173 + int work_done; 174 + struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head); 173 175 174 - int work_done = hfi1_ipoib_drain_tx_ring(txq, budget); 176 + trace_hfi1_txq_poll(txq); 177 + for (work_done = 0; work_done < budget; work_done++) { 178 + /* See hfi1_ipoib_sdma_complete() */ 179 + if (!smp_load_acquire(&tx->complete)) 180 + break; 181 + tx->complete = 0; 182 + trace_hfi1_tx_produce(tx, head); 183 + hfi1_ipoib_free_tx(tx, budget); 184 + head = CIRC_NEXT(head, max_tx); 185 + tx = hfi1_txreq_from_idx(tx_ring, head); 186 + } 187 + tx_ring->complete_txreqs += work_done; 188 + 189 + /* Finished freeing tx items so store the head value. 
*/ 190 + smp_store_release(&tx_ring->head, head); 191 + 192 + hfi1_ipoib_check_queue_stopped(txq); 175 193 176 194 if (work_done < budget) 177 195 napi_complete_done(napi, work_done); ··· 185 191 return work_done; 186 192 } 187 193 188 - static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx) 189 - { 190 - struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring; 191 - unsigned long head; 192 - unsigned long tail; 193 - size_t max_tx; 194 - 195 - spin_lock(&tx_ring->producer_lock); 196 - 197 - head = tx_ring->head; 198 - tail = READ_ONCE(tx_ring->tail); 199 - max_tx = tx_ring->max_items; 200 - 201 - if (likely(CIRC_SPACE(head, tail, max_tx))) { 202 - tx_ring->items[head] = tx; 203 - 204 - /* Finish storing txreq before incrementing head. */ 205 - smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx)); 206 - napi_schedule_irqoff(tx->txq->napi); 207 - } else { 208 - struct hfi1_ipoib_txq *txq = tx->txq; 209 - struct hfi1_ipoib_dev_priv *priv = tx->priv; 210 - 211 - /* Ring was full */ 212 - hfi1_ipoib_free_tx(tx, 0); 213 - atomic64_inc(&txq->complete_txreqs); 214 - dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx); 215 - } 216 - 217 - spin_unlock(&tx_ring->producer_lock); 218 - } 219 - 220 194 static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) 221 195 { 222 196 struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq); 223 197 198 + trace_hfi1_txq_complete(tx->txq); 224 199 tx->sdma_status = status; 225 - 226 - hfi1_ipoib_add_tx(tx); 200 + /* see hfi1_ipoib_poll_tx_ring */ 201 + smp_store_release(&tx->complete, 1); 202 + napi_schedule_irqoff(&tx->txq->napi); 227 203 } 228 204 229 205 static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, ··· 255 291 static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, 256 292 struct ipoib_txparms *txp) 257 293 { 258 - struct hfi1_ipoib_dev_priv *priv = tx->priv; 294 + struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; 259 295 struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; 260 296 struct sk_buff *skb = tx->skb; 261 297 struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); ··· 326 362 327 363 ohdr->bth[0] = cpu_to_be32(bth0); 328 364 ohdr->bth[1] = cpu_to_be32(txp->dqpn); 329 - ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs)); 365 + ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs)); 330 366 331 367 /* Build the deth */ 332 368 ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey); ··· 349 385 struct ipoib_txparms *txp) 350 386 { 351 387 struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); 388 + struct hfi1_ipoib_txq *txq = txp->txq; 352 389 struct ipoib_txreq *tx; 390 + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; 391 + u32 tail = tx_ring->tail; 353 392 int ret; 354 393 355 - tx = kmem_cache_alloc_node(priv->txreq_cache, 356 - GFP_ATOMIC, 357 - priv->dd->node); 358 - if (unlikely(!tx)) 359 - return ERR_PTR(-ENOMEM); 394 + if (unlikely(!tx_ring->avail)) { 395 + u32 head; 396 + 397 + if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq)) 398 + /* This shouldn't happen with a stopped queue */ 399 + return ERR_PTR(-ENOMEM); 400 + /* See hfi1_ipoib_poll_tx_ring() */ 401 + head = smp_load_acquire(&tx_ring->head); 402 + tx_ring->avail = 403 + min_t(u32, hfi1_ipoib_ring_hwat(txq), 404 + CIRC_CNT(head, tail, tx_ring->max_items)); 405 + } else { 406 + tx_ring->avail--; 407 + } 408 + tx = hfi1_txreq_from_idx(tx_ring, tail); 409 + trace_hfi1_txq_alloc_tx(txq); 360 410 361 411 /* so that we can test if the sdma descriptors are there */ 362 412 tx->txreq.num_desc = 
0; 363 - tx->priv = priv; 364 - tx->txq = txp->txq; 413 + tx->txq = txq; 365 414 tx->skb = skb; 366 415 INIT_LIST_HEAD(&tx->txreq.list); 367 416 ··· 382 405 383 406 ret = hfi1_ipoib_build_tx_desc(tx, txp); 384 407 if (likely(!ret)) { 385 - if (txp->txq->flow.as_int != txp->flow.as_int) { 386 - txp->txq->flow.tx_queue = txp->flow.tx_queue; 387 - txp->txq->flow.sc5 = txp->flow.sc5; 388 - txp->txq->sde = 408 + if (txq->flow.as_int != txp->flow.as_int) { 409 + txq->flow.tx_queue = txp->flow.tx_queue; 410 + txq->flow.sc5 = txp->flow.sc5; 411 + txq->sde = 389 412 sdma_select_engine_sc(priv->dd, 390 413 txp->flow.tx_queue, 391 414 txp->flow.sc5); 392 - trace_hfi1_flow_switch(txp->txq); 415 + trace_hfi1_flow_switch(txq); 393 416 } 394 417 395 418 return tx; 396 419 } 397 420 398 421 sdma_txclean(priv->dd, &tx->txreq); 399 - kmem_cache_free(priv->txreq_cache, tx); 400 422 401 423 return ERR_PTR(ret); 402 424 } ··· 456 480 struct sk_buff *skb, 457 481 struct ipoib_txparms *txp) 458 482 { 459 - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); 460 483 struct hfi1_ipoib_txq *txq = txp->txq; 484 + struct hfi1_ipoib_circ_buf *tx_ring; 461 485 struct ipoib_txreq *tx; 462 486 int ret; 463 487 ··· 475 499 return NETDEV_TX_OK; 476 500 } 477 501 502 + tx_ring = &txq->tx_ring; 503 + trace_hfi1_tx_consume(tx, tx_ring->tail); 504 + /* consume tx */ 505 + smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); 478 506 ret = hfi1_ipoib_submit_tx(txq, tx); 479 507 if (likely(!ret)) { 480 508 tx_ok: 481 - trace_sdma_output_ibhdr(tx->priv->dd, 509 + trace_sdma_output_ibhdr(txq->priv->dd, 482 510 &tx->sdma_hdr.hdr, 483 511 ib_is_sc5(txp->flow.sc5)); 484 512 hfi1_ipoib_check_queue_depth(txq); ··· 494 514 if (ret == -EBUSY || ret == -ECOMM) 495 515 goto tx_ok; 496 516 497 - sdma_txclean(priv->dd, &tx->txreq); 498 - dev_kfree_skb_any(skb); 499 - kmem_cache_free(priv->txreq_cache, tx); 517 + /* mark complete and kick napi tx */ 518 + smp_store_release(&tx->complete, 1); 519 + napi_schedule(&tx->txq->napi); 520 + 500 521 ++dev->stats.tx_carrier_errors; 501 522 502 523 return NETDEV_TX_OK; ··· 508 527 struct ipoib_txparms *txp) 509 528 { 510 529 struct hfi1_ipoib_txq *txq = txp->txq; 530 + struct hfi1_ipoib_circ_buf *tx_ring; 511 531 struct ipoib_txreq *tx; 512 532 513 533 /* Has the flow change ? 
*/ ··· 538 556 return NETDEV_TX_OK; 539 557 } 540 558 559 + tx_ring = &txq->tx_ring; 560 + trace_hfi1_tx_consume(tx, tx_ring->tail); 561 + /* consume tx */ 562 + smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); 541 563 list_add_tail(&tx->txreq.list, &txq->tx_list); 542 564 543 565 hfi1_ipoib_check_queue_depth(txq); 544 566 545 - trace_sdma_output_ibhdr(tx->priv->dd, 567 + trace_sdma_output_ibhdr(txq->priv->dd, 546 568 &tx->sdma_hdr.hdr, 547 569 ib_is_sc5(txp->flow.sc5)); 548 570 ··· 632 646 if (list_empty(&txq->wait.list)) { 633 647 struct hfi1_ibport *ibp = &sde->ppd->ibport_data; 634 648 635 - if (!atomic_xchg(&txq->no_desc, 1)) { 649 + if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) { 636 650 trace_hfi1_txq_queued(txq); 637 651 hfi1_ipoib_stop_txq(txq); 638 652 } ··· 675 689 676 690 if (likely(dev->reg_state == NETREG_REGISTERED) && 677 691 likely(!hfi1_ipoib_flush_tx_list(dev, txq))) 678 - if (atomic_xchg(&txq->no_desc, 0)) 692 + if (atomic_xchg(&txq->tx_ring.no_desc, 0)) 679 693 hfi1_ipoib_wake_txq(txq); 680 694 } 681 695 682 696 int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) 683 697 { 684 698 struct net_device *dev = priv->netdev; 685 - char buf[HFI1_IPOIB_TXREQ_NAME_LEN]; 686 - unsigned long tx_ring_size; 699 + u32 tx_ring_size, tx_item_size; 687 700 int i; 688 701 689 702 /* 690 703 * Ring holds 1 less than tx_ring_size 691 704 * Round up to next power of 2 in order to hold at least tx_queue_len 692 705 */ 693 - tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1); 694 - 695 - snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit); 696 - priv->txreq_cache = kmem_cache_create(buf, 697 - sizeof(struct ipoib_txreq), 698 - 0, 699 - 0, 700 - NULL); 701 - if (!priv->txreq_cache) 702 - return -ENOMEM; 703 - 704 - priv->tx_napis = kcalloc_node(dev->num_tx_queues, 705 - sizeof(struct napi_struct), 706 - GFP_KERNEL, 707 - priv->dd->node); 708 - if (!priv->tx_napis) 709 - goto free_txreq_cache; 706 + tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1); 707 + tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq)); 710 708 711 709 priv->txqs = kcalloc_node(dev->num_tx_queues, 712 710 sizeof(struct hfi1_ipoib_txq), 713 711 GFP_KERNEL, 714 712 priv->dd->node); 715 713 if (!priv->txqs) 716 - goto free_tx_napis; 714 + return -ENOMEM; 717 715 718 716 for (i = 0; i < dev->num_tx_queues; i++) { 719 717 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; ··· 713 743 txq->priv = priv; 714 744 txq->sde = NULL; 715 745 INIT_LIST_HEAD(&txq->tx_list); 716 - atomic64_set(&txq->complete_txreqs, 0); 717 - atomic_set(&txq->stops, 0); 718 - atomic_set(&txq->ring_full, 0); 719 - atomic_set(&txq->no_desc, 0); 746 + atomic_set(&txq->tx_ring.stops, 0); 747 + atomic_set(&txq->tx_ring.ring_full, 0); 748 + atomic_set(&txq->tx_ring.no_desc, 0); 720 749 txq->q_idx = i; 721 750 txq->flow.tx_queue = 0xff; 722 751 txq->flow.sc5 = 0xff; ··· 725 756 priv->dd->node); 726 757 727 758 txq->tx_ring.items = 728 - kcalloc_node(tx_ring_size, 729 - sizeof(struct ipoib_txreq *), 759 + kcalloc_node(tx_ring_size, tx_item_size, 730 760 GFP_KERNEL, priv->dd->node); 731 761 if (!txq->tx_ring.items) 732 762 goto free_txqs; 733 763 734 - spin_lock_init(&txq->tx_ring.producer_lock); 735 - spin_lock_init(&txq->tx_ring.consumer_lock); 736 764 txq->tx_ring.max_items = tx_ring_size; 765 + txq->tx_ring.shift = ilog2(tx_ring_size); 766 + txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq); 737 767 738 - txq->napi = &priv->tx_napis[i]; 739 - netif_tx_napi_add(dev, 
txq->napi, 740 - hfi1_ipoib_process_tx_ring, 768 + netif_tx_napi_add(dev, &txq->napi, 769 + hfi1_ipoib_poll_tx_ring, 741 770 NAPI_POLL_WEIGHT); 742 771 } 743 772 ··· 745 778 for (i--; i >= 0; i--) { 746 779 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; 747 780 748 - netif_napi_del(txq->napi); 781 + netif_napi_del(&txq->napi); 749 782 kfree(txq->tx_ring.items); 750 783 } 751 784 752 785 kfree(priv->txqs); 753 786 priv->txqs = NULL; 754 - 755 - free_tx_napis: 756 - kfree(priv->tx_napis); 757 - priv->tx_napis = NULL; 758 - 759 - free_txreq_cache: 760 - kmem_cache_destroy(priv->txreq_cache); 761 - priv->txreq_cache = NULL; 762 787 return -ENOMEM; 763 788 } 764 789 ··· 758 799 { 759 800 struct sdma_txreq *txreq; 760 801 struct sdma_txreq *txreq_tmp; 761 - atomic64_t *complete_txreqs = &txq->complete_txreqs; 762 802 763 803 list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { 764 804 struct ipoib_txreq *tx = ··· 766 808 list_del(&txreq->list); 767 809 sdma_txclean(txq->priv->dd, &tx->txreq); 768 810 dev_kfree_skb_any(tx->skb); 769 - kmem_cache_free(txq->priv->txreq_cache, tx); 770 - atomic64_inc(complete_txreqs); 811 + tx->skb = NULL; 812 + txq->tx_ring.complete_txreqs++; 771 813 } 772 814 773 815 if (hfi1_ipoib_used(txq)) 774 816 dd_dev_warn(txq->priv->dd, 775 - "txq %d not empty found %llu requests\n", 817 + "txq %d not empty found %u requests\n", 776 818 txq->q_idx, 777 - hfi1_ipoib_txreqs(txq->sent_txreqs, 778 - atomic64_read(complete_txreqs))); 819 + hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, 820 + txq->tx_ring.complete_txreqs)); 779 821 } 780 822 781 823 void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) ··· 788 830 iowait_cancel_work(&txq->wait); 789 831 iowait_sdma_drain(&txq->wait); 790 832 hfi1_ipoib_drain_tx_list(txq); 791 - netif_napi_del(txq->napi); 792 - (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); 833 + netif_napi_del(&txq->napi); 834 + hfi1_ipoib_drain_tx_ring(txq); 793 835 kfree(txq->tx_ring.items); 794 836 } 795 837 796 838 kfree(priv->txqs); 797 839 priv->txqs = NULL; 798 - 799 - kfree(priv->tx_napis); 800 - priv->tx_napis = NULL; 801 - 802 - kmem_cache_destroy(priv->txreq_cache); 803 - priv->txreq_cache = NULL; 804 840 } 805 841 806 842 void hfi1_ipoib_napi_tx_enable(struct net_device *dev) ··· 805 853 for (i = 0; i < dev->num_tx_queues; i++) { 806 854 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; 807 855 808 - napi_enable(txq->napi); 856 + napi_enable(&txq->napi); 809 857 } 810 858 } 811 859 ··· 817 865 for (i = 0; i < dev->num_tx_queues; i++) { 818 866 struct hfi1_ipoib_txq *txq = &priv->txqs[i]; 819 867 820 - napi_disable(txq->napi); 821 - (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); 868 + napi_disable(&txq->napi); 869 + hfi1_ipoib_drain_tx_ring(txq); 822 870 } 823 871 } 824 872 ··· 826 874 { 827 875 struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); 828 876 struct hfi1_ipoib_txq *txq = &priv->txqs[q]; 829 - u64 completed = atomic64_read(&txq->complete_txreqs); 830 877 831 878 dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n", 832 879 txq, q, 833 880 __netif_subqueue_stopped(dev, txq->q_idx), 834 - atomic_read(&txq->stops), 835 - atomic_read(&txq->no_desc), 836 - atomic_read(&txq->ring_full)); 881 + atomic_read(&txq->tx_ring.stops), 882 + atomic_read(&txq->tx_ring.no_desc), 883 + atomic_read(&txq->tx_ring.ring_full)); 837 884 dd_dev_info(priv->dd, "sde %p engine %u\n", 838 885 txq->sde, 839 886 txq->sde ? 
txq->sde->this_idx : 0); 840 887 dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); 841 888 dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", 842 - txq->sent_txreqs, completed, hfi1_ipoib_used(txq)); 843 - dd_dev_info(priv->dd, "tx_queue_len %u max_items %lu\n", 889 + txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs, 890 + hfi1_ipoib_used(txq)); 891 + dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n", 844 892 dev->tx_queue_len, txq->tx_ring.max_items); 845 - dd_dev_info(priv->dd, "head %lu tail %lu\n", 893 + dd_dev_info(priv->dd, "head %u tail %u\n", 846 894 txq->tx_ring.head, txq->tx_ring.tail); 847 895 dd_dev_info(priv->dd, "wait queued %u\n", 848 896 !list_empty(&txq->wait.list));
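The rewritten ipoib_tx.c drops the producer and consumer spinlocks altogether: the xmit path owns tail, the napi poll routine owns head, and the only cross-CPU handshake per descriptor is the new complete flag, written with smp_store_release() in hfi1_ipoib_sdma_complete() and read with smp_load_acquire() in hfi1_ipoib_poll_tx_ring(). The release/acquire pair orders the sdma_status (and the rest of the txreq) before the flag, so the poller never reaps a half-written completion. A reduced sketch of just that pairing, with hypothetical helper names:

    #include <linux/types.h>
    #include <asm/barrier.h>

    /* 'demo_tx' stands in for struct ipoib_txreq; only ordering matters. */
    struct demo_tx {
            int sdma_status;
            int complete;
    };

    /* completion (interrupt) side: publish the finished descriptor */
    static void demo_publish(struct demo_tx *tx, int status)
    {
            tx->sdma_status = status;
            /* make the status visible before the flag flips to 1 */
            smp_store_release(&tx->complete, 1);
    }

    /* napi poll side: consume the descriptor, if ready */
    static bool demo_reap(struct demo_tx *tx)
    {
            /* pairs with the release above: seeing complete == 1 also
             * guarantees we see the status written before it */
            if (!smp_load_acquire(&tx->complete))
                    return false;
            tx->complete = 0;
            return true;
    }

The same discipline covers the ring indices: the poller publishes head with smp_store_release() after freeing slots, and the allocator refreshes its cached avail count from smp_load_acquire(&tx_ring->head) only when it runs dry.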
+66 -5
drivers/infiniband/hw/hfi1/trace_tx.h
··· 917 917 __entry->tail = txq->tx_ring.tail; 918 918 __entry->idx = txq->q_idx; 919 919 __entry->used = 920 - txq->sent_txreqs - 921 - atomic64_read(&txq->complete_txreqs); 920 + txq->tx_ring.sent_txreqs - 921 + txq->tx_ring.complete_txreqs; 922 922 __entry->flow = txq->flow.as_int; 923 - __entry->stops = atomic_read(&txq->stops); 924 - __entry->no_desc = atomic_read(&txq->no_desc); 923 + __entry->stops = atomic_read(&txq->tx_ring.stops); 924 + __entry->no_desc = atomic_read(&txq->tx_ring.no_desc); 925 925 __entry->stopped = 926 926 __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx); 927 927 ), 928 928 TP_printk(/* print */ 929 - "[%s] txq %llx idx %u sde %llx head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u", 929 + "[%s] txq %llx idx %u sde %llx:%u cpu %d head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u", 930 930 __get_str(dev), 931 931 (unsigned long long)__entry->txq, 932 932 __entry->idx, 933 933 (unsigned long long)__entry->sde, 934 + __entry->sde ? __entry->sde->this_idx : 0, 935 + __entry->sde ? __entry->sde->cpu : 0, 934 936 __entry->head, 935 937 __entry->tail, 936 938 __entry->flow, ··· 993 991 994 992 DEFINE_EVENT(/* xmit_unstopped */ 995 993 hfi1_ipoib_txq_template, hfi1_txq_xmit_unstopped, 994 + TP_PROTO(struct hfi1_ipoib_txq *txq), 995 + TP_ARGS(txq) 996 + ); 997 + 998 + DECLARE_EVENT_CLASS(/* AIP */ 999 + hfi1_ipoib_tx_template, 1000 + TP_PROTO(struct ipoib_txreq *tx, u32 idx), 1001 + TP_ARGS(tx, idx), 1002 + TP_STRUCT__entry(/* entry */ 1003 + DD_DEV_ENTRY(tx->txq->priv->dd) 1004 + __field(struct ipoib_txreq *, tx) 1005 + __field(struct hfi1_ipoib_txq *, txq) 1006 + __field(struct sk_buff *, skb) 1007 + __field(ulong, idx) 1008 + ), 1009 + TP_fast_assign(/* assign */ 1010 + DD_DEV_ASSIGN(tx->txq->priv->dd); 1011 + __entry->tx = tx; 1012 + __entry->skb = tx->skb; 1013 + __entry->txq = tx->txq; 1014 + __entry->idx = idx; 1015 + ), 1016 + TP_printk(/* print */ 1017 + "[%s] tx %llx txq %llx,%u skb %llx idx %lu", 1018 + __get_str(dev), 1019 + (unsigned long long)__entry->tx, 1020 + (unsigned long long)__entry->txq, 1021 + __entry->txq ? __entry->txq->q_idx : 0, 1022 + (unsigned long long)__entry->skb, 1023 + __entry->idx 1024 + ) 1025 + ); 1026 + 1027 + DEFINE_EVENT(/* produce */ 1028 + hfi1_ipoib_tx_template, hfi1_tx_produce, 1029 + TP_PROTO(struct ipoib_txreq *tx, u32 idx), 1030 + TP_ARGS(tx, idx) 1031 + ); 1032 + 1033 + DEFINE_EVENT(/* consume */ 1034 + hfi1_ipoib_tx_template, hfi1_tx_consume, 1035 + TP_PROTO(struct ipoib_txreq *tx, u32 idx), 1036 + TP_ARGS(tx, idx) 1037 + ); 1038 + 1039 + DEFINE_EVENT(/* alloc_tx */ 1040 + hfi1_ipoib_txq_template, hfi1_txq_alloc_tx, 1041 + TP_PROTO(struct hfi1_ipoib_txq *txq), 1042 + TP_ARGS(txq) 1043 + ); 1044 + 1045 + DEFINE_EVENT(/* poll */ 1046 + hfi1_ipoib_txq_template, hfi1_txq_poll, 1047 + TP_PROTO(struct hfi1_ipoib_txq *txq), 1048 + TP_ARGS(txq) 1049 + ); 1050 + 1051 + DEFINE_EVENT(/* complete */ 1052 + hfi1_ipoib_txq_template, hfi1_txq_complete, 996 1053 TP_PROTO(struct hfi1_ipoib_txq *txq), 997 1054 TP_ARGS(txq) 998 1055 );
+2 -3
drivers/infiniband/hw/hfi1/user_exp_rcv.c
··· 692 692 * Allocate the node first so we can handle a potential 693 693 * failure before we've programmed anything. 694 694 */ 695 - node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), 696 - GFP_KERNEL); 695 + node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL); 697 696 if (!node) 698 697 return -ENOMEM; 699 698 ··· 712 713 node->dma_addr = phys; 713 714 node->grp = grp; 714 715 node->freed = false; 715 - memcpy(node->pages, pages, sizeof(struct page *) * npages); 716 + memcpy(node->pages, pages, flex_array_size(node, pages, npages)); 716 717 717 718 if (fd->use_mn) { 718 719 ret = mmu_interval_notifier_insert(
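The user_exp_rcv.c hunk swaps the open-coded sizeof arithmetic for struct_size() and flex_array_size(), which compute the same values but saturate instead of wrapping on overflow. A minimal sketch of the pattern on a made-up structure with a flexible array member:

    #include <linux/overflow.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    /* Hypothetical structure mirroring the tid_rb_node usage above. */
    struct page_list {
            unsigned int npages;
            struct page *pages[];   /* flexible array member */
    };

    static struct page_list *alloc_page_list(unsigned int npages,
                                             struct page **src)
    {
            struct page_list *pl;

            /* struct_size() == sizeof(*pl) + npages * sizeof(pl->pages[0]),
             * but evaluates to SIZE_MAX on overflow so kzalloc() fails cleanly */
            pl = kzalloc(struct_size(pl, pages, npages), GFP_KERNEL);
            if (!pl)
                    return NULL;

            pl->npages = npages;
            /* flex_array_size() is the element-count * element-size half */
            memcpy(pl->pages, src, flex_array_size(pl, pages, npages));
            return pl;
    }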
+27 -26
drivers/infiniband/hw/hfi1/verbs.c
··· 1602 1602 }; 1603 1603 1604 1604 static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ 1605 - static const char **dev_cntr_names; 1606 - static const char **port_cntr_names; 1605 + static struct rdma_stat_desc *dev_cntr_descs; 1606 + static struct rdma_stat_desc *port_cntr_descs; 1607 1607 int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); 1608 1608 static int num_dev_cntrs; 1609 1609 static int num_port_cntrs; ··· 1614 1614 * strings. Optionally some entries can be reserved in the array to hold extra 1615 1615 * external strings. 1616 1616 */ 1617 - static int init_cntr_names(const char *names_in, 1618 - const size_t names_len, 1619 - int num_extra_names, 1620 - int *num_cntrs, 1621 - const char ***cntr_names) 1617 + static int init_cntr_names(const char *names_in, const size_t names_len, 1618 + int num_extra_names, int *num_cntrs, 1619 + struct rdma_stat_desc **cntr_descs) 1622 1620 { 1623 - char *names_out, *p, **q; 1621 + struct rdma_stat_desc *q; 1622 + char *names_out, *p; 1624 1623 int i, n; 1625 1624 1626 1625 n = 0; ··· 1627 1628 if (names_in[i] == '\n') 1628 1629 n++; 1629 1630 1630 - names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, 1631 - GFP_KERNEL); 1631 + names_out = 1632 + kmalloc((n + num_extra_names) * sizeof(struct rdma_stat_desc) + 1633 + names_len, 1634 + GFP_KERNEL); 1632 1635 if (!names_out) { 1633 1636 *num_cntrs = 0; 1634 - *cntr_names = NULL; 1637 + *cntr_descs = NULL; 1635 1638 return -ENOMEM; 1636 1639 } 1637 1640 1638 - p = names_out + (n + num_extra_names) * sizeof(char *); 1641 + p = names_out + (n + num_extra_names) * sizeof(struct rdma_stat_desc); 1639 1642 memcpy(p, names_in, names_len); 1640 1643 1641 - q = (char **)names_out; 1644 + q = (struct rdma_stat_desc *)names_out; 1642 1645 for (i = 0; i < n; i++) { 1643 - q[i] = p; 1646 + q[i].name = p; 1644 1647 p = strchr(p, '\n'); 1645 1648 *p++ = '\0'; 1646 1649 } 1647 1650 1648 1651 *num_cntrs = n; 1649 - *cntr_names = (const char **)names_out; 1652 + *cntr_descs = (struct rdma_stat_desc *)names_out; 1650 1653 return 0; 1651 1654 } 1652 1655 ··· 1662 1661 goto out_unlock; 1663 1662 1664 1663 err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs, 1665 - &num_dev_cntrs, &dev_cntr_names); 1664 + &num_dev_cntrs, &dev_cntr_descs); 1666 1665 if (err) 1667 1666 goto out_unlock; 1668 1667 1669 1668 for (i = 0; i < num_driver_cntrs; i++) 1670 - dev_cntr_names[num_dev_cntrs + i] = driver_cntr_names[i]; 1669 + dev_cntr_descs[num_dev_cntrs + i].name = driver_cntr_names[i]; 1671 1670 1672 1671 err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0, 1673 - &num_port_cntrs, &port_cntr_names); 1672 + &num_port_cntrs, &port_cntr_descs); 1674 1673 if (err) { 1675 - kfree(dev_cntr_names); 1676 - dev_cntr_names = NULL; 1674 + kfree(dev_cntr_descs); 1675 + dev_cntr_descs = NULL; 1677 1676 goto out_unlock; 1678 1677 } 1679 1678 cntr_names_initialized = 1; ··· 1687 1686 { 1688 1687 if (init_counters(ibdev)) 1689 1688 return NULL; 1690 - return rdma_alloc_hw_stats_struct(dev_cntr_names, 1689 + return rdma_alloc_hw_stats_struct(dev_cntr_descs, 1691 1690 num_dev_cntrs + num_driver_cntrs, 1692 1691 RDMA_HW_STATS_DEFAULT_LIFESPAN); 1693 1692 } ··· 1697 1696 { 1698 1697 if (init_counters(ibdev)) 1699 1698 return NULL; 1700 - return rdma_alloc_hw_stats_struct(port_cntr_names, num_port_cntrs, 1699 + return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs, 1701 1700 RDMA_HW_STATS_DEFAULT_LIFESPAN); 1702 1701 } 1703 1702 ··· 1922 1921 
verbs_txreq_exit(dev); 1923 1922 1924 1923 mutex_lock(&cntr_names_lock); 1925 - kfree(dev_cntr_names); 1926 - kfree(port_cntr_names); 1927 - dev_cntr_names = NULL; 1928 - port_cntr_names = NULL; 1924 + kfree(dev_cntr_descs); 1925 + kfree(port_cntr_descs); 1926 + dev_cntr_descs = NULL; 1927 + port_cntr_descs = NULL; 1929 1928 cntr_names_initialized = 0; 1930 1929 mutex_unlock(&cntr_names_lock); 1931 1930 }
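hfi1's counter-name handling is converted from an array of char * to the struct rdma_stat_desc that rdma_alloc_hw_stats_struct() now takes; init_cntr_names() still carves the names out of one allocation, it just stores each pointer in a descriptor's .name field. Drivers with static counter lists (as the EFA hunk earlier shows) can declare the descriptor array directly; a minimal sketch with hypothetical counter names:

    #include <rdma/ib_verbs.h>

    /* Hypothetical static descriptor table in the new format. */
    static const struct rdma_stat_desc demo_stat_descs[] = {
            { .name = "demo_tx_pkts" },
            { .name = "demo_rx_pkts" },
    };

    static struct rdma_hw_stats *
    demo_alloc_hw_device_stats(struct ib_device *ibdev)
    {
            return rdma_alloc_hw_stats_struct(demo_stat_descs,
                                              ARRAY_SIZE(demo_stat_descs),
                                              RDMA_HW_STATS_DEFAULT_LIFESPAN);
    }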
+25 -1
drivers/infiniband/hw/hns/hns_roce_device.h
··· 225 225 unsigned long logic_idx; 226 226 }; 227 227 228 + enum hns_roce_mmap_type { 229 + HNS_ROCE_MMAP_TYPE_DB = 1, 230 + HNS_ROCE_MMAP_TYPE_TPTR, 231 + }; 232 + 233 + struct hns_user_mmap_entry { 234 + struct rdma_user_mmap_entry rdma_entry; 235 + enum hns_roce_mmap_type mmap_type; 236 + u64 address; 237 + }; 238 + 228 239 struct hns_roce_ucontext { 229 240 struct ib_ucontext ibucontext; 230 241 struct hns_roce_uar uar; 231 242 struct list_head page_list; 232 243 struct mutex page_mutex; 244 + struct hns_user_mmap_entry *db_mmap_entry; 245 + struct hns_user_mmap_entry *tptr_mmap_entry; 233 246 }; 234 247 235 248 struct hns_roce_pd { ··· 911 898 bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy); 912 899 int (*set_gid)(struct hns_roce_dev *hr_dev, u32 port, int gid_index, 913 900 const union ib_gid *gid, const struct ib_gid_attr *attr); 914 - int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); 901 + int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, 902 + const u8 *addr); 915 903 void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, 916 904 enum ib_mtu mtu); 917 905 int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, ··· 1061 1047 static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) 1062 1048 { 1063 1049 return container_of(ibsrq, struct hns_roce_srq, ibsrq); 1050 + } 1051 + 1052 + static inline struct hns_user_mmap_entry * 1053 + to_hns_mmap(struct rdma_user_mmap_entry *rdma_entry) 1054 + { 1055 + return container_of(rdma_entry, struct hns_user_mmap_entry, rdma_entry); 1064 1056 } 1065 1057 1066 1058 static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) ··· 1279 1259 void hns_roce_exit(struct hns_roce_dev *hr_dev); 1280 1260 int hns_roce_fill_res_cq_entry(struct sk_buff *msg, 1281 1261 struct ib_cq *ib_cq); 1262 + struct hns_user_mmap_entry * 1263 + hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, 1264 + size_t length, 1265 + enum hns_roce_mmap_type mmap_type); 1282 1266 #endif /* _HNS_ROCE_DEVICE_H */
+5 -5
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
··· 90 90 unsigned long flags = 0; 91 91 void *wqe = NULL; 92 92 __le32 doorbell[2]; 93 + const u8 *smac; 93 94 int ret = 0; 94 95 int loopback; 95 96 u32 wqe_idx; 96 97 int nreq; 97 - u8 *smac; 98 98 99 99 if (unlikely(ibqp->qp_type != IB_QPT_GSI && 100 100 ibqp->qp_type != IB_QPT_RC)) { ··· 154 154 UD_SEND_WQE_U32_8_DMAC_5_S, 155 155 ah->av.mac[5]); 156 156 157 - smac = (u8 *)hr_dev->dev_addr[qp->port]; 157 + smac = (const u8 *)hr_dev->dev_addr[qp->port]; 158 158 loopback = ether_addr_equal_unaligned(ah->av.mac, 159 159 smac) ? 1 : 0; 160 160 roce_set_bit(ud_sq_wqe->u32_8, ··· 1782 1782 } 1783 1783 1784 1784 static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, 1785 - u8 *addr) 1785 + const u8 *addr) 1786 1786 { 1787 1787 u32 reg_smac_l; 1788 1788 u16 reg_smac_h; ··· 2743 2743 __le32 doorbell[2] = {0}; 2744 2744 u64 *mtts_2 = NULL; 2745 2745 int ret = -EINVAL; 2746 + const u8 *smac; 2746 2747 u64 sq_ba = 0; 2747 2748 u64 rq_ba = 0; 2748 2749 u32 port; 2749 2750 u32 port_num; 2750 2751 u8 *dmac; 2751 - u8 *smac; 2752 2752 2753 2753 if (!check_qp_state(cur_state, new_state)) { 2754 2754 ibdev_err(ibqp->device, ··· 2947 2947 2948 2948 port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : 2949 2949 hr_qp->port; 2950 - smac = (u8 *)hr_dev->dev_addr[port]; 2950 + smac = (const u8 *)hr_dev->dev_addr[port]; 2951 2951 /* when dmac equals smac or loop_idc is 1, it should loopback */ 2952 2952 if (ether_addr_equal_unaligned(dmac, smac) || 2953 2953 hr_dev->loop_idc == 0x1)
+11 -21
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 1165 1165 { 1166 1166 int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc); 1167 1167 1168 - ring->desc = kzalloc(size, GFP_KERNEL); 1168 + ring->desc = dma_alloc_coherent(hr_dev->dev, size, 1169 + &ring->desc_dma_addr, GFP_KERNEL); 1169 1170 if (!ring->desc) 1170 1171 return -ENOMEM; 1171 - 1172 - ring->desc_dma_addr = dma_map_single(hr_dev->dev, ring->desc, size, 1173 - DMA_BIDIRECTIONAL); 1174 - if (dma_mapping_error(hr_dev->dev, ring->desc_dma_addr)) { 1175 - ring->desc_dma_addr = 0; 1176 - kfree(ring->desc); 1177 - ring->desc = NULL; 1178 - 1179 - return -ENOMEM; 1180 - } 1181 1172 1182 1173 return 0; 1183 1174 } ··· 1176 1185 static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, 1177 1186 struct hns_roce_v2_cmq_ring *ring) 1178 1187 { 1179 - dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, 1180 - ring->desc_num * sizeof(struct hns_roce_cmq_desc), 1181 - DMA_BIDIRECTIONAL); 1188 + dma_free_coherent(hr_dev->dev, 1189 + ring->desc_num * sizeof(struct hns_roce_cmq_desc), 1190 + ring->desc, ring->desc_dma_addr); 1182 1191 1183 1192 ring->desc_dma_addr = 0; 1184 - kfree(ring->desc); 1185 1193 } 1186 1194 1187 1195 static int init_csq(struct hns_roce_dev *hr_dev, ··· 2982 2992 } 2983 2993 2984 2994 static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, 2985 - u8 *addr) 2995 + const u8 *addr) 2986 2996 { 2987 2997 struct hns_roce_cmq_desc desc; 2988 2998 struct hns_roce_cfg_smac_tb *smac_tb = ··· 3318 3328 memset(cq_context, 0, sizeof(*cq_context)); 3319 3329 3320 3330 hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID); 3321 - hr_reg_write(cq_context, CQC_ARM_ST, REG_NXT_CEQE); 3331 + hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED); 3322 3332 hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth)); 3323 3333 hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector); 3324 3334 hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn); ··· 4308 4318 dma_addr_t trrl_ba; 4309 4319 dma_addr_t irrl_ba; 4310 4320 enum ib_mtu ib_mtu; 4321 + const u8 *smac; 4311 4322 u8 lp_pktn_ini; 4312 4323 u64 *mtts; 4313 4324 u8 *dmac; 4314 - u8 *smac; 4315 4325 u32 port; 4316 4326 int mtu; 4317 4327 int ret; ··· 4364 4374 4365 4375 port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; 4366 4376 4367 - smac = (u8 *)hr_dev->dev_addr[port]; 4377 + smac = (const u8 *)hr_dev->dev_addr[port]; 4368 4378 dmac = (u8 *)attr->ah_attr.roce.dmac; 4369 4379 /* when dmac equals smac or loop_idc is 1, it should loopback */ 4370 4380 if (ether_addr_equal_unaligned(dmac, smac) || ··· 4389 4399 mtu = ib_mtu_enum_to_int(ib_mtu); 4390 4400 if (WARN_ON(mtu <= 0)) 4391 4401 return -EINVAL; 4392 - #define MAX_LP_MSG_LEN 65536 4393 - /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ 4402 + #define MAX_LP_MSG_LEN 16384 4403 + /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */ 4394 4404 lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); 4395 4405 if (WARN_ON(lp_pktn_ini >= 0xF)) 4396 4406 return -EINVAL;
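The CMQ ring allocation in hns_roce_hw_v2.c moves from kzalloc() plus dma_map_single() to dma_alloc_coherent(), which hands back a kernel virtual address and a bus address for one coherent buffer and removes the mapping-error and unmap/free bookkeeping. A minimal sketch of the two halves, assuming a hypothetical ring descriptor:

    #include <linux/dma-mapping.h>

    /* Hypothetical descriptor ring mirroring hns_roce_v2_cmq_ring. */
    struct demo_ring {
            void *desc;             /* CPU virtual address        */
            dma_addr_t desc_dma;    /* device-visible bus address */
            int desc_num;
            size_t desc_size;
    };

    static int demo_ring_alloc(struct device *dev, struct demo_ring *ring)
    {
            size_t size = ring->desc_num * ring->desc_size;

            /* one call gives a zeroed, coherent buffer plus its DMA address */
            ring->desc = dma_alloc_coherent(dev, size, &ring->desc_dma,
                                            GFP_KERNEL);
            return ring->desc ? 0 : -ENOMEM;
    }

    static void demo_ring_free(struct device *dev, struct demo_ring *ring)
    {
            dma_free_coherent(dev, ring->desc_num * ring->desc_size,
                              ring->desc, ring->desc_dma);
            ring->desc = NULL;
            ring->desc_dma = 0;
    }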
+114 -28
drivers/infiniband/hw/hns/hns_roce_main.c
··· 42 42 #include "hns_roce_device.h" 43 43 #include "hns_roce_hem.h" 44 44 45 - static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, u8 *addr) 45 + static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, 46 + const u8 *addr) 46 47 { 47 48 u8 phy_port; 48 49 u32 i; ··· 292 291 return 0; 293 292 } 294 293 294 + struct hns_user_mmap_entry * 295 + hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, 296 + size_t length, 297 + enum hns_roce_mmap_type mmap_type) 298 + { 299 + struct hns_user_mmap_entry *entry; 300 + int ret; 301 + 302 + entry = kzalloc(sizeof(*entry), GFP_KERNEL); 303 + if (!entry) 304 + return NULL; 305 + 306 + entry->address = address; 307 + entry->mmap_type = mmap_type; 308 + 309 + ret = rdma_user_mmap_entry_insert_exact( 310 + ucontext, &entry->rdma_entry, length, 311 + mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1); 312 + if (ret) { 313 + kfree(entry); 314 + return NULL; 315 + } 316 + 317 + return entry; 318 + } 319 + 320 + static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context) 321 + { 322 + if (context->db_mmap_entry) 323 + rdma_user_mmap_entry_remove( 324 + &context->db_mmap_entry->rdma_entry); 325 + 326 + if (context->tptr_mmap_entry) 327 + rdma_user_mmap_entry_remove( 328 + &context->tptr_mmap_entry->rdma_entry); 329 + } 330 + 331 + static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) 332 + { 333 + struct hns_roce_ucontext *context = to_hr_ucontext(uctx); 334 + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); 335 + u64 address; 336 + int ret; 337 + 338 + address = context->uar.pfn << PAGE_SHIFT; 339 + context->db_mmap_entry = hns_roce_user_mmap_entry_insert( 340 + uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB); 341 + if (!context->db_mmap_entry) 342 + return -ENOMEM; 343 + 344 + if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) 345 + return 0; 346 + 347 + /* 348 + * FIXME: using io_remap_pfn_range on the dma address returned 349 + * by dma_alloc_coherent is totally wrong. 
350 + */ 351 + context->tptr_mmap_entry = 352 + hns_roce_user_mmap_entry_insert(uctx, hr_dev->tptr_dma_addr, 353 + hr_dev->tptr_size, 354 + HNS_ROCE_MMAP_TYPE_TPTR); 355 + if (!context->tptr_mmap_entry) { 356 + ret = -ENOMEM; 357 + goto err; 358 + } 359 + 360 + return 0; 361 + 362 + err: 363 + hns_roce_dealloc_uar_entry(context); 364 + return ret; 365 + } 366 + 295 367 static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, 296 368 struct ib_udata *udata) 297 369 { ··· 383 309 if (ret) 384 310 goto error_fail_uar_alloc; 385 311 312 + ret = hns_roce_alloc_uar_entry(uctx); 313 + if (ret) 314 + goto error_fail_uar_entry; 315 + 386 316 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || 387 317 hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { 388 318 INIT_LIST_HEAD(&context->page_list); ··· 403 325 return 0; 404 326 405 327 error_fail_copy_to_udata: 328 + hns_roce_dealloc_uar_entry(context); 329 + 330 + error_fail_uar_entry: 406 331 ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); 407 332 408 333 error_fail_uar_alloc: ··· 417 336 struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); 418 337 struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); 419 338 339 + hns_roce_dealloc_uar_entry(context); 340 + 420 341 ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); 421 342 } 422 343 423 - static int hns_roce_mmap(struct ib_ucontext *context, 424 - struct vm_area_struct *vma) 344 + static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) 425 345 { 426 - struct hns_roce_dev *hr_dev = to_hr_dev(context->device); 346 + struct rdma_user_mmap_entry *rdma_entry; 347 + struct hns_user_mmap_entry *entry; 348 + phys_addr_t pfn; 349 + pgprot_t prot; 350 + int ret; 427 351 428 - switch (vma->vm_pgoff) { 429 - case 0: 430 - return rdma_user_mmap_io(context, vma, 431 - to_hr_ucontext(context)->uar.pfn, 432 - PAGE_SIZE, 433 - pgprot_noncached(vma->vm_page_prot), 434 - NULL); 435 - 436 - /* vm_pgoff: 1 -- TPTR */ 437 - case 1: 438 - if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) 439 - return -EINVAL; 440 - /* 441 - * FIXME: using io_remap_pfn_range on the dma address returned 442 - * by dma_alloc_coherent is totally wrong. 443 - */ 444 - return rdma_user_mmap_io(context, vma, 445 - hr_dev->tptr_dma_addr >> PAGE_SHIFT, 446 - hr_dev->tptr_size, 447 - vma->vm_page_prot, 448 - NULL); 449 - 450 - default: 352 + rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); 353 + if (!rdma_entry) 451 354 return -EINVAL; 452 - } 355 + 356 + entry = to_hns_mmap(rdma_entry); 357 + pfn = entry->address >> PAGE_SHIFT; 358 + prot = vma->vm_page_prot; 359 + 360 + if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR) 361 + prot = pgprot_noncached(prot); 362 + 363 + ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE, 364 + prot, rdma_entry); 365 + 366 + rdma_user_mmap_entry_put(rdma_entry); 367 + 368 + return ret; 369 + } 370 + 371 + static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry) 372 + { 373 + struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry); 374 + 375 + kfree(entry); 453 376 } 454 377 455 378 static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num, ··· 529 444 .get_link_layer = hns_roce_get_link_layer, 530 445 .get_port_immutable = hns_roce_port_immutable, 531 446 .mmap = hns_roce_mmap, 447 + .mmap_free = hns_roce_free_mmap, 532 448 .modify_device = hns_roce_modify_device, 533 449 .modify_qp = hns_roce_modify_qp, 534 450 .query_ah = hns_roce_query_ah,
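hns_roce_mmap() no longer switches on raw vm_pgoff values; each mappable region (doorbell page and, when present, the TPTR page) is registered at ucontext creation through rdma_user_mmap_entry_insert_exact(), and mmap just looks the entry up, maps it, and drops the reference, with mmap_free releasing the entry when the VMA goes away. Pinning the entries at offsets 0 and 1 keeps the user ABI unchanged, so an existing provider can still map the doorbell the old way. An illustrative user-space call, assuming cmd_fd is the opened uverbs device fd and page_size the system page size, neither of which appears in this diff:

    #include <sys/mman.h>

    /* Illustrative only: map the hns doorbell page registered at pgoff 0;
     * the optional TPTR page would use offset 1 * page_size. */
    static void *map_hns_db_page(int cmd_fd, size_t page_size)
    {
            return mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cmd_fd, 0);
    }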
+2 -10
drivers/infiniband/hw/irdma/cm.h
··· 159 159 IRDMA_CM_EVENT_ABORTED, 160 160 }; 161 161 162 - struct irdma_bth { /* Base Trasnport Header */ 163 - u8 opcode; 164 - u8 flags; 165 - __be16 pkey; 166 - __be32 qpn; 167 - __be32 apsn; 168 - }; 169 - 170 162 struct ietf_mpa_v1 { 171 163 u8 key[IETF_MPA_KEY_SIZE]; 172 164 u8 flags; ··· 389 397 int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); 390 398 int irdma_create_listen(struct iw_cm_id *cm_id, int backlog); 391 399 int irdma_destroy_listen(struct iw_cm_id *cm_id); 392 - int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac); 400 + int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac); 393 401 void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, 394 402 struct irdma_cm_info *nfo, 395 403 bool disconnect_all); ··· 398 406 bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr); 399 407 bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr); 400 408 int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, 401 - u8 *mac_addr, u32 action); 409 + const u8 *mac_addr, u32 action); 402 410 void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev, 403 411 u32 *ipaddr, bool ipv4, bool ifup); 404 412 bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port);
+1 -42
drivers/infiniband/hw/irdma/ctrl.c
··· 1420 1420 } 1421 1421 1422 1422 /** 1423 - * irdma_sc_send_lsmm_nostag - for privilege qp 1424 - * @qp: sc qp struct 1425 - * @lsmm_buf: buffer with lsmm message 1426 - * @size: size of lsmm buffer 1427 - */ 1428 - void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size) 1429 - { 1430 - __le64 *wqe; 1431 - u64 hdr; 1432 - struct irdma_qp_uk *qp_uk; 1433 - 1434 - qp_uk = &qp->qp_uk; 1435 - wqe = qp_uk->sq_base->elem; 1436 - 1437 - set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf); 1438 - 1439 - if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) 1440 - set_64bit_val(wqe, 8, 1441 - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size)); 1442 - else 1443 - set_64bit_val(wqe, 8, 1444 - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) | 1445 - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); 1446 - set_64bit_val(wqe, 16, 0); 1447 - 1448 - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) | 1449 - FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) | 1450 - FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) | 1451 - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); 1452 - dma_wmb(); /* make sure WQE is written before valid bit is set */ 1453 - 1454 - set_64bit_val(wqe, 24, hdr); 1455 - 1456 - print_hex_dump_debug("WQE: SEND_LSMM_NOSTAG WQE", DUMP_PREFIX_OFFSET, 1457 - 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false); 1458 - } 1459 - 1460 - /** 1461 1423 * irdma_sc_send_rtt - send last read0 or write0 1462 1424 * @qp: sc qp struct 1463 1425 * @read: Do read0 or write0 ··· 2463 2501 enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, 2464 2502 struct irdma_cq_init_info *info) 2465 2503 { 2466 - enum irdma_status_code ret_code; 2467 2504 u32 pble_obj_cnt; 2468 2505 2469 2506 pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; ··· 2474 2513 cq->ceq_id = info->ceq_id; 2475 2514 info->cq_uk_init_info.cqe_alloc_db = cq->dev->cq_arm_db; 2476 2515 info->cq_uk_init_info.cq_ack_db = cq->dev->cq_ack_db; 2477 - ret_code = irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info); 2478 - if (ret_code) 2479 - return ret_code; 2516 + irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info); 2480 2517 2481 2518 cq->virtual_map = info->virtual_map; 2482 2519 cq->pbl_chunk_size = info->pbl_chunk_size;
+4 -3
drivers/infiniband/hw/irdma/hw.c
··· 1057 1057 &iwdev->mac_ip_table_idx); 1058 1058 if (!status) { 1059 1059 status = irdma_add_local_mac_entry(iwdev->rf, 1060 - (u8 *)iwdev->netdev->dev_addr, 1060 + (const u8 *)iwdev->netdev->dev_addr, 1061 1061 (u8)iwdev->mac_ip_table_idx); 1062 1062 if (status) 1063 1063 irdma_del_local_mac_entry(iwdev->rf, ··· 2191 2191 * @mac_addr: pointer to mac address 2192 2192 * @idx: the index of the mac ip address to add 2193 2193 */ 2194 - int irdma_add_local_mac_entry(struct irdma_pci_f *rf, u8 *mac_addr, u16 idx) 2194 + int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx) 2195 2195 { 2196 2196 struct irdma_local_mac_entry_info *info; 2197 2197 struct irdma_cqp *iwcqp = &rf->cqp; ··· 2362 2362 * @ipv4: flag inicating IPv4 2363 2363 * @action: add, delete or modify 2364 2364 */ 2365 - void irdma_manage_arp_cache(struct irdma_pci_f *rf, unsigned char *mac_addr, 2365 + void irdma_manage_arp_cache(struct irdma_pci_f *rf, 2366 + const unsigned char *mac_addr, 2366 2367 u32 *ip_addr, bool ipv4, u32 action) 2367 2368 { 2368 2369 struct irdma_add_arp_cache_entry_info *info;
+3 -2
drivers/infiniband/hw/irdma/main.h
··· 467 467 void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp); 468 468 struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn); 469 469 void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask); 470 - void irdma_manage_arp_cache(struct irdma_pci_f *rf, unsigned char *mac_addr, 470 + void irdma_manage_arp_cache(struct irdma_pci_f *rf, 471 + const unsigned char *mac_addr, 471 472 u32 *ip_addr, bool ipv4, u32 action); 472 473 struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port); 473 474 void irdma_del_apbvt(struct irdma_device *iwdev, ··· 480 479 void irdma_put_cqp_request(struct irdma_cqp *cqp, 481 480 struct irdma_cqp_request *cqp_request); 482 481 int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx); 483 - int irdma_add_local_mac_entry(struct irdma_pci_f *rf, u8 *mac_addr, u16 idx); 482 + int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx); 484 483 void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx); 485 484 486 485 u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf);
-1
drivers/infiniband/hw/irdma/osdep.h
··· 37 37 struct irdma_pci_f; 38 38 39 39 struct ib_device *to_ibdev(struct irdma_sc_dev *dev); 40 - u8 __iomem *irdma_get_hw_addr(void *dev); 41 40 void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); 42 41 enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev); 43 42 bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
-2
drivers/infiniband/hw/irdma/protos.h
··· 37 37 enum irdma_status_code 38 38 irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, 39 39 struct irdma_ws_node_info *node_info); 40 - enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, 41 - struct irdma_up_info *map_info); 42 40 enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, 43 41 struct irdma_sc_ceq *sc_ceq, u8 op); 44 42 enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev,
+5 -3
drivers/infiniband/hw/irdma/trace_cm.h
··· 144 144 DECLARE_EVENT_CLASS(qhash_template, 145 145 TP_PROTO(struct irdma_device *iwdev, 146 146 struct irdma_cm_listener *listener, 147 - char *dev_addr), 147 + const char *dev_addr), 148 148 TP_ARGS(iwdev, listener, dev_addr), 149 149 TP_STRUCT__entry(__field(struct irdma_device *, iwdev) 150 150 __field(u16, lport) ··· 173 173 174 174 DEFINE_EVENT(qhash_template, irdma_add_mqh_6, 175 175 TP_PROTO(struct irdma_device *iwdev, 176 - struct irdma_cm_listener *listener, char *dev_addr), 176 + struct irdma_cm_listener *listener, 177 + const char *dev_addr), 177 178 TP_ARGS(iwdev, listener, dev_addr)); 178 179 179 180 DEFINE_EVENT(qhash_template, irdma_add_mqh_4, 180 181 TP_PROTO(struct irdma_device *iwdev, 181 - struct irdma_cm_listener *listener, char *dev_addr), 182 + struct irdma_cm_listener *listener, 183 + const char *dev_addr), 182 184 TP_ARGS(iwdev, listener, dev_addr)); 183 185 184 186 TRACE_EVENT(irdma_addr_resolve,
+1 -2
drivers/infiniband/hw/irdma/type.h
··· 852 852 u16 err_rq_idx; 853 853 u32 qkey; 854 854 u32 dest_qp; 855 - u32 local_qp; 856 855 u8 roce_tver; 857 856 u8 ack_credits; 858 857 u8 err_rq_idx_valid; ··· 1255 1256 u64 scratch, bool post_sq); 1256 1257 void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, 1257 1258 irdma_stag stag); 1258 - void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size); 1259 + 1259 1260 void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read); 1260 1261 void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, 1261 1262 struct irdma_qp_host_ctx_info *info);
+21 -80
drivers/infiniband/hw/irdma/uk.c
··· 13 13 * @sge: sge length and stag 14 14 * @valid: The wqe valid 15 15 */ 16 - static void irdma_set_fragment(__le64 *wqe, u32 offset, struct irdma_sge *sge, 16 + static void irdma_set_fragment(__le64 *wqe, u32 offset, struct ib_sge *sge, 17 17 u8 valid) 18 18 { 19 19 if (sge) { 20 20 set_64bit_val(wqe, offset, 21 - FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); 21 + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); 22 22 set_64bit_val(wqe, offset + 8, 23 23 FIELD_PREP(IRDMAQPSQ_VALID, valid) | 24 - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->len) | 25 - FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->stag)); 24 + FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->length) | 25 + FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->lkey)); 26 26 } else { 27 27 set_64bit_val(wqe, offset, 0); 28 28 set_64bit_val(wqe, offset + 8, ··· 38 38 * @valid: wqe valid flag 39 39 */ 40 40 static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset, 41 - struct irdma_sge *sge, u8 valid) 41 + struct ib_sge *sge, u8 valid) 42 42 { 43 43 if (sge) { 44 44 set_64bit_val(wqe, offset, 45 - FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); 45 + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); 46 46 set_64bit_val(wqe, offset + 8, 47 - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->len) | 48 - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->stag)); 47 + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->length) | 48 + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->lkey)); 49 49 } else { 50 50 set_64bit_val(wqe, offset, 0); 51 51 set_64bit_val(wqe, offset + 8, 0); ··· 289 289 return IRDMA_ERR_INVALID_FRAG_COUNT; 290 290 291 291 for (i = 0; i < op_info->num_lo_sges; i++) 292 - total_size += op_info->lo_sg_list[i].len; 292 + total_size += op_info->lo_sg_list[i].length; 293 293 294 294 read_fence |= info->read_fence; 295 295 ··· 310 310 irdma_clr_wqes(qp, wqe_idx); 311 311 312 312 set_64bit_val(wqe, 16, 313 - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); 313 + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); 314 314 315 315 if (info->imm_data_valid) { 316 316 set_64bit_val(wqe, 0, ··· 339 339 ++addl_frag_cnt; 340 340 } 341 341 342 - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | 342 + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | 343 343 FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | 344 344 FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | 345 345 FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | ··· 391 391 return IRDMA_ERR_INVALID_FRAG_COUNT; 392 392 393 393 for (i = 0; i < op_info->num_lo_sges; i++) 394 - total_size += op_info->lo_sg_list[i].len; 394 + total_size += op_info->lo_sg_list[i].length; 395 395 396 396 ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); 397 397 if (ret_code) ··· 426 426 ++addl_frag_cnt; 427 427 } 428 428 set_64bit_val(wqe, 16, 429 - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); 430 - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | 429 + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); 430 + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | 431 431 FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 
1 : 0)) | 432 432 FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | 433 433 FIELD_PREP(IRDMAQPSQ_OPCODE, ··· 477 477 return IRDMA_ERR_INVALID_FRAG_COUNT; 478 478 479 479 for (i = 0; i < op_info->num_sges; i++) 480 - total_size += op_info->sg_list[i].len; 480 + total_size += op_info->sg_list[i].length; 481 481 482 482 if (info->imm_data_valid) 483 483 frag_cnt = op_info->num_sges + 1; ··· 705 705 706 706 read_fence |= info->read_fence; 707 707 set_64bit_val(wqe, 16, 708 - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); 708 + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); 709 709 710 - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | 710 + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | 711 711 FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | 712 712 FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | 713 713 FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | ··· 826 826 u64 hdr; 827 827 u32 wqe_idx; 828 828 bool local_fence = false; 829 - struct irdma_sge sge = {}; 829 + struct ib_sge sge = {}; 830 830 831 831 info->push_wqe = qp->push_db ? true : false; 832 832 op_info = &info->op.inv_local_stag; ··· 839 839 840 840 irdma_clr_wqes(qp, wqe_idx); 841 841 842 - sge.stag = op_info->target_stag; 842 + sge.lkey = op_info->target_stag; 843 843 qp->wqe_ops.iw_set_fragment(wqe, 0, &sge, 0); 844 844 845 845 set_64bit_val(wqe, 16, 0); 846 846 847 847 hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) | 848 - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | 849 - FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | 850 - FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | 851 - FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | 852 - FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); 853 - 854 - dma_wmb(); /* make sure WQE is populated before valid bit is set */ 855 - 856 - set_64bit_val(wqe, 24, hdr); 857 - 858 - if (info->push_wqe) { 859 - irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, 860 - post_sq); 861 - } else { 862 - if (post_sq) 863 - irdma_uk_qp_post_wr(qp); 864 - } 865 - 866 - return 0; 867 - } 868 - 869 - /** 870 - * irdma_uk_mw_bind - bind Memory Window 871 - * @qp: hw qp ptr 872 - * @info: post sq information 873 - * @post_sq: flag to post sq 874 - */ 875 - enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, 876 - struct irdma_post_sq_info *info, 877 - bool post_sq) 878 - { 879 - __le64 *wqe; 880 - struct irdma_bind_window *op_info; 881 - u64 hdr; 882 - u32 wqe_idx; 883 - bool local_fence = false; 884 - 885 - info->push_wqe = qp->push_db ? true : false; 886 - op_info = &info->op.bind_window; 887 - local_fence |= info->local_fence; 888 - 889 - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 890 - 0, info); 891 - if (!wqe) 892 - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; 893 - 894 - irdma_clr_wqes(qp, wqe_idx); 895 - 896 - qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); 897 - 898 - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_BIND_MW) | 899 - FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, 900 - ((op_info->ena_reads << 2) | (op_info->ena_writes << 3))) | 901 - FIELD_PREP(IRDMAQPSQ_VABASEDTO, 902 - (op_info->addressing_type == IRDMA_ADDR_TYPE_VA_BASED ? 1 : 0)) | 903 - FIELD_PREP(IRDMAQPSQ_MEMWINDOWTYPE, 904 - (op_info->mem_window_type_1 ? 
1 : 0)) | 905 848 FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | 906 849 FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | 907 850 FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | ··· 1446 1503 * @cq: hw cq 1447 1504 * @info: hw cq initialization info 1448 1505 */ 1449 - enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, 1450 - struct irdma_cq_uk_init_info *info) 1506 + void irdma_uk_cq_init(struct irdma_cq_uk *cq, 1507 + struct irdma_cq_uk_init_info *info) 1451 1508 { 1452 1509 cq->cq_base = info->cq_base; 1453 1510 cq->cq_id = info->cq_id; ··· 1458 1515 cq->avoid_mem_cflct = info->avoid_mem_cflct; 1459 1516 IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); 1460 1517 cq->polarity = 1; 1461 - 1462 - return 0; 1463 1518 } 1464 1519 1465 1520 /**
+11 -21
drivers/infiniband/hw/irdma/user.h
··· 16 16 #define irdma_access_privileges u32 17 17 #define irdma_physical_fragment u64 18 18 #define irdma_address_list u64 * 19 - #define irdma_sgl struct irdma_sge * 20 19 21 20 #define IRDMA_MAX_MR_SIZE 0x200000000000ULL 22 21 ··· 150 151 struct irdma_qp_uk_init_info; 151 152 struct irdma_cq_uk_init_info; 152 153 153 - struct irdma_sge { 154 - irdma_tagged_offset tag_off; 155 - u32 len; 156 - irdma_stag stag; 157 - }; 158 - 159 154 struct irdma_ring { 160 155 u32 head; 161 156 u32 tail; ··· 165 172 }; 166 173 167 174 struct irdma_post_send { 168 - irdma_sgl sg_list; 175 + struct ib_sge *sg_list; 169 176 u32 num_sges; 170 177 u32 qkey; 171 178 u32 dest_qp; ··· 182 189 183 190 struct irdma_post_rq_info { 184 191 u64 wr_id; 185 - irdma_sgl sg_list; 192 + struct ib_sge *sg_list; 186 193 u32 num_sges; 187 194 }; 188 195 189 196 struct irdma_rdma_write { 190 - irdma_sgl lo_sg_list; 197 + struct ib_sge *lo_sg_list; 191 198 u32 num_lo_sges; 192 - struct irdma_sge rem_addr; 199 + struct ib_sge rem_addr; 193 200 }; 194 201 195 202 struct irdma_inline_rdma_write { 196 203 void *data; 197 204 u32 len; 198 - struct irdma_sge rem_addr; 205 + struct ib_sge rem_addr; 199 206 }; 200 207 201 208 struct irdma_rdma_read { 202 - irdma_sgl lo_sg_list; 209 + struct ib_sge *lo_sg_list; 203 210 u32 num_lo_sges; 204 - struct irdma_sge rem_addr; 211 + struct ib_sge rem_addr; 205 212 }; 206 213 207 214 struct irdma_bind_window { ··· 276 283 enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, 277 284 struct irdma_post_sq_info *info, 278 285 bool post_sq); 279 - enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, 280 - struct irdma_post_sq_info *info, 281 - bool post_sq); 286 + 282 287 enum irdma_status_code irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, 283 288 bool signaled, bool post_sq); 284 289 enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, ··· 297 306 struct irdma_wqe_uk_ops { 298 307 void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); 299 308 u16 (*iw_inline_data_size_to_quanta)(u32 data_size); 300 - void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct irdma_sge *sge, 309 + void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge, 301 310 u8 valid); 302 311 void (*iw_set_mw_bind_wqe)(__le64 *wqe, 303 312 struct irdma_bind_window *op_info); ··· 309 318 enum irdma_cmpl_notify cq_notify); 310 319 void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); 311 320 void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); 312 - enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, 313 - struct irdma_cq_uk_init_info *info); 321 + void irdma_uk_cq_init(struct irdma_cq_uk *cq, 322 + struct irdma_cq_uk_init_info *info); 314 323 enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, 315 324 struct irdma_qp_uk_init_info *info); 316 325 struct irdma_sq_uk_wr_trk_info { ··· 360 369 bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ 361 370 bool destroy_pending:1; /* Indicates the QP is being destroyed */ 362 371 void *back_qp; 363 - spinlock_t *lock; 364 372 u8 dbg_rq_flushed; 365 373 u8 sq_flush_seen; 366 374 u8 rq_flush_seen;
+2 -47
drivers/infiniband/hw/irdma/utils.c
··· 11 11 * @action: modify, delete or add 12 12 */ 13 13 int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, 14 - u8 *mac_addr, u32 action) 14 + const u8 *mac_addr, u32 action) 15 15 { 16 16 unsigned long flags; 17 17 int arp_index; ··· 77 77 * @ipv4: IPv4 flag 78 78 * @mac: MAC address 79 79 */ 80 - int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac) 80 + int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac) 81 81 { 82 82 int arpidx; 83 83 ··· 765 765 return NULL; 766 766 767 767 return &iwdev->rf->qp_table[qpn]->ibqp; 768 - } 769 - 770 - /** 771 - * irdma_get_hw_addr - return hw addr 772 - * @par: points to shared dev 773 - */ 774 - u8 __iomem *irdma_get_hw_addr(void *par) 775 - { 776 - struct irdma_sc_dev *dev = par; 777 - 778 - return dev->hw->hw_addr; 779 768 } 780 769 781 770 /** ··· 2043 2054 } 2044 2055 2045 2056 exit: 2046 - irdma_put_cqp_request(&rf->cqp, cqp_request); 2047 - 2048 - return status; 2049 - } 2050 - 2051 - /** 2052 - * irdma_cqp_up_map_cmd - Set the up-up mapping 2053 - * @dev: pointer to device structure 2054 - * @cmd: map command 2055 - * @map_info: pointer to up map info 2056 - */ 2057 - enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, 2058 - struct irdma_up_info *map_info) 2059 - { 2060 - struct irdma_pci_f *rf = dev_to_rf(dev); 2061 - struct irdma_cqp *iwcqp = &rf->cqp; 2062 - struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; 2063 - struct irdma_cqp_request *cqp_request; 2064 - struct cqp_cmds_info *cqp_info; 2065 - enum irdma_status_code status; 2066 - 2067 - cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, false); 2068 - if (!cqp_request) 2069 - return IRDMA_ERR_NO_MEMORY; 2070 - 2071 - cqp_info = &cqp_request->info; 2072 - memset(cqp_info, 0, sizeof(*cqp_info)); 2073 - cqp_info->cqp_cmd = cmd; 2074 - cqp_info->post_sq = 1; 2075 - cqp_info->in.u.up_map.info = *map_info; 2076 - cqp_info->in.u.up_map.cqp = cqp; 2077 - cqp_info->in.u.up_map.scratch = (uintptr_t)cqp_request; 2078 - 2079 - status = irdma_handle_cqp_op(rf, cqp_request); 2080 2057 irdma_put_cqp_request(&rf->cqp, cqp_request); 2081 2058 2082 2059 return status;
+63 -83
drivers/infiniband/hw/irdma/verbs.c
··· 833 833 834 834 qp = &iwqp->sc_qp; 835 835 qp->qp_uk.back_qp = iwqp; 836 - qp->qp_uk.lock = &iwqp->lock; 837 836 qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX; 838 837 839 838 iwqp->iwdev = iwdev; ··· 1197 1198 av->attrs = attr->ah_attr; 1198 1199 rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); 1199 1200 rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); 1200 - roce_info->local_qp = ibqp->qp_num; 1201 1201 if (av->sgid_addr.saddr.sa_family == AF_INET6) { 1202 1202 __be32 *daddr = 1203 1203 av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; ··· 3039 3041 } 3040 3042 3041 3043 /** 3042 - * irdma_copy_sg_list - copy sg list for qp 3043 - * @sg_list: copied into sg_list 3044 - * @sgl: copy from sgl 3045 - * @num_sges: count of sg entries 3046 - */ 3047 - static void irdma_copy_sg_list(struct irdma_sge *sg_list, struct ib_sge *sgl, 3048 - int num_sges) 3049 - { 3050 - unsigned int i; 3051 - 3052 - for (i = 0; (i < num_sges) && (i < IRDMA_MAX_WQ_FRAGMENT_COUNT); i++) { 3053 - sg_list[i].tag_off = sgl[i].addr; 3054 - sg_list[i].len = sgl[i].length; 3055 - sg_list[i].stag = sgl[i].lkey; 3056 - } 3057 - } 3058 - 3059 - /** 3060 3044 * irdma_post_send - kernel application wr 3061 3045 * @ibqp: qp ptr for wr 3062 3046 * @ib_wr: work request ptr ··· 3114 3134 ret = irdma_uk_inline_send(ukqp, &info, false); 3115 3135 } else { 3116 3136 info.op.send.num_sges = ib_wr->num_sge; 3117 - info.op.send.sg_list = (struct irdma_sge *) 3118 - ib_wr->sg_list; 3137 + info.op.send.sg_list = ib_wr->sg_list; 3119 3138 if (iwqp->ibqp.qp_type == IB_QPT_UD || 3120 3139 iwqp->ibqp.qp_type == IB_QPT_GSI) { 3121 3140 ah = to_iwah(ud_wr(ib_wr)->ah); ··· 3149 3170 3150 3171 if (ib_wr->send_flags & IB_SEND_INLINE) { 3151 3172 info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr; 3152 - info.op.inline_rdma_write.len = ib_wr->sg_list[0].length; 3153 - info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 3154 - info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; 3173 + info.op.inline_rdma_write.len = 3174 + ib_wr->sg_list[0].length; 3175 + info.op.inline_rdma_write.rem_addr.addr = 3176 + rdma_wr(ib_wr)->remote_addr; 3177 + info.op.inline_rdma_write.rem_addr.lkey = 3178 + rdma_wr(ib_wr)->rkey; 3155 3179 ret = irdma_uk_inline_rdma_write(ukqp, &info, false); 3156 3180 } else { 3157 3181 info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; 3158 3182 info.op.rdma_write.num_lo_sges = ib_wr->num_sge; 3159 - info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 3160 - info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; 3183 + info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; 3184 + info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; 3161 3185 ret = irdma_uk_rdma_write(ukqp, &info, false); 3162 3186 } 3163 3187 ··· 3181 3199 break; 3182 3200 } 3183 3201 info.op_type = IRDMA_OP_TYPE_RDMA_READ; 3184 - info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 3185 - info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; 3202 + info.op.rdma_read.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; 3203 + info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey; 3186 3204 info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; 3187 3205 info.op.rdma_read.num_lo_sges = ib_wr->num_sge; 3188 3206 ··· 3269 3287 struct irdma_qp *iwqp; 3270 3288 struct irdma_qp_uk *ukqp; 3271 3289 struct irdma_post_rq_info post_recv = {}; 3272 - struct irdma_sge sg_list[IRDMA_MAX_WQ_FRAGMENT_COUNT]; 3273 3290 enum irdma_status_code 
ret = 0; 3274 3291 unsigned long flags; 3275 3292 int err = 0; ··· 3283 3302 while (ib_wr) { 3284 3303 post_recv.num_sges = ib_wr->num_sge; 3285 3304 post_recv.wr_id = ib_wr->wr_id; 3286 - irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); 3287 - post_recv.sg_list = sg_list; 3305 + post_recv.sg_list = ib_wr->sg_list; 3288 3306 ret = irdma_uk_post_receive(ukqp, &post_recv); 3289 3307 if (ret) { 3290 3308 ibdev_dbg(&iwqp->iwdev->ibdev, ··· 3631 3651 return 0; 3632 3652 } 3633 3653 3634 - static const char *const irdma_hw_stat_names[] = { 3654 + static const struct rdma_stat_desc irdma_hw_stat_descs[] = { 3635 3655 /* 32bit names */ 3636 - [IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors", 3637 - [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", 3638 - [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", 3639 - [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", 3640 - [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", 3641 - [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", 3642 - [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", 3643 - [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs", 3644 - [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors", 3645 - [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors", 3646 - [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled", 3647 - [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored", 3648 - [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent", 3656 + [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors", 3657 + [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards", 3658 + [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts", 3659 + [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes", 3660 + [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards", 3661 + [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts", 3662 + [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes", 3663 + [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "tcpRetransSegs", 3664 + [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "tcpInOptErrors", 3665 + [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "tcpInProtoErrors", 3666 + [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled", 3667 + [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored", 3668 + [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent", 3649 3669 3650 3670 /* 64bit names */ 3651 - [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3671 + [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3652 3672 "ip4InOctets", 3653 - [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3673 + [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3654 3674 "ip4InPkts", 3655 - [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = 3675 + [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3656 3676 "ip4InReasmRqd", 3657 - [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3677 + [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3658 3678 "ip4InMcastOctets", 3659 - [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3679 + [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3660 3680 "ip4InMcastPkts", 3661 - [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3681 + [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3662 3682 "ip4OutOctets", 3663 - [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3683 + [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3664 3684 "ip4OutPkts", 3665 - 
[IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = 3685 + [IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3666 3686 "ip4OutSegRqd", 3667 - [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3687 + [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3668 3688 "ip4OutMcastOctets", 3669 - [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3689 + [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3670 3690 "ip4OutMcastPkts", 3671 - [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3691 + [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3672 3692 "ip6InOctets", 3673 - [IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3693 + [IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3674 3694 "ip6InPkts", 3675 - [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = 3695 + [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3676 3696 "ip6InReasmRqd", 3677 - [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3697 + [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3678 3698 "ip6InMcastOctets", 3679 - [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3699 + [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3680 3700 "ip6InMcastPkts", 3681 - [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3701 + [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3682 3702 "ip6OutOctets", 3683 - [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3703 + [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3684 3704 "ip6OutPkts", 3685 - [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = 3705 + [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3686 3706 "ip6OutSegRqd", 3687 - [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3707 + [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3688 3708 "ip6OutMcastOctets", 3689 - [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3709 + [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3690 3710 "ip6OutMcastPkts", 3691 - [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32] = 3711 + [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3692 3712 "tcpInSegs", 3693 - [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32] = 3713 + [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32].name = 3694 3714 "tcpOutSegs", 3695 - [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32] = 3715 + [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3696 3716 "iwInRdmaReads", 3697 - [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] = 3717 + [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3698 3718 "iwInRdmaSends", 3699 - [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32] = 3719 + [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3700 3720 "iwInRdmaWrites", 3701 - [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32] = 3721 + [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3702 3722 "iwOutRdmaReads", 3703 - [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] = 3723 + [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3704 3724 "iwOutRdmaSends", 3705 - [IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32] = 3725 + 
[IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3706 3726 "iwOutRdmaWrites", 3707 - [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32] = 3727 + [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32].name = 3708 3728 "iwRdmaBnd", 3709 - [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32] = 3729 + [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32].name = 3710 3730 "iwRdmaInv", 3711 - [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3731 + [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3712 3732 "RxUDP", 3713 - [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3733 + [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = 3714 3734 "TxUDP", 3715 - [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = 3716 - "RxECNMrkd", 3735 + [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] 3736 + .name = "RxECNMrkd", 3717 3737 }; 3718 3738 3719 3739 static void irdma_get_dev_fw_str(struct ib_device *dev, char *str) ··· 3737 3757 IRDMA_HW_STAT_INDEX_MAX_64; 3738 3758 unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; 3739 3759 3740 - BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_names) != 3760 + BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_descs) != 3741 3761 (IRDMA_HW_STAT_INDEX_MAX_32 + IRDMA_HW_STAT_INDEX_MAX_64)); 3742 3762 3743 - return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters, 3763 + return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters, 3744 3764 lifespan); 3745 3765 } 3746 3766 ··· 4310 4330 4311 4331 static __be64 irdma_mac_to_guid(struct net_device *ndev) 4312 4332 { 4313 - unsigned char *mac = ndev->dev_addr; 4333 + const unsigned char *mac = ndev->dev_addr; 4314 4334 __be64 guid; 4315 4335 unsigned char *dst = (unsigned char *)&guid; 4316 4336
+1 -3
drivers/infiniband/hw/mlx4/alias_GUID.c
··· 822 822 } 823 823 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); 824 824 } 825 - for (i = 0 ; i < dev->num_ports; i++) { 826 - flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); 825 + for (i = 0 ; i < dev->num_ports; i++) 827 826 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); 828 - } 829 827 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); 830 828 kfree(dev->sriov.alias_guid.sa_client); 831 829 }
+21 -23
drivers/infiniband/hw/mlx4/main.c
··· 2105 2105 struct mlx4_ib_dev *dev = to_mdev(ibdev); 2106 2106 struct mlx4_ib_diag_counters *diag = dev->diag_counters; 2107 2107 2108 - if (!diag[0].name) 2108 + if (!diag[0].descs) 2109 2109 return NULL; 2110 2110 2111 - return rdma_alloc_hw_stats_struct(diag[0].name, diag[0].num_counters, 2111 + return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters, 2112 2112 RDMA_HW_STATS_DEFAULT_LIFESPAN); 2113 2113 } 2114 2114 ··· 2118 2118 struct mlx4_ib_dev *dev = to_mdev(ibdev); 2119 2119 struct mlx4_ib_diag_counters *diag = dev->diag_counters; 2120 2120 2121 - if (!diag[1].name) 2121 + if (!diag[1].descs) 2122 2122 return NULL; 2123 2123 2124 - return rdma_alloc_hw_stats_struct(diag[1].name, diag[1].num_counters, 2124 + return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters, 2125 2125 RDMA_HW_STATS_DEFAULT_LIFESPAN); 2126 2126 } 2127 2127 ··· 2151 2151 } 2152 2152 2153 2153 static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, 2154 - const char ***name, 2155 - u32 **offset, 2156 - u32 *num, 2157 - bool port) 2154 + struct rdma_stat_desc **pdescs, 2155 + u32 **offset, u32 *num, bool port) 2158 2156 { 2159 2157 u32 num_counters; 2160 2158 ··· 2164 2166 if (!port) 2165 2167 num_counters += ARRAY_SIZE(diag_device_only); 2166 2168 2167 - *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL); 2168 - if (!*name) 2169 + *pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc), 2170 + GFP_KERNEL); 2171 + if (!*pdescs) 2169 2172 return -ENOMEM; 2170 2173 2171 2174 *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL); 2172 2175 if (!*offset) 2173 - goto err_name; 2176 + goto err; 2174 2177 2175 2178 *num = num_counters; 2176 2179 2177 2180 return 0; 2178 2181 2179 - err_name: 2180 - kfree(*name); 2182 + err: 2183 + kfree(*pdescs); 2181 2184 return -ENOMEM; 2182 2185 } 2183 2186 2184 2187 static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, 2185 - const char **name, 2186 - u32 *offset, 2187 - bool port) 2188 + struct rdma_stat_desc *descs, 2189 + u32 *offset, bool port) 2188 2190 { 2189 2191 int i; 2190 2192 int j; 2191 2193 2192 2194 for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) { 2193 - name[i] = diag_basic[i].name; 2195 + descs[i].name = diag_basic[i].name; 2194 2196 offset[i] = diag_basic[i].offset; 2195 2197 } 2196 2198 2197 2199 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) { 2198 2200 for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) { 2199 - name[j] = diag_ext[i].name; 2201 + descs[j].name = diag_ext[i].name; 2200 2202 offset[j] = diag_ext[i].offset; 2201 2203 } 2202 2204 } 2203 2205 2204 2206 if (!port) { 2205 2207 for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) { 2206 - name[j] = diag_device_only[i].name; 2208 + descs[j].name = diag_device_only[i].name; 2207 2209 offset[j] = diag_device_only[i].offset; 2208 2210 } 2209 2211 } ··· 2231 2233 if (i && !per_port) 2232 2234 continue; 2233 2235 2234 - ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name, 2236 + ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs, 2235 2237 &diag[i].offset, 2236 2238 &diag[i].num_counters, i); 2237 2239 if (ret) 2238 2240 goto err_alloc; 2239 2241 2240 - mlx4_ib_fill_diag_counters(ibdev, diag[i].name, 2242 + mlx4_ib_fill_diag_counters(ibdev, diag[i].descs, 2241 2243 diag[i].offset, i); 2242 2244 } 2243 2245 ··· 2247 2249 2248 2250 err_alloc: 2249 2251 if (i) { 2250 - kfree(diag[i - 1].name); 2252 + kfree(diag[i - 1].descs); 2251 2253 kfree(diag[i - 1].offset); 2252 2254 } 2253 2255 ··· 2260 2262 2261 
2263 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { 2262 2264 kfree(ibdev->diag_counters[i].offset); 2263 - kfree(ibdev->diag_counters[i].name); 2265 + kfree(ibdev->diag_counters[i].descs); 2264 2266 } 2265 2267 } 2266 2268
+1 -1
drivers/infiniband/hw/mlx4/mlx4_ib.h
··· 601 601 #define MLX4_DIAG_COUNTERS_TYPES 2 602 602 603 603 struct mlx4_ib_diag_counters { 604 - const char **name; 604 + struct rdma_stat_desc *descs; 605 605 u32 *offset; 606 606 u32 num_counters; 607 607 };
+3 -1
drivers/infiniband/hw/mlx4/qp.c
··· 1099 1099 if (dev->steering_support == 1100 1100 MLX4_STEERING_MODE_DEVICE_MANAGED) 1101 1101 qp->flags |= MLX4_IB_QP_NETIF; 1102 - else 1102 + else { 1103 + err = -EINVAL; 1103 1104 goto err; 1105 + } 1104 1106 } 1105 1107 1106 1108 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
+245 -38
drivers/infiniband/hw/mlx5/counters.c
··· 12 12 struct mlx5_ib_counter { 13 13 const char *name; 14 14 size_t offset; 15 + u32 type; 15 16 }; 16 17 17 18 #define INIT_Q_COUNTER(_name) \ ··· 74 73 75 74 static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { 76 75 INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), 76 + }; 77 + 78 + #define INIT_OP_COUNTER(_name, _type) \ 79 + { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type} 80 + 81 + static const struct mlx5_ib_counter basic_op_cnts[] = { 82 + INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS), 83 + }; 84 + 85 + static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { 86 + INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS), 87 + }; 88 + 89 + static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { 90 + INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), 77 91 }; 78 92 79 93 static int mlx5_ib_read_counters(struct ib_counters *counters, ··· 177 161 return cnts->set_id; 178 162 } 179 163 164 + static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts) 165 + { 166 + struct rdma_hw_stats *stats; 167 + u32 num_hw_counters; 168 + int i; 169 + 170 + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 171 + cnts->num_ext_ppcnt_counters; 172 + stats = rdma_alloc_hw_stats_struct(cnts->descs, 173 + num_hw_counters + 174 + cnts->num_op_counters, 175 + RDMA_HW_STATS_DEFAULT_LIFESPAN); 176 + if (!stats) 177 + return NULL; 178 + 179 + for (i = 0; i < cnts->num_op_counters; i++) 180 + set_bit(num_hw_counters + i, stats->is_disabled); 181 + 182 + return stats; 183 + } 184 + 180 185 static struct rdma_hw_stats * 181 186 mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) 182 187 { 183 188 struct mlx5_ib_dev *dev = to_mdev(ibdev); 184 189 const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; 185 190 186 - return rdma_alloc_hw_stats_struct(cnts->names, 187 - cnts->num_q_counters + 188 - cnts->num_cong_counters + 189 - cnts->num_ext_ppcnt_counters, 190 - RDMA_HW_STATS_DEFAULT_LIFESPAN); 191 + return do_alloc_stats(cnts); 191 192 } 192 193 193 194 static struct rdma_hw_stats * ··· 213 180 struct mlx5_ib_dev *dev = to_mdev(ibdev); 214 181 const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; 215 182 216 - return rdma_alloc_hw_stats_struct(cnts->names, 217 - cnts->num_q_counters + 218 - cnts->num_cong_counters + 219 - cnts->num_ext_ppcnt_counters, 220 - RDMA_HW_STATS_DEFAULT_LIFESPAN); 183 + return do_alloc_stats(cnts); 221 184 } 222 185 223 186 static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, ··· 270 241 return ret; 271 242 } 272 243 273 - static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 274 - struct rdma_hw_stats *stats, 275 - u32 port_num, int index) 244 + static int do_get_hw_stats(struct ib_device *ibdev, 245 + struct rdma_hw_stats *stats, 246 + u32 port_num, int index) 276 247 { 277 248 struct mlx5_ib_dev *dev = to_mdev(ibdev); 278 249 const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); ··· 324 295 return num_counters; 325 296 } 326 297 298 + static int do_get_op_stat(struct ib_device *ibdev, 299 + struct rdma_hw_stats *stats, 300 + u32 port_num, int index) 301 + { 302 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 303 + const struct mlx5_ib_counters *cnts; 304 + const struct mlx5_ib_op_fc *opfcs; 305 + u64 packets = 0, bytes; 306 + u32 type; 307 + int ret; 308 + 309 + cnts = get_counters(dev, port_num - 1); 310 + opfcs = cnts->opfcs; 311 + type = *(u32 *)cnts->descs[index].priv; 312 + if (type >= MLX5_IB_OPCOUNTER_MAX) 313 + return -EINVAL; 314 + 315 + if (!opfcs[type].fc) 316 + goto out; 317 + 318 + 
ret = mlx5_fc_query(dev->mdev, opfcs[type].fc, 319 + &packets, &bytes); 320 + if (ret) 321 + return ret; 322 + 323 + out: 324 + stats->value[index] = packets; 325 + return index; 326 + } 327 + 328 + static int do_get_op_stats(struct ib_device *ibdev, 329 + struct rdma_hw_stats *stats, 330 + u32 port_num) 331 + { 332 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 333 + const struct mlx5_ib_counters *cnts; 334 + int index, ret, num_hw_counters; 335 + 336 + cnts = get_counters(dev, port_num - 1); 337 + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 338 + cnts->num_ext_ppcnt_counters; 339 + for (index = num_hw_counters; 340 + index < (num_hw_counters + cnts->num_op_counters); index++) { 341 + ret = do_get_op_stat(ibdev, stats, port_num, index); 342 + if (ret != index) 343 + return ret; 344 + } 345 + 346 + return cnts->num_op_counters; 347 + } 348 + 349 + static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 350 + struct rdma_hw_stats *stats, 351 + u32 port_num, int index) 352 + { 353 + int num_counters, num_hw_counters, num_op_counters; 354 + struct mlx5_ib_dev *dev = to_mdev(ibdev); 355 + const struct mlx5_ib_counters *cnts; 356 + 357 + cnts = get_counters(dev, port_num - 1); 358 + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 359 + cnts->num_ext_ppcnt_counters; 360 + num_counters = num_hw_counters + cnts->num_op_counters; 361 + 362 + if (index < 0 || index > num_counters) 363 + return -EINVAL; 364 + else if (index > 0 && index < num_hw_counters) 365 + return do_get_hw_stats(ibdev, stats, port_num, index); 366 + else if (index >= num_hw_counters && index < num_counters) 367 + return do_get_op_stat(ibdev, stats, port_num, index); 368 + 369 + num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index); 370 + if (num_hw_counters < 0) 371 + return num_hw_counters; 372 + 373 + num_op_counters = do_get_op_stats(ibdev, stats, port_num); 374 + if (num_op_counters < 0) 375 + return num_op_counters; 376 + 377 + return num_hw_counters + num_op_counters; 378 + } 379 + 327 380 static struct rdma_hw_stats * 328 381 mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) 329 382 { ··· 413 302 const struct mlx5_ib_counters *cnts = 414 303 get_counters(dev, counter->port - 1); 415 304 416 - return rdma_alloc_hw_stats_struct(cnts->names, 417 - cnts->num_q_counters + 418 - cnts->num_cong_counters + 419 - cnts->num_ext_ppcnt_counters, 420 - RDMA_HW_STATS_DEFAULT_LIFESPAN); 305 + return do_alloc_stats(cnts); 421 306 } 422 307 423 308 static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) ··· 478 371 return mlx5_ib_qp_set_counter(qp, NULL); 479 372 } 480 373 481 - 482 374 static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, 483 - const char **names, 484 - size_t *offsets) 375 + struct rdma_stat_desc *descs, size_t *offsets) 485 376 { 486 377 int i; 487 378 int j = 0; 488 379 489 380 for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { 490 - names[j] = basic_q_cnts[i].name; 381 + descs[j].name = basic_q_cnts[i].name; 491 382 offsets[j] = basic_q_cnts[i].offset; 492 383 } 493 384 494 385 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { 495 386 for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { 496 - names[j] = out_of_seq_q_cnts[i].name; 387 + descs[j].name = out_of_seq_q_cnts[i].name; 497 388 offsets[j] = out_of_seq_q_cnts[i].offset; 498 389 } 499 390 } 500 391 501 392 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 502 393 for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { 503 - names[j] = retrans_q_cnts[i].name; 394 + 
descs[j].name = retrans_q_cnts[i].name; 504 395 offsets[j] = retrans_q_cnts[i].offset; 505 396 } 506 397 } 507 398 508 399 if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { 509 400 for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { 510 - names[j] = extended_err_cnts[i].name; 401 + descs[j].name = extended_err_cnts[i].name; 511 402 offsets[j] = extended_err_cnts[i].offset; 512 403 } 513 404 } 514 405 515 406 if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { 516 407 for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { 517 - names[j] = roce_accl_cnts[i].name; 408 + descs[j].name = roce_accl_cnts[i].name; 518 409 offsets[j] = roce_accl_cnts[i].offset; 519 410 } 520 411 } 521 412 522 413 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 523 414 for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { 524 - names[j] = cong_cnts[i].name; 415 + descs[j].name = cong_cnts[i].name; 525 416 offsets[j] = cong_cnts[i].offset; 526 417 } 527 418 } 528 419 529 420 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { 530 421 for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { 531 - names[j] = ext_ppcnt_cnts[i].name; 422 + descs[j].name = ext_ppcnt_cnts[i].name; 532 423 offsets[j] = ext_ppcnt_cnts[i].offset; 424 + } 425 + } 426 + 427 + for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { 428 + descs[j].name = basic_op_cnts[i].name; 429 + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 430 + descs[j].priv = &basic_op_cnts[i].type; 431 + } 432 + 433 + if (MLX5_CAP_FLOWTABLE(dev->mdev, 434 + ft_field_support_2_nic_receive_rdma.bth_opcode)) { 435 + for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { 436 + descs[j].name = rdmarx_cnp_op_cnts[i].name; 437 + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 438 + descs[j].priv = &rdmarx_cnp_op_cnts[i].type; 439 + } 440 + } 441 + 442 + if (MLX5_CAP_FLOWTABLE(dev->mdev, 443 + ft_field_support_2_nic_transmit_rdma.bth_opcode)) { 444 + for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { 445 + descs[j].name = rdmatx_cnp_op_cnts[i].name; 446 + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; 447 + descs[j].priv = &rdmatx_cnp_op_cnts[i].type; 533 448 } 534 449 } 535 450 } ··· 560 431 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, 561 432 struct mlx5_ib_counters *cnts) 562 433 { 563 - u32 num_counters; 434 + u32 num_counters, num_op_counters; 564 435 565 436 num_counters = ARRAY_SIZE(basic_q_cnts); 566 437 ··· 586 457 cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); 587 458 num_counters += ARRAY_SIZE(ext_ppcnt_cnts); 588 459 } 589 - cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); 590 - if (!cnts->names) 460 + 461 + num_op_counters = ARRAY_SIZE(basic_op_cnts); 462 + 463 + if (MLX5_CAP_FLOWTABLE(dev->mdev, 464 + ft_field_support_2_nic_receive_rdma.bth_opcode)) 465 + num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); 466 + 467 + if (MLX5_CAP_FLOWTABLE(dev->mdev, 468 + ft_field_support_2_nic_transmit_rdma.bth_opcode)) 469 + num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); 470 + 471 + cnts->num_op_counters = num_op_counters; 472 + num_counters += num_op_counters; 473 + cnts->descs = kcalloc(num_counters, 474 + sizeof(struct rdma_stat_desc), GFP_KERNEL); 475 + if (!cnts->descs) 591 476 return -ENOMEM; 592 477 593 478 cnts->offsets = kcalloc(num_counters, 594 479 sizeof(*cnts->offsets), GFP_KERNEL); 595 480 if (!cnts->offsets) 596 - goto err_names; 481 + goto err; 597 482 598 483 return 0; 599 484 600 - err_names: 601 - kfree(cnts->names); 602 - cnts->names = NULL; 485 + err: 486 + kfree(cnts->descs); 487 + 
cnts->descs = NULL; 603 488 return -ENOMEM; 604 489 } 605 490 ··· 621 478 { 622 479 u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; 623 480 int num_cnt_ports; 624 - int i; 481 + int i, j; 625 482 626 483 num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; 627 484 ··· 634 491 dev->port[i].cnts.set_id); 635 492 mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); 636 493 } 637 - kfree(dev->port[i].cnts.names); 494 + kfree(dev->port[i].cnts.descs); 638 495 kfree(dev->port[i].cnts.offsets); 496 + 497 + for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) { 498 + if (!dev->port[i].cnts.opfcs[j].fc) 499 + continue; 500 + 501 + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) 502 + mlx5_ib_fs_remove_op_fc(dev, 503 + &dev->port[i].cnts.opfcs[j], j); 504 + mlx5_fc_destroy(dev->mdev, 505 + dev->port[i].cnts.opfcs[j].fc); 506 + dev->port[i].cnts.opfcs[j].fc = NULL; 507 + } 639 508 } 640 509 } 641 510 ··· 669 514 if (err) 670 515 goto err_alloc; 671 516 672 - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, 517 + mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, 673 518 dev->port[i].cnts.offsets); 674 519 675 520 MLX5_SET(alloc_q_counter_in, in, uid, ··· 827 672 mutex_unlock(&mcounters->mcntrs_mutex); 828 673 } 829 674 675 + static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, 676 + unsigned int index, bool enable) 677 + { 678 + struct mlx5_ib_dev *dev = to_mdev(device); 679 + struct mlx5_ib_counters *cnts; 680 + struct mlx5_ib_op_fc *opfc; 681 + u32 num_hw_counters, type; 682 + int ret; 683 + 684 + cnts = &dev->port[port - 1].cnts; 685 + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + 686 + cnts->num_ext_ppcnt_counters; 687 + if (index < num_hw_counters || 688 + index >= (num_hw_counters + cnts->num_op_counters)) 689 + return -EINVAL; 690 + 691 + if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) 692 + return -EINVAL; 693 + 694 + type = *(u32 *)cnts->descs[index].priv; 695 + if (type >= MLX5_IB_OPCOUNTER_MAX) 696 + return -EINVAL; 697 + 698 + opfc = &cnts->opfcs[type]; 699 + 700 + if (enable) { 701 + if (opfc->fc) 702 + return -EEXIST; 703 + 704 + opfc->fc = mlx5_fc_create(dev->mdev, false); 705 + if (IS_ERR(opfc->fc)) 706 + return PTR_ERR(opfc->fc); 707 + 708 + ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type); 709 + if (ret) { 710 + mlx5_fc_destroy(dev->mdev, opfc->fc); 711 + opfc->fc = NULL; 712 + } 713 + return ret; 714 + } 715 + 716 + if (!opfc->fc) 717 + return -EINVAL; 718 + 719 + mlx5_ib_fs_remove_op_fc(dev, opfc, type); 720 + mlx5_fc_destroy(dev->mdev, opfc->fc); 721 + opfc->fc = NULL; 722 + return 0; 723 + } 724 + 830 725 static const struct ib_device_ops hw_stats_ops = { 831 726 .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, 832 727 .get_hw_stats = mlx5_ib_get_hw_stats, ··· 885 680 .counter_dealloc = mlx5_ib_counter_dealloc, 886 681 .counter_alloc_stats = mlx5_ib_counter_alloc_stats, 887 682 .counter_update_stats = mlx5_ib_counter_update_stats, 683 + .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? 684 + mlx5_ib_modify_stat : NULL, 888 685 }; 889 686 890 687 static const struct ib_device_ops hw_switchdev_stats_ops = {
+187
drivers/infiniband/hw/mlx5/fs.c
··· 10 10 #include <rdma/uverbs_std_types.h> 11 11 #include <rdma/mlx5_user_ioctl_cmds.h> 12 12 #include <rdma/mlx5_user_ioctl_verbs.h> 13 + #include <rdma/ib_hdrs.h> 13 14 #include <rdma/ib_umem.h> 14 15 #include <linux/mlx5/driver.h> 15 16 #include <linux/mlx5/fs.h> 16 17 #include <linux/mlx5/fs_helpers.h> 17 18 #include <linux/mlx5/accel.h> 18 19 #include <linux/mlx5/eswitch.h> 20 + #include <net/inet_ecn.h> 19 21 #include "mlx5_ib.h" 20 22 #include "counters.h" 21 23 #include "devx.h" ··· 847 845 flags); 848 846 849 847 return prio; 848 + } 849 + 850 + enum { 851 + RDMA_RX_ECN_OPCOUNTER_PRIO, 852 + RDMA_RX_CNP_OPCOUNTER_PRIO, 853 + }; 854 + 855 + enum { 856 + RDMA_TX_CNP_OPCOUNTER_PRIO, 857 + }; 858 + 859 + static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, 860 + struct mlx5_flow_spec *spec) 861 + { 862 + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, 863 + ft_field_support.source_vhca_port) || 864 + !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, 865 + ft_field_support.source_vhca_port)) 866 + return -EOPNOTSUPP; 867 + 868 + MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria, 869 + misc_parameters.source_vhca_port); 870 + MLX5_SET(fte_match_param, &spec->match_value, 871 + misc_parameters.source_vhca_port, port_num); 872 + 873 + return 0; 874 + } 875 + 876 + static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num, 877 + struct mlx5_flow_spec *spec, int ipv) 878 + { 879 + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, 880 + ft_field_support.outer_ip_version)) 881 + return -EOPNOTSUPP; 882 + 883 + if (mlx5_core_mp_enabled(dev->mdev) && 884 + set_vhca_port_spec(dev, port_num, spec)) 885 + return -EOPNOTSUPP; 886 + 887 + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 888 + outer_headers.ip_ecn); 889 + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn, 890 + INET_ECN_CE); 891 + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 892 + outer_headers.ip_version); 893 + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, 894 + ipv); 895 + 896 + spec->match_criteria_enable = 897 + get_match_criteria_enable(spec->match_criteria); 898 + 899 + return 0; 900 + } 901 + 902 + static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, 903 + struct mlx5_flow_spec *spec) 904 + { 905 + if (mlx5_core_mp_enabled(dev->mdev) && 906 + set_vhca_port_spec(dev, port_num, spec)) 907 + return -EOPNOTSUPP; 908 + 909 + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 910 + misc_parameters.bth_opcode); 911 + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode, 912 + IB_BTH_OPCODE_CNP); 913 + 914 + spec->match_criteria_enable = 915 + get_match_criteria_enable(spec->match_criteria); 916 + 917 + return 0; 918 + } 919 + 920 + int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, 921 + struct mlx5_ib_op_fc *opfc, 922 + enum mlx5_ib_optional_counter_type type) 923 + { 924 + enum mlx5_flow_namespace_type fn_type; 925 + int priority, i, err, spec_num; 926 + struct mlx5_flow_act flow_act = {}; 927 + struct mlx5_flow_destination dst; 928 + struct mlx5_flow_namespace *ns; 929 + struct mlx5_ib_flow_prio *prio; 930 + struct mlx5_flow_spec *spec; 931 + 932 + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); 933 + if (!spec) 934 + return -ENOMEM; 935 + 936 + switch (type) { 937 + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: 938 + if (set_ecn_ce_spec(dev, port_num, &spec[0], 939 + MLX5_FS_IPV4_VERSION) || 940 + set_ecn_ce_spec(dev, port_num, &spec[1], 941 + MLX5_FS_IPV6_VERSION)) { 942 + err = -EOPNOTSUPP; 943 + goto free; 944 
+ } 945 + spec_num = 2; 946 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; 947 + priority = RDMA_RX_ECN_OPCOUNTER_PRIO; 948 + break; 949 + 950 + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: 951 + if (!MLX5_CAP_FLOWTABLE(dev->mdev, 952 + ft_field_support_2_nic_receive_rdma.bth_opcode) || 953 + set_cnp_spec(dev, port_num, &spec[0])) { 954 + err = -EOPNOTSUPP; 955 + goto free; 956 + } 957 + spec_num = 1; 958 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; 959 + priority = RDMA_RX_CNP_OPCOUNTER_PRIO; 960 + break; 961 + 962 + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: 963 + if (!MLX5_CAP_FLOWTABLE(dev->mdev, 964 + ft_field_support_2_nic_transmit_rdma.bth_opcode) || 965 + set_cnp_spec(dev, port_num, &spec[0])) { 966 + err = -EOPNOTSUPP; 967 + goto free; 968 + } 969 + spec_num = 1; 970 + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; 971 + priority = RDMA_TX_CNP_OPCOUNTER_PRIO; 972 + break; 973 + 974 + default: 975 + err = -EOPNOTSUPP; 976 + goto free; 977 + } 978 + 979 + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); 980 + if (!ns) { 981 + err = -EOPNOTSUPP; 982 + goto free; 983 + } 984 + 985 + prio = &dev->flow_db->opfcs[type]; 986 + if (!prio->flow_table) { 987 + prio = _get_prio(ns, prio, priority, 988 + dev->num_ports * MAX_OPFC_RULES, 1, 0); 989 + if (IS_ERR(prio)) { 990 + err = PTR_ERR(prio); 991 + goto free; 992 + } 993 + } 994 + 995 + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 996 + dst.counter_id = mlx5_fc_id(opfc->fc); 997 + 998 + flow_act.action = 999 + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; 1000 + 1001 + for (i = 0; i < spec_num; i++) { 1002 + opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i], 1003 + &flow_act, &dst, 1); 1004 + if (IS_ERR(opfc->rule[i])) { 1005 + err = PTR_ERR(opfc->rule[i]); 1006 + goto del_rules; 1007 + } 1008 + } 1009 + prio->refcount += spec_num; 1010 + kfree(spec); 1011 + 1012 + return 0; 1013 + 1014 + del_rules: 1015 + for (i -= 1; i >= 0; i--) 1016 + mlx5_del_flow_rules(opfc->rule[i]); 1017 + put_flow_table(dev, prio, false); 1018 + free: 1019 + kfree(spec); 1020 + return err; 1021 + } 1022 + 1023 + void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, 1024 + struct mlx5_ib_op_fc *opfc, 1025 + enum mlx5_ib_optional_counter_type type) 1026 + { 1027 + int i; 1028 + 1029 + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { 1030 + mlx5_del_flow_rules(opfc->rule[i]); 1031 + put_flow_table(dev, &dev->flow_db->opfcs[type], true); 1032 + } 850 1033 } 851 1034 852 1035 static void set_underlay_qp(struct mlx5_ib_dev *dev,
+27 -1
drivers/infiniband/hw/mlx5/mlx5_ib.h
··· 263 263 struct mlx5_core_dev *mdev; 264 264 }; 265 265 266 + enum mlx5_ib_optional_counter_type { 267 + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, 268 + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, 269 + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, 270 + 271 + MLX5_IB_OPCOUNTER_MAX, 272 + }; 273 + 266 274 struct mlx5_ib_flow_db { 267 275 struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; 268 276 struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; ··· 279 271 struct mlx5_ib_flow_prio fdb; 280 272 struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; 281 273 struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; 274 + struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; 282 275 struct mlx5_flow_table *lag_demux_ft; 283 276 /* Protect flow steering bypass flow tables 284 277 * when add/del flow rules. ··· 813 804 struct mlx5_ib_port_resources ports[2]; 814 805 }; 815 806 807 + #define MAX_OPFC_RULES 2 808 + 809 + struct mlx5_ib_op_fc { 810 + struct mlx5_fc *fc; 811 + struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; 812 + }; 813 + 816 814 struct mlx5_ib_counters { 817 - const char **names; 815 + struct rdma_stat_desc *descs; 818 816 size_t *offsets; 819 817 u32 num_q_counters; 820 818 u32 num_cong_counters; 821 819 u32 num_ext_ppcnt_counters; 820 + u32 num_op_counters; 822 821 u16 set_id; 822 + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; 823 823 }; 824 + 825 + int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, 826 + struct mlx5_ib_op_fc *opfc, 827 + enum mlx5_ib_optional_counter_type type); 828 + 829 + void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, 830 + struct mlx5_ib_op_fc *opfc, 831 + enum mlx5_ib_optional_counter_type type); 824 832 825 833 struct mlx5_ib_multiport_info; 826 834
+9 -17
drivers/infiniband/hw/mlx5/mr.c
··· 605 605 /* Return a MR already available in the cache */ 606 606 static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) 607 607 { 608 - struct mlx5_ib_dev *dev = req_ent->dev; 609 608 struct mlx5_ib_mr *mr = NULL; 610 609 struct mlx5_cache_ent *ent = req_ent; 611 610 612 - /* Try larger MR pools from the cache to satisfy the allocation */ 613 - for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { 614 - mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, 615 - ent - dev->cache.ent); 616 - 617 - spin_lock_irq(&ent->lock); 618 - if (!list_empty(&ent->head)) { 619 - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 620 - list); 621 - list_del(&mr->list); 622 - ent->available_mrs--; 623 - queue_adjust_cache_locked(ent); 624 - spin_unlock_irq(&ent->lock); 625 - mlx5_clear_mr(mr); 626 - return mr; 627 - } 611 + spin_lock_irq(&ent->lock); 612 + if (!list_empty(&ent->head)) { 613 + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); 614 + list_del(&mr->list); 615 + ent->available_mrs--; 628 616 queue_adjust_cache_locked(ent); 629 617 spin_unlock_irq(&ent->lock); 618 + mlx5_clear_mr(mr); 619 + return mr; 630 620 } 621 + queue_adjust_cache_locked(ent); 622 + spin_unlock_irq(&ent->lock); 631 623 req_ent->miss++; 632 624 return NULL; 633 625 }
+25 -15
drivers/infiniband/hw/mlx5/odp.c
··· 1691 1691 1692 1692 xa_lock(&dev->odp_mkeys); 1693 1693 mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); 1694 - if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) 1694 + if (!mmkey || mmkey->key != lkey) { 1695 + mr = ERR_PTR(-ENOENT); 1695 1696 goto end; 1697 + } 1698 + if (mmkey->type != MLX5_MKEY_MR) { 1699 + mr = ERR_PTR(-EINVAL); 1700 + goto end; 1701 + } 1696 1702 1697 1703 mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); 1698 1704 1699 1705 if (mr->ibmr.pd != pd) { 1700 - mr = NULL; 1706 + mr = ERR_PTR(-EPERM); 1701 1707 goto end; 1702 1708 } 1703 1709 1704 1710 /* prefetch with write-access must be supported by the MR */ 1705 1711 if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && 1706 1712 !mr->umem->writable) { 1707 - mr = NULL; 1713 + mr = ERR_PTR(-EPERM); 1708 1714 goto end; 1709 1715 } 1710 1716 ··· 1742 1736 destroy_prefetch_work(work); 1743 1737 } 1744 1738 1745 - static bool init_prefetch_work(struct ib_pd *pd, 1739 + static int init_prefetch_work(struct ib_pd *pd, 1746 1740 enum ib_uverbs_advise_mr_advice advice, 1747 1741 u32 pf_flags, struct prefetch_mr_work *work, 1748 1742 struct ib_sge *sg_list, u32 num_sge) ··· 1753 1747 work->pf_flags = pf_flags; 1754 1748 1755 1749 for (i = 0; i < num_sge; ++i) { 1750 + struct mlx5_ib_mr *mr; 1751 + 1752 + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); 1753 + if (IS_ERR(mr)) { 1754 + work->num_sge = i; 1755 + return PTR_ERR(mr); 1756 + } 1756 1757 work->frags[i].io_virt = sg_list[i].addr; 1757 1758 work->frags[i].length = sg_list[i].length; 1758 - work->frags[i].mr = 1759 - get_prefetchable_mr(pd, advice, sg_list[i].lkey); 1760 - if (!work->frags[i].mr) { 1761 - work->num_sge = i; 1762 - return false; 1763 - } 1759 + work->frags[i].mr = mr; 1764 1760 } 1765 1761 work->num_sge = num_sge; 1766 - return true; 1762 + return 0; 1767 1763 } 1768 1764 1769 1765 static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, ··· 1781 1773 struct mlx5_ib_mr *mr; 1782 1774 1783 1775 mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); 1784 - if (!mr) 1785 - return -ENOENT; 1776 + if (IS_ERR(mr)) 1777 + return PTR_ERR(mr); 1786 1778 ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, 1787 1779 &bytes_mapped, pf_flags); 1788 1780 if (ret < 0) { ··· 1802 1794 { 1803 1795 u32 pf_flags = 0; 1804 1796 struct prefetch_mr_work *work; 1797 + int rc; 1805 1798 1806 1799 if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) 1807 1800 pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; ··· 1818 1809 if (!work) 1819 1810 return -ENOMEM; 1820 1811 1821 - if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { 1812 + rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge); 1813 + if (rc) { 1822 1814 destroy_prefetch_work(work); 1823 - return -EINVAL; 1815 + return rc; 1824 1816 } 1825 1817 queue_work(system_unbound_wq, &work->work); 1826 1818 return 0;
-1
drivers/infiniband/hw/qedr/main.c
··· 228 228 .query_srq = qedr_query_srq, 229 229 .reg_user_mr = qedr_reg_user_mr, 230 230 .req_notify_cq = qedr_arm_cq, 231 - .resize_cq = qedr_resize_cq, 232 231 233 232 INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), 234 233 INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
+9 -16
drivers/infiniband/hw/qedr/verbs.c
··· 1052 1052 return -EINVAL; 1053 1053 } 1054 1054 1055 - int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) 1056 - { 1057 - struct qedr_dev *dev = get_qedr_dev(ibcq->device); 1058 - struct qedr_cq *cq = get_qedr_cq(ibcq); 1059 - 1060 - DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq); 1061 - 1062 - return 0; 1063 - } 1064 - 1065 1055 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) 1066 1056 #define QEDR_DESTROY_CQ_ITER_DURATION (10) 1067 1057 ··· 2734 2744 int rc = 0; 2735 2745 2736 2746 memset(&params, 0, sizeof(params)); 2737 - 2738 - rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params); 2739 - if (rc) 2740 - goto err; 2741 - 2742 2747 memset(qp_attr, 0, sizeof(*qp_attr)); 2743 2748 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 2744 2749 2745 - qp_attr->qp_state = qedr_get_ibqp_state(params.state); 2750 + if (qp->qp_type != IB_QPT_GSI) { 2751 + rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params); 2752 + if (rc) 2753 + goto err; 2754 + qp_attr->qp_state = qedr_get_ibqp_state(params.state); 2755 + } else { 2756 + qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS); 2757 + } 2758 + 2746 2759 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); 2747 2760 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); 2748 2761 qp_attr->path_mig_state = IB_MIG_MIGRATED;
-1
drivers/infiniband/hw/qedr/verbs.h
··· 53 53 int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); 54 54 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, 55 55 struct ib_udata *udata); 56 - int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); 57 56 int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); 58 57 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); 59 58 int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+3 -2
drivers/infiniband/hw/qib/qib_driver.c
··· 1 1 /* 2 + * Copyright (c) 2021 Cornelis Networks. All rights reserved. 2 3 * Copyright (c) 2013 Intel Corporation. All rights reserved. 3 4 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. 4 5 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. ··· 63 62 "Attempt pre-IBTA 1.2 DDR speed negotiation"); 64 63 65 64 MODULE_LICENSE("Dual BSD/GPL"); 66 - MODULE_AUTHOR("Intel <ibsupport@intel.com>"); 67 - MODULE_DESCRIPTION("Intel IB driver"); 65 + MODULE_AUTHOR("Cornelis <support@cornelisnetworks.com>"); 66 + MODULE_DESCRIPTION("Cornelis IB driver"); 68 67 69 68 /* 70 69 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
+1 -1
drivers/infiniband/hw/usnic/usnic_fwd.c
··· 103 103 kfree(ufdev); 104 104 } 105 105 106 - void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) 106 + void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, const char mac[ETH_ALEN]) 107 107 { 108 108 spin_lock(&ufdev->lock); 109 109 memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
+1 -1
drivers/infiniband/hw/usnic/usnic_fwd.h
··· 74 74 struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); 75 75 void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); 76 76 77 - void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); 77 + void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, const char mac[ETH_ALEN]); 78 78 void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); 79 79 void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); 80 80 void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
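The usnic change constifies the MAC argument so callers may pass read-only buffers. In C an array parameter such as const char mac[ETH_ALEN] decays to const char *, which is purely a promise that the callee will not write through it. A small standalone sketch, assuming ETH_ALEN is 6 as in <linux/if_ether.h>; the struct and function names here are illustrative, not the driver's:

#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6	/* matches the kernel's definition */

struct fwd_dev { unsigned char mac[ETH_ALEN]; };

/* The array syntax decays to "const char *": the callee promises not to
 * modify the buffer, so const data can be passed without a cast. */
static void fwd_set_mac(struct fwd_dev *dev, const char mac[ETH_ALEN])
{
	memcpy(dev->mac, mac, ETH_ALEN);
}

int main(void)
{
	static const char addr[ETH_ALEN] = { 0x02, 0x00, 0x5e, 0x10, 0x20, 0x30 };
	struct fwd_dev dev;

	fwd_set_mac(&dev, addr);	/* legal only because the parameter is const */
	printf("%02x\n", dev.mac[0]);
	return 0;
}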
+19 -1
drivers/infiniband/sw/rxe/rxe_av.c
··· 101 101 102 102 struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) 103 103 { 104 + struct rxe_ah *ah; 105 + u32 ah_num; 106 + 104 107 if (!pkt || !pkt->qp) 105 108 return NULL; 106 109 107 110 if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) 108 111 return &pkt->qp->pri_av; 109 112 110 - return (pkt->wqe) ? &pkt->wqe->av : NULL; 113 + if (!pkt->wqe) 114 + return NULL; 115 + 116 + ah_num = pkt->wqe->wr.wr.ud.ah_num; 117 + if (ah_num) { 118 + /* only new user provider or kernel client */ 119 + ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); 120 + if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { 121 + pr_warn("Unable to find AH matching ah_num\n"); 122 + return NULL; 123 + } 124 + return &ah->av; 125 + } 126 + 127 + /* only old user provider for UD sends*/ 128 + return &pkt->wqe->wr.wr.ud.av; 111 129 }
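With the AH rework, rxe_get_av() resolves the UD address vector from an AH object referenced by index in the WQE and only trusts it after cross-checking the stored index and the PD. The sketch below shows that validate-after-lookup pattern over a hypothetical fixed-size array pool; the real driver goes through rxe_pool_get_index() and reference counting, which are omitted here.

#include <stdio.h>

struct pd { int id; };

struct ah {
	unsigned int ah_num;	/* index the object believes it owns */
	struct pd *pd;		/* protection domain it was created on */
	int av;			/* stand-in for the address vector */
};

/* Hypothetical fixed-size pool standing in for rxe_pool_get_index(). */
static struct ah *pool[16];

static struct ah *get_av(unsigned int ah_num, struct pd *qp_pd)
{
	struct ah *ah;

	if (!ah_num || ah_num >= 16)
		return NULL;
	ah = pool[ah_num];
	/* Never trust a client-supplied index alone: the slot may be stale
	 * or belong to another PD, so cross-check before using it. */
	if (!ah || ah->ah_num != ah_num || ah->pd != qp_pd)
		return NULL;
	return ah;
}

int main(void)
{
	struct pd pd0 = { 0 }, pd1 = { 1 };
	struct ah ah = { .ah_num = 3, .pd = &pd0, .av = 42 };

	pool[3] = &ah;
	printf("same pd: %p\n", (void *)get_av(3, &pd0));	/* found */
	printf("wrong pd: %p\n", (void *)get_av(3, &pd1));	/* NULL */
	return 0;
}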
+27 -28
drivers/infiniband/sw/rxe/rxe_comp.c
··· 142 142 /* we come here whether or not we found a response packet to see if 143 143 * there are any posted WQEs 144 144 */ 145 - if (qp->is_user) 146 - wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER); 147 - else 148 - wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL); 145 + wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); 149 146 *wqe_p = wqe; 150 147 151 148 /* no WQE or requester has not started it yet */ ··· 380 383 static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 381 384 struct rxe_cqe *cqe) 382 385 { 386 + struct ib_wc *wc = &cqe->ibwc; 387 + struct ib_uverbs_wc *uwc = &cqe->uibwc; 388 + 383 389 memset(cqe, 0, sizeof(*cqe)); 384 390 385 391 if (!qp->is_user) { 386 - struct ib_wc *wc = &cqe->ibwc; 387 - 388 - wc->wr_id = wqe->wr.wr_id; 389 - wc->status = wqe->status; 390 - wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); 391 - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || 392 - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) 393 - wc->wc_flags = IB_WC_WITH_IMM; 394 - wc->byte_len = wqe->dma.length; 395 - wc->qp = &qp->ibqp; 392 + wc->wr_id = wqe->wr.wr_id; 393 + wc->status = wqe->status; 394 + wc->qp = &qp->ibqp; 396 395 } else { 397 - struct ib_uverbs_wc *uwc = &cqe->uibwc; 396 + uwc->wr_id = wqe->wr.wr_id; 397 + uwc->status = wqe->status; 398 + uwc->qp_num = qp->ibqp.qp_num; 399 + } 398 400 399 - uwc->wr_id = wqe->wr.wr_id; 400 - uwc->status = wqe->status; 401 - uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); 402 - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || 403 - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) 404 - uwc->wc_flags = IB_WC_WITH_IMM; 405 - uwc->byte_len = wqe->dma.length; 406 - uwc->qp_num = qp->ibqp.qp_num; 401 + if (wqe->status == IB_WC_SUCCESS) { 402 + if (!qp->is_user) { 403 + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); 404 + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || 405 + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) 406 + wc->wc_flags = IB_WC_WITH_IMM; 407 + wc->byte_len = wqe->dma.length; 408 + } else { 409 + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); 410 + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || 411 + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) 412 + uwc->wc_flags = IB_WC_WITH_IMM; 413 + uwc->byte_len = wqe->dma.length; 414 + } 407 415 } 408 416 } 409 417 ··· 434 432 if (post) 435 433 make_send_cqe(qp, wqe, &cqe); 436 434 437 - if (qp->is_user) 438 - advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER); 439 - else 440 - advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL); 435 + queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); 441 436 442 437 if (post) 443 438 rxe_cq_post(qp->scq, &cqe, 0); ··· 538 539 wqe->status = IB_WC_WR_FLUSH_ERR; 539 540 do_complete(qp, wqe); 540 541 } else { 541 - advance_consumer(q, q->type); 542 + queue_advance_consumer(q, q->type); 542 543 } 543 544 } 544 545 }
+6 -22
drivers/infiniband/sw/rxe/rxe_cq.c
··· 25 25 } 26 26 27 27 if (cq) { 28 - if (cq->is_user) 29 - count = queue_count(cq->queue, QUEUE_TYPE_TO_USER); 30 - else 31 - count = queue_count(cq->queue, QUEUE_TYPE_KERNEL); 32 - 28 + count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); 33 29 if (cqe < count) { 34 30 pr_warn("cqe(%d) < current # elements in queue (%d)", 35 31 cqe, count); ··· 61 65 int err; 62 66 enum queue_type type; 63 67 64 - type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL; 68 + type = QUEUE_TYPE_TO_CLIENT; 65 69 cq->queue = rxe_queue_init(rxe, &cqe, 66 70 sizeof(struct rxe_cqe), type); 67 71 if (!cq->queue) { ··· 77 81 return err; 78 82 } 79 83 80 - if (uresp) 81 - cq->is_user = 1; 84 + cq->is_user = uresp; 82 85 83 86 cq->is_dying = false; 84 87 ··· 112 117 113 118 spin_lock_irqsave(&cq->cq_lock, flags); 114 119 115 - if (cq->is_user) 116 - full = queue_full(cq->queue, QUEUE_TYPE_TO_USER); 117 - else 118 - full = queue_full(cq->queue, QUEUE_TYPE_KERNEL); 119 - 120 + full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); 120 121 if (unlikely(full)) { 121 122 spin_unlock_irqrestore(&cq->cq_lock, flags); 122 123 if (cq->ibcq.event_handler) { ··· 125 134 return -EBUSY; 126 135 } 127 136 128 - if (cq->is_user) 129 - addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER); 130 - else 131 - addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL); 132 - 137 + addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT); 133 138 memcpy(addr, cqe, sizeof(*cqe)); 134 139 135 - if (cq->is_user) 136 - advance_producer(cq->queue, QUEUE_TYPE_TO_USER); 137 - else 138 - advance_producer(cq->queue, QUEUE_TYPE_KERNEL); 140 + queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); 139 141 140 142 spin_unlock_irqrestore(&cq->cq_lock, flags); 141 143
+21 -21
drivers/infiniband/sw/rxe/rxe_hw_counters.c
··· 6 6 #include "rxe.h" 7 7 #include "rxe_hw_counters.h" 8 8 9 - static const char * const rxe_counter_name[] = { 10 - [RXE_CNT_SENT_PKTS] = "sent_pkts", 11 - [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", 12 - [RXE_CNT_DUP_REQ] = "duplicate_request", 13 - [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request", 14 - [RXE_CNT_RCV_RNR] = "rcvd_rnr_err", 15 - [RXE_CNT_SND_RNR] = "send_rnr_err", 16 - [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err", 17 - [RXE_CNT_COMPLETER_SCHED] = "ack_deferred", 18 - [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err", 19 - [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err", 20 - [RXE_CNT_COMP_RETRY] = "completer_retry_err", 21 - [RXE_CNT_SEND_ERR] = "send_err", 22 - [RXE_CNT_LINK_DOWNED] = "link_downed", 23 - [RXE_CNT_RDMA_SEND] = "rdma_sends", 24 - [RXE_CNT_RDMA_RECV] = "rdma_recvs", 9 + static const struct rdma_stat_desc rxe_counter_descs[] = { 10 + [RXE_CNT_SENT_PKTS].name = "sent_pkts", 11 + [RXE_CNT_RCVD_PKTS].name = "rcvd_pkts", 12 + [RXE_CNT_DUP_REQ].name = "duplicate_request", 13 + [RXE_CNT_OUT_OF_SEQ_REQ].name = "out_of_seq_request", 14 + [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", 15 + [RXE_CNT_SND_RNR].name = "send_rnr_err", 16 + [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", 17 + [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", 18 + [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", 19 + [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", 20 + [RXE_CNT_COMP_RETRY].name = "completer_retry_err", 21 + [RXE_CNT_SEND_ERR].name = "send_err", 22 + [RXE_CNT_LINK_DOWNED].name = "link_downed", 23 + [RXE_CNT_RDMA_SEND].name = "rdma_sends", 24 + [RXE_CNT_RDMA_RECV].name = "rdma_recvs", 25 25 }; 26 26 27 27 int rxe_ib_get_hw_stats(struct ib_device *ibdev, ··· 34 34 if (!port || !stats) 35 35 return -EINVAL; 36 36 37 - for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) 37 + for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_descs); cnt++) 38 38 stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); 39 39 40 - return ARRAY_SIZE(rxe_counter_name); 40 + return ARRAY_SIZE(rxe_counter_descs); 41 41 } 42 42 43 43 struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev, 44 44 u32 port_num) 45 45 { 46 - BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS); 46 + BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_descs) != RXE_NUM_OF_COUNTERS); 47 47 48 - return rdma_alloc_hw_stats_struct(rxe_counter_name, 49 - ARRAY_SIZE(rxe_counter_name), 48 + return rdma_alloc_hw_stats_struct(rxe_counter_descs, 49 + ARRAY_SIZE(rxe_counter_descs), 50 50 RDMA_HW_STATS_DEFAULT_LIFESPAN); 51 51 }
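The counters table now holds struct entries (struct rdma_stat_desc) rather than bare strings, initialized per enum index with designated initializers and guarded by a compile-time size check. A standalone C11 sketch of the same table pattern; the enum and struct names are illustrative, not the kernel's:

#include <assert.h>
#include <stdio.h>

enum counters {
	CNT_SENT_PKTS,
	CNT_RCVD_PKTS,
	CNT_LINK_DOWNED,
	NUM_COUNTERS,
};

/* Per-counter description struct; a struct entry can grow extra fields
 * later without touching every table line. */
struct stat_desc {
	const char *name;
	unsigned int flags;
};

/* Designated initializers keep each entry tied to its enum value even if
 * the enum is reordered or extended. */
static const struct stat_desc descs[] = {
	[CNT_SENT_PKTS].name   = "sent_pkts",
	[CNT_RCVD_PKTS].name   = "rcvd_pkts",
	[CNT_LINK_DOWNED].name = "link_downed",
};

/* Userspace equivalent of the driver's BUILD_BUG_ON(): fail the build,
 * not the run, if table and enum drift apart. */
static_assert(sizeof(descs) / sizeof(descs[0]) == NUM_COUNTERS,
	      "counter table out of sync with enum");

int main(void)
{
	for (int i = 0; i < NUM_COUNTERS; i++)
		printf("%s\n", descs[i].name);
	return 0;
}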
+2
drivers/infiniband/sw/rxe/rxe_loc.h
··· 86 86 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); 87 87 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); 88 88 int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); 89 + int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); 90 + int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); 89 91 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); 90 92 void rxe_mr_cleanup(struct rxe_pool_entry *arg); 91 93
+200 -75
drivers/infiniband/sw/rxe/rxe_mr.c
··· 24 24 25 25 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) 26 26 { 27 + struct rxe_map_set *set = mr->cur_map_set; 28 + 27 29 switch (mr->type) { 28 - case RXE_MR_TYPE_DMA: 30 + case IB_MR_TYPE_DMA: 29 31 return 0; 30 32 31 - case RXE_MR_TYPE_MR: 32 - if (iova < mr->iova || length > mr->length || 33 - iova > mr->iova + mr->length - length) 33 + case IB_MR_TYPE_USER: 34 + case IB_MR_TYPE_MEM_REG: 35 + if (iova < set->iova || length > set->length || 36 + iova > set->iova + set->length - length) 34 37 return -EFAULT; 35 38 return 0; 36 39 37 40 default: 41 + pr_warn("%s: mr type (%d) not supported\n", 42 + __func__, mr->type); 38 43 return -EFAULT; 39 44 } 40 45 } ··· 53 48 u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1); 54 49 u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; 55 50 56 - mr->ibmr.lkey = lkey; 57 - mr->ibmr.rkey = rkey; 51 + /* set ibmr->l/rkey and also copy into private l/rkey 52 + * for user MRs these will always be the same 53 + * for cases where caller 'owns' the key portion 54 + * they may be different until REG_MR WQE is executed. 55 + */ 56 + mr->lkey = mr->ibmr.lkey = lkey; 57 + mr->rkey = mr->ibmr.rkey = rkey; 58 + 58 59 mr->state = RXE_MR_STATE_INVALID; 59 - mr->type = RXE_MR_TYPE_NONE; 60 60 mr->map_shift = ilog2(RXE_BUF_PER_MAP); 61 61 } 62 62 63 - static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) 63 + static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set) 64 64 { 65 65 int i; 66 - int num_map; 67 - struct rxe_map **map = mr->map; 68 66 69 - num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 67 + for (i = 0; i < num_map; i++) 68 + kfree(set->map[i]); 70 69 71 - mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); 72 - if (!mr->map) 73 - goto err1; 70 + kfree(set->map); 71 + kfree(set); 72 + } 73 + 74 + static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp) 75 + { 76 + int i; 77 + struct rxe_map_set *set; 78 + 79 + set = kmalloc(sizeof(*set), GFP_KERNEL); 80 + if (!set) 81 + goto err_out; 82 + 83 + set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL); 84 + if (!set->map) 85 + goto err_free_set; 74 86 75 87 for (i = 0; i < num_map; i++) { 76 - mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); 77 - if (!mr->map[i]) 78 - goto err2; 88 + set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL); 89 + if (!set->map[i]) 90 + goto err_free_map; 79 91 } 80 92 81 - BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 82 - 83 - mr->map_shift = ilog2(RXE_BUF_PER_MAP); 84 - mr->map_mask = RXE_BUF_PER_MAP - 1; 85 - 86 - mr->num_buf = num_buf; 87 - mr->num_map = num_map; 88 - mr->max_buf = num_map * RXE_BUF_PER_MAP; 93 + *setp = set; 89 94 90 95 return 0; 91 96 92 - err2: 97 + err_free_map: 93 98 for (i--; i >= 0; i--) 94 - kfree(mr->map[i]); 99 + kfree(set->map[i]); 95 100 96 - kfree(mr->map); 97 - err1: 101 + kfree(set->map); 102 + err_free_set: 103 + kfree(set); 104 + err_out: 105 + return -ENOMEM; 106 + } 107 + 108 + /** 109 + * rxe_mr_alloc() - Allocate memory map array(s) for MR 110 + * @mr: Memory region 111 + * @num_buf: Number of buffer descriptors to support 112 + * @both: If non zero allocate both mr->map and mr->next_map 113 + * else just allocate mr->map. 
Used for fast MRs 114 + * 115 + * Return: 0 on success else an error 116 + */ 117 + static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both) 118 + { 119 + int ret; 120 + int num_map; 121 + 122 + BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); 123 + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; 124 + 125 + mr->map_shift = ilog2(RXE_BUF_PER_MAP); 126 + mr->map_mask = RXE_BUF_PER_MAP - 1; 127 + mr->num_buf = num_buf; 128 + mr->max_buf = num_map * RXE_BUF_PER_MAP; 129 + mr->num_map = num_map; 130 + 131 + ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set); 132 + if (ret) 133 + goto err_out; 134 + 135 + if (both) { 136 + ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set); 137 + if (ret) { 138 + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); 139 + goto err_out; 140 + } 141 + } 142 + 143 + return 0; 144 + 145 + err_out: 98 146 return -ENOMEM; 99 147 } 100 148 ··· 158 100 mr->ibmr.pd = &pd->ibpd; 159 101 mr->access = access; 160 102 mr->state = RXE_MR_STATE_VALID; 161 - mr->type = RXE_MR_TYPE_DMA; 103 + mr->type = IB_MR_TYPE_DMA; 162 104 } 163 105 164 106 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, 165 107 int access, struct rxe_mr *mr) 166 108 { 109 + struct rxe_map_set *set; 167 110 struct rxe_map **map; 168 111 struct rxe_phys_buf *buf = NULL; 169 112 struct ib_umem *umem; ··· 172 113 int num_buf; 173 114 void *vaddr; 174 115 int err; 175 - int i; 176 116 177 117 umem = ib_umem_get(pd->ibpd.device, start, length, access); 178 118 if (IS_ERR(umem)) { ··· 185 127 186 128 rxe_mr_init(access, mr); 187 129 188 - err = rxe_mr_alloc(mr, num_buf); 130 + err = rxe_mr_alloc(mr, num_buf, 0); 189 131 if (err) { 190 132 pr_warn("%s: Unable to allocate memory for map\n", 191 133 __func__); 192 134 goto err_release_umem; 193 135 } 194 136 195 - mr->page_shift = PAGE_SHIFT; 196 - mr->page_mask = PAGE_SIZE - 1; 137 + set = mr->cur_map_set; 138 + set->page_shift = PAGE_SHIFT; 139 + set->page_mask = PAGE_SIZE - 1; 197 140 198 - num_buf = 0; 199 - map = mr->map; 141 + num_buf = 0; 142 + map = set->map; 143 + 200 144 if (length > 0) { 201 145 buf = map[0]->buf; 202 146 ··· 221 161 buf->size = PAGE_SIZE; 222 162 num_buf++; 223 163 buf++; 224 - 225 164 } 226 165 } 227 166 228 167 mr->ibmr.pd = &pd->ibpd; 229 168 mr->umem = umem; 230 169 mr->access = access; 231 - mr->length = length; 232 - mr->iova = iova; 233 - mr->va = start; 234 - mr->offset = ib_umem_offset(umem); 235 170 mr->state = RXE_MR_STATE_VALID; 236 - mr->type = RXE_MR_TYPE_MR; 171 + mr->type = IB_MR_TYPE_USER; 172 + 173 + set->length = length; 174 + set->iova = iova; 175 + set->va = start; 176 + set->offset = ib_umem_offset(umem); 237 177 238 178 return 0; 239 179 240 180 err_cleanup_map: 241 - for (i = 0; i < mr->num_map; i++) 242 - kfree(mr->map[i]); 243 - kfree(mr->map); 181 + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); 244 182 err_release_umem: 245 183 ib_umem_release(umem); 246 184 err_out: ··· 249 191 { 250 192 int err; 251 193 252 - rxe_mr_init(0, mr); 194 + /* always allow remote access for FMRs */ 195 + rxe_mr_init(IB_ACCESS_REMOTE, mr); 253 196 254 - /* In fastreg, we also set the rkey */ 255 - mr->ibmr.rkey = mr->ibmr.lkey; 256 - 257 - err = rxe_mr_alloc(mr, max_pages); 197 + err = rxe_mr_alloc(mr, max_pages, 1); 258 198 if (err) 259 199 goto err1; 260 200 261 201 mr->ibmr.pd = &pd->ibpd; 262 202 mr->max_buf = max_pages; 263 203 mr->state = RXE_MR_STATE_FREE; 264 - mr->type = RXE_MR_TYPE_MR; 204 + mr->type = IB_MR_TYPE_MEM_REG; 265 205 266 206 return 0; 267 207 ··· 270 214 
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, 271 215 size_t *offset_out) 272 216 { 273 - size_t offset = iova - mr->iova + mr->offset; 217 + struct rxe_map_set *set = mr->cur_map_set; 218 + size_t offset = iova - set->iova + set->offset; 274 219 int map_index; 275 220 int buf_index; 276 221 u64 length; 222 + struct rxe_map *map; 277 223 278 - if (likely(mr->page_shift)) { 279 - *offset_out = offset & mr->page_mask; 280 - offset >>= mr->page_shift; 224 + if (likely(set->page_shift)) { 225 + *offset_out = offset & set->page_mask; 226 + offset >>= set->page_shift; 281 227 *n_out = offset & mr->map_mask; 282 228 *m_out = offset >> mr->map_shift; 283 229 } else { 284 230 map_index = 0; 285 231 buf_index = 0; 286 232 287 - length = mr->map[map_index]->buf[buf_index].size; 233 + map = set->map[map_index]; 234 + length = map->buf[buf_index].size; 288 235 289 236 while (offset >= length) { 290 237 offset -= length; ··· 297 238 map_index++; 298 239 buf_index = 0; 299 240 } 300 - length = mr->map[map_index]->buf[buf_index].size; 241 + map = set->map[map_index]; 242 + length = map->buf[buf_index].size; 301 243 } 302 244 303 245 *m_out = map_index; ··· 319 259 goto out; 320 260 } 321 261 322 - if (!mr->map) { 262 + if (!mr->cur_map_set) { 323 263 addr = (void *)(uintptr_t)iova; 324 264 goto out; 325 265 } ··· 332 272 333 273 lookup_iova(mr, iova, &m, &n, &offset); 334 274 335 - if (offset + length > mr->map[m]->buf[n].size) { 275 + if (offset + length > mr->cur_map_set->map[m]->buf[n].size) { 336 276 pr_warn("crosses page boundary\n"); 337 277 addr = NULL; 338 278 goto out; 339 279 } 340 280 341 - addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset; 281 + addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset; 342 282 343 283 out: 344 284 return addr; ··· 362 302 if (length == 0) 363 303 return 0; 364 304 365 - if (mr->type == RXE_MR_TYPE_DMA) { 305 + if (mr->type == IB_MR_TYPE_DMA) { 366 306 u8 *src, *dest; 367 307 368 308 src = (dir == RXE_TO_MR_OBJ) ? 
addr : ((void *)(uintptr_t)iova); ··· 374 314 return 0; 375 315 } 376 316 377 - WARN_ON_ONCE(!mr->map); 317 + WARN_ON_ONCE(!mr->cur_map_set); 378 318 379 319 err = mr_check_range(mr, iova, length); 380 320 if (err) { ··· 384 324 385 325 lookup_iova(mr, iova, &m, &i, &offset); 386 326 387 - map = mr->map + m; 327 + map = mr->cur_map_set->map + m; 388 328 buf = map[0]->buf + i; 389 329 390 330 while (length > 0) { ··· 567 507 if (!mr) 568 508 return NULL; 569 509 570 - if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) || 571 - (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) || 510 + if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || 511 + (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || 572 512 mr_pd(mr) != pd || (access && !(access & mr->access)) || 573 513 mr->state != RXE_MR_STATE_VALID)) { 574 514 rxe_drop_ref(mr); ··· 591 531 goto err; 592 532 } 593 533 594 - if (rkey != mr->ibmr.rkey) { 595 - pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n", 596 - __func__, rkey, mr->ibmr.rkey); 534 + if (rkey != mr->rkey) { 535 + pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n", 536 + __func__, rkey, mr->rkey); 597 537 ret = -EINVAL; 598 538 goto err_drop_ref; 599 539 } ··· 601 541 if (atomic_read(&mr->num_mw) > 0) { 602 542 pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n", 603 543 __func__); 544 + ret = -EINVAL; 545 + goto err_drop_ref; 546 + } 547 + 548 + if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { 549 + pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); 604 550 ret = -EINVAL; 605 551 goto err_drop_ref; 606 552 } ··· 620 554 return ret; 621 555 } 622 556 557 + /* user can (re)register fast MR by executing a REG_MR WQE. 558 + * user is expected to hold a reference on the ib mr until the 559 + * WQE completes. 560 + * Once a fast MR is created this is the only way to change the 561 + * private keys. It is the responsibility of the user to maintain 562 + * the ib mr keys in sync with rxe mr keys. 563 + */ 564 + int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) 565 + { 566 + struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr); 567 + u32 key = wqe->wr.wr.reg.key & 0xff; 568 + u32 access = wqe->wr.wr.reg.access; 569 + struct rxe_map_set *set; 570 + 571 + /* user can only register MR in free state */ 572 + if (unlikely(mr->state != RXE_MR_STATE_FREE)) { 573 + pr_warn("%s: mr->lkey = 0x%x not free\n", 574 + __func__, mr->lkey); 575 + return -EINVAL; 576 + } 577 + 578 + /* user can only register mr with qp in same protection domain */ 579 + if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { 580 + pr_warn("%s: qp->pd and mr->pd don't match\n", 581 + __func__); 582 + return -EINVAL; 583 + } 584 + 585 + mr->access = access; 586 + mr->lkey = (mr->lkey & ~0xff) | key; 587 + mr->rkey = (access & IB_ACCESS_REMOTE) ? 
mr->lkey : 0; 588 + mr->state = RXE_MR_STATE_VALID; 589 + 590 + set = mr->cur_map_set; 591 + mr->cur_map_set = mr->next_map_set; 592 + mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova; 593 + mr->next_map_set = set; 594 + 595 + return 0; 596 + } 597 + 598 + int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr) 599 + { 600 + struct rxe_mr *mr = to_rmr(ibmr); 601 + struct rxe_map_set *set = mr->next_map_set; 602 + struct rxe_map *map; 603 + struct rxe_phys_buf *buf; 604 + 605 + if (unlikely(set->nbuf == mr->num_buf)) 606 + return -ENOMEM; 607 + 608 + map = set->map[set->nbuf / RXE_BUF_PER_MAP]; 609 + buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP]; 610 + 611 + buf->addr = addr; 612 + buf->size = ibmr->page_size; 613 + set->nbuf++; 614 + 615 + return 0; 616 + } 617 + 623 618 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 624 619 { 625 620 struct rxe_mr *mr = to_rmr(ibmr); ··· 691 564 return -EINVAL; 692 565 } 693 566 694 - mr->state = RXE_MR_STATE_ZOMBIE; 567 + mr->state = RXE_MR_STATE_INVALID; 695 568 rxe_drop_ref(mr_pd(mr)); 696 569 rxe_drop_index(mr); 697 570 rxe_drop_ref(mr); ··· 702 575 void rxe_mr_cleanup(struct rxe_pool_entry *arg) 703 576 { 704 577 struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem); 705 - int i; 706 578 707 579 ib_umem_release(mr->umem); 708 580 709 - if (mr->map) { 710 - for (i = 0; i < mr->num_map; i++) 711 - kfree(mr->map[i]); 581 + if (mr->cur_map_set) 582 + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); 712 583 713 - kfree(mr->map); 714 - } 584 + if (mr->next_map_set) 585 + rxe_mr_free_map_set(mr->num_map, mr->next_map_set); 715 586 }
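Fast-reg MRs now carry two map sets: rxe_mr_set_page() fills the "next" set while the current one stays visible to in-flight traffic, and the REG_MR WQE swaps the sets and patches only the low key byte. Below is a minimal single-threaded sketch of that double-buffer-and-swap idea; the structures are simplified stand-ins and ignore the pool, locking and IB key semantics of the real driver.

#include <stdio.h>
#include <stdint.h>

#define NBUF 4

/* Simplified stand-in for struct rxe_map_set: just a page list and an IOVA. */
struct map_set {
	uint64_t iova;
	uint64_t page[NBUF];
	int nbuf;
};

struct mr {
	uint32_t lkey;			/* pool index << 8 | 8-bit user key */
	struct map_set sets[2];
	struct map_set *cur;		/* visible to in-flight traffic */
	struct map_set *next;		/* staged for the next registration */
};

/* Analogue of rxe_mr_set_page(): append one page to the staging set. */
static int mr_set_page(struct mr *mr, uint64_t addr)
{
	if (mr->next->nbuf == NBUF)
		return -1;
	mr->next->page[mr->next->nbuf++] = addr;
	return 0;
}

/* Analogue of the REG_MR step: swap the staged set in and refresh the low
 * key byte, leaving the upper bits (the pool index) untouched. */
static void mr_reg(struct mr *mr, uint64_t iova, uint8_t key)
{
	struct map_set *tmp = mr->cur;

	mr->lkey = (mr->lkey & ~0xffu) | key;
	mr->next->iova = iova;
	mr->cur = mr->next;
	mr->next = tmp;
	mr->next->nbuf = 0;
}

int main(void)
{
	struct mr mr = { .lkey = 0x1200 };

	mr.cur = &mr.sets[0];
	mr.next = &mr.sets[1];
	mr_set_page(&mr, 0x10000);
	mr_set_page(&mr, 0x11000);
	mr_reg(&mr, 0x10000, 0xab);
	printf("lkey=0x%x nbuf=%d\n", mr.lkey, mr.cur->nbuf);	/* lkey=0x12ab nbuf=2 */
	return 0;
}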
+17 -19
drivers/infiniband/sw/rxe/rxe_mw.c
··· 21 21 } 22 22 23 23 rxe_add_index(mw); 24 - ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); 24 + mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); 25 25 mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? 26 26 RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; 27 27 spin_lock_init(&mw->lock); ··· 71 71 static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 72 72 struct rxe_mw *mw, struct rxe_mr *mr) 73 73 { 74 + u32 key = wqe->wr.wr.mw.rkey & 0xff; 75 + 74 76 if (mw->ibmw.type == IB_MW_TYPE_1) { 75 77 if (unlikely(mw->state != RXE_MW_STATE_VALID)) { 76 78 pr_err_once( ··· 110 108 } 111 109 } 112 110 113 - if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) { 111 + if (unlikely(key == (mw->rkey & 0xff))) { 114 112 pr_err_once("attempt to bind MW with same key\n"); 115 113 return -EINVAL; 116 114 } ··· 142 140 143 141 /* C10-75 */ 144 142 if (mw->access & IB_ZERO_BASED) { 145 - if (unlikely(wqe->wr.wr.mw.length > mr->length)) { 143 + if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) { 146 144 pr_err_once( 147 145 "attempt to bind a ZB MW outside of the MR\n"); 148 146 return -EINVAL; 149 147 } 150 148 } else { 151 - if (unlikely((wqe->wr.wr.mw.addr < mr->iova) || 149 + if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) || 152 150 ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > 153 - (mr->iova + mr->length)))) { 151 + (mr->cur_map_set->iova + mr->cur_map_set->length)))) { 154 152 pr_err_once( 155 153 "attempt to bind a VA MW outside of the MR\n"); 156 154 return -EINVAL; ··· 163 161 static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, 164 162 struct rxe_mw *mw, struct rxe_mr *mr) 165 163 { 166 - u32 rkey; 167 - u32 new_rkey; 164 + u32 key = wqe->wr.wr.mw.rkey & 0xff; 168 165 169 - rkey = mw->ibmw.rkey; 170 - new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff); 171 - 172 - mw->ibmw.rkey = new_rkey; 166 + mw->rkey = (mw->rkey & ~0xff) | key; 173 167 mw->access = wqe->wr.wr.mw.access; 174 168 mw->state = RXE_MW_STATE_VALID; 175 169 mw->addr = wqe->wr.wr.mw.addr; ··· 195 197 struct rxe_mw *mw; 196 198 struct rxe_mr *mr; 197 199 struct rxe_dev *rxe = to_rdev(qp->ibqp.device); 200 + u32 mw_rkey = wqe->wr.wr.mw.mw_rkey; 201 + u32 mr_lkey = wqe->wr.wr.mw.mr_lkey; 198 202 unsigned long flags; 199 203 200 - mw = rxe_pool_get_index(&rxe->mw_pool, 201 - wqe->wr.wr.mw.mw_rkey >> 8); 204 + mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8); 202 205 if (unlikely(!mw)) { 203 206 ret = -EINVAL; 204 207 goto err; 205 208 } 206 209 207 - if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) { 210 + if (unlikely(mw->rkey != mw_rkey)) { 208 211 ret = -EINVAL; 209 212 goto err_drop_mw; 210 213 } 211 214 212 215 if (likely(wqe->wr.wr.mw.length)) { 213 - mr = rxe_pool_get_index(&rxe->mr_pool, 214 - wqe->wr.wr.mw.mr_lkey >> 8); 216 + mr = rxe_pool_get_index(&rxe->mr_pool, mr_lkey >> 8); 215 217 if (unlikely(!mr)) { 216 218 ret = -EINVAL; 217 219 goto err_drop_mw; 218 220 } 219 221 220 - if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) { 222 + if (unlikely(mr->lkey != mr_lkey)) { 221 223 ret = -EINVAL; 222 224 goto err_drop_mr; 223 225 } ··· 290 292 goto err; 291 293 } 292 294 293 - if (rkey != mw->ibmw.rkey) { 295 + if (rkey != mw->rkey) { 294 296 ret = -EINVAL; 295 297 goto err_drop_ref; 296 298 } ··· 321 323 if (!mw) 322 324 return NULL; 323 325 324 - if (unlikely((rxe_mw_rkey(mw) != rkey) || rxe_mw_pd(mw) != pd || 326 + if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || 325 327 (mw->ibmw.type 
== IB_MW_TYPE_2 && mw->qp != qp) || 326 328 (mw->length == 0) || 327 329 (access && !(access & mw->access)) ||
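The MW changes keep a private copy of the rkey and compare it against the client-supplied value on bind/invalidate. rxe keys pack the pool index into the upper 24 bits and an 8-bit variable key into the low byte, so a lookup shifts the rkey to find the object and then verifies the whole key. A short sketch of that layout, with illustrative helper names:

#include <stdio.h>
#include <stdint.h>

/* rxe-style key layout: pool index in bits 31..8, variable key in bits 7..0. */
static uint32_t make_key(uint32_t index, uint8_t key)
{
	return (index << 8) | key;
}

static uint32_t key_to_index(uint32_t rkey)
{
	return rkey >> 8;
}

int main(void)
{
	uint32_t rkey = make_key(0x123, 0x7f);

	/* A consumer presents the full rkey; the driver uses the upper bits
	 * to find the object, then compares the stored rkey to reject stale
	 * or forged low bytes. */
	printf("rkey=0x%x index=0x%x key=0x%x\n",
	       rkey, key_to_index(rkey), rkey & 0xff);
	return 0;
}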
+3 -3
drivers/infiniband/sw/rxe/rxe_opcode.h
··· 22 22 WR_LOCAL_OP_MASK = BIT(5), 23 23 24 24 WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, 25 - WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, 26 25 WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, 27 26 WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, 28 27 }; ··· 81 82 82 83 RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), 83 84 84 - RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), 85 - RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), 85 + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), 86 + RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), 87 + RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), 86 88 }; 87 89 88 90 #define OPCODE_NONE (-1)
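The renamed masks are plain OR-combinations so one branch can cover a whole class of packets. A tiny sketch of the same idea with made-up flag names:

#include <stdio.h>

enum {
	PKT_READ  = 1 << 0,
	PKT_WRITE = 1 << 1,
	PKT_SEND  = 1 << 2,

	/* combined masks, as in rxe_opcode.h, so one test covers a class */
	PKT_READ_OR_WRITE = PKT_READ | PKT_WRITE,
	PKT_WRITE_OR_SEND = PKT_WRITE | PKT_SEND,
};

int main(void)
{
	int mask = PKT_WRITE;

	if (mask & PKT_READ_OR_WRITE)
		printf("RDMA operation: needs the va/rkey checks\n");
	if (mask & PKT_WRITE_OR_SEND)
		printf("carries payload to copy\n");
	return 0;
}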
+19 -15
drivers/infiniband/sw/rxe/rxe_param.h
··· 9 9 10 10 #include <uapi/rdma/rdma_user_rxe.h> 11 11 12 + #define DEFAULT_MAX_VALUE (1 << 20) 13 + 12 14 static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) 13 15 { 14 16 if (mtu < 256) ··· 39 37 enum rxe_device_param { 40 38 RXE_MAX_MR_SIZE = -1ull, 41 39 RXE_PAGE_SIZE_CAP = 0xfffff000, 42 - RXE_MAX_QP_WR = 0x4000, 40 + RXE_MAX_QP_WR = DEFAULT_MAX_VALUE, 43 41 RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR 44 42 | IB_DEVICE_BAD_QKEY_CNTR 45 43 | IB_DEVICE_AUTO_PATH_MIG ··· 60 58 RXE_MAX_INLINE_DATA = RXE_MAX_WQE_SIZE - 61 59 sizeof(struct rxe_send_wqe), 62 60 RXE_MAX_SGE_RD = 32, 63 - RXE_MAX_CQ = 16384, 61 + RXE_MAX_CQ = DEFAULT_MAX_VALUE, 64 62 RXE_MAX_LOG_CQE = 15, 65 - RXE_MAX_PD = 0x7ffc, 63 + RXE_MAX_PD = DEFAULT_MAX_VALUE, 66 64 RXE_MAX_QP_RD_ATOM = 128, 67 65 RXE_MAX_RES_RD_ATOM = 0x3f000, 68 66 RXE_MAX_QP_INIT_RD_ATOM = 128, 69 67 RXE_MAX_MCAST_GRP = 8192, 70 68 RXE_MAX_MCAST_QP_ATTACH = 56, 71 69 RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, 72 - RXE_MAX_AH = 100, 73 - RXE_MAX_SRQ_WR = 0x4000, 70 + RXE_MAX_AH = (1<<15) - 1, /* 32Ki - 1 */ 71 + RXE_MIN_AH_INDEX = 1, 72 + RXE_MAX_AH_INDEX = RXE_MAX_AH, 73 + RXE_MAX_SRQ_WR = DEFAULT_MAX_VALUE, 74 74 RXE_MIN_SRQ_WR = 1, 75 75 RXE_MAX_SRQ_SGE = 27, 76 76 RXE_MIN_SRQ_SGE = 1, 77 77 RXE_MAX_FMR_PAGE_LIST_LEN = 512, 78 - RXE_MAX_PKEYS = 1, 78 + RXE_MAX_PKEYS = 64, 79 79 RXE_LOCAL_CA_ACK_DELAY = 15, 80 80 81 - RXE_MAX_UCONTEXT = 512, 81 + RXE_MAX_UCONTEXT = DEFAULT_MAX_VALUE, 82 82 83 83 RXE_NUM_PORT = 1, 84 84 85 - RXE_MAX_QP = 0x10000, 86 85 RXE_MIN_QP_INDEX = 16, 87 - RXE_MAX_QP_INDEX = 0x00020000, 86 + RXE_MAX_QP_INDEX = DEFAULT_MAX_VALUE, 87 + RXE_MAX_QP = DEFAULT_MAX_VALUE - RXE_MIN_QP_INDEX, 88 88 89 - RXE_MAX_SRQ = 0x00001000, 90 89 RXE_MIN_SRQ_INDEX = 0x00020001, 91 - RXE_MAX_SRQ_INDEX = 0x00040000, 90 + RXE_MAX_SRQ_INDEX = DEFAULT_MAX_VALUE, 91 + RXE_MAX_SRQ = DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX, 92 92 93 - RXE_MAX_MR = 0x00001000, 94 - RXE_MAX_MW = 0x00001000, 95 93 RXE_MIN_MR_INDEX = 0x00000001, 96 - RXE_MAX_MR_INDEX = 0x00010000, 94 + RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE, 95 + RXE_MAX_MR = DEFAULT_MAX_VALUE - RXE_MIN_MR_INDEX, 97 96 RXE_MIN_MW_INDEX = 0x00010001, 98 97 RXE_MAX_MW_INDEX = 0x00020000, 98 + RXE_MAX_MW = 0x00001000, 99 99 100 100 RXE_MAX_PKT_PER_ACK = 64, 101 101 ··· 117 113 /* default/initial rxe port parameters */ 118 114 enum rxe_port_param { 119 115 RXE_PORT_GID_TBL_LEN = 1024, 120 - RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, 116 + RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, 121 117 RXE_PORT_MAX_MSG_SZ = 0x800000, 122 118 RXE_PORT_BAD_PKEY_CNTR = 0, 123 119 RXE_PORT_QKEY_VIOL_CNTR = 0,
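With the new defaults, several pool capacities are derived from the index window (DEFAULT_MAX_VALUE minus the first usable index), which keeps the index range and the object count from drifting apart. A small sketch of that arithmetic, using the QP constants as the example:

#include <stdio.h>

#define DEFAULT_MAX_VALUE (1 << 20)

/* Mirrors RXE_MAX_QP = DEFAULT_MAX_VALUE - RXE_MIN_QP_INDEX. */
enum {
	MIN_QP_INDEX = 16,
	MAX_QP_INDEX = DEFAULT_MAX_VALUE,
	MAX_QP       = DEFAULT_MAX_VALUE - MIN_QP_INDEX,
};

int main(void)
{
	/* indices MIN..MAX inclusive give MAX - MIN + 1 slots, which must be
	 * enough to hold MAX_QP objects (the pool init rejects anything less) */
	printf("slots=%d capacity=%d\n",
	       MAX_QP_INDEX - MIN_QP_INDEX + 1, MAX_QP);
	return 0;
}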
+23 -18
drivers/infiniband/sw/rxe/rxe_pool.c
··· 7 7 #include "rxe.h" 8 8 #include "rxe_loc.h" 9 9 10 - /* info about object pools 11 - */ 12 - struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { 10 + static const struct rxe_type_info { 11 + const char *name; 12 + size_t size; 13 + size_t elem_offset; 14 + void (*cleanup)(struct rxe_pool_entry *obj); 15 + enum rxe_pool_flags flags; 16 + u32 min_index; 17 + u32 max_index; 18 + size_t key_offset; 19 + size_t key_size; 20 + } rxe_type_info[RXE_NUM_TYPES] = { 13 21 [RXE_TYPE_UC] = { 14 22 .name = "rxe-uc", 15 23 .size = sizeof(struct rxe_ucontext), ··· 34 26 .name = "rxe-ah", 35 27 .size = sizeof(struct rxe_ah), 36 28 .elem_offset = offsetof(struct rxe_ah, pelem), 37 - .flags = RXE_POOL_NO_ALLOC, 29 + .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, 30 + .min_index = RXE_MIN_AH_INDEX, 31 + .max_index = RXE_MAX_AH_INDEX, 38 32 }, 39 33 [RXE_TYPE_SRQ] = { 40 34 .name = "rxe-srq", ··· 68 58 .elem_offset = offsetof(struct rxe_mr, pelem), 69 59 .cleanup = rxe_mr_cleanup, 70 60 .flags = RXE_POOL_INDEX, 71 - .max_index = RXE_MAX_MR_INDEX, 72 61 .min_index = RXE_MIN_MR_INDEX, 62 + .max_index = RXE_MAX_MR_INDEX, 73 63 }, 74 64 [RXE_TYPE_MW] = { 75 65 .name = "rxe-mw", ··· 77 67 .elem_offset = offsetof(struct rxe_mw, pelem), 78 68 .cleanup = rxe_mw_cleanup, 79 69 .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, 80 - .max_index = RXE_MAX_MW_INDEX, 81 70 .min_index = RXE_MIN_MW_INDEX, 71 + .max_index = RXE_MAX_MW_INDEX, 82 72 }, 83 73 [RXE_TYPE_MC_GRP] = { 84 74 .name = "rxe-mc_grp", ··· 104 94 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) 105 95 { 106 96 int err = 0; 107 - size_t size; 108 97 109 98 if ((max - min + 1) < pool->max_elem) { 110 99 pr_warn("not enough indices for max_elem\n"); ··· 114 105 pool->index.max_index = max; 115 106 pool->index.min_index = min; 116 107 117 - size = BITS_TO_LONGS(max - min + 1) * sizeof(long); 118 - pool->index.table = kmalloc(size, GFP_KERNEL); 108 + pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL); 119 109 if (!pool->index.table) { 120 110 err = -ENOMEM; 121 111 goto out; 122 112 } 123 - 124 - pool->index.table_size = size; 125 - bitmap_zero(pool->index.table, max - min + 1); 126 113 127 114 out: 128 115 return err; ··· 171 166 pr_warn("%s pool destroyed with unfree'd elem\n", 172 167 pool_name(pool)); 173 168 174 - kfree(pool->index.table); 169 + bitmap_free(pool->index.table); 175 170 } 176 171 177 172 static u32 alloc_index(struct rxe_pool *pool) ··· 332 327 333 328 void *rxe_alloc_locked(struct rxe_pool *pool) 334 329 { 335 - struct rxe_type_info *info = &rxe_type_info[pool->type]; 330 + const struct rxe_type_info *info = &rxe_type_info[pool->type]; 336 331 struct rxe_pool_entry *elem; 337 332 u8 *obj; 338 333 ··· 357 352 358 353 void *rxe_alloc(struct rxe_pool *pool) 359 354 { 360 - struct rxe_type_info *info = &rxe_type_info[pool->type]; 355 + const struct rxe_type_info *info = &rxe_type_info[pool->type]; 361 356 struct rxe_pool_entry *elem; 362 357 u8 *obj; 363 358 ··· 400 395 struct rxe_pool_entry *elem = 401 396 container_of(kref, struct rxe_pool_entry, ref_cnt); 402 397 struct rxe_pool *pool = elem->pool; 403 - struct rxe_type_info *info = &rxe_type_info[pool->type]; 398 + const struct rxe_type_info *info = &rxe_type_info[pool->type]; 404 399 u8 *obj; 405 400 406 401 if (pool->cleanup) ··· 416 411 417 412 void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) 418 413 { 419 - struct rxe_type_info *info = &rxe_type_info[pool->type]; 414 + const struct rxe_type_info *info = &rxe_type_info[pool->type]; 420 415 
struct rb_node *node; 421 416 struct rxe_pool_entry *elem; 422 417 u8 *obj; ··· 458 453 459 454 void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) 460 455 { 461 - struct rxe_type_info *info = &rxe_type_info[pool->type]; 456 + const struct rxe_type_info *info = &rxe_type_info[pool->type]; 462 457 struct rb_node *node; 463 458 struct rxe_pool_entry *elem; 464 459 u8 *obj;
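rxe_pool_init_index() now uses bitmap_zalloc()/bitmap_free() instead of kmalloc() plus bitmap_zero(), dropping the separate table_size bookkeeping; index allocation still scans the bitmap from a "last" cursor. A userspace sketch of that bitmap-based index allocator; the naive bit scan stands in for the kernel's find_next_zero_bit() and the min/max index offsets are omitted.

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

struct index_pool {
	unsigned long *table;	/* one bit per index, as bitmap_zalloc() gives */
	unsigned int nbits;
	unsigned int last;	/* search hint, like pool->index.last */
};

static int pool_init(struct index_pool *p, unsigned int nbits)
{
	p->table = calloc((nbits + BITS_PER_LONG - 1) / BITS_PER_LONG,
			  sizeof(unsigned long));
	if (!p->table)
		return -1;
	p->nbits = nbits;
	p->last = 0;
	return 0;
}

/* Round-robin allocation: scan from the last allocation point, wrap once. */
static int pool_alloc_index(struct index_pool *p)
{
	for (unsigned int n = 0; n < p->nbits; n++) {
		unsigned int i = (p->last + n) % p->nbits;
		unsigned long bit = 1UL << (i % BITS_PER_LONG);

		if (!(p->table[i / BITS_PER_LONG] & bit)) {
			p->table[i / BITS_PER_LONG] |= bit;
			p->last = (i + 1 == p->nbits) ? 0 : i + 1;
			return (int)i;
		}
	}
	return -1;	/* pool exhausted */
}

int main(void)
{
	struct index_pool p;

	if (pool_init(&p, 128))
		return 1;
	printf("%d %d %d\n", pool_alloc_index(&p), pool_alloc_index(&p),
	       pool_alloc_index(&p));	/* 0 1 2 */
	free(p.table);
	return 0;
}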
-15
drivers/infiniband/sw/rxe/rxe_pool.h
··· 32 32 33 33 struct rxe_pool_entry; 34 34 35 - struct rxe_type_info { 36 - const char *name; 37 - size_t size; 38 - size_t elem_offset; 39 - void (*cleanup)(struct rxe_pool_entry *obj); 40 - enum rxe_pool_flags flags; 41 - u32 max_index; 42 - u32 min_index; 43 - size_t key_offset; 44 - size_t key_size; 45 - }; 46 - 47 - extern struct rxe_type_info rxe_type_info[]; 48 - 49 35 struct rxe_pool_entry { 50 36 struct rxe_pool *pool; 51 37 struct kref ref_cnt; ··· 60 74 struct { 61 75 struct rb_root tree; 62 76 unsigned long *table; 63 - size_t table_size; 64 77 u32 last; 65 78 u32 max_index; 66 79 u32 min_index;
+4 -12
drivers/infiniband/sw/rxe/rxe_qp.c
··· 190 190 191 191 INIT_LIST_HEAD(&qp->grp_list); 192 192 193 - skb_queue_head_init(&qp->send_pkts); 194 - 195 193 spin_lock_init(&qp->grp_lock); 196 194 spin_lock_init(&qp->state_lock); 197 195 ··· 229 231 qp->sq.max_inline = init->cap.max_inline_data = wqe_size; 230 232 wqe_size += sizeof(struct rxe_send_wqe); 231 233 232 - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; 234 + type = QUEUE_TYPE_FROM_CLIENT; 233 235 qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, 234 236 wqe_size, type); 235 237 if (!qp->sq.queue) ··· 246 248 return err; 247 249 } 248 250 249 - if (qp->is_user) 250 - qp->req.wqe_index = producer_index(qp->sq.queue, 251 - QUEUE_TYPE_FROM_USER); 252 - else 253 - qp->req.wqe_index = producer_index(qp->sq.queue, 254 - QUEUE_TYPE_KERNEL); 251 + qp->req.wqe_index = queue_get_producer(qp->sq.queue, 252 + QUEUE_TYPE_FROM_CLIENT); 255 253 256 254 qp->req.state = QP_STATE_RESET; 257 255 qp->req.opcode = -1; ··· 287 293 pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", 288 294 qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); 289 295 290 - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; 296 + type = QUEUE_TYPE_FROM_CLIENT; 291 297 qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, 292 298 wqe_size, type); 293 299 if (!qp->rq.queue) ··· 306 312 307 313 spin_lock_init(&qp->rq.producer_lock); 308 314 spin_lock_init(&qp->rq.consumer_lock); 309 - 310 - qp->rq.is_user = qp->is_user; 311 315 312 316 skb_queue_head_init(&qp->resp_pkts); 313 317
+23 -7
drivers/infiniband/sw/rxe/rxe_queue.c
··· 111 111 static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, 112 112 unsigned int num_elem) 113 113 { 114 - if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type))) 114 + enum queue_type type = q->type; 115 + u32 prod; 116 + u32 cons; 117 + 118 + if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type))) 115 119 return -EINVAL; 116 120 117 - while (!queue_empty(q, q->type)) { 118 - memcpy(producer_addr(new_q, new_q->type), 119 - consumer_addr(q, q->type), 120 - new_q->elem_size); 121 - advance_producer(new_q, new_q->type); 122 - advance_consumer(q, q->type); 121 + prod = queue_get_producer(new_q, type); 122 + cons = queue_get_consumer(q, type); 123 + 124 + while (!queue_empty(q, type)) { 125 + memcpy(queue_addr_from_index(new_q, prod), 126 + queue_addr_from_index(q, cons), new_q->elem_size); 127 + prod = queue_next_index(new_q, prod); 128 + cons = queue_next_index(q, cons); 123 129 } 124 130 131 + new_q->buf->producer_index = prod; 132 + q->buf->consumer_index = cons; 133 + 134 + /* update private index copies */ 135 + if (type == QUEUE_TYPE_TO_CLIENT) 136 + new_q->index = new_q->buf->producer_index; 137 + else 138 + q->index = q->buf->consumer_index; 139 + 140 + /* exchange rxe_queue headers */ 125 141 swap(*q, *new_q); 126 142 127 143 return 0;
+160 -214
drivers/infiniband/sw/rxe/rxe_queue.h
··· 10 10 /* for definition of shared struct rxe_queue_buf */ 11 11 #include <uapi/rdma/rdma_user_rxe.h> 12 12 13 - /* implements a simple circular buffer that can optionally be 14 - * shared between user space and the kernel and can be resized 15 - * the requested element size is rounded up to a power of 2 16 - * and the number of elements in the buffer is also rounded 17 - * up to a power of 2. Since the queue is empty when the 18 - * producer and consumer indices match the maximum capacity 19 - * of the queue is one less than the number of element slots 13 + /* Implements a simple circular buffer that is shared between user 14 + * and the driver and can be resized. The requested element size is 15 + * rounded up to a power of 2 and the number of elements in the buffer 16 + * is also rounded up to a power of 2. Since the queue is empty when 17 + * the producer and consumer indices match the maximum capacity of the 18 + * queue is one less than the number of element slots. 20 19 * 21 20 * Notes: 22 - * - Kernel space indices are always masked off to q->index_mask 23 - * before storing so do not need to be checked on reads. 24 - * - User space indices may be out of range and must be 25 - * masked before use when read. 26 - * - The kernel indices for shared queues must not be written 27 - * by user space so a local copy is used and a shared copy is 28 - * stored when the local copy changes. 21 + * - The driver indices are always masked off to q->index_mask 22 + * before storing so do not need to be checked on reads. 23 + * - The user whether user space or kernel is generally 24 + * not trusted so its parameters are masked to make sure 25 + * they do not access the queue out of bounds on reads. 26 + * - The driver indices for queues must not be written 27 + * by user so a local copy is used and a shared copy is 28 + * stored when the local copy is changed. 29 29 * - By passing the type in the parameter list separate from q 30 - * the compiler can eliminate the switch statement when the 31 - * actual queue type is known when the function is called. 32 - * In the performance path this is done. In less critical 33 - * paths just q->type is passed. 30 + * the compiler can eliminate the switch statement when the 31 + * actual queue type is known when the function is called at 32 + * compile time. 33 + * - These queues are lock free. The user and driver must protect 34 + * changes to their end of the queues with locks if more than one 35 + * CPU can be accessing it at the same time. 34 36 */ 35 37 36 - /* type of queue */ 38 + /** 39 + * enum queue_type - type of queue 40 + * @QUEUE_TYPE_TO_CLIENT: Queue is written by rxe driver and 41 + * read by client. Used by rxe driver only. 42 + * @QUEUE_TYPE_FROM_CLIENT: Queue is written by client and 43 + * read by rxe driver. Used by rxe driver only. 44 + * @QUEUE_TYPE_TO_DRIVER: Queue is written by client and 45 + * read by rxe driver. Used by kernel client only. 46 + * @QUEUE_TYPE_FROM_DRIVER: Queue is written by rxe driver and 47 + * read by client. Used by kernel client only. 
48 + */ 37 49 enum queue_type { 38 - QUEUE_TYPE_KERNEL, 39 - QUEUE_TYPE_TO_USER, 40 - QUEUE_TYPE_FROM_USER, 50 + QUEUE_TYPE_TO_CLIENT, 51 + QUEUE_TYPE_FROM_CLIENT, 52 + QUEUE_TYPE_TO_DRIVER, 53 + QUEUE_TYPE_FROM_DRIVER, 41 54 }; 42 55 43 56 struct rxe_queue { ··· 82 69 int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, 83 70 unsigned int elem_size, struct ib_udata *udata, 84 71 struct mminfo __user *outbuf, 85 - /* Protect producers while resizing queue */ 86 - spinlock_t *producer_lock, 87 - /* Protect consumers while resizing queue */ 88 - spinlock_t *consumer_lock); 72 + spinlock_t *producer_lock, spinlock_t *consumer_lock); 89 73 90 74 void rxe_queue_cleanup(struct rxe_queue *queue); 91 75 92 - static inline int next_index(struct rxe_queue *q, int index) 76 + static inline u32 queue_next_index(struct rxe_queue *q, int index) 93 77 { 94 - return (index + 1) & q->buf->index_mask; 78 + return (index + 1) & q->index_mask; 95 79 } 96 80 97 - static inline int queue_empty(struct rxe_queue *q, enum queue_type type) 98 - { 99 - u32 prod; 100 - u32 cons; 101 - 102 - switch (type) { 103 - case QUEUE_TYPE_FROM_USER: 104 - /* protect user space index */ 105 - prod = smp_load_acquire(&q->buf->producer_index); 106 - cons = q->index; 107 - break; 108 - case QUEUE_TYPE_TO_USER: 109 - prod = q->index; 110 - /* protect user space index */ 111 - cons = smp_load_acquire(&q->buf->consumer_index); 112 - break; 113 - case QUEUE_TYPE_KERNEL: 114 - prod = q->buf->producer_index; 115 - cons = q->buf->consumer_index; 116 - break; 117 - } 118 - 119 - return ((prod - cons) & q->index_mask) == 0; 120 - } 121 - 122 - static inline int queue_full(struct rxe_queue *q, enum queue_type type) 123 - { 124 - u32 prod; 125 - u32 cons; 126 - 127 - switch (type) { 128 - case QUEUE_TYPE_FROM_USER: 129 - /* protect user space index */ 130 - prod = smp_load_acquire(&q->buf->producer_index); 131 - cons = q->index; 132 - break; 133 - case QUEUE_TYPE_TO_USER: 134 - prod = q->index; 135 - /* protect user space index */ 136 - cons = smp_load_acquire(&q->buf->consumer_index); 137 - break; 138 - case QUEUE_TYPE_KERNEL: 139 - prod = q->buf->producer_index; 140 - cons = q->buf->consumer_index; 141 - break; 142 - } 143 - 144 - return ((prod + 1 - cons) & q->index_mask) == 0; 145 - } 146 - 147 - static inline unsigned int queue_count(const struct rxe_queue *q, 148 - enum queue_type type) 149 - { 150 - u32 prod; 151 - u32 cons; 152 - 153 - switch (type) { 154 - case QUEUE_TYPE_FROM_USER: 155 - /* protect user space index */ 156 - prod = smp_load_acquire(&q->buf->producer_index); 157 - cons = q->index; 158 - break; 159 - case QUEUE_TYPE_TO_USER: 160 - prod = q->index; 161 - /* protect user space index */ 162 - cons = smp_load_acquire(&q->buf->consumer_index); 163 - break; 164 - case QUEUE_TYPE_KERNEL: 165 - prod = q->buf->producer_index; 166 - cons = q->buf->consumer_index; 167 - break; 168 - } 169 - 170 - return (prod - cons) & q->index_mask; 171 - } 172 - 173 - static inline void advance_producer(struct rxe_queue *q, enum queue_type type) 81 + static inline u32 queue_get_producer(const struct rxe_queue *q, 82 + enum queue_type type) 174 83 { 175 84 u32 prod; 176 85 177 86 switch (type) { 178 - case QUEUE_TYPE_FROM_USER: 179 - pr_warn_once("Normally kernel should not write user space index\n"); 180 - /* protect user space index */ 87 + case QUEUE_TYPE_FROM_CLIENT: 88 + /* protect user index */ 181 89 prod = smp_load_acquire(&q->buf->producer_index); 182 - prod = (prod + 1) & q->index_mask; 183 - /* same */ 184 - 
smp_store_release(&q->buf->producer_index, prod); 185 90 break; 186 - case QUEUE_TYPE_TO_USER: 187 - prod = q->index; 188 - q->index = (prod + 1) & q->index_mask; 189 - q->buf->producer_index = q->index; 190 - break; 191 - case QUEUE_TYPE_KERNEL: 192 - prod = q->buf->producer_index; 193 - q->buf->producer_index = (prod + 1) & q->index_mask; 194 - break; 195 - } 196 - } 197 - 198 - static inline void advance_consumer(struct rxe_queue *q, enum queue_type type) 199 - { 200 - u32 cons; 201 - 202 - switch (type) { 203 - case QUEUE_TYPE_FROM_USER: 204 - cons = q->index; 205 - q->index = (cons + 1) & q->index_mask; 206 - q->buf->consumer_index = q->index; 207 - break; 208 - case QUEUE_TYPE_TO_USER: 209 - pr_warn_once("Normally kernel should not write user space index\n"); 210 - /* protect user space index */ 211 - cons = smp_load_acquire(&q->buf->consumer_index); 212 - cons = (cons + 1) & q->index_mask; 213 - /* same */ 214 - smp_store_release(&q->buf->consumer_index, cons); 215 - break; 216 - case QUEUE_TYPE_KERNEL: 217 - cons = q->buf->consumer_index; 218 - q->buf->consumer_index = (cons + 1) & q->index_mask; 219 - break; 220 - } 221 - } 222 - 223 - static inline void *producer_addr(struct rxe_queue *q, enum queue_type type) 224 - { 225 - u32 prod; 226 - 227 - switch (type) { 228 - case QUEUE_TYPE_FROM_USER: 229 - /* protect user space index */ 230 - prod = smp_load_acquire(&q->buf->producer_index); 231 - prod &= q->index_mask; 232 - break; 233 - case QUEUE_TYPE_TO_USER: 91 + case QUEUE_TYPE_TO_CLIENT: 234 92 prod = q->index; 235 93 break; 236 - case QUEUE_TYPE_KERNEL: 237 - prod = q->buf->producer_index; 238 - break; 239 - } 240 - 241 - return q->buf->data + (prod << q->log2_elem_size); 242 - } 243 - 244 - static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type) 245 - { 246 - u32 cons; 247 - 248 - switch (type) { 249 - case QUEUE_TYPE_FROM_USER: 250 - cons = q->index; 251 - break; 252 - case QUEUE_TYPE_TO_USER: 253 - /* protect user space index */ 254 - cons = smp_load_acquire(&q->buf->consumer_index); 255 - cons &= q->index_mask; 256 - break; 257 - case QUEUE_TYPE_KERNEL: 258 - cons = q->buf->consumer_index; 259 - break; 260 - } 261 - 262 - return q->buf->data + (cons << q->log2_elem_size); 263 - } 264 - 265 - static inline unsigned int producer_index(struct rxe_queue *q, 266 - enum queue_type type) 267 - { 268 - u32 prod; 269 - 270 - switch (type) { 271 - case QUEUE_TYPE_FROM_USER: 272 - /* protect user space index */ 94 + case QUEUE_TYPE_FROM_DRIVER: 95 + /* protect driver index */ 273 96 prod = smp_load_acquire(&q->buf->producer_index); 274 - prod &= q->index_mask; 275 97 break; 276 - case QUEUE_TYPE_TO_USER: 277 - prod = q->index; 278 - break; 279 - case QUEUE_TYPE_KERNEL: 98 + case QUEUE_TYPE_TO_DRIVER: 280 99 prod = q->buf->producer_index; 281 100 break; 282 101 } ··· 116 271 return prod; 117 272 } 118 273 119 - static inline unsigned int consumer_index(struct rxe_queue *q, 120 - enum queue_type type) 274 + static inline u32 queue_get_consumer(const struct rxe_queue *q, 275 + enum queue_type type) 121 276 { 122 277 u32 cons; 123 278 124 279 switch (type) { 125 - case QUEUE_TYPE_FROM_USER: 280 + case QUEUE_TYPE_FROM_CLIENT: 126 281 cons = q->index; 127 282 break; 128 - case QUEUE_TYPE_TO_USER: 129 - /* protect user space index */ 283 + case QUEUE_TYPE_TO_CLIENT: 284 + /* protect user index */ 130 285 cons = smp_load_acquire(&q->buf->consumer_index); 131 - cons &= q->index_mask; 132 286 break; 133 - case QUEUE_TYPE_KERNEL: 287 + case QUEUE_TYPE_FROM_DRIVER: 134 288 
cons = q->buf->consumer_index; 289 + break; 290 + case QUEUE_TYPE_TO_DRIVER: 291 + /* protect driver index */ 292 + cons = smp_load_acquire(&q->buf->consumer_index); 135 293 break; 136 294 } 137 295 138 296 return cons; 139 297 } 140 298 141 - static inline void *addr_from_index(struct rxe_queue *q, 142 - unsigned int index) 299 + static inline int queue_empty(struct rxe_queue *q, enum queue_type type) 143 300 { 144 - return q->buf->data + ((index & q->index_mask) 145 - << q->buf->log2_elem_size); 301 + u32 prod = queue_get_producer(q, type); 302 + u32 cons = queue_get_consumer(q, type); 303 + 304 + return ((prod - cons) & q->index_mask) == 0; 146 305 } 147 306 148 - static inline unsigned int index_from_addr(const struct rxe_queue *q, 307 + static inline int queue_full(struct rxe_queue *q, enum queue_type type) 308 + { 309 + u32 prod = queue_get_producer(q, type); 310 + u32 cons = queue_get_consumer(q, type); 311 + 312 + return ((prod + 1 - cons) & q->index_mask) == 0; 313 + } 314 + 315 + static inline u32 queue_count(const struct rxe_queue *q, 316 + enum queue_type type) 317 + { 318 + u32 prod = queue_get_producer(q, type); 319 + u32 cons = queue_get_consumer(q, type); 320 + 321 + return (prod - cons) & q->index_mask; 322 + } 323 + 324 + static inline void queue_advance_producer(struct rxe_queue *q, 325 + enum queue_type type) 326 + { 327 + u32 prod; 328 + 329 + switch (type) { 330 + case QUEUE_TYPE_FROM_CLIENT: 331 + pr_warn("%s: attempt to advance client index\n", 332 + __func__); 333 + break; 334 + case QUEUE_TYPE_TO_CLIENT: 335 + prod = q->index; 336 + prod = (prod + 1) & q->index_mask; 337 + q->index = prod; 338 + /* protect user index */ 339 + smp_store_release(&q->buf->producer_index, prod); 340 + break; 341 + case QUEUE_TYPE_FROM_DRIVER: 342 + pr_warn("%s: attempt to advance driver index\n", 343 + __func__); 344 + break; 345 + case QUEUE_TYPE_TO_DRIVER: 346 + prod = q->buf->producer_index; 347 + prod = (prod + 1) & q->index_mask; 348 + q->buf->producer_index = prod; 349 + break; 350 + } 351 + } 352 + 353 + static inline void queue_advance_consumer(struct rxe_queue *q, 354 + enum queue_type type) 355 + { 356 + u32 cons; 357 + 358 + switch (type) { 359 + case QUEUE_TYPE_FROM_CLIENT: 360 + cons = q->index; 361 + cons = (cons + 1) & q->index_mask; 362 + q->index = cons; 363 + /* protect user index */ 364 + smp_store_release(&q->buf->consumer_index, cons); 365 + break; 366 + case QUEUE_TYPE_TO_CLIENT: 367 + pr_warn("%s: attempt to advance client index\n", 368 + __func__); 369 + break; 370 + case QUEUE_TYPE_FROM_DRIVER: 371 + cons = q->buf->consumer_index; 372 + cons = (cons + 1) & q->index_mask; 373 + q->buf->consumer_index = cons; 374 + break; 375 + case QUEUE_TYPE_TO_DRIVER: 376 + pr_warn("%s: attempt to advance driver index\n", 377 + __func__); 378 + break; 379 + } 380 + } 381 + 382 + static inline void *queue_producer_addr(struct rxe_queue *q, 383 + enum queue_type type) 384 + { 385 + u32 prod = queue_get_producer(q, type); 386 + 387 + return q->buf->data + (prod << q->log2_elem_size); 388 + } 389 + 390 + static inline void *queue_consumer_addr(struct rxe_queue *q, 391 + enum queue_type type) 392 + { 393 + u32 cons = queue_get_consumer(q, type); 394 + 395 + return q->buf->data + (cons << q->log2_elem_size); 396 + } 397 + 398 + static inline void *queue_addr_from_index(struct rxe_queue *q, u32 index) 399 + { 400 + return q->buf->data + ((index & q->index_mask) 401 + << q->log2_elem_size); 402 + } 403 + 404 + static inline u32 queue_index_from_addr(const struct rxe_queue *q, 149 405 
const void *addr) 150 406 { 151 407 return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) ··· 255 309 256 310 static inline void *queue_head(struct rxe_queue *q, enum queue_type type) 257 311 { 258 - return queue_empty(q, type) ? NULL : consumer_addr(q, type); 312 + return queue_empty(q, type) ? NULL : queue_consumer_addr(q, type); 259 313 } 260 314 261 315 #endif /* RXE_QUEUE_H */
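The queue rework replaces the per-call switch statements with queue_get_producer()/queue_get_consumer() helpers and splits the types into TO/FROM_CLIENT and TO/FROM_DRIVER. Underneath, the queue is a power-of-two ring where empty means prod == cons and full means advancing prod would catch cons, so usable capacity is size minus one. The single-threaded sketch below models only that index arithmetic; the kernel version additionally wraps the indices shared with untrusted user space in smp_load_acquire()/smp_store_release(), which is omitted here.

#include <stdio.h>

#define QSIZE 8				/* must be a power of two */
#define QMASK (QSIZE - 1)

struct ring {
	unsigned int prod;
	unsigned int cons;
	int data[QSIZE];
};

static int ring_empty(const struct ring *q)
{
	return ((q->prod - q->cons) & QMASK) == 0;
}

static int ring_full(const struct ring *q)
{
	/* one slot is sacrificed so that full and empty are distinguishable */
	return ((q->prod + 1 - q->cons) & QMASK) == 0;
}

static unsigned int ring_count(const struct ring *q)
{
	return (q->prod - q->cons) & QMASK;
}

static int ring_post(struct ring *q, int v)
{
	if (ring_full(q))
		return -1;
	q->data[q->prod & QMASK] = v;
	q->prod = (q->prod + 1) & QMASK;	/* indices stored pre-masked */
	return 0;
}

static int ring_poll(struct ring *q, int *v)
{
	if (ring_empty(q))
		return -1;
	*v = q->data[q->cons & QMASK];
	q->cons = (q->cons + 1) & QMASK;
	return 0;
}

int main(void)
{
	struct ring q = { 0 };
	int v;

	for (int i = 0; i < 10; i++)
		if (ring_post(&q, i))
			printf("full at %d (capacity is QSIZE - 1 = %d)\n", i, QSIZE - 1);
	while (!ring_poll(&q, &v))
		printf("%d ", v);
	printf("count=%u\n", ring_count(&q));
	return 0;
}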
+27 -38
drivers/infiniband/sw/rxe/rxe_req.c
··· 49 49 unsigned int cons; 50 50 unsigned int prod; 51 51 52 - if (qp->is_user) { 53 - cons = consumer_index(q, QUEUE_TYPE_FROM_USER); 54 - prod = producer_index(q, QUEUE_TYPE_FROM_USER); 55 - } else { 56 - cons = consumer_index(q, QUEUE_TYPE_KERNEL); 57 - prod = producer_index(q, QUEUE_TYPE_KERNEL); 58 - } 52 + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); 53 + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); 59 54 60 55 qp->req.wqe_index = cons; 61 56 qp->req.psn = qp->comp.psn; 62 57 qp->req.opcode = -1; 63 58 64 59 for (wqe_index = cons; wqe_index != prod; 65 - wqe_index = next_index(q, wqe_index)) { 66 - wqe = addr_from_index(qp->sq.queue, wqe_index); 60 + wqe_index = queue_next_index(q, wqe_index)) { 61 + wqe = queue_addr_from_index(qp->sq.queue, wqe_index); 67 62 mask = wr_opcode_mask(wqe->wr.opcode, qp); 68 63 69 64 if (wqe->state == wqe_state_posted) ··· 116 121 unsigned int cons; 117 122 unsigned int prod; 118 123 119 - if (qp->is_user) { 120 - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); 121 - cons = consumer_index(q, QUEUE_TYPE_FROM_USER); 122 - prod = producer_index(q, QUEUE_TYPE_FROM_USER); 123 - } else { 124 - wqe = queue_head(q, QUEUE_TYPE_KERNEL); 125 - cons = consumer_index(q, QUEUE_TYPE_KERNEL); 126 - prod = producer_index(q, QUEUE_TYPE_KERNEL); 127 - } 124 + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); 125 + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); 126 + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); 128 127 129 128 if (unlikely(qp->req.state == QP_STATE_DRAIN)) { 130 129 /* check to see if we are drained; ··· 159 170 if (index == prod) 160 171 return NULL; 161 172 162 - wqe = addr_from_index(q, index); 173 + wqe = queue_addr_from_index(q, index); 163 174 164 175 if (unlikely((qp->req.state == QP_STATE_DRAIN || 165 176 qp->req.state == QP_STATE_DRAINED) && ··· 379 390 /* length from start of bth to end of icrc */ 380 391 paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; 381 392 382 - /* pkt->hdr, rxe, port_num and mask are initialized in ifc 383 - * layer 384 - */ 393 + /* pkt->hdr, port_num and mask are initialized in ifc layer */ 394 + pkt->rxe = rxe; 385 395 pkt->opcode = opcode; 386 396 pkt->qp = qp; 387 397 pkt->psn = qp->req.psn; ··· 390 402 391 403 /* init skb */ 392 404 av = rxe_get_av(pkt); 405 + if (!av) 406 + return NULL; 407 + 393 408 skb = rxe_init_packet(rxe, av, paylen, pkt); 394 409 if (unlikely(!skb)) 395 410 return NULL; ··· 463 472 if (err) 464 473 return err; 465 474 466 - if (pkt->mask & RXE_WRITE_OR_SEND) { 475 + if (pkt->mask & RXE_WRITE_OR_SEND_MASK) { 467 476 if (wqe->wr.send_flags & IB_SEND_INLINE) { 468 477 u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; 469 478 ··· 551 560 qp->req.opcode = pkt->opcode; 552 561 553 562 if (pkt->mask & RXE_END_MASK) 554 - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); 563 + qp->req.wqe_index = queue_next_index(qp->sq.queue, 564 + qp->req.wqe_index); 555 565 556 566 qp->need_req_skb = 0; 557 567 ··· 564 572 static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) 565 573 { 566 574 u8 opcode = wqe->wr.opcode; 567 - struct rxe_mr *mr; 568 575 u32 rkey; 569 576 int ret; 570 577 ··· 581 590 } 582 591 break; 583 592 case IB_WR_REG_MR: 584 - mr = to_rmr(wqe->wr.wr.reg.mr); 585 - rxe_add_ref(mr); 586 - mr->state = RXE_MR_STATE_VALID; 587 - mr->access = wqe->wr.wr.reg.access; 588 - mr->ibmr.lkey = wqe->wr.wr.reg.key; 589 - mr->ibmr.rkey = wqe->wr.wr.reg.key; 590 - mr->iova = wqe->wr.wr.reg.mr->iova; 591 - rxe_drop_ref(mr); 593 + 
ret = rxe_reg_fast_mr(qp, wqe); 594 + if (unlikely(ret)) { 595 + wqe->status = IB_WC_LOC_QP_OP_ERR; 596 + return ret; 597 + } 592 598 break; 593 599 case IB_WR_BIND_MW: 594 600 ret = rxe_bind_mw(qp, wqe); ··· 602 614 603 615 wqe->state = wqe_state_done; 604 616 wqe->status = IB_WC_SUCCESS; 605 - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); 617 + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); 606 618 607 619 if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || 608 620 qp->sq_sig_type == IB_SIGNAL_ALL_WR) ··· 633 645 goto exit; 634 646 635 647 if (unlikely(qp->req.state == QP_STATE_RESET)) { 636 - qp->req.wqe_index = consumer_index(q, q->type); 648 + qp->req.wqe_index = queue_get_consumer(q, 649 + QUEUE_TYPE_FROM_CLIENT); 637 650 qp->req.opcode = -1; 638 651 qp->req.need_rd_atomic = 0; 639 652 qp->req.wait_psn = 0; ··· 680 691 } 681 692 682 693 mask = rxe_opcode[opcode].mask; 683 - if (unlikely(mask & RXE_READ_OR_ATOMIC)) { 694 + if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { 684 695 if (check_init_depth(qp, wqe)) 685 696 goto exit; 686 697 } 687 698 688 699 mtu = get_mtu(qp); 689 - payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; 700 + payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; 690 701 if (payload > mtu) { 691 702 if (qp_type(qp) == IB_QPT_UD) { 692 703 /* C10-93.1.1: If the total sum of all the buffer lengths specified for a ··· 700 711 wqe->last_psn = qp->req.psn; 701 712 qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; 702 713 qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; 703 - qp->req.wqe_index = next_index(qp->sq.queue, 714 + qp->req.wqe_index = queue_next_index(qp->sq.queue, 704 715 qp->req.wqe_index); 705 716 wqe->state = wqe_state_done; 706 717 wqe->status = IB_WC_SUCCESS;
+13 -37
drivers/infiniband/sw/rxe/rxe_resp.c
··· 303 303 304 304 spin_lock_bh(&srq->rq.consumer_lock); 305 305 306 - if (qp->is_user) 307 - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); 308 - else 309 - wqe = queue_head(q, QUEUE_TYPE_KERNEL); 306 + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); 310 307 if (!wqe) { 311 308 spin_unlock_bh(&srq->rq.consumer_lock); 312 309 return RESPST_ERR_RNR; ··· 319 322 memcpy(&qp->resp.srq_wqe, wqe, size); 320 323 321 324 qp->resp.wqe = &qp->resp.srq_wqe.wqe; 322 - if (qp->is_user) { 323 - advance_consumer(q, QUEUE_TYPE_FROM_USER); 324 - count = queue_count(q, QUEUE_TYPE_FROM_USER); 325 - } else { 326 - advance_consumer(q, QUEUE_TYPE_KERNEL); 327 - count = queue_count(q, QUEUE_TYPE_KERNEL); 328 - } 325 + queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT); 326 + count = queue_count(q, QUEUE_TYPE_FROM_CLIENT); 329 327 330 328 if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) { 331 329 srq->limit = 0; ··· 349 357 qp->resp.status = IB_WC_WR_FLUSH_ERR; 350 358 return RESPST_COMPLETE; 351 359 } else if (!srq) { 352 - if (qp->is_user) 353 - qp->resp.wqe = queue_head(qp->rq.queue, 354 - QUEUE_TYPE_FROM_USER); 355 - else 356 - qp->resp.wqe = queue_head(qp->rq.queue, 357 - QUEUE_TYPE_KERNEL); 360 + qp->resp.wqe = queue_head(qp->rq.queue, 361 + QUEUE_TYPE_FROM_CLIENT); 358 362 if (qp->resp.wqe) { 359 363 qp->resp.status = IB_WC_WR_FLUSH_ERR; 360 364 return RESPST_COMPLETE; ··· 362 374 } 363 375 } 364 376 365 - if (pkt->mask & RXE_READ_OR_ATOMIC) { 377 + if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { 366 378 /* it is the requesters job to not send 367 379 * too many read/atomic ops, we just 368 380 * recycle the responder resource queue ··· 377 389 if (srq) 378 390 return get_srq_wqe(qp); 379 391 380 - if (qp->is_user) 381 - qp->resp.wqe = queue_head(qp->rq.queue, 382 - QUEUE_TYPE_FROM_USER); 383 - else 384 - qp->resp.wqe = queue_head(qp->rq.queue, 385 - QUEUE_TYPE_KERNEL); 392 + qp->resp.wqe = queue_head(qp->rq.queue, 393 + QUEUE_TYPE_FROM_CLIENT); 386 394 return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; 387 395 } 388 396 ··· 413 429 enum resp_states state; 414 430 int access; 415 431 416 - if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { 432 + if (pkt->mask & RXE_READ_OR_WRITE_MASK) { 417 433 if (pkt->mask & RXE_RETH_MASK) { 418 434 qp->resp.va = reth_va(pkt); 419 435 qp->resp.offset = 0; ··· 434 450 } 435 451 436 452 /* A zero-byte op is not required to set an addr or rkey. */ 437 - if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && 453 + if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && 438 454 (pkt->mask & RXE_RETH_MASK) && 439 455 reth_len(pkt) == 0) { 440 456 return RESPST_EXECUTE; ··· 860 876 wc->opcode = (pkt->mask & RXE_IMMDT_MASK && 861 877 pkt->mask & RXE_WRITE_MASK) ? 862 878 IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; 863 - wc->vendor_err = 0; 864 879 wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && 865 880 pkt->mask & RXE_WRITE_MASK) ? 
866 881 qp->resp.length : wqe->dma.length - wqe->dma.resid; ··· 879 896 uwc->wc_flags |= IB_WC_WITH_INVALIDATE; 880 897 uwc->ex.invalidate_rkey = ieth_rkey(pkt); 881 898 } 882 - 883 - uwc->qp_num = qp->ibqp.qp_num; 884 899 885 900 if (pkt->mask & RXE_DETH_MASK) 886 901 uwc->src_qp = deth_sqp(pkt); ··· 911 930 if (pkt->mask & RXE_DETH_MASK) 912 931 wc->src_qp = deth_sqp(pkt); 913 932 914 - wc->qp = &qp->ibqp; 915 933 wc->port_num = qp->attr.port_num; 916 934 } 917 935 } 918 936 919 937 /* have copy for srq and reference for !srq */ 920 - if (!qp->srq) { 921 - if (qp->is_user) 922 - advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER); 923 - else 924 - advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL); 925 - } 938 + if (!qp->srq) 939 + queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT); 926 940 927 941 qp->resp.wqe = NULL; 928 942 ··· 1189 1213 return; 1190 1214 1191 1215 while (!qp->srq && q && queue_head(q, q->type)) 1192 - advance_consumer(q, q->type); 1216 + queue_advance_consumer(q, q->type); 1193 1217 } 1194 1218 1195 1219 int rxe_responder(void *arg)
+1 -2
drivers/infiniband/sw/rxe/rxe_srq.c
··· 86 86 srq->srq_num = srq->pelem.index; 87 87 srq->rq.max_wr = init->attr.max_wr; 88 88 srq->rq.max_sge = init->attr.max_sge; 89 - srq->rq.is_user = srq->is_user; 90 89 91 90 srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); 92 91 93 92 spin_lock_init(&srq->rq.producer_lock); 94 93 spin_lock_init(&srq->rq.consumer_lock); 95 94 96 - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; 95 + type = QUEUE_TYPE_FROM_CLIENT; 97 96 q = rxe_queue_init(rxe, &srq->rq.max_wr, 98 97 srq_wqe_size, type); 99 98 if (!q) {
+56 -83
drivers/infiniband/sw/rxe/rxe_verbs.c
··· 29 29 u32 port_num, struct ib_port_attr *attr) 30 30 { 31 31 struct rxe_dev *rxe = to_rdev(dev); 32 - struct rxe_port *port; 33 32 int rc; 34 33 35 - port = &rxe->port; 36 - 37 34 /* *attr being zeroed by the caller, avoid zeroing it here */ 38 - *attr = port->attr; 35 + *attr = rxe->port.attr; 39 36 40 37 mutex_lock(&rxe->usdev_lock); 41 38 rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, ··· 158 161 struct ib_udata *udata) 159 162 160 163 { 161 - int err; 162 164 struct rxe_dev *rxe = to_rdev(ibah->device); 163 165 struct rxe_ah *ah = to_rah(ibah); 166 + struct rxe_create_ah_resp __user *uresp = NULL; 167 + int err; 168 + 169 + if (udata) { 170 + /* test if new user provider */ 171 + if (udata->outlen >= sizeof(*uresp)) 172 + uresp = udata->outbuf; 173 + ah->is_user = true; 174 + } else { 175 + ah->is_user = false; 176 + } 164 177 165 178 err = rxe_av_chk_attr(rxe, init_attr->ah_attr); 166 179 if (err) ··· 179 172 err = rxe_add_to_pool(&rxe->ah_pool, ah); 180 173 if (err) 181 174 return err; 175 + 176 + /* create index > 0 */ 177 + rxe_add_index(ah); 178 + ah->ah_num = ah->pelem.index; 179 + 180 + if (uresp) { 181 + /* only if new user provider */ 182 + err = copy_to_user(&uresp->ah_num, &ah->ah_num, 183 + sizeof(uresp->ah_num)); 184 + if (err) { 185 + rxe_drop_index(ah); 186 + rxe_drop_ref(ah); 187 + return -EFAULT; 188 + } 189 + } else if (ah->is_user) { 190 + /* only if old user provider */ 191 + ah->ah_num = 0; 192 + } 182 193 183 194 rxe_init_av(init_attr->ah_attr, &ah->av); 184 195 return 0; ··· 230 205 { 231 206 struct rxe_ah *ah = to_rah(ibah); 232 207 208 + rxe_drop_index(ah); 233 209 rxe_drop_ref(ah); 234 210 return 0; 235 211 } ··· 244 218 int num_sge = ibwr->num_sge; 245 219 int full; 246 220 247 - if (rq->is_user) 248 - full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER); 249 - else 250 - full = queue_full(rq->queue, QUEUE_TYPE_KERNEL); 251 - 221 + full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); 252 222 if (unlikely(full)) { 253 223 err = -ENOMEM; 254 224 goto err1; ··· 259 237 for (i = 0; i < num_sge; i++) 260 238 length += ibwr->sg_list[i].length; 261 239 262 - if (rq->is_user) 263 - recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER); 264 - else 265 - recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL); 266 - 240 + recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); 267 241 recv_wqe->wr_id = ibwr->wr_id; 268 242 recv_wqe->num_sge = num_sge; 269 243 ··· 272 254 recv_wqe->dma.cur_sge = 0; 273 255 recv_wqe->dma.sge_offset = 0; 274 256 275 - if (rq->is_user) 276 - advance_producer(rq->queue, QUEUE_TYPE_FROM_USER); 277 - else 278 - advance_producer(rq->queue, QUEUE_TYPE_KERNEL); 257 + queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); 279 258 280 259 return 0; 281 260 ··· 296 281 if (udata->outlen < sizeof(*uresp)) 297 282 return -EINVAL; 298 283 uresp = udata->outbuf; 299 - srq->is_user = true; 300 - } else { 301 - srq->is_user = false; 302 284 } 303 285 304 286 err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); ··· 534 522 if (qp_type(qp) == IB_QPT_UD || 535 523 qp_type(qp) == IB_QPT_SMI || 536 524 qp_type(qp) == IB_QPT_GSI) { 525 + struct ib_ah *ibah = ud_wr(ibwr)->ah; 526 + 537 527 wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; 538 528 wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; 529 + wr->wr.ud.ah_num = to_rah(ibah)->ah_num; 539 530 if (qp_type(qp) == IB_QPT_GSI) 540 531 wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; 541 532 if (wr->opcode == IB_WR_SEND_WITH_IMM) ··· 610 595 return; 611 596 } 612 597 613 - 
if (qp_type(qp) == IB_QPT_UD || 614 - qp_type(qp) == IB_QPT_SMI || 615 - qp_type(qp) == IB_QPT_GSI) 616 - memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); 617 - 618 598 if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) 619 599 copy_inline_data_to_wqe(wqe, ibwr); 620 600 else ··· 643 633 644 634 spin_lock_irqsave(&qp->sq.sq_lock, flags); 645 635 646 - if (qp->is_user) 647 - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); 648 - else 649 - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); 636 + full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER); 650 637 651 638 if (unlikely(full)) { 652 639 spin_unlock_irqrestore(&qp->sq.sq_lock, flags); 653 640 return -ENOMEM; 654 641 } 655 642 656 - if (qp->is_user) 657 - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); 658 - else 659 - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); 660 - 643 + send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER); 661 644 init_send_wqe(qp, ibwr, mask, length, send_wqe); 662 645 663 - if (qp->is_user) 664 - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); 665 - else 666 - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); 646 + queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER); 667 647 668 648 spin_unlock_irqrestore(&qp->sq.sq_lock, flags); 669 649 ··· 845 845 846 846 spin_lock_irqsave(&cq->cq_lock, flags); 847 847 for (i = 0; i < num_entries; i++) { 848 - if (cq->is_user) 849 - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); 850 - else 851 - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); 848 + cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER); 852 849 if (!cqe) 853 850 break; 854 851 855 852 memcpy(wc++, &cqe->ibwc, sizeof(*wc)); 856 - if (cq->is_user) 857 - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); 858 - else 859 - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); 853 + queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER); 860 854 } 861 855 spin_unlock_irqrestore(&cq->cq_lock, flags); 862 856 ··· 862 868 struct rxe_cq *cq = to_rcq(ibcq); 863 869 int count; 864 870 865 - if (cq->is_user) 866 - count = queue_count(cq->queue, QUEUE_TYPE_TO_USER); 867 - else 868 - count = queue_count(cq->queue, QUEUE_TYPE_KERNEL); 871 + count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER); 869 872 870 873 return (count > wc_cnt) ? 
wc_cnt : count; 871 874 } ··· 878 887 if (cq->notify != IB_CQ_NEXT_COMP) 879 888 cq->notify = flags & IB_CQ_SOLICITED_MASK; 880 889 881 - if (cq->is_user) 882 - empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER); 883 - else 884 - empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL); 890 + empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER); 885 891 886 892 if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty) 887 893 ret = 1; ··· 975 987 return ERR_PTR(err); 976 988 } 977 989 978 - static int rxe_set_page(struct ib_mr *ibmr, u64 addr) 979 - { 980 - struct rxe_mr *mr = to_rmr(ibmr); 981 - struct rxe_map *map; 982 - struct rxe_phys_buf *buf; 983 - 984 - if (unlikely(mr->nbuf == mr->num_buf)) 985 - return -ENOMEM; 986 - 987 - map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; 988 - buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; 989 - 990 - buf->addr = addr; 991 - buf->size = ibmr->page_size; 992 - mr->nbuf++; 993 - 994 - return 0; 995 - } 996 - 990 + /* build next_map_set from scatterlist 991 + * The IB_WR_REG_MR WR will swap map_sets 992 + */ 997 993 static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 998 994 int sg_nents, unsigned int *sg_offset) 999 995 { 1000 996 struct rxe_mr *mr = to_rmr(ibmr); 997 + struct rxe_map_set *set = mr->next_map_set; 1001 998 int n; 1002 999 1003 - mr->nbuf = 0; 1000 + set->nbuf = 0; 1004 1001 1005 - n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); 1002 + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page); 1006 1003 1007 - mr->va = ibmr->iova; 1008 - mr->iova = ibmr->iova; 1009 - mr->length = ibmr->length; 1010 - mr->page_shift = ilog2(ibmr->page_size); 1011 - mr->page_mask = ibmr->page_size - 1; 1012 - mr->offset = mr->iova & mr->page_mask; 1004 + set->va = ibmr->iova; 1005 + set->iova = ibmr->iova; 1006 + set->length = ibmr->length; 1007 + set->page_shift = ilog2(ibmr->page_size); 1008 + set->page_mask = ibmr->page_size - 1; 1009 + set->offset = set->iova & set->page_mask; 1013 1010 1014 1011 return n; 1015 1012 }
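For context on the map_set rework above: rxe_map_mr_sg() now fills mr->next_map_set, and the new set only becomes current when the requester later executes the IB_WR_REG_MR work request (rxe_reg_fast_mr() in the rxe_req.c hunk). From a kernel ULP's point of view nothing changes; below is a minimal sketch of the usual fast-registration flow, with illustrative names and access flags (example_fast_reg() is not code from this series), assuming sg has already been DMA mapped.

static int example_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
			    struct scatterlist *sg, int sg_nents)
{
	struct ib_reg_wr reg_wr = {};
	int n;

	/* build the (next) page map inside the MR */
	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
	if (n != sg_nents)
		return n < 0 ? n : -EINVAL;

	/* post the registration; rxe commits the new map set when it
	 * processes this local work request
	 */
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, NULL);
}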
+25 -37
drivers/infiniband/sw/rxe/rxe_verbs.h
··· 46 46 struct rxe_ah { 47 47 struct ib_ah ibah; 48 48 struct rxe_pool_entry pelem; 49 - struct rxe_pd *pd; 50 49 struct rxe_av av; 50 + bool is_user; 51 + int ah_num; 51 52 }; 52 53 53 54 struct rxe_cqe { ··· 65 64 spinlock_t cq_lock; 66 65 u8 notify; 67 66 bool is_dying; 68 - int is_user; 67 + bool is_user; 69 68 struct tasklet_struct comp_task; 70 69 }; 71 70 ··· 78 77 }; 79 78 80 79 struct rxe_sq { 81 - bool is_user; 82 80 int max_wr; 83 81 int max_sge; 84 82 int max_inline; ··· 86 86 }; 87 87 88 88 struct rxe_rq { 89 - bool is_user; 90 89 int max_wr; 91 90 int max_sge; 92 91 spinlock_t producer_lock; /* guard queue producer */ ··· 99 100 struct rxe_pd *pd; 100 101 struct rxe_rq rq; 101 102 u32 srq_num; 102 - bool is_user; 103 103 104 104 int limit; 105 105 int error; ··· 238 240 239 241 struct sk_buff_head req_pkts; 240 242 struct sk_buff_head resp_pkts; 241 - struct sk_buff_head send_pkts; 242 243 243 244 struct rxe_req_info req; 244 245 struct rxe_comp_info comp; ··· 264 267 }; 265 268 266 269 enum rxe_mr_state { 267 - RXE_MR_STATE_ZOMBIE, 268 270 RXE_MR_STATE_INVALID, 269 271 RXE_MR_STATE_FREE, 270 272 RXE_MR_STATE_VALID, 271 - }; 272 - 273 - enum rxe_mr_type { 274 - RXE_MR_TYPE_NONE, 275 - RXE_MR_TYPE_DMA, 276 - RXE_MR_TYPE_MR, 277 273 }; 278 274 279 275 enum rxe_mr_copy_dir { ··· 290 300 struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; 291 301 }; 292 302 303 + struct rxe_map_set { 304 + struct rxe_map **map; 305 + u64 va; 306 + u64 iova; 307 + size_t length; 308 + u32 offset; 309 + u32 nbuf; 310 + int page_shift; 311 + int page_mask; 312 + }; 313 + 293 314 static inline int rkey_is_mw(u32 rkey) 294 315 { 295 316 u32 index = rkey >> 8; ··· 314 313 315 314 struct ib_umem *umem; 316 315 316 + u32 lkey; 317 + u32 rkey; 317 318 enum rxe_mr_state state; 318 - enum rxe_mr_type type; 319 - u64 va; 320 - u64 iova; 321 - size_t length; 322 - u32 offset; 319 + enum ib_mr_type type; 323 320 int access; 324 321 325 - int page_shift; 326 - int page_mask; 327 322 int map_shift; 328 323 int map_mask; 329 324 330 325 u32 num_buf; 331 - u32 nbuf; 332 326 333 327 u32 max_buf; 334 328 u32 num_map; 335 329 336 330 atomic_t num_mw; 337 331 338 - struct rxe_map **map; 332 + struct rxe_map_set *cur_map_set; 333 + struct rxe_map_set *next_map_set; 339 334 }; 340 335 341 336 enum rxe_mw_state { ··· 347 350 enum rxe_mw_state state; 348 351 struct rxe_qp *qp; /* Type 2 only */ 349 352 struct rxe_mr *mr; 353 + u32 rkey; 350 354 int access; 351 355 u64 addr; 352 356 u64 length; ··· 467 469 return mw ? container_of(mw, struct rxe_mw, ibmw) : NULL; 468 470 } 469 471 472 + static inline struct rxe_pd *rxe_ah_pd(struct rxe_ah *ah) 473 + { 474 + return to_rpd(ah->ibah.pd); 475 + } 476 + 470 477 static inline struct rxe_pd *mr_pd(struct rxe_mr *mr) 471 478 { 472 479 return to_rpd(mr->ibmr.pd); 473 480 } 474 481 475 - static inline u32 mr_lkey(struct rxe_mr *mr) 476 - { 477 - return mr->ibmr.lkey; 478 - } 479 - 480 - static inline u32 mr_rkey(struct rxe_mr *mr) 481 - { 482 - return mr->ibmr.rkey; 483 - } 484 - 485 482 static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) 486 483 { 487 484 return to_rpd(mw->ibmw.pd); 488 - } 489 - 490 - static inline u32 rxe_mw_rkey(struct rxe_mw *mw) 491 - { 492 - return mw->ibmw.rkey; 493 485 } 494 486 495 487 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
+1 -3
drivers/infiniband/sw/siw/siw_cm.c
··· 1951 1951 1952 1952 void siw_cm_exit(void) 1953 1953 { 1954 - if (siw_cm_wq) { 1955 - flush_workqueue(siw_cm_wq); 1954 + if (siw_cm_wq) 1956 1955 destroy_workqueue(siw_cm_wq); 1957 - } 1958 1956 }
+3 -1
drivers/infiniband/ulp/ipoib/ipoib_cm.c
··· 1583 1583 { 1584 1584 struct ipoib_dev_priv *priv = ipoib_priv(dev); 1585 1585 int max_srq_sge, i; 1586 + u8 addr; 1586 1587 1587 1588 INIT_LIST_HEAD(&priv->cm.passive_ids); 1588 1589 INIT_LIST_HEAD(&priv->cm.reap_list); ··· 1637 1636 } 1638 1637 } 1639 1638 1640 - priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; 1639 + addr = IPOIB_FLAGS_RC; 1640 + dev_addr_mod(dev, 0, &addr, 1); 1641 1641 return 0; 1642 1642 } 1643 1643
+4 -5
drivers/infiniband/ulp/ipoib/ipoib_ib.c
··· 1057 1057 { 1058 1058 union ib_gid search_gid; 1059 1059 union ib_gid gid0; 1060 - union ib_gid *netdev_gid; 1061 1060 int err; 1062 1061 u16 index; 1063 1062 u32 port; 1064 1063 bool ret = false; 1065 1064 1066 - netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); 1067 1065 if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) 1068 1066 return false; 1069 1067 ··· 1071 1073 * to do it later 1072 1074 */ 1073 1075 priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix; 1074 - netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix; 1076 + dev_addr_mod(priv->dev, 4, (u8 *)&gid0.global.subnet_prefix, 1077 + sizeof(gid0.global.subnet_prefix)); 1075 1078 search_gid.global.subnet_prefix = gid0.global.subnet_prefix; 1076 1079 1077 1080 search_gid.global.interface_id = priv->local_gid.global.interface_id; ··· 1134 1135 if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) { 1135 1136 memcpy(&priv->local_gid, &gid0, 1136 1137 sizeof(priv->local_gid)); 1137 - memcpy(priv->dev->dev_addr + 4, &gid0, 1138 - sizeof(priv->local_gid)); 1138 + dev_addr_mod(priv->dev, 4, (u8 *)&gid0, 1139 + sizeof(priv->local_gid)); 1139 1140 ret = true; 1140 1141 } 1141 1142 }
+9 -9
drivers/infiniband/ulp/ipoib/ipoib_main.c
··· 1696 1696 static int ipoib_dev_init_default(struct net_device *dev) 1697 1697 { 1698 1698 struct ipoib_dev_priv *priv = ipoib_priv(dev); 1699 + u8 addr_mod[3]; 1699 1700 1700 1701 ipoib_napi_add(dev); 1701 1702 ··· 1724 1723 } 1725 1724 1726 1725 /* after qp created set dev address */ 1727 - priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; 1728 - priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; 1729 - priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; 1726 + addr_mod[0] = (priv->qp->qp_num >> 16) & 0xff; 1727 + addr_mod[1] = (priv->qp->qp_num >> 8) & 0xff; 1728 + addr_mod[2] = (priv->qp->qp_num) & 0xff; 1729 + dev_addr_mod(priv->dev, 1, addr_mod, sizeof(addr_mod)); 1730 1730 1731 1731 return 0; 1732 1732 ··· 1888 1886 priv->ca->name, priv->port, result); 1889 1887 return result; 1890 1888 } 1891 - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, 1892 - sizeof(union ib_gid)); 1889 + dev_addr_mod(priv->dev, 4, priv->local_gid.raw, sizeof(union ib_gid)); 1893 1890 1894 1891 SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); 1895 1892 priv->dev->dev_port = priv->port - 1; ··· 1909 1908 memcpy(&priv->local_gid, priv->dev->dev_addr + 4, 1910 1909 sizeof(priv->local_gid)); 1911 1910 else { 1912 - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, 1913 - INFINIBAND_ALEN); 1911 + __dev_addr_set(priv->dev, ppriv->dev->dev_addr, 1912 + INFINIBAND_ALEN); 1914 1913 memcpy(&priv->local_gid, &ppriv->local_gid, 1915 1914 sizeof(priv->local_gid)); 1916 1915 } ··· 1998 1997 if (priv->wq) { 1999 1998 /* See ipoib_mcast_carrier_on_task() */ 2000 1999 WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); 2001 - flush_workqueue(priv->wq); 2002 2000 destroy_workqueue(priv->wq); 2003 2001 priv->wq = NULL; 2004 2002 } ··· 2327 2327 memcpy(&priv->local_gid.global.interface_id, 2328 2328 &gid->global.interface_id, 2329 2329 sizeof(gid->global.interface_id)); 2330 - memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); 2330 + dev_addr_mod(netdev, 4, (u8 *)&priv->local_gid, sizeof(priv->local_gid)); 2331 2331 clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); 2332 2332 2333 2333 netif_addr_unlock_bh(netdev);
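The ipoib hunks above (and the mlx5 ipoib hunk further down) all make the same conversion: rather than writing into dev->dev_addr directly, the bytes are staged in a local buffer and applied with dev_addr_mod(dev, offset, buf, len), since the netdev core is moving hardware-address updates behind helpers. A minimal sketch of the pattern, assuming an IPoIB-style hardware address where bytes 1..3 carry the QP number (example_set_ipoib_qpn() is illustrative, not code from this series):

static void example_set_ipoib_qpn(struct net_device *dev, u32 qpn)
{
	u8 qpn_bytes[3];

	qpn_bytes[0] = (qpn >> 16) & 0xff;
	qpn_bytes[1] = (qpn >> 8) & 0xff;
	qpn_bytes[2] = qpn & 0xff;

	/* update only dev_addr[1..3]; the rest of the address is untouched */
	dev_addr_mod(dev, 1, qpn_bytes, sizeof(qpn_bytes));
}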
+2 -2
drivers/infiniband/ulp/opa_vnic/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 config INFINIBAND_OPA_VNIC 3 - tristate "Intel OPA VNIC support" 3 + tristate "Cornelis OPX VNIC support" 4 4 depends on X86_64 && INFINIBAND 5 5 help 6 - This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC) 6 + This is Omni-Path Express (OPX) Virtual Network Interface Controller (VNIC) 7 7 driver for Ethernet over Omni-Path feature. It implements the HW 8 8 independent VNIC functionality. It interfaces with Linux stack for 9 9 data path and IB MAD for the control path.
+2 -1
drivers/infiniband/ulp/opa_vnic/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - # Makefile - Intel Omni-Path Virtual Network Controller driver 2 + # Makefile - Cornelis Omni-Path Express Virtual Network Controller driver 3 3 # Copyright(c) 2017, Intel Corporation. 4 + # Copyright(c) 2021, Cornelis Networks. 4 5 # 5 6 obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o 6 7
+4 -3
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
··· 1 1 /* 2 2 * Copyright(c) 2017 Intel Corporation. 3 + * Copyright(c) 2021 Cornelis Networks. 3 4 * 4 5 * This file is provided under a dual BSD/GPLv2 license. When using or 5 6 * redistributing this file, you may do so under either license. ··· 47 46 */ 48 47 49 48 /* 50 - * This file contains OPA Virtual Network Interface Controller (VNIC) 49 + * This file contains OPX Virtual Network Interface Controller (VNIC) 51 50 * Ethernet Management Agent (EMA) driver 52 51 */ 53 52 ··· 1052 1051 module_exit(opa_vnic_deinit); 1053 1052 1054 1053 MODULE_LICENSE("Dual BSD/GPL"); 1055 - MODULE_AUTHOR("Intel Corporation"); 1056 - MODULE_DESCRIPTION("Intel OPA Virtual Network driver"); 1054 + MODULE_AUTHOR("Cornelis Networks"); 1055 + MODULE_DESCRIPTION("Cornelis OPX Virtual Network driver");
+29 -26
drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c
··· 37 37 s->rdma.failover_cnt++; 38 38 } 39 39 40 - int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, 41 - char *buf, size_t len) 40 + int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) 42 41 { 43 42 struct rtrs_clt_stats_pcpu *s; 44 43 45 44 size_t used; 46 45 int cpu; 47 46 48 - used = scnprintf(buf, len, " "); 49 - for_each_possible_cpu(cpu) 50 - used += scnprintf(buf + used, len - used, " CPU%u", cpu); 51 - 52 - used += scnprintf(buf + used, len - used, "\nfrom:"); 47 + used = 0; 53 48 for_each_possible_cpu(cpu) { 54 49 s = per_cpu_ptr(stats->pcpu_stats, cpu); 55 - used += scnprintf(buf + used, len - used, " %d", 50 + used += sysfs_emit_at(buf, used, "%d ", 56 51 atomic_read(&s->cpu_migr.from)); 57 52 } 58 53 59 - used += scnprintf(buf + used, len - used, "\nto :"); 60 - for_each_possible_cpu(cpu) { 61 - s = per_cpu_ptr(stats->pcpu_stats, cpu); 62 - used += scnprintf(buf + used, len - used, " %d", 63 - s->cpu_migr.to); 64 - } 65 - used += scnprintf(buf + used, len - used, "\n"); 54 + used += sysfs_emit_at(buf, used, "\n"); 66 55 67 56 return used; 68 57 } 69 58 70 - int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, 71 - size_t len) 59 + int rtrs_clt_stats_migration_to_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) 72 60 { 73 - return scnprintf(buf, len, "%d %d\n", 74 - stats->reconnects.successful_cnt, 75 - stats->reconnects.fail_cnt); 61 + struct rtrs_clt_stats_pcpu *s; 62 + 63 + size_t used; 64 + int cpu; 65 + 66 + used = 0; 67 + for_each_possible_cpu(cpu) { 68 + s = per_cpu_ptr(stats->pcpu_stats, cpu); 69 + used += sysfs_emit_at(buf, used, "%d ", s->cpu_migr.to); 70 + } 71 + 72 + used += sysfs_emit_at(buf, used, "\n"); 73 + 74 + return used; 76 75 } 77 76 78 - ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, 79 - char *page, size_t len) 77 + int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf) 78 + { 79 + return sysfs_emit(buf, "%d %d\n", stats->reconnects.successful_cnt, 80 + stats->reconnects.fail_cnt); 81 + } 82 + 83 + ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, char *page) 80 84 { 81 85 struct rtrs_clt_stats_rdma sum; 82 86 struct rtrs_clt_stats_rdma *r; ··· 98 94 sum.failover_cnt += r->failover_cnt; 99 95 } 100 96 101 - return scnprintf(page, len, "%llu %llu %llu %llu %u %llu\n", 97 + return sysfs_emit(page, "%llu %llu %llu %llu %u %llu\n", 102 98 sum.dir[READ].cnt, sum.dir[READ].size_total, 103 99 sum.dir[WRITE].cnt, sum.dir[WRITE].size_total, 104 100 atomic_read(&stats->inflight), sum.failover_cnt); 105 101 } 106 102 107 - ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, 108 - char *page, size_t len) 103 + ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, char *page) 109 104 { 110 - return scnprintf(page, len, "echo 1 to reset all statistics\n"); 105 + return sysfs_emit(page, "echo 1 to reset all statistics\n"); 111 106 } 112 107 113 108 int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable)
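The rewritten rtrs client stats helpers drop their len argument because sysfs_emit() and sysfs_emit_at() already bound the output to a single PAGE_SIZE sysfs page. A minimal sketch of the per-CPU pattern used above, with an illustrative stats structure (example_pcpu_stats and example_pcpu_to_str are made-up names):

struct example_pcpu_stats {
	int count;
};

static ssize_t example_pcpu_to_str(struct example_pcpu_stats __percpu *stats,
				   char *buf)
{
	size_t used = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		used += sysfs_emit_at(buf, used, "%d ",
				      per_cpu_ptr(stats, cpu)->count);

	return used + sysfs_emit_at(buf, used, "\n");
}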
+8 -3
drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
··· 296 296 __ATTR(remove_path, 0644, rtrs_clt_remove_path_show, 297 297 rtrs_clt_remove_path_store); 298 298 299 - STAT_ATTR(struct rtrs_clt_stats, cpu_migration, 300 - rtrs_clt_stats_migration_cnt_to_str, 299 + STAT_ATTR(struct rtrs_clt_stats, cpu_migration_from, 300 + rtrs_clt_stats_migration_from_cnt_to_str, 301 + rtrs_clt_reset_cpu_migr_stats); 302 + 303 + STAT_ATTR(struct rtrs_clt_stats, cpu_migration_to, 304 + rtrs_clt_stats_migration_to_cnt_to_str, 301 305 rtrs_clt_reset_cpu_migr_stats); 302 306 303 307 STAT_ATTR(struct rtrs_clt_stats, reconnects, ··· 317 313 rtrs_clt_reset_all_stats); 318 314 319 315 static struct attribute *rtrs_clt_stats_attrs[] = { 320 - &cpu_migration_attr.attr, 316 + &cpu_migration_from_attr.attr, 317 + &cpu_migration_to_attr.attr, 321 318 &reconnects_attr.attr, 322 319 &rdma_attr.attr, 323 320 &reset_all_attr.attr,
+6
drivers/infiniband/ulp/rtrs/rtrs-clt.c
··· 2788 2788 struct rtrs_clt *clt; 2789 2789 int err, i; 2790 2790 2791 + if (strchr(sessname, '/') || strchr(sessname, '.')) { 2792 + pr_err("sessname cannot contain / and .\n"); 2793 + err = -EINVAL; 2794 + goto out; 2795 + } 2796 + 2791 2797 clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, 2792 2798 ops->link_ev, 2793 2799 reconnect_delay_sec,
+6 -7
drivers/infiniband/ulp/rtrs/rtrs-clt.h
··· 224 224 int rtrs_clt_reset_rdma_lat_distr_stats(struct rtrs_clt_stats *stats, 225 225 bool enable); 226 226 ssize_t rtrs_clt_stats_rdma_lat_distr_to_str(struct rtrs_clt_stats *stats, 227 - char *page, size_t len); 227 + char *page); 228 228 int rtrs_clt_reset_cpu_migr_stats(struct rtrs_clt_stats *stats, bool enable); 229 - int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf, 230 - size_t len); 229 + int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); 230 + int rtrs_clt_stats_migration_to_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); 231 231 int rtrs_clt_reset_reconnects_stat(struct rtrs_clt_stats *stats, bool enable); 232 - int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, 233 - size_t len); 232 + int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf); 234 233 int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable); 235 234 ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, 236 - char *page, size_t len); 235 + char *page); 237 236 int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *stats, bool enable); 238 237 ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, 239 - char *page, size_t len); 238 + char *page); 240 239 241 240 /* rtrs-clt-sysfs.c */ 242 241
+1 -1
drivers/infiniband/ulp/rtrs/rtrs-pri.h
··· 398 398 { \ 399 399 type *stats = container_of(kobj, type, kobj_stats); \ 400 400 \ 401 - return print(stats, page, PAGE_SIZE); \ 401 + return print(stats, page); \ 402 402 } 403 403 404 404 #define STAT_ATTR(type, stat, print, reset) \
+1 -2
drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c
··· 23 23 return -EINVAL; 24 24 } 25 25 26 - ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, 27 - char *page, size_t len) 26 + ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page) 28 27 { 29 28 struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; 30 29
+1 -1
drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
··· 102 102 sess = container_of(kobj, struct rtrs_srv_sess, kobj); 103 103 cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, 104 104 page, PAGE_SIZE); 105 - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); 105 + return cnt + sysfs_emit_at(page, cnt, "\n"); 106 106 } 107 107 108 108 static struct kobj_attribute rtrs_srv_src_addr_attr =
+6
drivers/infiniband/ulp/rtrs/rtrs-srv.c
··· 803 803 return err; 804 804 } 805 805 806 + if (strchr(msg->sessname, '/') || strchr(msg->sessname, '.')) { 807 + rtrs_err(s, "sessname cannot contain / and .\n"); 808 + return -EINVAL; 809 + } 810 + 806 811 if (exist_sessname(sess->srv->ctx, 807 812 msg->sessname, &sess->srv->paths_uuid)) { 808 813 rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); ··· 1771 1766 strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); 1772 1767 1773 1768 sess->s.con_num = con_num; 1769 + sess->s.irq_con_num = con_num; 1774 1770 sess->s.recon_cnt = recon_cnt; 1775 1771 uuid_copy(&sess->s.uuid, uuid); 1776 1772 spin_lock_init(&sess->state_lock);
+1 -2
drivers/infiniband/ulp/rtrs/rtrs-srv.h
··· 136 136 137 137 /* functions which are implemented in rtrs-srv-stats.c */ 138 138 int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable); 139 - ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, 140 - char *page, size_t len); 139 + ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page); 141 140 int rtrs_srv_reset_all_stats(struct rtrs_srv_stats *stats, bool enable); 142 141 ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats, 143 142 char *page, size_t len);
+24 -7
drivers/infiniband/ulp/rtrs/rtrs.c
··· 222 222 } 223 223 } 224 224 225 + static bool is_pollqueue(struct rtrs_con *con) 226 + { 227 + return con->cid >= con->sess->irq_con_num; 228 + } 229 + 225 230 static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, 226 231 enum ib_poll_context poll_ctx) 227 232 { 228 233 struct rdma_cm_id *cm_id = con->cm_id; 229 234 struct ib_cq *cq; 230 235 231 - cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); 236 + if (is_pollqueue(con)) 237 + cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector, 238 + poll_ctx); 239 + else 240 + cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); 241 + 232 242 if (IS_ERR(cq)) { 233 243 rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n", 234 244 PTR_ERR(cq)); ··· 279 269 return ret; 280 270 } 281 271 272 + static void destroy_cq(struct rtrs_con *con) 273 + { 274 + if (con->cq) { 275 + if (is_pollqueue(con)) 276 + ib_free_cq(con->cq); 277 + else 278 + ib_cq_pool_put(con->cq, con->nr_cqe); 279 + } 280 + con->cq = NULL; 281 + } 282 + 282 283 int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, 283 284 u32 max_send_sge, int cq_vector, int nr_cqe, 284 285 u32 max_send_wr, u32 max_recv_wr, ··· 304 283 err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, 305 284 max_send_sge); 306 285 if (err) { 307 - ib_cq_pool_put(con->cq, con->nr_cqe); 308 - con->cq = NULL; 286 + destroy_cq(con); 309 287 return err; 310 288 } 311 289 con->sess = sess; ··· 319 299 rdma_destroy_qp(con->cm_id); 320 300 con->qp = NULL; 321 301 } 322 - if (con->cq) { 323 - ib_cq_pool_put(con->cq, con->nr_cqe); 324 - con->cq = NULL; 325 - } 302 + destroy_cq(con); 326 303 } 327 304 EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy); 328 305
+5 -3
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
··· 474 474 { 475 475 struct mlx5e_priv *priv = mlx5i_epriv(dev); 476 476 struct mlx5i_priv *ipriv = priv->ppriv; 477 + u8 addr_mod[3]; 477 478 478 479 /* Set dev address using underlay QP */ 479 - dev->dev_addr[1] = (ipriv->qpn >> 16) & 0xff; 480 - dev->dev_addr[2] = (ipriv->qpn >> 8) & 0xff; 481 - dev->dev_addr[3] = (ipriv->qpn) & 0xff; 480 + addr_mod[0] = (ipriv->qpn >> 16) & 0xff; 481 + addr_mod[1] = (ipriv->qpn >> 8) & 0xff; 482 + addr_mod[2] = (ipriv->qpn) & 0xff; 483 + dev_addr_mod(dev, 1, addr_mod, sizeof(addr_mod)); 482 484 483 485 /* Add QPN to net-device mapping to HT */ 484 486 mlx5i_pkey_add_qpn(dev, ipriv->qpn);
+4 -13
drivers/net/ethernet/qlogic/qed/qed_rdma.c
··· 19 19 #include <linux/slab.h> 20 20 #include <linux/spinlock.h> 21 21 #include <linux/string.h> 22 + #include <net/addrconf.h> 22 23 #include "qed.h" 23 24 #include "qed_cxt.h" 24 25 #include "qed_hsi.h" ··· 411 410 qed_rdma_resc_free(p_hwfn); 412 411 } 413 412 414 - static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) 415 - { 416 - guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2; 417 - guid[1] = p_hwfn->hw_info.hw_mac_addr[1]; 418 - guid[2] = p_hwfn->hw_info.hw_mac_addr[2]; 419 - guid[3] = 0xff; 420 - guid[4] = 0xfe; 421 - guid[5] = p_hwfn->hw_info.hw_mac_addr[3]; 422 - guid[6] = p_hwfn->hw_info.hw_mac_addr[4]; 423 - guid[7] = p_hwfn->hw_info.hw_mac_addr[5]; 424 - } 425 - 426 413 static void qed_rdma_init_events(struct qed_hwfn *p_hwfn, 427 414 struct qed_rdma_start_in_params *params) 428 415 { ··· 438 449 dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | 439 450 (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION); 440 451 441 - qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid); 452 + addrconf_addr_eui48((u8 *)&dev->sys_image_guid, 453 + p_hwfn->hw_info.hw_mac_addr); 454 + 442 455 dev->node_guid = dev->sys_image_guid; 443 456 444 457 dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE,
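The deleted qed_rdma_get_guid() was an open-coded EUI-48 to EUI-64 expansion; addrconf_addr_eui48() from <net/addrconf.h> produces exactly the same bytes, flipping the locally administered bit of the first octet and inserting 0xff, 0xfe in the middle. A small sketch, assuming a 6-byte MAC (example_mac_to_guid() is illustrative):

/* aa:bb:cc:dd:ee:ff  ->  (aa ^ 2):bb:cc:ff:fe:dd:ee:ff */
static u64 example_mac_to_guid(const u8 *mac)
{
	u64 guid;

	addrconf_addr_eui48((u8 *)&guid, mac);
	return guid;
}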
+2 -2
include/linux/dma-buf.h
··· 86 86 * @pin: 87 87 * 88 88 * This is called by dma_buf_pin() and lets the exporter know that the 89 - * DMA-buf can't be moved any more. The exporter should pin the buffer 90 - * into system memory to make sure it is generally accessible by other 89 + * DMA-buf can't be moved any more. Ideally, the exporter should 90 + * pin the buffer so that it is generally accessible by all 91 91 * devices. 92 92 * 93 93 * This is called with the &dmabuf.resv object locked and is mutual
+1
include/rdma/ib_hdrs.h
··· 232 232 #define IB_BTH_SE_SHIFT 23 233 233 #define IB_BTH_TVER_MASK 0xf 234 234 #define IB_BTH_TVER_SHIFT 16 235 + #define IB_BTH_OPCODE_CNP 0x81 235 236 236 237 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) 237 238 {
+11
include/rdma/ib_umem.h
··· 38 38 unsigned long first_sg_offset; 39 39 unsigned long last_sg_trim; 40 40 void *private; 41 + u8 pinned : 1; 41 42 }; 42 43 43 44 static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) ··· 140 139 unsigned long offset, size_t size, 141 140 int fd, int access, 142 141 const struct dma_buf_attach_ops *ops); 142 + struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, 143 + unsigned long offset, 144 + size_t size, int fd, 145 + int access); 143 146 int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); 144 147 void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); 145 148 void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); ··· 181 176 size_t size, int fd, 182 177 int access, 183 178 struct dma_buf_attach_ops *ops) 179 + { 180 + return ERR_PTR(-EOPNOTSUPP); 181 + } 182 + static inline struct ib_umem_dmabuf * 183 + ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, 184 + size_t size, int fd, int access) 184 185 { 185 186 return ERR_PTR(-EOPNOTSUPP); 186 187 }
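ib_umem_dmabuf_get_pinned() is the new entry point for drivers that cannot react to dma-buf move notifications: the core pins the buffer when the umem is created, so no ODP-style invalidation path is needed. A rough sketch of how a driver's dma-buf MR path might use it (example_umem_from_dmabuf() and its error handling are illustrative, not any driver's actual code):

static struct ib_umem *example_umem_from_dmabuf(struct ib_device *ibdev,
						u64 start, u64 length,
						int dmabuf_fd, int access)
{
	struct ib_umem_dmabuf *umem_dmabuf;

	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length,
						dmabuf_fd, access);
	if (IS_ERR(umem_dmabuf))
		return ERR_CAST(umem_dmabuf);

	/* the buffer stays pinned for the lifetime of the umem; use
	 * umem_dmabuf->umem like any other ib_umem from here on
	 */
	return &umem_dmabuf->umem;
}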
+48 -26
include/rdma/ib_verbs.h
··· 545 545 IB_SPEED_NDR = 128, 546 546 }; 547 547 548 + enum ib_stat_flag { 549 + IB_STAT_FLAG_OPTIONAL = 1 << 0, 550 + }; 551 + 552 + /** 553 + * struct rdma_stat_desc 554 + * @name - The name of the counter 555 + * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL 556 + * @priv - Driver private information; Core code should not use 557 + */ 558 + struct rdma_stat_desc { 559 + const char *name; 560 + unsigned int flags; 561 + const void *priv; 562 + }; 563 + 548 564 /** 549 565 * struct rdma_hw_stats 550 566 * @lock - Mutex to protect parallel write access to lifespan and values ··· 571 555 * should be before being updated again. Stored in jiffies, defaults 572 556 * to 10 milliseconds, drivers can override the default be specifying 573 557 * their own value during their allocation routine. 574 - * @name - Array of pointers to static names used for the counters in 575 - * directory. 558 + * @descs - Array of pointers to static descriptors used for the counters 559 + * in directory. 560 + * @is_disabled - A bitmap to indicate each counter is currently disabled 561 + * or not. 576 562 * @num_counters - How many hardware counters there are. If name is 577 563 * shorter than this number, a kernel oops will result. Driver authors 578 564 * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) ··· 586 568 struct mutex lock; /* Protect lifespan and values[] */ 587 569 unsigned long timestamp; 588 570 unsigned long lifespan; 589 - const char * const *names; 571 + const struct rdma_stat_desc *descs; 572 + unsigned long *is_disabled; 590 573 int num_counters; 591 574 u64 value[]; 592 575 }; 593 576 594 577 #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 595 - /** 596 - * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct 597 - * for drivers. 598 - * @names - Array of static const char * 599 - * @num_counters - How many elements in array 600 - * @lifespan - How many milliseconds between updates 601 - */ 602 - static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( 603 - const char * const *names, int num_counters, 604 - unsigned long lifespan) 605 - { 606 - struct rdma_hw_stats *stats; 607 578 608 - stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), 609 - GFP_KERNEL); 610 - if (!stats) 611 - return NULL; 612 - stats->names = names; 613 - stats->num_counters = num_counters; 614 - stats->lifespan = msecs_to_jiffies(lifespan); 579 + struct rdma_hw_stats *rdma_alloc_hw_stats_struct( 580 + const struct rdma_stat_desc *descs, int num_counters, 581 + unsigned long lifespan); 615 582 616 - return stats; 617 - } 618 - 583 + void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats); 619 584 620 585 /* Define bits for the various functionality this port needs to be supported by 621 586 * the core. ··· 2571 2570 struct rdma_hw_stats *stats, u32 port, int index); 2572 2571 2573 2572 /** 2573 + * modify_hw_stat - Modify the counter configuration 2574 + * @enable: true/false when enable/disable a counter 2575 + * Return codes - 0 on success or error code otherwise. 2576 + */ 2577 + int (*modify_hw_stat)(struct ib_device *device, u32 port, 2578 + unsigned int counter_index, bool enable); 2579 + /** 2574 2580 * Allows rdma drivers to add their own restrack attributes. 
2575 2581 */ 2576 2582 int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); ··· 2913 2905 struct rdma_user_mmap_entry *entry, 2914 2906 size_t length, u32 min_pgoff, 2915 2907 u32 max_pgoff); 2908 + 2909 + static inline int 2910 + rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, 2911 + struct rdma_user_mmap_entry *entry, 2912 + size_t length, u32 pgoff) 2913 + { 2914 + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff, 2915 + pgoff); 2916 + } 2916 2917 2917 2918 struct rdma_user_mmap_entry * 2918 2919 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, ··· 4114 4097 enum dma_data_direction direction, 4115 4098 unsigned long dma_attrs) 4116 4099 { 4100 + int nents; 4101 + 4117 4102 if (ib_uses_virt_dma(dev)) { 4118 - ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); 4103 + nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); 4104 + if (!nents) 4105 + return -EIO; 4106 + sgt->nents = nents; 4119 4107 return 0; 4120 4108 } 4121 4109 return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
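With the counter rework above, a driver now describes its hardware counters with an array of struct rdma_stat_desc, marking optional ones with IB_STAT_FLAG_OPTIONAL so they can be toggled through the new modify_hw_stat op, and rdma_alloc_hw_stats_struct() moves out of line so the core can also set up the per-counter is_disabled bitmap. A minimal sketch of the driver side, with made-up counter names (the example_* identifiers are illustrative):

static const struct rdma_stat_desc example_port_stat_descs[] = {
	{ .name = "rx_pkts" },
	{ .name = "tx_pkts" },
	{ .name = "rx_cnp_handled", .flags = IB_STAT_FLAG_OPTIONAL },
};

static struct rdma_hw_stats *
example_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
	return rdma_alloc_hw_stats_struct(example_port_stat_descs,
					  ARRAY_SIZE(example_port_stat_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}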
+2
include/rdma/rdma_counter.h
··· 63 63 enum rdma_nl_counter_mode *mode, 64 64 enum rdma_nl_counter_mask *mask); 65 65 66 + int rdma_counter_modify(struct ib_device *dev, u32 port, 67 + unsigned int index, bool enable); 66 68 #endif /* _RDMA_COUNTER_H_ */
+15 -3
include/uapi/rdma/efa-abi.h
··· 1 1 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ 2 2 /* 3 - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 3 + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 4 4 */ 5 5 6 6 #ifndef EFA_ABI_USER_H ··· 52 52 __u8 reserved_30[2]; 53 53 }; 54 54 55 + enum { 56 + EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, 57 + }; 58 + 55 59 struct efa_ibv_create_cq { 56 60 __u32 comp_mask; 57 61 __u32 cq_entry_size; 58 62 __u16 num_sub_cqs; 59 - __u8 reserved_50[6]; 63 + __u8 flags; 64 + __u8 reserved_58[5]; 65 + }; 66 + 67 + enum { 68 + EFA_CREATE_CQ_RESP_DB_OFF = 1 << 0, 60 69 }; 61 70 62 71 struct efa_ibv_create_cq_resp { ··· 74 65 __aligned_u64 q_mmap_key; 75 66 __aligned_u64 q_mmap_size; 76 67 __u16 cq_idx; 77 - __u8 reserved_d0[6]; 68 + __u8 reserved_d0[2]; 69 + __u32 db_off; 70 + __aligned_u64 db_mmap_key; 78 71 }; 79 72 80 73 enum { ··· 117 106 enum { 118 107 EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, 119 108 EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, 109 + EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, 120 110 }; 121 111 122 112 struct efa_ibv_ex_query_device_resp {
+5
include/uapi/rdma/rdma_netlink.h
··· 297 297 298 298 RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */ 299 299 300 + RDMA_NLDEV_CMD_STAT_GET_STATUS, 301 + 300 302 RDMA_NLDEV_NUM_OPS 301 303 }; 302 304 ··· 550 548 RDMA_NLDEV_ATTR_MAX_RANGE, /* u32 */ 551 549 552 550 RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* u8 */ 551 + 552 + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, /* u32 */ 553 + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, /* u8 */ 553 554 554 555 /* 555 556 * Always the end
+9 -1
include/uapi/rdma/rdma_user_rxe.h
··· 98 98 __u32 remote_qpn; 99 99 __u32 remote_qkey; 100 100 __u16 pkey_index; 101 + __u16 reserved; 102 + __u32 ah_num; 103 + __u32 pad[4]; 104 + struct rxe_av av; 101 105 } ud; 102 106 struct { 103 107 __aligned_u64 addr; ··· 152 148 153 149 struct rxe_send_wqe { 154 150 struct rxe_send_wr wr; 155 - struct rxe_av av; 156 151 __u32 status; 157 152 __u32 state; 158 153 __aligned_u64 iova; ··· 169 166 __u32 num_sge; 170 167 __u32 padding; 171 168 struct rxe_dma_info dma; 169 + }; 170 + 171 + struct rxe_create_ah_resp { 172 + __u32 ah_num; 173 + __u32 reserved; 172 174 }; 173 175 174 176 struct rxe_create_cq_resp {
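The rdma_user_rxe.h change above is the ABI half of the UD AH rework: a new-style provider receives ah_num from create AH (struct rxe_create_ah_resp) and places it in wr.ud.ah_num, while an old provider keeps passing the address vector inline in the WQE and leaves ah_num at 0. A simplified sketch of the choice the requester then makes, with reference counting elided (example_get_ud_av() is not the literal rxe code):

static struct rxe_av *example_get_ud_av(struct rxe_qp *qp,
					struct rxe_send_wqe *wqe)
{
	struct rxe_ah *ah;

	if (wqe->wr.wr.ud.ah_num) {
		/* new provider or kernel ULP: look the AH up by index */
		ah = rxe_pool_get_index(&to_rdev(qp->ibqp.device)->ah_pool,
					wqe->wr.wr.ud.ah_num);
		return ah ? &ah->av : NULL;
	}

	/* old provider: the AV is carried inline in the WQE */
	return &wqe->wr.wr.ud.av;
}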